Hello, I have a problem with the following exercise:

There's a txt file which contains tab-separated words like:

Name           N1   N2   Type
FFapple        11   15    fruit
ZZbanana       33   45    fruit
ZZcarrot       22   25    vegetable
FFapricot      10   30    fruit
ZZmelon        22   50    fruit
FFbroccoli     16   40    vegetable

I want to do the following things with it:
1. Select the lines that start with FF and write them, without the FF, into another txt file in a format like:

apple -- fruit -- 15
apricot -- fruit -- 30
broccoli -- vegetable -- 40

2. Same as 1., but write only the lines where N2 is bigger than 25:

apricot -- fruit -- 30
broccoli -- vegetable -- 40

But I got stuck at the beginning: I get a syntax error at the first comma.

def filter("list.txt", "new.txt"):
    fs = open("list.txt", "r")
    fd = open("new.txt", "r")
    while 1:
        txt = fs.readline()
        if txt =="":
            break
        if txt[0] == "FF":
     fd.write(txt)
     fs.close()
     fd.close()
     return

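You are not yet clear on how to define a function and how to call one: the definition takes parameter names, and the actual file names are passed in when the function is called. Here is a version with that point corrected. Note that you are also writing to the file fd, which you opened for reading ('r'); it must be opened for writing ('w').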

def filter(li, n):
    """Copy every line of file ``li`` that starts with "FF" into file ``n``.

    li -- path of the input text file (opened for reading)
    n  -- path of the output text file (created or overwritten)

    Returns None.
    """
    # "with" closes both files even if reading or writing fails.
    with open(li, "r") as fs:
        # The output file must be opened with "w": it is written to.
        with open(n, "w") as fd:
            for txt in fs:
                # txt[0] is a single character and can never equal "FF",
                # so the two-character prefix is tested instead.
                if txt.startswith("FF"):
                    fd.write(txt)

## Call the function
filter("list.txt", "new.txt")

Edited 6 Years Ago by pyTony: n/a

Something like this:

#Filters:
def get_FF(entry):
    '''Returns True when the first field of entry starts with "FF"

    entry -- one split line (a list of strings); an empty list, as
             produced by a blank line, gives False instead of raising
             IndexError
    '''
    return bool(entry) and entry[0].startswith("FF")
def get_N2_biggerthan_25(entry):
    '''Returns True when the N2 field (entry[2]) is an integer above 25

    Rows without a usable number in that position -- the
    "Name N1 N2 Type" header, blank lines, short rows -- give False
    instead of making int() or the indexing raise.
    '''
    try:
        return int(entry[2]) > 25
    except (IndexError, ValueError):
        return False


def get_file_data(f):
    '''Gets the data from the input file name f

    Returns the file as a list of lists: one list of whitespace-separated
    fields per line (a blank line gives an empty list).
    '''
    # "with" closes the file even if reading fails part-way through.
    with open(f, "r") as fs:
        return [line.split() for line in fs]

def format_line(lst):
    '''Builds the output text for one row

    Takes the fields in file order (name, N1, N2, type) and returns
    "name -- type -- N2" followed by a newline.  The first two
    characters of the name (FF, ZZ, etc) are cut off and N1 is left out.
    '''
    fields = list(lst)  #work on a copy, the caller's row stays as it was
    name = fields[0][2:]
    del fields[1]  #N1 is not part of the output
    n2 = fields[1]
    kind = fields[2]
    return " -- ".join((name, kind, n2)) + "\n"

def write_selection(func,fdata,fileout):
    '''Writes the rows of fdata accepted by func into the file fileout

    func    -- filter, called with one row; a true result keeps the row
    fdata   -- the file data as a list of rows (lists of strings)
    fileout -- name of the output file, created or overwritten
    '''
    # Build the whole output first: if func or format_line raises, the
    # output file has not been opened (and emptied) yet.  The
    # comprehension also avoids relying on the built-in filter(), a name
    # that is easily shadowed by a user-defined function.
    flines = [format_line(line) for line in fdata if func(line)]
    # "with" closes the file even if writing fails.
    with open(fileout,"w") as fd:
        fd.writelines(flines)

data = get_file_data("list.txt")

# 1. every FF line
write_selection(get_FF,data,"new.txt")

# 2. same as 1., but only where N2 is bigger than 25.  Testing get_FF
# first keeps the selection to FF lines, and the "Name N1 N2 Type"
# header is rejected before get_N2_biggerthan_25 tries to convert it.
def get_FF_and_N2_biggerthan_25(entry):
    return get_FF(entry) and get_N2_biggerthan_25(entry)

write_selection(get_FF_and_N2_biggerthan_25,data,"new2.txt")

try this
#!/usr/bin/env python
import re
def filter(file_input,file_out,min_n2=None):
    """Write the FF lines of file_input to file_out as "name -- type -- N2".

    file_input -- path of the input file (columns Name, N1, N2, Type,
                  separated by tabs or blanks)
    file_out   -- path of the output file (created or overwritten)
    min_n2     -- optional number; when given, only lines whose N2 is
                  bigger than it are written

    Raises ValueError if min_n2 is given and an FF line has a
    non-numeric N2.
    """
    with open(file_input,'r') as fin:
        with open(file_out,'w') as fout:
            for eachline in fin:
                # split() without an argument copes with tabs and runs of
                # blanks and drops the newline; split(" ") would give
                # empty fields for padded columns.
                fields = eachline.split()
                # Skips the header, blank lines and non-FF lines.
                if len(fields) < 4 or not fields[0].startswith('FF'):
                    continue
                if min_n2 is not None and int(fields[2]) <= min_n2:
                    continue
                # The line is built here, for this row only, so no stale
                # text from an earlier row can be written.
                fout.write(fields[0][2:] + " -- " + fields[3] + " -- " + fields[2] + "\n")
filter('test.txt',"hello.txt")

try this

#!/usr/bin/env python
import re
def filter(file_input,file_out,min_n2=None):
    """Write the FF lines of file_input to file_out as "name -- type -- N2".

    file_input -- path of the input file (columns Name, N1, N2, Type,
                  separated by tabs or blanks)
    file_out   -- path of the output file (created or overwritten)
    min_n2     -- optional number; when given, only lines whose N2 is
                  bigger than it are written

    Raises ValueError if min_n2 is given and an FF line has a
    non-numeric N2.
    """
    with open(file_input,'r') as fin:
        with open(file_out,'w') as fout:
            for eachline in fin:
                # split() without an argument copes with tabs and runs of
                # blanks and drops the newline; split(" ") would give
                # empty fields for padded columns.
                fields = eachline.split()
                # Skips the header, blank lines and non-FF lines.
                if len(fields) < 4 or not fields[0].startswith('FF'):
                    continue
                if min_n2 is not None and int(fields[2]) <= min_n2:
                    continue
                # The line is built here, for this row only, so no stale
                # text from an earlier row can be written.
                fout.write(fields[0][2:] + " -- " + fields[3] + " -- " + fields[2] + "\n")
filter('test.txt',"hello.txt")

Thanks, the first part works, but for the 2nd I get an error at the line

if int(list[2]) > 25:

because of the N2 (the letter N, I think?). I guess I have to skip the whole first line somehow?

#!/usr/bin/env python
import re
def filter(file_input, file_out):
        fin = open(file_input,'r')
        fout = open(file_out,'w')
        for eachline in fin:              
                if re.match('FF, eachline):
                        eachline.rstrip("\n")
                        list = eachline.split("\t")
                        make = str(list[0])[2:] + " -- " + str(list[2]) + " -- " + str(list[3] + "\n")
                        print make
                        fout.write(make)
                else:
                        eachline.rstrip("\n")
                        list = eachline.split("\t")
                        if int(list[2]) > 25:
                                make = str(list[0])[2:] + " -- " + str(list[2]) + " -- " + str(list[3] + "\n")
                                fout.write(make)
                                
        fin.close()
        fout.close()
        
filter("list.txt","123.txt")

You probably shouldn't name your function "filter", or your variable "list".

If you get an exception, just put a try-except in there.

Edited 6 Years Ago by jcao219: n/a

I tried to add (\d+) but I still get:
invalid literal for int() with base 10: 'N2'
I thought \d+ should match only digit characters, shouldn't it?

#!/usr/bin/env python
import re
def f1(file_input, file_out):
        m = '(\d+)'
        s = re.compile(m)
        fin = open(file_input,'r')
        fout = open(file_out,'w')
        for eachline in fin:              
                if re.match('FF', eachline):
                        eachline.rstrip("\n")
                        ind = eachline.split("\t")
                        make = str(ind[0])[2:] + " -- " + str(ind[2]) + " -- " + str(ind[3] + "\n")
                        print make
                        fout.write(make)
                else:
                        num = s.findall(eachline)
                        eachline.rstrip("\n")
                        ind = eachline.split("\t")
                        if int(list[2]) > 25:
                                make = str(ind[0])[2:] + " -- " + str(ind[2]) + " -- " + str(ind[3] + "\n")
                                fout.write(make)
                                
        fin.close()
        fout.close()
        
f1("list.txt", "123.txt")

What exactly is the problem?
From this file:

Name           N1   N2   Type
FFapple        11   15    fruit
ZZbanana       33   45    fruit
ZZcarrot       22   25    vegetable
FFapricot      10   30    fruit
ZZmelon        22   50    fruit
FFbroccoli     16   40    vegetable

What do you want the output file to have?

I don't understand what you want, but if you want to write out your second option, or print it in the console, then my previous post is OK:
if int(listano[2]) > 25:
make=str(list[0])[2:] + "-- " + str(list[3])
make=make + " -- " + str(list[2])
print make
#fout.write(make)

invalid literal for int() with base 10: 'N2'
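int() raises this error when its argument is not a valid number: it cannot convert an empty string ('') or, as in the message above, the header text 'N2' to an int. Make sure the argument of int() is neither empty nor non-numeric; just print it and check.
My question: do you want to check whether the heading name == N2?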

Edited 6 Years Ago by utpalendu: n/a

What exactly is the problem?
From this file:

Name           N1   N2   Type
FFapple        11   15    fruit
ZZbanana       33   45    fruit
ZZcarrot       22   25    vegetable
FFapricot      10   30    fruit
ZZmelon        22   50    fruit
FFbroccoli     16   40    vegetable

What do you want the output file to have?

I mean to write out the lines that contain FF, without the FF, and also to check whether the N2 numbers are bigger than 25, so that only the FF lines (without FF) whose N2 is bigger than 25 are written out.

From

apple -- fruit -- 15
apricot -- fruit -- 30
broccoli -- vegetable -- 40

should be

apricot -- fruit -- 30
broccoli -- vegetable -- 40

because they are bigger than 25

Here is a string-to-print-output solution. Replace test with the file contents and print with a write to file if necessary.

test = """
Name           N1   N2   Type
FFapple        11   15    fruit
ZZbanana       33   45    fruit
ZZcarrot       22   25    vegetable
FFapricot      10   30    fruit
ZZmelon        22   50    fruit
FFbroccoli     16   40    vegetable
"""
## split every line into its fields once
rows = [line.split() for line in test.split('\n')]
## if x and .. drops out empty lines before trying to index the line;
## the FF test comes before int(), so the header row is never converted
a = [x[0][2:] + ' -- ' + x[3] + ' -- ' + x[2]
     for x in rows
     if x and x[0][:2] == 'FF' and int(x[2]) > 25]
## print(r) with one argument gives the same output on Python 2 and 3
for r in a:
    print(r)
"""Output:
>>> 
apricot -- fruit -- 30
broccoli -- vegetable -- 40
>>> """

Thanks, but when I try to write out to a file I only get an empty file. What did I do wrong?

#!/usr/bin/env python
import re

fin = open("test.txt", "r")
fout = open("123.txt", "w")
       
a = [ x[0][2:] + " -- " + x[3] + " -- " + x[2] + ("\n")
      for x in [
              eachline.split() for eachline in ("test.txt").split("\n")
              ] if x and x[0][:2] == "FF" and int(x[2]) > 25
      ]
for r in a: print r

for r in a: fout.write(r)
      

fin.close()
fout.close()

Thanks, but when I try to write out to a file I only get an empty file. What did I do wrong?

Never mind, I got it:

#!/usr/bin/env python
import re

# Read the whole input first; "with" closes the file even on an error.
with open("test.txt", "r") as fin:
    text = fin.read()

# One list of fields per line of the file.
rows = [eachline.split() for eachline in text.split("\n")]

# "if x" drops empty lines before indexing; the FF test comes before
# int(), so the header row is never converted.
a = [ x[0][2:] + " -- " + x[3] + " -- " + x[2] + "\n"
      for x in rows
      if x and x[0][:2] == "FF" and int(x[2]) > 25
      ]
# print(r) with one argument gives the same output on Python 2 and 3
for r in a:
    print(r)

# The output file is opened only after the data was built, so a failure
# above does not leave an emptied 123.txt behind.
with open("123.txt", "w") as fout:
    fout.writelines(a)

And what if N1 and N2 contain times, and only the ones after 10:00 should be written? It gives an error for them.

Name           N1      N2      Type
FFapple        06:35   07:20    fruit
ZZbanana       10:11   12:45    fruit
ZZcarrot       09:22   10:25    vegetable
FFapricot      11:12   14:30    fruit
ZZmelon        16:22   17:50    fruit
FFbroccoli     16:15   17:40    vegetable

so to get:

apricot -- fruit -- 14:30
broccoli -- vegetable -- 17:40

So far I could get 1, :30, 4:3, but not the full time.

Just use this:

#Filter:
def my_filter(entry):
    '''Returns True for an FF row whose N2 field (entry[2]) is above 25

    Rows that cannot be checked -- blank lines (empty list), short rows,
    a non-numeric N2 such as the header text -- give False.
    '''
    try:
        return (entry[0].startswith("FF") and int(entry[2]) > 25)
    except (IndexError, ValueError):
        # Only the two expected failures are swallowed; a bare "except:"
        # would also hide KeyboardInterrupt and real programming errors.
        return False


def get_file_data(f):
    '''Gets the data from the input file name f

    Returns the file as a list of lists: one list of whitespace-separated
    fields per line (a blank line gives an empty list).
    '''
    # "with" closes the file even if reading fails part-way through.
    with open(f, "r") as fs:
        return [line.split() for line in fs]

def format_line(lst):
    '''Builds the output text for one row

    Takes the fields in file order (name, N1, N2, type) and returns
    "name -- type -- N2" followed by a newline.  The first two
    characters of the name (FF, ZZ, etc) are cut off and N1 is left out.
    '''
    fields = list(lst)  #work on a copy, the caller's row stays as it was
    name = fields[0][2:]
    del fields[1]  #N1 is not part of the output
    n2 = fields[1]
    kind = fields[2]
    return " -- ".join((name, kind, n2)) + "\n"

def write_selection(func,fdata,fileout):
    '''Writes the rows of fdata accepted by func into the file fileout

    func    -- filter, called with one row; a true result keeps the row
    fdata   -- the file data as a list of rows (lists of strings)
    fileout -- name of the output file, created or overwritten
    '''
    # Build the whole output first: if func or format_line raises, the
    # output file has not been opened (and emptied) yet.  The
    # comprehension also avoids relying on the built-in filter(), a name
    # that is easily shadowed by a user-defined function.
    flines = [format_line(line) for line in fdata if func(line)]
    # "with" closes the file even if writing fails.
    with open(fileout,"w") as fd:
        fd.writelines(flines)

# Load list.txt once as a list of split rows.
data = get_file_data("list.txt")

# Write the rows accepted by my_filter to new.txt.
write_selection(my_filter,data,"new.txt")

Just use string comparison and change the numeric condition to a string one (this also works with numbers, if they are right-aligned integers or of equal length with zero padding).

test = """
Name           N1      N2      Type
FFapple        06:35   07:20    fruit
ZZbanana       10:11   12:45    fruit
ZZcarrot       09:22   10:25    vegetable
FFapricot      11:12   14:30    fruit
ZZmelon        16:22   17:50    fruit
FFbroccoli     16:15   17:40    vegetable
"""
## split every line into its fields once
rows = [line.split() for line in test.split('\n')]
## if x and .. drops out empty lines before trying to index the line;
## zero-padded HH:MM strings compare correctly as plain strings
a = [x[0][2:] + ' -- ' + x[3] + ' -- ' + x[2]
     for x in rows
     if x and x[0].startswith('FF') and x[2] >= '10:00']  ## condition must change
## print(r) with one argument gives the same output on Python 2 and 3
for r in a:
    print(r)
"""Output:
>>> 
apricot -- fruit -- 14:30
broccoli -- vegetable -- 17:40
>>> """

Edited 6 Years Ago by pyTony: n/a

Thanks, I have one last question. Same as above, but for apricot it should look at the N1 number/time instead of N2, and at N2 for broccoli.
so:

apricot -- fruit -- 11:12
broccoli -- vegetable -- 17:40

Actually I tried to use replace("14:30", "11:12"), but I know I should use something else for it.

OK. Let's parametrise it all in a function. This is probably too advanced for you.

test = """
Name           N1      N2      Type
FFapple        06:35   07:20    fruit
ZZbanana       10:11   12:45    fruit
ZZcarrot       09:22   10:25    vegetable
FFapricot      11:12   14:30    fruit
ZZmelon        16:22   17:50    fruit
FFbroccoli     16:15   17:40    vegetable
"""
def checkfruit(name=None,fieldnum=2,bigger=None,smaller=None,startswith='FF',text=None):
    """Return the matching rows formatted as "name -- type -- value".

    name       -- None = anything goes; otherwise the name without the
                  prefix must equal it
    fieldnum   -- index of the field to compare and to show (1 = N1, 2 = N2)
    bigger     -- None = no lower limit; otherwise the field must be > bigger
    smaller    -- None = no upper limit; otherwise the field must be < smaller
    startswith -- prefix a row's first field must have; it is cut off
                  from the name in the output
    text       -- the table as one string; None = use the module-level test

    e.g. checkfruit(bigger='10:00') checks for all fruits with fieldnum 2
    bigger than '10:00'.  The field is compared as a string.
    """
    if text is None:
        text = test  ## default data: the module-level sample table
    found = []
    for line in text.split('\n'):
        x = line.split()
        ## "not x" drops out empty lines before trying to index the line
        if not x or not x[0].startswith(startswith):
            continue
        shortname = x[0][len(startswith):]  ## name without the prefix
        if name and name != shortname:
            continue
        if bigger and not x[fieldnum] > bigger:  ## check if must include also =
            continue
        if smaller and not x[fieldnum] < smaller:  ## check if must include also =
            continue
        found.append(shortname + ' -- ' + x[3] + ' -- ' + x[fieldnum])
    return found
## print(...) with one argument gives the same output on Python 2 and 3
print(checkfruit('broccoli', bigger='10:00'))
print(checkfruit('apricot',1,smaller= '12:00'))
"""Output:
['broccoli -- vegetable -- 17:40']
['apricot -- fruit -- 11:12']
"""

From reading this code you should understand how the conditions work in the `if` part of the list comprehension, and be able to change the previous code to do the same as this function.

tonyjv, you should also add a startswith argument to your function.

Edited 6 Years Ago by jcao219: n/a

This question has already been answered. Start a new discussion instead.