hi everyone i am very close to finishing my first program in python, but there are two things in the way.
first of all the code highlighted in red reads the number of pages of pdf files in a directory. my problem is that it is in a for loop which is wrapped in another for loop which means it repeats itself and than repeats itself again, i tried messing with it but cant figure out a way to get rid of the for loop statement.
secondly the code highlighted in green is suppose to write the output onto a .xls file using xlwt. the problem is that even though i have a counter set, which is suppose to change the value of row from r=1 to r+=1 , but it doesnt work because of the .xls file only one row of information shows up.
any advice would be greatly appreciated. thanxs in advance
import email, getpass, imaplib, os, string, re from itertools import takewhile from operator import methodcaller import xlwt detach_dir = '/Users/defaultuser/Desktop' # directory where to save attachments (default: current) m = imaplib.IMAP4_SSL('imap.gmail.com') m.login('******@gmail.com', '*******') m.list() # Out: list of "folders" aka labels in gmail. m.select("inbox") # connect to inbox. resp, items = m.search(None, "ALL") # you could filter using the IMAP rules here (check http://www.example-code.com/csharp/imap-search-critera.asp) items = items.split() # getting the mails id book = xlwt.Workbook(encoding="utf-8") sheet1 = book.add_sheet("Python Sheet 1") sheet1.write(0, 0, "Job") sheet1.write(0, 1, "Date") sheet1.write(0, 2, "Teacher") sheet1.write(0, 3, "Copies") sheet1.write(0, 4, "Pages") for emailid in items: resp, data = m.fetch(emailid, "(RFC822)") # fetching the mail, "`(RFC822)`" means "get the whole stuff", but you can ask for headers only, etc email_body = data # getting the mail content mail = email.message_from_string(email_body) # parsing the mail content to get a mail object #Check if any attachments at all if mail.get_content_maintype() != 'multipart': continue teacher = mail["From"] subject = mail["Subject"] d = mail["date"] date = d[0:16] for part in mail.walk(): # multipart are just containers, so we skip them if part.get_content_maintype() == 'multipart': continue # is this part an attachment ? if part.get('Content-Disposition') is None: continue filename = teacher + subject + ".pdf" counter = 1 # if there is no filename, we create one with a counter to avoid duplicates if not filename: filename = 'part-%03d%s' % (counter, 'bin') counter += 1 att_path = os.path.join(detach_dir, filename) #Check if its already there if not os.path.isfile(att_path) : # finally write the stuff fp = open(att_path, 'wb') fp.write(part.get_payload(decode=True)) fp.close() row = 1 d = r'/Users/defaultuser/Desktop' for part in mail.walk(): # multipart are just containers, so we skip them if part.get_content_maintype() == 'multipart': continue # we are interested only in the simple text messages if part.get_content_subtype() != 'plain': continue payload = part.get_payload() x = payload all=string.maketrans('','') nodigs=all.translate(all, string.digits) copies = x.translate(all, nodigs) for f in (pf for pf in os.listdir(d) if pf.endswith('.pdf')): fn = os.path.join(d,f) with open(fn, 'rb') as pdf: text = pdf.read() pages = int(''.join(takewhile(methodcaller('isdigit'), text[text.rfind('/Count ')+7:].lstrip()))) print('File %s: %i pages' % (f,pages)) print date print teacher print subject print "Number of Copies:" + copies # we use walk to create a generator so we can iterate on the parts and forget about the recursive headach sheet1.write( row, 0, str( row ) ) # 'Job' sheet1.write( row, 1, date ) # 'Date' sheet1.write( row, 2, teacher ) # 'Teacher' sheet1.write( row, 3, copies ) # 'Copies' sheet1.write( row, 4, pages ) # 'Pages' row += 1 book.save( 'python_spreadsheet.xls' )