I'd like to post some code of an app I wrote to spit out large files, and sort them, and finally reassemble them. I'm new to Python, and the 'object' way in general.
If you feel like it, would you please tell me how it should have been written 'properly'. For instance, I don't get the whole 'self' thing, and I don't 'quite' get the modularity. I'm not looking for a lesson so much as just 'you could have done this here', or, 'this is sloppy, it should be done like this'..
Please look at it, and if you have the time, let me know how I 'should' have done it, or at least a tip or two. I'm glad I found this group, you guys are awesome.
import csv import time import sys import os def main(): pass def sortAndWrite(fname, ziploc): try: T = csv.reader(open(fname, 'rb'), quoting = csv.QUOTE_ALL) To = open('Sorted_' + fn,'ab') Tw = csv.writer(To, quoting = csv.QUOTE_ALL) #add all of the rows to a temporary array for row in T: tmpTable.append(row) #sort the data tmpTable.sort(lambda a,b: cmp(a[ziploc][0:5],b[ziploc][0:5]) ) #write the table print "Building Sorted file, adding: " + fname if header == 1: # Have we written it? Tw.writerow(h) # No, write it first header == 0 # Now, it's written # Write the contents of the temp list after sorting to the master output file for row in tmpTable: Tw.writerow(row) #empty the temp table tmpTable[:] =  if fname == 'temp6.tmp': To.flush() To.close() except: print ("Unexpected error:", sys.exc_info()) # Start App ==================================================================== os.system("cls") # Clear the screen # Define the help menu should they type Zipsort.py --help helptext="""\nUsage: Zipsort.py [filename -h | -c ] Zipsort.py is is a small program that will sort your file by Zip Code. -h No header [default, assumes file has a header] -c New Zip Code column (zero based) [default is column 76] Example: Zipsort.py MyFile.csv -h -c In the above example, the file to be sorted is 'MyFile.csv', the file does not have a header record and the column that contains the zip code needs to be overridden. """ print "" # Force a print line so the text isn't wedged against the top of the # DOS window. # Do they need help? if sys.argv.count("--help")>0: print helptext sys.exit() try: fn = sys.argv # The filename is the first argument on the command line print "Filename is: " + fn if os.path.exists(fn) == False: print "File does not exist, try again." exit() except: # rather than mess with indexes, I just catch the exception print "You must define a valid file to sort. Ex: Zipsort.py MyFile.cvs" exit() # is there no header? if sys.argv.count("-h")>0: header = 0 print "File has no header" else: header = 1 print "File has a header" # Do they want to change the zip code location? if sys.argv.count("-c")>0: z = raw_input("What is the new Zip Code column?: ") print "Zip code is now located at: " + str(z) else: z = 76 print "Zip code is located at: " + str(z) # Define the working table we will use to hold the temp file(s) records # for sorting, and other working variables tmpTable =  h = '' # This will hold the header for later # Delete the Sorted out file before we start if os.path.exists('Sorted_' + fn) == True: YN = raw_input("Sorted file already exists, delete it?: ") if YN == 'y' or YN == 'Y': os.remove('Sorted_' + fn) print "Sorted file removed" else: Q = raw_input( "Quit?, or Continue (Q/C)?") if Q == 'q' or Q == 'Q': print "Exiting.." exit() #Open input file and split it into (6) temp files for processing #if there's a header, we will capture it at run time start = time.clock() #start the timer I = open(fn, 'rb') r = csv.reader(I, quoting = csv.QUOTE_ALL) O1 = open('temp1.tmp', 'w+b') w1 = csv.writer(O1, quoting = csv.QUOTE_ALL) O2 = open('temp2.tmp', 'w+b') w2 = csv.writer(O2, quoting = csv.QUOTE_ALL) O3 = open('temp3.tmp', 'w+b') w3 = csv.writer(O3, quoting = csv.QUOTE_ALL) O4 = open('temp4.tmp', 'w+b') w4 = csv.writer(O4, quoting = csv.QUOTE_ALL) O5 = open('temp5.tmp', 'w+b') w5 = csv.writer(O5, quoting = csv.QUOTE_ALL) O6 = open('temp6.tmp', 'w+b') w6 = csv.writer(O6, quoting = csv.QUOTE_ALL) if header == 1: h = r.next() # store the header print "" print "Splitting out the input file" try: for row in r: Zip = int(row[z][0:5]) if Zip <= 20000: w1.writerow(row) if Zip > 20000 and Zip <= 35000: w2.writerow(row) if Zip > 35000 and Zip <= 45000: w3.writerow(row) if Zip > 45000 and Zip <= 65000: w4.writerow(row) if Zip > 65000 and Zip <= 85000: w5.writerow(row) if Zip > 85000: w6.writerow(row) except: w6.writerow(row) print "Error in this record, bad zip: " + row[z][0:5] #close the temp files so we don't have contention issues later O1.close() O2.close() O3.close() O4.close() O5.close() O6.close() #once the file are separated, we need to sort them for f in range(1,7): tmpFile = 'temp' + str(f) + '.tmp' print "Sorting: " + tmpFile sortAndWrite(tmpFile, z) os.remove(tmpFile) # End of app.... end = time.clock() print "Finished" print "Time elapsed = ", end - start, "seconds" if __name__ == '__main__': main()