I want to use Python to make something similar to the fc command found in MS-DOS and Windows. How would I handle opening 2+ files and reading them a byte at a time for comparing?

Recommended Answers

All 4 Replies

You could start with this and maybe clean it up a bit.

import struct
import sys

# Shared state for the comparison script.  The functions below read and
# update these through ``global`` declarations.
blocksize = 0x2000						# Bytes requested per read() call in main()
errstart  = 0							# File offset of start of error span (set by main())
errend    = 0							# File offset just past end of error span (set by endSpan())
diffct    = 0							# Difference region count (incremented by endSpan())

# Maps a chunk width in bytes to the struct format code used by unpack().
# 'Q' is struct's unsigned long long; only the 8-byte width is defined here.
chunk_char = {8:'Q'}

def endSpan(loc):
    """Close the current difference span and report it.

    *loc* is the file offset just past the last differing byte.  The global
    ``errend`` is set to *loc*, the span length and its inclusive first/last
    offsets are printed in hex (the span starts at the global ``errstart``),
    and the global ``diffct`` is incremented.  The ``inSpan`` flag is not
    modified here; callers reset it themselves.
    """
    global diffct, errstart, errend, inSpan
    errend = loc
    span_bytes = errend - errstart
    last_bad = errend - 1
    report = "Span of %10X bytes bad from %12X to %12X\n" % (span_bytes, errstart, last_bad)
    print(report)
    diffct = diffct + 1

def unpack(buffer, chunk):
    """Decode *buffer* into a list of uppercase hex strings, one per word.

    buffer -- raw bytes read from a file
    chunk  -- word width in bytes; must be a key of ``chunk_char``

    Only whole words are decoded: if ``len(buffer)`` is not a multiple of
    *chunk*, the trailing bytes are ignored.  The format code taken from
    ``chunk_char`` carries no byte-order prefix, so struct decodes in its
    native mode.  Raises KeyError for an unsupported *chunk* width once
    there is at least one word to decode.
    """
    # Floor division keeps the word count an integer even when true
    # division is in effect (Python 3 or "from __future__ import division").
    words = len(buffer) // chunk
    return [
        '%X' % struct.unpack(chunk_char[chunk], buffer[chunk * i:chunk * (i + 1)])
        for i in range(words)
    ]

def usage():
    """Print a one-line usage message naming this script (``sys.argv[0]``)."""
    script = sys.argv[0]
    print("Usage: %s file1 file2" % script)

def main():
    """Compare the two files named on the command line and report differences.

    Both files are read in ``blocksize``-byte blocks.  A block pair that
    differs is decoded with ``unpack`` into ``stride``-byte words and compared
    word by word.  Each run of consecutive differing words forms one span:
    up to ``maxlpe`` of its words are listed (offset, both values, XOR), and
    the span is closed with ``endSpan``, which prints it and counts it.

    Returns 1 when fewer than two file names were given or a file could not
    be opened; otherwise returns None after printing the region count.
    """
    global errstart, inSpan

    if len(sys.argv) < 3:
        usage()
        return 1

    f1name = sys.argv[1]
    f2name = sys.argv[2]

    # open() raises on failure rather than returning None, so the failure has
    # to be caught.  Both names are tried so every unreadable file is reported.
    opened = []
    for name in (f1name, f2name):
        try:
            opened.append(open(name, 'rb'))
        except IOError:
            print("Error opening %s for read" % name)
    if len(opened) < 2:
        for handle in opened:
            handle.close()
        return 1
    f1d, f2d = opened

    print("Comparing %s and %s" % (f1name, f2name))

    offset = 0                          # Bytes compared so far (same in both files)
    stride = 8                          # Our unit of work
    maxlpe = 30                         # Max lines to print per error
    curlpe = 0                          # Lines printed for the current error
    inSpan = False                      # Not in a span of errors

    try:
        while True:
            s1 = f1d.read(blocksize)
            s2 = f2d.read(blocksize)

            # Only the bytes present in both blocks can be compared.
            common = min(len(s1), len(s2))
            if common == 0:
                break
            part1 = s1[:common]
            part2 = s2[:common]

            if part1 == part2:
                # An identical block ends any span that ran up to its start.
                if inSpan:
                    endSpan(offset)
                    inSpan = False
            else:
                # Pad both sides identically to a whole number of words so a
                # trailing partial word is compared instead of being dropped.
                pad = b"\0" * (-common % stride)
                words1 = unpack(part1 + pad, stride)
                words2 = unpack(part2 + pad, stride)

                for x, (hex1, hex2) in enumerate(zip(words1, words2)):
                    v1 = int(hex1, 16)
                    v2 = int(hex2, 16)
                    loc = offset + stride * x

                    if v1 == v2:
                        if inSpan:
                            endSpan(loc)
                            inSpan = False
                        continue

                    if not inSpan:
                        inSpan = True
                        errstart = loc
                        curlpe = 0
                        print("  FileOffset   1stFileContents   2ndFileContents               XOR")
                    if curlpe < maxlpe:     # OK to print this error line?
                        print("%12X  %16X  %16X  %16X" % (loc, v1, v2, v1 ^ v2))
                        curlpe += 1
                    elif curlpe == maxlpe:
                        print("              ... and so on ...")
                        curlpe += 1

            offset += common

            # Blocks of unequal length mean one file has ended (assumes
            # regular files, where a short read only happens at end of file).
            if len(s1) != len(s2):
                break

        # Close a span still open at the end, using the true end offset.
        if inSpan:
            endSpan(offset)
            inSpan = False

        if len(s1) != len(s2):
            shorter = f1name if len(s1) < len(s2) else f2name
            print("Files differ in length: %s is only %X bytes long" % (shorter, offset))
    finally:
        f1d.close()
        f2d.close()

    print("%u difference regions found" % diffct)


if __name__ == "__main__":
    sys.exit(main())

You might want to play with the Python builtin module filecmp:

# compare two files and check if they are equal
# files can be binary or text based

import filecmp

# pick two files you want to compare ...
file1 = "Boing1.wav"
file2 = "Boing2.wav"

# shallow=False forces a comparison of the file contents.  With the default
# (shallow=True), files whose os.stat() signatures match are reported as
# equal without their contents being read.
if filecmp.cmp(file1, file2, shallow=False):
    print("Files %s and %s are identical" % (file1, file2))
else:
    print("Files %s and %s differ!" % (file1, file2))
import struct
import sys
# Shared state for the comparison script.  The functions below read and
# update these through ``global`` declarations.
blocksize = 0x2000                      # Bytes requested per read() call in main()
errstart  = 0                           # File offset of start of error span (set by main())
errend    = 0                           # File offset just past end of error span (set by endSpan())
diffct    = 0                           # Difference region count (incremented by endSpan())
# Maps a chunk width in bytes to the struct format code used by unpack().
# 'Q' is struct's unsigned long long; only the 8-byte width is defined here.
chunk_char = {8:'Q'}
def endSpan(loc):
    """Finish the open difference span at offset *loc* and print a summary.

    Stores *loc* in the global ``errend``, prints the span's byte count and
    its first and last offsets in hex (the first offset comes from the global
    ``errstart``), then adds one to the global ``diffct``.  ``inSpan`` is
    left untouched; the caller is responsible for clearing it.
    """
    global diffct, errstart, errend, inSpan
    errend = loc
    width = errend - errstart
    summary = (width, errstart, errend - 1)
    print("Span of %10X bytes bad from %12X to %12X\n" % summary)
    diffct = diffct + 1
def unpack(buffer, chunk):
    """Return the whole *chunk*-byte words of *buffer* as uppercase hex strings.

    buffer -- raw bytes read from a file
    chunk  -- word width in bytes; must be a key of ``chunk_char``

    Bytes left over after the last whole word are ignored.  The struct
    format code from ``chunk_char`` has no byte-order prefix, so decoding
    uses struct's native mode.  Raises KeyError for an unsupported *chunk*
    width once there is at least one word to decode.
    """
    # Use floor division so the count stays an integer under true division
    # (Python 3 or "from __future__ import division").
    count = len(buffer) // chunk
    hex_words = []
    for index in range(count):
        start = chunk * index
        word = buffer[start:start + chunk]
        hex_words.append('%X' % struct.unpack(chunk_char[chunk], word))
    return hex_words
def usage():
    """Show how to invoke the script, using ``sys.argv[0]`` as its name."""
    message = "Usage: %s file1 file2" % sys.argv[0]
    print(message)
def main():
    """Compare the two files named on the command line and report differences.

    Both files are read in ``blocksize``-byte blocks.  A block pair that
    differs is decoded with ``unpack`` into ``stride``-byte words and compared
    word by word.  Each run of consecutive differing words forms one span:
    up to ``maxlpe`` of its words are listed (offset, both values, XOR), and
    the span is closed with ``endSpan``, which prints it and counts it.

    Returns 1 when fewer than two file names were given or a file could not
    be opened; otherwise returns None after printing the region count.
    """
    global errstart, inSpan

    if len(sys.argv) < 3:
        usage()
        return 1

    f1name = sys.argv[1]
    f2name = sys.argv[2]

    # open() raises on failure rather than returning None, so the failure has
    # to be caught.  Both names are tried so every unreadable file is reported.
    opened = []
    for name in (f1name, f2name):
        try:
            opened.append(open(name, 'rb'))
        except IOError:
            print("Error opening %s for read" % name)
    if len(opened) < 2:
        for handle in opened:
            handle.close()
        return 1
    f1d, f2d = opened

    print("Comparing %s and %s" % (f1name, f2name))

    offset = 0                          # Bytes compared so far (same in both files)
    stride = 8                          # Our unit of work
    maxlpe = 30                         # Max lines to print per error
    curlpe = 0                          # Lines printed for the current error
    inSpan = False                      # Not in a span of errors

    try:
        while True:
            s1 = f1d.read(blocksize)
            s2 = f2d.read(blocksize)

            # Only the bytes present in both blocks can be compared.
            common = min(len(s1), len(s2))
            if common == 0:
                break
            part1 = s1[:common]
            part2 = s2[:common]

            if part1 == part2:
                # An identical block ends any span that ran up to its start.
                if inSpan:
                    endSpan(offset)
                    inSpan = False
            else:
                # Pad both sides identically to a whole number of words so a
                # trailing partial word is compared instead of being dropped.
                pad = b"\0" * (-common % stride)
                words1 = unpack(part1 + pad, stride)
                words2 = unpack(part2 + pad, stride)

                for x, (hex1, hex2) in enumerate(zip(words1, words2)):
                    v1 = int(hex1, 16)
                    v2 = int(hex2, 16)
                    loc = offset + stride * x

                    if v1 == v2:
                        if inSpan:
                            endSpan(loc)
                            inSpan = False
                        continue

                    if not inSpan:
                        inSpan = True
                        errstart = loc
                        curlpe = 0
                        print("  FileOffset   1stFileContents   2ndFileContents               XOR")
                    if curlpe < maxlpe:     # OK to print this error line?
                        print("%12X  %16X  %16X  %16X" % (loc, v1, v2, v1 ^ v2))
                        curlpe += 1
                    elif curlpe == maxlpe:
                        print("              ... and so on ...")
                        curlpe += 1

            offset += common

            # Blocks of unequal length mean one file has ended (assumes
            # regular files, where a short read only happens at end of file).
            if len(s1) != len(s2):
                break

        # Close a span still open at the end, using the true end offset.
        if inSpan:
            endSpan(offset)
            inSpan = False

        if len(s1) != len(s2):
            shorter = f1name if len(s1) < len(s2) else f2name
            print("Files differ in length: %s is only %X bytes long" % (shorter, offset))
    finally:
        f1d.close()
        f2d.close()

    print("%u difference regions found" % diffct)


if __name__ == "__main__":
    sys.exit(main())

Honestly, your question is solved; please mark it as such. Also, your code block was formatted in a terrible manner and was unpleasant to look at. Try fixing that next time.

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.