First, I'd like to say that this IS a school project. I'm not going to try to hide that.

Anyway, I was provided with this c++ code:

#include <iostream>
  using namespace std;
  
  
  //----Prototypes------------------------------------------------------
  void CmpTest( char *buffer1, char *buffer2, int size );
  int  MemCmp( char* mem1, char* mem2, int size );
  
  
  //////////////////////////////////////////////////////////////////////
  //  main() : int
  //////////////////////////////////////////////////////////////////////
  int main()
  {
    char buffer1[1024], buffer2[1024];
  
    for (int i=0; i<1024; i+=2 )
    {
      buffer1[i] = 0xCD;
      buffer2[i] = 0xCD;
      buffer1[i+1] = 0xAB;
      buffer2[i+1] = 0xAB;
    }
  
    CmpTest( buffer1, buffer2, 1024 );
    buffer1[251] = 0xAC;
    CmpTest( buffer1, buffer2, 1024 );
  
    return 0;
  }
  
  
  //////////////////////////////////////////////////////////////////////
  //  CmpTest( buffer1, buffer2 : char*, size : int )
  //    Accepts two memory locations, a number of bytes to compare, and
  //    prints the results of the comparsion.
  //////////////////////////////////////////////////////////////////////
  void CmpTest( char *buffer1, char *buffer2, int size )
  {
    int index_of_diff = MemCmp( buffer1, buffer2, size );
    if (index_of_diff == -1)
    {
      cout << "Memory regions match" << endl;
    }
    else
    {
      cout << "Memory regions differ at index " << index_of_diff << endl;
    }
  }
  
  
  //////////////////////////////////////////////////////////////////////
  //  MemCmp( mem1, mem2 : char*, size : int )
  //    Accepts two memory locations, a number of bytes to compare, and
  //    returns the index of the first difference or -1 if there are
  //    no differences.
  //////////////////////////////////////////////////////////////////////
  int MemCmp( char* mem1, char* mem2, int size )
  {
    int result = -1;
  
    // BEGIN inline assembly replacement
    for (int i=0; i<size; ++i)
    {
      if (mem1[i] != mem2[i]) 
      {
        result = i;
        break;
      }
    }
    // END inline assembly replacement
  
    return result;
  }

I'm supposed to change the MemCmp function into inline x86 assembly, as stated in the code. Here's what I've done so far:

int MemCmp( char* mem1, char* mem2, int size )
  {
	  int result;
	  asm(
		".intel_syntax\n"
		"push ebp\n"
		"mov ebp,esp\n"
		"sub esp,4\n"
		"mov DWORD PTR [ebp-4],0\n"
		"jmp while1_cond\n"
		"while1_body:\n"
		"mov %0,[ebp-4]\n"
		"add %2,%0\n"
		"add %3,%0\n"
		"push %0\n"
		"push %1\n"
		"movzx %0, BYTE PTR [%2]\n"
		"movzx %1, BYTE PTR [%3]\n"
		"cmp %0,%1\n"
		"pop %1\n"
		"pop %0\n"
		"je if_end\n"
		"jmp memcmpend\n"
		"if_end:\n"
		"inc %0\n"
		"mov [ebp-4],%0\n"
		"while1_cond:\n"
		"mov %0,[ebp-4]\n"
		"cmp %0,%1\n"
		"jl while1_body\n"
		"memcmpend:\n"
		"leave\n"
		".att_syntax\n"
		: "=a"(result)
		:"b"(size),"c"(mem1),"d"(mem2)
		:"ebp", "esp"
	  );
	  cout << result << endl;
    return result;
  }

And here's the assembly g++ generates for the inline code in this method

.intel_syntax
push ebp
mov ebp,esp
sub esp,4
mov DWORD PTR [ebp-4],0
jmp while1_cond
while1_body:
mov %eax,[ebp-4]
add %ecx,%eax
add %edx,%eax
push %eax
push %ebx
movzx %eax, BYTE PTR [%ecx]
movzx %ebx, BYTE PTR [%edx]
cmp %eax,%ebx
pop %ebx
pop %eax
je if_end
jmp memcmpend
if_end:
inc %eax
mov [ebp-4],%eax
while1_cond:
mov %eax,[ebp-4]
cmp %eax,%ebx
jl while1_body
memcmpend:
leave
.att_syntax

As you can see in the full cpp code, the arrays should only be different at memory location 251, and the first time MemCmp is called, there should be NO difference in the arrays.

My MemCmp function exits when the loop is at its 45th iteration (ebp-4 will be 45).

I have no idea why this is happening, and I was hoping someone could spot something that might be causing this.

Thanks for the help.

Compare with the code generated by the compiler?

gcc -S prog.c
will generate
prog.s

All that push/pop in the loop is expensive!

I've tried that. I even put the code that g++ generates inline, and it still stops at 45...here's that:

asm(
			  ".intel_syntax\n"
				"push	ebp\n"
				"mov	ebp, esp\n"
				"sub	esp, 8\n"
				"mov	DWORD PTR [ebp-4], -1\n"
				"mov	DWORD PTR [ebp-8], 0\n"
			"L18_1:\n"
				"mov	eax, DWORD PTR [ebp-8]\n"
				"cmp    eax, DWORD PTR [ebp+16]\n"
				"jge	L19_1\n"
				"mov	eax, DWORD PTR [ebp-8]\n"
				"mov	ecx, DWORD PTR [ebp+8]\n"
				"add	ecx, eax\n"
				"mov	eax, DWORD PTR [ebp-8]\n"
				"mov	edx, DWORD PTR [ebp+12]\n"
				"add	edx, eax\n"
				"movzx	eax, BYTE PTR [ecx]\n"
				"cmp	al, BYTE PTR [edx]\n"
				"je	L20_1\n"
				"mov	eax, DWORD PTR [ebp-8]\n"
				"mov	DWORD PTR [ebp-4], eax\n"
				"jmp	L19_1\n"
			"L20_1:\n"
				"lea	eax, [ebp-8]\n"
				"inc	DWORD PTR [eax]\n"
				"jmp	L18_1\n"
			"L19_1:\n"
				"mov	eax, DWORD PTR [ebp-4]\n"
				"leave\n"
			  ".att_syntax\n"
			  : "=a"(result)
			  :
			  :"ebp", "esp", "ecx", "eax", "edx"

	  );

I tried your asm, and it won't compile (gcc version 4.3.3 (Ubuntu 4.3.3-5ubuntu4))

$ g++ foo.cpp
foo.cpp: In function ‘int MemCmp(char*, char*, int)’:
foo.cpp:115: error: bp cannot be used in asm here

FWIW, the C++ code produced the expected answer.

$ g++ foo.cpp
$ ./a.out 
Memory regions match
Memory regions differ at index 251
Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.