Assembly Improving Sort Speed and Efficiency
I have this issorted code to sort elements of an array. It's very slow as it is right now due to a lot of memory access, number of branch instructions per iteration of the poorly written loops, and unorganized instruction/register usage causing dependencies between instructions. I'm not sure on how to go about improving my code to be faster and efficient. I need a nudge in the right direction as I'm new to assembly.
.globl issorted
.align 4, 0x90
issorted:
movl $1, %eax
.align 4, 0x90
LBB1_1:
cmpq %rsi, %rax
jge LBB1_4
movq (%rdi,%rax,8), %rcx
leaq 1(%rax), %rdx
cmpq %rcx, -8(%rdi,%rax,8)
movq %rdx, %rax
jle LBB1_1
xorl %eax, %eax
ret
LBB1_4:
movl $1, %eax
ret
Related Article: Help with X86 Assembly Project.
is a solved Assembly discussion thread by kww228 that has 5 replies, was last updated 1 year ago and has been tagged with the keywords: assembly, computer-science, homeworkhelper.
salty11
Junior Poster in Training
51 posts since Dec 2010
Reputation Points: 10
Solved Threads: 0
Skill Endorsements: 0
Show what you are doing in pseudo-code, as well as tell us what algorithm you are trying to implement (bubble-sort, qsort, etc).
rubberman
Posting Maven
2,571 posts since Mar 2010
Reputation Points: 365
Solved Threads: 305
Skill Endorsements: 51
I'm using qsort
long issorted(long *p, long n) {
long i;
for (i = 1; i < n; ++i) {
if (p[i - 1] > p[i])
return 0;
}
return 1;
}
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <stdlib.h>
static struct timeval start_ts, end_ts, diff_ts;
void startClock() {
gettimeofday(&start_ts, 0);
}
void endClock() {
gettimeofday(&end_ts, 0);
}
struct timeval timeminus(struct timeval a, struct timeval b)
{
struct timeval res;
res.tv_sec = a.tv_sec - b.tv_sec;
if (a.tv_usec > b.tv_usec) {
res.tv_usec = a.tv_usec - b.tv_usec;
} else {
res.tv_usec = 1000000 + a.tv_usec - b.tv_usec;
res.tv_sec--;
}
return res;
}
long usecClock() {
diff_ts = timeminus(end_ts, start_ts);
return diff_ts.tv_usec + 1000000 * diff_ts.tv_sec;
}
int longLess(const void *a, const void *b) {
return *(long *)a - *(long *)b;
}
extern int issorted(long *p, long n);
int main(int argc, char **argv)
{
long l, limit = 500000, size = 10000;
long *p = calloc(size, sizeof(long));
long x;
for (l = 0; l < size; ++l) {
p[l] = random() % size;
}
qsort(p, size, sizeof(long), longLess);
p[0] = size + 1;
if (issorted(p, size))
printf("Got the wrong answer\n");
qsort(p, size, sizeof(long), longLess);
p[size - 1] = -1;
if (issorted(p, size))
printf("Got the wrong answer\n");
qsort(p, size, sizeof(long), longLess);
p[size / 2] = size + 1;
if (issorted(p, size))
printf("Got the wrong answer\n");
qsort(p, size, sizeof(long), longLess);
printf("Timing issorted\n");
x = 1;
startClock();
for (l = 0; l < limit; ++l) {
x &= issorted(p, size);
}
endClock();
if (!x)
printf("Got the wrong answer\n");
printf("%ld invocations of issorted(%ld) took: %ld usec\n", limit, size, usecClock());
return 0;
}
salty11
Junior Poster in Training
51 posts since Dec 2010
Reputation Points: 10
Solved Threads: 0
Skill Endorsements: 0
Sorry it doesn't sort the array, the issorte asm code just checks to see if its sorted. The first place I want to start is by reducing memory access, but I also want to unroll the loop.
salty11
Junior Poster in Training
51 posts since Dec 2010
Reputation Points: 10
Solved Threads: 0
Skill Endorsements: 0