I have this timeissorted.c file, and right now it's running at around about 8397599 usec. I'm looking for help toward improving the performance of this code to make it run faster.

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <stdlib.h>

static struct timeval start_ts, end_ts, diff_ts;

void startClock() {
    gettimeofday(&start_ts, 0);
}

void endClock() {
    gettimeofday(&end_ts, 0);
}

struct timeval timeminus(struct timeval a, struct timeval b)
{
    struct timeval res;
    res.tv_sec = a.tv_sec - b.tv_sec;
    if (a.tv_usec > b.tv_usec) {
    res.tv_usec = a.tv_usec - b.tv_usec;
    } else {
    res.tv_usec = 1000000 + a.tv_usec - b.tv_usec;
    res.tv_sec--;
    }
    return res;
}

long usecClock() {
    diff_ts = timeminus(end_ts, start_ts);
    return diff_ts.tv_usec + 1000000 * diff_ts.tv_sec;
}

int longLess(const void *a, const void *b) {
    return *(long *)a - *(long *)b;
}

extern int issorted(long *p, long n);

int main(int argc, char **argv)
{
    long l, limit = 500000, size = 10000;
    long *p = calloc(size, sizeof(long));
    long x;
    for (l = 0; l < size; ++l) {
    p[l] = random() % size;
    }
    qsort(p, size, sizeof(long), longLess);

    p[0] = size + 1;
    if (issorted(p, size))
    printf("Got the wrong answer\n");

    qsort(p, size, sizeof(long), longLess);
    p[size - 1] = -1;
    if (issorted(p, size))
    printf("Got the wrong answer\n");

    qsort(p, size, sizeof(long), longLess);
    p[size / 2] = size + 1;
    if (issorted(p, size))
    printf("Got the wrong answer\n");
    qsort(p, size, sizeof(long), longLess);

    printf("Timing issorted\n");
    x = 1;
    startClock();
    for (l = 0; l < limit; ++l) {
    x &= issorted(p, size);
    }
    endClock();
    if (!x)
    printf("Got the wrong answer\n");

    printf("%ld invocations of issorted(%ld) took: %ld usec\n", limit, size, usecClock());
    return 0;
}

The only thing the program is timing is the run time of the issorted(), being called over and over.

And we have no code for issorted() here.

You might be able to optimize the sorting function, but not without seeing the data being sorted. Qsort is not the fastest sorter, but it's no slouch either. It's claim to fame is it will sort ANYTHING since it works with void pointers, without any changes to the underlying code.

qsort can be beat - but probably not worth it if you want a sorting function that will handle void pointers.

Edited 3 Years Ago by Adak

Also, if you are inserting a lot of data into a list like this, and need it to be sorted after every insertion, then calling qsort after each iteration is HORRIBLY inefficient! I use a head/tail optimizes insertion sort for that. It is a derivative of qsort, but the insert function puts the element in the correct position so the collection is sorted as you insert into it. It behaves much as would bsearch, and the only other overhead is the block move of the data down one element, and the occasional resizing of the array. Block moves are very efficient on modern processors, and an occasional realloc() is not too onerous. Obviously you don't want to do that on each insertion - I usually would do a binary realloc, starting with some reasonable power of 2 for the size of the array, such as 64 elements, and then double the size each time I need to reallocate it.

Here is some code that will tell you where to insert the element:

// Returns location and sets "where" reference to -1 if found.  Returns -1
// and sets "where" reference to insertion location if not found.
static int32 TSearch( const DataType** array,
                      const KeyType* key,
                      int32 count,
                      int32& where,
                      TCompareFunc compare )
{
    int32 mid = 0, low = 0, high = (count - 1);
    int comp = 0;
    where = -1;
    while(low <= high)
    {
        mid = (high + low)/2;

        // We compare in reverse since we are comparing the collection entry
        // to the key instead of vice-versa.
        comp = compare(array[mid],key);
        if (comp == 0)
        {
            // Found object.
            return mid;
        }
        else if (comp > 0)
        {
            if (mid == 0)
            {
                // We are at top so terminate search and return not found.
                // Set insertion point to top of list.
                where = 0;
                return -1;
            }

            // current item > item searched for so look below
            high = mid - 1;
        }
        else if (comp < 0)
        {
            // current item < item searched for so look above.
            low = mid + 1;
        }
    }
    // Not found so set "where" to low or mid as appropriate.
    if (comp < 0)
    {
        where = low;
    }
    else
    {
        where = mid;
    }
    return -1;
}
long issorted(long *p, long n) {
    long i;
    for (i = 1; i < n; ++i) {
    if (p[i - 1] > p[i])
        return 0;
    }
    return 1;
}

This is the code for issorted(). I basically want to reduce memory access, reduce dependencies between instructions, and reduce the number of branch instructions per iteration of the loop wherever possibles. I'm not really sure on how to go about doing this and need a nudge in the right direction. I want to change what code I have, not re-write it all. Sorry for the late responce, have been working on a huge project this week.

I've made a mistake in understanding the lab, I need to improve issorted, not timeissorted, I've posted the c code for it, but what I need to improve is this asm code, sorry about that.

    .globl  issorted
    .align  4, 0x90
issorted:
    movl    $1, %eax
    .align  4, 0x90
LBB1_1:
    cmpq    %rsi, %rax
    jge LBB1_4
    movq    (%rdi,%rax,8), %rcx
    leaq    1(%rax), %rdx
    cmpq    %rcx, -8(%rdi,%rax,8)
    movq    %rdx, %rax
    jle LBB1_1
    xorl    %eax, %eax
    ret
LBB1_4:
    movl    $1, %eax
    ret
This article has been dead for over six months. Start a new discussion instead.