Sorting Algorithms in Python

19 Years Ago vegaseat 1 4K Views

Early versions of Python used a hybrid of samplesort (a variant of quicksort with large sample size) and binary insertion sort as the built-in sorting algorithm. This proved to be somewhat unstable, and was replaced in version 2.3 with an adaptive mergesort algorithm. I am comparing several rudimentary sorting routines, translated to Python from Narue's C code by my friend Micko, with the Python built-in sorting routine. The new Python24 @ function decorator is used to implement the timing.

# timing 7 different Python sorting algorithms with a list of integers
# each function is given the same list (fresh copy each time)
# tested with Python24       vegaseat      21jan2006

import random  # for generating random numbers
import time    # for timing each sort function with time.clock()

DEBUG = False  # set True to check results of each sort

N = 1000     # number of elements in list
list1 = []   # list of integer elements

for i in range(0, N):
    list1.append(random.randint(0, N-1))

#print list1  # test

def print_timing(func):
    def wrapper(*arg):
        t1 = time.clock()
        res = func(*arg)
        t2 = time.clock()
        print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
        return res
    return wrapper

# declare the @ decorator just above each sort function, invokes print_timing()
@print_timing
def adaptive_merge_sort(list2):
    """adaptive merge sort, built into Python since version 2.3"""
    list2.sort()

@print_timing
def bubble_sort(list2):
    #swap_test = False
    for i in range(0, len(list2) - 1):
        # as suggested by kubrick, makes sense
        swap_test = False
        for j in range(0, len(list2) - i - 1):
            if list2[j] > list2[j + 1]:
                list2[j], list2[j + 1] = list2[j + 1], list2[j]  # swap
            swap_test = True
        if swap_test == False:
            break

# selection sort
@print_timing
def selection_sort(list2):
    for i in range(0, len (list2)):
        min = i
        for j in range(i + 1, len(list2)):
            if list2[j] < list2[min]:
                min = j
        list2[i], list2[min] = list2[min], list2[i]  # swap
      
# insertion sort
@print_timing
def insertion_sort(list2):
    for i in range(1, len(list2)):
        save = list2[i]
        j = i
        while j > 0 and list2[j - 1] > save:
            list2[j] = list2[j - 1]
            j -= 1
        list2[j] = save
  
# quick sort
@print_timing
def quick_sort(list2):
    quick_sort_r(list2, 0, len(list2) - 1)

# quick_sort_r, recursive (used by quick_sort)
def quick_sort_r(list2 , first, last):
    if last > first:
        pivot = partition(list2, first, last)
        quick_sort_r(list2, first, pivot - 1)
        quick_sort_r(list2, pivot + 1, last)

# partition (used by quick_sort_r)
def partition(list2, first, last):
    sred = (first + last)/2
    if list2[first] > list2 [sred]:
        list2[first], list2[sred] = list2[sred], list2[first]  # swap
    if list2[first] > list2 [last]:
        list2[first], list2[last] = list2[last], list2[first]  # swap
    if list2[sred] > list2[last]:
        list2[sred], list2[last] = list2[last], list2[sred]    # swap
    list2 [sred], list2 [first] = list2[first], list2[sred]    # swap
    pivot = first
    i = first + 1
    j = last
  
    while True:
        while i <= last and list2[i] <= list2[pivot]:
            i += 1
        while j >= first and list2[j] > list2[pivot]:
            j -= 1
        if i >= j:
            break
        else:
            list2[i], list2[j] = list2[j], list2[i]  # swap
    list2[j], list2[pivot] = list2[pivot], list2[j]  # swap
    return j

# heap sort
@print_timing
def heap_sort(list2):
    first = 0
    last = len(list2) - 1
    create_heap(list2, first, last)
    for i in range(last, first, -1):
        list2[i], list2[first] = list2[first], list2[i]  # swap
        establish_heap_property (list2, first, i - 1)

# create heap (used by heap_sort)
def create_heap(list2, first, last):
    i = last/2
    while i >= first:
        establish_heap_property(list2, i, last)
        i -= 1

# establish heap property (used by create_heap)
def establish_heap_property(list2, first, last):
    while 2 * first + 1 <= last:
        k = 2 * first + 1
        if k < last and list2[k] < list2[k + 1]:
            k += 1
        if list2[first] >= list2[k]:
            break
        list2[first], list2[k] = list2[k], list2[first]  # swap
        first = k

# merge sort
@print_timing
def merge_sort(list2):
    merge_sort_r(list2, 0, len(list2) -1)

# merge sort recursive (used by merge_sort)
def merge_sort_r(list2, first, last):
    if first < last:
        sred = (first + last)/2
        merge_sort_r(list2, first, sred)
        merge_sort_r(list2, sred + 1, last)
        merge(list2, first, last, sred)

# merge (used by merge_sort_r)
def merge(list2, first, last, sred):
    helper_list = []
    i = first
    j = sred + 1
    while i <= sred and j <= last:
        if list2 [i] <= list2 [j]:
            helper_list.append(list2[i])
            i += 1
        else:
            helper_list.append(list2 [j])
            j += 1
    while i <= sred:
        helper_list.append(list2[i])
        i +=1
    while j <= last:
        helper_list.append(list2[j])
        j += 1
    for k in range(0, last - first + 1):
        list2[first + k] = helper_list [k]

# test sorted list by printing the first 10 elements
def print10(list2):
    for k in range(10):
        print list2[k],
    print


# run test if script is executed
if __name__ == "__main__" :
    print "timing 7 sorting algorithms with a list of 1000 integers:"
    # make a true copy of list1 each time
    list2 = list(list1)
    adaptive_merge_sort(list2)
    if DEBUG:
        print10(list2)
    list2 = list(list1)
    bubble_sort(list2)
    if DEBUG:
        print10(list2)
    list2 = list(list1)
    heap_sort(list2)
    if DEBUG:
        print10(list2)
    list2 = list(list1)
    insertion_sort(list2)
    if DEBUG:
        print10(list2)
    list2 = list(list1)
    merge_sort(list2)
    if DEBUG:
        print10(list2)
    list2 = list(list1)
    quick_sort(list2)
    if DEBUG:
        print10(list2)
    list2 = list(list1)
    selection_sort(list2)
    if DEBUG:
        print10(list2)
    # final test
    list2 = list(list1)
    if DEBUG:
        print "final test: ",
        print10(list2)

    #raw_input( "Press Enter to continue..." )

"""
typical results:
timing 7 sorting algorithms with a list of 1000 integers:
adaptive_merge_sort took 0.560ms
bubble_sort took 269.691ms
heap_sort took 13.556ms
insertion_sort took 130.870ms
merge_sort took 19.272ms
quick_sort took 6.849ms
selection_sort took 120.291ms
"""

kevindublin 0 Newbie Poster

18 Years Ago

This is fascinating. I tried your code on my windows PC with Python 2.5 and got a similar result.

I tried it then with psyco.full() and saw the major improvements I expected.

I then tried it with 10000 integers, a 10 times larger sample. The psyco performance knocked my socks off. The quick sort in python beat the inbuilt C implementation (yes I know there are other considerations).

timing 7 sorting algorithms with a list of 10000 integers (using psyco/python2.5):
adaptive_merge_sort took 6.890ms
bubble_sort took 1595.138ms
heap_sort took 8.901ms
insertion_sort took 462.398ms
merge_sort took 14.750ms
quick_sort took 6.441ms
selection_sort took 936.435ms

vegaseat 1,735 DaniWeb's Hypocrite

18 Years Ago

Thank you for the nice information!

For those folks who are not familiar with the psyco module, psyco allows Python to compile to native i386 code rather than the standard virtual byte code. This way the Python interpreter works much faster.

kubrick 0 Newbie Poster

17 Years Ago

Hi,

I suspect there is a small bug in your bubble_sort function.
You wrote :

def bubble_sort(list2):
    swap_test = False
    for i in range(0, len(list2) - 1):
        for j in range(0, len(list2) - i - 1):
            if list2[j] > list2[j + 1]:
                list2[j], list2[j + 1] = list2[j + 1], list2[j] # swap
            swap_test = True
        if swap_test == False:
            break

Wouldn't the "swap_test" be more useful used like this?

def bubble_sort(list2):
    for i in range(0, len(list2) - 1):
        swap_test = False
        for j in range(0, len(list2) - i - 1):
            if list2[j] > list2[j + 1]:
                list2[j], list2[j + 1] = list2[j + 1], list2[j] # swap
                swap_test = True
        if swap_test == False:
            break

vegaseat 1,735 DaniWeb's Hypocrite

16 Years Ago

Thanks a lot kubrick, sorry I am so slow. Checked it out and you are right. I corrected the blunder.

jureslak 0 Newbie Poster

14 Years Ago

bubble sort can be implemented even simpler:

def bubblesort(arr):
    done = False
    while not done:
        done = True
        for i in range(len(arr)-1):
            if arr[i] > arr[i+1]:
                arr[i], arr[i+1] = arr[i+1], arr[i]
                done = False
    return arr

and quicksort:

def quicksort(a):
    if len(a) <= 1: return a
    pivot = a.pop()
    before = [x for x in a if x <= pivot]
    after = [x for x in a if x > pivot]
    return quicksort(before) + [pivot] + quicksort(after)

or quicksort in one line:

def quicksort(a): return a if len(a) <= 1 else quicksort([x for x in a[1:] if x <= a[0]]) + [a[0]] + quicksort([x for x in a[1:] if x > a[0]])

Edited 14 Years Ago by jureslak because: n/a

-ordi- 6 Junior Poster in Training

14 Years Ago

from heapq import heappush, heappop
import random
import time

def print_timing(func):
  def wrapper(*arg):
    t1 = time.clock()
    res = func(*arg)
    t2 = time.clock()
    print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
    return res
  return wrapper

h = []
@print_timing
def heapsort(N):
  for i in range(0, N):
    heappush(h, random.randint(0, N - 1))
  return [heappop(h) for i in range(len(h))]

print heapsort(1000)

heapsort took 10.000ms

Edited 14 Years Ago by -ordi- because: n/a

TrustyTony 888 ex-Moderator

14 Years Ago

You producing random numbers inside tight loop. You should produce data ready in main code not to mix up timing.

richieking 44 Master Poster

14 Years Ago

I like one-liners but jureslak one-liner for third pseudo is not good for readability and maintainance
:)

-ordi- 6 Junior Poster in Training

14 Years Ago

from heapq import heappush, heappop
import random
import time

def print_timing(func):
  def wrapper(*arg):
    t1 = time.clock()
    res = func(*arg)
    t2 = time.clock()
    print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
    return res
  return wrapper

h = []
@print_timing
def heapsort(N):
  for i in range(0, N):
    heappush(h, random.randint(0, N - 1))
  return [heappop(h) for i in range(len(h))]

print heapsort(1000)

heapsort took 10.000ms

from heapq import heappush, heappop
import random
import time

def print_timing(func):
  def wrapper(*arg):
    t1 = time.clock()
    res = func(*arg)
    t2 = time.clock()
    print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
    return res
  return wrapper

h = []
N = 1000
for i in range(0, N):
  heappush(h, random.randint(0, N - 1))

@print_timing
def heapsort():
  k = len(h)
  return [heappop(h) for i in range(k)]

heapsort()

Ene Uran 638 Posting Virtuoso

14 Years Ago

I like one-liners but jureslak one-liner for third pseudo is not good for readability and maintainance
:)

I agree, also one-liners can be very slow.

tleeuwenburg

13 Years Ago

Hi there... I would like to copy/paste this code into my open source project to form part of a tutorial/demo. My project is licensed Apache 2.0. Is there any chance you could put a license into your code snippet to indicate if it is okay for me to re-use it?

Skrell 0 Light Poster

13 Years Ago

Would someone mind explaining the code:

def print_timing(func):
  def wrapper(*arg):
    t1 = time.clock()
    res = func(*arg)
    t2 = time.clock()
    print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
    return res
  return wrapper

as i don't understand how "res" works or what func is doing ?

Edited 13 Years Ago by Skrell because: n/a

Reply to this topic

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.