# Sorting Algorithms in Python

Early versions of Python used a hybrid of samplesort (a variant of quicksort with large sample size) and binary insertion sort as the built-in sorting algorithm. This proved to be somewhat unstable, and was replaced in version 2.3 with an adaptive mergesort algorithm. I am comparing several rudimentary sorting routines, translated to Python from Narue's C code by my friend Micko, with the Python built-in sorting routine. The new Python24 @ function decorator is used to implement the timing.

``````# timing 7 different Python sorting algorithms with a list of integers
# each function is given the same list (fresh copy each time)
# tested with Python24       vegaseat      21jan2006

import random  # for generating random numbers
import time    # for timing each sort function with time.clock()

DEBUG = False  # set True to check results of each sort

N = 1000     # number of elements in list
list1 = []   # list of integer elements

for i in range(0, N):
list1.append(random.randint(0, N-1))

#print list1  # test

def print_timing(func):
def wrapper(*arg):
t1 = time.clock()
res = func(*arg)
t2 = time.clock()
print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
return res
return wrapper

# declare the @ decorator just above each sort function, invokes print_timing()
@print_timing
"""adaptive merge sort, built into Python since version 2.3"""
list2.sort()

@print_timing
def bubble_sort(list2):
#swap_test = False
for i in range(0, len(list2) - 1):
# as suggested by kubrick, makes sense
swap_test = False
for j in range(0, len(list2) - i - 1):
if list2[j] > list2[j + 1]:
list2[j], list2[j + 1] = list2[j + 1], list2[j]  # swap
swap_test = True
if swap_test == False:
break

# selection sort
@print_timing
def selection_sort(list2):
for i in range(0, len (list2)):
min = i
for j in range(i + 1, len(list2)):
if list2[j] < list2[min]:
min = j
list2[i], list2[min] = list2[min], list2[i]  # swap

# insertion sort
@print_timing
def insertion_sort(list2):
for i in range(1, len(list2)):
save = list2[i]
j = i
while j > 0 and list2[j - 1] > save:
list2[j] = list2[j - 1]
j -= 1
list2[j] = save

# quick sort
@print_timing
def quick_sort(list2):
quick_sort_r(list2, 0, len(list2) - 1)

# quick_sort_r, recursive (used by quick_sort)
def quick_sort_r(list2 , first, last):
if last > first:
pivot = partition(list2, first, last)
quick_sort_r(list2, first, pivot - 1)
quick_sort_r(list2, pivot + 1, last)

# partition (used by quick_sort_r)
def partition(list2, first, last):
sred = (first + last)/2
if list2[first] > list2 [sred]:
list2[first], list2[sred] = list2[sred], list2[first]  # swap
if list2[first] > list2 [last]:
list2[first], list2[last] = list2[last], list2[first]  # swap
if list2[sred] > list2[last]:
list2[sred], list2[last] = list2[last], list2[sred]    # swap
list2 [sred], list2 [first] = list2[first], list2[sred]    # swap
pivot = first
i = first + 1
j = last

while True:
while i <= last and list2[i] <= list2[pivot]:
i += 1
while j >= first and list2[j] > list2[pivot]:
j -= 1
if i >= j:
break
else:
list2[i], list2[j] = list2[j], list2[i]  # swap
list2[j], list2[pivot] = list2[pivot], list2[j]  # swap
return j

# heap sort
@print_timing
def heap_sort(list2):
first = 0
last = len(list2) - 1
create_heap(list2, first, last)
for i in range(last, first, -1):
list2[i], list2[first] = list2[first], list2[i]  # swap
establish_heap_property (list2, first, i - 1)

# create heap (used by heap_sort)
def create_heap(list2, first, last):
i = last/2
while i >= first:
establish_heap_property(list2, i, last)
i -= 1

# establish heap property (used by create_heap)
def establish_heap_property(list2, first, last):
while 2 * first + 1 <= last:
k = 2 * first + 1
if k < last and list2[k] < list2[k + 1]:
k += 1
if list2[first] >= list2[k]:
break
list2[first], list2[k] = list2[k], list2[first]  # swap
first = k

# merge sort
@print_timing
def merge_sort(list2):
merge_sort_r(list2, 0, len(list2) -1)

# merge sort recursive (used by merge_sort)
def merge_sort_r(list2, first, last):
if first < last:
sred = (first + last)/2
merge_sort_r(list2, first, sred)
merge_sort_r(list2, sred + 1, last)
merge(list2, first, last, sred)

# merge (used by merge_sort_r)
def merge(list2, first, last, sred):
helper_list = []
i = first
j = sred + 1
while i <= sred and j <= last:
if list2 [i] <= list2 [j]:
helper_list.append(list2[i])
i += 1
else:
helper_list.append(list2 [j])
j += 1
while i <= sred:
helper_list.append(list2[i])
i +=1
while j <= last:
helper_list.append(list2[j])
j += 1
for k in range(0, last - first + 1):
list2[first + k] = helper_list [k]

# test sorted list by printing the first 10 elements
def print10(list2):
for k in range(10):
print list2[k],
print

# run test if script is executed
if __name__ == "__main__" :
print "timing 7 sorting algorithms with a list of 1000 integers:"
# make a true copy of list1 each time
list2 = list(list1)
if DEBUG:
print10(list2)
list2 = list(list1)
bubble_sort(list2)
if DEBUG:
print10(list2)
list2 = list(list1)
heap_sort(list2)
if DEBUG:
print10(list2)
list2 = list(list1)
insertion_sort(list2)
if DEBUG:
print10(list2)
list2 = list(list1)
merge_sort(list2)
if DEBUG:
print10(list2)
list2 = list(list1)
quick_sort(list2)
if DEBUG:
print10(list2)
list2 = list(list1)
selection_sort(list2)
if DEBUG:
print10(list2)
# final test
list2 = list(list1)
if DEBUG:
print "final test: ",
print10(list2)

#raw_input( "Press Enter to continue..." )

"""
typical results:
timing 7 sorting algorithms with a list of 1000 integers:
bubble_sort took 269.691ms
heap_sort took 13.556ms
insertion_sort took 130.870ms
merge_sort took 19.272ms
quick_sort took 6.849ms
selection_sort took 120.291ms
"""``````

This is fascinating. I tried your code on my windows PC with Python 2.5 and got a similar result.

I tried it then with psyco.full() and saw the major improvements I expected.

I then tried it with 10000 integers, a 10 times larger sample. The psyco performance knocked my socks off. The quick sort in python beat the inbuilt C implementation (yes I know there are other considerations).

``````timing 7 sorting algorithms with a list of 10000 integers (using psyco/python2.5):
bubble_sort took 1595.138ms
heap_sort took 8.901ms
insertion_sort took 462.398ms
merge_sort took 14.750ms
quick_sort took 6.441ms
selection_sort took 936.435ms``````
vegaseat 1,735

Thank you for the nice information!

For those folks who are not familiar with the psyco module, psyco allows Python to compile to native i386 code rather than the standard virtual byte code. This way the Python interpreter works much faster.

Hi,

I suspect there is a small bug in your bubble_sort function.
You wrote :

``````def bubble_sort(list2):
swap_test = False
for i in range(0, len(list2) - 1):
for j in range(0, len(list2) - i - 1):
if list2[j] > list2[j + 1]:
list2[j], list2[j + 1] = list2[j + 1], list2[j] # swap
swap_test = True
if swap_test == False:
break``````

Wouldn't the "swap_test" be more useful used like this?

``````def bubble_sort(list2):
for i in range(0, len(list2) - 1):
swap_test = False
for j in range(0, len(list2) - i - 1):
if list2[j] > list2[j + 1]:
list2[j], list2[j + 1] = list2[j + 1], list2[j] # swap
swap_test = True
if swap_test == False:
break``````
vegaseat 1,735

Thanks a lot kubrick, sorry I am so slow. Checked it out and you are right. I corrected the blunder.

bubble sort can be implemented even simpler:

``````def bubblesort(arr):
done = False
while not done:
done = True
for i in range(len(arr)-1):
if arr[i] > arr[i+1]:
arr[i], arr[i+1] = arr[i+1], arr[i]
done = False
return arr``````

and quicksort:

``````def quicksort(a):
if len(a) <= 1: return a
pivot = a.pop()
before = [x for x in a if x <= pivot]
after = [x for x in a if x > pivot]
return quicksort(before) + [pivot] + quicksort(after)

or quicksort in one line:``````
``def quicksort(a): return a if len(a) <= 1 else quicksort([x for x in a[1:] if x <= a[0]]) + [a[0]] + quicksort([x for x in a[1:] if x > a[0]])``
``````from heapq import heappush, heappop
import random
import time

def print_timing(func):
def wrapper(*arg):
t1 = time.clock()
res = func(*arg)
t2 = time.clock()
print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
return res
return wrapper

h = []
@print_timing
def heapsort(N):
for i in range(0, N):
heappush(h, random.randint(0, N - 1))
return [heappop(h) for i in range(len(h))]

print heapsort(1000)``````

heapsort took 10.000ms

TrustyTony 888

You producing random numbers inside tight loop. You should produce data ready in main code not to mix up timing.

I like one-liners but jureslak one-liner for third pseudo is not good for readability and maintainance
:)

``````from heapq import heappush, heappop
import random
import time

def print_timing(func):
def wrapper(*arg):
t1 = time.clock()
res = func(*arg)
t2 = time.clock()
print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
return res
return wrapper

h = []
@print_timing
def heapsort(N):
for i in range(0, N):
heappush(h, random.randint(0, N - 1))
return [heappop(h) for i in range(len(h))]

print heapsort(1000)``````

heapsort took 10.000ms

``````from heapq import heappush, heappop
import random
import time

def print_timing(func):
def wrapper(*arg):
t1 = time.clock()
res = func(*arg)
t2 = time.clock()
print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
return res
return wrapper

h = []
N = 1000
for i in range(0, N):
heappush(h, random.randint(0, N - 1))

@print_timing
def heapsort():
k = len(h)
return [heappop(h) for i in range(k)]

heapsort()``````
Ene Uran 638

I like one-liners but jureslak one-liner for third pseudo is not good for readability and maintainance
:)

I agree, also one-liners can be very slow.

tleeuwenburg

Hi there... I would like to copy/paste this code into my open source project to form part of a tutorial/demo. My project is licensed Apache 2.0. Is there any chance you could put a license into your code snippet to indicate if it is okay for me to re-use it?

Would someone mind explaining the code:

``````def print_timing(func):
def wrapper(*arg):
t1 = time.clock()
res = func(*arg)
t2 = time.clock()
print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
return res
return wrapper``````

as i don't understand how "res" works or what func is doing ?

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.