954,525 Members — Technology Publication meets Social Media
Username:
Password:
Lost login information?

Sorting Algorithms in Python

0
By vegaseat on Jan 22nd, 2006 10:09 am

Early versions of Python used a hybrid of samplesort (a variant of quicksort with large sample size) and binary insertion sort as the built-in sorting algorithm. This proved to be somewhat unstable, and was replaced in version 2.3 with an adaptive mergesort algorithm. I am comparing several rudimentary sorting routines, translated to Python from Narue's C code by my friend Micko, with the Python built-in sorting routine. The new Python24 @ function decorator is used to implement the timing.

# timing 7 different Python sorting algorithms with a list of integers
# each function is given the same list (fresh copy each time)
# tested with Python24       vegaseat      21jan2006

import random  # for generating random numbers
import time    # for timing each sort function with time.clock()

DEBUG = False  # set True to check results of each sort

N = 1000     # number of elements in list
list1 = []   # list of integer elements

for i in range(0, N):
    list1.append(random.randint(0, N-1))

#print list1  # test

def print_timing(func):
    def wrapper(*arg):
        t1 = time.clock()
        res = func(*arg)
        t2 = time.clock()
        print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
        return res
    return wrapper

# declare the @ decorator just above each sort function, invokes print_timing()
@print_timing
def adaptive_merge_sort(list2):
    """adaptive merge sort, built into Python since version 2.3"""
    list2.sort()

@print_timing
def bubble_sort(list2):
    #swap_test = False
    for i in range(0, len(list2) - 1):
        # as suggested by kubrick, makes sense
        swap_test = False
        for j in range(0, len(list2) - i - 1):
            if list2[j] > list2[j + 1]:
                list2[j], list2[j + 1] = list2[j + 1], list2[j]  # swap
            swap_test = True
        if swap_test == False:
            break

# selection sort
@print_timing
def selection_sort(list2):
    for i in range(0, len (list2)):
        min = i
        for j in range(i + 1, len(list2)):
            if list2[j] < list2[min]:
                min = j
        list2[i], list2[min] = list2[min], list2[i]  # swap
      
# insertion sort
@print_timing
def insertion_sort(list2):
    for i in range(1, len(list2)):
        save = list2[i]
        j = i
        while j > 0 and list2[j - 1] > save:
            list2[j] = list2[j - 1]
            j -= 1
        list2[j] = save
  
# quick sort
@print_timing
def quick_sort(list2):
    quick_sort_r(list2, 0, len(list2) - 1)

# quick_sort_r, recursive (used by quick_sort)
def quick_sort_r(list2 , first, last):
    if last > first:
        pivot = partition(list2, first, last)
        quick_sort_r(list2, first, pivot - 1)
        quick_sort_r(list2, pivot + 1, last)

# partition (used by quick_sort_r)
def partition(list2, first, last):
    sred = (first + last)/2
    if list2[first] > list2 [sred]:
        list2[first], list2[sred] = list2[sred], list2[first]  # swap
    if list2[first] > list2 [last]:
        list2[first], list2[last] = list2[last], list2[first]  # swap
    if list2[sred] > list2[last]:
        list2[sred], list2[last] = list2[last], list2[sred]    # swap
    list2 [sred], list2 [first] = list2[first], list2[sred]    # swap
    pivot = first
    i = first + 1
    j = last
  
    while True:
        while i <= last and list2[i] <= list2[pivot]:
            i += 1
        while j >= first and list2[j] > list2[pivot]:
            j -= 1
        if i >= j:
            break
        else:
            list2[i], list2[j] = list2[j], list2[i]  # swap
    list2[j], list2[pivot] = list2[pivot], list2[j]  # swap
    return j

# heap sort
@print_timing
def heap_sort(list2):
    first = 0
    last = len(list2) - 1
    create_heap(list2, first, last)
    for i in range(last, first, -1):
        list2[i], list2[first] = list2[first], list2[i]  # swap
        establish_heap_property (list2, first, i - 1)

# create heap (used by heap_sort)
def create_heap(list2, first, last):
    i = last/2
    while i >= first:
        establish_heap_property(list2, i, last)
        i -= 1

# establish heap property (used by create_heap)
def establish_heap_property(list2, first, last):
    while 2 * first + 1 <= last:
        k = 2 * first + 1
        if k < last and list2[k] < list2[k + 1]:
            k += 1
        if list2[first] >= list2[k]:
            break
        list2[first], list2[k] = list2[k], list2[first]  # swap
        first = k

# merge sort
@print_timing
def merge_sort(list2):
    merge_sort_r(list2, 0, len(list2) -1)

# merge sort recursive (used by merge_sort)
def merge_sort_r(list2, first, last):
    if first < last:
        sred = (first + last)/2
        merge_sort_r(list2, first, sred)
        merge_sort_r(list2, sred + 1, last)
        merge(list2, first, last, sred)

# merge (used by merge_sort_r)
def merge(list2, first, last, sred):
    helper_list = []
    i = first
    j = sred + 1
    while i <= sred and j <= last:
        if list2 [i] <= list2 [j]:
            helper_list.append(list2[i])
            i += 1
        else:
            helper_list.append(list2 [j])
            j += 1
    while i <= sred:
        helper_list.append(list2[i])
        i +=1
    while j <= last:
        helper_list.append(list2[j])
        j += 1
    for k in range(0, last - first + 1):
        list2[first + k] = helper_list [k]

# test sorted list by printing the first 10 elements
def print10(list2):
    for k in range(10):
        print list2[k],
    print


# run test if script is executed
if __name__ == "__main__" :
    print "timing 7 sorting algorithms with a list of 1000 integers:"
    # make a true copy of list1 each time
    list2 = list(list1)
    adaptive_merge_sort(list2)
    if DEBUG:
        print10(list2)
    list2 = list(list1)
    bubble_sort(list2)
    if DEBUG:
        print10(list2)
    list2 = list(list1)
    heap_sort(list2)
    if DEBUG:
        print10(list2)
    list2 = list(list1)
    insertion_sort(list2)
    if DEBUG:
        print10(list2)
    list2 = list(list1)
    merge_sort(list2)
    if DEBUG:
        print10(list2)
    list2 = list(list1)
    quick_sort(list2)
    if DEBUG:
        print10(list2)
    list2 = list(list1)
    selection_sort(list2)
    if DEBUG:
        print10(list2)
    # final test
    list2 = list(list1)
    if DEBUG:
        print "final test: ",
        print10(list2)

    #raw_input( "Press Enter to continue..." )

"""
typical results:
timing 7 sorting algorithms with a list of 1000 integers:
adaptive_merge_sort took 0.560ms
bubble_sort took 269.691ms
heap_sort took 13.556ms
insertion_sort took 130.870ms
merge_sort took 19.272ms
quick_sort took 6.849ms
selection_sort took 120.291ms
"""

This is fascinating. I tried your code on my windows PC with Python 2.5 and got a similar result.

I tried it then with psyco.full() and saw the major improvements I expected.

I then tried it with 10000 integers, a 10 times larger sample. The psyco performance knocked my socks off. The quick sort in python beat the inbuilt C implementation (yes I know there are other considerations).

timing 7 sorting algorithms with a list of 10000 integers (using psyco/python2.5):
adaptive_merge_sort took 6.890ms
bubble_sort took 1595.138ms
heap_sort took 8.901ms
insertion_sort took 462.398ms
merge_sort took 14.750ms
quick_sort took 6.441ms
selection_sort took 936.435ms
kevindublin
Newbie Poster
1 post since Jun 2007
Reputation Points: 10
Solved Threads: 0
 

Thank you for the nice information!

For those folks who are not familiar with the psyco module, psyco allows Python to compile to native i386 code rather than the standard virtual byte code. This way the Python interpreter works much faster.

vegaseat
DaniWeb's Hypocrite
Moderator
5,989 posts since Oct 2004
Reputation Points: 1,345
Solved Threads: 1,417
 

Hi,

I suspect there is a small bug in your bubble_sort function.
You wrote :

def bubble_sort(list2):
    swap_test = False
    for i in range(0, len(list2) - 1):
        for j in range(0, len(list2) - i - 1):
            if list2[j] > list2[j + 1]:
                list2[j], list2[j + 1] = list2[j + 1], list2[j] # swap
            swap_test = True
        if swap_test == False:
            break

Wouldn't the "swap_test" be more useful used like this?

def bubble_sort(list2):
    for i in range(0, len(list2) - 1):
        swap_test = False
        for j in range(0, len(list2) - i - 1):
            if list2[j] > list2[j + 1]:
                list2[j], list2[j + 1] = list2[j + 1], list2[j] # swap
                swap_test = True
        if swap_test == False:
            break
kubrick
Newbie Poster
1 post since Aug 2007
Reputation Points: 10
Solved Threads: 0
 

Thanks a lot kubrick, sorry I am so slow. Checked it out and you are right. I corrected the blunder.

vegaseat
DaniWeb's Hypocrite
Moderator
5,989 posts since Oct 2004
Reputation Points: 1,345
Solved Threads: 1,417
 

bubble sort can be implemented even simpler:

def bubblesort(arr):
    done = False
    while not done:
        done = True
        for i in range(len(arr)-1):
            if arr[i] > arr[i+1]:
                arr[i], arr[i+1] = arr[i+1], arr[i]
                done = False
    return arr


and quicksort:

def quicksort(a):
    if len(a) <= 1: return a
    pivot = a.pop()
    before = [x for x in a if x <= pivot]
    after = [x for x in a if x > pivot]
    return quicksort(before) + [pivot] + quicksort(after)

or quicksort in one line:
def quicksort(a): return a if len(a) <= 1 else quicksort([x for x in a[1:] if x <= a[0]]) + [a[0]] + quicksort([x for x in a[1:] if x > a[0]])
jureslak
Newbie Poster
1 post since Nov 2010
Reputation Points: 10
Solved Threads: 0
 
from heapq import heappush, heappop
import random
import time

def print_timing(func):
  def wrapper(*arg):
    t1 = time.clock()
    res = func(*arg)
    t2 = time.clock()
    print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
    return res
  return wrapper

h = []
@print_timing
def heapsort(N):
  for i in range(0, N):
    heappush(h, random.randint(0, N - 1))
  return [heappop(h) for i in range(len(h))]

print heapsort(1000)


heapsort took 10.000ms

-ordi-
Junior Poster in Training
92 posts since Dec 2009
Reputation Points: 18
Solved Threads: 11
 

You producing random numbers inside tight loop. You should produce data ready in main code not to mix up timing.

pyTony
pyMod
Moderator
5,359 posts since Apr 2010
Reputation Points: 782
Solved Threads: 852
 

I like one-liners but jureslak one-liner for third pseudo is not good for readability and maintainance
:)

richieking
Master Poster
764 posts since Jun 2009
Reputation Points: 61
Solved Threads: 152
 
from heapq import heappush, heappop
import random
import time

def print_timing(func):
  def wrapper(*arg):
    t1 = time.clock()
    res = func(*arg)
    t2 = time.clock()
    print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
    return res
  return wrapper

h = []
@print_timing
def heapsort(N):
  for i in range(0, N):
    heappush(h, random.randint(0, N - 1))
  return [heappop(h) for i in range(len(h))]

print heapsort(1000)

heapsort took 10.000ms

from heapq import heappush, heappop
import random
import time

def print_timing(func):
  def wrapper(*arg):
    t1 = time.clock()
    res = func(*arg)
    t2 = time.clock()
    print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
    return res
  return wrapper

h = []
N = 1000
for i in range(0, N):
  heappush(h, random.randint(0, N - 1))

@print_timing
def heapsort():
  k = len(h)
  return [heappop(h) for i in range(k)]

heapsort()
-ordi-
Junior Poster in Training
92 posts since Dec 2009
Reputation Points: 18
Solved Threads: 11
 
I like one-liners but jureslak one-liner for third pseudo is not good for readability and maintainance :)

I agree, also one-liners can be very slow.

Ene Uran
Posting Virtuoso
1,723 posts since Aug 2005
Reputation Points: 625
Solved Threads: 213
 

Hi there... I would like to copy/paste this code into my open source project to form part of a tutorial/demo. My project is licensed Apache 2.0. Is there any chance you could put a license into your code snippet to indicate if it is okay for me to re-use it?

tleeuwenburg
Newbie Poster
1 post since Jul 2011
Reputation Points: 10
Solved Threads: 0
 

Would someone mind explaining the code:

def print_timing(func):
  def wrapper(*arg):
    t1 = time.clock()
    res = func(*arg)
    t2 = time.clock()
    print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
    return res
  return wrapper


as i don't understand how "res" works or what func is doing ?

Skrell
Light Poster
29 posts since Aug 2011
Reputation Points: 10
Solved Threads: 0
 

This article has been dead for over three months

Post: Markdown Syntax: Formatting Help
You