944,192 Members | Top Members by Rank

Ad:
  • Python Code Snippet
  • Views: 60907
  • Python RSS
0

Sorting Algorithms in Python

by on Jan 22nd, 2006
Early versions of Python used a hybrid of samplesort (a variant of quicksort with large sample size) and binary insertion sort as the built-in sorting algorithm. This proved to be somewhat unstable, and was replaced in version 2.3 with an adaptive mergesort algorithm. I am comparing several rudimentary sorting routines, translated to Python from Narue's C code by my friend Micko, with the Python built-in sorting routine. The new Python24 @ function decorator is used to implement the timing.
Python Code Snippet (Toggle Plain Text)
  1. # timing 7 different Python sorting algorithms with a list of integers
  2. # each function is given the same list (fresh copy each time)
  3. # tested with Python24 vegaseat 21jan2006
  4.  
  5. import random # for generating random numbers
  6. import time # for timing each sort function with time.clock()
  7.  
  8. DEBUG = False # set True to check results of each sort
  9.  
  10. N = 1000 # number of elements in list
  11. list1 = [] # list of integer elements
  12.  
  13. for i in range(0, N):
  14. list1.append(random.randint(0, N-1))
  15.  
  16. #print list1 # test
  17.  
  18. def print_timing(func):
  19. def wrapper(*arg):
  20. t1 = time.clock()
  21. res = func(*arg)
  22. t2 = time.clock()
  23. print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
  24. return res
  25. return wrapper
  26.  
  27. # declare the @ decorator just above each sort function, invokes print_timing()
  28. @print_timing
  29. def adaptive_merge_sort(list2):
  30. """adaptive merge sort, built into Python since version 2.3"""
  31. list2.sort()
  32.  
  33. @print_timing
  34. def bubble_sort(list2):
  35. #swap_test = False
  36. for i in range(0, len(list2) - 1):
  37. # as suggested by kubrick, makes sense
  38. swap_test = False
  39. for j in range(0, len(list2) - i - 1):
  40. if list2[j] > list2[j + 1]:
  41. list2[j], list2[j + 1] = list2[j + 1], list2[j] # swap
  42. swap_test = True
  43. if swap_test == False:
  44. break
  45.  
  46. # selection sort
  47. @print_timing
  48. def selection_sort(list2):
  49. for i in range(0, len (list2)):
  50. min = i
  51. for j in range(i + 1, len(list2)):
  52. if list2[j] < list2[min]:
  53. min = j
  54. list2[i], list2[min] = list2[min], list2[i] # swap
  55.  
  56. # insertion sort
  57. @print_timing
  58. def insertion_sort(list2):
  59. for i in range(1, len(list2)):
  60. save = list2[i]
  61. j = i
  62. while j > 0 and list2[j - 1] > save:
  63. list2[j] = list2[j - 1]
  64. j -= 1
  65. list2[j] = save
  66.  
  67. # quick sort
  68. @print_timing
  69. def quick_sort(list2):
  70. quick_sort_r(list2, 0, len(list2) - 1)
  71.  
  72. # quick_sort_r, recursive (used by quick_sort)
  73. def quick_sort_r(list2 , first, last):
  74. if last > first:
  75. pivot = partition(list2, first, last)
  76. quick_sort_r(list2, first, pivot - 1)
  77. quick_sort_r(list2, pivot + 1, last)
  78.  
  79. # partition (used by quick_sort_r)
  80. def partition(list2, first, last):
  81. sred = (first + last)/2
  82. if list2[first] > list2 [sred]:
  83. list2[first], list2[sred] = list2[sred], list2[first] # swap
  84. if list2[first] > list2 [last]:
  85. list2[first], list2[last] = list2[last], list2[first] # swap
  86. if list2[sred] > list2[last]:
  87. list2[sred], list2[last] = list2[last], list2[sred] # swap
  88. list2 [sred], list2 [first] = list2[first], list2[sred] # swap
  89. pivot = first
  90. i = first + 1
  91. j = last
  92.  
  93. while True:
  94. while i <= last and list2[i] <= list2[pivot]:
  95. i += 1
  96. while j >= first and list2[j] > list2[pivot]:
  97. j -= 1
  98. if i >= j:
  99. break
  100. else:
  101. list2[i], list2[j] = list2[j], list2[i] # swap
  102. list2[j], list2[pivot] = list2[pivot], list2[j] # swap
  103. return j
  104.  
  105. # heap sort
  106. @print_timing
  107. def heap_sort(list2):
  108. first = 0
  109. last = len(list2) - 1
  110. create_heap(list2, first, last)
  111. for i in range(last, first, -1):
  112. list2[i], list2[first] = list2[first], list2[i] # swap
  113. establish_heap_property (list2, first, i - 1)
  114.  
  115. # create heap (used by heap_sort)
  116. def create_heap(list2, first, last):
  117. i = last/2
  118. while i >= first:
  119. establish_heap_property(list2, i, last)
  120. i -= 1
  121.  
  122. # establish heap property (used by create_heap)
  123. def establish_heap_property(list2, first, last):
  124. while 2 * first + 1 <= last:
  125. k = 2 * first + 1
  126. if k < last and list2[k] < list2[k + 1]:
  127. k += 1
  128. if list2[first] >= list2[k]:
  129. break
  130. list2[first], list2[k] = list2[k], list2[first] # swap
  131. first = k
  132.  
  133. # merge sort
  134. @print_timing
  135. def merge_sort(list2):
  136. merge_sort_r(list2, 0, len(list2) -1)
  137.  
  138. # merge sort recursive (used by merge_sort)
  139. def merge_sort_r(list2, first, last):
  140. if first < last:
  141. sred = (first + last)/2
  142. merge_sort_r(list2, first, sred)
  143. merge_sort_r(list2, sred + 1, last)
  144. merge(list2, first, last, sred)
  145.  
  146. # merge (used by merge_sort_r)
  147. def merge(list2, first, last, sred):
  148. helper_list = []
  149. i = first
  150. j = sred + 1
  151. while i <= sred and j <= last:
  152. if list2 [i] <= list2 [j]:
  153. helper_list.append(list2[i])
  154. i += 1
  155. else:
  156. helper_list.append(list2 [j])
  157. j += 1
  158. while i <= sred:
  159. helper_list.append(list2[i])
  160. i +=1
  161. while j <= last:
  162. helper_list.append(list2[j])
  163. j += 1
  164. for k in range(0, last - first + 1):
  165. list2[first + k] = helper_list [k]
  166.  
  167. # test sorted list by printing the first 10 elements
  168. def print10(list2):
  169. for k in range(10):
  170. print list2[k],
  171. print
  172.  
  173.  
  174. # run test if script is executed
  175. if __name__ == "__main__" :
  176. print "timing 7 sorting algorithms with a list of 1000 integers:"
  177. # make a true copy of list1 each time
  178. list2 = list(list1)
  179. adaptive_merge_sort(list2)
  180. if DEBUG:
  181. print10(list2)
  182. list2 = list(list1)
  183. bubble_sort(list2)
  184. if DEBUG:
  185. print10(list2)
  186. list2 = list(list1)
  187. heap_sort(list2)
  188. if DEBUG:
  189. print10(list2)
  190. list2 = list(list1)
  191. insertion_sort(list2)
  192. if DEBUG:
  193. print10(list2)
  194. list2 = list(list1)
  195. merge_sort(list2)
  196. if DEBUG:
  197. print10(list2)
  198. list2 = list(list1)
  199. quick_sort(list2)
  200. if DEBUG:
  201. print10(list2)
  202. list2 = list(list1)
  203. selection_sort(list2)
  204. if DEBUG:
  205. print10(list2)
  206. # final test
  207. list2 = list(list1)
  208. if DEBUG:
  209. print "final test: ",
  210. print10(list2)
  211.  
  212. #raw_input( "Press Enter to continue..." )
  213.  
  214. """
  215. typical results:
  216. timing 7 sorting algorithms with a list of 1000 integers:
  217. adaptive_merge_sort took 0.560ms
  218. bubble_sort took 269.691ms
  219. heap_sort took 13.556ms
  220. insertion_sort took 130.870ms
  221. merge_sort took 19.272ms
  222. quick_sort took 6.849ms
  223. selection_sort took 120.291ms
  224. """
Comments on this Code Snippet
Jun 1st, 2007
0

Re: Sorting Algorithms in Python

This is fascinating. I tried your code on my windows PC with Python 2.5 and got a similar result.

I tried it then with psyco.full() and saw the major improvements I expected.

I then tried it with 10000 integers, a 10 times larger sample. The psyco performance knocked my socks off. The quick sort in python beat the inbuilt C implementation (yes I know there are other considerations).

Python Syntax (Toggle Plain Text)
  1. timing 7 sorting algorithms with a list of 10000 integers (using psyco/python2.5):
  2. adaptive_merge_sort took 6.890ms
  3. bubble_sort took 1595.138ms
  4. heap_sort took 8.901ms
  5. insertion_sort took 462.398ms
  6. merge_sort took 14.750ms
  7. quick_sort took 6.441ms
  8. selection_sort took 936.435ms
Newbie Poster
kevindublin is offline Offline
1 posts
since Jun 2007
Jun 5th, 2007
0

Re: Sorting Algorithms in Python

Thank you for the nice information!

For those folks who are not familiar with the psyco module, psyco allows Python to compile to native i386 code rather than the standard virtual byte code. This way the Python interpreter works much faster.
DaniWeb's Hypocrite
vegaseat is offline Offline
5,792 posts
since Oct 2004
Aug 21st, 2007
0

Re: Sorting Algorithms in Python

Hi,

I suspect there is a small bug in your bubble_sort function.
You wrote :
python Syntax (Toggle Plain Text)
  1. def bubble_sort(list2):
  2. swap_test = False
  3. for i in range(0, len(list2) - 1):
  4. for j in range(0, len(list2) - i - 1):
  5. if list2[j] > list2[j + 1]:
  6. list2[j], list2[j + 1] = list2[j + 1], list2[j] # swap
  7. swap_test = True
  8. if swap_test == False:
  9. break
Wouldn't the "swap_test" be more useful used like this?
python Syntax (Toggle Plain Text)
  1. def bubble_sort(list2):
  2. for i in range(0, len(list2) - 1):
  3. swap_test = False
  4. for j in range(0, len(list2) - i - 1):
  5. if list2[j] > list2[j + 1]:
  6. list2[j], list2[j + 1] = list2[j + 1], list2[j] # swap
  7. swap_test = True
  8. if swap_test == False:
  9. break
Newbie Poster
kubrick is offline Offline
1 posts
since Aug 2007
Aug 13th, 2008
0

Re: Sorting Algorithms in Python

Thanks a lot kubrick, sorry I am so slow. Checked it out and you are right. I corrected the blunder.
DaniWeb's Hypocrite
vegaseat is offline Offline
5,792 posts
since Oct 2004
Nov 3rd, 2010
0

Re: Sorting Algorithms in Python

bubble sort can be implemented even simpler:

python Syntax (Toggle Plain Text)
  1. def bubblesort(arr):
  2. done = False
  3. while not done:
  4. done = True
  5. for i in range(len(arr)-1):
  6. if arr[i] > arr[i+1]:
  7. arr[i], arr[i+1] = arr[i+1], arr[i]
  8. done = False
  9. return arr

and quicksort:

python Syntax (Toggle Plain Text)
  1. def quicksort(a):
  2. if len(a) <= 1: return a
  3. pivot = a.pop()
  4. before = [x for x in a if x <= pivot]
  5. after = [x for x in a if x > pivot]
  6. return quicksort(before) + [pivot] + quicksort(after)
  7.  
  8. or quicksort in one line:

python Syntax (Toggle Plain Text)
  1. def quicksort(a): return a if len(a) <= 1 else quicksort([x for x in a[1:] if x <= a[0]]) + [a[0]] + quicksort([x for x in a[1:] if x > a[0]])
Last edited by jureslak; Nov 3rd, 2010 at 12:50 pm.
Newbie Poster
jureslak is offline Offline
1 posts
since Nov 2010
Jan 2nd, 2011
0

Re: Sorting Algorithms in Python

Python Syntax (Toggle Plain Text)
  1. from heapq import heappush, heappop
  2. import random
  3. import time
  4.  
  5. def print_timing(func):
  6. def wrapper(*arg):
  7. t1 = time.clock()
  8. res = func(*arg)
  9. t2 = time.clock()
  10. print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
  11. return res
  12. return wrapper
  13.  
  14. h = []
  15. @print_timing
  16. def heapsort(N):
  17. for i in range(0, N):
  18. heappush(h, random.randint(0, N - 1))
  19. return [heappop(h) for i in range(len(h))]
  20.  
  21. print heapsort(1000)

heapsort took 10.000ms
Last edited by -ordi-; Jan 2nd, 2011 at 7:39 am.
Junior Poster in Training
-ordi- is offline Offline
91 posts
since Dec 2009
Jan 2nd, 2011
0

Re: Sorting Algorithms in Python

You producing random numbers inside tight loop. You should produce data ready in main code not to mix up timing.
Industrious Poster
pyTony is offline Offline
4,209 posts
since Apr 2010
Jan 2nd, 2011
0

Re: Sorting Algorithms in Python

I like one-liners but jureslak one-liner for third pseudo is not good for readability and maintainance
Master Poster
richieking is offline Offline
757 posts
since Jun 2009
Jan 2nd, 2011
0

Re: Sorting Algorithms in Python

Click to Expand / Collapse  Quote originally posted by -ordi- ...
Python Syntax (Toggle Plain Text)
  1. from heapq import heappush, heappop
  2. import random
  3. import time
  4.  
  5. def print_timing(func):
  6. def wrapper(*arg):
  7. t1 = time.clock()
  8. res = func(*arg)
  9. t2 = time.clock()
  10. print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
  11. return res
  12. return wrapper
  13.  
  14. h = []
  15. @print_timing
  16. def heapsort(N):
  17. for i in range(0, N):
  18. heappush(h, random.randint(0, N - 1))
  19. return [heappop(h) for i in range(len(h))]
  20.  
  21. print heapsort(1000)

heapsort took 10.000ms
Python Syntax (Toggle Plain Text)
  1. from heapq import heappush, heappop
  2. import random
  3. import time
  4.  
  5. def print_timing(func):
  6. def wrapper(*arg):
  7. t1 = time.clock()
  8. res = func(*arg)
  9. t2 = time.clock()
  10. print '%s took %0.3fms' % (func.func_name, (t2-t1)*1000.0)
  11. return res
  12. return wrapper
  13.  
  14. h = []
  15. N = 1000
  16. for i in range(0, N):
  17. heappush(h, random.randint(0, N - 1))
  18.  
  19. @print_timing
  20. def heapsort():
  21. k = len(h)
  22. return [heappop(h) for i in range(k)]
  23.  
  24. heapsort()
Junior Poster in Training
-ordi- is offline Offline
91 posts
since Dec 2009
Message:
Previous Thread in Python Forum Timeline: subprocess module newb help
Next Thread in Python Forum Timeline: Finding the occurrences of the spaces using while loops





About Us | Contact Us | Advertise | Acceptable Use Policy
Forum Index | Build Custom RSS Feed


Follow us on Twitter


© 2011 DaniWeb® LLC