Just another word frequency program that shows you how to sort the output by frequency:
# Count the words in a text and show the ten most frequent ones,
# ordered by decreasing frequency, using a list of (freq, word) tuples.
from collections import Counter

# sample text for testing (could come from a text file)
text = """\
My name is Fred Flintstone and I am a famous TV
star. I have as much authority as the Pope, I
just don't have as many people who believe it.
"""

# split on any whitespace; punctuation is still attached to the tokens here
word_list = text.split()

# normalize each token: lower-case it and strip trailing periods/commas so
# that "Pope," and "pope" are counted as the same word
normalized = (token.lower().rstrip('.,') for token in word_list)

# build the word -> count mapping; a token that consisted only of '.'/','
# normalizes to the empty string and is skipped rather than counted as a word
word_freq = Counter(token for token in normalized if token)

# create a list of (freq, word) tuples and sort it in descending order;
# tuples compare element by element, so the frequency decides first and
# ties come out in reverse alphabetical order of the word
freq_list = sorted(
    ((freq, word) for word, freq in word_freq.items()),
    reverse=True,
)

# single-argument print(...) calls behave identically on Python 2 and 3
print("The ten most frequent words are:")
# slice off the first ten entries instead of walking the whole list
for freq, word in freq_list[:10]:
    print("%d %s" % (freq, word))
"""
my output -->
The ten most frequent words are:
3 i
3 as
2 have
1 who
1 tv
1 the
1 star
1 pope
1 people
1 name
"""