# (Forum page residue, not part of the program:)
# This article has been dead for over three months
# You
""" Major learning project of Tony 'pyTony' Veijalainen 2010-2011
Keep attribution of the source if you use this code elsewhere.

This is the clearest version of the code, without special tricks or
an anagram word-synonyms dictionary.
Published 11.11.11 at the DaniWeb Python Forum.

Usage:
    python sANAsTony.py uk 1 DaniWeb Forum
    # or answer the prompts interactively:
    python sANAsTony.py
"""
import sys
import os
# Python 2 compatibility: use the lazy/str-returning builtins under the
# Python 3 names.  sys.version_info is compared numerically; comparing the
# sys.version string is lexicographic and not a reliable version test.
if sys.version_info[0] < 3:
    input, range = raw_input, xrange

# Psyco is an optional accelerator; only a failed import means it is
# unavailable.  A bare except would also swallow KeyboardInterrupt and
# SystemExit.
try:
    import psyco
except ImportError:
    print('(Psyco would improve performance noticably in Python 2.6 and earlier)')
else:
    psyco.full()
def contains(bigger, smaller):
    """Take the letters of ``smaller`` out of ``bigger``.

    Every character of ``smaller`` consumes one matching character (the
    first occurrence still left) of ``bigger``.

    Returns the characters of ``bigger`` that remain, as a string (empty
    when all letters were used), or None when ``smaller`` is longer than
    ``bigger`` or needs a letter that ``bigger`` no longer has.
    """
    # A longer string can never be contained in a shorter one.
    if len(smaller) > len(bigger):
        return None

    leftover = bigger
    for letter in smaller:
        position = leftover.find(letter)
        if position < 0:
            return None
        # Drop exactly the one occurrence that was matched.
        leftover = leftover[:position] + leftover[position + 1:]
    return leftover
# Characters removed from every word: whitespace, apostrophe, hyphen, plus.
takeout = " \t'-+\n\r"


def trim_word(word, takeout=takeout):
    """Lowercase ``word`` and remove every character listed in ``takeout``.

    word    -- the raw word, e.g. a line read from a word list file
    takeout -- string of characters to delete (defaults to the module
               level ``takeout``)

    Returns the cleaned, lowercased word; "We'd" becomes "wed".
    """
    # The two-argument form str.translate(None, chars) exists only on
    # Python 2 byte strings and raises TypeError on Python 3, so the
    # unwanted characters are filtered out explicitly instead.
    # NOTE: for an already clean word list, word.lower().strip() would be
    # enough, at the cost of not normalising words such as "we'd".
    return ''.join(char for char in word.lower() if char not in takeout)
def find_words(candidate_words, letters, atleast):
    """Select the words that can be spelled with ``letters``.

    candidate_words -- any iterable of raw words, such as an open file
                       or a list of words
    letters         -- the letters available for building a word
    atleast         -- minimum length of an acceptable word

    Returns the accepted words, cleaned with trim_word, longest first.
    """
    # The input is not assumed to be clean or ordered, so every word is
    # trimmed first; this costs execution time but is more flexible.
    cleaned = (trim_word(raw_word) for raw_word in candidate_words)
    usable = [
        candidate for candidate in cleaned
        if contains(letters, candidate) is not None
        and len(letters) >= len(candidate) >= atleast
    ]
    # Stable sort: words of equal length keep their input order.
    usable.sort(key=len, reverse=True)
    return usable
def find_anagrams(word, words, atleast):
    """Generate anagrams of ``word``, possibly made of several words.

    Words are taken from the sequence ``words`` in sequence order and
    may repeat; the results are combinations with repeats, not
    permutations.  Leftover letters are only searched further while at
    least ``atleast`` of them remain.

    Yields each anagram as a string of words joined by single spaces.
    """
    for start, candidate in enumerate(words):
        leftover = contains(word, candidate)
        if leftover is None:
            # candidate cannot be built from the available letters
            continue
        if not leftover:
            # every letter was used: candidate completes an anagram
            yield candidate
        elif len(leftover) >= atleast:
            # Continue from the current position so the same word may be
            # reused while earlier words are not revisited.
            tail_words = words[start:]
            for rest in find_anagrams(leftover, tail_words, atleast):
                yield candidate + ' ' + rest
if __name__ == '__main__':
    # Without a 'dict' directory there are no word lists to use: create
    # the empty directory and stop with installation instructions.
    if not os.path.isdir('dict'):
        os.mkdir('dict')
        raise SystemExit('''
No dictionaries installed, copy wordlists
like wordlist from http://wordlist.sourceforge.net/
or scrable wordslist http://www.isc.ro/en/commands/lists.html
to dict subdirectory!''')

    if len(sys.argv) <= 3:
        # Too few arguments for a command line run: ask interactively.
        words = trim_word(input('Give words: '))
        smallest = int(input('Minimum acceptable word length: '))
        # Show the installed word lists without their '.txt' extension.
        available = [name[:-4] for name in os.listdir('dict')
                     if name.endswith('.txt')]
        print('\n\t' + '\n\t'.join(available))
        language = input("Language ('.txt' added automatically): ")
    else:
        print('Taking language, smallest word and the words (rest of line) from command line.')
        language = sys.argv[1]
        smallest = int(sys.argv[2])
        # All remaining arguments together form the letters to use.
        words = trim_word(''.join(sys.argv[3:]))

    language_file = 'dict/%s.txt' % language
    if os.path.isfile(language_file):
        with open(language_file) as word_file:
            wordlist = find_words(word_file, words, smallest)
        print('%i words loaded.' % len(wordlist))

        # Print out the solutions, numbered from 1.
        solution_number = 0
        for word in find_anagrams(words, wordlist, smallest):
            solution_number += 1
            print("%5i: %s" % (solution_number, word))
        print('\n%i solutions found!' % solution_number)
    else:
        print('%s does not exist in dict directory.' % language)