Multiword anagrams by recursive generator

Updated TrustyTony 0 Tallied Votes 2K Views Share

This is my multiword anagram program, which does not rely on a precomputed file of anagram synonyms; it is published to celebrate 11.11.11 11:11:11.
If you start the program without a dict subdirectory, it creates the directory and exits with a notice that no dictionaries are installed. You may put any word files, one word per line, into that directory as <dictionary name>.txt. The program lists the available dictionaries if it is started in interactive prompt mode, i.e. without parameters on the command line. A few useful dictionary sources, also mentioned in the code:
http://wordlist.sourceforge.net/
http://www.isc.ro/en/commands/lists.html

""" Major learning project of Tony 'pyTony' Veijalainen 2010-2011
    keep attribution of source if you use this code elsewhere.
    The most clear code version without special tricks or
    anagram word synonyms dictionary.
    
    Published 11.11.11 at DaniWeb Python Forum

    Usage: python sANAsTony.py uk 1 DaniWeb Forum
           # answer prompts interactively
           python sANAsTony.py

"""

import sys
import os

# Python 2 compatibility shims.
# Compare the numeric major version, not the version string: a lexical string
# comparison would classify a major version such as '10' as smaller than '3'.
if sys.version_info[0] < 3:
    # Give Python 2 the Python 3 meaning of input() and range().
    input, range = raw_input, xrange
    try:
        # psyco is an optional accelerator; it is fine if it is unavailable.
        import psyco
        psyco.full()
    except Exception:
        # Best effort only, but do not swallow KeyboardInterrupt/SystemExit
        # the way a bare ``except:`` would.
        print('(Psyco would improve performance noticably in Python 2.6 and earlier)')

def contains(bigger, smaller):
    """ Remove the letters of smaller from bigger, one occurrence per letter.

        Returns the letters of bigger that are left over (in their original
        order) or None, if some letter of smaller can not be taken
        from bigger.

    """
    # A longer string can never fit inside a shorter one.
    if len(smaller) > len(bigger):
        return None

    leftover = bigger
    for letter in smaller:
        # Split around the first occurrence of the letter, if there is one.
        before, found, after = leftover.partition(letter)
        if not found:
            return None
        leftover = before + after
    return leftover

# Characters removed from every word: whitespace, apostrophe, hyphen and plus.
takeout = " \t'-+\n\r"

def trim_word(word, takeout=takeout):
    """ Lowercase the word and return it without the characters in takeout.

        word    -- the raw word, for example a line read from a word file
        takeout -- string of characters to delete from the word

        The two argument form str.translate(None, takeout) only exists for
        Python 2 byte strings and raises TypeError in Python 3, so the
        characters are filtered explicitly to work the same in both.

    """
    return ''.join(char for char in word.lower() if char not in takeout)
    # If you have a clean dictionary and do not mind "we'd" missing,
    # word.lower().strip() would be enough.

def find_words(candidate_words, letters, atleast):
    """ Pick the words that can be built from letters.

        candidate_words -- iterable giving the words to choose from, like
                           open file or list of words
        letters         -- the available letters as a string
        atleast         -- minimum length of an acceptable word

        Returns the list of accepted (cleaned) words, longest first.  Each
        word appears only once and empty words are never accepted.

    """
    accepted = set()
    valid_words = []
    for this in candidate_words:
        # we do not assume clean or ordered words, this is costly in execution time,
        # but more flexible
        this = trim_word(this)
        # An empty word (blank line) would make the anagram search recurse
        # without ever consuming letters, so it is never a valid word.
        if not this or len(this) < atleast:
            continue
        # Different raw entries can clean to the same word ("Bill"/"bill",
        # "we'd"/"wed"); keeping both would duplicate every solution.
        if this in accepted:
            continue
        # contains() returning a value also guarantees len(this) <= len(letters).
        if contains(letters, this) is not None:
            accepted.add(this)
            valid_words.append(this)
    # sorted() is stable: words of equal length keep their input order.
    return sorted(valid_words, key=len, reverse=True)

def find_anagrams(word, words, atleast):
    """ Generate the anagrams of 'word', each consisting of one or more
        words taken from the sequence 'words'.

        A word may be used repeatedly.  The words of one anagram follow the
        order of 'words', so every combination is produced once
        (combinations with repeats, not permutations).  The search does not
        continue when fewer than 'atleast' letters are left over.

    """
    for position, candidate in enumerate(words):
        leftover = contains(word, candidate)
        if leftover is None:
            # candidate can not be built from the letters we have
            continue
        if leftover == '':
            # all letters used up: candidate alone completes the anagram
            yield candidate
            continue
        if len(leftover) < atleast:
            # too few letters left for any acceptable word
            continue
        # Continue from the same position so that the candidate itself may
        # repeat, but earlier words are not tried again.
        for tail in find_anagrams(leftover, words[position:], atleast):
            yield candidate + ' ' + tail

if __name__ == '__main__':
    # Without the dictionary directory there is nothing to search from:
    # create it for the user and stop with instructions.
    if not os.path.isdir('dict'):
        os.mkdir('dict')
        raise SystemExit('''
        No dictionaries installed, copy wordlists
        like wordlist from    http://wordlist.sourceforge.net/
        or scrable wordslist  http://www.isc.ro/en/commands/lists.html
        to dict subdirectory!''')

    arguments = sys.argv[1:]
    if len(arguments) > 2:
        # Command line mode: <language> <minimum length> <words...>
        print('Taking language, smallest word and the words (rest of line) from command line.')
        language = arguments[0]
        smallest = int(arguments[1])
        words = trim_word(''.join(arguments[2:]))
    else:
        # Interactive mode: ask everything, showing the installed dictionaries
        # before asking for the language.
        words = trim_word(input('Give words: '))
        smallest = int(input('Minimum acceptable word length: '))
        available = [name[:-4] for name in os.listdir('dict') if name.endswith('.txt')]
        print('\n\t' + '\n\t'.join(available))
        language = input("Language ('.txt' added automatically): ")

    language_file = 'dict/%s.txt' % language
    if os.path.isfile(language_file):
        # Keep only the dictionary words that fit into the given letters.
        with open(language_file) as word_file:
            wordlist = find_words(word_file, words, smallest)

        print('%i words loaded.' % len(wordlist))
        # print out the numbered solutions as they are generated
        solution_count = 0
        for anagram in find_anagrams(words, wordlist, smallest):
            solution_count += 1
            print("%5i: %s" % (solution_count, anagram))
        print('\n%i solutions found!' % solution_count)
    else:
        print('%s does not exist in dict directory.' % language)