I am trying to optimize some code I have written. I do not wish to add 0 to my list if it is already set to zero. So it only needs to be changed if the letters or words do not match. Here is the working code (You will also need a file called dictionary.txt with some words in it to test):

#!/usr/bin/python
# Filename: unscramble.py
# Python version: 2.6.6
# Date: 1/22/2011
# Author: The Ripper

class UnScramble:
    '''unscramble words which have had their letters swopped around'''
    def __init__(self):
        '''Setup lists for storing matching words and letters'''
        self.string = ""
        self.has_same_letters = []
        self.checked_double_letters = []
        self.letter_a = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        self.letter_b = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        self.a_whole = 0
        self.b_whole = 0
        self.possable_solutions = []
    
    def RunUnScramble(self, string='abase', wordlist_path='dictionary.txt'):
        '''Convenience method for running the application'''
        self.SetString(string)
        self.OpenWordlist(wordlist_path)
        self.RunSameLettersCheck()
        self.RunDoubleLettersCheck()
        self.SetPossableSolutions()
    
    def SetString(self, string):
        '''Set the string to be UnScrambled'''
        self.string = string
        
    def SetPossableSolutions(self):
        '''
        set possable_solutions to equal checked double letters.
        This is a convenience for getting the values from the application while leaving room for
        other check methods to be implemented.
        '''
        self.possable_solutions = self.checked_double_letters[:]
    
    def OpenWordlist(self, path_to_wordlist='dictionary.txt'):
        '''Create wordlist file object for iteration'''
        self.wordlist = open(path_to_wordlist, 'r')
        
    def RunSameLettersCheck(self):
        '''
        Checks each character in the scrambled word seperately against each word in the wordlist and
        gives it the value of 0 if it appears in that word
        '''    
        for line in self.wordlist:
            if (self.string[:1]) in (line):     
                self.letter_a[0] = 0               
            else:                          
                self.letter_a[0] = 1                      
       
            if (self.string[1:2]) in (line):
                self.letter_a[1] = 0 
            else:
                self.letter_a[1] = 1 
     
            if (self.string[2:3]) in (line):
                self.letter_a[2] = 0 
            else:
                self.letter_a[2] = 1 
     
            if (self.string[3:4]) in (line):
                self.letter_a[3] = 0 
            else:
                self.letter_a[3] = 1 
     
            if (self.string[4:5]) in (line):
                self.letter_a[4] = 0 
            else:
                self.letter_a[4] = 1 
     
            if (self.string[5:6]) in (line):
                self.letter_a[5] = 0 
            else:
                self.letter_a[5] = 1 
     
            if (self.string[6:7]) in (line):
                self.letter_a[6] = 0 
            else:
                self.letter_a[6] = 1 
     
            if (self.string[7:8]) in (line):
                self.letter_a[7] = 0 
            else:
                self.letter_a[7] = 1 
     
            if (self.string[8:9]) in (line):
                self.letter_a[8] = 0 
            else:
                self.letter_a[8] = 1 
     
            if (self.string[9:10]) in (line):
                self.letter_a[9] = 0 
            else:
                self.letter_a[9] = 1 
            
            # This adds the values of all the characters together
            self.a_whole = (self.letter_a[0] + self.letter_a[1] + self.letter_a[2] + self.letter_a[3] + self.letter_a[4] + self.letter_a[5] + 
                            self.letter_a[6] + self.letter_a[7] + self.letter_a[8] + self.letter_a[9])
            # BRAKES CODE!!!
            # for i in self.letter_b[:]:
            #     self.b_whole += i
            
            if len(line) == len(self.string) + 1:
                # If a_whole == 0 then all the char in the string appear in the word of the list
                if (self.a_whole) == 0:            
                    # Appends the word to the list we setup in __init__
                    self.has_same_letters.append(line)    
                    
    def RunDoubleLettersCheck(self):
        '''
        Checks the characters in the list of words against the string.
        By doing this we check that all the characters in the list appear in the string.
        If we didnt do this the program would say that school = slouch because all the characters 
        in school appear in slouch, but not all the characters in slouch appear in school.
        '''
        for i in range(0, len(self.has_same_letters)): 
            if (self.has_same_letters[i][:1]) in (self.string):
                self.letter_b[0] = 0
            else:
                self.letter_b[0] = 1

            if (self.has_same_letters[i][1:2]) in (self.string):
                self.letter_b[1] = 0
            else:
                self.letter_b[1] = 1

            if (self.has_same_letters[i][2:3]) in (self.string):
                self.letter_b[2] = 0
            else:
                self.letter_b[2] = 1

            if (self.has_same_letters[i][3:4]) in (self.string):
                self.letter_b[3] = 0
            else:
                self.letter_b[3] = 1

            if (self.has_same_letters[i][4:5]) in (self.string):
                self.letter_b[4] = 0
            else:
                self.letter_b[4] = 1

            if (self.has_same_letters[i][5:6]) in (self.string):
                self.letter_b[5] = 0
            else:
                self.letter_b[5] = 1

            if (self.has_same_letters[i][6:7]) in (self.string):
                self.letter_b[6] = 0
            else:
                self.letter_b[6] = 1

            if (self.has_same_letters[i][7:8]) in (self.string):
                self.letter_b[7] = 0
            else:
                self.letter_b[7] = 1

            if (self.has_same_letters[i][8:9]) in (self.string):
                self.letter_b[8] = 0
            else:
                self.letter_b[8] = 1

            if (self.has_same_letters[i][9:10]) in (self.string):
                self.letter_b[9] = 0
            else:
                self.letter_b[9] = 1

            if (self.has_same_letters[i][10:11]) in (self.string):
                self.letter_b[10] = 0
            else:
                self.letter_b[10] = 1

            if (self.has_same_letters[i][11:12]) in (self.string):
                self.letter_b[11] = 0
            else:
                self.letter_b[11] = 1

            if (self.has_same_letters[i][12:13]) in (self.string):
                self.letter_b[12] = 0
            else:
                self.letter_b[12] = 1

            if (self.has_same_letters[i][13:14]) in (self.string):
                self.letter_b[13] = 0
            else:
                self.letter_b[13] = 1
                
            if (self.has_same_letters[i][14:15]) in (self.string):
                self.letter_b[14] = 0
            else:
                self.letter_b[14] = 1
                
            # This adds the values of all the characters together
            for i in self.letter_b[:]:
                self.b_whole += i
                if (self.b_whole) == 0:
                    self.checked_double_letters.append(self.has_same_letters[i])
       
if __name__ == '__main__':
    while True:
        us = UnScramble()
        scrambled_word = raw_input("\nEnter word you would like unscrambled. Type '/end/' to quit: ")
        if scrambled_word == '/end/':         # Allows user to   exit loop       
            break
        us.RunUnScramble(string=scrambled_word)
        print(us.possable_solutions[0])
        us = None

Here is the broken attempt to optimize the if statements.

#!/usr/bin/python
# Filename: unscramble.py
# Python version: 2.6.6
# Date: 1/22/2011
# Author: The Ripper


class UnScramble:
    '''unscramble words which have had their letters swopped around'''
    def __init__(self):
        '''Setup lists for storing matching words and letters'''
        self.string = ""
        self.has_same_letters = []
        self.checked_double_letters = []
        self.letter_a = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        self.letter_b = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        self.a_whole = 0
        self.b_whole = 0
        self.possable_solutions = []
    
    def RunUnScramble(self, string='abase', wordlist_path='dictionary.txt'):
        '''Convenience method for running the application'''
        self.SetString(string)
        self.OpenWordlist(wordlist_path)
        self.RunSameLettersCheck()
        self.RunDoubleLettersCheck()
        self.SetPossableSolutions()
    
    def SetString(self, string):
        '''Set the string to be UnScrambled'''
        self.string = string
        
    def SetPossableSolutions(self):
        '''
        set possable_solutions to equal checked double letters.
        This is a convenience for getting the values from the application while leaving room for
        other check methods to be implemented.
        '''
        self.possable_solutions = self.checked_double_letters[:]
    
    def OpenWordlist(self, path_to_wordlist='dictionary.txt'):
        '''Create wordlist file object for iteration'''
        self.wordlist = open(path_to_wordlist, 'r')
        
    def RunSameLettersCheck(self):
        '''
        Checks each character in the scrambled word seperately against each word in the wordlist and
        gives it the value of 0 if it appears in that word
        '''    
        for line in self.wordlist:
            if (not(self.string[:1]) in (line)):                          
                self.letter_a[0] = 1                      
       
            if (not(self.string[1:2]) in (line)):
                self.letter_a[1] = 1 
     
            if (not(self.string[2:3]) in (line)):
                self.letter_a[2] = 1 
     
            if (not(self.string[3:4]) in (line)):
                self.letter_a[3] = 1 
     
            if (not(self.string[4:5]) in (line)):
                self.letter_a[4] = 1 
     
            if (not(self.string[5:6]) in (line)):
                self.letter_a[5] = 1 
     
            if (not(self.string[6:7]) in (line)):
                self.letter_a[6] = 1 
     
            if (not(self.string[7:8]) in (line)):
                self.letter_a[7] = 1 
     
            if (not(self.string[8:9]) in (line)):
                self.letter_a[8] = 1 
     
            if (not(self.string[9:10]) in (line)):
                self.letter_a[9] = 1 
            
            # This adds the values of all the characters together
            self.a_whole = (self.letter_a[0] + self.letter_a[1] + self.letter_a[2] + self.letter_a[3] + self.letter_a[4] + self.letter_a[5] + 
                            self.letter_a[6] + self.letter_a[7] + self.letter_a[8] + self.letter_a[9])
            # BRAKES CODE!!!
            # for i in self.letter_b[:]:
            #     self.b_whole += i
            
            if len(line) == len(self.string) + 1:
                # If a_whole == 0 then all the char in the string appear in the word of the list
                if (self.a_whole) == 0:            
                    # Appends the word to the list we setup in __init__
                    self.has_same_letters.append(line)    
                    
    def RunDoubleLettersCheck(self):
        '''
        Checks the characters in the list of words against the string.
        By doing this we check that all the characters in the list appear in the string.
        If we didnt do this the program would say that school = slouch because all the characters 
        in school appear in slouch, but not all the characters in slouch appear in school.
        '''
        for i in range(0, len(self.has_same_letters)): 
            if (not(self.has_same_letters[i][:1]) in (self.string)):
                self.letter_b[0] = 1

            if (not(self.has_same_letters[i][1:2]) in (self.string)):
                self.letter_b[1] = 1

            if (not(self.has_same_letters[i][2:3]) in (self.string)):
                self.letter_b[2] = 1

            if (not(self.has_same_letters[i][3:4]) in (self.string)):
                self.letter_b[3] = 1

            if (not(self.has_same_letters[i][4:5]) in (self.string)):
                self.letter_b[4] = 1

            if (not(self.has_same_letters[i][5:6]) in (self.string)):
                self.letter_b[5] = 1

            if (not(self.has_same_letters[i][6:7]) in (self.string)):
                self.letter_b[6] = 1

            if (not(self.has_same_letters[i][7:8]) in (self.string)):
                self.letter_b[7] = 1

            if (not(self.has_same_letters[i][8:9]) in (self.string)):
                self.letter_b[8] = 1

            if (not(self.has_same_letters[i][9:10]) in (self.string)):
                self.letter_b[9] = 1

            if (not(self.has_same_letters[i][10:11]) in (self.string)):
                self.letter_b[10] = 1

            if (not(self.has_same_letters[i][11:12]) in (self.string)):
                self.letter_b[11] = 1

            if (not(self.has_same_letters[i][12:13]) in (self.string)):
                self.letter_b[12] = 1

            if (not(self.has_same_letters[i][13:14]) in (self.string)):
                self.letter_b[13] = 1
                
            if (not(self.has_same_letters[i][14:15]) in (self.string)):
                self.letter_b[14] = 1
                
            # This adds the values of all the characters together
            for i in self.letter_b[:]:
                self.b_whole += i
                if (self.b_whole) == 0:
                    self.checked_double_letters.append(self.has_same_letters[i])
       
if __name__ == '__main__':
    while True:
        us = UnScramble()
        scrambled_word = raw_input("\nEnter word you would like unscrambled. Type '/end/' to quit: ")
        if scrambled_word == '/end/':         # Allows user to   exit loop       
            break
        us.RunUnScramble(string=scrambled_word)
        print(us.possable_solutions[:])
        print("")
        print(us.checked_double_letters[:])
        print("")
        print(us.has_same_letters[:])
        us = None

Edited 5 Years Ago by ShadyTyrant: n/a

I decided to just see what I could do. Using my dictionary which holds 234,936 words, my code runs in a little under two seconds on my mac. The technique: Iterate the dictionary of words, creating a hash for each word that ignores letter order, making a dictionary that maps hash -> list of words. Example: the hashes for 'stop', 'pots', 'tops' and 'post' are the same, so worddict[hash('ptso')] = ['post','spot','stop','tops'] After some messing about, I chose a very simple hash: the word itself, lower-cased and sorted:

def hash(w):
  return ''.join(sorted(w.lower()))

The code to load the dictionary into memory is equally simple:

def createDictInfo(d):
  res = {}
  with open(d,'r') as f:
    for w in f:
      w = w.strip()
      res.setdefault(hash(w),[]).append(w)
  return res

Notes:

  • strip() removes the newlines that delimit the words in my dictionary
  • setdefault() sets the value for the key in a dictionary unless there is already a value, and it returns whichever value. This allows line 6 to be very succinct.

These are all very helpful. I like the hashing idea. I will also look into tonys approach. Thank you.

This article has been dead for over six months. Start a new discussion instead.