from string import *

def removePunctuation(sentence):
    """Return *sentence* lower-cased with all punctuation characters removed.

    Every character found in ``string.punctuation`` is dropped; all other
    characters (letters, digits, whitespace) are kept in their original
    order.
    """
    # Use the str method: the module-level ``string.lower`` function only
    # exists in Python 2, while ``str.lower`` works everywhere.
    sentence = sentence.lower()
    # Join the surviving characters in one go instead of growing a string
    # with ``+`` on every loop iteration.
    return "".join(char for char in sentence if char not in punctuation)

def wordFrequences(sentence):
    """Build and return a dict keyed by the words of the split text.

    NOTE(review): the ``sentence`` parameter is never used; the body reads
    the module-level name ``new_sentence`` instead.
    """
    wordCounts = {}
    split_sentence =  new_sentence.split()
    print split_sentence
    for entry in split_sentence:
        # NOTE(review): iterating over `entry` visits each character of the
        # word, so the counter below is bumped once per character rather
        # than once per occurrence of the word.
        for word in entry:
            wordCounts[entry] = wordCounts.get (entry,0) + 1
    # The result of items() is discarded, so this call has no effect.
    wordCounts.items()
    return wordCounts

# Strip punctuation (and lower-case) first, then count the words.
sentence = "This is a test sentence, to test the function."
new_sentence = removePunctuation(sentence) 
# NOTE(review): the raw `sentence` is passed here, but wordFrequences
# ignores its argument and reads the global `new_sentence` set above.
wordFrequences(sentence)

Hi, I am trying to write a program that calculates how many times each word appears in a string.

Could someone help me with this? Is there something similar I could use instead of .get, which appears to be counting the characters?

At the moment I get this output:
{'a': 1, 'function': 8, 'sentence': 8, 'this': 4, 'is': 2, 'to': 2, 'test': 8, 'the': 3}

I am trying to get the following:
{'this': 1, 'a': 1, 'is': 1, 'test': 2, ...}

Edited 6 Years Ago by axa121: n/a

Ok no need for help.

I took a break, and when I came back I found the solution straight away.

If you find a solution to one of your own problems, it is considered polite to post the solution regardless because other people might also have the same problem.

As for my solution to the problem statement?

I'd probably just strip the string of any non-alphabetic characters excepting spaces and newlines, replace all newlines with spaces, split the resulting string around spaces, iterate over the resulting sequence, and add the word to the dictionary if it is not present with a count of one or increment the counter for the word. (Using the word as the dictionary key. First check that the dictionary has the key for the word, if not then add the key as one or if it does increment the key.)

Also, it is considered bad code to use the '+' operator to concatenate strings. Strings are immutable objects, so appending a string to another string creates a new string object which takes time and memory.

It is better to store each piece in a list until a concatenated string is needed, then join each piece using a string's "join" method on the list.

# Variant 1: build the sentence by chaining '+' concatenations.
string1 = 'This'
string2 = 'is'
string3 = 'worse.'
final_string = string1 + ' ' + string2 + ' ' + string3
# The bare string below only shows the resulting value of final_string.
"""This is worse."""

# Variant 2: keep the pieces in a list and combine them with one join call,
# using a single space as the separator.
mylist = ['This', 'is', 'better.']
better_string = ' '.join(mylist)
# The bare string below only shows the resulting value of better_string.
"""This is better."""
from string import *


def removePunctuation(sentence):
    """Lower-case *sentence* and strip every punctuation character from it.

    Characters contained in ``string.punctuation`` are removed; all other
    characters are returned unchanged and in their original order.
    """
    # ``str.lower`` replaces the ``string.lower`` helper function, which is
    # only available in Python 2's string module.
    lowered = sentence.lower()
    # Filter and join once rather than concatenating character by character.
    kept = [char for char in lowered if char not in punctuation]
    return "".join(kept)

def wordFrequences(sentence):
    """Count each whitespace-separated word and print the resulting dict.

    NOTE(review): the ``sentence`` parameter is unused; the text is read
    from the module-level name ``new_sentence`` instead. Nothing is returned.
    """
    wordFreq = {}
    split_sentence =  new_sentence.split()
    for word in split_sentence:
            # get() supplies 0 the first time a word is seen.
            wordFreq[word] = wordFreq.get(word,0) + 1
    # The result of items() is discarded, so this call has no effect.
    wordFreq.items()
    print wordFreq

# Normalise the text, then print the word counts.
sentence = "The first test of the function"
new_sentence = removePunctuation(sentence)
# NOTE(review): the argument is the raw `sentence`; wordFrequences above
# reads the global `new_sentence` rather than its parameter.
wordFrequences(sentence)

Here is the corrected version.

I think you want to use new_sentence as the function's parameter (catching things like this is one of the positive results of posting code).

def wordFrequences(new_sentence):
    """Count how often each word occurs in *new_sentence*.

    The text is split on whitespace only, so punctuation and letter case
    should already have been normalised by the caller. The resulting dict
    (word -> number of occurrences) is printed and also returned.
    """
    wordFreq = {}

    # Split the text received through the parameter, so the function no
    # longer depends on a module-level variable of the same name.
    for word in new_sentence.split():
        # get() supplies 0 the first time a word is seen.
        wordFreq[word] = wordFreq.get(word, 0) + 1

    # The parenthesised form prints the dict under Python 2 and Python 3.
    print(wordFreq)
    return wordFreq
 
# Normalise the text first, then pass the cleaned string to the counter.
sentence = "The first test of the function"
new_sentence = removePunctuation(sentence)
wordFrequences(new_sentence)

Edited 6 Years Ago by woooee: n/a

This question has already been answered. Start a new discussion instead.