I've been stuck on this problem for a day. Since no one has responded to my question on Stack Overflow, I hope somebody here can help.
I'm trying to build a dictionary for each of Shakespeare's plays that records the act and scene in which each character first speaks (not where they are first mentioned), along with the number of speeches that character has. Here is the link for the text. For example, in Romeo and Juliet the entry would be 'JULIET': [1, 3, 118], since Juliet first speaks in Act 1, Scene 3 and has 118 speeches in the play. I tried splitting the text into lines, but that doesn't give me the act and scene numbers.
import string
import re
def word_find(line, words):
    """Return the distinct tokens of ``line`` that also occur in ``words``.

    ``line`` is split on whitespace and each token is compared verbatim
    (case-sensitive, punctuation included) against ``words``.

    Args:
        line: Text to search.
        words: Iterable of words to look for.

    Returns:
        A list of the matching tokens, each listed once, in the order they
        first appear in ``line``. Empty when nothing matches.
    """
    wanted = set(words)
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is reproducible instead of following arbitrary set order.
    return [token for token in dict.fromkeys(line.split()) if token in wanted]
def main(fn, words):
    """Print the number of every line in file ``fn`` that contains a search word.

    Each line is passed to ``word_find``; when it reports at least one match,
    the 1-based line number is printed followed by the matched words
    concatenated without a separator.

    Args:
        fn: Path of the text file to scan.
        words: Iterable of words to look for.

    Returns:
        None. Results are written to standard output.
    """
    # Materialise once so a one-shot iterable is not exhausted after the
    # first line.
    wanted = frozenset(words)
    # ``with`` closes the file even if an error occurs while scanning.
    with open(fn, 'r') as f:
        # Iterate line by line; line numbers start at 1.
        for c, x in enumerate(f, start=1):
            common = word_find(x, wanted)
            if common:
                print(c, "".join(common))
if __name__ == '__main__':
    # Usage: script.py FILE WORD [WORD ...]
    # The file to scan and the words to look for come from the command line;
    # the names ``fn`` and ``words`` used previously were never defined at
    # module level, so the call raised NameError.
    import sys

    if len(sys.argv) > 2:
        main(sys.argv[1], sys.argv[2:])
    # The firstWords(...) reports are not issued here: firstWords is defined
    # further down, so calling it at this point would raise NameError. The
    # same three reports are printed at the end of the file.
def firstWords(fn):
    """Count how often each normalised token occurs in the file ``fn``.

    The file is read in full and split on whitespace. Each token is
    lower-cased and passed through ``removePunctuation`` before counting.

    NOTE(review): ``removePunctuation`` is not defined in the visible part of
    this file; presumably it strips punctuation characters - confirm. If it
    can return an empty string, that empty string is counted as a key too.
    NOTE(review): despite its name, this function builds a word-frequency
    table; it does not track first speeches, acts or scenes.

    Args:
        fn: Path of the text file to read.

    Returns:
        A dict mapping each normalised token to its number of occurrences.
    """
    d = {}
    # ``with`` guarantees the file is closed even if normalisation raises,
    # and avoids rebinding the ``fn`` parameter to the file object.
    with open(fn, 'r') as f:
        for c in f.read().split():
            c = removePunctuation(c.lower())
            d[c] = d.get(c, 0) + 1
    return d
# Print the word-count dictionary for each sample text, in this order.
for text_file in (
    'romeo_and_juliet_folger.txt',
    'shakespeare_sonnet_18.txt',
    'a_midsummer_nights_dream_folger.txt',
):
    print(firstWords(text_file))