Can anyone simplify this for me?
I don't know if it is going to work I tried to adjust the code to fit on the page. Might have to delete the extra +\

def main():
    try:
        
        infile = open('text.txt','r')
        files = infile.readlines()

        lower,count,blank,dig = 0,0,0,0
    #Counting uppercase
        for line in files:
            count+=line.count('A')+line.count('B')+line.count('C')+line.count('D')+\ +line.count('E')+line.count('F')+line.count('G')+line.count('H')+\
+line.count('I')+line.count('J')+line.count('K')+line.count('L')+\
+line.count('M')+line.count('N')+line.count('O')+line.count('P')+\
+line.count('Q')+line.count('R')+line.count('S')+line.count('T')+\
+line.count('U')+line.count('V')+line.count('W')+line.count('X')+\
+line.count('Y')+line.count('Z')

        print 'There are',count,'uppercase letters.'
        print
    #Counting lowercase
        for line in files:
            lower+=line.count('a')+line.count('b')+line.count('c')+line.count('d')+\
+line.count('e')+line.count('f')+line.count('g')+line.count('h')+\
+line.count('i')+line.count('j')+line.count('k')+line.count('l')+\
+line.count('m')+line.count('n')+line.count('o')+line.count('p')+\
+line.count('q')+line.count('r')+line.count('s')+line.count('t')+\
+line.count('u')+line.count('v')+line.count('w')+line.count('x')+\
+line.count('y')+line.count('z')
        
        print 'I also have',lower,'lowercase letters.'
        print
    #Number of didgits
        for line in files:
            dig+=line.count('0')+line.count('1')+line.count('2')+line.count('3')+\
+line.count('4')+line.count('5')+line.count('6')+line.count('7')+\
+line.count('8')+line.count('9')
        print 'The number of digits in the file =',dig
        print
    #number of whitespace characters
        for line in files:
            blank+=line.count(' ')
        print 'I found',blank,'whitespaces in this file'
        print
        raw_input('Hit enter to quit')
        infile.close
    except:
        print 'An error occured.'
        
main()

Recommended Answers

All 16 Replies

I'm not going to go through all of your code because I'm in a hurry, but this should simplify it quite a bit. At a minimum it will make it easier to read.

alphabet = ['A','B','C','D'....etc]
count = 0
infile = open('infile.txt','r')
for character in infile.readlines():
    if character in alphabet:
        count += 1

print count

Here is another idea...

for character in infile.readlines():
    if character.isupper() == True:
        uppercasecount += 1
    if character.islower() == True:
        lowercasecount += 1
    if character.isdigit() == True:
        digitcount += 1

Python has the built in isupper() and islower() (also isdigit, isspace, ispunct) http://www.python.org/doc/2.5.2/lib/module-curses.ascii.html

test=["AabcdEFg.",
      "jKLmnopR?" ]
      
upper_total=0
lower_total=0
neither_total=0
for rec in test:
   for chr in rec:
      if chr.isupper():
         upper_total += 1
      elif chr.islower():
         lower_total += 1
      else:
         neither_total += 1

print "%d Upper Case,  %d lower case,  and %d neither" % \
      (upper_total, lower_total, neither_total)
#
# And if you wanted to go with your original idea, it would be
if (chr >= "A") and (chr <= "Z"):
   upper_total += 1
etc.

Edit: and it looks like great minds think alike.

Here is another idea...

Just polishing what Kthom has written

infile = open("test.txt", "r")
uppercasecount, lowercasecount, digitcount = (0, 0, 0)
for character in infile.readlines():
    if character.isupper() == True:
        uppercasecount += 1
    if character.islower() == True:
        lowercasecount += 1
    if character.isdigit() == True:
        digitcount += 1
    print uppercasecount, lowercasecount, digitcount
print "Total count is %d Upper case, %d Lower case and %d Digit(s)" %(uppercasecount, lowercasecount, digitcount)

Here is another one (faster ?)

#!/usr/bin/env python
import re

def main():
    regexes = [ re.compile(x) for x in
         (r"[^A-Z]+", r"[^a-z]+", r"[^0-9]+", r"[^\ ]+")]
    filename = "test.txt"
    content = open(filename).read()
    counts = [len(s) for s in (r.sub("", content) for r in regexes)]
    print("""There are
%d uppercase letters
%d lowercase letters
%d digits
%d space characters
in file '%s'""" % (tuple(counts) + (filename,)))

main()
commented: nice +6

I would like to learn regular expressions. I have no Idea what it is (I always see people here using). Can someone explain a litle and suggest good tutorial?

Didn't mean to deviate the thread, so keep your first focus on the thread, then you can answer my question

I would like to learn regular expressions. I have no Idea what it is (I always see people here using). Can someone explain a litle and suggest good tutorial?

Didn't mean to deviate the thread, so keep your first focus on the thread, then you can answer my question

You could start with dive into python http://diveintopython.org/regular_expressions/index.html.

I would like to learn regular expressions. I have no Idea what it is (I always see people here using). Can someone explain a litle and suggest good tutorial?

Didn't mean to deviate the thread, so keep your first focus on the thread, then you can answer my question

There is also:
http://www.amk.ca/python/howto/regex/

I got curious, so I timed the 'just Python' and 'module re' approaches:

# timing characer count by type
# compare 'module re' and 'just Python' approches

import timeit
import re

def count_char1(text):
    """
    count upper case char, lower case char, digits and spaces in a text
    """
    regexes = [ re.compile(x) for x in
        (r"[^A-Z]+", r"[^a-z]+", r"[^0-9]+", r"[^\ ]+")]
    counts = [len(s) for s in (r.sub("", text) for r in regexes)]
    return tuple(counts)

def count_char2(text):
    """
    count upper case char, lower case char, digits and spaces in a text
    """    
    upper = lower = digit = space = 0
    for c in text:
        if c.isupper():
            upper += 1
        elif c.islower():
            lower += 1
        elif c.isdigit():
            digit += 1
        elif c.isspace():
            space += 1
    return (upper, lower, digit, space)


text = """
There is one rule for the industrialist and that is: 
Make the best quality of goods possible at the lowest 
cost possible, paying the highest wages possible.

Henry Ford 1924
"""

# for longer text uncomment line below
#text = text*10

stmt = 'count_char1(text)'
t = timeit.Timer(stmt, setup="from __main__ import count_char1, text")
#  doing 10000 passes * 100 gives the time in microseconds/pass
elapsed = (100 * t.timeit(number=10000))
print( "Function %s takes %0.3f micro-seconds/pass" % (stmt, elapsed) )

stmt = 'count_char2(text)'
t = timeit.Timer(stmt, setup="from __main__ import count_char2, text")
#  doing 10000 passes * 100 gives the time in microseconds/pass
elapsed = (100 * t.timeit(number=10000))
print( "Function %s takes %0.3f micro-seconds/pass" % (stmt, elapsed) )

"""
my timing result -->
Function count_char1(text) takes 119.080 micro-seconds/pass
Function count_char2(text) takes 184.978 micro-seconds/pass
"""
commented: good idea +2
commented: Damn fine analysis +2

Here is a function which is 10 times faster on my computer

from string import maketrans
fromst = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
tost = ("a" * 26) + ("A"*26) + ("0"*10)
assert (len(fromst) == 62) and (len(tost) == len(fromst))
table = maketrans(fromst, tost)
def count_char3(text):
    text = text.translate(table)
    low = text.count("a")
    upp = text.count("A")
    dig = text.count("0")
    spa = text.count(" ")
    return upp, low, dig, spa

However this algorithm should work only with 8 bits characters. Also in count_char2, you should replace c.isspace() by c == " ", because isspace matches other chars than the space character.

That was a fantastic analysis! I'm personally going to stick to using the pure python approach because I think it is more readable, and I'm willing to sacrifice 65 micro-seconds for that readability. But at least now I'm making an educated decision and I know what I'm giving up if I don't go the regex route. Big ups to Sneekula!

Here is a function which is 10 times faster on my computer

I don't know if you're comparing apples to apples here. With Sneekula's regex function he was able to feed in raw text and count the characters. Your function requires us to first translate the characters into lowercase a's, uppercase a's or zeros. That translation should be included in the function so that both functions take the same input and produce the same output. Then we can really compare the two objectively.

No the translation is done inside the function. Outside of the function, I only build the translation table which has nothing to do with the text.

Oh, I see. You're right. Wow, ten times faster? That's pretty sweet.

Function calls are time expensive in Python, so I modified Sneekula's count_char2() approach by replacing all those calls to islower(), isupper(), isdigit() and isspace(), and also changing the order of if/elif to use the most common character test first. The result is promising ...

# timing character count by type
# compare 'module re' and 'improved just Python' approaches

import timeit
import re

def count_char1(text):
    """
    count upper case char, lower case char, digits and spaces in a text
    """
    regexes = [ re.compile(x) for x in
        (r"[^A-Z]+", r"[^a-z]+", r"[^0-9]+", r"[^\ ]+")]
    counts = [len(s) for s in (r.sub("", text) for r in regexes)]
    return tuple(counts)

def count_char2(text):
    """
    count upper case char, lower case char, digits and spaces in a text
    """
    upper = lower = digit = space = 0
    for c in text:
        if c.isupper():
            upper += 1
        elif c.islower():
            lower += 1
        elif c.isdigit():
            digit += 1
        elif c.isspace():
            space += 1
    return (upper, lower, digit, space)

def count_char4(text):
    """
    count upper case char, lower case char, digits and spaces in a text
    to improve performance I replaced c.islower() etc. with c in "..."
    also put the test for lower case first since text has mostly lower
    case characters
    """
    upper = lower = digit = space = 0
    for c in text:
        if c in "abcdefghijklmnopqrstuvwxyz":
            lower += 1
        elif c in " ":
            space += 1
        elif c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ":
            upper += 1
        elif c in "0123456789":
            digit += 1
    return (upper, lower, digit, space)


text = """
There is one rule for the industrialist and that is:
Make the best quality of goods possible at the lowest
cost possible, paying the highest wages possible.

Henry Ford 1924
"""

# for longer text uncomment line below
#text = text*10

stmt = 'count_char1(text)'
t = timeit.Timer(stmt, setup="from __main__ import count_char1, text")
#  doing 10000 passes * 100 gives the time in microseconds/pass
elapsed = (100 * t.timeit(number=10000))
print( "Function %s takes %0.3f micro-seconds/pass" % (stmt, elapsed) )

stmt = 'count_char2(text)'
t = timeit.Timer(stmt, setup="from __main__ import count_char2, text")
#  doing 10000 passes * 100 gives the time in microseconds/pass
elapsed = (100 * t.timeit(number=10000))
print( "Function %s takes %0.3f micro-seconds/pass" % (stmt, elapsed) )

stmt = 'count_char4(text)'
t = timeit.Timer(stmt, setup="from __main__ import count_char4, text")
#  doing 10000 passes * 100 gives the time in microseconds/pass
elapsed = (100 * t.timeit(number=10000))
print( "Function %s takes %0.3f micro-seconds/pass" % (stmt, elapsed) )

"""
my output -->
Function count_char1(text) takes 142.210 micro-seconds/pass
Function count_char2(text) takes 167.409 micro-seconds/pass
Function count_char4(text) takes 58.894 micro-seconds/pass
"""
Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.