Averaging an iterable without making a copy of its values

TrustyTony 0 Tallied Votes 607 Views Share

Here is a class that can be fed directly from a split stream, such as a file with one number per line (or a generator producing 'words' from lines that hold several numbers).

"""
    Average an iterable of numbers, or of strings interpretable as numbers,
    without producing an in-memory list of the values.

"""

class AverageIterable(object):
    """Running sum and count of a stream of values, kept without storing them.

    Every value is passed through ``number()``, so items may be numbers or
    strings that ``number()`` is able to convert.
    """

    def __init__(self, it):
        """Consume *it* once, accumulating its total and its item count.

        :param it: iterable of numbers or numeric strings; each item is
            visited exactly once and never retained, so a generator works.
        """
        self.total = 0
        # Start at zero so that an empty iterable still leaves ``count``
        # defined; binding it only as a loop target would skip it entirely.
        self.count = 0
        for value in it:
            self.total += number(value)
            self.count += 1

    @property
    def average(self):
        """Arithmetic mean of the values seen so far.

        :raises ZeroDivisionError: if no values have been counted yet.
        """
        return self.total / float(self.count)

    def add(self, num):
        """Include one more value (number or numeric string) in the totals."""
        self.total += number(num)
        self.count += 1

    def __str__(self):
        return 'Average: %f (sum %s, count %i)' % (self.average, self.total, self.count)

def number(s):
    """Return *s* as a number, converting it first if it is a string.

    Values that already support arithmetic are returned unchanged.  A string
    becomes an ``int`` when it is written as a plain integer and a ``float``
    otherwise, so ``'57'`` gives ``57`` while ``'5.7'`` and ``'1e2'`` give
    floats.

    :raises ValueError: if the string is neither an int nor a float literal.
    """
    try:
        # Probe by addition: whatever can be added to 1 is a number already.
        1 + s
    except TypeError:
        pass
    else:
        return s
    try:
        return int(s)
    except ValueError:
        # Not an integer literal.  float() also handles exponent forms such
        # as '1e2', which merely looking for a '.' would send to int().
        return float(s)


if __name__ == '__main__':
    # Demo input: thirteen sample values, one per line, the way a text file
    # split into lines would deliver them.
    sample_text = """
    57
    83
    99
    12
    45
    81
    74
    30
    29
    66
    95
    87
    42"""
    readings = sample_text.strip().splitlines()

    stats = AverageIterable(readings)
    print(stats)
    # Further values can be fed in one at a time after construction.
    stats.add(23)
    print(stats)
Gribouillis 1,391 Programming Explorer Team Colleague

There is also a solution using reduce() and itertools:

# python 2 and 3
import itertools as itt
from functools import reduce  # a builtin on Python 2; only in functools on Python 3

# Lazy zip on either version: itertools.izip exists only on Python 2, where
# the builtin zip would build a complete list of pairs first.
izip = getattr(itt, 'izip', zip)


def add_pairs(left, right):
    """Add two 2-item sequences element-wise, e.g. two (sum, count) pairs.

    The parameters are indexed rather than unpacked in the signature, because
    tuple parameters -- ``def f((a, b), (c, d))`` -- are a syntax error on
    Python 3.
    """
    return (left[0] + right[0], left[1] + right[1])


iterable = range(10, 20)
# Pairing every value with 1 lets a single reduce() build (total, count).
total, size = reduce(add_pairs, izip(iterable, itt.repeat(1)))
print(float(total) / size)  # prints 14.5

also, your function number() could be much improved. For example number("1e2") raises ValueError.

TrustyTony 888 pyMod Team Colleague Featured Poster

Actually, I would like 'pointless' exponential notation (such as 1E6) to produce integers, so you could use this version in case you want to support it:

def number(s):
    """Return *s* as a number, converting it first if it is a string.

    Values that already support arithmetic are returned unchanged.  For a
    string, a '.' selects ``float``; exponent notation without a '.' gives an
    ``int`` when the value is a whole number (``'1E6'`` -> ``1000000``) and a
    ``float`` otherwise (``'1e-2'`` -> ``0.01``); anything else is parsed as
    an ``int``.

    :raises ValueError: if the string is not a valid number literal.
    """
    try:
        # Probe by addition: whatever can be added to 1 is a number already.
        1 + s
    except TypeError:
        pass
    else:
        return s
    if '.' in s:
        return float(s)
    if 'e' in s or 'E' in s:
        value = float(s)
        # Only whole values are turned into int: int(float('1e-2')) would
        # silently truncate to 0.
        return int(value) if value.is_integer() else value
    return int(s)

That does not deal with complex numbers. For checking for a correct floating-point format, see my code snippet: http://www.daniweb.com/software-development/python/code/284490

Actually, a simpler approach is to filter the string so that dangerous input never reaches eval (we accept spaces because the input may not have been stripped and we want to allow '12 + 10j'):

def eval_number(n):
    """Return *n* as a number, evaluating it first if it is a string.

    Numbers are returned unchanged.  A string is evaluated only when every
    character belongs to a small numeric alphabet, which admits ints, floats,
    signed values and complex forms such as '12 + 10j'.

    :raises ValueError: if the string contains a character outside that
        alphabet, or if the allowed characters do not form a valid number
        expression.
    """
    try:
        # Probe by addition: whatever can be added to 1 is a number already.
        1 + n
    except TypeError:
        pass
    else:
        return n
    # '+' and '-' are needed for negative numbers and for complex values.
    chars = set(' 0123456789.eEjJ+-')
    if not all(c in chars for c in n):
        raise ValueError('Invalid letter in number: %r' % n)
    try:
        # NOTE(review): eval() on external text.  It is reached only for
        # strings built from the whitelist above, and runs with empty
        # namespaces so that stray 'e'/'j' letters cannot resolve to a
        # variable.  Re-audit this if the character set is ever extended.
        return eval(n, {'__builtins__': {}}, {})
    except (SyntaxError, NameError, AttributeError):
        # Allowed characters that still do not spell a number: '', '1 2', 'e'
        raise ValueError('Invalid number: %r' % n)
Gribouillis 1,391 Programming Explorer Team Colleague

I suggest this

import tokenize
from cStringIO import StringIO

# NEWLINE is accepted because current tokenizers emit one implicitly for
# input that does not end in a newline; without it every string is rejected.
valid_tokens = set([tokenize.OP, tokenize.NUMBER, tokenize.NEWLINE,
                    tokenize.ENDMARKER])


def eval_number(n):
    """Return *n* as a number, evaluating it first if it is a string.

    Numbers are returned unchanged.  A string is tokenized and evaluated only
    if every token is a number, an operator or end-of-input bookkeeping.

    :raises ValueError: if the string cannot be tokenized, contains any other
        kind of token, or is not a valid expression.
    """
    try:
        # Probe by addition: whatever can be added to 1 is a number already.
        1 + n
    except TypeError:
        pass
    else:
        return n
    try:
        types = set(t[0] for t in tokenize.generate_tokens(StringIO(n).readline))
    except (tokenize.TokenError, SyntaxError):
        raise ValueError(n)
    if not types.issubset(valid_tokens):
        raise ValueError(n)
    # NOTE(review): eval() on external text.  tokenize.OP covers every
    # operator and bracket, so expressions that are not plain numbers (for
    # example '2**8' or '(1,)') pass the check above and are evaluated too.
    try:
        return eval(n)
    except SyntaxError:
        # Valid tokens in an invalid arrangement, e.g. '1 +'
        raise ValueError(n)
TrustyTony 888 pyMod Team Colleague Featured Poster

My eval_number was missing '+-', so it would not evaluate complex or negative numbers. The set of valid characters could be a global (i.e. module-level) variable, like Gribouillis' valid_tokens. Using StringIO looks like a somewhat roundabout way of checking the characters. Thanks for the example of using tokenize, though.

TrustyTony 888 pyMod Team Colleague Featured Poster

I suggest this

import tokenize
from cStringIO import StringIO

# NEWLINE is accepted because current tokenizers emit one implicitly for
# input that does not end in a newline; without it every string is rejected.
valid_tokens = set([tokenize.OP, tokenize.NUMBER, tokenize.NEWLINE,
                    tokenize.ENDMARKER])


def eval_number(n):
    """Return *n* as a number, evaluating it first if it is a string.

    Numbers are returned unchanged.  A string is tokenized and evaluated only
    if every token is a number, an operator or end-of-input bookkeeping.

    :raises ValueError: if the string cannot be tokenized, contains any other
        kind of token, or is not a valid expression.
    """
    try:
        # Probe by addition: whatever can be added to 1 is a number already.
        1 + n
    except TypeError:
        pass
    else:
        return n
    try:
        types = set(t[0] for t in tokenize.generate_tokens(StringIO(n).readline))
    except (tokenize.TokenError, SyntaxError):
        raise ValueError(n)
    if not types.issubset(valid_tokens):
        raise ValueError(n)
    # NOTE(review): eval() on external text.  tokenize.OP covers every
    # operator and bracket, so expressions that are not plain numbers (for
    # example '2**8' or '(1,)') pass the check above and are evaluated too.
    try:
        return eval(n)
    except SyntaxError:
        # Valid tokens in an invalid arrangement, e.g. '1 +'
        raise ValueError(n)

Dropping cStringIO in favour of iterable:

# NEWLINE is emitted implicitly by current tokenizers for input without a
# trailing newline; INDENT/DEDENT appear when the string has leading blanks.
valid_tokens = set([tokenize.OP, tokenize.NUMBER, tokenize.NEWLINE,
                    tokenize.ENDMARKER, tokenize.INDENT, tokenize.DEDENT])


def eval_number_g(n):
    """Return *n* as a number, evaluating it first if it is a string.

    Numbers are returned unchanged.  A string is tokenized (without going
    through a StringIO object) and evaluated only if every token is a number,
    an operator or layout/end-of-input bookkeeping.

    :raises ValueError: if the string cannot be tokenized, contains any other
        kind of token, or is not a valid expression.
    """
    try:
        # Probe by addition: whatever can be added to 1 is a number already.
        1 + n
    except TypeError:
        pass
    else:
        return n
    # The tokenizer must see the whole string as a single line: fed one
    # character at a time, literals such as '1e2' or '12j' are split apart
    # and their letters tokenize as names.  After that line, '' signals EOF.
    lines = iter([n])
    try:
        types = set(t[0] for t in
                    tokenize.generate_tokens(lambda: next(lines, '')))
    except (tokenize.TokenError, SyntaxError) as err:
        raise ValueError('%s (%s)' % (n, err))
    if not types.issubset(valid_tokens):
        raise ValueError('%s (%s)' % (n, types))
    # NOTE(review): eval() on external text.  tokenize.OP covers every
    # operator and bracket, so expressions that are not plain numbers (for
    # example '2**8' or '(1,)') pass the check above and are evaluated too.
    try:
        return eval(n)
    except SyntaxError as err:
        # Valid tokens in an invalid arrangement, e.g. '1 +'
        raise ValueError('%s (%s)' % (n, err))


eval_number = eval_number_g
Gribouillis 1,391 Programming Explorer Team Colleague

Here is another solution, using the ast module and the builtin function compile(). I think it is very robust because it only accepts expressions consisting of a single Python number or the sum or difference of two numbers (to allow complex expressions like "1+2j"). It will reject, for example, "(1,)" or "[1]".

# python 2 and 3
import ast

def signature(astnode):
    """Return a nested tuple describing the node types of an AST subtree.

    The first item is the type of *astnode*; each following item is the
    signature of one child node, in the order ``ast.iter_child_nodes``
    yields them.
    """
    return (type(astnode),) + tuple(
                signature(n) for n in ast.iter_child_nodes(astnode))


# Node type the running interpreter produces for a numeric literal: ast.Num
# on older versions, ast.Constant on newer ones.  Hard-coding ast.Num never
# matches where literals are parsed into ast.Constant nodes.
_NUMBER_NODE = type(ast.parse("0", mode="eval").body)

_UNSIGNED = (_NUMBER_NODE,)
# A term is a literal, optionally preceded by a unary sign ("-3", "+2j").
_TERMS = (
    _UNSIGNED,
    (ast.UnaryOp, (ast.USub,), _UNSIGNED),
    (ast.UnaryOp, (ast.UAdd,), _UNSIGNED),
)

# Accepted shapes: one term, or the sum / difference of two terms.
valid_signatures = set(
    [(ast.Expression, term) for term in _TERMS]
    + [(ast.Expression, (ast.BinOp, left, (op,), right))
       for left in _TERMS
       for op in (ast.Add, ast.Sub)
       for right in _TERMS]
)


def is_number_string(s):
    """Return True if *s* is a number or the sum/difference of two numbers.

    Each number may carry a unary sign.  Text that does not compile as an
    expression is reported as False rather than raising.
    """
    import numbers

    try:
        expression = compile(s, "<string>", "eval", ast.PyCF_ONLY_AST)
    except (SyntaxError, ValueError):
        return False
    if signature(expression) not in valid_signatures:
        return False
    # Where literals are ast.Constant nodes, the same node type also carries
    # strings, None and booleans, so the literal values are checked as well.
    for node in ast.walk(expression):
        if isinstance(node, _NUMBER_NODE):
            value = node.value if hasattr(node, "value") else node.n
            if isinstance(value, bool) or not isinstance(value, numbers.Number):
                return False
    return True

def eval_number_ast(n):
    """Return *n* as a number, evaluating it first if it is a string.

    Anything that can be added to 1 is treated as a number and returned
    unchanged.  Other input is evaluated only when is_number_string()
    accepts it.

    :raises ValueError: if is_number_string() rejects *n*.
    """
    try:
        1 + n
    except TypeError:
        # Not usable in arithmetic, so fall through to the string handling.
        pass
    else:
        return n
    if not is_number_string(n):
        raise ValueError(n)
    return eval(n)
        
def main():
    """Self-check eval_number_ast on ready numbers and on numeric strings."""
    convert = eval_number_ast
    # (input, expected result) pairs, checked in order.
    expectations = (
        (32, 32),
        (3.0, 3.0),
        (1+3j, 3.0j + 1.0),
        ("32", 32),
        ("3e2", 300.0),
        ("12j-3", -3.0 + 12.0j),
    )
    for given, wanted in expectations:
        assert convert(given) == wanted


if __name__ == "__main__":
    main()
TrustyTony commented: Interesting. +13
Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.