scan filetree for files containing text

TrustyTony 0 Tallied Votes 909 Views Share

This was inspired by a recent poster who asked how to check for multiple strings in multiple files.

import os
def process_file(criteria, path, filelist):
    """Report each file of one directory that contains a search term.

    The signature follows the ``func(arg, dirname, names)`` visitor shape
    used by ``os.path.walk``.

    criteria -- ``(extensions, find)`` pair.  ``extensions`` is a sequence of
                filename suffixes to accept (an empty sequence accepts every
                name); ``find`` is a sequence of strings to look for.
    path     -- directory that the entries of ``filelist`` live in.
    filelist -- names of the entries inside ``path``.

    Prints one line per matching file, naming the first term of ``find``
    that occurs in it.  Files without any match produce no output.
    """
    # Unpacked here instead of in the signature: tuple parameters are
    # Python 2-only syntax (removed by PEP 3113).
    extensions, find = criteria
    suffixes = tuple(extensions)  # str.endswith takes a tuple of options

    # Files are read as bytes, so pair every term with a bytes version of
    # itself.  Text terms are encoded as UTF-8; byte strings are used as-is.
    targets = [(term, term if isinstance(term, bytes) else term.encode('utf-8'))
               for term in find]

    for filename in filelist:
        if suffixes and not filename.endswith(suffixes):
            continue
        this = os.path.join(path, filename)
        # The name list may include subdirectories (os.path.walk passes the
        # full directory listing); those cannot be opened for reading.
        if not os.path.isfile(this):
            continue
        # Read each file once and close it promptly.
        with open(this, 'rb') as handle:
            content = handle.read()
        for term, needle in targets:
            if needle in content:
                print('%60s has %s in it.' % (this, term))
                break  # only the first matching term is reported


def files_recursively(topdir, extensions, find):
    """Search every directory below ``topdir`` for files containing a term.

    topdir     -- directory at which the recursive scan starts; it is
                  resolved with ``os.path.realpath`` first.
    extensions -- sequence of filename suffixes to accept (empty = all).
    find       -- sequence of strings to look for.

    Each directory is handed to ``process_file``, which prints the matches.
    """
    criteria = (extensions, find)
    # os.walk replaces os.path.walk, which no longer exists in Python 3.
    # Only plain file names are forwarded, never subdirectory names.
    for dirpath, _dirnames, filenames in os.walk(os.path.realpath(topdir)):
        process_file(criteria, dirpath, filenames)

# Example run, executed whenever this snippet is run or imported: scan
# d:/test recursively for .py and .txt files containing one of the three terms.
files_recursively('d:/test',('.py','.txt'), ('Tkinter','itertools','copyright'))
TrustyTony 888 ex-Moderator Team Colleague Featured Poster

Here is the current version, usable both as a command-line tool and as a library routine. Note the raw docstring, which keeps the help(findfiles) output correct with backslashes when the module is imported.

r""" filesearch.py
    Recursively searches topdir (arg1) for files with one of the extensions given in arg2
    (separated by ';') that contain one of the search texts. The remaining arguments are
    joined with single spaces and then split on ';' to form the alternative search texts.

    Examples:
    
    filesearch . .py;.pyw tony veijalainen;for this;takewhile
  'tony veijalainen' found in K:\test\crypt.py
          'for this' found in K:\test\csvlist.py
          'for this' found in K:\test\euler.py
          'for this' found in K:\test\factor.py
          'for this' found in K:\test\factors.py
         'takewhile' found in K:\test\fib.py
          'for this' found in K:\test\filesearch.py    
"""

from sys import argv
import os

def process_file(criteria, path, filelist):
    """Report each file of one directory that contains a search text.

    The signature follows the ``func(arg, dirname, names)`` visitor shape
    used by ``os.path.walk``.

    criteria -- ``(extensions, text_to_find)`` pair.  ``extensions`` is a
                sequence of filename suffixes to accept (an empty sequence
                accepts every name); ``text_to_find`` is a sequence of
                strings to look for.
    path     -- directory that the entries of ``filelist`` live in.
    filelist -- names of the entries inside ``path``.

    Prints one line per matching file, showing the first text of
    ``text_to_find`` that occurs in it.  Files without any match produce
    no output.
    """
    # Unpacked here instead of in the signature: tuple parameters are
    # Python 2-only syntax (removed by PEP 3113).
    extensions, text_to_find = criteria
    suffixes = tuple(extensions)  # str.endswith takes a tuple of options

    # Files are read as bytes, so pair every text with a bytes version of
    # itself.  Text strings are encoded as UTF-8; byte strings are used as-is.
    targets = [(text, text if isinstance(text, bytes) else text.encode('utf-8'))
               for text in text_to_find]

    for filename in filelist:
        if suffixes and not filename.endswith(suffixes):
            continue
        this = os.path.join(path, filename)
        # The name list may include subdirectories (os.path.walk passes the
        # full directory listing); those cannot be opened for reading.
        if not os.path.isfile(this):
            continue
        # Read each file once and close it promptly.
        with open(this, 'rb') as handle:
            content = handle.read()
        for text, needle in targets:
            if needle in content:
                print('%20r found in %s' % (text, this))
                break  # only the first matching text is reported


def files_recursively(topdir, extensions, text_to_find):
    """Search every directory below ``topdir`` for files containing a text.

    topdir       -- directory at which the recursive scan starts; it is
                    resolved with ``os.path.realpath`` first.
    extensions   -- sequence of filename suffixes to accept (empty = all).
    text_to_find -- sequence of strings to look for.

    Each directory is handed to ``process_file``, which prints the matches.
    """
    criteria = (extensions, text_to_find)
    # os.walk replaces os.path.walk, which no longer exists in Python 3.
    # Only plain file names are forwarded, never subdirectory names.
    for dirpath, _dirnames, filenames in os.walk(os.path.realpath(topdir)):
        process_file(criteria, dirpath, filenames)

if __name__ == '__main__':
    # Command line: topdir, ';'-separated extensions, then the search texts.
    if len(argv) >= 4:
        # The shell splits the search texts on spaces, so rejoin everything
        # after the extensions before splitting the alternatives on ';'.
        files_recursively(topdir=argv[1],
                          extensions=argv[2].split(';'),
                          text_to_find=' '.join(argv[3:]).split(';'))
    else:
        # Too few arguments: show how the script is meant to be called.
        print("Usage: %s topdir extension;extension alternative;texts to find" % argv[0])
Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.