A string filtering function based on patterns.

Gribouillis 0 Tallied Votes 399 Views Share

This snippet defines a function patfilter(pattern, rule, sequence) which filters a sequence of strings according to a pattern and a rule. Depending on the rule, the output is either a subsequence of the input strings or a sequence of match objects.

#!/usr/bin/env python
# patfilter.py
# Copyright (c) Gribouillis at www.daniweb.com
import re 
from fnmatch import fnmatch ,fnmatchcase ,filter as fnfilter 
try :# python 2.5
  from itertools import ifilter as filter 
except ImportError :# python 3.0
  pass 

def patfilter(pattern, rule, sequence):
  """Filter an iterable of strings with a pattern, as directed by a rule.

  Usage:
    patfilter(pattern, rule, sequence_of_strings) --> sequence
    patfilter.rules() --> the set of accepted rule codes

  Arguments:
    pattern  -- a regular expression (string or compiled re object);
                for the fnmatch rules (*) it must be a wildcard string
    rule     -- a string, one of the rule codes below
    sequence -- an iterable of strings

  Result, depending on the rule:
    "m"   the strings which match the pattern
    "s"   the strings which contain the pattern
    "!m"  the strings which don't match the pattern
    "!s"  the strings which don't contain the pattern
    "@m"  the match objects for all the matches in the sequence
    "@s"  the match objects for at most one search per string
    "@a"  the match objects for all the searches in the sequence
          (consecutive match objects may concern the same string)
    "f"   the strings which match the pattern in the sense of
          the fnmatch module (*)
    "!f"  the strings which don't fnmatch the pattern (*)
    "F"   the strings which fnmatch, case sensitive (*)
    "!F"  the strings which don't fnmatch, case sensitive (*)

  (*) the pattern must be a string for fnmatch rules.

  Raises ValueError when the rule is not one of the codes above.
  """
  dispatch = _PatFilter._rules
  if rule not in dispatch:
    raise ValueError("Unknown rule.")
  # Only the regex rules need a compiled pattern; the fnmatch rules
  # (codes ending in "f" or "F") take the wildcard string unchanged.
  is_fnmatch_rule = rule[-1] in "fF"
  if not is_fnmatch_rule:
    pattern = re.compile(pattern)
  handler = getattr(_PatFilter, dispatch[rule])
  return handler(pattern, sequence)

def rules():
  """rules() -> a new set holding the rule codes accepted by patfilter."""
  accepted = _PatFilter._rules.keys()
  return set(accepted)

# Expose rules() as an attribute of patfilter, so that callers holding only
# the function can still write patfilter.rules().
patfilter.rules = rules

# Public names for "from <module> import *". Every entry must exist in the
# module: the previous entry "patselect" did not, which made a star-import
# fail with AttributeError.
__all__ = ["patfilter", "rules"]

class _PatFilter(object):
  """Namespace holding one filtering strategy per rule code.

  _rules maps a rule code to the name of the static method implementing it.
  The regex strategies call match/search/finditer on the pattern object they
  receive; the fnmatch strategies pass the pattern to the fnmatch module.
  """

  _rules = {
    # strings selected with a regular expression
    "m": "match",
    "s": "search",
    "!m": "nomatch",
    "!s": "nosearch",
    # match objects produced by a regular expression
    "@m": "matches",
    "@s": "searches",
    "@a": "allsearches",
    # strings selected with a shell-style wildcard
    "f": "fnmatch",
    "!f": "nofnmatch",
    "F": "fnmatchcase",
    "!F": "nofnmatchcase",
  }

  @staticmethod
  def matches(pat, seq):
    """Yield the match object of every string that pat.match accepts."""
    attempts = (pat.match(text) for text in seq)
    # filter(None, ...) drops the None results of failed matches.
    return filter(None, attempts)

  @staticmethod
  def searches(pat, seq):
    """Yield the first search hit (a match object) of every string having one."""
    attempts = (pat.search(text) for text in seq)
    return filter(None, attempts)

  @staticmethod
  def allsearches(pat, seq):
    """Yield every non-overlapping hit of pat in every string, in order."""
    return (hit for text in seq for hit in pat.finditer(text))

  @staticmethod
  def match(pat, seq):
    """Keep the strings for which pat.match succeeds."""
    def accepted(text):
      return pat.match(text)
    return filter(accepted, seq)

  @staticmethod
  def search(pat, seq):
    """Keep the strings for which pat.search succeeds."""
    def accepted(text):
      return pat.search(text)
    return filter(accepted, seq)

  @staticmethod
  def nomatch(pat, seq):
    """Keep the strings for which pat.match fails."""
    def rejected(text):
      return not pat.match(text)
    return filter(rejected, seq)

  @staticmethod
  def nosearch(pat, seq):
    """Keep the strings for which pat.search fails."""
    def rejected(text):
      return not pat.search(text)
    return filter(rejected, seq)

  @staticmethod
  def fnmatch(pat, seq):
    """Return the strings accepted by fnmatch.filter for the wildcard pat."""
    kept = fnfilter(seq, pat)
    return kept

  @staticmethod
  def nofnmatch(pat, seq):
    """Keep the strings which do not fnmatch the wildcard pat."""
    # Inside a function body the name fnmatch resolves to the module-level
    # import, not to the static method of the same name.
    def rejected(name):
      return not fnmatch(name, pat)
    return filter(rejected, seq)

  @staticmethod
  def fnmatchcase(pat, seq):
    """Keep the strings which fnmatch the wildcard pat, case sensitive."""
    def accepted(name):
      return fnmatchcase(name, pat)
    return filter(accepted, seq)

  @staticmethod
  def nofnmatchcase(pat, seq):
    """Keep the strings which do not fnmatch the wildcard pat, case sensitive."""
    def rejected(name):
      return not fnmatchcase(name, pat)
    return filter(rejected, seq)

if __name__ == "__main__":
  # Demo: apply every rule to the lines of this very file.
  # The with-block closes the file handle instead of leaving it open.
  with open(__file__) as source:
    lines = list(source)
  # A set has no fixed iteration order; sorting keeps the demo output stable.
  for r in sorted(patfilter.rules()):
    # fnmatch rules take a wildcard pattern, the others a regular expression.
    pat = "*seq*" if r[-1] in "fF" else "seq"
    print("======= RULE '%s' ==== PATTERN '%s' ====" % (r, pat))
    for item in patfilter(pat, r, lines):
      if r[0] == "@":
        # "@" rules yield match objects; print the string that was scanned.
        item = item.string
      print(item.rstrip())
Member Avatar for leegeorg07
leegeorg07

How do you fill in the pattern, rule and sequence arguments?

patfilter(pattern, rule, sequence)???

Gribouillis 1,391 Programming Explorer Team Colleague

For example

# An open file is an iterable of strings (its lines); the with-block closes it
# once the filtered lines have been consumed.
with open(filename) as lines:
    # Raw string: "\s" is not a valid escape in an ordinary string literal.
    for line in patfilter(r"<a\s*href", "s", lines):  # keep the lines which contain an href
        myfunction(line)

or

filenames = os.listdir(os.getcwd())  # entry names of the current working directory
for name in patfilter("*.pyc", "f", filenames): # rule "f": keep the names matching the wildcard (compiled python files)
    os.unlink(name)  # delete the matching file
Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.