bleb1982 0 Newbie Poster

Below is a working program that goes through the XML document and writes all of the data to an output file. That part is fine. What I need help with is this: the text written out under "Comments:" (taken from the message attribute in the XML document) will sometimes contain the word Patch (for example, Patch_2.2.1). I need to search through the XML document and pull out only the entries whose comment contains the word Patch. Any ideas?

#!/usr/bin/env python
from xml.dom import minidom
from xml.dom.minidom import Document
import re
import sys
import time
class logparser(object):
    """Summarise an XML CVS commit log as a plain-text, per-user report.

    The constructor loads a user map (``userid:username`` per line) and a
    list of module names. ``parseLog`` then reads the commit log, groups
    consecutive commit operations that share a message, and writes the
    file names followed by the shared comment to the output file.
    """

    # Default file locations; each can be overridden per instance or call.
    USERMAP_FILE = 'C:/CVSAutoMailerPY/userMap.txt'
    MODULES_FILE = 'C:/CVSAutoMailerPY/modules.txt'
    ERRORLOG_FILE = 'C:/CVSAutoMailerPY/errorlog.txt'
    LOG_FILE = "C:/CVSAutoMailerPY/logDataForum2.1.OLD"
    OUTPUT_FILE = "C:/CVSAutoMailerPY/CVSTest/test.txt"

    # Compiled once instead of once per input line.
    _TOKEN = re.compile(r'[a-zA-Z.]*\S')
    _ESCAPED_SPACE = re.compile(r'[\\]+[ ]+')

    def __init__(self, usermap_path=None, modules_path=None,
                 errorlog_path=None):
        """Load the user map and the module list.

        Args:
            usermap_path: file with one ``userid:username`` entry per line;
                defaults to ``USERMAP_FILE``.
            modules_path: file with one module name per line; defaults to
                ``MODULES_FILE``.
            errorlog_path: file that error messages are appended to;
                defaults to ``ERRORLOG_FILE``.

        Raises:
            SystemExit: if either input file cannot be read (the reason is
                appended to the error log first).
        """
        self.errorlog_path = errorlog_path or self.ERRORLOG_FILE
        self.usermap = {}
        self.modules = []
        try:
            with open(usermap_path or self.USERMAP_FILE, "r") as fuserin:
                for line in fuserin:
                    parts = line.split(":", 1)
                    if len(parts) != 2:
                        # Blank line or no separator: nothing to map.
                        continue
                    userid = self._TOKEN.match(parts[0])
                    username = self._TOKEN.match(parts[1])
                    if userid is None or username is None:
                        continue
                    self.usermap[userid.group()] = username.group()
        except IOError:
            # Cannot proceed in this case. Write an error log and exit.
            self._logError("User Map file could not be found")
            sys.exit(1)
        try:
            with open(modules_path or self.MODULES_FILE, "r") as fmodin:
                # Lines keep their trailing newline, as callers of
                # normalizeModuleName expect.
                self.modules = list(fmodin)
        except IOError:
            # Cannot proceed in this case. Write an error log and exit.
            self._logError("Module information file could not be found")
            sys.exit(1)

    def _logError(self, text):
        """Append a timestamped message to the error log (best effort)."""
        stamp = time.asctime(time.localtime(time.time()))
        line = "PARSER ERRORLOG (" + stamp + "):Message - " + text + "\n"
        try:
            with open(self.errorlog_path, "a") as ferrout:
                ferrout.write(line)
        except IOError:
            # The error log itself is unavailable; do not lose the message.
            sys.stderr.write(line)

    def parseLog(self, log_path=None, out_path=None, pattern=None):
        """Write the per-user change summary for the commit log.

        The log is processed once per configured module and the output file
        is rewritten each time; the module name itself is not used yet.

        Args:
            log_path: XML commit log to read; defaults to ``LOG_FILE``.
            out_path: summary file to (over)write; defaults to
                ``OUTPUT_FILE``.
            pattern: optional regular expression (string or compiled). When
                given, only groups whose comment matches it (``re.search``)
                are written, and users without any matching group are left
                out. When omitted, everything is written.
        """
        log_path = log_path or self.LOG_FILE
        out_path = out_path or self.OUTPUT_FILE
        matcher = re.compile(pattern) if pattern is not None else None
        for _module in self.modules:
            try:
                # Parse first so an unreadable log does not truncate an
                # existing summary.
                doc = minidom.parse(log_path)
                with open(out_path, "w") as fmsg:
                    self._writeSummary(doc, fmsg, matcher)
            except IOError as err:
                self._logError("Could not process commit log: " + str(err))

    def _writeSummary(self, doc, fmsg, matcher):
        """Write one section per known user found in the parsed log."""
        self._registerUnknownUsers(doc)
        for user in list(self.usermap):
            entries = doc.getElementsByTagName(user)
            if not entries:
                continue
            commitOps = entries[0].getElementsByTagName("CommitOperation")
            if commitOps:
                body = self._formatOperations(commitOps, matcher)
            else:
                body = []
                self._logError("No Commit operation found for this user "
                               "entry. Atleast one operation expected.")
            # Without a filter the header is always written; with a filter
            # it is written only when something matched.
            if matcher is None or body:
                fmsg.write("*** Changes by: " + self.usermap[user]
                           + " ***\n\n")
                fmsg.writelines(body)

    def _registerUnknownUsers(self, doc):
        """Add every user element of the log that is not in the user map."""
        # Unwanted but mandatory overhead to detect malicious activities
        roots = doc.getElementsByTagName("CommitLog")
        if not roots:
            return
        for child in roots[0].childNodes:
            if child.nodeType == child.ELEMENT_NODE:
                self.usermap.setdefault(
                    child.nodeName, child.nodeName + " (user unknown)")

    def _formatOperations(self, commitOps, matcher):
        """Return the output lines for one user's commit operations.

        Consecutive operations with the same message form a group: their
        file names are listed first, followed by the shared comment.
        """
        lines = []
        pending = []  # file lines waiting for their group's comment
        oldmessage = ""

        def flush():
            if matcher is None or matcher.search(oldmessage):
                lines.extend(pending)
                lines.append("\tComments:\t" + oldmessage)
                lines.append("\n\n")
            del pending[:]

        for ops in commitOps:
            newmessage = self.normalizeMessage(ops.getAttribute("message"))
            if newmessage != oldmessage:
                # A group with an empty message is not closed; its files
                # fall into the next group.
                if oldmessage != "":
                    flush()
                oldmessage = newmessage
            path = ops.getAttribute("path")
            for name in self.normalizeFileName(ops.getAttribute("file")):
                pending.append("\tFileName:\t" + path + "/" + name + "\n")
        flush()
        return lines

    def getMessageHeader(self, modname):
        """Return the mail header text for a notification about modname."""
        msgHeader = "From: CVSADMIN\n"
        msgHeader += "To: CVSNT\n"
        msgHeader += "Subject: CVS " + modname + " Update Notification\n\n"
        msgHeader += "DO NOT Reply to this mail. This is an automatic notification of the CVS Updates.\n\n"
        return msgHeader

    def getMessageFooter(self, modname):
        """Return the mail footer text (modname is currently unused)."""
        msgFooter = "Please update your local working copies.\n"
        msgFooter += "Thanks.\n"
        msgFooter += "CVS Admin\n"
        msgFooter += "Fischer International"
        return msgFooter

    def normalizeModuleName(self, module):
        """Return module without leading or trailing newline characters."""
        return module.strip("\n")

    def normalizeMessage(self, message):
        """Return the commit message in printable form.

        A message consisting of a single double quote is reported as
        missing. Otherwise surrounding double quotes are removed and every
        backslash is turned into a double quote.
        """
        if message == "\"":
            return "*** No Comments Added by the user ***"
        normmsg = message.strip("\"")
        return normmsg.replace("\\", "\"")

    def normalizeFileName(self, filename):
        """Split a ``file`` attribute into individual file names.

        Names are separated by whitespace. A space that belongs to a name
        is written as two backslashes followed by a space; it is restored
        to a plain space in the result.
        """
        if self._ESCAPED_SPACE.search(filename) is None:
            names = filename.split()
            # A single name is returned untouched.
            return names if len(names) != 1 else [filename]
        # Protect escaped spaces with a character that cannot occur in a
        # file name, so that names containing '%' are not corrupted.
        placeholder = "\0"
        protected = filename.replace("\\\\ ", placeholder)
        return [name.replace(placeholder, " ") for name in protected.split()]
# Script entry point: constructing the parser loads the user map and module
# list; parseLog() then reads the commit log and writes the summary file.
lp = logparser()
lp.parseLog()

Here is a sample XML Doc

<?xml version="1.0" ?><CommitLog><jnd><CommitOperation file="tick.gif" message="new image for indicating a member already exists in a policy&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/WebFiles/images"/><CommitOperation file="ProvDBObject.java" message="Fix for issue #5562- , #5560&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/provisioning"/><CommitOperation file="ProvisioningCallbackService.java" message="Use the stage-identifier to identify that there is no source workflow associated with this request&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/provisioning"/><CommitOperation file="ProvisioningWorkflowRunner.java" message="Use the stage-identifier to identify that there is no source workflow associated with this request&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/webservice/provisioning"/><CommitOperation file="PrioPolicyUtilServlet.java" message="issue #5560 fixed&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/dfweb"/><CommitOperation file="ProvCache.java ProvDBUtil.java" message="issue #5560 fixed&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/provisioning"/><CommitOperation file="provMemberView.html" message="issue #5560 fixed Changing dates during auto-permanent prevented.ref issue# 5569&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/WebFiles/admin"/><CommitOperation file="DateUtil.java" message="The parseDate method modified to fix issue #5566&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/util"/><CommitOperation file="RecordStore.java" message="Patch_2.2.1 checkin :: Issue# 5567 Temporary workflow files&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/util"/></jnd><jbs><CommitOperation file="provisioningattrcfg.xml" message="Added Petro HPA 
Attributes to provisionign schema.&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/WebFiles/config"/></jbs><jxb><CommitOperation file="HPA_SYSTEM_ACCOUNT_TYPE.sql" message="Changed PRE_AUTHORIZED_FREQUENCY to PRE_AUTHORIZED_DURATION_ID&quot;" module="DataForum2.1" path="DataForum2.1/df/DatabaseScripts/MSSQL/hpa/tables"/><CommitOperation file="HPA_ACCOUNT_V.sql" message="Added SYSTEM_ACCOUNT_TYPE information (Membership Type, Pre-authorized Start Date, Pre-authorized End Date, Pre-authorized Duration Id, Reset Password, Needs Approval and Account Lock Indicators) and SYSTEM_DESC field to view.&quot;" module="DataForum2.1" path="DataForum2.1/df/DatabaseScripts/MSSQL/hpa/views"/><CommitOperation file="HPA_ACCOUNT_V.sql" message="Added SYSTEM_ACCOUNT_TYPE information (Membership Type, Pre-authorized Start Date, Pre-authorized End Date, Pre-authorized Duration Id, Reset Password, Needs Approval and Account Lock Indicators) and SYSTEM_DESC field to view.&quot;" module="DataForum2.1" path="DataForum2.1/df/DatabaseScripts/Oracle/hpa/views"/></jxb></CommitLog>