Searching and Comparing strings from an XML Document

Reply

Join Date: Aug 2006
Posts: 1
Reputation: bleb1982 is an unknown quantity at this point 
Solved Threads: 0
bleb1982 bleb1982 is offline Offline
Newbie Poster

Searching and Comparing strings from an XML Document

 
0
  #1
Aug 14th, 2006
Below is a working program that goes through the XML document and writes all of the data to an output file. That part is fine, but what I need help with is this: the text written under the "Comments:" label (the message attribute in the XML document) will sometimes contain the word Patch*. I need to search through the XML document and pull only the information for entries where the word Patch appears. Any ideas?

  1. #!/usr/bin/env python
  2. from xml.dom import minidom
  3. from xml.dom.minidom import Document
  4. import re
  5. import sys
  6. import time
  7. class logparser:
  8. "Log parser"
  9. def __init__(self):
  10. self.usermap = {}
  11. self.modules = []
  12. try:
  13. fuserin = open('C:/CVSAutoMailerPY/userMap.txt',"r")
  14. for line in fuserin:
  15. usermapping = line.split(":",1)
  16. p = re.compile('[a-zA-Z.]*\S')
  17. userid = p.match(usermapping[0])
  18. username = p.match(usermapping[1])
  19. self.usermap[userid.group()]=username.group()
  20. fuserin.close()
  21. except IOError:
  22. #Cannot proceed in this case. Write an error log and exit.
  23. ferrout = open('C:/CVSAutoMailerPY/errorlog.txt',"a")
  24. now = time.localtime(time.time())
  25. ferrout.write("PARSER ERRORLOG ("+time.asctime(now)+"):Message - User Map file could not be found\n")
  26. ferrout.flush()
  27. ferrout.close()
  28. fuserin.cloase()
  29. sys.exit()
  30. try:
  31. fmodin = open('C:/CVSAutoMailerPY/modules.txt',"r")
  32. for module in fmodin:
  33. self.modules.append(module)
  34. fmodin.close()
  35. except IOError:
  36. #Cannot proceed in this case. Write an error log and exit.
  37. ferrout = open('C:/CVSAutoMailerPY/errorlog.txt',"a")
  38. now = time.localtime(time.time())
  39. ferrout.write("PARSER ERRORLOG ("+time.asctime(now)+"):Message - Module information file could not be found\n")
  40. ferrout.flush()
  41. ferrout.close()
  42. fmodin.close()
  43. sys.exit()
  44.  
  45. def parseLog(self):
  46. #we have to populate a module map but for now....
  47. for module in self.modules:
  48. try:
  49. modname = self.normalizeModuleName(module)
  50. fin = "C:/CVSAutoMailerPY/logDataForum2.1.OLD"
  51. fmsgout = "C:/CVSAutoMailerPY/CVSTest/test.txt"
  52. ftest = open(fin)
  53. fmsg = open(fmsgout,"w")
  54. doc = minidom.parse(fin)
  55. #msgHeader = self.getMessageHeader(modname)
  56. #msgFooter = self.getMessageFooter(modname)
  57. #fmsg.write(msgHeader)
  58.  
  59. # Unwanted but mandatory overhead to detect maliciou activities
  60. loghead = doc.getElementsByTagName("CommitLog")
  61. if loghead[0].hasChildNodes():
  62. children = loghead[0].childNodes
  63. for child in children:
  64. if child.nodeType == 1:
  65. if self.usermap.has_key(child.nodeName):
  66. pass
  67. else:
  68. self.usermap[child.nodeName] = child.nodeName+" (user unknown)"
  69.  
  70. for user in self.usermap:
  71. userEntry = doc.getElementsByTagName(user)
  72. if userEntry:
  73. userent = userEntry[0]
  74. #print "Populating events performed by "+user
  75. message = ""
  76. username = ""
  77. if self.usermap.has_key(user):
  78. username = self.usermap[user]
  79. else:
  80. username = "Unknown User: "+user
  81. fmsg.write("*** Changes by: "+username+" ***\n\n")
  82. commitOp = userent.getElementsByTagName("CommitOperation")
  83. times = "2"
  84. old = None
  85. oldmessage = ""
  86. files = ""
  87. hfg = "false"
  88. if commitOp:
  89. for ops in commitOp:
  90. if old == None:
  91. old = ops
  92. newmessage = self.normalizeMessage(ops.getAttribute("message"))
  93. #Populate file names(s) in msg
  94. if oldmessage == newmessage:
  95. file =ops.getAttribute("file")
  96. path = ops.getAttribute("path")
  97. normfilenames = self.normalizeFileName(file)
  98. for file in normfilenames:
  99. files+="\tFileName:\t"+path+"/"+file+"\n"
  100. fmsg.write(files)
  101. files=""
  102. #fmsg.write("\t ** Grouped ** \n")
  103. else:
  104. if oldmessage == "":
  105. pass
  106. else:
  107. fmsg.write("\tComments:\t" + oldmessage)
  108. fmsg.write("\n\n")
  109. file = ops.getAttribute("file")
  110. path = ops.getAttribute("path")
  111. normfilenames = self.normalizeFileName(file)
  112. for file in normfilenames:
  113. files+="\tFileName:\t"+path+"/"+file+"\n"
  114. fmsg.write(files)
  115. files=""
  116. oldmessage = newmessage
  117. old = ops
  118.  
  119. fmsg.write("\tComments:\t" + oldmessage)
  120. fmsg.write("\n\n")
  121. else:
  122. ferrout = open('C:/CVSAutoMailerPY/errorlog.txt',"a")
  123. now = time.localtime(time.time())
  124. ferrout.write("PARSER ERRORLOG ("+time.asctime(now)+"):Message - No Commit operation found for this user entry. Atleast one operation expected.\n")
  125. ferrout.flush()
  126. ferrout.close()
  127. fmodin.close()
  128. #fmsg.write(msgFooter)
  129. fmsg.close()
  130. except IOError:
  131. pass
  132. def getMessageHeader(self,modname):
  133. msgHeader = "From: CVSADMIN\n"
  134. msgHeader+= "To: CVSNT\n"
  135. msgHeader+= "Subject: CVS "+ modname + " Update Notification\n\n"
  136. msgHeader+= "DO NOT Reply to this mail. This is an automatic notification of the CVS Updates.\n\n"
  137. return msgHeader
  138.  
  139. def getMessageFooter(self,modname):
  140. msgFooter = "Please update your local working copies.\n"
  141. msgFooter+= "Thanks.\n"
  142. msgFooter+= "CVS Admin\n"
  143. msgFooter+= "Fischer International"
  144. return msgFooter
  145.  
  146. def normalizeModuleName(self,module):
  147. mod = module.strip("\n")
  148. return mod
  149.  
  150. def normalizeMessage(self,message):
  151. #reg = re.compile(r'[\s][\s]')
  152. if message == "\"":
  153. return "*** No Comments Added by the user ***"
  154. else:
  155. normmsg = message.strip("\"")
  156. normmsg = normmsg.replace("<a rel="nofollow" class="t" href="file://\" target="_blank">\\","\</a>"")
  157. return normmsg
  158.  
  159. def normalizeFileName(self,filename):
  160. reg = re.compile(r'[\\]+[ ]+')
  161. filelist = []
  162. newfilelist = []
  163. intmFN = reg.search(filename)
  164. if intmFN!=None:
  165. filename = filename.replace("<a rel="nofollow" class="t" href="http://www.daniweb.com/techtalkforums/" target="_blank">\\\\</a> ","%")
  166. filelist = filename.split();
  167. else:
  168. filelist.append(filename)
  169.  
  170. if len(filelist) == 1:
  171. spacedfiles = []
  172. temp = filelist[0]
  173. spacedfiles = temp.split()
  174. if len(spacedfiles) == 1:
  175. return filelist
  176. else:
  177. return spacedfiles
  178. else:
  179. for file in filelist:
  180. makeorg = re.compile(r'[%]+')
  181. found = makeorg.search(file)
  182. if found!=None:
  183. newfile = file.replace("%"," ");
  184. newfilelist.append(newfile)
  185. else:
  186. newfilelist.append(file)
  187.  
  188. return newfilelist
  189. lp = logparser()
  190. lp.parseLog()
  191.  
Here is a sample XML Doc

<?xml version="1.0" ?><CommitLog><jnd><CommitOperation file="tick.gif" message="new image for indicating a member already exists in a policy&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/WebFiles/images"/><CommitOperation file="ProvDBObject.java" message="Fix for issue #5562- , #5560&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/provisioning"/><CommitOperation file="ProvisioningCallbackService.java" message="Use the stage-identifier to identify that there is no source workflow associated with this request&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/provisioning"/><CommitOperation file="ProvisioningWorkflowRunner.java" message="Use the stage-identifier to identify that there is no source workflow associated with this request&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/webservice/provisioning"/><CommitOperation file="PrioPolicyUtilServlet.java" message="issue #5560 fixed&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/dfweb"/><CommitOperation file="ProvCache.java ProvDBUtil.java" message="issue #5560 fixed&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/provisioning"/><CommitOperation file="provMemberView.html" message="issue #5560 fixed Changing dates during auto-permanent prevented.ref issue# 5569&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/WebFiles/admin"/><CommitOperation file="DateUtil.java" message="The parseDate method modified to fix issue #5566&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/util"/><CommitOperation file="RecordStore.java" message="Patch_2.2.1 checkin :: Issue# 5567 Temporary workflow files&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/util"/></jnd><jbs><CommitOperation file="provisioningattrcfg.xml" message="Added Petro HPA 
Attributes to provisionign schema.&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/WebFiles/config"/></jbs><jxb><CommitOperation file="HPA_SYSTEM_ACCOUNT_TYPE.sql" message="Changed PRE_AUTHORIZED_FREQUENCY to PRE_AUTHORIZED_DURATION_ID&quot;" module="DataForum2.1" path="DataForum2.1/df/DatabaseScripts/MSSQL/hpa/tables"/><CommitOperation file="HPA_ACCOUNT_V.sql" message="Added SYSTEM_ACCOUNT_TYPE information (Membership Type, Pre-authorized Start Date, Pre-authorized End Date, Pre-authorized Duration Id, Reset Password, Needs Approval and Account Lock Indicators) and SYSTEM_DESC field to view.&quot;" module="DataForum2.1" path="DataForum2.1/df/DatabaseScripts/MSSQL/hpa/views"/><CommitOperation file="HPA_ACCOUNT_V.sql" message="Added SYSTEM_ACCOUNT_TYPE information (Membership Type, Pre-authorized Start Date, Pre-authorized End Date, Pre-authorized Duration Id, Reset Password, Needs Approval and Account Lock Indicators) and SYSTEM_DESC field to view.&quot;" module="DataForum2.1" path="DataForum2.1/df/DatabaseScripts/Oracle/hpa/views"/></jxb></CommitLog>
Last edited by vegaseat; Aug 14th, 2006 at 9:28 pm.
Reply With Quote Quick reply to this message  
Reply

This thread is more than three months old.
Perhaps start a new thread instead?
Message:


Thread Tools Search this Thread



About Us | Contact Us | Advertise | DaniWeb | Acceptable Use Policy | RSS Feed

©2003 - 2009 DaniWeb® LLC