944,080 Members | Top Members by Rank

Ad:
  • Python Discussion Thread
  • Unsolved
  • Views: 2854
  • Python RSS
Aug 14th, 2006
0

Searching and Comparing strings from an XML Document

Expand Post »
Below is a working program that goes through the XML document and writes all of the data to an output file. That part is fine. What I need help with is this: the message that is written out under "Comments:" will sometimes contain the word Patch (for example "Patch_2.2.1 checkin"). I need to search through the XML document and pull only the entries whose message contains the word Patch. Any ideas?

Python Syntax (Toggle Plain Text)
  1. #!/usr/bin/env python
  2. from xml.dom import minidom
  3. from xml.dom.minidom import Document
  4. import re
  5. import sys
  6. import time
  7. class logparser:
  8. "Log parser"
  9. def __init__(self):
  10. self.usermap = {}
  11. self.modules = []
  12. try:
  13. fuserin = open('C:/CVSAutoMailerPY/userMap.txt',"r")
  14. for line in fuserin:
  15. usermapping = line.split(":",1)
  16. p = re.compile('[a-zA-Z.]*\S')
  17. userid = p.match(usermapping[0])
  18. username = p.match(usermapping[1])
  19. self.usermap[userid.group()]=username.group()
  20. fuserin.close()
  21. except IOError:
  22. #Cannot proceed in this case. Write an error log and exit.
  23. ferrout = open('C:/CVSAutoMailerPY/errorlog.txt',"a")
  24. now = time.localtime(time.time())
  25. ferrout.write("PARSER ERRORLOG ("+time.asctime(now)+"):Message - User Map file could not be found\n")
  26. ferrout.flush()
  27. ferrout.close()
  28. fuserin.cloase()
  29. sys.exit()
  30. try:
  31. fmodin = open('C:/CVSAutoMailerPY/modules.txt',"r")
  32. for module in fmodin:
  33. self.modules.append(module)
  34. fmodin.close()
  35. except IOError:
  36. #Cannot proceed in this case. Write an error log and exit.
  37. ferrout = open('C:/CVSAutoMailerPY/errorlog.txt',"a")
  38. now = time.localtime(time.time())
  39. ferrout.write("PARSER ERRORLOG ("+time.asctime(now)+"):Message - Module information file could not be found\n")
  40. ferrout.flush()
  41. ferrout.close()
  42. fmodin.close()
  43. sys.exit()
  44.  
  45. def parseLog(self):
  46. #we have to populate a module map but for now....
  47. for module in self.modules:
  48. try:
  49. modname = self.normalizeModuleName(module)
  50. fin = "C:/CVSAutoMailerPY/logDataForum2.1.OLD"
  51. fmsgout = "C:/CVSAutoMailerPY/CVSTest/test.txt"
  52. ftest = open(fin)
  53. fmsg = open(fmsgout,"w")
  54. doc = minidom.parse(fin)
  55. #msgHeader = self.getMessageHeader(modname)
  56. #msgFooter = self.getMessageFooter(modname)
  57. #fmsg.write(msgHeader)
  58.  
  59. # Unwanted but mandatory overhead to detect maliciou activities
  60. loghead = doc.getElementsByTagName("CommitLog")
  61. if loghead[0].hasChildNodes():
  62. children = loghead[0].childNodes
  63. for child in children:
  64. if child.nodeType == 1:
  65. if self.usermap.has_key(child.nodeName):
  66. pass
  67. else:
  68. self.usermap[child.nodeName] = child.nodeName+" (user unknown)"
  69.  
  70. for user in self.usermap:
  71. userEntry = doc.getElementsByTagName(user)
  72. if userEntry:
  73. userent = userEntry[0]
  74. #print "Populating events performed by "+user
  75. message = ""
  76. username = ""
  77. if self.usermap.has_key(user):
  78. username = self.usermap[user]
  79. else:
  80. username = "Unknown User: "+user
  81. fmsg.write("*** Changes by: "+username+" ***\n\n")
  82. commitOp = userent.getElementsByTagName("CommitOperation")
  83. times = "2"
  84. old = None
  85. oldmessage = ""
  86. files = ""
  87. hfg = "false"
  88. if commitOp:
  89. for ops in commitOp:
  90. if old == None:
  91. old = ops
  92. newmessage = self.normalizeMessage(ops.getAttribute("message"))
  93. #Populate file names(s) in msg
  94. if oldmessage == newmessage:
  95. file =ops.getAttribute("file")
  96. path = ops.getAttribute("path")
  97. normfilenames = self.normalizeFileName(file)
  98. for file in normfilenames:
  99. files+="\tFileName:\t"+path+"/"+file+"\n"
  100. fmsg.write(files)
  101. files=""
  102. #fmsg.write("\t ** Grouped ** \n")
  103. else:
  104. if oldmessage == "":
  105. pass
  106. else:
  107. fmsg.write("\tComments:\t" + oldmessage)
  108. fmsg.write("\n\n")
  109. file = ops.getAttribute("file")
  110. path = ops.getAttribute("path")
  111. normfilenames = self.normalizeFileName(file)
  112. for file in normfilenames:
  113. files+="\tFileName:\t"+path+"/"+file+"\n"
  114. fmsg.write(files)
  115. files=""
  116. oldmessage = newmessage
  117. old = ops
  118.  
  119. fmsg.write("\tComments:\t" + oldmessage)
  120. fmsg.write("\n\n")
  121. else:
  122. ferrout = open('C:/CVSAutoMailerPY/errorlog.txt',"a")
  123. now = time.localtime(time.time())
  124. ferrout.write("PARSER ERRORLOG ("+time.asctime(now)+"):Message - No Commit operation found for this user entry. Atleast one operation expected.\n")
  125. ferrout.flush()
  126. ferrout.close()
  127. fmodin.close()
  128. #fmsg.write(msgFooter)
  129. fmsg.close()
  130. except IOError:
  131. pass
  132. def getMessageHeader(self,modname):
  133. msgHeader = "From: CVSADMIN\n"
  134. msgHeader+= "To: CVSNT\n"
  135. msgHeader+= "Subject: CVS "+ modname + " Update Notification\n\n"
  136. msgHeader+= "DO NOT Reply to this mail. This is an automatic notification of the CVS Updates.\n\n"
  137. return msgHeader
  138.  
  139. def getMessageFooter(self,modname):
  140. msgFooter = "Please update your local working copies.\n"
  141. msgFooter+= "Thanks.\n"
  142. msgFooter+= "CVS Admin\n"
  143. msgFooter+= "Fischer International"
  144. return msgFooter
  145.  
  146. def normalizeModuleName(self,module):
  147. mod = module.strip("\n")
  148. return mod
  149.  
  150. def normalizeMessage(self,message):
  151. #reg = re.compile(r'[\s][\s]')
  152. if message == "\"":
  153. return "*** No Comments Added by the user ***"
  154. else:
  155. normmsg = message.strip("\"")
  156. normmsg = normmsg.replace("<a rel="nofollow" href="file://\" target="_blank">\\","\</a>"")
  157. return normmsg
  158.  
  159. def normalizeFileName(self,filename):
  160. reg = re.compile(r'[\\]+[ ]+')
  161. filelist = []
  162. newfilelist = []
  163. intmFN = reg.search(filename)
  164. if intmFN!=None:
  165. filename = filename.replace("<a rel="nofollow" href="http://www.daniweb.com/techtalkforums/" target="_blank">\\\\</a> ","%")
  166. filelist = filename.split();
  167. else:
  168. filelist.append(filename)
  169.  
  170. if len(filelist) == 1:
  171. spacedfiles = []
  172. temp = filelist[0]
  173. spacedfiles = temp.split()
  174. if len(spacedfiles) == 1:
  175. return filelist
  176. else:
  177. return spacedfiles
  178. else:
  179. for file in filelist:
  180. makeorg = re.compile(r'[%]+')
  181. found = makeorg.search(file)
  182. if found!=None:
  183. newfile = file.replace("%"," ");
  184. newfilelist.append(newfile)
  185. else:
  186. newfilelist.append(file)
  187.  
  188. return newfilelist
  189. lp = logparser()
  190. lp.parseLog()
  191.  
Here is a sample XML Doc

<?xml version="1.0" ?><CommitLog><jnd><CommitOperation file="tick.gif" message="new image for indicating a member already exists in a policy&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/WebFiles/images"/><CommitOperation file="ProvDBObject.java" message="Fix for issue #5562- , #5560&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/provisioning"/><CommitOperation file="ProvisioningCallbackService.java" message="Use the stage-identifier to identify that there is no source workflow associated with this request&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/provisioning"/><CommitOperation file="ProvisioningWorkflowRunner.java" message="Use the stage-identifier to identify that there is no source workflow associated with this request&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/webservice/provisioning"/><CommitOperation file="PrioPolicyUtilServlet.java" message="issue #5560 fixed&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/dfweb"/><CommitOperation file="ProvCache.java ProvDBUtil.java" message="issue #5560 fixed&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/provisioning"/><CommitOperation file="provMemberView.html" message="issue #5560 fixed Changing dates during auto-permanent prevented.ref issue# 5569&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/WebFiles/admin"/><CommitOperation file="DateUtil.java" message="The parseDate method modified to fix issue #5566&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/util"/><CommitOperation file="RecordStore.java" message="Patch_2.2.1 checkin :: Issue# 5567 Temporary workflow files&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/util"/></jnd><jbs><CommitOperation file="provisioningattrcfg.xml" message="Added Petro HPA 
Attributes to provisionign schema.&quot;" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/WebFiles/config"/></jbs><jxb><CommitOperation file="HPA_SYSTEM_ACCOUNT_TYPE.sql" message="Changed PRE_AUTHORIZED_FREQUENCY to PRE_AUTHORIZED_DURATION_ID&quot;" module="DataForum2.1" path="DataForum2.1/df/DatabaseScripts/MSSQL/hpa/tables"/><CommitOperation file="HPA_ACCOUNT_V.sql" message="Added SYSTEM_ACCOUNT_TYPE information (Membership Type, Pre-authorized Start Date, Pre-authorized End Date, Pre-authorized Duration Id, Reset Password, Needs Approval and Account Lock Indicators) and SYSTEM_DESC field to view.&quot;" module="DataForum2.1" path="DataForum2.1/df/DatabaseScripts/MSSQL/hpa/views"/><CommitOperation file="HPA_ACCOUNT_V.sql" message="Added SYSTEM_ACCOUNT_TYPE information (Membership Type, Pre-authorized Start Date, Pre-authorized End Date, Pre-authorized Duration Id, Reset Password, Needs Approval and Account Lock Indicators) and SYSTEM_DESC field to view.&quot;" module="DataForum2.1" path="DataForum2.1/df/DatabaseScripts/Oracle/hpa/views"/></jxb></CommitLog>
Last edited by vegaseat; Aug 14th, 2006 at 9:28 pm.
Similar Threads
Reputation Points: 10
Solved Threads: 0
Newbie Poster
bleb1982 is offline Offline
1 posts
since Aug 2006

This thread is more than three months old

No one has posted to this discussion for at least three months. Please let old threads die and do not reply to them unless you feel you have something new and valuable to contribute that absolutely must be added to make the discussion complete. Otherwise, please start a new thread in this forum instead.
Message:
Previous Thread in Python Forum Timeline: Chores Assignment Script
Next Thread in Python Forum Timeline: sessions in python





About Us | Contact Us | Advertise | Acceptable Use Policy
Forum Index | Build Custom RSS Feed


Follow us on Twitter


© 2011 DaniWeb® LLC