The feedparser module is so nice and easy to use. It is specifically designed to parse RSS XML files. Below is the script I wrote that downloads the mp3 files and makes a text file containing each mp3 file's description. It stores each mp3 and its description file in the same directory.
here is the link to feedparser
http://feedparser.org/
[php]
#!/usr/bin/env python
#
# this program downloads mp3 files and description based on the rss feed file(xml)
#
import sys, feedparser, os, urllib
# The location of the RSS XML file is taken from the first command-line
# argument. Exit with a usage message instead of an IndexError traceback when
# the argument is missing.
if len(sys.argv) < 2:
    sys.exit("usage: %s <rss_file>" % sys.argv[0])
rss_file = sys.argv[1]
# Parsed feed object; its .entries are consumed by parse_rss below.
feed_obj = feedparser.parse(rss_file)
# this function walks the parsed rss feed and returns its info as a list
# this list contains one tuple per item, holding: title, link, description
def parse_rss(v_feed_obj):
    """Collect (title, link, description) for every entry of a parsed feed.

    v_feed_obj -- parsed feed object exposing an ``entries`` sequence whose
                  items have ``title``, ``link`` and ``description`` attributes.

    Returns a list of tuples ``(title, link, description)`` where the title
    has been passed through name_cleaner.
    """
    # Read from the argument that was passed in, not the module-level
    # feed_obj, so the function works for any feed object handed to it.
    return [
        (name_cleaner(entry.title), entry.link, entry.description)
        for entry in v_feed_obj.entries
    ]
# this function replaces the symbol "/" with "-" in directory and file names,
# replaces spaces with underscores and also makes all names lowercase
def name_cleaner(cleanfile):
    """Return cleanfile made safe for use as a file or directory name.

    Every "/" becomes "-", every space becomes "_", and the result is
    lowercased.
    """
    return cleanfile.replace('/', '-').replace(' ', '_').lower()
# this is obviously the main function
def main():
    """Download each feed item's mp3 and store its description next to it.

    For every (title, link, description) tuple returned by parse_rss, a
    directory named after the title is created relative to the current
    working directory, the link is downloaded to <title>/<title>.mp3 and
    the description is written to <title>/INFO.
    """
    for title, link, description in parse_rss(feed_obj):
        # Re-running the script must not crash on a directory left over from
        # an earlier run, so only create it when it is missing.
        if not os.path.isdir(title):
            # 0o775 is the octal spelling accepted by Python 2.6+ and 3.
            os.mkdir(title, 0o775)
        mp3_path = os.path.join(title, "%s.mp3" % title)
        urllib.urlretrieve(link, mp3_path)
        info_path = os.path.join(title, "INFO")
        # The with-block closes the file even if the write raises.
        with open(info_path, 'w') as text_file:
            text_file.write(description + "\n")
main()[/php]