I did a little cleanup of your code; it looks like you had compressed my `between` function. The lambdas looked a little out of place, so I changed them to normal defs to make the code more understandable for people without Lisp or similar experience. I hope I did not break anything:
import urllib
# Download the page to scrape. The try/finally guarantees that the
# connection is closed even when read() raises part-way through.
sock = urllib.urlopen('http://www.columbia.edu/')
try:
    htmlSource = sock.read()
finally:
    sock.close()
def slicer(args):
    """Return the part of a text that lies between two marker strings.

    ``args`` is a single 3-item tuple ``(before, after, text)``:

    * ``before`` -- marker that precedes the wanted text
    * ``after``  -- marker that follows the wanted text
    * ``text``   -- the string to cut

    The result is whatever sits between the first occurrence of
    ``before`` and the first occurrence of ``after`` that follows it.
    If ``before`` does not occur, the result is an empty string; if
    ``after`` does not occur after ``before``, everything following
    ``before`` is returned.  An empty marker makes ``partition`` raise
    ``ValueError``.
    """
    # Unpack inside the body: tuple parameters in the signature are a
    # syntax error on Python 3 (PEP 3113).
    before, after, text = args
    return text.partition(before)[2].partition(after)[0]
def spliter(args):
    """Split a text around the span enclosed by two marker strings.

    ``args`` is a single 3-item tuple ``(b, a, t)``:

    * ``b`` -- marker that opens the enclosed span
    * ``a`` -- marker that closes the enclosed span
    * ``t`` -- the string to split

    Returns a 3-tuple ``(head, between, tail)``: the text before the
    first ``b``, the text between that ``b`` and the next ``a``, and the
    text after that ``a``.  The markers themselves are dropped.  When
    ``b`` is missing the result is ``(t, '', '')``; when ``a`` is
    missing, ``between`` holds everything after ``b`` and ``tail`` is
    empty.
    """
    # Unpack inside the body: tuple parameters in the signature are a
    # syntax error on Python 3 (PEP 3113).
    b, a, t = args
    # Partition on the opening marker once and reuse both halves.
    head, _, remainder = t.partition(b)
    between, _, tail = remainder.partition(a)
    return (head, between, tail)
def stripwhite(x):
    """Return the string ``x`` without leading and trailing whitespace."""
    return x.strip()
# Cut out the news section of the page, which sits between two HTML
# comment markers, and break it into lines on carriage returns.
tag_before = '<!-- BEGIN COLUMBIA NEWS -->'
tag_after = '<!-- END COLUMBIA NEWS -->'
newsSource = slicer((tag_before, tag_after, htmlSource)).split('\r')
# Strip every line exactly once and keep only the non-empty results.
newsSource = [item
              for item in (stripwhite(raw) for raw in newsSource)
              if item]

# On each remaining line, the text before the anchor tag is treated as
# the title and the text between '<a href=' and '>' as the link.
tag_before = '<a href='
tag_after = '>'
for line in newsSource:
    news_title, link, rest = spliter((tag_before, tag_after, line))
    # One pre-formatted argument keeps the output the same whether
    # print is a statement (Python 2) or a function (Python 3).
    print('News: %s' % news_title)
    print('Link: %s' % link)