import urllib
from urllib2 import urlopen
from gzip import GzipFile
from cStringIO import StringIO
import re
import urllib2
def download(url):
    """Fetch *url* and return its body, transparently un-gzipping it.

    The body is read in full. If it starts with the two-byte gzip magic
    number (0x1f 0x8b) it is assumed to be gzip-compressed and the
    decompressed data is returned; otherwise the body is returned as read.

    Any error raised by ``urlopen`` or by gzip decompression propagates to
    the caller.
    """
    response = urlopen(url)
    try:
        data = response.read()
    finally:
        # Release the connection even if read() fails part-way through.
        response.close()

    # b'...' is an ordinary str on Python 2.6+, and keeps the comparison
    # correct on interpreters where read() returns a bytes object.
    if data[:2] == b'\x1f\x8b':  # assume it's gzipped data
        with GzipFile(mode='rb', fileobj=StringIO(data)) as ifh:
            data = ifh.read()
    return data
# Fetch one page of the Richmond, VA place listing and collect the links to
# the individual place pages (*.jsp) that appear in it.
s = download('http://www.locationary.com/place/en/US/Virginia/Richmond-page20/?ACTION_TOKEN=NumericAction')
findLoc = re.compile(r'http://www\.locationary\.com/place/en/US/Virginia/Richmond/.{1,100}\.jsp')
findLocL = findLoc.findall(s)

# Compiled once, outside the loop: the pattern is the same for every page.
findYP = re.compile(r'http://www\.yellowpages\.com/.{1,100}\d{1,100}')

# Visit at most the first 25 place pages. Slicing (rather than indexing
# 0..24) avoids an IndexError when the listing yields fewer than 25 links.
for locURL in findLocL[:25]:
    b = download(locURL)
    findYPL = findYP.findall(b)
    # Print only the first yellowpages link; pages with no such link are
    # skipped instead of aborting the whole run.
    if findYPL:
        print(findYPL[0])
jacob501 0 Light Poster
Recommended Answers
Jump to Post: Your code is just doing some stupid things, like redefining the function 25 times in a loop (and it still has two unnecessary imports), and you got your basic code from Gribouillis, so we would really appreciate some honest effort.
Jump to Post: You should also incrementally test your code and not just post a load of crap and demand that we fix it for you, or otherwise we are jerks. I get that your web page is zipped in some odd form. Note that urlopen().read() returns a string or bytes, not a …
Jump to Post: Well, that's off-topic and more of a personal problem.
Perhaps a reply acknowledging the suggestions pyTony and woooee made above would be more appropriate.
All 10 Replies
jacob501 0 Light Poster
jacob501 0 Light Poster
jacob501 0 Light Poster
jacob501 0 Light Poster
TrustyTony 888 ex-Moderator Team Colleague Featured Poster
jacob501 0 Light Poster
TrustyTony 888 ex-Moderator Team Colleague Featured Poster
woooee 814 Nearly a Posting Maven
jacob501 0 Light Poster
Ezzaral 2,714 Posting Sage Team Colleague Featured Poster
Be a part of the DaniWeb community
We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.