from urllib import urlretrieve 
import urllib2

import re
import gzip

# Scrape script (Python 2): download a locationary.com listing page, pull the
# place-page URLs out of it, then fetch each place page and print the first
# yellowpages.com link found on it.

# Save the listing page to the local file 'myfile'.
urlretrieve('http://www.locationary.com/place/en/US/Virginia/Richmond-page28/?ACTION_TOKEN=NumericAction', 'myfile')

# Read the saved file back through gzip, i.e. the download is treated as
# gzip-compressed data.
page = gzip.open('myfile', 'rb').read()

# Place-page URLs: the Richmond prefix, 1-100 arbitrary characters, then '.jsp'.
findLoc = re.compile('http://www\.locationary\.com/place/en/US/Virginia/Richmond/.{1,100}\.jsp')

# Every place-page URL found in the listing page, as a list of strings.
findLocL = re.findall(findLoc,page)

# Indices 0..24. The empty list assigned first is immediately overwritten by
# the slice assignment on the next line.
listIterator = []
listIterator[:] = range (0,25)

for i in listIterator:
    
    # NOTE(review): `i` is an integer index here, not a URL, so urlretrieve
    # fails with "AttributeError: 'int' object has no attribute 'strip'".
    # Presumably findLocL[i] was intended -- confirm.
    urlretrieve(i, 'myfile2')

    # Same gzip assumption as for the listing page.
    page2 = gzip.open('myfile2', 'rb').read()

    # yellowpages.com links: the host, 1-100 arbitrary characters, then 1-100
    # digits. The pattern does not depend on the loop variable but is
    # recompiled on every iteration.
    findYP = re.compile('http://www\.yellowpages\.com.{1,100}\d{1,100}')

    findYPL = re.findall(findYP,page2)

    # range(0,1) yields only index 0, so the inner loop runs exactly once.
    listIterator2 = []
    listIterator2[:] = range(0,1)

    # The inner loop reuses the name `i`; the outer loop rebinds it on its
    # next iteration. findYPL[i] raises IndexError when the page contains no
    # matching link.
    for i in listIterator2:
        print findYPL[i]
        print "\n"

When I load this code, I get the following error from Python:

Traceback (most recent call last):
File "C:\Users\Robert\Documents\python\locationary.py", line 38, in <module>
urlretrieve(i, 'myfile2')
File "C:\Python27\lib\urllib.py", line 91, in urlretrieve
return _urlopener.retrieve(url, filename, reporthook, data)
File "C:\Python27\lib\urllib.py", line 225, in retrieve
url = unwrap(toBytes(url))
File "C:\Python27\lib\urllib.py", line 1038, in unwrap
url = url.strip()
AttributeError: 'int' object has no attribute 'strip'

What did I do wrong and how can I fix this?

Help would be greatly appreciated. Thanks!

Recommended Answers

All 9 Replies

Wow...never mind...I see my dumb mistake now! Thanks DaniWeb...sort of...

Wait!!! Now I have another problem! I keep getting the error:

Traceback (most recent call last):
File "C:\Users\Robert\Documents\python\locationary.py", line 41, in <module>
page2 = gzip.open('myfile2', 'rb').read()
File "C:\Python27\lib\gzip.py", line 245, in read
self._read(readsize)
File "C:\Python27\lib\gzip.py", line 287, in _read
self._read_gzip_header()
File "C:\Python27\lib\gzip.py", line 181, in _read_gzip_header
raise IOError, 'Not a gzipped file'
IOError: Not a gzipped file

from urllib import urlretrieve 
import urllib2

from BeautifulSoup import BeautifulSoup
from BeautifulSoup import BeautifulStoneSoup

import os
import re
import gzip

# Scrape script (Python 2), second version: download a locationary.com listing
# page, extract the place-page URLs, fetch each place page, print the first
# yellowpages.com link found on it, then delete the temporary download.

# Save the listing page to the local file 'myfile'.
urlretrieve('http://www.locationary.com/place/en/US/Virginia/Richmond-page28/?ACTION_TOKEN=NumericAction', 'myfile')

# Read the saved file back through gzip, i.e. the download is treated as
# gzip-compressed data.
page = gzip.open('myfile', 'rb').read()

# Debug output, disabled by wrapping it in a bare string literal (the string
# is evaluated and discarded at runtime).
"""

print page

print "\n"
print "\n"
print "\n"
print "\n"
print "\n"
print "\n"

"""

# Place-page URLs: the Richmond prefix, 1-100 arbitrary characters, then '.jsp'.
findLoc = re.compile('http://www\.locationary\.com/place/en/US/Virginia/Richmond/.{1,100}\.jsp')

# Every place-page URL found in the listing page, as a list of strings.
findLocL = re.findall(findLoc,page)

# Indices 0..24. The empty list assigned first is immediately overwritten by
# the slice assignment on the next line.
listIterator = []
listIterator[:] = range (0,25)

for i in listIterator:
    #print findLocL[i]
    #print "\n"
    
    # Fetch the i-th place page. findLocL[i] raises IndexError if the listing
    # page yielded fewer than 25 matches.
    urlretrieve(findLocL[i], 'myfile2')

    # NOTE(review): gzip.open(...).read() raises "IOError: Not a gzipped file"
    # when the downloaded bytes are not gzip data. Presumably the server does
    # not compress every response -- confirm before relying on gzip here.
    page2 = gzip.open('myfile2', 'rb').read()



            

    # Debug output, disabled the same way as above (bare string literal).
    """

    print page2

    print "\n"
    print "\n"
    print "\n"
    print "\n"
    print "\n"
    print "\n"

    """

    # Grab all of the YellowPages links
    # (the host, 1-100 arbitrary characters, then 1-100 digits; the pattern is
    # recompiled on every iteration although it never changes)
    findYP = re.compile('http://www\.yellowpages\.com.{1,100}\d{1,100}')

    # Store all of the links
    findYPL = re.findall(findYP,page2)

    # Create an iterator that will give the first link
    # (range(0,1) yields only index 0)
    listIterator2 = []
    listIterator2[:] = range(0,1)

    # Print out the results to screen
    # The inner loop reuses the name `i`; the outer loop rebinds it on its
    # next iteration. findYPL[i] raises IndexError when no link matched.
    for i in listIterator2:
        print findYPL[i]
        print "\n"
    
    # Delete the temporary download via a hard-coded absolute path.
    # NOTE(review): urlretrieve wrote to the relative name 'myfile2', so this
    # removes the same file only if the working directory is that folder --
    # confirm. The path is a non-raw string with unescaped backslashes.
    os.remove("C:\Users\Robert\Documents\python\myfile2")

What is wrong with my code?

Hey pyTony. If you read this, would you mind helping me out a little if you can?

Thanks.

This does not make any sense to me: importing modules that are never used, defining a variable that is immediately overwritten (lines 32 and 33), a loop of one iteration...

Well... if you ignore all of that, what do you think about the gzip error? I don't understand it at all...

Well... if you ignore all of that, what do you think about the gzip error? I don't understand it at all...

The gzip error may happen because your site sometimes sends gzipped data and sometimes uncompressed data. I suggest a function which recognizes compressed data:

from urllib2 import urlopen
from gzip import GzipFile
from cStringIO import StringIO

def download(url):
    """Fetch *url* and return the response body, gunzipping it if needed.

    The body is treated as gzip-compressed when it starts with the gzip
    magic number (bytes 0x1f 0x8b); otherwise it is returned unchanged.

    Args:
        url: The URL to fetch; passed straight to ``urlopen``.

    Returns:
        The response body as a byte string, decompressed when it was gzipped.

    Raises:
        Whatever ``urlopen`` raises on a failed request, and ``IOError`` if
        the body starts with the gzip magic number but is not valid gzip data.
    """
    # Imported locally so the function does not depend on an extra
    # module-level import being present.
    from contextlib import closing

    # closing() guarantees the response (and its socket) is released even if
    # read() raises, instead of leaving that to garbage collection.
    with closing(urlopen(url)) as response:
        s = response.read()

    if s[:2] == '\x1f\x8b':  # gzip magic number: assume it's gzipped data
        with GzipFile(mode='rb', fileobj=StringIO(s)) as ifh:
            s = ifh.read()
    return s

# Example: fetch the Richmond listing page and print its (decompressed) body.
s = download(
    'http://www.locationary.com/place/en/US/Virginia/Richmond-page28/?ACTION_TOKEN=NumericAction'
)
print(s)
commented: Very helpful!! +1

The gzip error may happen because your site sometimes sends gzipped data and sometimes uncompressed data. I suggest a function which recognizes compressed data:

from urllib2 import urlopen
from gzip import GzipFile
from cStringIO import StringIO

def download(url):
    """Fetch *url* and return the response body, gunzipping it if needed.

    The body is treated as gzip-compressed when it starts with the gzip
    magic number (bytes 0x1f 0x8b); otherwise it is returned unchanged.

    Args:
        url: The URL to fetch; passed straight to ``urlopen``.

    Returns:
        The response body as a byte string, decompressed when it was gzipped.

    Raises:
        Whatever ``urlopen`` raises on a failed request, and ``IOError`` if
        the body starts with the gzip magic number but is not valid gzip data.
    """
    # Imported locally so the function does not depend on an extra
    # module-level import being present.
    from contextlib import closing

    # closing() guarantees the response (and its socket) is released even if
    # read() raises, instead of leaving that to garbage collection.
    with closing(urlopen(url)) as response:
        s = response.read()

    if s[:2] == '\x1f\x8b':  # gzip magic number: assume it's gzipped data
        with GzipFile(mode='rb', fileobj=StringIO(s)) as ifh:
            s = ifh.read()
    return s

# Example: fetch the Richmond listing page and print its (decompressed) body.
s = download(
    'http://www.locationary.com/place/en/US/Virginia/Richmond-page28/?ACTION_TOKEN=NumericAction'
)
print(s)

Thanks so much! It worked! You have been a great help today!

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.