''' xml_extract_data1.py
read an xml string and extract needed data
modified to work with Python27 and Python33
'''

try:
    # Python2
    from urllib2 import urlopen
    import urllib
except ImportError:
    # Python3
    from urllib.request import urlopen
    import urllib.parse as urllib 

def extract_between(text, sub1, sub2):
    """
    return the part of text that lies between the first occurrence of
    sub1 and the first occurrence of sub2 that follows it

    the match is case sensitive
    returns None if sub2 does not appear after sub1 (which is also
    the case when sub1 is missing, as nothing follows it then)
    """
    # split once on sub1; only the remainder after it matters
    _before, _marker, remainder = text.partition(sub1)
    if sub2 in remainder:
        # everything ahead of the closing marker is the wanted substring
        return remainder.partition(sub2)[0]
    # closing marker is missing, so do not hand back the whole remainder
    return None

# Build the geocoding request URL for a free-form address.
# NOTE(review): this endpoint may now require HTTPS and an API key -- confirm
# against the current Geocoding API documentation before relying on it.
address = "Lowell Observatory, Flagstaff, AZ"
url1 = "http://maps.googleapis.com/maps/api/geocode/xml?address="
# percent-encode the UTF-8 bytes of the address so it is safe in a query string
url2 = url1 + urllib.quote(address.encode('utf-8')) + "&sensor=false"

# Close the HTTP response explicitly so the connection is not leaked.
# try/finally is used instead of "with" because the Python2 urllib2
# response object is not a context manager.
response = urlopen(url2)
try:
    xml_code = response.read()
finally:
    response.close()

print(type(xml_code))

# read() gives a str on Python2 but bytes on Python3; normalize to str.
# isinstance() is preferred over comparing type() results directly.
if isinstance(xml_code, str):
    print("using Python2 string")
else:
    print("decoding Python3 bytes to string")
    # be explicit about the encoding instead of relying on the default
    xml_code = xml_code.decode('utf-8')

print(type(xml_code))
# to inspect the raw XML response, print xml_code here
print('-'*50)

# Pull the first <formatted_address> element's text out of the XML;
# address2 is None when the element is not present in the response.
print("extracting address:")
sub1 = "<formatted_address>"
sub2 = "</formatted_address>"
address2 = extract_between(xml_code, sub1, sub2)
print(address2)

''' result ...

<class 'bytes'>
decoding Python3 bytes to string
<class 'str'>
--------------------------------------------------
extracting address:
Lowell Observatory, 1400 West Mars Hill Road, Flagstaff, AZ 86001, USA

'''
5
Contributors
5
Replies
55
Views
4 Years
Discussion Span
Last Post by HiHe
0

When I was researching how to get the type of an object, everything I read said "don't do type(), use isinstance() instead".

Videos from people like Guido van Rossum, or others with high standing in the Python community, always drop little comments like:

"type() would work here, but you should be using isinstance() for that anyway"

They drop little lines like this in their talks without going into much detail as to why. I wish I had a direct quote; I know I've heard it more than once.

What I'm asking is this: in something as simple as what you've done there with if type(xml_code) == str: (which is really similar to what I was trying to do; just basic types, nothing fancy), is it okay to do that? Or is it still better to use an if isinstance(xml_code, str):? I don't know everyone here at DaniWeb, but I know vegaseat and pyTony know their stuff, and I'm sure a lot of others do too. So I'm asking you: what do you think?

Have something to contribute to this discussion? Please be thoughtful, detailed and courteous, and be sure to adhere to our posting rules.