This Python code shows a way to retrieve a picture from a web page and save it to a file.

# load a given picture from a web page and save it to a file
# (you have to be on the internet to do this)
# tested with Python24     vegaseat     19sep2006

import urllib2
import webbrowser
import os

# find yourself a picture on a web page you like
# (right click on the picture, look under properties and copy the address)
picture_page = "http://www.google.com/intl/en/images/logo.gif"

#webbrowser.open(picture_page)  # test

# open the web page picture and read it into a variable;
# try/finally closes the connection even if the read fails
opener1 = urllib2.build_opener()
page1 = opener1.open(picture_page)
try:
    my_picture = page1.read()
finally:
    page1.close()

# take the extension from the URL (e.g. ".gif" or ".jpeg") instead of
# assuming that it is always three letters long
filename = "my_image" + os.path.splitext(picture_page)[1]
print(filename)  # test

# open file for binary write and save picture;
# try/finally closes the file even if the write fails
fout = open(filename, "wb")
try:
    fout.write(my_picture)
finally:
    fout.close()

# was it saved correctly?
# test it out ...
webbrowser.open(filename)

# or ...
# on Windows this will display the image in the default viewer
#os.startfile(filename)

hello,

Thanks for posting nice and easy code.
I tried it and it works fine for me.

Now I want to make some changes to it.

E.g.,
currently it can only save one specified image.
What if I want to save all images present on a particular URL?

i.e;

Fetch all images on a particular URL and save them to disk.


Can anyone give me a hint or help me do it?

thanks

Hi,

I had a go using the posted code, and then found a simpler way.

urllib.urlretrieve(page_url,save_url )

I did create a file for it since I was using it for the same issue as you had.

#SaveBinary.py
import urllib

def saveBinary(page_url, save_url):
    """Download the resource at page_url and save it to the file save_url.

    A line is printed before the download starts and another one after it
    has finished.

    page_url -- URL of the resource to fetch
    save_url -- local file path the downloaded data is written to

    Errors raised by urllib.urlretrieve propagate to the caller;
    urllib.urlcleanup() is called whether or not the download succeeds.

    Example (needs a network connection, so it is not a doctest):

        saveBinary("http://www.liberliber.it/audioteca/b/barrie/"
                   "peter_pan_nei_giardini_di_kensington/mp3/"
                   "barrie_peter_pan_sil_003_cap02.mp3",
                   "barrie_peter_pan_sil_003_cap02.mp3")

    TODO: add a progress report.
    """
    print("getting" + page_url)
    try:
        urllib.urlretrieve(page_url, save_url)
    finally:
        # always tidy up urllib's temporary files, even after a failure
        urllib.urlcleanup()
    print("saved" + page_url)
        
#saveBinary( "http://www.liberliber.it/audioteca/b/barrie/peter_pan_nei_giardini_di_kensington/mp3/barrie_peter_pan_sil_004_cap03.mp3", "D:\\cd\\Audobooks\\barrie - peter_pan_nei_giardini_di_kensington\\barrie_peter_pan_sil_004_cap03.mp3")

RonaldDuncan
http://www.uk-plc.net

Here is the full programme to download the MP3 audiobooks from http://www.liberliber.it. This is my first Python program, so I am sure it can be massively improved. However, you should be able to easily hack it to download your images :)


Bring back the authors

#GetListofAuthors.py
import urllib
import re
import GetListOfUrls
root_url = "http://www.liberliber.it/audioteca/libroparlato.htm"
root = "http://www.liberliber.it/audioteca/"
# print root

# fetch the index page; try/finally closes the connection even if the
# read fails
f = urllib.urlopen(root_url)
try:
    html = f.read()
finally:
    f.close()
# print html

# match the anchor tags that link to an index page
# (the dot in "index.htm" is escaped so that it only matches a literal dot)
href = re.compile(r'\<a href="\S/[^"]+/index\.htm"\>')
hrefs = href.findall(html)
# print hrefs

# skip the first four matches, then drop duplicates so that every link is
# processed only once
# NOTE(review): presumably the first four links are not author pages - confirm
currentHrefs = hrefs[4:]
setHrefs = set(currentHrefs)
for ref in setHrefs:
    # print ref
    # ref is the complete <a href="..."> tag, not just the path
    GetListOfUrls.getListOfUrls(root + ref)

print("end of programme")

This calls GetListOfUrls

#GetListOfUrls.py
import urllib
import string
import os
import SaveBinary
import re

def getListOfUrls(author_href, save_path_root="D:\\CD\\Audobooks\\"):
    """Download every mp3 file linked from one author's page.

    author_href -- text containing the double-quoted link to the author's
                   index page, e.g. '<a href="b/barrie/index.htm">';
                   only the first quoted ".../index.htm" link is used
    save_path_root -- directory under which one "<author> - <book>" folder
                      is created per book (missing folders are created)

    Each mp3 link found on the author's page is handed to
    SaveBinary.saveBinary together with its destination path.

    Raises ValueError when author_href contains no quoted index.htm link.
    """
    # built once per call instead of once per link
    index_link = re.compile(r'"([^"]+/index\.htm)"')
    mp3_link = re.compile(r'<a href="([^"]+\.mp3)">')

    found = index_link.findall(author_href)
    if not found:
        raise ValueError("no index.htm link found in " + repr(author_href))

    # e.g. "b/barrie/index.htm" -> ["b", "barrie", "index.htm"];
    # the pattern requires a "/", so there are always at least two parts
    bits = found[0].split("/")
    author = bits[1]
    author_url = bits[0] + "/" + bits[1] + "/"
    root_url = "http://www.liberliber.it/audioteca/" + author_url
    print("getting" + root_url)

    # try/finally closes the connection even if the read fails
    f = urllib.urlopen(root_url)
    try:
        html = f.read()
    finally:
        f.close()

    for item_url in mp3_link.findall(html):
        parts = item_url.split("/")
        # the first path segment names the folder, the last one the file
        book = parts[0]
        filename = parts[-1]
        save_dir = os.path.join(save_path_root, author + " - " + book)
        if not os.path.isdir(save_dir):
            # makedirs also creates save_path_root when it is missing
            os.makedirs(save_dir)

        SaveBinary.saveBinary(root_url + item_url,
                              os.path.join(save_dir, filename))

And finally SaveBinary

#SaveBinary.py
import urllib

def saveBinary(page_url, save_url):
    """Download the resource at page_url and save it to the file save_url.

    A line is printed before the download starts and another one after it
    has finished.

    page_url -- URL of the resource to fetch
    save_url -- local file path the downloaded data is written to

    Errors raised by urllib.urlretrieve propagate to the caller;
    urllib.urlcleanup() is called whether or not the download succeeds.

    Example (needs a network connection, so it is not a doctest):

        saveBinary("http://www.liberliber.it/audioteca/b/barrie/"
                   "peter_pan_nei_giardini_di_kensington/mp3/"
                   "barrie_peter_pan_sil_003_cap02.mp3",
                   "barrie_peter_pan_sil_003_cap02.mp3")
    """
    print("getting" + page_url)
    try:
        urllib.urlretrieve(page_url, save_url)
    finally:
        # always tidy up urllib's temporary files, even after a failure
        urllib.urlcleanup()
    print("saved" + page_url)
        
#saveBinary( "http://www.liberliber.it/audioteca/b/barrie/peter_pan_nei_giardini_di_kensington/mp3/barrie_peter_pan_sil_004_cap03.mp3", "D:\\cd\\Audobooks\\barrie - peter_pan_nei_giardini_di_kensington\\barrie_peter_pan_sil_004_cap03.mp3")

PS: as you can see, I am learning Italian :)

RonaldDuncan
http://www.uk-plc.net

The article starter has earned a lot of community kudos, and such articles offer a bounty for quality replies.