Database for lazy people(Dataset)

snippsat 3 Tallied Votes 291 Views Share

A look at Dataset and easy and Pythonic way to create a database.
Other candidates in this categorie,i will mention pyDAL and Peewee.
I did some test of Dataset in this post to look at.

So here a common task some web-scraping of food recipes(just a task i did help someone with).
A task you may not use a database for(to much work).

So let see how Dataset will work for this task.
Requirement Requests and BeautifulSoup.
First clean code without Dataset.

import requests
from bs4 import BeautifulSoup

start_page = 1
end_page = 3
for page in range(start_page, end_page+1):
    url = 'http://www.taste.com.au/search-recipes/?q=&%3Btag[]=13&%3Btag[]=28&%3B=&sort=title&order=asc&page={}'.format(page)
    url_page = requests.get(url)        
    soup = BeautifulSoup(url_page.text)
    tag_div = soup.find_all('div', {'class': "content-item tab-content current"})[0]\
    .find_all('div', {'class': 'story-block'})
    print('--- Page {} ---'.format(page))
    for content in tag_div:
        print(url_page.status_code, content.find('a')['href'])

In code snippet i bring in Dataset,and take out some data.

'''
Take some data from a web-scraping task an bring into Dataset.
Also looking at taking data out from database.
'''
import requests
from bs4 import BeautifulSoup
import dataset

#-- Dataset
db = dataset.connect('sqlite:///url_list.db')
table = db['url_recipes']
#--*
start_page = 1
end_page = 3
for page in range(start_page, end_page+1):
    url = 'http://www.taste.com.au/search-recipes/?q=&%3Btag[]=13&%3Btag[]=28&%3B=&sort=title&order=asc&page={}'.format(page)
    url_page = requests.get(url)   
    soup = BeautifulSoup(url_page.text)
    tag_div = soup.find_all('div', {'class': "content-item tab-content current"})[0]\
    .find_all('div', {'class': 'story-block'})
    print('--- Page {} ---'.format(page))
    for content in tag_div:	    
        data = dict(page=content.find('a')['href'], status_code=url_page.status_code)
        print(data) # Test print
        table.insert(data) #-- Into database


# Test out database:
>>> print(db.tables)
[u'url_recipes']
>>> print(db['url_recipes'].columns)
['id', 'status_code', 'page']
>>> print(len(db['url_recipes']))
186

>>> table.find_one(id='5')
OrderedDict([(u'id', 5), (u'status_code', 200), (u'page', u'http://www.taste.com.au/recipes/23959/hot+chocolate+mousse')])
>>> table.find_one(id='5')['page']
u'http://www.taste.com.au/recipes/23959/hot+chocolate+mousse'
>>> table.find_one(id='5')['status_code']
200

>>> [url for url in db['url_recipes']][5:8]
[OrderedDict([(u'id', 6), (u'status_code', 200), (u'page', u'http://www.taste.com.au/recipes/25/im+a+star+tomato+soup')]),
 OrderedDict([(u'id', 7), (u'status_code', 200), (u'page', u'http://www.taste.com.au/recipes/6196/impossible+coconut+pie')]),
 OrderedDict([(u'id', 8), (u'status_code', 200), (u'page', u'http://www.taste.com.au/recipes/10717/sketti+bolognaise')])]		


# Test out some SQL queries:
>>> [i for i in db.query("select * from url_recipes order by rowid desc limit 3")]
[OrderedDict([(u'id', 62), (u'status_code', 200), (u'page', u'http://www.taste.com.au/recipes/37592/afghan+biscuits')]),
 OrderedDict([(u'id', 61), (u'status_code', 200), (u'page', u'http://www.taste.com.au/recipes/3123/affogato+mexico')]),
 OrderedDict([(u'id', 60), (u'status_code', 200), (u'page', u'http://www.taste.com.au/recipes/23589/affogato+ice+cream+with+espresso+syrup')])]

>>> [i for i in db.query("select * from url_recipes order by rowid asc limit 3")]
[OrderedDict([(u'id', 1), (u'status_code', 200), (u'page', u'http://www.taste.com.au/recipes/34238/chilli+con+carne+with+spiced+pita+chips')]),
 OrderedDict([(u'id', 2), (u'status_code', 200), (u'page', u'http://www.taste.com.au/recipes/34380/turkey+san+choy+bow')]),
 OrderedDict([(u'id', 3), (u'status_code', 200), (u'page', u'http://www.taste.com.au/recipes/24869/ants+climbing+a+tree')])]

>>> [i for i in db.query("SELECT * FROM url_recipes LIMIT 3 OFFSET 13;")]
[OrderedDict([(u'id', 14), (u'status_code', 200), (u'page', u'http://www.taste.com.au/recipes/36642/not+so+fried+ice+cream')]),
 OrderedDict([(u'id', 15), (u'status_code', 200), (u'page', u'http://www.taste.com.au/recipes/39502/roast+lamb+and+pea+mash+jacket+potatoes')]),
 OrderedDict([(u'id', 16), (u'status_code', 200), (u'page', u'http://www.taste.com.au/recipes/28006/seaweed+salad')])]
Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.