-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata.py
More file actions
52 lines (51 loc) · 1.82 KB
/
data.py
File metadata and controls
52 lines (51 loc) · 1.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import json
import os
import re
import sqlite3
import warnings
from urllib.parse import urljoin

import requests
import urllib3
from bs4 import BeautifulSoup
from urllib3.exceptions import InsecureRequestWarning
# Scrape the genre listing pages linked from the books.toscrape.com sidebar
# and insert one row per listed book into the `books` table of bookstore.db.

# Silence urllib3's InsecureRequestWarning for the whole process.
warnings.simplefilter('ignore', InsecureRequestWarning)

# The database file sits in the same directory as this script.
ROOT_FOLDER = os.path.dirname(os.path.abspath(__file__))
bookstore = os.path.join(ROOT_FOLDER, 'bookstore.db')
conn = sqlite3.connect(bookstore)

url = "http://books.toscrape.com/"

# Seconds to wait on each HTTP request. Without a timeout, requests.get can
# block indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30

# Maps the word found in a book's "star-rating" CSS class to its number.
# Any other word falls back to 0.
RATING_BY_WORD = {"One": 1, "Two": 2, "Three": 3, "Four": 4, "Five": 5}

# Seven positional values per row. The column names of `books` are not
# visible in this file.
INSERT_BOOK_SQL = "INSERT INTO books values(?,?,?,?,?,?,?)"

page = requests.get(url, timeout=REQUEST_TIMEOUT)
# Fail with a clear HTTP error instead of a later AttributeError on None.
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')
sideCat = soup.find(class_="side_categories")
# The first sidebar link is skipped; every remaining link is treated as a
# genre listing page.
links = sideCat.find_all("a")[1:]
genrelist = []
# Not referenced anywhere else in this script; kept in case another module
# reads it.
stock = 5

for link in links:
    genre_url = urljoin(url, link['href'])
    genre_page = requests.get(genre_url, timeout=REQUEST_TIMEOUT)
    genre_page.raise_for_status()
    genre_soup = BeautifulSoup(genre_page.content, 'html.parser')

    genre = genre_soup.find('div', class_='page-header').find('h1').string
    genrelist.append(genre)

    # NOTE(review): only the page behind the sidebar link is read. If a
    # genre's listing spans several pages, the later pages are not followed.
    listing = genre_soup.find('ol', class_='row')
    rows = []
    for pod in listing.find_all(class_='product_pod'):
        title = pod.find('h3').find("a").get("title")
        # Drop the first character of the price text (presumably the
        # currency symbol) before converting to float.
        price = float(pod.find('p', class_="price_color").string[1:])
        # The second CSS class on the star-rating element is the rating word.
        rating_word = pod.find('p', class_="star-rating")['class'][1]
        rating = RATING_BY_WORD.get(rating_word, 0)
        # Resolve relative links against the page they appear on, rather
        # than slicing a fixed number of leading characters off them.
        image = urljoin(genre_url, pod.find('img')['src'])
        details = urljoin(
            genre_url,
            pod.find('div', class_='image_container').find('a')['href'],
        )
        # The second value used to be `cursor.lastrowid` read from a brand
        # new cursor, which is always None, so None is passed explicitly.
        # Presumably that column is an auto-assigned integer key; confirm
        # against the table schema.
        rows.append((title, None, rating, price, image, details, genre))

    # One batched insert and one commit per genre.
    conn.executemany(INSERT_BOOK_SQL, rows)
    conn.commit()

print(genrelist)