1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
|
import requests import mysql.connector from idna import unicode from bs4 import BeautifulSoup
__author__ = 'Carlos Leo'

# Running row counter used as the primary key of each scraped book.
# parse_html() declares it ``global`` and increments it once per book before
# yielding, so the first book gets id 1. Note that this name shadows the
# builtin ``id`` for the whole module.
id = 0
def get_response(page, page_size=25, timeout=10):
    """Fetch one page of the Douban book "Top 250" listing and return its HTML.

    Args:
        page: Zero-based page index.
        page_size: Number of entries the ``start`` offset advances per page.
            The default of 25 makes ten pages cover offsets 0..225, i.e. the
            whole 250-entry list; the previous fixed step of 10 requested
            overlapping windows and never reached the end of the list.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the crawl forever.

    Returns:
        The response body decoded as text.

    Raises:
        RecursionError: If the server answers with a status other than 200.
            The exception type is unusual for an HTTP failure but is kept so
            existing callers that catch it keep working.
        requests.RequestException: Propagated from ``requests`` on connection
            errors or timeouts.
    """
    resp = requests.get(
        'https://book.douban.com/top250',
        params={'start': str(page * page_size)},
        timeout=timeout,
    )
    if resp.status_code != 200:
        # Include the status code so a blocked or throttled request is
        # distinguishable from other failures in the traceback.
        raise RecursionError(
            'fail to request to target. (HTTP %s)' % resp.status_code
        )
    return resp.text
def parse_html(html):
    """Yield one dict per book found in a listing page.

    Every ``<table>`` inside ``<div id="wrapper">`` is treated as one book.
    The first ``<p>`` of the table holds a `` / ``-separated info line; its
    first field is taken as the author and its last three fields as
    publisher, date and price.

    Args:
        html: Markup of one listing page.

    Yields:
        dict with keys ``id``, ``img``, ``name``, ``author``, ``publisher``,
        ``date``, ``price``, ``credit`` (float) and ``desc``. ``id`` comes
        from the module-level counter, which is incremented for every book.

    Raises:
        ValueError: If the rating text cannot be converted to ``float``.
        AttributeError, TypeError, IndexError: If the wrapper div, cover
            image, title link or rating element is missing; those lookups
            are intentionally left unguarded.
    """
    # Module-level running counter shared across calls (one id per book).
    global id

    soup = BeautifulSoup(html, 'lxml')
    tables = soup.find('div', id='wrapper').find_all('table')
    for table in tables:
        img = table.find('img')['src']
        name = table.select('.pl2')[0].a['title']

        # ``str`` is used directly instead of the ``unicode`` alias imported
        # from idna, so parsing does not depend on that alias being exported.
        info = str(table.find('p').string)
        fields = [part.strip() for part in info.split(' / ')]
        author = fields[0]
        # Left-pad with empty strings so an info line with fewer than three
        # fields yields blanks instead of an IndexError that would abort the
        # whole crawl. Lines with three or more fields are unaffected.
        padded = [''] * (3 - len(fields)) + fields
        publisher, date, price = padded[-3:]

        credit = str(table.select('.rating_nums')[0].string)

        # Not every book has a one-line quote; fall back to the placeholder.
        quotes = table.select('.inq')
        desc = str(quotes[0].string) if quotes else '无'

        id += 1
        yield {
            'id': id,
            'img': img,
            'name': name,
            'author': author,
            'publisher': publisher,
            'date': date,
            'price': price,
            'credit': float(credit),
            'desc': desc,
        }
# Module-level setup: connect to the MySQL server and make sure the target
# table exists before any rows are inserted. This runs at import time.
# NOTE(review): host and credentials are hard-coded here and again in
# save_to_db(); consider moving them to configuration.
conn = mysql.connector.connect(host='192.168.1.71', user='root', password='root', database='message')
cursor = conn.cursor()

# ``if not exists`` keeps a second run from failing on table creation when
# the table is already there. Ids restart from 1 on every run, so inserting
# into a table that still holds rows from an earlier run will still conflict
# on the primary key.
cursor.execute(
    'create table if not exists book ('
    'id int(11) primary key,'
    'img varchar(100),'
    'name varchar(30),'
    'author varchar(30),'
    'publisher varchar(30),'
    'date varchar(20),'
    'price varchar(15),'
    'credit float,'
    'description varchar(30))'
)
def save():
    """Crawl listing pages 0 through 9 and store every parsed book.

    For each page the HTML is fetched with get_response(), split into book
    records by parse_html(), and each record is written with save_to_db()
    and then echoed to stdout.
    """
    # Order in which the record's values are handed to save_to_db().
    columns = ('id', 'img', 'name', 'author', 'publisher',
               'date', 'price', 'credit', 'desc')

    page = 0
    while page < 10:
        html = get_response(page)
        for record in parse_html(html):
            values = [record[key] for key in columns]
            save_to_db(*values)
            print('save book' + str(record) + 'to db')
        page += 1
def save_to_db(*args):
    """Insert one row into the ``book`` table and commit it.

    A new connection is opened for each call and is always closed again,
    even when the insert or the commit fails.

    Args:
        *args: The nine column values, in the order id, img, name, author,
            publisher, date, price, credit, description. They are passed to
            the driver as query parameters, not interpolated into the SQL.

    Raises:
        mysql.connector.Error: Propagated from the driver when connecting,
            inserting or committing fails.
    """
    conn = mysql.connector.connect(host='192.168.1.71', user='root', password='root', database='message')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('insert into book(id, img, name, author, publisher, date, price, credit, description) '
                           'values(%s, %s, %s, %s, %s, %s, %s, %s, %s)', args)
            conn.commit()
        finally:
            # Release the cursor even if execute()/commit() raised.
            cursor.close()
    finally:
        # Without this, a failed insert would leak the connection.
        conn.close()


if __name__ == '__main__':
    save()
|