1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import requests
import mysql.connector
from idna import unicode
from bs4 import BeautifulSoup
__author__ = 'Carlos Leo'
# Running counter that parse_html() increments for every book it yields, so
# numbering continues across pages. save() stores it in the `id` primary-key
# column. NOTE: this name shadows the built-in id() inside this module.
id = 0
def get_response(page, timeout=10):
    """Fetch one page of the Douban "Top 250 books" listing.

    Args:
        page: Zero-based page index.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get`` so a stalled connection cannot hang
            the whole run.

    Returns:
        The response body as text.

    Raises:
        RecursionError: If the server answers with a status other than 200.
            The type is semantically odd but is kept so existing callers that
            catch it keep working.
        requests.exceptions.RequestException: On network errors or timeout.
    """
    # Douban paginates this list 25 books at a time (start=0, 25, 50, ...).
    # Stepping the offset by 10 would fetch overlapping pages and never reach
    # the tail of the list.
    page_size = 25
    resp = requests.get(
        'https://book.douban.com/top250',
        params={'start': str(page * page_size)},
        timeout=timeout,
    )
    if resp.status_code != 200:
        raise RecursionError('fail to request to target.')
    return resp.text
def parse_html(html):
    """Parse a listing page with BeautifulSoup and yield one dict per book.

    Args:
        html: Markup of a listing page, e.g. the text returned by
            ``get_response``.

    Yields:
        dict: Keys ``id``, ``img``, ``name``, ``author``, ``publisher``,
        ``date``, ``price``, ``credit`` (float) and ``desc``. ``id`` comes
        from the module-level counter, which is incremented once per book.

    Raises:
        IndexError: If a book's info line has fewer than three ``' / '``
            separated fields, or an expected element is missing.
    """
    # Module-level counter, so ids keep increasing across successive pages.
    global id

    soup = BeautifulSoup(html, 'lxml')
    for table in soup.find('div', id='wrapper').find_all('table'):
        img = table.find('img')['src']
        name = table.select('.pl2')[0].a['title']

        # get_text() instead of .string: .string is None as soon as the <p>
        # contains nested markup, which would turn the info line into 'None'.
        info = table.find('p').get_text()
        fields = [part.strip() for part in info.split(' / ')]
        # The first field is taken as the author and the last three as
        # publisher, date and price; anything in between is ignored.
        author = fields[0]
        publisher, date, price = fields[-3], fields[-2], fields[-1]

        # Built-in str replaces the `unicode` alias imported from idna, which
        # was only a Python 2 compatibility shim and is str on Python 3.
        credit = str(table.select('.rating_nums')[0].string)

        quotes = table.select('.inq')
        # Books without a one-line quote get the placeholder '无'.
        desc = str(quotes[0].string) if quotes else '无'

        id += 1
        yield {
            'id': id,
            'img': img,
            'name': name,
            'author': author,
            'publisher': publisher,
            'date': date,
            'price': price,
            'credit': float(credit),
            'desc': desc,
        }
# Write to file (disabled alternative to the database path; needs `import json` if re-enabled)
# def write_to_file():
# with open('books.txt', 'w') as f:
# for i in range(10):
# for book in parse_html(get_response(i)):
# json.dump(book, f, ensure_ascii=False)
# f.write('\n')
# print(book, 'was saved into book.txt')
# Module-level connection, used here only to make sure the target table
# exists; save_to_db() opens its own connection for every insert.
# NOTE(review): host and credentials are hard-coded; consider moving them to
# configuration or environment variables.
conn = mysql.connector.connect(host='192.168.1.71', user='root', password='root', database='message')
cursor = conn.cursor()
# Create the table on first run. `if not exists` lets the script be re-run
# without failing because the table is already there.
cursor.execute('create table if not exists book (id int(11) primary key,img varchar(100),name varchar(30),author varchar(30),publisher varchar(30),date varchar(20),price varchar(15),credit float,description varchar(30))')
def save():
    """Scrape listing pages 0 through 9 and store every book in the database.

    Each page is fetched with get_response(), parsed with parse_html(), and
    every resulting book is handed to save_to_db() column by column. A
    progress line is printed after each insert.
    """
    # Order matches the column list of the insert statement in save_to_db().
    columns = ('id', 'img', 'name', 'author', 'publisher', 'date',
               'price', 'credit', 'desc')
    for page_no in range(10):
        page_html = get_response(page_no)
        for record in parse_html(page_html):
            row = [record[key] for key in columns]
            save_to_db(*row)
            print(f'save book{record}to db')
def save_to_db(*args):
    """Insert a single book row into the ``book`` table.

    Args:
        *args: Nine values in column order: id, img, name, author, publisher,
            date, price, credit, description. They are passed as bound
            parameters, never interpolated into the SQL text.

    Raises:
        mysql.connector.Error: If connecting, inserting or committing fails.
            The cursor and connection are closed in that case too.
    """
    conn = mysql.connector.connect(host='192.168.1.71', user='root', password='root', database='message')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('insert into book(id, img, name, author, publisher, date, price, credit, description) '
                           'values(%s, %s, %s, %s, %s, %s, %s, %s, %s)', args)
            conn.commit()
        finally:
            # Release the cursor even when the insert or commit raised.
            cursor.close()
    finally:
        # Always give the connection back; a failed insert used to leak it.
        conn.close()
# Run the scrape-and-store pipeline only when executed as a script. The
# module-level database connection and table creation still run on import.
if __name__ == '__main__':
    save()
|