1. 准备工作:
1.1 安装 requests: cmd >> pip install requests
1.2 安装 lxml: cmd >> pip install lxml
1.3 安装 wheel: cmd >> pip install wheel
1.4 安装 xlwt: cmd >> pip install xlwt
1.5 安装 pymongo: cmd >> pip install pymongo
完整代码
"""Scrape 3D lottery result pages from kaijiang.zhcw.com into MongoDB.

Fetches list pages 1 through 20, extracts one record per result row and
inserts each record into the ``webdata`` collection of the ``Chapter6``
database on the default local MongoDB instance.
"""
import requests
from lxml import etree
import xlwt
from pymongo import MongoClient

# Browser-like request headers, sent so the server treats the request as
# coming from a regular browser (reduces the chance of being blocked).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
    'Accept-Encoding': 'gzip, deflate',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    'Connection': 'keep-alive',
}

# Database handles: default MongoClient() connection, database "Chapter6",
# collection "webdata".
client = MongoClient()
database = client['Chapter6']
collection = database['webdata']

# List-page URL; "{}" is replaced by the page number.
URL_TEMPLATE = "http://kaijiang.zhcw.com/zhcw/html/3d/list_{}.html"

# Pages 1..20 are fetched (same range as the original loop).
FIRST_PAGE = 1
LAST_PAGE = 20

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 10

# Stored field name -> XPath (relative to a <tr>) that yields its text.
# Insertion order matches the key order of the stored document.
FIELD_XPATHS = {
    '开奖日期': "./td[1]/text()",
    '期号': "./td[2]/text()",
    '中奖号码1': "./td[3]/em[1]/text()",
    '中奖号码2': "./td[3]/em[2]/text()",
    '中奖号码3': "./td[3]/em[3]/text()",
    '销售额(元)': "./td[4]/text()",
    '返奖比例': "./td[5]/text()",
}


def _parse_row(tr):
    """Build one record from a table row.

    Returns a dict keyed by the names in FIELD_XPATHS, or None when any
    field's XPath matches nothing (the row does not have the expected cells).
    """
    record = {}
    for field, path in FIELD_XPATHS.items():
        matches = tr.xpath(path)
        if not matches:
            return None
        record[field] = matches[0]
    return record


def _fetch_rows(page):
    """Download list page *page* and return its candidate data rows.

    Raises requests.HTTPError for a 4xx/5xx response and other
    requests.RequestException subclasses on connection failure or timeout.
    """
    url = URL_TEMPLATE.format(page)
    response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Do not parse an error page as if it were result data.
    response.raise_for_status()
    tree = etree.HTML(response.text)
    if tree is None:
        # etree.HTML can yield None for an empty document.
        return []
    rows = tree.xpath('/html/body/table//tr')
    # The first two rows and the last row are skipped; presumably these are
    # the table header rows and the pagination footer -- verify against the
    # live page markup.
    return rows[2:-1]


def main():
    """Scrape every configured page and insert each parsed row into MongoDB."""
    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        for tr in _fetch_rows(page):
            record = _parse_row(tr)
            if record is None:
                # Report rather than crash on a row with missing cells.
                print("page {}: skipped a row with missing cells".format(page))
                continue
            collection.insert_one(record)


if __name__ == "__main__":
    main()
实现效果