# Scrape movie review data from Douban: specifically the movie title and the review text.
import requests
import re
# Listing URL; the ``start`` query parameter is the offset of the first
# review shown on the page.
REVIEW_URL = "https://movie.douban.com/review/best/?start={}"
PAGE_SIZE = 20        # offset step between consecutive listing pages
PAGE_COUNT = 5        # number of listing pages to fetch
REQUEST_TIMEOUT = 10  # seconds; without it a stalled connection blocks forever

# Browser-like User-Agent sent with every request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
}

# Compiled once instead of on every page.  ``re.S`` lets ``.`` cross line
# breaks.  The ``comment`` group is lazy and is terminated by the first
# literal space character after the spoiler marker.
# NOTE(review): the whitespace inside this pattern is reproduced exactly as
# found in the source -- confirm it still matches the live page markup.
REVIEW_PATTERN = re.compile(
    r'<div data-cid=.*?<h2><a href=.*?>(?P<name>.*?)</a></h2>.*? <p class=.*?剧透</p>(?P<comment>.*?) ',
    re.S,
)


def extract_reviews(html):
    """Return the reviews found in *html* as a list of dicts.

    Each dict has two keys taken from the named groups of
    ``REVIEW_PATTERN``: ``"name"`` (the text of the title link) and
    ``"comment"`` (the captured review text with surrounding whitespace
    stripped).  An input with no matching markup yields an empty list.
    """
    reviews = []
    for match in REVIEW_PATTERN.finditer(html):
        review = match.groupdict()
        review["comment"] = review["comment"].strip()
        reviews.append(review)
    return reviews


def main():
    """Fetch each listing page in turn and print every review dict found."""
    # Offsets 0, 20, 40, 60, 80 -- the same values as (i - 1) * 20 for i in 1..5.
    for start in range(0, PAGE_COUNT * PAGE_SIZE, PAGE_SIZE):
        resp = requests.get(
            REVIEW_URL.format(start),
            headers=HEADERS,
            timeout=REQUEST_TIMEOUT,
        )
        for review in extract_reviews(resp.text):
            print(review)


if __name__ == "__main__":
    main()