爬虫之selenium爬取斗鱼网站
示例代码:
from selenium import webdriver
import time


class Douyu(object):
    """Crawl the Douyu live-room directory with a Selenium-driven Chrome browser.

    The crawler opens ``self.url``, tries to close a pop-up on the first
    page, then repeatedly reads the room list of the current page, prints
    it, and clicks the "next page" control until that control can no longer
    be located or clicked.

    The waits and XPath locators are class attributes so that a subclass or
    an instance can override them without touching the methods.
    """

    # Locator strategy handed to find_element(s). The literal 'xpath' is the
    # value of selenium.webdriver.common.by.By.XPATH; using the generic
    # find_element(by, value) form works on Selenium 3 and 4, whereas the
    # find_element_by_xpath shortcuts were removed in Selenium 4.3.
    XPATH = 'xpath'

    # Seconds to sleep before reading the room list of a page
    # (presumably to let the page finish rendering -- confirm).
    PAGE_WAIT = 3
    # Seconds to sleep before looking for the first-page pop-up, so that the
    # pop-up has appeared and can be closed.
    AD_WAIT = 6

    # Anchor element of every room card in the listing.
    ROOM_XPATH = '//*[@id="listAll"]/section[2]/div[2]/ul/li/div/a'
    # Locations of the title / category text, relative to a room anchor.
    TITLE_XPATH = './div[2]/div[1]/h3'
    TYPE_XPATH = './div[2]/div[1]/span'
    # Element that is clicked to dismiss the first-page pop-up.
    AD_CLOSE_XPATH = '/html/body/div[2]/span[1]'
    # "Next page" control of the pager. An alternative, text-based locator
    # is '//*[contains(text(),"下一页")]' ("下一页" means "next page").
    NEXT_XPATH = '//*[@id="listAll"]/section[2]/div[2]/div/ul/li[9]/span'

    def __init__(self):
        """Store the directory URL and start a Chrome WebDriver session."""
        self.url = 'https://www.douyu.com/directory/all'
        self.driver = webdriver.Chrome()

    def parse_data(self):
        """Read the title and category of every room on the current page.

        Sleeps ``PAGE_WAIT`` seconds first, then prints how many room
        elements were found.

        Returns:
            list: one ``{'title': ..., 'type': ...}`` dict per element
            matched by ``ROOM_XPATH``; an empty list when nothing matches.
        """
        time.sleep(self.PAGE_WAIT)
        room_list = self.driver.find_elements(self.XPATH, self.ROOM_XPATH)
        print(len(room_list))

        data_list = []
        for room in room_list:
            data_list.append({
                'title': room.find_element(self.XPATH, self.TITLE_XPATH).text,
                'type': room.find_element(self.XPATH, self.TYPE_XPATH).text,
            })
        return data_list

    def save_data(self, data_list):
        """Print every record in ``data_list``, one per line."""
        for data in data_list:
            print(data)

    def _close_ad(self):
        """Best-effort dismissal of the pop-up shown on the first page.

        Any failure (for example, no pop-up present) is printed and
        otherwise ignored so that crawling continues.
        """
        try:
            # Wait a few seconds so the pop-up is there to be closed.
            time.sleep(self.AD_WAIT)
            self.driver.find_element(self.XPATH, self.AD_CLOSE_XPATH).click()
        except Exception as e:
            print(e)

    def _goto_next_page(self):
        """Scroll down and click the "next page" control.

        Returns:
            bool: ``True`` if the control was found and clicked, ``False``
            if locating, scrolling or clicking raised an exception.
        """
        try:
            el_next = self.driver.find_element(self.XPATH, self.NEXT_XPATH)
            self.driver.execute_script('scrollTo(110,100000)')
            el_next.click()
        except Exception:
            # Narrower than a bare ``except`` so Ctrl-C / SystemExit still
            # propagate instead of being mistaken for "no more pages".
            return False
        return True

    def run(self):
        """Crawl page after page, then close the browser.

        Opens ``self.url``, tries to close the first-page pop-up, and loops
        parse -> save -> next page until the next page cannot be reached.
        The WebDriver session is always quit on the way out, including when
        an exception escapes, so no browser process is left behind.
        """
        try:
            self.driver.get(self.url)
            self._close_ad()
            while True:
                data_list = self.parse_data()
                self.save_data(data_list)
                if not self._goto_next_page():
                    break
        finally:
            self.driver.quit()
# Script entry point: build the crawler and run it when executed directly.
if __name__ == '__main__':
    douyu = Douyu()
    douyu.run()
运行效果:
思路用图: