def hdktest(url, userid):
    """Fetch *url* and return the response body decoded as text.

    Args:
        url: Address to request with ``urllib.request.urlopen``.
        userid: Value matched against ``hdktask_linkuserid`` to find the
            task row that is updated when the request fails.

    Returns:
        The decoded response body, or ``None`` when the server answered
        with an HTTP error.

    On ``urllib.error.HTTPError`` the matching task row gets
    ``hdktask_status`` set to 0 and the error is printed. Any other
    exception (including a plain ``URLError``) propagates to the caller.
    """
    try:
        # The context manager closes the connection even when read() or
        # decode() fails, so the response handle is never leaked.
        with urllib.request.urlopen(url) as response:
            return response.read().decode()
    # Exception handling
    except urllib.error.HTTPError as reason:
        taskinfo = hdktask.objects.get(hdktask_linkuserid=userid)
        taskinfo.hdktask_status = 0
        taskinfo.save(update_fields=["hdktask_status"])
        print(reason)
        # Explicit so callers can see that a failed fetch yields None.
        return None
# Run an hdk task
def _hdk_list_url(shoptype, page):
    """Build the item-list URL for the given shop type and page number."""
    return (
        "https://xxx/allitem/new_get_allitem_list?keyword=&shopsearch="
        "&user_name=&brand_name=&cid=0&min_price=&max_price=&filtrate_type="
        "&min_sale=&min_rate=&min_tkMoney=&min_avg=&min_coupon=&p=" + str(page)
        + "&shoptype=" + str(shoptype)
        + "&discount=&favorites_count=&min_itemprice=&max_itemprice=&sort=1"
    )


def _first_match(pattern, text):
    """Return the first captured group of *pattern* in *text*, or ""."""
    found = re.search(pattern, text)
    return found.group(1) if found else ""


def runhdktask(uname, userid, url):
    """Crawl every result page for a shop type and save the items to Excel.

    Args:
        uname: Used as the worksheet name and as the output file name.
        userid: Value matched against ``hdktask_linkuserid`` to find the
            task row whose ``hdktask_status`` is set to 0 at the end.
        url: Address containing a ``shoptype=<digits>`` query parameter.

    The workbook has one header row followed by one row per item (title,
    price after coupon, today's sales, shop name) and is written to
    ``<MEDIA_ROOT>/hdk/<uname>.xlsx``. Nothing is returned.

    ``hdktask_status`` is reset to 0 on every exit path, including errors,
    so a failed run does not leave the task row in its running state
    (presumably 0 means "idle" -- confirm against the model definition).
    """
    # Local import keeps this block self-contained; the file's import
    # section is not visible from here.
    import os

    try:
        shoptype = _first_match(r'shoptype=(\d+)', url)
        if not shoptype:
            print("runhdktask: no shoptype parameter in url: " + str(url))
            return

        # The first page is fetched up front because it carries the total
        # page count; it is reused below instead of being requested twice.
        first_page = hdktest(_hdk_list_url(shoptype, 1), userid)
        page_total = _first_match(r'"num_page":(\d+),', str(first_page))
        if not page_total:
            print("runhdktask: could not read num_page from the first page")
            return

        hdkexcel = xlwt.Workbook()
        worksheet = hdkexcel.add_sheet(uname, cell_overwrite_ok=True)
        for column, heading in enumerate(("商品名称", "券后价格", "今日销量", "店铺")):
            worksheet.write(0, column, heading)

        # A running row counter keeps rows contiguous no matter how many
        # items a page returns (the old index assumed exactly 100 per page).
        row = 1
        # Walk through every page
        for page in range(1, int(page_total) + 1):
            if page == 1:
                page_text = first_page
            else:
                page_text = hdktest(_hdk_list_url(shoptype, page), userid)
            if page_text is None:
                # hdktest already printed the HTTP error; skip this page.
                continue

            # Walk through every item on the page
            for item in re.findall(r'"itemendprice".+?},', page_text):
                # Item title
                title = _first_match(r'"itemtitle":"(.+?)"', item)
                # Price after coupon, truncated to at most two decimals
                end_price = _first_match(
                    r'"itemendprice":(\d+[.]?\d{0,2})', item)
                # Today's sales
                today_sale = _first_match(r'"todaysale":(\d+)', item)
                # Shop name
                shop_name = _first_match(r'"shopname":"(.+?)"', item)

                # NOTE(review): this pauses once per item, not once per
                # request -- kept as-is; confirm whether that is intended.
                time.sleep(1)

                worksheet.write(row, 0, title)
                worksheet.write(row, 1, end_price)
                worksheet.write(row, 2, today_sale)
                worksheet.write(row, 3, shop_name)
                row += 1

        # str() lets this work whether MEDIA_ROOT is a str or a Path.
        hdkpath = os.path.join(str(settings.MEDIA_ROOT), "hdk")
        os.makedirs(hdkpath, exist_ok=True)
        # NOTE(review): xlwt appears to write the legacy .xls format; the
        # .xlsx extension is kept because other code may rely on this path.
        hdkexcel.save(os.path.join(hdkpath, uname + ".xlsx"))
    finally:
        taskinfo = hdktask.objects.get(hdktask_linkuserid=userid)
        taskinfo.hdktask_status = 0
        taskinfo.save(update_fields=["hdktask_status"])
# 爬取的数据写入 Excel 后的效果如下，字段按需对应添加即可。
# 新手学 Python 编程：更多源码分享与技能干货。