import re
import time
from urllib import parse
import urllib
import requests
# Substrings that appear in pages which carry no usable post; a page
# containing any of them is skipped without being parsed.
_SKIP_MARKERS = (
    "帖子不存在",
    "错误,帖子",
    "该论坛不存在",
    "访问错误",
    "此帖审核中",
    "分版权限",
)

# Compiled once at import time instead of on every loop iteration.
_TITLE_RE = re.compile(r'<card id=\"main\" title=\"(.+?)\">')
_CONTENT_RE = re.compile(r'name=\"content\" value=\"(.+?)\" />')
_AUTHOR_ID_RE = re.compile(r"userid=(\d+)")
_FORUM_ID_RE = re.compile(r"bid=(\d+)")

# Seconds to wait for the server before giving up on a single request, so
# that one stalled connection cannot hang the whole crawl.
_REQUEST_TIMEOUT = 30


def _fetch_page(url):
    """Download *url* and return its body decoded as UTF-8."""
    # Imported explicitly: a bare ``import urllib`` does not guarantee that
    # the ``urllib.request`` submodule has been loaded.
    import urllib.request

    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(url, timeout=_REQUEST_TIMEOUT) as response:
        return response.read().decode()


def _parse_post(page):
    """Extract the post fields from *page*.

    Returns a dict with keys ``title``, ``content``, ``author_id`` and
    ``forum_id`` (each the first match in the page), or ``None`` when the
    content, author id or forum id cannot be found. ``title`` is ``None``
    when the page has no matching title attribute.
    """
    title = _TITLE_RE.search(page)
    content = _CONTENT_RE.search(page)
    author_id = _AUTHOR_ID_RE.search(page)
    forum_id = _FORUM_ID_RE.search(page)
    if content is None or author_id is None or forum_id is None:
        return None
    return {
        "title": title.group(1) if title is not None else None,
        "content": content.group(1),
        "author_id": author_id.group(1),
        "forum_id": forum_id.group(1),
    }


def updatepostinfo(startid, endid, delay=2, fetch=None):
    """Fetch and parse every post page with an id in ``[startid, endid]``.

    For each id the function waits ``delay`` seconds, prints the id,
    downloads the detail page, prints the page text and, unless the page
    contains one of the known error markers, extracts the title, content,
    author id and forum id. Pages from which the required fields cannot be
    extracted are skipped instead of aborting the run with ``IndexError``.

    Args:
        startid: First post id (anything accepted by ``int()``).
        endid: Last post id, inclusive (anything accepted by ``int()``).
        delay: Seconds to sleep before each request; ``0`` disables the
            pause.
        fetch: Optional callable taking a URL and returning the page text.
            Defaults to an HTTP GET through ``urllib.request`` with a
            timeout.

    Returns:
        None. Persisting the extracted fields is not implemented yet.

    Raises:
        ValueError: If ``startid`` or ``endid`` cannot be converted to int.
        Any exception raised by the fetcher (network or decoding errors)
        propagates to the caller.
    """
    if fetch is None:
        fetch = _fetch_page

    for num in range(int(startid), int(endid) + 1):
        if delay:
            # Throttle the crawl so the server is not hit back-to-back.
            time.sleep(delay)
        print(num)
        posturl = "http:网站/detailnew.php?id=" + str(num)
        page = fetch(posturl)
        print(page)

        if any(marker in page for marker in _SKIP_MARKERS):
            continue

        post = _parse_post(page)
        if post is None:
            # The page passed the marker checks but lacks a required field.
            continue

        # TODO: store the fields of ``post`` in the database.
if __name__ == "__main__":
    # Script entry point: walk every post id from 1 through 292812334
    # (inclusive).
    updatepostinfo(startid=1, endid=292812334)