首页 >> 新闻资讯 >>新闻资讯 >> Python脚本工具,python百度排名查询源码
详细内容

Python脚本工具,python百度排名查询源码

  Python脚本工具,python百度排名查询源码

# Baidu ranking lookup
import re
import sys
from urllib.parse import quote, urljoin

import requests

# NOTE(review): the published listing was collapsed onto a single line and
# several literals were masked with " ** ". "Win64; x64", "www.baidu.com" and
# "c-gap-bottom-small" below are reconstructions -- confirm against the
# original script before relying on them.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
}

_BAIDU_ORIGIN = "https://www.baidu.com"
_REQUEST_TIMEOUT = 10  # seconds; the original requests had no timeout at all

# Outer container that holds every organic result on a result page.
_CONTAINER_RE = r'<div id="content_left">(.+?)<div style="clear:both;height:0;"></div>'
# Rank id of a result block: primary pattern first, then the fallback.
_RANK_PATTERNS = (r'" id="([0-9]{1,4})"', r'id="([0-9]{1,4})" tpl="')
# The two patterns below were written across two lines in the original
# (triple-quoted); "\s*" accepts both that newline and the space it was
# flattened to in the published listing.
_CX_HREF_RE = r'}"\s*href = "(.+?)"'
_TITLE_HREF_RE = r'<h3 class="t c-gap-bottom-small">\s*<a href="(.+?)"'
_CXPM_HREF_RE = r'<div class="f13"><a target="_blank" href="(.+?)" class="c-showurl"'


def _fetch_html(url):
    """Download *url* with the shared headers and return the body as text."""
    return requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT).text


def _result_blocks(html, block_pattern):
    """Return the result fragments of *html* matched by *block_pattern*.

    Returns an empty list (and warns on stderr) when the result container is
    missing, instead of failing with IndexError as the original did.
    """
    container = re.findall(_CONTAINER_RE, html, re.S)
    if not container:
        print("result container not found in the response", file=sys.stderr)
        return []
    return re.findall(block_pattern, container[0], re.S)


def _first_group(patterns, text):
    """Return group 1 of the first pattern in *patterns* that matches, else None."""
    for pattern in patterns:
        match = re.search(pattern, text, re.S)
        if match:
            return match.group(1)
    return None


def _final_url(href):
    """Follow *href* and return the URL the request finally lands on.

    Returns None when the request fails, so one dead link does not abort the
    scan of the remaining results.
    """
    # NOTE(review): the original condition here was destroyed in the published
    # listing (`if "#34; not in href: href=f' >{href}'`). It is reconstructed
    # as "make a relative link absolute against Baidu" -- confirm.
    if not href.startswith(("http://", "https://")):
        href = urljoin(_BAIDU_ORIGIN, href)
    try:
        return requests.get(href, headers=headers, timeout=_REQUEST_TIMEOUT).url
    except requests.RequestException as err:
        print(f"could not resolve {href}: {err}", file=sys.stderr)
        return None


def cx(keyword, x, cxurl):
    """Print the rank id of every result on one Baidu page that lands on *cxurl*.

    Args:
        keyword: search phrase.
        x: page selector; 1 requests pn=0, any other value requests pn=<x>0.
        cxurl: substring looked for in each result's final (redirected) URL.

    Prints the computed pn value, the search URL, and then one rank id per
    matching result. Returns None.
    """
    # NOTE(review): with this mapping x=2 requests pn=20 and pn=10 can never
    # be requested. If pn is a zero-based offset with 10 results per page,
    # the intended value is probably (x - 1) * 10 -- TODO confirm before
    # changing. The original mapping is kept as is.
    if x == 1:
        x = 0
    else:
        x = f'{x}0'
    print(x)
    # quote() keeps characters such as "&" or "#" in the keyword from
    # breaking the query string.
    url = f"https://www.baidu.com/s?wd={quote(keyword)}&ie=UTF-8&pn={x}"
    print(url)
    html = _fetch_html(url)
    for block in _result_blocks(html, r'<div class="result(.+?)</h3>'):
        rank = _first_group(_RANK_PATTERNS, block)
        href = _first_group((_CX_HREF_RE, _TITLE_HREF_RE), block)
        if rank is None or href is None:
            # Block without a rank id or link (the original raised IndexError).
            continue
        final_url = _final_url(href)
        if final_url is not None and cxurl in final_url:
            print(rank)


# Example: cx("工业设计考研", 2, "www.ugainian.com")


def cxpm(keyword, x, cxurl):
    """Print keyword, rank id and final URL for results landing on *cxurl*.

    Same idea as cx(), but requests 50 results per page (rn=50) and reads the
    link from the displayed-URL anchor first.

    Args:
        keyword: search phrase.
        x: value placed in front of a literal "0" to form the pn parameter.
        cxurl: substring looked for in each result's final (redirected) URL.

    Prints the search URL, the raw displayed-URL matches of every result
    block, and one "keyword rank url" line per matching result. Returns None.
    """
    url = f"https://www.baidu.com/s?wd={quote(keyword)}&ie=UTF-8&pn={x}0&rn=50"
    print(url)
    html = _fetch_html(url)
    block_pattern = r'<div class="result(.+?)class="m">百度快照</a></div></div>'
    for block in _result_blocks(html, block_pattern):
        rank = _first_group(_RANK_PATTERNS, block)
        if rank is None:
            continue
        shown = re.findall(_CXPM_HREF_RE, block, re.S)
        print(shown)  # kept from the original: raw displayed-URL matches
        href = shown[0] if shown else _first_group((_TITLE_HREF_RE,), block)
        if href is None:
            continue
        final_url = _final_url(href)
        if final_url is not None and cxurl in final_url:
            print(keyword, rank, final_url)


# Example: cxpm('工业设计考研', 0, "www.ugainain.com")


# Scratch demo of non-greedy matching with re.S.
# NOTE(review): the sample text is assumed to have spanned three lines in the
# original; the published listing shows spaces where the newlines are here.
a = '''aaafg
sfdfgg1224
fssf'''
re_htm = re.findall('aaafg(.+?)fssf', a, re.S)
print(re_htm)
re_htm1 = re.findall('aaafgs(.+?)fssf', a, re.S)
print(re_htm1)
re_htm2 = re.findall('aaafgs(.+?)sfssf', a, re.S)
print(re_htm2)

  

联系我们 / Contact us

地址:四川成都市青羊区西御街西御大厦基地、四川传媒大厦基地、倪家桥基地、鞋王大厦基地、成华区萌想星球基地、迎宾大道基地

电话:13281057808

邮件:lining@ygbrsc.com


微信咨询

Copyright © 2018. All rights reserved.

技术支持: 四川线在科技有限公司 | 管理登录
返回顶部 seo seo