
Python爬取百度统计后台海量搜索词代码如下:
import requests
import json
import requests
def getzhuanhua(number):
    """Fetch one page of the Baidu Tongji (百度统计) search-word report and
    append "keyword,pv" lines to allinfo5.txt.

    number: integer offset into the report (multiple of pageSize=1000);
            passed through as the 'offset' form field.
    Raises requests.RequestException / KeyError on network or schema errors;
    callers are expected to catch per page.
    """
    # Session cookies copied from a logged-in browser session; they expire,
    # so re-capture them when requests start failing.
    cookies = {
        'BAIDUID': '6411D8714682F1536AA3F7305344227F:SL=0:NR=10:FG=1',
        'BIDUPSID': 'FBA83424C99A9EC221A43472DDDE8EAA',
        'PSTM': '1574400803',
        'H_WISE_SIDS': '145788_148998_149203_143879_147277_148320_147091_141744_147895_148193_148865_147684_147280_146537_148001_148824_147849_147827_148439_148754_147889_148524_147346_127969_146036_147239_146548_148206_142421_146653_147024_146732_138425_146000_137817_144658_142210_147528_147912_125696_107318_146850_146395_144966_149278_145607_146785_148346_144762_148749_146053_148868_110085',
        'Hm_lvt_41fc030db57d5570dd22f78997dc4a7e': '1595215389,1595297431,1595304658,1595386271',
        'Hm_ct_41fc030db57d5570dd22f78997dc4a7e': '306*1*27412333',
        'BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598',
        'Hm_up_41fc030db57d5570dd22f78997dc4a7e': '%7B%22uid_%22%3A%7B%22value%22%3A%2227412333%22%2C%22scope%22%3A1%7D%7D',
        'RT': 'sl=7&ss=kcwrbzhc&tt=3am&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&z=1&dm=baidu.com&si=mrmvt5s094h&ld=bxfu&cl=bpt7',
        'uc_login_unique': '3d58516e8f69ca96ef511739b2a87e78',
        'uc_recom_mark': 'cmVjb21tYXJrXzI3NDEyMzMz',
        'H_PS_PSSID': '32294_1435_31660_32350_32045_32231_32245_32117_32295',
        'BDUSS': 'pLMnY2cWhBR05iT285NkpVU2xLUHhZWVB0R3AzS0Y2Z1NHV35udkg2WURJejlmSVFBQUFBJCQAAAAAAAAAAAEAAADP~WPFt7jg1rm-NLj2AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOWF18Dlhdfcl',
        'BDRCVFR[SHOOb3ODEBt]': 'mk3SLVN4HKm',
        'delPer': '0',
        'PSINO': '3',
        'cflag': '13%3A3',
        'BDRCVFR[gltLrB7qNCt]': 'mk3SLVN4HKm',
        'SIGNIN_UC': '70a2711cf1d3d9b1a82d2f87d633bd8a03453942100',
        'TJSSID': 'ihs3apqmi9bt9fj886q8c0ioj6',
        '__cas__st__': '60d43409b2b4ebc63e065c0a6850779cfcd0aa0a12e72bfab4911c2609c361fdba7b5402c8998c42f0d860d4',
        '__cas__id__': '27412333',
        'Hm_lpvt_41fc030db57d5570dd22f78997dc4a7e': '1595386602',
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0',
        'Accept': 'text/plain, */*; q=0.01',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'X-Requested-With': 'XMLHttpRequest',
        'Origin': 'https://tongji.baidu.com',
        'Connection': 'keep-alive',
        'Referer': 'https://tongji.baidu.com/web/27412333/source/searchword?flag=indicator&siteId=13139568',
    }
    # Form payload mirroring the AJAX request the report page issues;
    # st/et are millisecond timestamps bounding the report period.
    data = {
        'siteId': '13139568',
        'st': '1594828800000',
        'et': '1595347200000',
        'st2': '',
        'et2': '',
        'indicators': 'trans_count,ip_count,visitor_count,pv_count,bounce_ratio,avg_visit_time',
        'order': 'pv_count,desc',
        'offset': number,
        'pageSize': '1000',
        'target': '1374147',
        'flag': 'indicator',
        'source': '',
        'isGroup': '0',
        'clientDevice': 'all',
        'reportId': '12',
        'method': 'source/searchword/a',
        'queryId': ''
    }
    # NOTE(review): verify=False disables TLS verification; the URL is plain
    # http anyway, so the session cookies travel unprotected — acceptable for
    # a one-off personal scrape only.
    response = requests.post('http://tongji.baidu.com/web/27412333/ajax/post',
                             headers=headers, cookies=cookies, data=data, verify=False)
    payload = response.json()  # equivalent to json.loads(response.text)
    items = payload['data']['items']
    # items[0]: list of keyword wrappers; each entry is a one-element list
    # whose dict carries the keyword under 'name' — TODO confirm against a
    # live response.
    keywordlist = [keyword[0]['name'] for keyword in items[0]]
    # print(keywordlist, type(keywordlist))
    pv = items[1]  # per-keyword indicator rows, aligned with keywordlist
    # print(pv, type(pv))
    res3 = dict(zip(keywordlist, pv))
    print(res3)
    # Open the output file once per page instead of once per keyword
    # (the original reopened it inside the loop).
    with open("allinfo5.txt", "a+", encoding='utf8') as f:
        for k, v in res3.items():
            f.writelines(str(k) + ',' + str(v) + '\n')
# Walk the report page by page. The original looped range(1, 330), so
# offset 0 (the top 1000 keywords) was never fetched — start at 0 instead.
for page in range(330):
    try:
        print('正在打印第{}页'.format(page + 1))
        getzhuanhua(page * 1000)
    except Exception as exc:
        # Narrowed from a bare except: no longer swallows KeyboardInterrupt
        # or SystemExit; a failed page is reported and skipped.
        print('error')
用Python爬取百度统计后台的海量搜索词,数量可以超过10000个;如果直接在后台自行下载,最多只能导出10000个词。
微信号:irenyuwang
关注微信公众号,站长免费提供流量增长方案。
复制微信号
关注微信公众号,站长免费提供流量增长方案。
复制微信号
标签: Python和seo
版权声明:除非特别标注,否则均为本站原创文章,转载时请以链接形式注明文章出处。
评论列表
好文章是一定要收藏的~
测试,还可以用,不用挨个点了