sem&python 批量查询竞价关键词的竞争度,和竞价已投关键词的排名

ernestwang 3 0

批量查询竞价关键词的竞争度,代码如下:

import requests
import re
import cssselect
from lxml import html
from lxml import etree
import time
def getinfo(keyword, timeout=10):
    """Search Baidu for one keyword and append the match count to a file.

    Sends a GET request to ``http://www.baidu.com/s`` with a fixed set of
    cookies, headers and query parameters, then selects every ``<a>`` element
    that immediately precedes a ``<div class="c-tools">`` sibling.  The number
    of such anchors is written to ``reszhanghu.txt`` under the label
    ``竞价公司数量`` together with the keyword and the text fragments extracted
    from the anchors.

    Args:
        keyword: Search term. Surrounding whitespace (for example a trailing
            newline left over from reading a file) is removed before the term
            is sent as the ``wd`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot block the caller indefinitely.

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-success HTTP status, so that a failed request is not recorded
            as a result with zero matches.
    """
    # NOTE(review): these values look like a captured browser session
    # (BDUSS appears to be a login token) -- consider loading them from
    # configuration instead of keeping them in source; confirm with the owner.
    cookies = {
        'BIDUPSID': '3ADBC5DB4EDCA8474B065029A50833D9',
        'PSTM': '1569726470',
        'MSA_WH': '375_812',
        'BAIDUID': '251420D71D629FB588B717CDCE0B593B:SL=0:NR=10:FG=1',
        'H_WISE_SIDS': '148077_149389_150724_147366_150967_150075_147089_150083_148193_148867_148211_148713_150744_147279_150038_150190_131861_149586_148754_147896_146575_148523_151032_127969_149908_146549_149535_142421_146653_146732_138426_149558_145994_131423_144658_147527_107314_148186_147715_149253_140311_146395_144966_150340_147546_148869_110085',
        'BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598',
        'BDUSS': 'FAza0hZWDVaTXEyamk2N0hMLVhxaDAxendUSDZOM3RzTGktfi1pRzdRWFQ2alZmSVFBQUFBJCQAAAAAAAAAAAEAAABkPXJTuuzCzLXGvN3Qo7Gow~sAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAANNdDl~TXQ5fS',
        'H_PS_PSSID': '1438_32139_32046_32230_32296_32261',
        'delPer': '0',
        'BD_CK_SAM': '1',
        'PSINO': '3',
        'BD_HOME': '1',
        'BD_NOT_HTTPS': '1',
        'ISSW': '1',
        'BD_UPN': '3146',
        'sug': '0',
        'sugstore': '0',
        'ORIGIN': '2',
        'bdime': '0',
        'COOKIE_SESSION': '1038142_1_9_9_0_12_0_1_9_3_0_5_0_0_0_0_0_1593401398_1594786007%7C9%232773554_6_1593401393%7C4',
        'H_PS_645EC': 'b787r%2BVzA0qjW4QKdzfHp7XbTojbyi%2F4Th7UN5qaMrmc7Bk5tgbNT4yrLIw',
        'WWW_ST': '1594786220856',
    }

    headers = {
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0+(compatible;+Baiduspider/2.0;++http://www.baidu.com/search/spider.html)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Referer': 'http://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&tn=baidu&wd=%E6%89%8B%E6%9C%BA%E5%AE%A3%E4%BC%A0%E6%B5%B7%E6%8A%A5&oq=%25E6%2589%258B%25E6%259C%25BA%25E5%25AE%25A3%25E4%25BC%25A0%25E6%25B5%25B7%25E6%258A%25A5&rsv_pq=fce7313300006a80&rsv_t=0c98lGY1wW0HWixeQKPPpbty9W%2Fex7OSdUYangV%2Fx4ntQJm1sSr9lfflGWA&rqlang=cn&rsv_enter=0&rsv_dl=tb&rsv_btype=t&bs=%E6%89%8B%E6%9C%BA%E5%AE%A3%E4%BC%A0%E6%B5%B7%E6%8A%A5&__eis=1&__eist=227',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }

    # A keyword read from a text file still carries its line ending; strip it
    # so the newline is not sent to the search engine as part of the query.
    query = keyword.strip()

    params = (
        ('ie', 'utf-8'),
        ('f', '8'),
        ('rsv_bp', '1'),
        ('tn', 'baidu'),
        ('wd', query),
        ('oq', '%E6%89%8B%E6%9C%BA%E5%AE%A3%E4%BC%A0%E6%B5%B7%E6%8A%A5'),
        ('rsv_pq', 'fce7313300006a80'),
        ('rsv_t', '0c98lGY1wW0HWixeQKPPpbty9W/ex7OSdUYangV/x4ntQJm1sSr9lfflGWA'),
        ('rqlang', 'cn'),
        ('rsv_enter', '0'),
        ('rsv_dl', 'tb'),
        ('rsv_btype', 't'),
        ('bs', '\u624B\u673A\u5BA3\u4F20\u6D77\u62A5'),
        ('__eis', '1'),
        ('__eist', '227'),
    )

    response = requests.get(
        'http://www.baidu.com/s',
        headers=headers,
        params=params,
        cookies=cookies,
        verify=False,
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of parsing an error page and
    # recording it as "0 matches".
    response.raise_for_status()

    print(response.text)
    html2 = etree.HTML(response.text)
    anchors = html2.xpath("//div[@class='c-tools']/preceding-sibling::a[1]")
    print(anchors)
    number = len(anchors)

    urllist = []
    for anchor in anchors:
        # Serialise the anchor's first child element; when the anchor holds
        # only text (no child element) fall back to the anchor itself rather
        # than raising IndexError and losing the whole keyword.
        target = anchor[0] if len(anchor) else anchor
        markup = html.tostring(target, encoding='utf-8').decode('utf-8')
        # Each match keeps its surrounding '>' and '<' delimiters.
        urllist.extend(re.findall('>.*?<', markup))

    with open('reszhanghu.txt', 'a+', encoding='utf8') as f:
        f.write('竞价公司数量{}'.format(str(number)) + ',' + str(keyword.split()) + str(urllist) + '\n')
    print(keyword, number)

# Load every keyword (one per line); the context manager guarantees the
# input file is closed once the lines have been read.
with open('zhanghu.txt', 'r', encoding='utf8') as a:
    b = a.readlines()

# `stop` is the 1-based line counter used to insert a longer pause
# after every 100th line.
for stop, keywords in enumerate(b, start=1):
    # Skip blank lines so that no empty search is sent.
    if not keywords.strip():
        continue
    try:
        getinfo(str(keywords))
        time.sleep(0.5)
        if stop % 100 == 0:
            time.sleep(10)

    # Catch ordinary failures only; KeyboardInterrupt / SystemExit must still
    # be able to stop the batch.
    except Exception:
        print('error')
        with open('errorkey.txt', 'a+', encoding='utf8') as g:
            # Make sure each failed keyword ends up on its own line, even when
            # the last input line has no trailing newline.
            g.write(keywords if keywords.endswith('\n') else keywords + '\n')

#NB. Original query string below. It seems impossible to parse and
#reproduce query strings 100% accurately so the one below is given
#in case the reproduced version is not "correct".
# response = requests.get('http://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu&wd=%E6%98%A5%E8%8A%82%E6%89%8B%E6%8A%84%E6%8A%A5&ct=1&rn=50&oq=%25E6%2598%25A5%25E8%258A%2582%25E6%2589%258B%25E6%258A%2584%25E6%258A%25A5&rsv_pq=a552707900039c5a&rsv_t=cbdeRMMwwsMzhOqCHr6vhWhkJpc7xFaAeJqM3EDMfoOjmMSfwzMGLK8j10o&rqlang=cn&rsv_dl=tb&rsv_enter=0&rsv_btype=t&bs=%E6%98%A5%E8%8A%82%E6%89%8B%E6%8A%84%E6%8A%A5&__eis=1&__eist=227', headers=headers, cookies=cookies, verify=False)

sem&python 批量查询竞价关键词的竞争度,和竞价已投关键词的排名

标签: sem&python

发表评论 (已有0条评论)

还木有评论哦,快来抢沙发吧~