1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
import requests
import re
def get_html(url):
    """Fetch the ``/archives`` page of *url* and return its HTML text.

    Returns an empty string on any network failure so callers can safely
    run string/regex operations on the result. (The original swallowed
    every exception and implicitly returned ``None``, which crashed
    ``re.findall`` downstream in ``main``.)
    """
    try:
        # Timeout keeps the script from hanging forever on a dead host.
        r = requests.get(url + "/archives", timeout=10)
        r.encoding = "utf-8"
        return r.text
    except requests.RequestException:
        # Best-effort scrape: report nothing rather than crash the caller.
        return ""
def main(url):
    """Scrape article links from *url*'s archives page and submit them
    to Baidu's link-push (SEO) API, one URL per line in the POST body.
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie': '',
        'Host': 'data.zz.baidu.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'curl/7.12.1',
    }
    # NOTE(review): '{token}' is a literal placeholder — the request will be
    # rejected until the real push token is substituted (e.g. via
    # seo_url.format(token=...)). Left as-is; the token is not in this file.
    seo_url = 'http://data.zz.baidu.com/urls?site=121.196.169.103&token={token}'
    html = get_html(url)
    if not html:
        # Fetch failed (get_html returns None/"" on error) — nothing to push.
        return
    # Matches look like '"/archives/xxx"' (quotes included by the capture
    # group); split('"')[1] extracts the bare path.
    result = re.findall('<li.*?href=("/archives/.*?")>', html, re.S)
    # One absolute URL per line, trailing newline included, as the Baidu
    # push API expects. join() avoids quadratic string concatenation.
    urls = "".join(url + res.split('"')[1] + '\n' for res in result)
    print(urls)
    response = requests.post(seo_url, data=urls.encode(), headers=headers)
    print(response.text)
if __name__ == '__main__':
    # Entry point: push this blog's archive links to the Baidu SEO API.
    target_site = "http://121.196.169.103"
    main(target_site)
|