用 Python 获取百度搜索结果链接

2022-11-25

前言

近期有许多项目需要这个功能,由于Python实现起来比较简单就这么做了,代码贴下来觉得好点个赞吧~

代码

# coding: utf-8
"""Collect destination links from Baidu web search results.

For every results page of the query ``keyword + <number>`` the script
follows each result's redirector URL (without redirecting) and records
the real destination from the ``Location`` header, keyed by host.
"""
import os
import sys
import time
import urllib.parse
from urllib.parse import urlparse
from multiprocessing.pool import ThreadPool

# Destination URLs discovered so far, keyed by netloc (deduplicates per host).
LOCATIONS = {}
GLOBAL_THREAD = 500   # worker threads used to fetch result pages
GLOBAL_TIMEOUT = 50   # per-request timeout in seconds


def get_links(keyword, generator, pages):
    """Build the list of Baidu search-result URLs to scrape.

    keyword:   base search term; each generated number is appended to it.
    generator: inclusive integer range "a-b" of suffixes for the keyword.
    pages:     inclusive integer range "a-b"; Baidu paginates with pn=page*10.
    Returns the list of fully URL-quoted search URLs.
    Raises ValueError when a range string is not of the form "a-b".
    """
    page_lo, page_hi = (int(p) for p in pages.split("-"))
    gen_lo, gen_hi = (int(g) for g in generator.split("-"))
    links = []
    for page in range(page_lo, page_hi + 1):
        for genera in range(gen_lo, gen_hi + 1):
            links.append(
                "http://www.baidu.com.cn/s?wd="
                + urllib.parse.quote(keyword + str(genera))
                + "&pn=" + str(page * 10))
    return links


def get_page(url):
    """Fetch one results page and record each result's destination URL.

    Baidu result anchors point at a redirector; each one is requested with
    redirects disabled and the ``Location`` header is read to obtain the
    real destination, which is stored in the shared LOCATIONS dict.
    """
    # Imported lazily so the module can be imported (e.g. for get_links /
    # save_result) without the third-party scraping stack installed.
    import requests
    from bs4 import BeautifulSoup
    from fake_useragent import UserAgent

    headers = {"user-agent": UserAgent().chrome}
    # BUG FIX: always pass a timeout — requests has no default and a dead
    # host would otherwise hang a worker thread forever.
    req = requests.get(url, headers=headers, timeout=GLOBAL_TIMEOUT)
    req.encoding = "utf-8"
    soup = BeautifulSoup(req.text, "lxml")
    for link in soup.select("div.result > h3.t > a"):
        req = requests.get(link.get("href"), headers=headers,
                           allow_redirects=False, timeout=GLOBAL_TIMEOUT)
        # BUG FIX: non-redirect responses carry no Location header; the
        # original req.headers["location"] raised KeyError there.
        location = req.headers.get("location")
        if location and "=" in location:
            LOCATIONS[urlparse(location).netloc] = location


def save_result(path):
    """Write every collected destination URL to *path*, one per line."""
    # Explicit encoding: URLs may contain non-ASCII query components.
    with open(path, "w", encoding="utf-8") as file:
        for url in LOCATIONS.values():
            file.write(url + "\n")


def baidu_search():
    """Interactive entry point: prompt for a query, scrape, save results."""
    try:
        # "cls" only exists on Windows; use "clear" elsewhere.
        os.system("cls" if os.name == "nt" else "clear")
        print("-" * 56 + "\n")
        print("| BaiduSearch Engine By 美图博客[https://www.meitubk.com/] |\n")
        print("-" * 56 + "\n")
        keyword = input("Keyword: ")
        generator = input("Generator(1-10): ")
        pages = input("Pages(0-10): ")
        start = time.time()
        pool = ThreadPool(processes=GLOBAL_THREAD)
        pool.map(get_page, get_links(keyword, generator, pages))
        pool.close()
        pool.join()
        end = time.time()
        path = r"D:\Desktop\result.txt"
        save_result(path)
        print("\nSaved in %s" % path)
        print("Result count: %d" % len(LOCATIONS))
        print("Running time: %ds" % (end - start))
    except ValueError:
        # BUG FIX: the original bare `except:` swallowed every error
        # (including KeyboardInterrupt); only the range-parsing failure
        # is an "input error".
        print("\nInput Error!")
        sys.exit(0)


if __name__ == "__main__":
    # Guarded so importing this module no longer launches the scraper.
    baidu_search()

使用

用 Python 获取百度搜索结果链接的相关教程结束。

《用 Python 获取百度搜索结果链接.doc》

下载本文的Word格式文档,以方便收藏与打印。