一、安装 requests
pip install requests
二、基础用法
1. 发送 GET 请求
# Send a GET request and inspect the response object.
import requests

response = requests.get("https://www.example.com")
print(response.status_code)  # HTTP status code (200 means success)
print(response.text)         # response body as text
print(response.content)      # response body as raw bytes (for images/files)
2. 发送 POST 请求
# POST form-encoded data and decode the JSON reply.
data = {
    "key1": "value1",
    "key2": "value2",
}
response = requests.post("https://httpbin.org/post", data=data)
print(response.json())  # parse the JSON response body
3. 处理 URL 参数
# Attach query-string parameters; requests builds and encodes the URL.
params = {
    "q": "python",
    "page": 2,
}
response = requests.get("https://www.example.com/search", params=params)
# Final URL: https://www.example.com/search?q=python&page=2
print(response.url)
4. 自定义请求头
# Send custom request headers (e.g. User-Agent, Accept-Language).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Accept-Language": "en-US,en;q=0.9",
}
response = requests.get("https://www.example.com", headers=headers)
三、响应处理
1. 检查响应状态
# Check the HTTP status; raise_for_status() raises on 4xx/5xx codes.
if response.status_code == requests.codes.ok:
    print("请求成功!")
else:
    response.raise_for_status()  # raises an HTTPError automatically
2. 解析内容
# Parse a JSON body.
data = response.json()

# Parse text with an explicit encoding.
response.encoding = "utf-8"
text = response.text

# Save binary content (e.g. an image) to disk.
with open("image.jpg", "wb") as f:
    f.write(response.content)
四、高级功能
1. 使用会话(Session)
# A Session keeps cookies and reuses the TCP connection across requests.
with requests.Session() as session:
    session.headers.update({"User-Agent": "MyBot"})
    response = session.get("https://www.example.com/login")
    # subsequent requests on this session carry the cookies automatically
2. 处理 Cookies
# Read a cookie from a response.
cookies = response.cookies.get("session_id")

# Send cookies with a request.
response = requests.get(url, cookies={"session_id": "123456"})
3. 文件上传
# Upload a file; the context manager guarantees the handle is closed
# (the original left the file open — a resource leak).
with open("data.txt", "rb") as fh:
    files = {"file": fh}
    response = requests.post("https://httpbin.org/post", files=files)
4. 设置代理
# Route requests through HTTP/HTTPS proxies.
proxies = {
    "http": "http://10.10.1.10:3128",
    "https": "http://10.10.1.10:1080",
}
response = requests.get("https://example.com", proxies=proxies)
5. 超时设置
# Abort the request if it takes too long instead of hanging forever.
try:
    response = requests.get(url, timeout=5)  # connect + read timeout in seconds
except requests.exceptions.Timeout:
    print("请求超时!")
五、异常处理
# Catch requests exceptions from most specific to most general;
# RequestException is the base class for all of them.
try:
    response = requests.get(url)
    response.raise_for_status()  # raises on 4xx/5xx status codes
except requests.exceptions.HTTPError as err:
    print(f"HTTP错误: {err}")
except requests.exceptions.ConnectionError:
    print("连接失败")
except requests.exceptions.RequestException as e:
    print(f"请求异常: {e}")
六、其他实用功能
1. 重定向控制
response = requests.get(url, allow_redirects=False) # disable automatic redirect following
2. SSL 证书验证
response = requests.get(url, verify=False) # disable SSL verification (not recommended)
# or point verify at a CA bundle / certificate path
response = requests.get(url, verify="/path/to/cert.pem")
3. 流式下载(大文件)
# Stream a large download in chunks instead of loading it all into memory.
response = requests.get(url, stream=True)
with open("large_file.zip", "wb") as f:
    for chunk in response.iter_content(chunk_size=8192):
        f.write(chunk)
4. 事件钩子(高级)
# Response hook: requests calls this after each response arrives.
def log_response(resp, *args, **kwargs):
    print(f"耗时: {resp.elapsed.total_seconds()}秒")

requests.get("https://example.com", hooks={"response": log_response})
七、反爬虫应对策略
- 设置随机 User-Agent:使用 fake_useragent 库生成:

  from fake_useragent import UserAgent
  headers = {"User-Agent": UserAgent().random}

- 使用代理 IP 池:结合付费代理服务或自建代理池。

- 控制请求频率:在请求之间加入延时,降低请求速度:

  import time
  time.sleep(2)  # 降低请求速度