邢台市做网站电话,专业企业建站系统,最近火爆的新闻大事,软件制作平台免费urllib爬取数据
import urllib.request as request# 定义url
url https://www.baidu.com
#模拟浏览器发起请求获取响应对象
response request.urlopen(url)
read方法返回的是字节形式的二进制数据
二进制--》字符串 解码 decode( 编码的格式…urllib爬取数据
import urllib.request as request

# 定义url
url = "https://www.baidu.com"
# 模拟浏览器发起请求,获取响应对象
response = request.urlopen(url)
# read方法返回的是字节形式的二进制数据
# 二进制 --> 字符串:解码 decode("编码的格式")
content = response.read().decode("utf-8")
# 一个类型、6个方法:response 为 HTTPResponse 对象
# 6个方法:read、readline、readlines、getcode、geturl、getheaders —— 读取字节、读取一行、读取多行、获取响应码、获取url、获取响应头
print(content)
read 读取字节(read(5) 表示只读取5个字节) readline 读取一行 readlines 读取多行 getcode 获取响应码 geturl 获取url getheaders 获取响应头
urllib下载 urlretrieve:第一个参数传递资源链接url,第二个参数为要保存的文件名,源码如下 def urlretrieve(url, filename=None, reporthook=None, data=None):
下载图片
import urllib.request as request

# 下载图片
url_img = "https://img1.baidu.com/it/u=1187129814,1675470074&fm=253&fmt=auto&app=138&f=JPEG?w=889&h=500"
request.urlretrieve(url_img, "test.jpg")

urllib请求对象定制 User Agent中文名为用户代理,简称 UA,它是一个特殊字符串头,使得服务器能够识别客户使用的操作系统及版本、CPU 类型、浏览器及版本、浏览器内核、浏览器渲染引擎、浏览器语言、浏览器插件等。请求https的时候需要加上ua伪装,否则返回的信息不全,存在问题(http 端口 80,https 端口 443)。
import urllib.request as request
# 下载图片
url = "https://www.baidu.com/"
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
}
# 构建的请求对象
geneRequest = request.Request(url=url, headers=header)
# 模拟浏览器发送请求
response = request.urlopen(geneRequest)
# 获取内容
content = response.read().decode("utf-8")
print(content)
打印如下
请求quote方法和urlencode方法 浏览器get请求的中文参数复制下来,会被进行URL编码(按UTF-8字节做百分号编码,而不是unicode),例如百度搜索陈奕迅,会变成下面这个样子,所以urllib提供了quote方法和urlencode方法来解决此问题 https://www.baidu.com/s?wd=%E9%99%88%E5%A5%95%E8%BF%85
quote 单参数封装
import urllib.request as request
import urllib.parse as parse# 百度搜索陈奕迅发现中文被编码unicode
url https://www.baidu.com/s?wdheader {
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36,Cookie: BIDUPSIDF5D1153D001F7BA92AFCBFF6B6995913; PSTM1674736839; BD_UPN12314753; BDUSSWdodDZGaVk0flJIYjkzNHMtZWtYTUpwaE1HNEc3VGU1bHEtQUhmQXNia0c4TlZrRVFBQUFBJCQAAAAAAAAAAAEAAACvXzmo0-DJ-sfrtuDLr771AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZjrmQGY65kU; BDUSS_BFESSWdodDZGaVk0flJIYjkzNHMtZWtYTUpwaE1HNEc3VGU1bHEtQUhmQXNia0c4TlZrRVFBQUFBJCQAAAAAAAAAAAEAAACvXzmo0-DJ-sfrtuDLr771AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZjrmQGY65kU; BAIDUIDF5D1153D001F7BA93B20A6BAB8379B5E:SL0:NR10:FG1; BAIDUID_BFESSF5D1153D001F7BA93B20A6BAB8379B5E:SL0:NR10:FG1; channelbaidusearch; baikeVisitId5ef65414-3e3e-44a2-9b90-6b842c55e2b7; BD_HOME1; BA_HECTORag0k2g8g8k2l2ka1252h04ai1idf2ef1o; ZFYar3QXfOOpNBISLowT0W9l3txojdtsgY2xonzVcZtFl8:C; delPer0; BD_CK_SAM1; PSINO2; BDORZB490B5EBF6F3CD402E515D22BCDA1598; B64_BOT1; BDRCVFR[tox4WRQ4-Km]mk3SLVN4HKm; ab_sr1.0.1_NjZlMTM5ZjY1OTQ5YzA5YmY2MmFhOTE2YTY1MGYzMmM5YTA1ZDBhMzY2Y2NiYjdhMTU1NWU1MzE3OWM4MWI3NThiY2JiYTczNDJhNWY3N2FiOWVjNDU5MWVlOTExM2UzMDRjODE4MWZmNDg1MWExNWY1NzY5ZGVhOThkZDFmNTJmYTZlODA3YTg0Y2IxNTI4NmFlODg0ZmE3MzY2ODhkZA; BDRCVFR[-pGxjrCMryR]mk3SLVN4HKm; BDRCVFR[feWj1Vr5u3D]I67x6TjHwwYf0; H_PS_PSSID36552_39109_38831_38880_39115_39118_39040_38917_26350_39138_39137_39101; COOKIE_SESSION1858_0_7_9_1_6_1_0_7_6_33_1_0_0_0_0_1690964160_0_1691849533%7C9%23187206_15_1690528560%7C9; sug3; sugstore0; ORIGIN0; bdime0; H_PS_645ECad5fGs4ULmE01SpZnyJOET%2F2Sji4OEtA4J0bW6WTOQkhh3KutG2uM%2F3Ryak
}
name = parse.quote("陈奕迅")
# 构建的请求对象
geneRequest = request.Request(url=url + name, headers=header)
# 模拟浏览器发送请求
response = request.urlopen(geneRequest)
# 获取内容
content = response.read().decode("utf-8")
print(content)
打印如下
urlencode 多参数封装
import urllib.request as request
import urllib.parse as parse# 百度搜索陈奕迅发现中文被编码unicode
url https://www.baidu.com/s?header {
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36,Cookie: BIDUPSIDF5D1153D001F7BA92AFCBFF6B6995913; PSTM1674736839; BD_UPN12314753; BDUSSWdodDZGaVk0flJIYjkzNHMtZWtYTUpwaE1HNEc3VGU1bHEtQUhmQXNia0c4TlZrRVFBQUFBJCQAAAAAAAAAAAEAAACvXzmo0-DJ-sfrtuDLr771AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZjrmQGY65kU; BDUSS_BFESSWdodDZGaVk0flJIYjkzNHMtZWtYTUpwaE1HNEc3VGU1bHEtQUhmQXNia0c4TlZrRVFBQUFBJCQAAAAAAAAAAAEAAACvXzmo0-DJ-sfrtuDLr771AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZjrmQGY65kU; BAIDUIDF5D1153D001F7BA93B20A6BAB8379B5E:SL0:NR10:FG1; BAIDUID_BFESSF5D1153D001F7BA93B20A6BAB8379B5E:SL0:NR10:FG1; channelbaidusearch; baikeVisitId5ef65414-3e3e-44a2-9b90-6b842c55e2b7; BD_HOME1; BA_HECTORag0k2g8g8k2l2ka1252h04ai1idf2ef1o; ZFYar3QXfOOpNBISLowT0W9l3txojdtsgY2xonzVcZtFl8:C; delPer0; BD_CK_SAM1; PSINO2; BDORZB490B5EBF6F3CD402E515D22BCDA1598; B64_BOT1; BDRCVFR[tox4WRQ4-Km]mk3SLVN4HKm; ab_sr1.0.1_NjZlMTM5ZjY1OTQ5YzA5YmY2MmFhOTE2YTY1MGYzMmM5YTA1ZDBhMzY2Y2NiYjdhMTU1NWU1MzE3OWM4MWI3NThiY2JiYTczNDJhNWY3N2FiOWVjNDU5MWVlOTExM2UzMDRjODE4MWZmNDg1MWExNWY1NzY5ZGVhOThkZDFmNTJmYTZlODA3YTg0Y2IxNTI4NmFlODg0ZmE3MzY2ODhkZA; BDRCVFR[-pGxjrCMryR]mk3SLVN4HKm; BDRCVFR[feWj1Vr5u3D]I67x6TjHwwYf0; H_PS_PSSID36552_39109_38831_38880_39115_39118_39040_38917_26350_39138_39137_39101; COOKIE_SESSION1858_0_7_9_1_6_1_0_7_6_33_1_0_0_0_0_1690964160_0_1691849533%7C9%23187206_15_1690528560%7C9; sug3; sugstore0; ORIGIN0; bdime0; H_PS_645ECad5fGs4ULmE01SpZnyJOET%2F2Sji4OEtA4J0bW6WTOQkhh3KutG2uM%2F3Ryak
}
data = {
    "wd": "陈奕迅",
    "sex": "男",
    "location": "中国香港",
}
name = parse.urlencode(data)
# 构建的请求对象
geneRequest = request.Request(url=url + name, headers=header)
# 模拟浏览器发送请求
response = request.urlopen(geneRequest)
# 获取内容
content = response.read().decode("utf-8")
print(content)
urllib发送post请求 post请求的参数必须要进行编码;post请求的请求参数放入请求对象的data中,也就是请求体中;返回的是json数据,需要转换成json对象后打印
import urllib.request as request
import urllib.parse as parse
import json# 百度翻译
url https://fanyi.baidu.com/sugheaders {
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36,Cookie: BIDUPSIDF5D1153D001F7BA92AFCBFF6B6995913; PSTM1674736839; REALTIME_TRANS_SWITCH1; FANYI_WORD_SWITCH1; HISTORY_SWITCH1; SOUND_SPD_SWITCH1; SOUND_PREFER_SWITCH1; BDUSSWdodDZGaVk0flJIYjkzNHMtZWtYTUpwaE1HNEc3VGU1bHEtQUhmQXNia0c4TlZrRVFBQUFBJCQAAAAAAAAAAAEAAACvXzmo0-DJ-sfrtuDLr771AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZjrmQGY65kU; BDUSS_BFESSWdodDZGaVk0flJIYjkzNHMtZWtYTUpwaE1HNEc3VGU1bHEtQUhmQXNia0c4TlZrRVFBQUFBJCQAAAAAAAAAAAEAAACvXzmo0-DJ-sfrtuDLr771AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZjrmQGY65kU; BAIDUIDF5D1153D001F7BA93B20A6BAB8379B5E:SL0:NR10:FG1; BDORZB490B5EBF6F3CD402E515D22BCDA1598; H_PS_PSSID36552_39109_38831_38880_39115_39118_39040_38917_26350_39138_39137_39101; BAIDUID_BFESSF5D1153D001F7BA93B20A6BAB8379B5E:SL0:NR10:FG1; delPer0; PSINO2; Hm_lvt_64ecd82404c51e03dc91cb9e8c0255741691554432,1691567796,1691658560,1691850659; Hm_lpvt_64ecd82404c51e03dc91cb9e8c0255741691850659; ab_sr1.0.1_NzlhYWEzMDAyMWUzZTBhNGI1NTFkNDdiZThjNjA4YTVkMmZmMTM4YThkNDZjMzQ5ZWNmNDFmMmMxMzlmYjczMTllM2I0ZTM2ZjM4YzcwNzY3N2MzZjJjMjE1NDk2ODBlNTFlZWFmYTUzZjcyYTc4NjY1MmVmNDRlM2Y1ZTdhYjQ1MDhhODNiZGI2NDk0ZWVlNTBkYTJjMjZjNTUwNmFiOTk1OWY2YTdiYWI1MjY0Zjg4ZGExNmQ4YjA5MzBiNWI4
}
data = {
    "kw": "result",
}
# post请求的参数必须要进行编码
data = parse.urlencode(data).encode("utf-8")
# post的请求的参数是不会拼接在url的后面的,而是需要放在请求对象的data中
geneRequest = request.Request(url=url, data=data, headers=headers)
# 模拟浏览器向服务器发送请求
response = request.urlopen(geneRequest)
content = response.read().decode("utf-8")
# 字符串 --> json对象
obj = json.loads(content)
print(obj)
打印如下
urllib的异常URLError和HTTPError 1.HTTPError类是URLError类的子类 2.导入的包urllib.error.HTTPError urllib.error.URLError 3.http错误: http错误是针对浏览器无法连接到服务器而增加出来的错误提示,引导并告诉浏览者该页是哪里出了问题。 4.通过urllib发送请求的时候,有可能会发送失败,这个时候如果想让你的代码更加的健壮,可以通过try-except进行捕获异常,异常有两类:URLError、HTTPError
import urllib.request as request
import urllib.error as errorurl https://teshi.lcds.comheaders {
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36,Cookie: BIDUPSIDF5D1153D001F7BA92AFCBFF6B6995913; PSTM1674736839; REALTIME_TRANS_SWITCH1; FANYI_WORD_SWITCH1; HISTORY_SWITCH1; SOUND_SPD_SWITCH1; SOUND_PREFER_SWITCH1; BDUSSWdodDZGaVk0flJIYjkzNHMtZWtYTUpwaE1HNEc3VGU1bHEtQUhmQXNia0c4TlZrRVFBQUFBJCQAAAAAAAAAAAEAAACvXzmo0-DJ-sfrtuDLr771AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZjrmQGY65kU; BDUSS_BFESSWdodDZGaVk0flJIYjkzNHMtZWtYTUpwaE1HNEc3VGU1bHEtQUhmQXNia0c4TlZrRVFBQUFBJCQAAAAAAAAAAAEAAACvXzmo0-DJ-sfrtuDLr771AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZjrmQGY65kU; BAIDUIDF5D1153D001F7BA93B20A6BAB8379B5E:SL0:NR10:FG1; BDORZB490B5EBF6F3CD402E515D22BCDA1598; H_PS_PSSID36552_39109_38831_38880_39115_39118_39040_38917_26350_39138_39137_39101; BAIDUID_BFESSF5D1153D001F7BA93B20A6BAB8379B5E:SL0:NR10:FG1; delPer0; PSINO2; Hm_lvt_64ecd82404c51e03dc91cb9e8c0255741691554432,1691567796,1691658560,1691850659; Hm_lpvt_64ecd82404c51e03dc91cb9e8c0255741691850659; ab_sr1.0.1_NzlhYWEzMDAyMWUzZTBhNGI1NTFkNDdiZThjNjA4YTVkMmZmMTM4YThkNDZjMzQ5ZWNmNDFmMmMxMzlmYjczMTllM2I0ZTM2ZjM4YzcwNzY3N2MzZjJjMjE1NDk2ODBlNTFlZWFmYTUzZjcyYTc4NjY1MmVmNDRlM2Y1ZTdhYjQ1MDhhODNiZGI2NDk0ZWVlNTBkYTJjMjZjNTUwNmFiOTk1OWY2YTdiYWI1MjY0Zjg4ZGExNmQ4YjA5MzBiNWI4
}
try:
    geneRequest = request.Request(url=url, headers=headers)
    response = request.urlopen(geneRequest)
    content = response.read().decode("utf-8")
    print(content)
except error.URLError:
    print("系统正在升级。。。")
打印 系统正在升级
urllib的Cookie登录 数据采集的时候,有些页面需要登录之后才能访问并采集数据,下面以知乎为例
import urllib.request as request

url = "https://zhuanlan.zhihu.com/write"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
}
geneRequest = request.Request(url=url, headers=headers)
response = request.urlopen(geneRequest)
content = response.read().decode("utf-8")
with open("zhihu.html", "w", encoding="utf-8") as fp:
    fp.write(content)
下载到本地的内容为登录界面的内容,所以目前是被登录拦截啦,所以需要配置Cookie进行访问,添加如下代码则可访问文字编辑界面
headers = {
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36,Cookie: _zap3d2d11e9-563c-4294-982b-bc1e50b92dbc; d_c0AdDXSoDm9RaPTnCWFnbmNEw0ZgDlpiXboKQ|1687239218; _xsrf48c581c8-4a4b-438b-bb3e-535f42ba1927; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c491691853680; SESSIONIDFPFgDr4b3gQQv5Cc7KWf3dHjs8FnPIlTxUPa6tybYq2; captcha_session_v22|1:0|10:1691853648|18:captcha_session_v2|88:UnN3Z2t4ZlZPT0crWEJQc0xPd1hEbC9kdUU0ejVGQXJiRlZoRHNsaEdCZ0Z1by9UelpQQkYyOFBSSUE2Skpmdw|6b31a45a03c3db7898a70ddc8ac6a98e14f4856f01721788f81926dfebfbc313; JOIDUV8WBEtS9hD3NIyCU1ZxCMQfIb9EEKJSl3P6sjVilUWiRbrFNAJVxJczjIJQS7KtuQ6GWB0uU38k6DIuInzu9OQ; osdVVERC05W-Bf4MYiMVFl0DMoYLrpAHqVdknf0tTpnkUulSr_BOgVawZM9i41VT7yqtguCVhohVnsq7z0rJnLp--E; __snaker__idE1otU20YwbURudbv; gdxidpyhxdEkXnBfzykm%2F%2BPY7w8oPiYq3Mc7OITRL%2B%2F32Cc0JoN%5CAQDStC5S0arkZdcBdHycQf8XSzWdTgP4GrxigocwlMa09hue8hIxVPaxf2YrBPwLQiXuTM7LS%2BG%2FRick28km81nY6dJ5oVZshVboYBiPkL5GNLp888Ne8O8cJP6nYfwO1Ej8HRH%3A1691854581190; YD00517437729195%3AWM_NIyi748HGbKMSIqOidDU4C49URWV1wzaconfHcqUJZ45hscybQkKbOqZIeBTgYAO7p%2FIDTLUCSBdJCqiIPsvkdBoC%2F%2BqhPNV8lacUqn5oWufyBmWSqXKNU55r71w1DSf4USmw%3D; YD00517437729195%3AWM_NIKE9ca17ae2e6ffcda170e2e6eeaec16a8a87fb92e880aaef8fb7c15e938b9f83c861ededadaeb16ef5a89d93ec2af0fea7c3b92abbb7f9d5bb4485be85aed243b391a785c75ff89dfcdad85a958aff98f96ba1b1a287cb69b38b86bbb73facadad9ae15ba995aeb0f4528bbb8eb5e6538391f991ae43888c84b6c25982bc86a4f4638b90bd97ea7082b18dd0aa7bac9082b7bb68a3bd9b92ed3fad89bb87fc4aaf899d92db418e8bfbd6f13df3a9ffa5d26ef88b9d8ef637e2a3; YD00517437729195%3AWM_TIDplBlLylVh4VEBRFFRBeEhq3MIyb%2FhW7z; 
captcha_ticket_v22|1:0|10:1691853720|17:captcha_ticket_v2|704:eyJ2YWxpZGF0ZSI6IkNOMzFfMjhZMVEtclZqcURQWmN1UkN1R1lIQ1pHeVNTb0haOExpeFF0VnNTbVNyOE5Nc0s1MXRkSzZtOTdUZncyanFkVlZZdk9DRlVLWk9LbUNFYVZ5UVZ1SjJVZ2I3S1VsMlphZGVsQ0ZGTVdWRUlQNi40OUNoc3FrY1cyUzVLQWhaUGo2dF81RXBIVG9GWTVfTmJ0TW1iQzZkcUcuTEdmLmk1T1JGeWNZN05wWlAxcmZ6c3RLaEpSOENxRFFELi1hcmptYXhnaV81blluMmNOVWY3d0g3N0VLNU9hSzlfUG96SUhpLWtJc2JuOVZGWjZYNkJFcFI1eHNyTk0yX0FGWjVZZXp0a1dqV0JRUnR5SUppelA4ZGZCbGdjaW9uS0N1Vm9lVHRzOW5DRzZJNGVFa0t1RTFVVXJwemc3RHZBQTFJOEZtT0Q5V2EwQXBnS2FLZGJVOXNITy5pZTdGemFmZHZIaGM5bDZOcnFnV0duSDdoTUxBUHRCdGZlelFDU1hYLVdjN2VRS2pSSXo3dmkyc2hOWGNuMFlJemFtY1dtWlQ1WVBYNzU4TjItSGhSeEdnUWJVYm5hY1V5RjcuUnNfTEdGZTdBWnktRER1ZmtRWkxGVUdOLWtJNm13a1FxVGRXdHlvVFRSVWhJZVU4TmktRHltcHYuWUdkb29hckZqMyJ9|410a506ccaf23ec18e7a333608daf48c75c456fa73b5642fec83bc72a817f2dd; q_c133295412cedd47beb8ac73d2f8d799dc|1691853733000|1691853733000; tstr; z_c02|1:0|10:1691853926|4:z_c0|92:Mi4xTjJBTk9RQUFBQUFCME5kS2dPYjFGaVlBQUFCZ0FsVk5wZkhFWlFBTVBnYVM1Q3hxV0daOXpnd1NVYi1TSVpaV0R3|cebdc70f00c92b22caefa8a052d557ac292179eb7bd18584831879633253e775; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c491691854374; KLBRSIDdc02df4a8178e8c4dfd0a3c8cbd8c726|1691854342|1691853646
}
urllib的Handler处理器 Handler用于定制更高级的请求头:随着业务逻辑的复杂,请求对象定制满足不了我们的需求,比如动态Cookie和代理就不能使用请求对象的定制 1、获取handler对象 2、获取opener对象 3、调用open方法
import urllib.request as request

url = "http://www.baidu.com"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
}
geneRequest = request.Request(url=url, headers=headers)
# handler build_opener open
# 获取handler对象
handler = request.HTTPHandler()
# 获取opener对象
opener = request.build_opener(handler)
# 调用open方法
response = opener.open(geneRequest)
content = response.read().decode("utf-8")
print(content)
urllib代理 代理的作用:使用别人的ip访问,突破ip访问限制、访问内部资源、提高访问速度、隐藏真实ip。使用方式与handler一致,只不过多了代理配置
import urllib.request as request

url = "http://www.baidu.com/s?wd=ip"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
}
geneRequest = request.Request(url=url, headers=headers)
# 代理字典
proxies = {
    "http": "189.127.90.85:8080",
}
handler = request.ProxyHandler(proxies=proxies)
opener = request.build_opener(handler)
# 调用open方法
response = opener.open(geneRequest)
content = response.read().decode("utf-8")
print(content)
proxies = {"http": "189.127.90.85:8080"} 代理地址是网上找的,不好用的居多,如果长时间没有反应或者报错则说明不好用,可以自己买
代理池 在生产中会有一堆高匿的代理池,简单实现如下
import urllib.request as request
import random

url = "http://www.baidu.com/s?wd=ip"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
}
geneRequest = request.Request(url=url, headers=headers)
# 简易版代理池
proxies_pool = [
    {"http": "189.127.90.85:8080"},
    {"http": "36.88.170.170:8089"},
]
proxies = random.choice(proxies_pool)
handler = request.ProxyHandler(proxies=proxies)
opener = request.build_opener(handler)
# 调用open方法
response = opener.open(geneRequest)
content = response.read().decode("utf-8")
print(content)