博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Python_day05
阅读量:4985 次
发布时间:2019-06-12

本文共 14470 字,大约阅读时间需要 48 分钟。

requests之post请求

#访问知乎发现'''Request URL: https://www.zhihu.com/exploreRequest Method: GETuser-agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'''#访问知乎# import requests# response = requests.get(url='https://www.zhihu.com/explore')# print(response.status_code)# print(response.text)# #携带请求头参数访问知乎## import requests## #请求头字典# headers = {
# 'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'# }## #在get请求内添加headers# response = requests.get(url='https://www.zhihu.com/explore',headers = headers)# print(response.status_code)# # print(response.text)## with open('zhihu.html','w',encoding='utf-8')as f:# f.write(response.text)'''携带cookie携带登录cookie破解博客园登录验证请求url: Request URL: https://home.cnblogs.com/set/ Request Method: GET请求头 user-agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36 cookie:_ga=GA1.2.897792622.1560415926; _gid=GA1.2.2097222173.1560415926; __gads=ID=90a50b775a32932c:T=1560415924:S=ALNI_MbW1TkLegg86CkkukfahBA7y3wgnw; .Cnblogs.AspNetCore.Cookies=CfDJ8D8Q4oM3DPZMgpKI1MnYlrkt64Ro4vuztDN4I9mCKsOpo66KpTVpoyHTc3VgN3z62nZN14QzEMAPB1O5u7etKJDD6uU9zOhearKCZDYU7iDhDQucYSDobytq6uMDPmc3zFfgfMCs1UB5EdhPnpGGFWrZHCXZbLnLOsrdf98km6FgjfWqfVjKIzxzGq8NZTXmwXie-musLJnFRtnCqc5UsdseCokD6Ea1nMnI57Qa8V-rVLWfrzNhoMAwa6C68qe_I4wyRGRmSORbT2UE6JzNEtgkEftTqU0ZbhpBSw0K7rzwymnI8PYt6CmVq1GpKy6Xiz-cYxkcyUPCD7YUvWTy-E7O9C81fONFS50KTEtJgjHKcioDKlivHKKeKlj62Qso5ITIP9rGbRtE-aajWrkqap5Phifm9T96hnTjWf3heC0ihwOevA4Ywa8EmTky0xYz_6D53Kflvmp3peXI4g-67bo; .CNBlogsCookie=A19E10B88B12CA6DE89A7CDD8BD2178DC8114CDB3ECECB0F5174F7F0031898EEDC0DD2DD35656021DB87E62104F7C96CF6CECE773B09298826E467457E303F9B52A44B14CC964C0BDDEBF2F3578618F45F2CFD20'''import requests#请求urlurl = 'https://home.cnblogs.com/set/'#请求头# headers = {
# 'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',## #在请求头中拼接cookie# 'cookie':'_ga=GA1.2.897792622.1560415926; _gid=GA1.2.2097222173.1560415926; __gads=ID=90a50b775a32932c:T=1560415924:S=ALNI_MbW1TkLegg86CkkukfahBA7y3wgnw; .Cnblogs.AspNetCore.Cookies=CfDJ8D8Q4oM3DPZMgpKI1MnYlrkt64Ro4vuztDN4I9mCKsOpo66KpTVpoyHTc3VgN3z62nZN14QzEMAPB1O5u7etKJDD6uU9zOhearKCZDYU7iDhDQucYSDobytq6uMDPmc3zFfgfMCs1UB5EdhPnpGGFWrZHCXZbLnLOsrdf98km6FgjfWqfVjKIzxzGq8NZTXmwXie-musLJnFRtnCqc5UsdseCokD6Ea1nMnI57Qa8V-rVLWfrzNhoMAwa6C68qe_I4wyRGRmSORbT2UE6JzNEtgkEftTqU0ZbhpBSw0K7rzwymnI8PYt6CmVq1GpKy6Xiz-cYxkcyUPCD7YUvWTy-E7O9C81fONFS50KTEtJgjHKcioDKlivHKKeKlj62Qso5ITIP9rGbRtE-aajWrkqap5Phifm9T96hnTjWf3heC0ihwOevA4Ywa8EmTky0xYz_6D53Kflvmp3peXI4g-67bo; .CNBlogsCookie=A19E10B88B12CA6DE89A7CDD8BD2178DC8114CDB3ECECB0F5174F7F0031898EEDC0DD2DD35656021DB87E62104F7C96CF6CECE773B09298826E467457E303F9B52A44B14CC964C0BDDEBF2F3578618F45F2CFD20'# }# boke_res = requests.get(url,headers=headers)# headers = {
# 'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}### 'cookie':'_ga=GA1.2.897792622.1560415926; _gid=GA1.2.2097222173.1560415926; __gads=ID=90a50b775a32932c:T=1560415924:S=ALNI_MbW1TkLegg86CkkukfahBA7y3wgnw; .Cnblogs.AspNetCore.Cookies=CfDJ8D8Q4oM3DPZMgpKI1MnYlrkt64Ro4vuztDN4I9mCKsOpo66KpTVpoyHTc3VgN3z62nZN14QzEMAPB1O5u7etKJDD6uU9zOhearKCZDYU7iDhDQucYSDobytq6uMDPmc3zFfgfMCs1UB5EdhPnpGGFWrZHCXZbLnLOsrdf98km6FgjfWqfVjKIzxzGq8NZTXmwXie-musLJnFRtnCqc5UsdseCokD6Ea1nMnI57Qa8V-rVLWfrzNhoMAwa6C68qe_I4wyRGRmSORbT2UE6JzNEtgkEftTqU0ZbhpBSw0K7rzwymnI8PYt6CmVq1GpKy6Xiz-cYxkcyUPCD7YUvWTy-E7O9C81fONFS50KTEtJgjHKcioDKlivHKKeKlj62Qso5ITIP9rGbRtE-aajWrkqap5Phifm9T96hnTjWf3heC0ihwOevA4Ywa8EmTky0xYz_6D53Kflvmp3peXI4g-67bo; .CNBlogsCookie=A19E10B88B12CA6DE89A7CDD8BD2178DC8114CDB3ECECB0F5174F7F0031898EEDC0DD2DD35656021DB87E62104F7C96CF6CECE773B09298826E467457E303F9B52A44B14CC964C0BDDEBF2F3578618F45F2CFD20'### print('1277886541@qq.com'in boke_res.text)

response响应

import requests

response = requests.get('https://baidu.com')

# response attributes
print(response.status_code)          # response status code
print(response.url)                  # final url of the request
print(response.encoding)             # character encoding detected from the headers
response.encoding = 'utf-8'          # force utf-8 before reading .text
print(response.text)                 # body decoded as text
print(response.content)              # raw body bytes
print(response.headers)              # response headers of the page
print(response.history)              # redirect history (previous responses)
# cookies: available 1) as a CookieJar object  2) convertible to a dict
print(response.cookies)              # cookies object
print(response.cookies.get_dict())   # cookies converted to a dict
print(response.cookies.items())      # cookies as (name, value) pairs
print(response.encoding)
print(response.elapsed)              # time the request took

import requests

# send a GET request to a video address and stream the body to disk
url = 'https://vd3.bdstatic.com/mda-ic4pfhh3ex32svqi/hd/mda-ic4pfhh3ex32svqi.mp4?auth_key=1557973824-0-0-bfb2e69bb5198ff65e18065d91b2b8c8&bcevod_channel=searchbox_feed&pd=wisenatural&abtest=all.mp4'
response = requests.get(url, stream=True)  # stream=True turns content into an iterator
print(response.content)
with open('love_for_GD.mp4', 'wb') as f:
    for content in response.iter_content():
        f.write(content)

requests高级用法

'''
Certificate verification (most sites use https)
'''
import requests
# # For an ssl request, the certificate is checked first; if it is invalid
# # an error is raised and the program stops
# response = requests.get('https://www.xiaohuar.com')
# print(response.status_code)

# Improvement 1: suppress the error, but a warning is still printed
# import requests
# response = requests.get('https://www.xiaohuar.com', verify=False)
# # skip certificate verification: warns, returns 200
# print(response.status_code)

# Improvement 2: suppress both the error and the warning
# import requests
# import urllib3
# urllib3.disable_warnings()  # silence the warning
# response = requests.get('https://www.xiaohuar.com', verify=False)
# print(response.status_code)

# Improvement 3: attach a client certificate
# Many https sites can be visited with or without a certificate
# (Zhihu, Baidu, ...). Some have a hard requirement, e.g. only
# designated users holding the certificate may access the site.
# import requests
# import urllib3
# # urllib3.disable_warnings()  # silence the warning
# # pseudo-code
# response = requests.get(
#     'https://www.xiaohuar.com',
#     # verify=False,
#     # /path/server.crt is where the certificate lives, /path/key the key
#     cert=('/path/server.crt', '/path/key'))
# print(response.status_code)

'''
Timeout settings
'''
# Two timeout forms: float or tuple
# timeout=0.1        # timeout for receiving data
# timeout=(0.1,0.2)  # 0.1 = connect timeout, 0.2 = read timeout
# import requests
# response = requests.get('https://www.baidu.com',
#                         timeout=0.0001)
# # print(response.elapsed)
# print(response.status_code)

'''
Proxy settings: send the request to a proxy first and let the proxy
forward it (getting an IP banned is a common occurrence).
'''
# import requests
# proxies={
#     # proxy with username and password: they come before the @ sign
#     'http':'http://tank:123@localhost:9527',
#     'http':'http://localhost:9527',
#     'https':'https://localhost:9527',
# }
# response=requests.get('https://www.12306.cn',
#                       proxies=proxies)
#
# print(response.status_code)

'''
Crawl xicidaili free proxies:
    1. visit the xici free-proxy page
    2. parse out all proxies with the re module
    3. check each scraped proxy against an ip-test site
    4. if test_ip raises, the proxy is dead; otherwise it works
    5. use a working proxy for a proxied crawl

Sample row: Cn 112.85.131.99 9999 江苏南通 高匿 HTTPS 6天 19-05-16 11:20
re: (.*?).*?(.*?)
NOTE(review): the regex's HTML tags were stripped when this post was
scraped; the pattern as shown is incomplete — confirm against the
original table markup.
'''
# import requests
# import re
# import time
#
# HEADERS = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
# }
#
#
# def get_index(url):
#     time.sleep(1)
#     response = requests.get(url, headers=HEADERS)
#     return response
#
#
# def parse_index(text):
#     # NOTE(review): pattern lost its HTML tags during scraping (see above)
#     ip_list = re.findall('.*?(.*?).*?(.*?)', text, re.S)
#     for ip_port in ip_list:
#         ip = ':'.join(ip_port)
#         yield ip
#
#
# def test_ip(ip):
#     print('测试ip: %s' % ip)
#     try:
#         proxies = {
#             'https': ip
#         }
#
#         # ip-test site
#         ip_url = 'https://www.ipip.net/'
#
#         # request the test site through the proxy;
#         # a 200 response means the proxy under test works
#         response = requests.get(ip_url, headers=HEADERS, proxies=proxies, timeout=1)
#
#         if response.status_code == 200:
#             print(f'有用的ip:{ip}')
#             return ip
#
#     # a dead proxy raises an exception caught here
#     except Exception as e:
#         print(e)
#
#
# # crawl the NBA site through the working proxy
# def spider_nba(good_ip):
#     url = 'https://china.nba.com/'
#
#     proxies = {
#         'https': good_ip
#     }
#
#     response = requests.get(url, headers=HEADERS, proxies=proxies)
#     print(response.status_code)
#     print(response.text)
#
#
# if __name__ == '__main__':
#     base_url = 'https://www.xicidaili.com/nn/{}'
#
#     for line in range(1, 3677):
#         ip_url = base_url.format(line)
#
#         response = get_index(ip_url)
#
#         # parse the xici page to get the list of proxies
#         ip_list = parse_index(response.text)
#
#         # loop over every proxy
#         for ip in ip_list:
#             # print(ip)
#
#             # test each scraped proxy
#             good_ip = test_ip(ip)
#
#             if good_ip:
#                 # got a real proxy, start the proxied crawl
#                 spider_nba(good_ip)

'''
Authentication
'''
import requests

# exercise authentication against the GitHub API
url = 'https://api.github.com/user'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
}

# Test 1: unauthenticated request fails with 401
# response = requests.get(url, headers=HEADERS)
# print(response.status_code)  # 401
# print(response.text)
'''
Printed result:
{
  "message": "Requires authentication",
  "documentation_url": "https://developer.github.com/v3/users/#get-the-authenticated-user"
}
'''

# Test 2: authenticate with requests.auth.HTTPBasicAuth;
# on success the user info is returned
# from requests.auth import HTTPBasicAuth
# response = requests.get(url, headers=HEADERS, auth=HTTPBasicAuth('tankjam', 'kermit46709394'))
# print(response.text)

# Test 3: the auth parameter of requests.get defaults to HTTPBasicAuth;
# on success the user info is returned
# response = requests.get(url, headers=HEADERS, auth=('tankjam', 'kermit46709394'))
# print(response.text)

'''
Uploading files
'''
import requests

# upload a text file
# files1 = {'file': open('user.txt', 'rb')}
# # files is the dedicated POST parameter for uploads
# response = requests.post('http://httpbin.org/post', files=files1)
# print(response.status_code)  # 200
# print(response.text)  # 200

# upload an image file
# files2 = {'jpg': open('一拳.jpg', 'rb')}
# response = requests.post('http://httpbin.org/post', files=files2)
# print(response.status_code)  # 200
# print(response.text)  # 200

# upload a video file
# files3 = {'movie': open('love_for_GD.mp4', 'rb')}
# response = requests.post('http://httpbin.org/post', files=files3)
# print(response.status_code)  # 200
# print(response.text)  # 200

selenium基本使用

'''selenium模块讲解一、什么是selenium?    最初是一个自动化测试工具。可以使用它帮我们驱动浏览器    自动去执行某些自定义好的操作。例如在页面中执行js代码、跳过验证登录。二、为什么要使用selenium?    1.优点:        使用requests模块登录需要分析大量的复杂通信流程,使用selenium可轻松跳过登录验证    2.缺点:        浏览器会加载css、js、图片、视频、...数据、爬虫效率相比requests模块要低三、如何使用selenium?    下载selenium模块:        pip3 install -- uesr -i https://pypi.tuna.tsinghua.edu.cn/simple selenium    下载浏览器驱动:        http://npm.taobao.org/mirrors/chromedriver/2.38/'''#selenium之第一次from selenium import webdriver  # 用来驱动浏览器的#调用得到一个动作链对象,破解滑动验证码时候用,可以拖动图片from selenium.webdriver import ActionChains  # 破解滑动验证码的时候用的 可以拖动图片from selenium.webdriver.common.by import By  # 按照什么方式查找,By.ID,By.CSS_SELECTORfrom selenium.webdriver.common.keys import Keys  # 键盘按键操作from selenium.webdriver.support import expected_conditions as EC  # 和下面WebDriverWait一起用的from selenium.webdriver.support.wait import WebDriverWait  # 等待页面加载某些元素# import timefrom selenium.webdriver.support.wait import WebDriverWaitimport time#chrome = webdriver.Chrome(r'C:\Users\涂先生\Downloads\chromedriver.exe')#括号内输入chromedriver绝对路径chrome = webdriver.Chrome()#若try出现异常try:    #  #往涂懿磊博客主页发送get请求    # chrome.get('https://www.cnblogs.com/TuLie-cs/')    #参数1:驱动对象  参数2:等待时间    wait = WebDriverWait(chrome, 10)    #1.访问百度    time.sleep(3)    chrome.get('https://www.baidu.com/')    #2.查找input输入框    input_tag = wait.until(        EC.presence_of_element_located((By.ID,'kw')))    #3.搜索一拳超人    input_tag.send_keys('一拳超人')    #4.按键盘回车键    input_tag.send_keys(Keys.ENTER)    time.sleep(3)#无论发生什么都会关闭浏览器finally:#关闭浏览器    chrome.close()'''实例2'''try:    # 往tank博客主页发送get请求    # chrome.get('https://www.cnblogs.com/kermitjam/')    # 参数1: 驱动对象  参数2: 等待时间    wait = WebDriverWait(chrome, 10)    # 1、访问京东主页    chrome.get('https://www.jd.com/')    # 2、查找input输入框    input_tag = wait.until(EC.presence_of_element_located((By.ID, "key")))    # 3、搜索唐诗三百首    input_tag.send_keys('唐诗三百首')    # 4、根据class属性名称查找标签    search_button = wait.until(        EC.presence_of_element_located((By.CLASS_NAME, 'button')))    
# 5、点击搜索按钮    search_button.click()    time.sleep(3)# 无论发生什么都会关闭浏览器finally:    # 关闭浏览器    chrome.close()

selenium之基本选择器

# from selenium import webdriver  # drives the browser
# import time
#
# '''
# Implicit waits
# '''
# # get the driver object
# driver = webdriver.Chrome()
#
# try:
#     # explicit wait: wait for one specific element to load
#     # arg 1: driver object  arg 2: wait time
#     # wait = WebDriverWait(chrome, 10)
#
#     driver.get('https://china.nba.com/')
#
#     # implicit wait: wait for all elements on the page to load
#     driver.implicitly_wait(10)
#     news_tag = driver.find_element_by_class_name('nav-news')
#     # the element object
#     print(news_tag)
#     # the element's tag name
#     print(news_tag.tag_name)
#
#     time.sleep(10)
#
# finally:
#     driver.close()

from selenium import webdriver  # drives the browser
import time

'''
===============All lookup methods===================
    element  finds one matching tag
    elements finds all matching tags

    1. find_element_by_link_text          by the full link text
    2. find_element_by_id                 by id
    3. find_element_by_class_name
    4. find_element_by_partial_link_text
    5. find_element_by_name
    6. find_element_by_css_selector
    7. find_element_by_tag_name
'''
# get the driver object
driver = webdriver.Chrome()

try:
    # send a request to Baidu
    driver.get('https://www.baidu.com/')
    driver.implicitly_wait(10)

    # 1. find_element_by_link_text: locate by full link text
    # # locate the "登录" (login) link
    # send_tag = driver.find_element_by_link_text('登录')
    # send_tag.click()

    # 2. find_element_by_partial_link_text: locate an <a> tag by partial text
    login_button = driver.find_element_by_partial_link_text('登')
    login_button.click()
    time.sleep(1)

    # 3. find_element_by_class_name: locate by class attribute
    login_tag = driver.find_element_by_class_name('tang-pass-footerBarULogin')
    login_tag.click()
    time.sleep(1)

    # 4. find_element_by_name: locate by name attribute
    username = driver.find_element_by_name('userName')
    username.send_keys('15622792660')
    time.sleep(1)

    # 5. find_element_by_id: locate by id attribute
    password = driver.find_element_by_id('TANGRAM__PSP_10__password')
    password.send_keys('*******')
    time.sleep(1)

    # 6. find_element_by_css_selector: locate via an attribute selector
    # locate the login button by id
    login_submit = driver.find_element_by_css_selector('#TANGRAM__PSP_10__submit')
    # driver.find_element_by_css_selector('.pass-button-submit')
    login_submit.click()

    # 7. find_element_by_tag_name: locate by tag name
    div = driver.find_element_by_tag_name('div')
    print(div.tag_name)

    time.sleep(10)

finally:
    driver.close()

 

转载于:https://www.cnblogs.com/TuLie-cs/p/11040865.html

你可能感兴趣的文章
python之装饰器
查看>>
NIO-3网络通信
查看>>
系统短信库的一些用法
查看>>
日志管理
查看>>
js常见正则表达式验证及方法(一)
查看>>
IOS开发 Missing submodule 'XXXX' 警告
查看>>
c语言海量数据处理
查看>>
create table like 和create table select 比较
查看>>
获取存储过程
查看>>
Good Bye 2015B
查看>>
Parallel Gradient Boosting Decision Trees
查看>>
4.Twisted中的Deferreds
查看>>
[C#学习笔记]你真的理解拆箱装箱吗?
查看>>
CSS背景使用,引入、尺寸、平铺、定位、多重背景
查看>>
DB2 因版本问题 Reorg 出错 解决办法
查看>>
安卓 notes
查看>>
Session对象详解[源于网络]
查看>>
经历无数次失败,终于把kamailio装上了
查看>>
Android ListActivity实现遍历文件列表,查看文档类文件
查看>>
C++自定义NULLPTR
查看>>