
Commit 14b7ea3

1 parent 828fd42 commit 14b7ea3

9 files changed: 116 additions & 17 deletions

.idea/workspace.xml

Lines changed: 11 additions & 17 deletions

Python各种模块使用/img/1.jpg (413 KB)

Python各种模块使用/img/2.jpg (206 KB)

Python各种模块使用/img/3.jpg (68.9 KB)
Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
from lxml import etree
import aiohttp, asyncio
import time

list_url = ["https://www.douban.com/doulist/41691053/?start={}&sort=seq&sub_type=4".format(number) for number in
            range(0, 125, 25)]


async def fetch(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as html:
            response = await html.text(encoding="utf-8")
            return response


async def parser(url):
    response = await fetch(url)
    dom = etree.HTML(response)
    selector = dom.xpath('//div[starts-with(@id,"item")]')
    for item in selector:
        print(item.xpath('div/div[2]/div[@class="title"]/a/text()')[0].strip(
            "\n").strip())  # div//div would match the class="title" div no matter where it sits under this div


# Adding the async keyword to a function turns it into an asynchronous (coroutine) function.
# Each thread has one event loop; calling asyncio.get_event_loop() in the main thread creates it.
# Handing the async tasks to that loop's run_until_complete method lets the event loop schedule the coroutines.
# The async keyword declares a coroutine function; calling it returns a coroutine object.
# The await keyword suspends the coroutine until the asynchronous I/O result comes back.

# start = time.time()
loop = asyncio.get_event_loop()
tasks = [parser(url) for url in list_url]
loop.run_until_complete(asyncio.gather(*tasks))
# print(time.time() - start)
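
The comments above outline how async/await and the event loop interact. As a minimal self-contained sketch of the same fetch-and-gather pattern (the httpbin.org test URLs and the concurrency cap of 5 are illustrative assumptions, not part of this commit), a single ClientSession can be shared and an asyncio.Semaphore used to limit how many requests run at once:

import aiohttp, asyncio

async def fetch(session, url, sem):
    async with sem:                      # wait for a free slot before requesting
        async with session.get(url) as resp:
            return await resp.text()

async def main():
    sem = asyncio.Semaphore(5)           # at most 5 requests in flight
    urls = ["https://httpbin.org/get?page={}".format(i) for i in range(10)]  # stand-in targets
    async with aiohttp.ClientSession() as session:   # one session reused for all requests
        pages = await asyncio.gather(*(fetch(session, u, sem) for u in urls))
    print(len(pages), "pages fetched")

asyncio.get_event_loop().run_until_complete(main())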
Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
import csv


# Reading a csv file without the csv module, printing the contents
# for line in open("file/sample.csv"):
#     title, year, director = line.split(",")
#     print(title, year, director)


# Using the csv module to work with a csv file
# newline='' keeps the csv module from inserting blank rows on Windows
with open('file/sample.csv', 'a', newline='') as file:
    # reader = csv.reader(file)
    # for title, year, director in reader:
    #     print(title, year, director)

    writer = csv.writer(file)
    writer.writerow(['title', 'summary', 'year'])
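
A small sketch of the read side, assuming file/sample.csv now begins with the title,summary,year header row written above; csv.DictReader maps each data row to a dict keyed by that header:

import csv

with open('file/sample.csv', newline='') as file:
    for row in csv.DictReader(file):
        print(row['title'], row['year'])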
Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
from selenium import webdriver
from bs4 import BeautifulSoup
import time

driver = webdriver.PhantomJS(executable_path=r'D:\phantomjs-2.1.1-windows\bin\phantomjs')  # build a headless browser to render JS-loaded content
# driver = webdriver.Firefox()
driver.get('https://www.shanbay.com/read/news/')

time.sleep(5)  # fixed 5-second delay so the page can finish loading
soup = BeautifulSoup(driver.page_source, 'lxml')
# print(driver.page_source)
tags = soup.find_all('a', attrs={'class': 'linkContainer'})
# for i in tags:
#     print(i['href'])
# driver.find_element_by_id('kw').send_keys(keyword)
# driver.find_element_by_id('su').click()
# for i in range(1, 81):
#     driver.find_element_by_class_name('icon-refresh').send_keys(Keys.DOWN)

'''This takes a screenshot at that moment; the image will be saved in the script's working directory'''
try:
    driver.get('http://whatsmyuseragent.com/')

except Exception as e:
    driver.save_screenshot('screenshot.png')
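
PhantomJS support has been removed from newer Selenium releases; as a rough alternative sketch (assuming chromedriver is on PATH and the page still exposes a.linkContainer links), headless Chrome plus an explicit WebDriverWait can replace the fixed sleep:

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

options = Options()
options.add_argument('--headless')           # run Chrome without a visible window
driver = webdriver.Chrome(options=options)   # assumes chromedriver is on PATH
driver.get('https://www.shanbay.com/read/news/')

# wait up to 10 seconds for the links to appear instead of sleeping a fixed time
links = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'a.linkContainer'))
)
for a in links:
    print(a.get_attribute('href'))
driver.quit()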
Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
import requests
from bs4 import BeautifulSoup

# Proxy test, example 1
# proxies = {
#     'http': '115.127.77.10:80'
# }
# r = requests.get("http://icanhazip.com/", proxies=proxies)  # http://httpbin.org/ip works as well
# print(r.text)
# r2 = requests.get('http://httpbin.org/get?show_env=1', proxies=proxies)
# print(r2.text)

# Request http://httpbin.org/get?show_env=1 to see the full request headers and judge how anonymous the proxy is.
# Proxy pool: http://7xrnwq.com1.z0.glb.clouddn.com/proxy_list.txt
# Proxy pool: http://api.xicidaili.com/free2016.txt

# request = requests.get('http://7xrnwq.com1.z0.glb.clouddn.com/proxy_list.txt')
# print(request.text)

# Proxy test, example 2
ss = requests.session()
ss.proxies = {'http': 'http://123.206.6.17:3128', 'https': 'http://123.206.6.17:3128'}
print(ss.get('http://www.qq.com'))
print(ss.get('https://www.github.com'))
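
As a sketch of how to check that traffic really goes through the proxy, one can compare the origin IP httpbin reports with and without it (the proxy address is the same illustrative one used above and may no longer be alive):

import requests

proxies = {'http': 'http://123.206.6.17:3128', 'https': 'http://123.206.6.17:3128'}
try:
    direct = requests.get('http://httpbin.org/ip', timeout=5).json()['origin']
    via_proxy = requests.get('http://httpbin.org/ip', proxies=proxies, timeout=5).json()['origin']
    print('direct:', direct, '| via proxy:', via_proxy)
except requests.RequestException as e:
    print('proxy check failed:', e)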
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
import requests

# Download an image and write the raw bytes to disk
url = 'http://upload-images.jianshu.io/upload_images/5831032-3e4d3f9ad5a61b78.jpg?imageMogr2/auto-orient/strip%7CimageView2/2/w/1080/q/50'
r = requests.get(url)
with open('chun.jpg', 'wb') as fo:
    fo.write(r.content)
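
For larger downloads, a streamed variant avoids holding the whole body in memory; a sketch using the same jianshu URL (the 8192-byte chunk size is an arbitrary choice):

import requests

url = 'http://upload-images.jianshu.io/upload_images/5831032-3e4d3f9ad5a61b78.jpg?imageMogr2/auto-orient/strip%7CimageView2/2/w/1080/q/50'
with requests.get(url, stream=True) as r:
    r.raise_for_status()
    with open('chun_stream.jpg', 'wb') as fo:
        for chunk in r.iter_content(chunk_size=8192):
            fo.write(chunk)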
