Skip to content

Commit e19fd4e

Browse files
committed
Push Code
1 parent e190b93 commit e19fd4e

File tree

10 files changed

+20384
-53
lines changed

10 files changed

+20384
-53
lines changed

.DS_Store

0 Bytes
Binary file not shown.

Chapter 11/11_13.py

Lines changed: 11 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,20 @@
11
"""
2-
进程池Pool使用示例
2+
进程池使用代码示例
33
"""
4-
import threading as td
54
import multiprocessing as mp
65
import time
76

87

9-
def do_something(queue):
10-
result = 0
11-
for i in range(100000):
12-
result += i ** 2
13-
queue.put(result)
14-
15-
16-
def normal():
17-
result = 0
18-
for _ in range(3):
19-
for i in range(100000):
20-
result += i ** 2
21-
print("单线程处理结果:", result)
22-
23-
24-
def multi_threading():
25-
q = mp.Queue()
26-
t1 = td.Thread(target=do_something, args=(q,))
27-
t2 = td.Thread(target=do_something, args=(q,))
28-
t3 = td.Thread(target=do_something, args=(q,))
29-
t1.start()
30-
t2.start()
31-
t3.start()
32-
t1.join()
33-
t2.join()
34-
t3.join()
35-
print("多线程处理结果:", (q.get() + q.get() + q.get()))
36-
37-
38-
def multi_process():
39-
q = mp.Queue()
40-
p1 = mp.Process(target=do_something, args=(q,))
41-
p2 = mp.Process(target=do_something, args=(q,))
42-
p3 = mp.Process(target=do_something, args=(q,))
43-
p1.start()
44-
p2.start()
45-
p3.start()
46-
p1.join()
47-
p2.join()
48-
p3.join()
49-
print("多进程处理结果:", (q.get() + q.get() + q.get()))
8+
def func(msg):
    """Pause for one second, then print the current process name and *msg*."""
    time.sleep(1)
    worker_name = mp.current_process().name
    print(worker_name + " : " + msg)
5011

5112

5213
if __name__ == '__main__':
    pool = mp.Pool()
    # Queue 20 independent tasks; the AsyncResult handles are not kept
    # because func only prints and has no return value to collect.
    for task_no in range(20):
        pool.apply_async(func, ("Do Something %d" % task_no,))
    # close() stops further submissions; join() waits for the workers.
    pool.close()
    pool.join()
    print("子进程执行任务完毕!")

Chapter 11/11_14.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import multiprocessing as mp
2+
import time
3+
4+
5+
def func(msg):
    """Pause for one second, then return "<process name> : <msg>"."""
    time.sleep(1)
    worker_name = mp.current_process().name
    return worker_name + " : " + msg
8+
9+
if __name__ == '__main__':
    pool = mp.Pool()
    # Submit all 20 tasks up front; each call returns an AsyncResult handle.
    results = [
        pool.apply_async(func, ("Do Something %d" % task_no,))
        for task_no in range(20)
    ]

    # close() stops further submissions; join() waits for the workers.
    pool.close()
    pool.join()
    # Print the return values in submission order.
    for handle in results:
        print(handle.get())
    print("子进程执行任务完毕!")

Chapter 11/11_15.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
"""
2+
进程池实现文件行数和字数统计脚本实例
3+
"""
4+
import multiprocessing as mp
5+
import time
6+
import os
7+
8+
result_file = 'result.txt' # 统计结果写入文件名
9+
10+
11+
# 获得路径下的文件列表
12+
def get_files(path):
    """Return the full paths of the Python source files directly inside *path*.

    Only regular files whose name ends with the ``.py`` extension are
    returned.  Matching on the bare suffix ``py`` would also pick up names
    such as ``happy``, and a directory called ``pkg.py`` could not be opened
    as a text file by the caller.

    Args:
        path: Directory to list (not searched recursively).

    Returns:
        A list of ``os.path.join(path, name)`` strings, in ``os.listdir``
        order.
    """
    file_list = []
    for name in os.listdir(path):
        full_path = os.path.join(path, name)
        if name.endswith('.py') and os.path.isfile(full_path):
            file_list.append(full_path)
    return file_list
18+
19+
20+
# 统计每个文件中行数与字符数
21+
def get_msg(path):
    """Count the lines and characters of the UTF-8 text file at *path*.

    The file is streamed line by line instead of being loaded in full, and
    the ``with`` block closes it, so no explicit ``close()`` is needed.

    Args:
        path: Path of the file to read.

    Returns:
        A ``(line_count, char_count, path)`` tuple.  Newline characters are
        not included in ``char_count``.
    """
    lines = 0
    char_count = 0
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            lines += 1
            char_count += len(line.strip("\n"))
    return lines, char_count, path
30+
31+
32+
# 将数据写入到文件中
33+
def write_result(result_list):
    """Append one summary line per entry of *result_list* to the result file.

    Each entry is indexed as ``(line_count, char_count, path)`` and written
    as "<path> 行数:<line_count> 字符数:<char_count>".
    """
    with open(result_file, 'a', encoding='utf-8') as out:
        # The generator is consumed lazily, so lines are written one by one.
        out.writelines(
            entry[2] + " 行数:" + str(entry[0]) + " 字符数:" + str(entry[1]) + "\n"
            for entry in result_list
        )
38+
39+
40+
if __name__ == '__main__':
    start_time = time.time()
    pool = mp.Pool()
    # map() blocks until every file is processed and keeps the input order.
    result_list = pool.map(get_msg, get_files(os.getcwd()))
    pool.close()
    pool.join()
    write_result(result_list)
    elapsed = time.time() - start_time
    print("处理完毕,用时:", elapsed)

Chapter 11/11_16.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
"""
2+
Queue使用示例(单线程,多线程,多进程效率对比)
3+
"""
4+
import threading as td
5+
import multiprocessing as mp
6+
import time
7+
8+
9+
def do_something(queue):
    """Put the sum of i**2 for i in range(100000) onto *queue*."""
    queue.put(sum(i ** 2 for i in range(100000)))
14+
15+
16+
# 单线程
17+
def normal():
    """Compute the 100000-term sum of squares three times in a row and print the total."""
    total = sum(i ** 2 for _ in range(3) for i in range(100000))
    print("单线程处理结果:", total)
23+
24+
25+
# 多线程
26+
def multi_threading():
    """Run do_something in three threads and print the sum of their results."""
    q = mp.Queue()
    workers = [td.Thread(target=do_something, args=(q,)) for _ in range(3)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    # Every thread has put exactly one partial result on the queue.
    total = sum(q.get() for _ in range(3))
    print("多线程处理结果:", total)
38+
39+
40+
# 多进程
41+
def multi_process():
    """Run do_something in three child processes and print the summed result.

    The queue is drained *before* the children are joined.  The
    multiprocessing programming guidelines warn that a process which has put
    items on a queue waits until they are flushed to the underlying pipe
    before it terminates, so joining first can deadlock once the queued data
    no longer fits in the pipe buffer.
    """
    q = mp.Queue()
    workers = [mp.Process(target=do_something, args=(q,)) for _ in range(3)]
    for worker in workers:
        worker.start()
    # Each get() blocks until one child has delivered its partial result.
    total = sum(q.get() for _ in workers)
    for worker in workers:
        worker.join()
    print("多进程处理结果:", total)
53+
54+
55+
if __name__ == '__main__':
    # Time the three strategies one after another.  perf_counter() is the
    # clock meant for measuring elapsed intervals.  The last label used to
    # read "多继承" (multiple inheritance); it reports the multi-process run.
    phases = (
        ("单线程处理耗时:", normal),
        ("多线程处理耗时:", multi_threading),
        ("多进程处理耗时:", multi_process),
    )
    for label, run_phase in phases:
        started = time.perf_counter()
        run_phase()
        print(label, time.perf_counter() - started)

Chapter 11/result.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
/Users/jay/Project/Python/Book/Chapter 11/11_4.py 行数:33 字符数:621
2+
/Users/jay/Project/Python/Book/Chapter 11/11_1.py 行数:32 字符数:578
3+
/Users/jay/Project/Python/Book/Chapter 11/11_5.py 行数:52 字符数:1148
4+
/Users/jay/Project/Python/Book/Chapter 11/11_13.py 行数:20 字符数:333
5+
/Users/jay/Project/Python/Book/Chapter 11/11_16.py 行数:62 字符数:1320
6+
/Users/jay/Project/Python/Book/Chapter 11/11_12.py 行数:23 字符数:410
7+
/Users/jay/Project/Python/Book/Chapter 11/11_15.py 行数:48 字符数:1087
8+
/Users/jay/Project/Python/Book/Chapter 11/11_8.py 行数:17 字符数:259
9+
/Users/jay/Project/Python/Book/Chapter 11/11_11.py 行数:18 字符数:314
10+
/Users/jay/Project/Python/Book/Chapter 11/11_10.py 行数:46 字符数:919
11+
/Users/jay/Project/Python/Book/Chapter 11/11_14.py 行数:20 字符数:401
12+
/Users/jay/Project/Python/Book/Chapter 11/11_9.py 行数:31 字符数:623
13+
/Users/jay/Project/Python/Book/Chapter 11/11_2.py 行数:32 字符数:565
14+
/Users/jay/Project/Python/Book/Chapter 11/11_6.py 行数:23 字符数:453
15+
/Users/jay/Project/Python/Book/Chapter 11/11_7.py 行数:37 字符数:745
16+
/Users/jay/Project/Python/Book/Chapter 11/11_3.py 行数:29 字符数:518

Charpter 18/18_1.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
"""
2+
抓取我主良缘妹子交友信息做数据分析
3+
"""
4+
5+
import requests as rq
6+
import pandas as pd
7+
import time
8+
import random
9+
import os
10+
11+
# 结果写入文件
12+
result_save_file = 'wzly.csv'
13+
14+
# Ajax加载url
15+
ajax_url = "http://www.lovewzly.com/api/user/pc/list/search?"
16+
17+
# 模拟请求头
18+
ajax_headers = {
19+
'Accept': 'application/json, text/javascript, */*; q=0.01',
20+
'Accept-Encoding': 'gzip, deflate, br',
21+
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
22+
'Connection': 'keep-alive',
23+
'Host': 'www.lovewzly.com',
24+
'Referer': 'http://www.lovewzly.com/jiaoyou.html',
25+
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 '
26+
'Safari/537.36',
27+
'X-Requested-With': 'XMLHttpRequest',
28+
}
29+
30+
# post请求参数
31+
form_data = {'gender': '2', 'marry': '1', 'page': '1'}
32+
33+
# csv表头
34+
csv_headers = [
35+
'昵称', '用户id', '头像', '身高', '学历', '省份',
36+
'城市', '出生年份', '性别', '交友宣言'
37+
]
38+
39+
height_interval = ['140', '150', '160', '170', '180'] # 身高范围
40+
edu_interval = ['本科', '大专', '高中', '中专', '初中', '硕士', '博士', '院士'] # 学历范围
41+
age_interval = [
42+
('18-30', 8000), ('26-30', 8000), ('31-40', 8000),
43+
('41-50', 8000), ('50以上', 8000),
44+
] # 年龄范围
45+
46+
47+
# 获取每页交友信息
48+
def fetch_data(page, max_retries=5, timeout=10):
    """Fetch one page of search results and append its rows to the CSV file.

    The page number is stored in the shared ``form_data`` query parameters
    and the Ajax endpoint is requested.  Every profile in the response's
    ``data.list`` becomes one CSV row; the header row is written only
    together with page 1.  A page whose list is empty writes nothing.

    Failed attempts (network error, non-200 status, malformed payload) are
    retried with a growing pause instead of looping forever without delay.
    After ``max_retries`` failures the page is skipped, so a long crawl can
    continue.  Errors raised while writing the CSV file are not caught.

    Args:
        page: Page number to request.
        max_retries: Maximum number of attempts for this page.
        timeout: Seconds to wait for the server on each attempt.

    Returns:
        None.
    """
    fields = (
        'username', 'userid', 'avatar', 'height', 'education',
        'province', 'city', 'birthdayyear', 'gender', 'monolog',
    )
    form_data['page'] = page
    for attempt in range(1, max_retries + 1):
        print("抓取第:" + str(page) + "页!")
        rows = None
        try:
            resp = rq.get(url=ajax_url, params=form_data,
                          headers=ajax_headers, timeout=timeout)
            if resp.status_code == 200:
                rows = [
                    tuple(data[key] for key in fields)
                    for data in resp.json()['data']['list']
                ]
            else:
                print("HTTP状态码异常:" + str(resp.status_code))
        except (rq.RequestException, ValueError, KeyError, TypeError) as e:
            # ValueError covers an undecodable JSON body; KeyError/TypeError
            # cover a payload that lacks the expected structure.
            print(e)
        if rows is not None:
            break
        if attempt < max_retries:
            # Back off a little longer after every failed attempt.
            time.sleep(attempt)
    else:
        print("第" + str(page) + "页抓取失败,已跳过!")
        return None

    if rows:
        pd.DataFrame(rows).to_csv(
            result_save_file,
            header=csv_headers if page == 1 else False,
            index=False, mode='a+', encoding='utf-8')
    return None
71+
72+
73+
if __name__ == '__main__':
    # Crawl only when no result file exists yet.
    already_crawled = os.path.exists(result_save_file)
    if not already_crawled:
        for page_no in range(1, 718):
            # Random 2-10 second pause between page requests.
            pause = random.randint(2, 10)
            time.sleep(pause)
            fetch_data(page_no)

Charpter 18/render.html

Lines changed: 1035 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)