
Commit 7eb482b (1 parent: 76b4ddc)

Wrote my own script to convert the PDF into source code

78 files changed: +9892 -709 lines


basic/myos/read_write.py (+71 -24)
@@ -25,20 +25,34 @@ def read_demo():
     pass
 
 
-def read_plus(txt_file, init_c, base_dir):
+def convert_cookbook(txt_file, base_dir):
     """Quick demo of the seek method"""
-    chapter = init_c - 1  # chapter
-    paper = 0  # section
-    write_file = None  # the file to write into next
-    temp_lines = []  # temporary buffer for chapter/section content
+    chapter = None  # chapter
+    paper = None  # section
+    write_file = None  # the file to write into next
+    temp_lines = []  # temporary buffer for chapter/section content
+    hit_paper = False  # whether a section heading was just matched
+    hit_offset = 0  # number of lines read since that match
     with open(txt_file, mode='r', encoding='utf-8') as f:
         for line in f:
-            if re.match('^CHAPTER \d+$', line.strip()):
-                chapter += 1
+            c_match = re.match('^CHAPTER (\d+)$', line.strip())
+            p_match = re.match('^(\d+)\.(\d+)\. ', line.strip())
+            a_match = re.match('^APPENDIX A$', line.strip())
+            if c_match:
+                old_chapter = chapter
+                chapter = int(c_match.group(1))
+                if old_chapter and chapter - old_chapter != 1:
+                    _log.error('Chapter numbers are not consecutive: {}'.format(line.strip()))
+                    continue
                 # a new chapter begins
-                _log.info('Starting a new chapter, chapter {}!'.format(chapter))
+                _log.info('------------------------------------------------------')
+                _log.info('---------Starting a new chapter, chapter {}!-----------'.format(chapter))
                 # flush the accumulated lines into the previous file
                 if temp_lines:
+                    _log.info('write_file={}'.format(write_file))
+                    with open(write_file, mode='r', encoding='utf-8') as wf:
+                        for i in range(7):
+                            temp_lines.insert(i, wf.readline())
                     with open(write_file, mode='w', encoding='utf-8') as wf:
                         wf.writelines(temp_lines)
                     temp_lines.clear()
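The chapter branch above re-reads the first seven lines of the target RST file and re-inserts them ahead of the collected text before overwriting it, so the file's existing header block survives the rewrite; the section and appendix branches in the next hunk do the same. A minimal standalone sketch of that pattern (the function name and arguments are illustrative, not part of the commit):

# Sketch only: keep the first header_len lines of an existing file and
# write the freshly collected lines after them (mirrors the pattern above).
def rewrite_keeping_header(path, new_lines, header_len=7):
    with open(path, mode='r', encoding='utf-8') as f:
        header = [f.readline() for _ in range(header_len)]
    with open(path, mode='w', encoding='utf-8') as f:
        f.writelines(header + new_lines)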
@@ -47,32 +61,65 @@ def read_plus(txt_file, init_c, base_dir):
                 if not os.path.exists(c_dir):
                     os.makedirs(c_dir)
                 # locate the chapter file
-                chapters_dir = join(c_dir, 'source', 'chapters')
-                onlyfiles = [join(chapters_dir, f) for f in os.listdir(chapters_dir)
+                chapters_dir = join(base_dir, 'source', 'chapters')
+                onlyfiles = [f for f in os.listdir(chapters_dir)
                              if os.path.isfile(join(chapters_dir, f))]
-                write_file = next(f for f in onlyfiles if f.startswith('p{:02d}'.format(chapter)))
-            elif re.match('^{}.{}. '.format(chapter, paper + 1), line.strip()):
-                f.seek(1, 1)  # move forward one line
-                if f.readline().strip() == 'Problem':
+                write_file = next(join(chapters_dir, f) for f in onlyfiles if
+                                  f.startswith('p{:02d}'.format(chapter)))
+                _log.info('Found chapter file: {}'.format(write_file))
+            elif p_match:
+                hit_paper = True
+                paper = int(p_match.group(2))
+                hit_offset = 0
+            elif hit_paper and hit_offset <= 2:
+                if line.strip() == 'Problem':
                     # a new section begins here
-                    paper += 1
-                    f.seek(-1, 1)  # step back one line
                     _log.info('Starting a new section: chapter {}, section {}!'.format(chapter, paper))
                     # flush the accumulated lines into the previous file
                     if temp_lines:
-                        with open(write_file, mode='w', encoding='utf-8') as wf:
-                            wf.writelines(temp_lines)
+                        if 'chapters' not in write_file:
+                            _log.info('write_file={}'.format(write_file))
+                            with open(write_file, mode='r', encoding='utf-8') as wf:
+                                for i in range(7):
+                                    temp_lines.insert(i, wf.readline())
+                        with open(write_file, mode='w', encoding='utf-8') as wf:
+                            wf.writelines(temp_lines)
                         temp_lines.clear()
                     # pick the next section file to write into
                     paper_dir = join(base_dir, 'source', 'c{:02d}'.format(chapter))
-                    pfs = [join(paper_dir, f) for f in os.listdir(paper_dir)
-                           if os.path.isfile(join(paper_dir, f))]
-                    write_file = next(f for f in pfs if f.startswith('p{:02d}'.format(chapter)))
-                else:
-                    f.seek(-1, 1)
+                    pfs = [f for f in os.listdir(paper_dir)
+                           if os.path.isfile(join(paper_dir, f))]
+                    write_file = next(
+                        join(paper_dir, f) for f in pfs if f.startswith('p{:02d}'.format(paper)))
+                    _log.info('Next section file to write: {}'.format(write_file))
+                    # create the section source-code file
+                    c_dir = join(base_dir, 'cookbook', 'c{:02d}'.format(chapter))
+                    with open(join(c_dir, 'p{:02d}_.py'.format(paper)), 'w',
+                              encoding='utf-8') as pfile:
+                        pfile.write('#!/usr/bin/env python\n')
+                        pfile.write('# -*- encoding: utf-8 -*-\n')
+                        pfile.write('"""\n')
+                        pfile.write('Topic: \n')
+                        pfile.write('Desc : \n')
+                        pfile.write('"""\n')
+                    hit_paper = False
+                hit_offset += 1
+                if hit_offset > 2:
+                    hit_paper = False
+            elif a_match:
+                # flush the accumulated lines into the previous file
+                if temp_lines:
+                    _log.info('write_file={}'.format(write_file))
+                    with open(write_file, mode='r', encoding='utf-8') as wf:
+                        for i in range(7):
+                            temp_lines.insert(i, wf.readline())
+                    with open(write_file, mode='w', encoding='utf-8') as wf:
+                        wf.writelines(temp_lines)
+                    temp_lines.clear()
             else:
                 temp_lines.append(line)
 
 
 if __name__ == '__main__':
-    print('{:02d}'.format(11))
+    convert_cookbook(r'D:\download\20150430\pc_after.txt'
+                     , r'D:\work\projects\gitprojects\python3-cookbook')
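The conversion above hinges on three heading patterns taken straight from the diff: '^CHAPTER (\d+)$' for chapter headings, '^(\d+)\.(\d+)\. ' for numbered section headings, and '^APPENDIX A$' as the end marker. A quick sketch showing how those regexes classify input lines (the sample lines are illustrative, not taken from the actual PDF text):

import re

# Illustrative sample lines; only the regexes come from the commit above.
samples = ['CHAPTER 11',
           '11.1. Interacting with HTTP Services as a Client',
           'APPENDIX A',
           'ordinary body text']
for s in samples:
    if re.match('^CHAPTER (\d+)$', s.strip()):
        print('chapter heading:', s)
    elif re.match('^(\d+)\.(\d+)\. ', s.strip()):
        print('section heading:', s)
    elif re.match('^APPENDIX A$', s.strip()):
        print('appendix marker:', s)
    else:
        print('body line:', s)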

source/c11/p01_interact_with_http_services_as_client.rst (+197 -9)
@@ -5,14 +5,202 @@
 ----------
 问题
 ----------
-todo...
+You need to access various services via HTTP as a client. For example, downloading
+data or interacting with a REST-based API.
 
-----------
-解决方案
-----------
-todo...
+Solution
+For simple things, it's usually easy enough to use the urllib.request module. For
+example, to send a simple HTTP GET request to a remote service, do something like this:
 
-----------
-讨论
-----------
-todo...
+from urllib import request, parse
+
+# Base URL being accessed
+url = 'http://httpbin.org/get'
+
+# Dictionary of query parameters (if any)
+parms = {
+    'name1' : 'value1',
+    'name2' : 'value2'
+}
+
+# Encode the query string
+querystring = parse.urlencode(parms)
+
+# Make a GET request and read the response
+u = request.urlopen(url+'?' + querystring)
+resp = u.read()
+
+If you need to send the query parameters in the request body using a POST method,
+encode them and supply them as an optional argument to urlopen() like this:
+
+from urllib import request, parse
+
+# Base URL being accessed
+url = 'http://httpbin.org/post'
+
+# Dictionary of query parameters (if any)
+parms = {
+    'name1' : 'value1',
+    'name2' : 'value2'
+}
+
+# Encode the query string
+querystring = parse.urlencode(parms)
+
+# Make a POST request and read the response
+u = request.urlopen(url, querystring.encode('ascii'))
+resp = u.read()
+
+If you need to supply some custom HTTP headers in the outgoing request such as a
+change to the user-agent field, make a dictionary containing their value and create a
+Request instance and pass it to urlopen() like this:
+
+from urllib import request, parse
+...
+
+# Extra headers
+headers = {
+    'User-agent' : 'none/ofyourbusiness',
+    'Spam' : 'Eggs'
+}
+
+req = request.Request(url, querystring.encode('ascii'), headers=headers)
+
+# Make a request and read the response
+u = request.urlopen(req)
+resp = u.read()
+
+If your interaction with a service is more complicated than this, you should probably
+look at the requests library. For example, here is equivalent requests code for the
+preceding operations:
+
+import requests
+
+# Base URL being accessed
+url = 'http://httpbin.org/post'
+
+# Dictionary of query parameters (if any)
+parms = {
+    'name1' : 'value1',
+    'name2' : 'value2'
+}
+
+# Extra headers
+headers = {
+    'User-agent' : 'none/ofyourbusiness',
+    'Spam' : 'Eggs'
+}
+
+resp = requests.post(url, data=parms, headers=headers)
+
+# Decoded text returned by the request
+text = resp.text
+
+A notable feature of requests is how it returns the resulting response content from a
+request. As shown, the resp.text attribute gives you the Unicode decoded text of a
+request. However, if you access resp.content, you get the raw binary content instead.
+On the other hand, if you access resp.json, then you get the response content
+interpreted as JSON.
+Here is an example of using requests to make a HEAD request and extract a few fields
+of header data from the response:
+
+import requests
+
+resp = requests.head('http://www.python.org/index.html')
+
+status = resp.status_code
+last_modified = resp.headers['last-modified']
+content_type = resp.headers['content-type']
+content_length = resp.headers['content-length']
+
+Here is a requests example that executes a login into the Python Package index using
+basic authentication:
+import requests
+
+resp = requests.get('http://pypi.python.org/pypi?:action=login',
+                    auth=('user','password'))
+
+Here is an example of using requests to pass HTTP cookies from one request to the
+next:
+
+import requests
+
+# First request
+resp1 = requests.get(url)
+...
+
+# Second requests with cookies received on first requests
+resp2 = requests.get(url, cookies=resp1.cookies)
+
+Last, but not least, here is an example of using requests to upload content:
+
+import requests
+url = 'http://httpbin.org/post'
+files = { 'file': ('data.csv', open('data.csv', 'rb')) }
+
+r = requests.post(url, files=files)
+
+Discussion
+For really simple HTTP client code, using the built-in urllib module is usually fine.
+However, if you have to do anything other than simple GET or POST requests, you really
+can't rely on its functionality. This is where a third-party module, such as requests,
+comes in handy.
+For example, if you decided to stick entirely with the standard library instead of a library
+like requests, you might have to implement your code using the low-level http.client
+module instead. For example, this code shows how to execute a HEAD request:
+
+from http.client import HTTPConnection
+from urllib import parse
+
+c = HTTPConnection('www.python.org', 80)
+c.request('HEAD', '/index.html')
+resp = c.getresponse()
+
+print('Status', resp.status)
+for name, value in resp.getheaders():
+    print(name, value)
+
+Similarly, if you have to write code involving proxies, authentication, cookies, and other
+details, using urllib is awkward and verbose. For example, here is a sample of code that
+authenticates to the Python package index:
+
+import urllib.request
+
+auth = urllib.request.HTTPBasicAuthHandler()
+auth.add_password('pypi','http://pypi.python.org','username','password')
+opener = urllib.request.build_opener(auth)
+
+r = urllib.request.Request('http://pypi.python.org/pypi?:action=login')
+u = opener.open(r)
+resp = u.read()
+
+# From here. You can access more pages using opener
+...
+
+Frankly, all of this is much easier in requests.
+Testing HTTP client code during development can often be frustrating because of all
+the tricky details you need to worry about (e.g., cookies, authentication, headers,
+encodings, etc.). To do this, consider using the httpbin service. This site receives requests
+and then echoes information back to you in the form of a JSON response. Here is an
+interactive example:
+
+>>> import requests
+>>> r = requests.get('http://httpbin.org/get?name=Dave&n=37',
+...                  headers = { 'User-agent': 'goaway/1.0' })
+>>> resp = r.json
+>>> resp['headers']
+{'User-Agent': 'goaway/1.0', 'Content-Length': '', 'Content-Type': '',
+'Accept-Encoding': 'gzip, deflate, compress', 'Connection':
+'keep-alive', 'Host': 'httpbin.org', 'Accept': '*/*'}
+>>> resp['args']
+{'name': 'Dave', 'n': '37'}
+>>>
+
+Working with a site such as httpbin.org is often preferable to experimenting with a real
+site—especially if there's a risk it might shut down your account after three failed login
+attempts (i.e., don't try to learn how to write an HTTP authentication client by logging
+into your bank).
+Although it's not discussed here, requests provides support for many more advanced
+HTTP-client protocols, such as OAuth. The requests documentation is excellent (and
+frankly better than anything that could be provided in this short space). Go there for
+more information.
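One caveat about the interactive httpbin example in the recipe text above: in current releases of requests, json is a method on the response object, so it is called as r.json() rather than read as r.json. A short sketch of the three ways of reading a response body that the discussion mentions:

import requests

# Same httpbin request as in the recipe's interactive example.
r = requests.get('http://httpbin.org/get?name=Dave&n=37',
                 headers={'User-agent': 'goaway/1.0'})

text = r.text        # Unicode-decoded text of the response
raw = r.content      # raw bytes of the response
data = r.json()      # body parsed as JSON (a method in current requests)
print(data['args'])  # {'name': 'Dave', 'n': '37'}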
