Skip to content

Commit d63ad79

Browse files
committed
修正aboutme
1 parent 080c709 commit d63ad79

File tree

2 files changed

+126
-12
lines changed

2 files changed

+126
-12
lines changed

basic/samples/pdf/cookbook_source.py

Lines changed: 123 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,24 @@
11
#!/usr/bin/env python
22
# -*- encoding: utf-8 -*-
33
"""
4-
Topic: 处理pdf2txt.py -o pc.txt /home/mango/work/perfect.pdf生成的txt文件
5-
Desc : 最后的结果是我想要的,去除了页头和页脚的部分
4+
Desc: 如何将原有的《Python Cookbook》3rd edition.pdf文件转换为我自己的cookbook翻译项目格式
5+
6+
1. 首先使用在线PDF文件切割截取出自己想要的pdf文件部分:http://smallpdf.com/split-pdf
7+
2. 安装PDFMiner依赖,然后使用:pdf2txt.py -o pc.txt /home/mango/work/perfect.pdf生成的txt文件
8+
3. 把生成的txt文件放到idea中,去除某些没用的符号,比如'口'字符,全局replace
9+
4. 调用beauty2()函数,去除了页头和页脚的部分
10+
5. 调用convert_cookbook()函数将txt文件转换为cookbook项目所需的格式
611
"""
712
import re
13+
import os
14+
from os.path import join
15+
import logging
16+
17+
# Route INFO-and-above records to a UTF-8 log file that is truncated
# (mode 'w') each time this module is loaded.
_LOG_FORMAT = '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s'
_LOG_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
_log_file_handler = logging.FileHandler(
    'd:/logs/cookbook.log', mode='w', encoding='utf-8')
logging.basicConfig(
    level=logging.INFO,
    format=_LOG_FORMAT,
    datefmt=_LOG_DATE_FORMAT,
    handlers=[_log_file_handler],
)
# Module logger used by the conversion helpers in this file.
_log = logging.getLogger('app.' + __name__)
822

923

1024
def beauty(txt_file):
@@ -48,13 +62,113 @@ def beauty2(pre_txt, after_txt):
4862
f.writelines(result_lines)
4963

5064

51-
def generate_chapter():
52-
"""
53-
解析文本文件,生成最终的待翻译文件
54-
"""
55-
65+
def _find_by_prefix(directory, prefix):
    """Return the path of the first regular file in *directory* named *prefix*...

    Raises:
        FileNotFoundError: if no regular file in *directory* starts with
            *prefix* (instead of leaking a bare ``StopIteration``).
    """
    for name in os.listdir(directory):
        path = join(directory, name)
        if name.startswith(prefix) and os.path.isfile(path):
            return path
    raise FileNotFoundError(
        'no file starting with {!r} found in {}'.format(prefix, directory))


def _flush_section(write_file, temp_lines, header_lines=7):
    """Rewrite *write_file* as its own first *header_lines* lines + *temp_lines*.

    The buffer *temp_lines* is emptied afterwards. Nothing happens when the
    buffer is already empty. When no target file has been chosen yet
    (``write_file is None``) the buffered text has nowhere to go, so it is
    dropped with a warning rather than crashing in ``open(None)``.
    """
    if not temp_lines:
        return
    if write_file is None:
        _log.warning('discarding %d line(s) found before the first chapter',
                     len(temp_lines))
        temp_lines.clear()
        return
    _log.info('write_file={}'.format(write_file))
    # Keep the existing head of the target file; readline() yields '' past
    # EOF, which writelines() emits as nothing.
    with open(write_file, mode='r', encoding='utf-8') as wf:
        header = [wf.readline() for _ in range(header_lines)]
    with open(write_file, mode='w', encoding='utf-8') as wf:
        wf.writelines(header)
        wf.writelines(temp_lines)
    temp_lines.clear()


def _heading_lines(title):
    """Return the five lines emitted in place of a Solution/Discussion marker."""
    return ['|\n', '\n', '----------\n', title + '\n', '----------\n']


def _write_source_stub(c_dir, paper):
    """Create (or overwrite) the empty ``pNN_.py`` template for one section."""
    os.makedirs(c_dir, exist_ok=True)
    stub_path = join(c_dir, 'p{:02d}_.py'.format(paper))
    with open(stub_path, 'w', encoding='utf-8') as pfile:
        pfile.write('#!/usr/bin/env python\n')
        pfile.write('# -*- encoding: utf-8 -*-\n')
        pfile.write('"""\n')
        pfile.write('Topic: \n')
        pfile.write('Desc : \n')
        pfile.write('"""\n')


def convert_cookbook(txt_file, base_dir):
    """Split a cleaned-up text dump of the book into per-section project files.

    The text in *txt_file* is read line by line and buffered. Whenever a new
    chapter heading (``CHAPTER <n>``), a new section (a ``<c>.<s>. `` title
    line followed within three lines by ``Problem``) or ``APPENDIX A`` is
    reached, the buffered lines are written to the current target file,
    keeping that file's first 7 lines.

    Targets are looked up by file-name prefix:
      * chapter file:  ``<base_dir>/source/chapters/pNN*``
      * section file:  ``<base_dir>/source/cNN/pMM*``
    For every section an empty template ``<base_dir>/cookbook/cNN/pMM_.py``
    is also (re)created. ``Solution`` / ``Discussion`` marker lines are
    replaced by reST-style heading lines with Chinese titles.

    Args:
        txt_file: path of the UTF-8 text file to convert.
        base_dir: root directory of the translation project.

    Raises:
        FileNotFoundError: if the expected chapter/section file is missing.

    NOTE(review): the original indentation was lost in the copy this was
    reconstructed from. The nesting below (in particular what the
    ``'chapters' not in write_file`` guard covers) is the most plausible
    reading -- confirm against the repository version.
    """
    # Compiled once instead of once per input line; raw strings avoid the
    # invalid-escape warnings of the previous '\d' / '\.' literals.
    chapter_re = re.compile(r'^CHAPTER (\d+)$')
    section_re = re.compile(r'^(\d+)\.(\d+)\. ')

    chapter = None        # current chapter number
    paper = None          # current section number within the chapter
    write_file = None     # file the buffered lines will be flushed into
    temp_lines = []       # buffered body lines of the current chapter/section
    hit_paper = False     # True while inside the window after a section title
    hit_offset = 0        # lines seen since the section title
    reached_appendix = False

    with open(txt_file, mode='r', encoding='utf-8') as f:
        for line in f:
            stripped = line.strip()
            c_match = chapter_re.match(stripped)
            p_match = section_re.match(stripped)
            if c_match:
                old_chapter = chapter
                chapter = int(c_match.group(1))
                if old_chapter and chapter - old_chapter != 1:
                    # NOTE(review): `chapter` keeps the new value even though
                    # the heading is otherwise ignored (original behavior).
                    _log.error('章节不连续啊: {}'.format(stripped))
                    continue
                _log.info('------------------------------------------------------')
                _log.info('---------开始新的一章了,第{}章!-----------'.format(chapter))
                # Write out whatever belongs to the previous target first.
                _flush_section(write_file, temp_lines)
                # Directory that will hold this chapter's source stubs.
                os.makedirs(join(base_dir, 'cookbook', 'c{:02d}'.format(chapter)),
                            exist_ok=True)
                # The chapter text goes into the matching chapter file.
                write_file = _find_by_prefix(
                    join(base_dir, 'source', 'chapters'),
                    'p{:02d}'.format(chapter))
                _log.info('找到章节文件:{}'.format(write_file))
            elif p_match:
                # Candidate section title; confirmed only if 'Problem' follows.
                hit_paper = True
                paper = int(p_match.group(2))
                hit_offset = 0
            elif hit_paper and hit_offset <= 2:
                # Lines inside this window are consumed, never buffered.
                if stripped == 'Problem':
                    _log.info('开始新的一节了,第{}章,第{}节!'.format(chapter, paper))
                    if temp_lines:
                        # Chapter index files are not rewritten here; their
                        # buffered text is simply dropped.
                        if write_file is not None and 'chapters' not in write_file:
                            _flush_section(write_file, temp_lines)
                        temp_lines.clear()
                    # From now on buffer into this section's file.
                    write_file = _find_by_prefix(
                        join(base_dir, 'source', 'c{:02d}'.format(chapter)),
                        'p{:02d}'.format(paper))
                    _log.info('下次要写的小节文件:{}'.format(write_file))
                    _write_source_stub(
                        join(base_dir, 'cookbook', 'c{:02d}'.format(chapter)),
                        paper)
                    hit_paper = False
                hit_offset += 1
                if hit_offset > 2:
                    hit_paper = False
            elif stripped == 'APPENDIX A':
                # End of the recipe chapters: flush the last section.
                _flush_section(write_file, temp_lines)
                reached_appendix = True
            elif stripped == 'Solution':
                temp_lines.extend(_heading_lines('解决方案'))
            elif stripped == 'Discussion':
                temp_lines.extend(_heading_lines('讨论'))
            else:
                temp_lines.append(line)

    # Without an 'APPENDIX A' marker the last section used to be lost
    # silently; flush it at end of input. After the marker, trailing lines
    # stay discarded as before.
    if not reached_appendix:
        _flush_section(write_file, temp_lines)
56170

57171

58172
if __name__ == '__main__':
    # Run the conversion with the author's hard-coded local Windows paths.
    source_txt = r'D:\download\20150430\pc_after.txt'
    project_root = r'D:\work\projects\gitprojects\python3-cookbook'
    convert_cookbook(source_txt, project_root)

source/aboutme.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
* 姓名: 熊能
77
* Email: yidao620@gmail.com
8-
* 博客: http://www.pycoding.com/
8+
* 博客: http://yidao620c.github.io/
99
* GitHub: https://github.com/yidao620c
1010
* 公司: `广州云宏信息科技股份有限公司 <http://www.winhong.com/>`_
1111

@@ -14,8 +14,8 @@
1414
1515
*主要贡献者*
1616

17-
1. test1 (test1@gmail.com)
18-
2. test2 (test2@gmail.com)
17+
1. ZZ (zz.at.field@gmail.com)
18+
2. Amy (katiechen8815@gmail.com)
1919

2020
|
2121
|

0 commit comments

Comments
 (0)