Skip to content

Commit d71b927

Browse files
committed
Chapter 13 finished
1 parent 59fa594 commit d71b927

35 files changed

+45623
-0
lines changed

13-pdf-word/README.md

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# PDF文档
2+
`pip install PyPDF2`
3+
<pre>
4+
import PyPDF2
5+
</pre>
6+
7+
# Word文档
8+
## 安装
9+
`pip install python-docx`
10+
11+
## Word文档简介
12+
* Document:表示整个文档
13+
* Paragraph:表示段落
14+
* Run:表示相同样式文本
15+
16+
## 使用
17+
```python
import docx
doc = docx.Document(filename)
```
19+
20+
## Word样式
21+
* 段落样式:可以应用于Paragraph
22+
* 字符样式:可以应用于Run对象
23+
* 链接样式:可以应用于Paragraph和Run对象
24+
25+
26+

13-pdf-word/addBreakToDocx.py

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
import docx
2+
3+
# Build a two-paragraph document with a page break between the paragraphs.
document = docx.Document()

# Keep the paragraph returned by add_paragraph() so its first run can be
# reached directly instead of indexing back into document.paragraphs.
firstPara = document.add_paragraph('This is on the first page!')
firstPara.runs[0].add_break(docx.text.run.WD_BREAK.PAGE)

document.add_paragraph('This is on the second page!')

document.save('twoPage.docx')

13-pdf-word/addContentToDocx.py

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import docx
2+
3+
# Start from an empty document and add three paragraphs to it.
document = docx.Document()
document.add_paragraph('Hello world!')
secondPara = document.add_paragraph('This is a second paragraph.')
thirdPara = document.add_paragraph('This is a yet another paragraph.')

# Append one more run of text to the second paragraph.
secondPara.add_run(' This text is being added to the second paragraph.')

document.save('multipleParagraphs.docx')

13-pdf-word/addHeaderToDocx.py

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import docx
2+
3+
# Write one heading for each level from 0 through 4, labelled with its level.
document = docx.Document()
for level in range(5):
    document.add_heading('Head {}'.format(level), level)

document.save('headings.docx')

13-pdf-word/batchEncryptPdf.py

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import PyPDF2
2+
import os
3+
4+
# Root directory to search, and the password to try on encrypted PDFs.
path = ''
password = ''
# Directory that receives the rewritten copies; '' resolves to the current
# working directory.
destDirectory = ''

# Walk every directory below `path` and look at each PDF file in it.
for dirpath, dirnames, filenames in os.walk(path):
    for filename in filenames:
        if not filename.endswith('.pdf'):
            continue
        filepath = os.path.join(dirpath, filename)
        # `with` closes the source file on every path, including the
        # wrong-password one that skips to the next file.
        with open(filepath, 'rb') as pdfFile:
            pdfReader = PyPDF2.PdfFileReader(pdfFile)
            # Files that are not encrypted need no work.
            if not pdfReader.isEncrypted:
                continue
            # A falsy result from decrypt() means the password was rejected.
            if not pdfReader.decrypt(password):
                print('Could not decrypt %s: wrong password' % filepath)
                continue
            # Copy every page of the decrypted file into a new PDF.
            pdfWriter = PyPDF2.PdfFileWriter()
            for pageNum in range(pdfReader.numPages):
                pdfWriter.addPage(pdfReader.getPage(pageNum))
            # NOTE(review): the '_encrypted.pdf' suffix is unchanged from the
            # original even though the copy written here is the decrypted
            # one - confirm the intended output name.
            outputPath = os.path.join(destDirectory,
                                      filename + '_encrypted.pdf')
            with open(outputPath, 'wb') as decryptPdfFile:
                pdfWriter.write(decryptPdfFile)
32+
33+
34+
35+

13-pdf-word/combinePdfs.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#! python3
2+
# combinePdfs.py - Combines all the PDFs in the current working directory into
3+
# a single PDF.
4+
5+
import os
6+
import PyPDF2
7+
8+
outputFilename = 'allminutes.pdf'

# Get all the PDF filenames in sorted order. The output file is left out so
# that a rerun does not read from the very file it is about to overwrite.
pdfFiles = sorted(
    filename for filename in os.listdir('.')
    if filename.endswith('.pdf') and filename != outputFilename
)

pdfWriter = PyPDF2.PdfFileWriter()
openedFiles = []

try:
    # Loop through all the PDF files.
    for filename in pdfFiles:
        pdfFileObj = open(filename, 'rb')
        openedFiles.append(pdfFileObj)
        pdfReader = PyPDF2.PdfFileReader(pdfFileObj)

        # Loop through all the pages (except the first) and add them.
        for pageNum in range(1, pdfReader.numPages):
            pdfWriter.addPage(pdfReader.getPage(pageNum))

    # Save the resulting PDF to a file.
    with open(outputFilename, 'wb') as pdfOutput:
        pdfWriter.write(pdfOutput)
finally:
    # Close the sources only after the output has been written: the writer
    # was handed page objects that come from these still-open readers.
    for pdfFileObj in openedFiles:
        pdfFileObj.close()

13-pdf-word/combineminutes.pdf

506 KB
Binary file not shown.

13-pdf-word/createDocx.py

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import docx
2+
3+
# Create an empty document, add a single paragraph, and write it to disk.
document = docx.Document()
document.add_paragraph('Hello world!')
document.save('helloworld.docx')

13-pdf-word/createInvitation.py

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import docx
2+
3+
# Load the invitation template. Its paragraphs are only read, never
# modified, so every guest starts from the same unmodified text.
doc = docx.Document('guestTemplete.docx')
paragraphNum = len(doc.paragraphs)
# Output document that collects one invitation per guest.
guestDoc = docx.Document()

# Read the guest list, one name per line.
with open('guests.txt') as f:
    for guest in f:
        # Drop the line ending so it is not written into the invitation,
        # and skip blank lines.
        guest = guest.strip()
        if not guest:
            continue
        # Copy each template paragraph, text and style, into the output.
        for paraSeq, p in enumerate(doc.paragraphs, start=1):
            text = p.text
            if paraSeq == 2:
                # The guest name is appended to the second paragraph.
                text += guest
            newPara = guestDoc.add_paragraph(text, p.style)
            if paraSeq == paragraphNum:
                # End each invitation with a page break in the output
                # document so the next guest starts on a new page.
                newPara.add_run().add_break(docx.text.run.WD_BREAK.PAGE)

# Save the combined invitations.
guestDoc.save('guestsInvitaion.docx')

13-pdf-word/decryptPdfWithDict.py

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import PyPDF2
2+
3+
# Load the PDF. The file stays open while passwords are tried and is closed
# when the `with` block ends.
with open('encryptTest.pdf', 'rb') as pdfFile:
    pdfReader = PyPDF2.PdfFileReader(pdfFile)

    # Try each dictionary word, lower-cased, as the password.
    with open('dictionary.txt') as f:
        for word in f:
            word = word.strip().lower()
            # A truthy result from decrypt() means the password was accepted.
            if pdfReader.decrypt(word):
                print(word)
                # Stop at the first hit; trying further words is pointless
                # and could print the same password again.
                break
14+
15+
16+
17+

13-pdf-word/demo.docx

41.6 KB
Binary file not shown.

0 commit comments

Comments
 (0)