Skip to content

Commit 01bb93e

Browse files
committed
Add solutions to new exercises in section 2
1 parent 35c2cfc commit 01bb93e

File tree

1 file changed

+110
-26
lines changed

1 file changed

+110
-26
lines changed
Lines changed: 110 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,125 @@
11
# 13.2 - Extract Pages From a PDF
22
# Solutions to review exercises
33

4-
import os
5-
import copy
6-
from pyPDF2 import PdfFileReader, PdfFileWriter
4+
# ***********
5+
# Exercise 1
6+
#
7+
# Extract the last page from the `Pride_and_Prejudice.pdf` file and
8+
# save it to a new file called `last_page.pdf` in your home directory.
9+
# ***********
710

11+
# First import the classes and libraries needed
12+
from pathlib import Path
13+
from PyPDF2 import PdfFileReader, PdfFileWriter
814

9-
# Exercise 1
10-
path = "C:/python-basics-exercises/ch13-interact-with-pdf-files/\
11-
practice_files"
15+
# Get the path to the `Pride_and_Prejudice.pdf` file. We'll assume you
16+
# downloaded the solutions folder and extracted it into the home
17+
# directory on your computer. If this is not the case, you'll need to
18+
# update the path below.
19+
pdf_path = Path.home() / "python-basics-exercises/ch13-interact-with-pdf-files" \
20+
"/practice_files/Pride_and_Prejudice.pdf"
21+
22+
# Now you can create the PdfFileReader instance. Remember that
23+
# PdfFileReader objects can only be instantiated with path strings, not
24+
# Path objects!
25+
pdf_reader = PdfFileReader(str(pdf_path))
1226

13-
input_file_path = os.path.join(path, "Walrus.pdf")
14-
input_file = PdfFileReader(input_file_path)
15-
output_PDF = PdfFileWriter()
27+
# Use the .pages attribute to get an iterable over all pages in the
28+
# PDF. The last page can be accessed with the index -1.
29+
last_page = pdf_reader.pages[-1]
1630

17-
input_file.decrypt("IamtheWalrus") # decrypt the input file
31+
# Now you can create a PdfFileWriter instance and add the last page to it.
32+
pdf_writer = PdfFileWriter()
33+
pdf_writer.addPage(last_page)
1834

35+
# Finally, write the contents of pdf_writer to the file `last_page.pdf`
36+
# in your home directory.
37+
output_path = Path.home() / "last_page.pdf"
38+
with output_path.open(mode="wb") as output_file:
39+
pdf_writer.write(output_file)
1940

41+
42+
# ***********
2043
# Exercise 2
21-
for page_num in range(0, input_file.getNumPages()):
22-
# rotate pages (call everything page_left for now; will make a copy)
23-
page_left = input_file.getPage(page_num)
24-
page_left.rotateCounterClockwise(90)
44+
#
45+
# Extract all pages with even numbered _indices_ from the
46+
# `Pride_and_Prejudice.pdf` and save them to a new file called
47+
# `every_other_page.pdf` in your home directory.
48+
# ***********
49+
50+
# There are several ways to extract pages with even numbered indices
51+
# so we'll cover a few of them here.
52+
53+
# Solution A: Using a `for` loop
54+
# ------------------------------
55+
56+
# One way to do it is with a `for` loop. We'll create a new PdfFileWriter
57+
# instance, then loop over the numbers 0 up to the number of pages in the
58+
# PDF, and add the pages with even indices to the PdfFileWriter instance.
59+
pdf_writer = PdfFileWriter()
60+
num_pages = pdf_reader.getNumPages()
61+
62+
for idx in range(num_pages): # NOTE: idx is a common short name for "index"
63+
if idx % 2 == 0: # Check that the index is even
64+
page = pdf_reader.getPage(idx) # Get the page at the index
65+
pdf_writer.addPage(page) # Add the page to `pdf_writer`
66+
67+
# Now write the contents of `pdf_writer` to the file `every_other_page.pdf`
68+
# in your home directory
69+
output_path = Path.home() / "every_other_page.pdf"
70+
with output_path.open(mode="wb") as output_file:
71+
pdf_writer.write(output_file)
2572

26-
page_right = copy.copy(page_left) # split each page in half
27-
upper_right = page_left.mediaBox.upperRight # get original page corner
73+
# Solution B: Slicing `.pages` with steps
74+
# ------------------------------
2875

29-
# crop and add left-side page
30-
page_left.mediaBox.upperRight = (upper_right[0] / 2, upper_right[1])
31-
output_PDF.addPage(page_left)
32-
# crop and add right-side page
33-
page_right.mediaBox.upperLeft = (upper_right[0] / 2, upper_right[1])
34-
output_PDF.addPage(page_right)
76+
# A more succinct, although possibly more difficult to understand,
77+
# solution involves slicing the `.pages` iterable. The indices start
78+
# with 0 and every even index can be obtained by iterating over
79+
# `.pages` in steps of size 2, so `.pages[::2]` is an iterable
80+
# containing just the pages with even indices.
81+
pdf_writer = PdfFileWriter()
3582

83+
for page in pdf_reader.pages[::2]:
84+
pdf_writer.addPage(page)
3685

86+
# Now write the contents of `pdf_writer` to the file
87+
# `every_other_page.pdf` in your home directory.
88+
output_path = Path.home() / "every_other_page.pdf"
89+
with output_path.open(mode="wb") as output_file:
90+
pdf_writer.write(output_file)
91+
92+
93+
# ***********
3794
# Exercise 3
38-
# save new pages to an output file
39-
output_file_path = os.path.join(path, "Output/Updated Walrus.pdf")
40-
with open(output_file_path, "wb") as output_file:
41-
output_PDF.write(output_file)
95+
#
96+
# Split the `Pride_and_Prejudice.pdf` file into two new PDF files. The
97+
# first file should contain the first 150 pages, and the second file
98+
# should contain the remaining pages. Save both files in your home
99+
# directory as `part_1.pdf` and `part_2.pdf`.
100+
# ***********
101+
102+
# Start by creating two new PdfFileWriter instances.
103+
part1_writer = PdfFileWriter()
104+
part2_writer = PdfFileWriter()
105+
106+
# Next, create two new iterables containing the correct pages.
107+
part1_pages = pdf_reader.pages[:150] # Contains pages 0 - 149
108+
part2_pages = pdf_reader.pages[150:] # Contains pages 150 - last page
109+
110+
# Add the pages to their corresponding writers.
111+
for page in part1_pages:
112+
part1_writer.addPage(page)
113+
114+
for page in part2_pages:
115+
part2_writer.addPage(page)
116+
117+
# Now write the contents of each writer to the files `part_1.pdf` and
118+
# `part_2.pdf` in your home directory.
119+
part1_output_path = Path.home() / "part_1.pdf"
120+
with part1_output_path.open(mode="wb") as part1_output_file:
121+
part1_writer.write(part1_output_file)
122+
123+
part2_output_path = Path.home() / "part_2.pdf"
124+
with part2_output_path.open(mode="wb") as part2_output_file:
125+
part2_writer.write(part2_output_file)

0 commit comments

Comments
 (0)