Skip to content

Commit 31cede1

Browse files
committed
add encrypting & decrypting PDF files tutorial
1 parent df5242e commit 31cede1

File tree

5 files changed

+229
-0
lines changed

5 files changed

+229
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
9898
- [How to Convert PDF to Docx in Python](https://www.thepythoncode.com/article/convert-pdf-files-to-docx-in-python). ([code](handling-pdf-files/convert-pdf-to-docx))
9999
- [How to Convert PDF to Images in Python](https://www.thepythoncode.com/article/convert-pdf-files-to-images-in-python). ([code](handling-pdf-files/convert-pdf-to-image))
100100
- [How to Compress PDF Files in Python](https://www.thepythoncode.com/article/compress-pdf-files-in-python). ([code](handling-pdf-files/pdf-compressor))
101+
- [How to Encrypt and Decrypt PDF Files in Python](https://www.thepythoncode.com/article/encrypt-pdf-files-in-python). ([code](handling-pdf-files/encrypt-pdf))
101102

102103

103104
- ### [Web Scraping](https://www.thepythoncode.com/topic/web-scraping)
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# [How to Encrypt and Decrypt PDF Files in Python](https://www.thepythoncode.com/article/encrypt-pdf-files-in-python)
2+
To run this:
3+
- `pip3 install -r requirements.txt`
4+
-
5+
```
6+
$ python encrypt_pdf.py --help
7+
```
8+
**Output:**
9+
```
10+
usage: encrypt_pdf.py [-h] [-a {encrypt,decrypt}] [-l {1,2}] -p [PASSWORD] [-o OUTPUT_FILE] file
11+
12+
These options are available
13+
14+
positional arguments:
15+
file Input PDF file you want to encrypt
16+
17+
optional arguments:
18+
-h, --help show this help message and exit
19+
-a {encrypt,decrypt}, --action {encrypt,decrypt}
20+
Choose whether to encrypt or to decrypt
21+
-l {1,2}, --level {1,2}
22+
Choose which protection level to apply
23+
-p [PASSWORD], --password [PASSWORD]
24+
Enter a valid password
25+
-o OUTPUT_FILE, --output_file OUTPUT_FILE
26+
Enter a valid output file
27+
```
28+
- For instance, to encrypt the `bert-paper.pdf` file and write the result to `bert-paper-encrypted.pdf`:
29+
```
30+
$ python encrypt_pdf.py bert-paper.pdf -a encrypt -l 1 -p -o bert-paper-encrypted.pdf
31+
```
32+
- To decrypt it:
33+
```
34+
$ python encrypt_pdf.py bert-paper-encrypted.pdf -a decrypt -l 1 -p -o bert-paper-decrypted.pdf
35+
```
36+
This will produce the original PDF file under the name `bert-paper-decrypted.pdf`. The password must be the same for encryption and decryption.
757 KB
Binary file not shown.
Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
# Import Libraries
2+
from PyPDF4 import PdfFileReader, PdfFileWriter, utils
3+
import os
4+
import argparse
5+
import getpass
6+
from io import BytesIO
7+
import pyAesCrypt
8+
9+
# Size of the chunk (buffer) passed to pyAesCrypt when ciphering / deciphering
10+
BUFFER_SIZE = 64*1024
11+
12+
13+
def is_encrypted(input_file: str) -> bool:
    """Return the ``isEncrypted`` flag PyPDF4 reports for ``input_file``."""
    with open(input_file, 'rb') as handle:
        # The reader is only needed long enough to read the flag, so the
        # file is closed as soon as the value has been obtained.
        return PdfFileReader(handle, strict=False).isEncrypted
18+
19+
20+
def encrypt_pdf(input_file: str, password: str):
    """
    Encrypts a file using PyPDF4 library.
    Precondition: File is not encrypted.

    Args:
        input_file: Path of the PDF file to encrypt.
        password: User password to protect the document with.

    Returns:
        A ``(result, pdf_reader, pdf_writer)`` tuple. On success ``result``
        is True and the reader's underlying file is left open so the caller
        can write the document; the caller must then close
        ``pdf_reader.stream``. On failure ``(False, None, None)`` is returned
        and the input file has already been closed.
    """
    pdf_stream = open(input_file, 'rb')
    succeeded = False
    try:
        pdf_reader = PdfFileReader(pdf_stream, strict=False)
        if pdf_reader.isEncrypted:
            print(f"PDF File {input_file} already encrypted")
            return False, None, None
        pdf_writer = PdfFileWriter()
        try:
            # To encrypt all the pages of the input file, you need to loop
            # over all of them and to add them to the writer.
            for page_number in range(pdf_reader.numPages):
                pdf_writer.addPage(pdf_reader.getPage(page_number))
        except utils.PdfReadError as e:
            print(f"Error reading PDF File {input_file} = {e}")
            return False, None, None
        # The default is 128 bit encryption (if false then 40 bit encryption).
        pdf_writer.encrypt(user_pwd=password, owner_pwd=None, use_128bit=True)
        succeeded = True
        return True, pdf_reader, pdf_writer
    finally:
        # Only the success path hands the open stream over to the caller;
        # every other exit (early return or exception) must not leak it.
        if not succeeded:
            pdf_stream.close()
41+
42+
43+
def decrypt_pdf(input_file: str, password: str):
    """
    Decrypts a file using PyPDF4 library.
    Precondition: A file is already encrypted

    Args:
        input_file: Path of the encrypted PDF file.
        password: Password the document was encrypted with.

    Returns:
        A ``(result, pdf_reader, pdf_writer)`` tuple. On success ``result``
        is True and the reader's underlying file is left open so the caller
        can write the document; the caller must then close
        ``pdf_reader.stream``. On failure ``(False, None, None)`` is returned
        and the input file has already been closed.
    """
    pdf_stream = open(input_file, 'rb')
    succeeded = False
    try:
        pdf_reader = PdfFileReader(pdf_stream, strict=False)
        if not pdf_reader.isEncrypted:
            print(f"PDF File {input_file} not encrypted")
            return False, None, None
        # decrypt() reports a password mismatch through a falsy return value
        # rather than an exception, so the result has to be checked.
        if not pdf_reader.decrypt(password=password):
            print(f"Incorrect password for PDF File {input_file}")
            return False, None, None
        pdf_writer = PdfFileWriter()
        try:
            for page_number in range(pdf_reader.numPages):
                pdf_writer.addPage(pdf_reader.getPage(page_number))
        except utils.PdfReadError as e:
            print(f"Error reading PDF File {input_file} = {e}")
            return False, None, None
        succeeded = True
        return True, pdf_reader, pdf_writer
    finally:
        # Only the success path hands the open stream over to the caller;
        # every other exit (early return or exception) must not leak it.
        if not succeeded:
            pdf_stream.close()
61+
62+
63+
def cipher_stream(inp_buffer: BytesIO, password: str):
    """
    Run an in-memory buffer through pyAesCrypt and return the result.

    The input buffer is rewound before it is read, and the returned buffer
    is rewound so it can be consumed from the beginning.
    """
    inp_buffer.seek(0)
    ciphered = BytesIO()
    pyAesCrypt.encryptStream(inp_buffer, ciphered, password, BUFFER_SIZE)
    ciphered.seek(0)
    return ciphered
72+
73+
74+
def decipher_file(input_file: str, output_file: str, password: str) -> bool:
    """
    Deciphers an input file with pyAesCrypt and writes the result to disk.

    Args:
        input_file: Path of the ciphered file.
        output_file: Path the deciphered content is written to. It may be
            the same path as ``input_file``.
        password: Password the file was ciphered with.

    Returns:
        True when the file was deciphered and written, False when
        deciphering failed (the error is printed and nothing is written).
    """
    inp_file_size = os.stat(input_file).st_size
    out_buffer = BytesIO()
    with open(input_file, mode='rb') as inp_buffer:
        try:
            # Decrypt Stream
            pyAesCrypt.decryptStream(
                inp_buffer, out_buffer, password, BUFFER_SIZE, inp_file_size)
        except Exception as e:
            # Best effort: report the failure and let the caller decide.
            print("Exception", str(e))
            return False
    # The input file is closed at this point, so writing is safe even when
    # output_file and input_file are the same path.
    with open(output_file, mode='wb') as f:
        f.write(out_buffer.getbuffer())
    return True
94+
95+
96+
def encrypt_decrypt_file(**kwargs):
    """
    Encrypts or decrypts a file.

    Keyword Args:
        input_file: Path of the PDF file to process.
        password: Password used for encryption / decryption.
        output_file: Destination path; falls back to ``input_file`` when
            empty, i.e. the file is processed in place.
        action: ``"encrypt"`` or ``"decrypt"``; any other value is a no-op.
        level: Protection level.
            Level 1 --> Encryption / Decryption using PyPDF4
            Level 2 --> Encryption and Ciphering / Deciphering and Decryption
    """
    input_file = kwargs.get('input_file')
    password = kwargs.get('password')
    output_file = kwargs.get('output_file')
    action = kwargs.get('action')
    level = kwargs.get('level')
    if not output_file:
        output_file = input_file
    if action == "encrypt":
        result, pdf_reader, pdf_writer = encrypt_pdf(
            input_file=input_file, password=password)
        # Encryption completed successfully
        if result:
            output_buffer = BytesIO()
            try:
                # The document is written while the reader's file is still
                # open; the stream is closed even if writing fails.
                pdf_writer.write(output_buffer)
            finally:
                pdf_reader.stream.close()
            if level == 2:
                output_buffer = cipher_stream(output_buffer, password=password)
            with open(output_file, mode='wb') as f:
                f.write(output_buffer.getbuffer())
    elif action == "decrypt":
        if level == 2:
            # Deciphering writes the still password-protected PDF to
            # output_file; stop here if that step failed.
            if not decipher_file(input_file=input_file,
                                 output_file=output_file, password=password):
                return
            # The PDF decryption must run on the deciphered file, not on the
            # ciphered input that PyPDF4 cannot parse.
            input_file = output_file
        result, pdf_reader, pdf_writer = decrypt_pdf(
            input_file=input_file, password=password)
        # Decryption completed successfully
        if result:
            output_buffer = BytesIO()
            try:
                pdf_writer.write(output_buffer)
            finally:
                # Closed before output_file is opened for writing, since both
                # may refer to the same path.
                pdf_reader.stream.close()
            with open(output_file, mode='wb') as f:
                f.write(output_buffer.getbuffer())
135+
136+
137+
138+
class Password(argparse.Action):
    """
    argparse action that prompts for the password without echoing it
    when the option is given with no value on the command line.
    """

    def __call__(self, parser, namespace, values, option_string):
        # A value typed on the command line is used as is; otherwise fall
        # back to an interactive hidden prompt.
        secret = getpass.getpass() if values is None else values
        setattr(namespace, self.dest, secret)
146+
147+
148+
def is_valid_path(path):
    """
    Return ``path`` unchanged when it names an existing file or folder.

    Raises:
        ValueError: If ``path`` is empty or does not exist as a file or
            a directory.
    """
    if not path:
        raise ValueError("Invalid Path")
    exists_on_disk = os.path.isfile(path) or os.path.isdir(path)
    if not exists_on_disk:
        raise ValueError(f"Invalid Path {path}")
    return path
158+
159+
160+
def parse_args():
    """
    Parse the command line and return the options as a dictionary.

    The parsed options are echoed to stdout, leaving out the password.
    """
    cli = argparse.ArgumentParser(description="These options are available")
    cli.add_argument(
        "file",
        help="Input PDF file you want to encrypt",
        type=is_valid_path,
    )
    cli.add_argument(
        '-a', '--action',
        dest='action',
        choices=['encrypt', 'decrypt'],
        type=str,
        default='encrypt',
        help="Choose whether to encrypt or to decrypt",
    )
    cli.add_argument(
        '-l', '--level',
        dest='level',
        choices=[1, 2],
        type=int,
        default=1,
        help="Choose which protection level to apply",
    )
    cli.add_argument(
        '-p', '--password',
        dest='password',
        action=Password,
        nargs='?',
        type=str,
        required=True,
        help="Enter a valid password",
    )
    cli.add_argument(
        '-o', '--output_file',
        dest='output_file',
        type=str,
        help="Enter a valid output file",
    )
    options = vars(cli.parse_args())
    # Echo every option except the password so it never reaches the console.
    visible = [
        f"{name}:{value}"
        for name, value in options.items()
        if name != 'password'
    ]
    print("## Command Arguments #################################################")
    print("\n".join(visible))
    print("######################################################################")
    return options
181+
182+
183+
if __name__ == '__main__':
    # Read the user's command-line options, then hand them to the
    # encrypt / decrypt entry point under the keyword names it expects.
    args = parse_args()
    encrypt_decrypt_file(
        input_file=args['file'],
        output_file=args['output_file'],
        password=args['password'],
        action=args['action'],
        level=args['level'],
    )
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
PyPDF4==1.27.0
2+
pyAesCrypt==6.0.0

0 commit comments

Comments
 (0)