PyPDF2.PdfFileWriter Python Example

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 24

PyPDF2.PdfFileWriter Python Example https://www.programcreek.com/python/example/...

Home Popular Modules

Python PyPDF2.PdfFileWriter() Examples


Related Methods
The following are code examples for showing how to use PyPDF2.PdfFileWriter(). They are from open source Python projects.
You can vote up the examples you like or vote down the ones you don't like. sys.exit()
sys.argv()
re.compile()
Example 1 time.time()
os.listdir()
Project: WebTeam Author: shashankrnr32 File: PDFManager.py MIT License 11 votes time.sleep()
os.remove()
def img2pdf(folder_path,name):
    """Turn ``<folder_path>/<name>.png`` into a password-protected PDF.

    The image is first rendered into an intermediate ``<name>1.pdf``, one
    page of which is then copied into ``<name>.pdf`` and encrypted with the
    lower-cased ``name`` as password.  The source image and the
    intermediate PDF are deleted afterwards.

    Args:
        folder_path: Directory containing the image; also the output directory.
        name: File name without extension.
    """
    base = folder_path+'/'+name

    pdf = fpdf.FPDF('L','pt','letter')
    pdf.add_page()
    pdf.image(base+'.png')
    pdf.output(base+'1.pdf','F')

    output_pdf = PdfFileWriter()
    # The reader must stay open until the writer has been flushed, so both
    # files are handled by nested context managers.
    with open(base+'1.pdf', 'rb') as inputFile:
        pdfReader = PyPDF2.PdfFileReader(inputFile)
        # NOTE(review): index 1 is the *second* page of the intermediate
        # PDF -- presumably the image is pushed onto a new page by fpdf;
        # confirm before changing.
        output_pdf.addPage(pdfReader.getPage(1))
        output_pdf.encrypt(str.lower(name))
        with open(base+'.pdf', "wb") as out_file:
            output_pdf.write(out_file)

    os.remove(base+'.png')
    os.remove(base+'1.pdf')
datetime
Example 2
random
math
Project: Pykell Author: morgulbrut File: Pykell.py MIT License 7 votes subprocess
tempfile
def combine_pdf(files, path='page/', outfile='comb.pdf'):
    """
    Combines a list of pdf files into one file

    :param files: list of files
    :param path: output directory (default: page/)
    :param outfile: name of the output file (default: comb.pdf)
    :return: None, the combined pdf is written to <path><outfile>
    """
    from contextlib import ExitStack

    output = PdfFileWriter()
    # Every input stays open until the combined file has been written and
    # is then closed, even when an error occurs.
    with ExitStack() as stack:
        for f in files:
            pdfFile = PdfFileReader(stack.enter_context(open(f, "rb")))
            for p in range(pdfFile.getNumPages()):
                output.addPage(pdfFile.getPage(p))
        Pykell.check_path(path)
        logging.info('Writing: ' + path + outfile)
        with open(path + outfile, "wb") as outputStream:
            output.write(outputStream)

# Utils

Example 3

Project: pdfcli Author: oliviersm199 File: pdfcli.py MIT License 7 votes

def _delete(*args, **kwargs):
    """Write a copy of a PDF with the requested pages left out.

    Reads the keyword arguments ``file`` (input path), ``delete``
    (zero-based page indexes to drop), ``out`` (output path) and ``key``
    (decryption key).

    Raises:
        click.BadParameter: If any index lies outside ``0 .. pages - 1``.
    """
    file_arg = kwargs['file']
    delete_pages = kwargs['delete']
    out = kwargs['out']
    decrypt_key = _encr_key_encoding(kwargs['key'])

    with open(file_arg, 'rb') as pdf_reader_fp:
        pdf_reader = get_pdf_reader(pdf_reader_fp, file_arg, key=decrypt_key)
        num_pages = pdf_reader.getNumPages()

        # Negative indexes are rejected as well: they would never match a
        # page index below and the command would report a deletion that
        # did not happen.
        for page in delete_pages:
            if not 0 <= page < num_pages:
                raise click.BadParameter('All indexes must be within range of the length of the PDF')

        skipped = set(delete_pages)
        # The output is only created once the arguments are known to be valid.
        with open(out, 'wb') as pdf_writer_fp:
            pdf_writer = PyPDF2.PdfFileWriter()
            for i in range(num_pages):
                if i not in skipped:
                    pdf_writer.addPage(pdf_reader.getPage(i))

            pdf_writer.write(pdf_writer_fp)
    click.echo("Deleted pages %s from %s and created new PDF at %s" % (delete_pages, file_arg, out))

Example 4

Project: pdfcli Author: oliviersm199 File: pdfcli.py MIT License 6 votes

def _rotate(*args, **kwargs):
    """Rotate every page of a PDF by 90 degrees and save the result.

    Reads the keyword arguments ``file`` (input path), ``direction``
    (``"clockwise"`` rotates clockwise, any other value rotates
    counter-clockwise), ``out`` (output path) and ``key`` (decryption key).
    """
    file_arg = kwargs['file']
    direction = kwargs['direction']
    out = kwargs['out']
    decrypt_key = _encr_key_encoding(kwargs['key'])

    with open(file_arg, 'rb') as source_fp, open(out, 'wb') as target_fp:
        reader = get_pdf_reader(source_fp, file_arg, key=decrypt_key)
        writer = PyPDF2.PdfFileWriter()

        for index in range(reader.getNumPages()):
            page = reader.getPage(index)
            turn = page.rotateClockwise if direction == "clockwise" else page.rotateCounterClockwise
            writer.addPage(turn(90))
        writer.write(target_fp)
    click.echo("Pages were rotated %s successfully and saved at %s" % (direction, out))

Example 5

Project: rotatepdf Author: AndreMiras File: rotatepdf.py MIT License 6 votes

def rotate(src_stream, dst_stream, rotate_pages_dict):
    """
    Copies src_stream to dst_stream, rotating the pages named in rotate_pages_dict.

    rotate_pages_dict provides the one-based page numbers under the keys
    'rotate_left_pages', 'rotate_right_pages' and 'rotate_180_pages'.
    Pages not listed anywhere are copied unchanged.
    """
    reader = PdfFileReader(src_stream)
    writer = PdfFileWriter()
    left_pages = rotate_pages_dict['rotate_left_pages']
    right_pages = rotate_pages_dict['rotate_right_pages']
    flipped_pages = rotate_pages_dict['rotate_180_pages']
    for index in range(reader.getNumPages()):
        # the configuration counts pages from one, the reader from zero
        number = index + 1
        page = reader.getPage(index)
        if number in flipped_pages:
            page = page.rotateClockwise(180)
        elif number in left_pages:
            page = page.rotateCounterClockwise(90)
        elif number in right_pages:
            page = page.rotateClockwise(90)
        writer.addPage(page)
    writer.write(dst_stream)

Example 6

Project: pdf_text_overlay Author: zerodhatech File: pdfWriter.py MIT License 6 votes

def edit_and_save_pdf(self):
    """Merge the configured overlays into the original PDF.

    Returns the writer holding all pages of the original document; pages
    that have a non-empty configuration get an overlay merged on top.
    """
    source = PdfFileReader(self.original_pdf)
    writer = PdfFileWriter()

    # page number -> variables to draw on that page
    variables_by_page = {
        entry['page_number']: entry['variables']
        for entry in self.configuration
    }

    # Page numbers are zero-based
    for index in range(source.numPages):
        page_variables = variables_by_page.get(index)
        page = source.getPage(index)
        if page_variables:
            overlay = PdfFileReader(self.create_new_pdf(page_variables))
            page.mergePage(overlay.getPage(0))
        writer.addPage(page)

    return writer

Example 7

Project: automate-the-boring-stuff-projects Author: kudeh File: pdfParanoia.py MIT License 6 votes

def encryptPDFs(root, password):
    """Encrypts all pdfs found while walking a folder.

    For every unencrypted ``*.pdf`` an encrypted copy named
    ``<stem>_encrypted.pdf`` is written to the ``untitled folder``
    directory next to the source file.

    Args:
        root (str): folder path to walk
        password (str): password to encrypt pdfs with
    Returns:
        None
    """
    for folder, subfolder, fileList in os.walk(root):
        for file in fileList:
            if not file.endswith('.pdf'):
                continue
            filepath = os.path.join(os.path.abspath(folder), file)
            # The source stays open until the encrypted copy is written.
            with open(filepath, 'rb') as pdfFileObj:
                pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
                if pdfReader.isEncrypted:
                    continue

                pdfWriter = PyPDF2.PdfFileWriter()
                for pageNum in range(pdfReader.numPages):
                    pdfWriter.addPage(pdfReader.getPage(pageNum))
                pdfWriter.encrypt(password)

                # splitext keeps names with several dots intact
                # ("a.b.pdf" -> "a.b_encrypted.pdf").
                stem, extension = os.path.splitext(os.path.basename(filepath))
                targetDir = os.path.join(os.path.dirname(filepath), 'untitled folder')
                os.makedirs(targetDir, exist_ok=True)
                newPath = os.path.join(targetDir, stem + '_encrypted' + extension)
                with open(newPath, 'wb') as resultPdf:
                    pdfWriter.write(resultPdf)

Example 8

Project: pdfly Author: jabalazs File: pdf_utils.py MIT License 6 votes

def extract(document, new_document, from_page=0, to_page=0):
    """
    Split the PDF document beginning in from_page to to_page included.

    A value of 0 for from_page will mean beginning of the document, and a value
    of 0 for to_page will mean the end of the document

    Page numbers are one-based. Always returns 0.
    """
    output = PdfFileWriter()
    # The source stays open until the output is written and is closed afterwards.
    with open(document, "rb") as source:
        input1 = PdfFileReader(source)

        if from_page == 0:
            from_page = 1

        if to_page == 0:
            to_page = input1.getNumPages()

        for i in range(from_page - 1, to_page):
            output.addPage(input1.getPage(i))

        with open(new_document, "wb") as outfile:
            output.write(outfile)
    return 0

Example 9

Project: handfontgen Author: nixeneko File: tilecharbox.py MIT License 6 votes

def outputpapertemplate(self, dest, listchar, output=None):
    """Render template pages for ``listchar`` and write or return them.

    Args:
        dest: File path (str), a writable binary stream, or None.
        listchar: Characters still to be placed; pages are generated while
            it is non-empty (presumably ``outputtemplateonepage`` consumes
            it -- verify against that method).
        output: Existing writer to append to; a new one is created if None.

    Returns:
        The writer when ``dest`` is None, otherwise None.
    """
    if output is None:
        output = PyPDF2.PdfFileWriter()

    while listchar:
        iopage = self.outputtemplateonepage(listchar)
        page = PyPDF2.PdfFileReader(iopage)
        output.addPage(page.getPage(0))

    if dest is None:
        return output

    if isinstance(dest, str): # when dest is a file path
        destdir = os.path.dirname(dest)
        if destdir != '':
            # exist_ok avoids a failure when the directory appears
            # between the check and the creation
            os.makedirs(destdir, exist_ok=True)
        with open(dest, "wb") as w:
            output.write(w)
    else: # when dest is io.IOBase
        output.write(dest)

Example 10

Project: Python-Automation-Cookbook Author: PacktPublishing File: watermarking_pdf.py MIT License 6 votes

def encrypt(out_pdf, password):
    """Encrypt the PDF at ``out_pdf`` in place with ``password``.

    The encrypted document is first written to INTERMEDIATE_ENCRYPT_FILE,
    which then replaces the original file.
    """
    print('Encrypting the document')

    output_pdf = PyPDF2.PdfFileWriter()

    # The input is closed even if reading or writing fails.
    with open(out_pdf, "rb") as in_file:
        input_pdf = PyPDF2.PdfFileReader(in_file)
        output_pdf.appendPagesFromReader(input_pdf)
        output_pdf.encrypt(password)

        # Intermediate file
        with open(INTERMEDIATE_ENCRYPT_FILE, "wb") as out_file:
            output_pdf.write(out_file)

    # Replace the original with the intermediate file; unlike os.rename,
    # os.replace overwrites an existing target on every platform.
    os.replace(INTERMEDIATE_ENCRYPT_FILE, out_pdf)

Example 11

Project: ebook_homebrew Author: tubone24 File: convert.py MIT License 6 votes

def __init__(self, digits, extension, directory_path=None):
    """Constructor

    Stores the settings, creates the PDF writer and changes the working
    directory to the target directory.

    Args:
        digits (str): Regex target digit.
        extension (str): Target file extension.
        directory_path (str): Target directory path; the current working
            directory is used when omitted.
    """
    super().__init__()
    self.__digits = digits
    self.__extension = self._convert_extension_with_dot(extension)
    self.__regex_ext = re.compile(self.__extension)
    self.__file_writer = PyPDF2.PdfFileWriter()
    self.__directory_path = os.getcwd() if directory_path is None else directory_path
    logger.debug("Current Directory: {cwd}".format(cwd=self.__directory_path))
    os.chdir(self.__directory_path)

Example 12

Project: datasheet-scrubber Author: idea-fasoc File: hierarchy_pdf_cropper_for_extraction.py MIT License 6 votes

def pdf_cropper(source_path,destination_path,pdf_filename,number_page):
    """Save the first ``number_page`` pages of a PDF as one-page files.

    Page ``i`` is written to ``<destination_path>/<name>_<i>.pdf`` where
    ``<name>`` is ``pdf_filename`` up to its first ".pdf".

    Args:
        source_path: Path of the PDF to read.
        destination_path: Output directory, created when missing.
        pdf_filename: File name used to build the output names.
        number_page: Number of leading pages to export.
    """
    # The source is closed once all pages have been written.
    with open(source_path, "rb") as source:
        pfr = PyPDF2.PdfFileReader(source) #PdfFileReader object
        if pfr.isEncrypted: #needed for some encrypted files like AD7183
            pfr.decrypt('')
        pdfname = pdf_filename.split(".pdf")[0]
        for i in range(0,number_page):
            if not os.path.exists(destination_path):
                os.makedirs(destination_path)
            page_name = pdfname + "_" + str(i) + ".pdf"
            NewPDFfilename=os.path.join(destination_path,page_name)
            writer = PyPDF2.PdfFileWriter() #one writer per exported page
            writer.addPage(pfr.getPage(i))
            with open(NewPDFfilename, "wb") as outputStream: #create new PDF
                writer.write(outputStream) #write page to new PDF

Example 13

Project: datasheet-scrubber Author: idea-fasoc File: pdf_cropper.py MIT License 6 votes

def pdf_cropper(source_path,destination_path,pdf_filename,number_page):
    """Save the first ``number_page`` pages of a PDF in per-page folders.

    Page ``i`` is written to ``<destination_path>/<i>/<pdf_filename>``.

    Args:
        source_path: Path of the PDF to read.
        destination_path: Root output directory.
        pdf_filename: File name used for every exported page.
        number_page: Number of leading pages to export.
    """
    # The source is closed once all pages have been written.
    with open(source_path, "rb") as source:
        pfr = PyPDF2.PdfFileReader(source) #PdfFileReader object
        if pfr.isEncrypted: #needed for some encrypted files like AD7183
            pfr.decrypt('')

        for i in range(0,number_page):
            new_folder_path=os.path.join(destination_path, str(i))
            if not os.path.exists(new_folder_path):
                os.makedirs(new_folder_path)
            NewPDFfilename=os.path.join(new_folder_path,pdf_filename)
            writer = PyPDF2.PdfFileWriter() #one writer per exported page
            writer.addPage(pfr.getPage(i))
            with open(NewPDFfilename, "wb") as outputStream: #create new PDF
                writer.write(outputStream) #write page to new PDF

Example 14

Project: pyzottk Author: sbrisard File: pdf.py BSD 3-Clause "New" or "Revised" License 6 votes

def copy_bookmarks(src, dest, outlines=None, parent=None):
    """Copy the bookmarks from src to dest.

    A list that directly follows a destination is treated as that
    destination's children and copied recursively.

    Args:
        src (PyPDF2.PdfFileReader): The source.
        dest (PyPDF2.PdfFileWriter): The destination.
        outlines (list of PyPDF2.generic.Destination): The outlines to be
            copied (for recursive calls). If None, then uses all elements
            returned by ``src.getOutlines()``.
        parent (PyPDF2.generic.IndirectObject): The parent bookmark (if
            outlines are nested).
    """
    if outlines is None:
        outlines = src.getOutlines()
    # Pair every entry with its successor (None for the last entry).
    for entry, following in itertools.zip_longest(outlines, outlines[1:]):
        if not is_destination(entry):
            continue
        page_number = src.getDestinationPageNumber(entry)
        bookmark = dest.addBookmark(entry.title, page_number, parent=parent)
        if following and not is_destination(following):
            copy_bookmarks(src, dest, outlines=following, parent=bookmark)

Example 15

Project: pyzottk Author: sbrisard File: pdf.py BSD 3-Clause "New" or "Revised" License 6 votes

def add_metadata(istream, ostream, author, title):
    """Copy a PDF, setting its author and title metadata.

    Pages and bookmarks of the input are carried over to the output.

    Args:
        istream: The input PDF (string or stream in 'rb' mode).
        ostream: The output PDF (string or stream in 'wb' mode).
        author: The '/Author' metadata (string).
        title: The '/Title' metadata (string).
    """
    source = PyPDF2.PdfFileReader(istream)
    target = PyPDF2.PdfFileWriter()
    target.appendPagesFromReader(source)
    info = {'/Author': author, '/Title': title}
    target.addMetadata(info)
    copy_bookmarks(source, target)
    target.write(ostream)

Example 16

Project: knowledge-repo Author: airbnb File: image.py Apache License 2.0 6 votes

def pdf_page_to_png(src_pdf, pagenum=0, resolution=154):
    """
    Renders one page of a PDF and returns it as a wand.image.Image png.

    :param PyPDF2.PdfFileReader src_pdf: PDF from which to take pages.
    :param int pagenum: Page number to take.
    :param int resolution: Resolution for resulting png in DPI.
    """
    check_dependencies(__optional_dependencies__['pdf'])
    # Imported here so that importing this module does not require the
    # optional libraries
    import PyPDF2
    from wand.image import Image  # TODO: When we start using this again, document which system-level libraries are required.

    # Build a one-page PDF in memory and hand it to the image library.
    single_page = PyPDF2.PdfFileWriter()
    single_page.addPage(src_pdf.getPage(pagenum))

    buffer = io.BytesIO()
    single_page.write(buffer)
    buffer.seek(0)

    img = Image(file=buffer, resolution=resolution)
    img.convert("png")
    return img

Example 17

Project: autosplitPDF Author: wienand File: splitW2C.py MIT License 6 votes

def splitW2C(filename, template):
    """Split a PDF into one file per page, named from text on the page.

    Every replacement field of ``template`` is expected to look like
    ``{xxN}``: the part after the first two characters is the index of the
    content-stream operation whose text fills that field.  The text is
    whitespace-normalised and title-cased before formatting.

    Args:
        filename: Path of the source PDF.
        template: ``str.format`` template for the output file names.
    """
    log.debug('Opening %s as source PDF', filename)
    extractOperations = [(x[1], int(x[1][2:])) for x in string.Formatter().parse(template) if x[1]]
    with open(filename, 'rb') as sourcePDF:
        reader = PyPDF2.PdfFileReader(sourcePDF)
        for index in range(0, reader.getNumPages()):
            page = reader.getPage(index)
            operations = PyPDF2.pdf.ContentStream(page.getContents(), page.pdf).operations
            formattingDictionary = {}
            for formatterID, extractOperation in extractOperations:
                operation = operations[extractOperation]
                if operation[1] != b'Tj':
                    # Only reported; the operands are still used below.
                    log.warning('NO STRING TYPE')
                text = ''.join(operation[0])
                formattingDictionary[formatterID] = re.sub(r'\s+', ' ', text).strip().title()
            output = PyPDF2.PdfFileWriter()
            output.addPage(page)
            outputFilename = template.format(**formattingDictionary)
            with open(outputFilename, "wb") as outputStream:
                log.debug('Exporting page %s to %s', index + 1, outputFilename)
                output.write(outputStream)

Example 18

Project: callisto-core Author: project-callisto File: test_pdf.py GNU Affero General Public License v3.0 6 votes

def test_output_file(self):
    """
    Writes the generated report to disk for manual inspection
    $ open MatchingUserReviewPDFTest.pdf
    """
    matching_id = "test1a08daw awd7awgd 1213123"
    for user in (self.user1, self.user2):
        self.create_match(user, matching_id)
    self.most_recent_report.contact_phone = "555-555-5555"
    self.most_recent_report.save()
    report_bytes = PDFUserReviewReport.generate({"matches": MatchReport.objects.all()})
    report_reader = PyPDF2.PdfFileReader(BytesIO(report_bytes))
    with open("MatchingUserReviewPDFTest.pdf", "wb") as _file:
        report_writer = PyPDF2.PdfFileWriter()
        report_writer.appendPagesFromReader(report_reader)
        report_writer.write(_file)

Example 19

Project: Python-for-Everyday-Life Author: PacktPublishing File: rotate.py MIT License 6 votes

def rotate_clockwise(src_pdf_path, target_pdf_path, rotation_angle):
    """Rotate all pages of a PDF clockwise and save them to a new file.

    Args:
        src_pdf_path: Path of the PDF to read (str).
        target_pdf_path: Path of the PDF to write (str).
        rotation_angle: Non-negative rotation angle (int).
    """
    assert isinstance(src_pdf_path, str)
    assert isinstance(target_pdf_path, str)
    assert isinstance(rotation_angle, int)
    assert rotation_angle >= 0

    with open(src_pdf_path, 'rb') as source:
        reader = PyPDF2.PdfFileReader(source)
        writer = PyPDF2.PdfFileWriter()

        # every page gets the same rotation
        for page in map(reader.getPage, range(reader.numPages)):
            page.rotateClockwise(rotation_angle)
            writer.addPage(page)

        # written while the source is still open
        with open(target_pdf_path, 'wb') as target:
            writer.write(target)

Example 20

Project: pythonlib Author: RudolfCardinal File: pdf.py Apache License 2.0 6 votes

def append_memory_pdf_to_writer(input_pdf: bytes,
                                writer: PdfFileWriter,
                                start_recto: bool = True) -> None:
    """
    Adds all pages of an in-memory PDF to a PyPDF2 writer.

    Args:
        input_pdf: the PDF, as ``bytes``; nothing happens if it is empty
        writer: the writer
        start_recto: start a new right-hand page?
    """
    if not input_pdf:
        return
    if start_recto and writer.getNumPages() % 2:
        # pad with a blank page so that the new document starts on a
        # right-hand page when printed double-sided
        writer.addBlankPage()
    reader = PdfFileReader(io.BytesIO(input_pdf))
    for index in range(reader.numPages):
        writer.addPage(reader.getPage(index))

Example 21

Project: pythonlib Author: RudolfCardinal File: pdf.py Apache License 2.0 6 votes

def get_concatenated_pdf_in_memory(
        pdf_plans: Iterable[PdfPlan],
        start_recto: bool = True) -> bytes:
    """
    Joins several PDFs into a single binary PDF held in memory.

    Args:
        pdf_plans: iterable of :class:`PdfPlan` objects
        start_recto: start a new right-hand page for each new PDF?

    Returns:
        concatenated PDF, as ``bytes``

    """
    combined = PdfFileWriter()
    for plan in pdf_plans:
        plan.add_to_writer(combined, start_recto=start_recto)
    return pdf_from_writer(combined)

# =============================================================================
# Main -- to enable logging for imports, for debugging
# =============================================================================

Example 22

Project: PDF-Covering Author: Jinqihuang File: pdfcovering.py MIT License 6 votes

def create_watermark_pdf(input_pdf, input_name,output,watermark):
    """Stamp a watermark on every page of a PDF and save the result.

    Args:
        input_pdf: PDF to be watermarked.
        input_name: Key into ``file_name`` used for the final message.
        output: Path of the watermarked PDF to write.
        watermark: PDF whose first page is used as the watermark.
    """
    # first page of the watermark file
    watermark_page = PdfFileReader(watermark).getPage(0)
    # document to be stamped
    pdf_reader = PdfFileReader(input_pdf)
    pdf_writer = PdfFileWriter()
    # stamp page by page and report the progress after each one
    for done, index in enumerate(range(pdf_reader.getNumPages()), start=1):
        page = pdf_reader.getPage(index)
        page.mergePage(watermark_page)
        pdf_writer.addPage(page)
        # share of pages finished so far
        rate = float(done)/float(pdf_reader.getNumPages())
        print('File conversion completed :'+'%.2f%%' % (rate * 100)+'.')
    # write the output file
    with open(output, 'wb') as out:
        pdf_writer.write(out)
    print('The '+file_name[input_name]+' file first add watermark successful.')

Example 23

Project: semantic-pdf-splitter Author: MtnFranke File: extractor.py MIT License 5 votes

def processPDF(f, fout):
    """Split a PDF into runs of textually similar consecutive pages.

    Neighbouring pages whose ``cosine_sim`` is below 0.1 start a new run.
    Every run is written to ``<fout>/<basename>-<i>-<j>-...-.pdf`` where the
    numbers are the zero-based page indexes it contains.

    Args:
        f: Path of the PDF to split.
        fout: Output directory.
    """
    pdf = PyPDF2.PdfFileReader(f, strict=False)
    fname = os.path.basename(f)

    # Extract the text of every page once instead of once per comparison.
    texts = [pdf.getPage(i).extractText() + "\n" for i in range(pdf.getNumPages())]

    pages = [[0]]
    for x in range(len(texts) - 1):
        if cosine_sim(texts[x], texts[x + 1]) < 0.1:
            pages.append([x + 1])
        else:
            pages[-1].append(x + 1)

    for cluster in pages:
        output = PyPDF2.PdfFileWriter()
        for single_page in cluster:
            output.addPage(pdf.getPage(single_page))
        filename = ''.join(str(single_page) + '-' for single_page in cluster)

        with open(fout + '/' + fname + "-" + filename + ".pdf", "wb") as outputStream:
            output.write(outputStream)

6 of 24 7/5/20, 9:10 PM
PyPDF2.PdfFileWriter Python Example https://www.programcreek.com/python/example/...

Example 24

Project: pdf2imgpdf Author: felinx File: pdf2imgpdf.py Apache License 2.0 5 votes

def pdf2img(src_pdf, pagenum=0, filename="filename", resolution=600):
    """Render one page of ``src_pdf`` as a png and save it to ``filename``.

    Args:
        src_pdf: Reader providing the page.
        pagenum: Index of the page to render.
        filename: Target passed to the image's ``save``.
        resolution: Resolution passed to ``Image``.
    """
    # one-page PDF held in memory
    single_page = PdfFileWriter()
    single_page.addPage(src_pdf.getPage(pagenum))

    buffer = io.BytesIO()
    single_page.write(buffer)
    buffer.seek(0)

    with Image(file=buffer, resolution=resolution) as img:
        img.convert("png")
        img.save(filename=filename)

Example 25

Project: kissmanga-downloader Author: Astrames File: pdfMaker.py GNU General Public License v3.0 5 votes

def merge_pdfs(folder_with_pdfs, outputPDFName=None):
    """
    Merges the *.pdf files found below a folder into one single .pdf file

    The names of the first and last sub-directory of the folder are added
    to the output name as "(<first> - <last>)".  The output name defaults
    to "<folder_with_pdfs>.pdf".
    """
    output = PdfFileWriter()

    if outputPDFName is None:
        outputPDFName = str(folder_with_pdfs) + ".pdf"

    mypath = folder_with_pdfs

    # Sub-directories of the top folder give the range of PDFs used
    for root, dirs, files in os.walk(mypath):
        top_level_dirs = dirs
        break

    included_pdf = '(' + top_level_dirs[0] + ' - ' + top_level_dirs[-1] + ')'
    outputPDFName = outputPDFName.replace('.pdf',included_pdf+'.pdf')

    for root, dirs, files in os.walk(mypath):
        for single_file in files:
            if ".pdf" not in single_file:
                continue
            source = PdfFileReader(open( join(root,single_file) , "rb"))
            for pageIndex in range(0, source.getNumPages()):
                output.addPage(source.getPage(pageIndex))

    # NOTE(review): "root" is whatever directory the walk visited last --
    # confirm that this is the intended output location.
    outputStream = open( join(root, outputPDFName) , "wb")
    output.write(outputStream)
    outputStream.close()

Example 26

Project: Marisol Author: wikkiewikkie File: marisol.py MIT License 5 votes

def save(self, filename=None, overwrite=False):
    """
    Applies the bates numbers to every page and writes the PDF to disk.

    Args:
        filename (str): Path where the PDF should be saved. Defaults to
            "<begin>.pdf".
        overwrite (bool): Switch to allow overwriting of existing files.

    Returns:
        str: Path where the file was saved.

    Raises:
        FileExistsError: When the file already exists and overwrite is not enabled.
    """
    filename = filename or "{begin}.pdf".format(begin=self.begin)

    if not overwrite and os.path.exists(filename):
        raise FileExistsError("PDF file {} already exists and overwrite is disabled.".format(filename))

    with open(filename, "wb") as target:
        pdf_writer = PdfFileWriter()
        for numbered_page in self:
            numbered_page.apply()
            pdf_writer.addPage(numbered_page.page)
        pdf_writer.write(target)
    return filename

Example 27

Project: automate-the-boring-stuff Author: zspatter File: test_password_breaker.py The Unlicense 5 votes

def encrypt_pdf(password):
    """Write an encrypted, page-less ``test_pdf.pdf`` into ``root``."""
    target = join(root, 'test_pdf.pdf')
    pdf_writer = PyPDF2.PdfFileWriter()
    pdf_writer.encrypt(password)
    with open(file=target, mode='wb') as pdf_file:
        pdf_writer.write(pdf_file)

Example 28

Project: automate-the-boring-stuff Author: zspatter File: pdf_paranoia.py The Unlicense 5 votes

def copy_pdf_pages(pdf_reader):
    """
    Builds a new pdf writer holding every page of the given pdf reader

    :param pdf_reader: PyPDF2 reader object
    :return: pdf_writer
    """
    pdf_writer = PyPDF2.PdfFileWriter()
    for page in map(pdf_reader.getPage, range(pdf_reader.numPages)):
        pdf_writer.addPage(page)
    return pdf_writer

Example 29

Project: libresign Author: this-is-ari File: stamp.py MIT License 5 votes

def create_attachments(pdf, filename, data, fields=None):
    """Overlay watermarks on the flattened PDF and return it as a writer.

    Args:
        pdf: Passed to ``request_fields`` when no fields are given.
        filename: Passed to ``exec_pdftk`` to obtain the flattened PDF.
        data: Values handed to ``create_watermarks``.
        fields: Field definitions; looked up from ``pdf`` when empty.
    """
    if not fields:
        fields = request_fields(pdf)
    flattened = PdfFileReader(exec_pdftk(filename))

    for page_index, watermark in create_watermarks(fields, data):
        flattened.getPage(page_index).mergePage(watermark)

    writer = PdfFileWriter()
    for page_index in range(flattened.getNumPages()):
        writer.addPage(flattened.getPage(page_index))

    return writer

Example 30

Project: libresign Author: this-is-ari File: pdfjinja.py MIT License 5 votes

def __call__(self, data, attachments=[], pages=None):
    """Render all templated fields and build the filled-in document.

    Args:
        data: Values handed to ``template_args`` for every field.
        attachments: Objects whose ``pdf()`` is merged onto an extra blank
            page each.
        pages: Page indexes to include; all pages when falsy.

    Returns:
        The writer holding the selected pages and the attachment pages.
    """
    self.rendered = {}
    for field, ctx in self.fields.items():
        if "template" in ctx:
            self.context = ctx
            kwargs = self.template_args(data)
            template = self.context["template"]

            try:
                rendered_field = template.render(**kwargs)
            except Exception as err:
                logger.error("%s: %s %s", field, template, err)
                continue

            # Skip the field if it is already rendered by filter
            if field in self.rendered:
                continue
            key = field.decode('utf-8') if PY3 else field
            self.rendered[key] = rendered_field

    filled = PdfFileReader(self.exec_pdftk({}))
    for pagenumber, watermark in self.watermarks:
        filled.getPage(pagenumber).mergePage(watermark)

    output = PdfFileWriter()
    selected = pages or xrange(filled.getNumPages())
    for index in selected:
        output.addPage(filled.getPage(index))

    for attachment in attachments:
        output.addBlankPage().mergePage(attachment.pdf())

    return output

Example 31

Project: interleave-pdf Author: sproberts92 File: interleave-pdf.py MIT License 5 votes

def interleave(self):
    """Write a copy of the input PDF with a blank page after every page.

    Input and output paths are read from the two entry widgets; nothing is
    written unless both are set.
    """
    self.input_path = self.entry_in.get()
    self.output_path = self.entry_out.get()

    if self.input_path and self.output_path:
        document = PyPDF2.PdfFileReader(self.input_path)
        writer = PyPDF2.PdfFileWriter()

        for page in document.pages:
            writer.addPage(page)
            writer.addBlankPage()

        # The context manager closes the output even if writing fails.
        with open(self.output_path, 'wb') as outputStream:
            writer.write(outputStream)

Example 32

Project: pdfcli Author: oliviersm199 File: tests.py MIT License 5 votes

def test_pypdf2_writer(self):
    """A page written with PdfFileWriter can be read back."""
    reader_pdf1 = PyPDF2.PdfFileReader(self.test_pdf_file_handles[0])

    writer = PyPDF2.PdfFileWriter()
    writer.addPage(reader_pdf1.getPage(0))

    # The temporary file is closed (and thereby removed) after the check.
    with tempfile.TemporaryFile() as fp:
        writer.write(fp)

        fp.seek(0)
        pdf_reader = PyPDF2.PdfFileReader(fp)

        self.assertEqual(pdf_reader.getNumPages(), 1)

Example 33

Project: pdfcli Author: oliviersm199 File: pdfcli.py MIT License 5 votes

def _reorder(*args, **kwargs):
    """Write a copy of a PDF with its pages reordered.

    Reads the keyword arguments ``file`` (input path), ``reverse`` (flag),
    ``order`` (comma-separated zero-based page indexes), ``out`` (output
    path) and ``key`` (decryption key).  When ``reverse`` is set it takes
    precedence over ``order``.

    Raises:
        click.UsageError: If neither ``reverse`` nor ``order`` is given.
        click.BadParameter: If ``order`` is not a list of integers or
            contains an index beyond the last page.
    """
    file_arg = kwargs['file']
    reverse = kwargs['reverse']
    order = kwargs['order']
    out = kwargs['out']
    decrypt_key = _encr_key_encoding(kwargs['key'])

    if not reverse and not order:
        # The message names the switches that are actually checked here.
        raise click.UsageError("Either the reverse or order switch must be set when using reorder.")

    if order:
        try:
            order = [int(num) for num in order.split(",")]
        except ValueError:
            raise click.BadParameter("order must be a list of integers representing indexes in PDF.")

    with open(file_arg, 'rb') as pdf_fp, open(out, 'wb') as pdf_fp_w:
        pdf_reader = get_pdf_reader(pdf_fp, file_arg, key=decrypt_key)
        pdf_writer = PyPDF2.PdfFileWriter()
        num_pages = pdf_reader.getNumPages()

        if order:
            for index in order:
                if index > num_pages - 1:
                    raise click.BadParameter('Indexes start from zero must be less than the number of pages')

        if reverse:
            order = list(range(num_pages - 1, -1, -1))

        for index in order:
            pdf_writer.addPage(pdf_reader.getPage(index))

        pdf_writer.write(pdf_fp_w)
    click.echo("Reordered pages in %s and rewrote file to %s" % (file_arg, out))

Example 34

Project: pdfcli Author: oliviersm199 File: pdfcli.py MIT License 5 votes

def _split(*args, **kwargs):
    """Split a PDF into two files at a page index.

    Reads the keyword arguments ``file`` (input path), ``index`` (first
    page of the second file, zero-based), ``out_first``, ``out_second``
    (output paths) and ``key`` (decryption key).

    Raises:
        click.BadParameter: If the index is beyond the last page.
    """
    file_arg = kwargs['file']
    split_index = kwargs['index']
    out_first = kwargs['out_first']
    out_second = kwargs['out_second']
    decrypt_key = _encr_key_encoding(kwargs['key'])

    with open(file_arg, 'rb') as source_fp, open(out_first, 'wb') as first_fp, \
            open(out_second, 'wb') as second_fp:
        reader = get_pdf_reader(source_fp, file_arg, key=decrypt_key)
        first_part = PyPDF2.PdfFileWriter()
        second_part = PyPDF2.PdfFileWriter()

        num_pages = reader.getNumPages()
        if split_index > num_pages - 1:
            raise click.BadParameter('The split index must be less than the number of pages')

        for i in range(num_pages):
            part = first_part if i < split_index else second_part
            part.addPage(reader.getPage(i))
        first_part.write(first_fp)
        second_part.write(second_fp)
    click.echo("Split %s at index %s into %s and %s" % (file_arg, split_index, out_first, out_second))

Example 35

Project: pdfcli Author: oliviersm199 File: pdfcli.py MIT License 5 votes

def _decrypt(*args, **kwargs):
    """Save a copy of a PDF built from the reader obtained with the key.

    Reads the keyword arguments ``file`` (input path), ``out`` (output
    path) and ``key`` (decryption key).
    """
    file_arg = kwargs['file']
    out = kwargs['out']
    decrypt_key = _encr_key_encoding(kwargs['key'])

    with open(file_arg, 'rb') as source_fp:
        with open(out, 'wb') as target_fp:
            reader = get_pdf_reader(source_fp, file_arg, key=decrypt_key)
            writer = PyPDF2.PdfFileWriter()
            writer.appendPagesFromReader(reader)
            writer.write(target_fp)
    click.echo("PDF was successfully decrypted and saved at %s" % out)

Example 36

Project: pdfdir Author: chroming File: api.py GNU General Public License v3.0 5 votes

def __init__(self, path):
    """Load the PDF at ``path`` into a writer, keeping its text metadata.

    Only document-info entries whose value is a string or bytes object
    are copied.
    """
    self.path = path
    # NOTE(review): the file handle is not closed here -- presumably the
    # writer still reads from it later; confirm before changing.
    reader = PdfFileReader(open(path, "rb"), strict=False)
    self.writer = PdfFileWriter()
    self.writer.appendPagesFromReader(reader)

    text_types = (utils.string_type, utils.bytes_type)
    metadata = {}
    for key, value in reader.getDocumentInfo().items():
        if isinstance(value, text_types):
            metadata[key] = value
    self.writer.addMetadata(metadata)

Example 37

Project: toolkitem Author: edonyM File: im2pdf.py MIT License 5 votes

def convertor(self):
    """
    Merge all per-image pdf files into one pdf file and delete them.

    The inputs listed in ``self.img_pdf_file`` are only removed after the
    merged file ``self.out_pdf_name`` has been written successfully.
    """
    from contextlib import ExitStack

    sources = list(self.img_pdf_file)
    merge_img2pdf = PdfFileWriter()
    # The inputs must remain readable until the merged file is written, so
    # they are kept open until then and closed together.
    with ExitStack() as stack:
        for img_pdf in sources:
            merge_tmp = PdfFileReader(stack.enter_context(open(img_pdf, 'rb')))
            for i in range(merge_tmp.getNumPages()):
                merge_img2pdf.addPage(merge_tmp.getPage(i))
        with open(self.out_pdf_name, 'wb') as outputstream:
            merge_img2pdf.write(outputstream)
    for img_pdf in sources:
        os.remove(img_pdf)

Example 38

Project: nautilus-pdf-tools Author: atareao File: cairoapi.py MIT License 5 votes

def encrypt(file_in, password):
    """Encrypt the PDF at ``file_in`` in place with ``password``.

    The encrypted document is written to a temporary file which is then
    copied over the original and removed.
    """
    with open(file_in, 'rb') as source:
        document_in = PdfFileReader(source)
        document_out = PdfFileWriter()
        document_out.cloneReaderDocumentRoot(document_in)
        document_out.encrypt(password)
        tmp_file = tools.create_temp_file()
        # Closing the temporary file flushes it before it is copied.
        with open(tmp_file, 'wb') as target:
            document_out.write(target)
    shutil.copy(tmp_file, file_in)
    os.remove(tmp_file)

Example 39

Project: nautilus-pdf-tools Author: atareao File: cairoapi.py MIT License 5 votes

def decrypt(file_in, password):
    """Remove the encryption of the PDF at ``file_in`` in place.

    Returns:
        True if the file was encrypted and ``password`` unlocked it,
        False otherwise (the file is left untouched in that case).
    """
    with open(file_in, 'rb') as source:
        document_in = PdfFileReader(source)
        # A single attempt is enough: retrying with the same password
        # cannot succeed and would never terminate.
        if not document_in.isEncrypted or not document_in.decrypt(password):
            return False
        document_out = PdfFileWriter()
        document_out.cloneReaderDocumentRoot(document_in)
        tmp_file = tools.create_temp_file()
        # Closing the temporary file flushes it before it is copied.
        with open(tmp_file, 'wb') as target:
            document_out.write(target)
    shutil.copy(tmp_file, file_in)
    os.remove(tmp_file)
    return True

Example 40

Project: csdn Author: spygg File: csdn.py GNU General Public License v3.0 5 votes

def __init__(self, username):
    """Prepare the local database and the PDF writer for one blog user.

    Opens ``csdn.db`` and makes sure the article table ``<username>`` and
    the index table ``<username>_Index`` exist.
    """
    super(CSDN, self).__init__()

    self.username = username
    self.baseUrl = 'https://blog.csdn.net/%s/article/list/' % username

    self.conn = sqlite3.connect('csdn.db')
    self.cursor = self.conn.cursor()

    # NOTE(review): the user name is interpolated into the SQL text as a
    # table name; confirm it is validated before it reaches this point.
    article_table_sql = '''
CREATE TABLE IF NOT EXISTS %s(
id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
url TEXT,
title TEXT,
srcHtml BLOB
)
''' % self.username
    self.cursor.execute(article_table_sql)

    index_table_sql = '''
CREATE TABLE IF NOT EXISTS %s_Index(
id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
url TEXT,
indexHtml BLOB
)
''' % self.username
    self.cursor.execute(index_table_sql)

    self.articleNumber = 0
    self.merge = PdfFileWriter()
    self.catlogPageNum = 0

Example 41

Project: opencanary Author: thinkst File: testpdf.py BSD 3-Clause "New" or "Revised" License 5 votes

def createPDF(self, name=None, size='10kb'):
    """Create an encrypted PDF filled with random bytes in the share path.

    Args:
        name: File name of the PDF; only its base name is used.
        size: Size class of the random payload: '10kb', '100kb' or '1mb'.

    Raises:
        ValueError: If ``size`` is not one of the supported size classes.
    """
    # payload length range per size class
    size_ranges = {
        '10kb': (10000,90000),
        '100kb': (100000,900000),
        '1mb': (1000000,9000000),
    }
    if size not in size_ranges:
        raise ValueError("size must be one of '10kb', '100kb' or '1mb', got %r" % (size,))

    from PyPDF2 import PdfFileReader, PdfFileWriter
    from fpdf import FPDF
    import os
    import random
    name = os.path.basename(name)
    tmp_name = '/tmp/' + name
    output_name = self.sharepath + '/' + name

    randlength = random.randint(*size_ranges[size])

    #create file
    pdf=FPDF()
    pdf.add_page()
    pdf.set_font('Arial','B',8)
    pdf.cell(0,0,os.urandom(randlength))
    pdf.output(tmp_name, "F")

    #encrypt it
    output = PdfFileWriter()
    with open(tmp_name, "rb") as tmp_fp:
        input1 = PdfFileReader(tmp_fp)
        output.encrypt(user_pwd="ihasapass")
        output.addPage(input1.getPage(0))

        # open() replaces the Python 2-only file() builtin
        with open(output_name, "wb") as outputStream:
            output.write(outputStream)

Example 42

Project: dir2pdf Author: aliagdeniz File: dir2pdf.py GNU General Public License v3.0 5 votes

def add_bookmark(list1,list2):
    """Add bookmarks to ``output.pdf`` in the current directory.

    Args:
        list1: Bookmark titles.
        list2: Page numbers, one per title in ``list1``.
    """
    tmp_path = "output.pdf.tmp"
    output = PdfFileWriter()
    with open('output.pdf', 'rb') as source:
        input1 = PdfFileReader(source)
        for i in range(input1.getNumPages()):
            output.addPage(input1.getPage(i))

        for i, title in enumerate(list1):
            output.addBookmark(title, list2[i])

        # Written next to the original first, so the original survives a
        # failed write and is never deleted while it is still being read.
        with open(tmp_path, "wb") as pdf:
            output.write(pdf)
    os.replace(tmp_path, "output.pdf")

Example 43

Project: automate-the-boring-stuff-projects Author: kudeh File: pdfParanoia.py MIT License 5 votes

def decryptPDFs(root, password):
    """Decrypt every ``*_encrypted.pdf`` found while walking ``root``.

    Each matching PDF that reports itself as encrypted and accepts
    ``password`` is copied page by page to a file in the same directory
    whose name has ``_encrypted`` removed.

    Args:
        root (str): folder path to walk
        password (str): password to decrypt pdfs with
    Returns:
        None
    """
    for folder, _subfolders, file_names in os.walk(root):
        for file_name in file_names:
            if not file_name.endswith('_encrypted.pdf'):
                continue

            filepath = os.path.join(os.path.abspath(folder), file_name)
            # ``with`` guarantees the source handle is released even when
            # reading or writing fails part-way through.
            with open(filepath, 'rb') as pdfFileObj:
                pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
                if not pdfReader.isEncrypted:
                    continue

                if not pdfReader.decrypt(password):
                    print('wrong password provided')
                    continue

                pdfWriter = PyPDF2.PdfFileWriter()
                for pageNum in range(pdfReader.numPages):
                    pdfWriter.addPage(pdfReader.getPage(pageNum))

                # Drop every occurrence of '_encrypted' from the file name.
                newPath = os.path.dirname(filepath) + '/' + \
                    ''.join(os.path.basename(filepath).split('_encrypted'))
                with open(newPath, 'wb') as resultPdf:
                    pdfWriter.write(resultPdf)

Example 44

Project: ExtractTable-py Author: ExtractTable File: __init__.py Apache License 2.0 5 votes

def pdf_separator(self, gather_pages: set):
    """Write the requested pages of ``self.filepath`` into a new PDF.

    The output lives in ``self.temp_dir`` and is named
    ``str(self.pages) + basename(self.filepath)``.

    Args:
        gather_pages: Page numbers to keep; each is converted to an index
            with ``page - 1`` before it is looked up.

    Returns:
        Path of the PDF that was written.

    Raises:
        EOFError: If a requested page does not exist in the source.
    """
    merged_pdf = os.path.join(self.temp_dir, str(self.pages) + os.path.basename(self.filepath))
    with open(merged_pdf, 'wb') as out_file:
        pdf_reader = PyPDF2.PdfFileReader(self.filepath)
        pdf_writer = PyPDF2.PdfFileWriter()
        # A set has no guaranteed iteration order; sort so the output pages
        # always appear in ascending page-number order.
        for page in sorted(gather_pages):
            try:
                pdf_writer.addPage(pdf_reader.getPage(page - 1))
            except IndexError as err:
                raise EOFError(
                    f"File has only {pdf_reader.numPages} pages, but asked for {self.pages}"
                ) from err
        pdf_writer.write(out_file)
    return merged_pdf

Example 45

Project: pdfly Author: jabalazs File: pdf_utils.py MIT License 5 votes

def merge(new_document, file_list):
    """Concatenate the PDFs in ``file_list`` into ``new_document``.

    Args:
        new_document: Path of the PDF to write.
        file_list: Iterable of paths of the PDFs to append, in order.
    """
    from contextlib import ExitStack

    pdf_writer = PdfFileWriter()
    # Every input must stay open until the writer has produced the output,
    # and all of them must be closed afterwards -- the original leaked them.
    with ExitStack() as stack:
        for filepath in file_list:
            handle = stack.enter_context(open(filepath, "rb"))
            pdf_file = PdfFileReader(handle)
            pdf_writer = _add_full_pdf_to_writer(pdf_writer, pdf_file)
        with open(new_document, "wb") as outfile:
            pdf_writer.write(outfile)

Example 46

Project: Python-Automation-Cookbook Author: PacktPublishing File: generate_sales_report.py MIT License 5 votes

def main(input_file, output_file):
    """Build a single report PDF from the sales data in an Excel workbook.

    Rows of the workbook's ``'Sheet'`` worksheet (header skipped) are turned
    into ``SaleLog`` records, summarised, rendered to three intermediate
    PDFs, merged into ``output_file``, and the intermediates are deleted.

    Args:
        input_file: Path of the workbook passed to ``openpyxl.load_workbook``.
        output_file: Path of the merged PDF to write.
    """
    from contextlib import ExitStack

    xlsfile = openpyxl.load_workbook(input_file)
    sheet = xlsfile['Sheet']

    # islice skips the first row, the header
    data = [SaleLog.from_row([cell.value for cell in row])
            for row in islice(sheet, 1, None)]

    # Generate each of the pages: a full summary, graph by day, and by shop
    total_summary = generate_summary(data)
    products = total_summary['by_product'].keys()
    summary_by_day = aggregate_by_day(data)
    summary_by_shop = aggregate_by_shop(data)

    # Compose the PDF with a brief summary and all the graphs
    summary_file = create_summary_brief(total_summary, 'summary.pdf')
    by_day_file = graph(summary_by_day, products, 'by_day.pdf', 7)
    by_shop_file = graph(summary_by_shop, products, 'by_shop.pdf')

    # Group all the pdfs into a single file. ExitStack closes every input
    # handle even if reading or writing fails.
    pdfs = [summary_file, by_day_file, by_shop_file]
    output_pdf = PyPDF2.PdfFileWriter()
    with ExitStack() as stack:
        pdf_files = [stack.enter_context(open(filename, 'rb'))
                     for filename in pdfs]
        for pdf in pdf_files:
            reader = PyPDF2.PdfFileReader(pdf)
            output_pdf.appendPagesFromReader(reader)

        # Write the resulting PDF while the inputs are still open
        with open(output_file, "wb") as out_file:
            output_pdf.write(out_file)

    # clean the temp files
    for pdf_filename in pdfs:
        os.remove(pdf_filename)

Example 47

Project: pythonpdf Author: iprapas File: stamp.py GNU General Public License v3.0 5 votes

def stamp_pdf(input_path, stamp_path, output_path, add_frame=False):
    """Merge a stamp page onto every page of a PDF.

    Args:
        input_path: Path of the PDF to stamp.
        stamp_path: Path handed to ``create_pdf_stamp`` and then read back;
            its first page is used as the stamp.
        output_path: Path of the stamped PDF to write.
        add_frame: Forwarded to ``create_pdf_stamp``.
    """
    output = PdfFileWriter()
    create_pdf_stamp(stamp_path, add_frame=add_frame)

    # Both inputs stay open until the output is written and are then closed.
    with open(input_path, 'rb') as in_file, open(stamp_path, 'rb') as stamp_file:
        pdf_in = PdfFileReader(in_file)
        pdf_stamp = PdfFileReader(stamp_file)
        stamp = pdf_stamp.getPage(0)

        # ``xrange`` does not exist on Python 3; ``range`` works on both.
        for i in range(pdf_in.getNumPages()):
            page = pdf_in.getPage(i)
            page.mergePage(stamp)
            output.addPage(page)

        with open(output_path, 'wb') as f:
            output.write(f)

Example 48

Project: datasheet-scrubber Author: idea-fasoc File: pdf_cropper.py MIT License 5 votes

def pdf_cropper_all(source_path,destination_path,pdf_filename,number_page):
    """Copy the first ``number_page`` pages of a PDF into a new file.

    Args:
        source_path: Path of the source PDF.
        destination_path: Directory for the new PDF.
        pdf_filename: File name of the new PDF inside ``destination_path``.
        number_page: Number of leading pages to copy.
    """
    NewPDFfilename = os.path.join(destination_path, pdf_filename)
    with open(source_path, "rb") as source:
        pfr = PyPDF2.PdfFileReader(source)
        if pfr.isEncrypted:  # needed for some encrypted files like AD7183
            pfr.decrypt('')

        writer = PyPDF2.PdfFileWriter()
        for i in range(number_page):
            writer.addPage(pfr.getPage(i))

        # "wb" rather than the original "ab+": appending stacked a second PDF
        # onto any existing file instead of creating a new one.
        with open(NewPDFfilename, "wb") as outputStream:
            writer.write(outputStream)

Example 49

Project: gallipy Author: GeoHistoricalData File: getpdf.py GNU Affero General Public License v3.0 5 votes

def write_pdfdata(pdffilereader, path):
    """Write all pages of ``pdffilereader`` to a PDF file at ``path``."""
    with open(path, "wb+") as sink:
        pdf_out = PdfFileWriter()
        pdf_out.appendPagesFromReader(pdffilereader)
        pdf_out.write(sink)

Example 50

Project: pdfviewer Author: naiveHobo File: pdfviewer.py MIT License 5 votes

def _run_ocr(self):
    """OCR each page of the loaded PDF, save the result, and reload it.

    Does nothing when no PDF is loaded or when the save dialog returns
    ``''`` or ``None``.
    """
    if self.pdf is None:
        return

    # One single-page PDF blob per source page, produced by pytesseract.
    ocr_blobs = []
    for source_page in self.pdf.pages:
        rendered = source_page.to_image(resolution=100)
        ocr_blobs.append(
            pytesseract.image_to_pdf_or_hocr(rendered.original, extension='pdf'))

    pdf_writer = PyPDF2.PdfFileWriter()
    for blob in ocr_blobs:
        single = PyPDF2.PdfFileReader(io.BytesIO(blob))
        pdf_writer.addPage(single.getPage(0))

    # Offer the current file's directory and name as the dialog defaults.
    dirname = os.path.dirname(self.paths[self.pathidx])
    filename = os.path.basename(self.paths[self.pathidx])
    path = filedialog.asksaveasfilename(
        title='Save OCR As',
        defaultextension='.pdf',
        initialdir=dirname,
        initialfile=filename,
        filetypes=[('PDF files', '*.pdf'), ('all files', '.*')],
    )
    if path is None or path == '':
        return

    with open(path, 'wb') as out:
        pdf_writer.write(out)

    self.paths[self.pathidx] = path
    self._load_file()

Example 51

Project: smart-manuscript Author: antemons File: searchable_pdf.py GNU General Public License v3.0 5 votes

def _add_layer_to_pdf(self, input_pdf, layer, output_pdf):
    """Merge the first page of ``layer`` onto the first page of ``input_pdf``.

    Args:
        input_pdf: Path of the original PDF.
        layer: Anything ``PdfFileReader`` accepts; its first page is merged
            onto the original's first page.
        output_pdf: Path of the one-page PDF to write.
    """
    transcription_pdf = PdfFileReader(layer)

    # Keep the source open until the result is written, then close it
    # (the original never closed this handle).
    with open(input_pdf, 'rb') as source:
        original_pdf = PdfFileReader(source)
        page = original_pdf.getPage(0)
        page.mergePage(transcription_pdf.getPage(0))

        output = PdfFileWriter()
        output.addPage(page)

        with open(output_pdf, 'wb') as f:
            output.write(f)

    print("Transcribed manuscript have been generated:", output_pdf)

Example 52

Project: pdf-hacks Author: AnnaMag File: pdf_processing.py BSD 2-Clause "Simplified" License 5 votes

def split_pdf(input_pdf, list_pages, end_page = None):
    """Split a PDF into consecutive parts held in memory.

    Part ``i`` copies page indices ``range(list_pages[i], end)``, where
    ``end`` is ``list_pages[i + 1] - 1`` for every part but the last and
    ``end_page - 1`` for the last one. The upper bound is exclusive.

    Args:
        input_pdf: Reader object exposing ``numPages`` and ``getPage``.
        list_pages: Start value of each part.
        end_page: End value for the last part; defaults to
            ``input_pdf.numPages`` when falsy.

    Returns:
        List of ``BytesIO`` objects, one per part, each left positioned at
        the end of the written data.
    """
    res = []
    if not end_page:
        # The original read the undefined name ``inputpdf`` here and in the
        # loop below, which raised NameError on every call.
        end_page = input_pdf.numPages

    last_index = len(list_pages) - 1
    for i, start in enumerate(list_pages):
        output_i = PdfFileWriter()

        if i == last_index:
            end = end_page - 1
        else:
            end = list_pages[i + 1] - 1

        for j in range(start, end):
            output_i.addPage(input_pdf.getPage(j))

        sio = BytesIO()
        output_i.write(sio)
        res.append(sio)

    return res

Example 53

Project: AutoOfficer Author: vicyangworld File: removelast2pagesofpdf.py MIT License 5 votes

def Run(self):
    """Write a copy of each numbered PDF in the root folder minus its last two pages.

    For every regular file in ``self.__ROOTPATH`` whose name starts with a
    digit and ends with ``.pdf``, a file ``Res_<name>.pdf`` holding all but
    the final two pages is written to the same folder. The original is
    deleted afterwards when the user confirmed it at the prompt.

    NOTE(review): the source listing lost its indentation; the nesting below
    is the most plausible reconstruction -- confirm against upstream.
    """
    digits = ('1','2','3','4','5','6','7','8','9','0')

    self.__messages()
    allFiles = os.listdir(self.__ROOTPATH)
    CDMF.print_blue_text("扫描待统计村�资料...,")
    nNumFile = 0
    for fileOrDir in allFiles:
        if fileOrDir.startswith(digits) and fileOrDir.endswith('.pdf'):
            nNumFile = nNumFile + 1
    CDMF.print_blue_text("扫描�毕�共有 "+str(nNumFile) + " 户的资料,",end='')
    CDMF.print_blue_text("需�统计的有 "+str(nNumFile) + " 户.")

    # Ask whether the original files should be deleted afterwards.
    bdeleteOrg = self.__quiry("是否删掉��件(请输�y或n):")
    index = 1
    for file in allFiles:
        filefull = os.path.join(self.__ROOTPATH,file)
        if os.path.isdir(filefull):
            continue
        if not filefull.endswith('.pdf'):  # only files ending in .pdf
            continue
        (filepath,tempfilename) = os.path.split(filefull)
        (filename,extension) = os.path.splitext(tempfilename)
        if not filename.startswith(digits):
            continue

        pdfWriter = PyPDF2.PdfFileWriter()  # new, empty PDF
        # os.path.join instead of a hard-coded '\\' so the result lands in
        # the root folder on every platform.
        outPdfName = os.path.join(self.__ROOTPATH, 'Res_'+filename+'.pdf')
        # ``with`` closes both files even if copying or writing fails.
        with open(filefull,'rb') as inPDFfile:
            pdfReader = PyPDF2.PdfFileReader(inPDFfile)
            for pageNum in range(pdfReader.numPages):
                if pageNum<pdfReader.numPages-2:
                    # Copy every page except the last two.
                    pdfWriter.addPage(pdfReader.getPage(pageNum))
            with open(outPdfName,'wb') as pdfOutput:
                pdfWriter.write(pdfOutput)

        CDMF.print_yellow_text(str(index)+'/'+str(nNumFile)+' ---> '+file+" 成功�")
        index += 1
        if bdeleteOrg:
            os.remove(filefull)

Example 54

Project: deda Author: dfd-tud File: privacy.py GNU General Public License v3.0 5 votes

def pdfNormaliseFormat(pdfin, width, height):
    """
    Reads a PDF given as a binary string and merges each of its pages onto
    a new blank page of the given width and height.

    Returns the resulting PDF as bytes.
    """
    source = PdfFileReader(BytesIO(pdfin))
    output = PdfFileWriter()
    buffer = BytesIO()
    page_count = source.getNumPages()
    for index in range(page_count):
        original_page = source.getPage(index)
        resized = output.addBlankPage(width, height)
        resized.mergePage(original_page)
        resized.compressContentStreams()
    output.write(buffer)
    return buffer.getvalue()

Example 55

Project: deda Author: dfd-tud File: privacy.py GNU General Public License v3.0 5 votes

def pdfWatermark(pdfin, maskCreator, foreground=False):
    """
    Apply a per-page watermark to a PDF.

    For each page from pdfin, maskCreator(page) is called and shall return
    a single-page watermark PDF as bytes.

    @pdfin PDF binary or file path
    @maskCreator Function that creates the watermark for each page; it is
        called with the page object
    @foreground If True, merge the watermark on top of the page; if False,
        merge the page on top of the watermark
    Returns: Pdf binary

    Example:
        def func(page): ...
        with open("output.pdf","wb") as fp_out:
            with open("input.pdf","rb") as fp_in:
                fp_out.write(pdfWatermark(fp_in.read(), func))
    """
    output = PdfFileWriter()
    if not isinstance(pdfin, bytes):
        # Anything that is not bytes is treated as a path and read from disk.
        with open(pdfin, "rb") as fp:
            pdfin = fp.read()
    source = PdfFileReader(BytesIO(pdfin))

    for index in range(source.getNumPages()):
        page = source.getPage(index)
        mask = PdfFileReader(BytesIO(maskCreator(page))).getPage(0)
        if not foreground:
            # Start from a blank page of the same size, then stack the
            # watermark and the original page on it, in that order.
            layered = output.addBlankPage(page.mediaBox.getWidth(),
                                          page.mediaBox.getHeight())
            layered.mergePage(mask)
            layered.mergePage(page)
        else:
            page.mergePage(mask)
            output.addPage(page)
        output.getPage(index).compressContentStreams()

    result = BytesIO()
    output.write(result)
    return result.getvalue()

Example 56

Project: autosplitPDF Author: wienand File: autosplitPDF.py MIT License 5 votes

def extractPages(inputPDF, start, end, filename='pages from %s to %s.pdf'):
    """Write page indices ``start`` .. ``end - 1`` of ``inputPDF`` to a file.

    ``filename`` is a ``%``-format template that receives ``(start, end)``.
    """
    log.debug('Extracting pages %s to %s to file %s', start, end, filename)
    output = PyPDF2.PdfFileWriter()
    for page_index in range(start, end):
        selected = inputPDF.getPage(page_index)
        output.addPage(selected)
    with open(filename % (start, end), "wb") as outputStream:
        output.write(outputStream)

Example 57

Project: callisto-core Author: project-callisto File: test_pdf.py GNU Affero General Public License v3.0 5 votes

def test_output_file(self):
    """
    Generate a two-report PDF and save it for manual inspection.

    for when you want to see what the file looks like
    $ open UserReviewPDFTest.pdf
    """
    self.client_post_report_creation()
    self.client_post_report_prep()

    reports = [self.report, self.report]
    pdf = PDFUserReviewReport.generate({"reports": reports})

    pdf_reader = PyPDF2.PdfFileReader(BytesIO(pdf))
    with open("UserReviewPDFTest.pdf", "wb") as _file:
        dst_pdf = PyPDF2.PdfFileWriter()
        dst_pdf.appendPagesFromReader(pdf_reader)
        dst_pdf.write(_file)

Example 58

Project: pdfjinja Author: rammie File: pdfjinja.py MIT License 5 votes

def __call__(self, data, attachments=[], pages=None):
    """Render the templated fields, fill the PDF, and return a writer.

    Args:
        data: Passed to ``self.template_args`` to build the keyword
            arguments for each field template.
        attachments: Objects whose ``pdf()`` result is merged onto a new
            blank page each, appended after the selected pages.
        pages: Page indices to include; all pages of the filled PDF when
            falsy.

    Returns:
        The ``PdfFileWriter`` holding the selected and attached pages.
    """
    self.rendered = {}
    for field, ctx in self.fields.items():
        if "template" not in ctx:
            continue

        self.context = ctx
        kwargs = self.template_args(data)
        template = self.context["template"]

        try:
            rendered_field = template.render(**kwargs)
        except Exception as err:
            logger.error("%s: %s %s", field, template, err)
            continue

        # Skip the field if it is already rendered by filter
        if field in self.rendered:
            continue
        if PY3:
            field = field.decode('utf-8')
        self.rendered[field] = rendered_field

    filled = PdfFileReader(self.exec_pdftk(self.rendered))
    for pagenumber, watermark in self.watermarks:
        target = filled.getPage(pagenumber)
        target.mergePage(watermark)

    output = PdfFileWriter()
    pages = pages or range(filled.getNumPages())
    for p in pages:
        output.addPage(filled.getPage(p))

    for attachment in attachments:
        blank = output.addBlankPage()
        blank.mergePage(attachment.pdf())

    return output

Example 59

Project: txffpAssistant Author: huimingz File: pdf.py GNU General Public License v3.0 5 votes

def __init__(self):
    """Create the instance with a fresh, empty ``PdfFileWriter``."""
    writer = PdfFileWriter()
    self._pdf_writer = writer

Example 60

Project: pythonlib Author: RudolfCardinal File: pdf.py Apache License 2.0 5 votes

def add_to_writer(self,
                  writer: PdfFileWriter,
                  start_recto: bool = True) -> None:
    """
    Append the PDF that this object describes to a PDF writer.

    Args:
        writer: a :class:`PyPDF2.PdfFileWriter`
        start_recto: start a new right-hand page?

    Raises:
        AssertionError: if the object is neither HTML-based nor
            filename-based.
    """
    if self.is_html:
        rendered = get_pdf_from_html(
            html=self.html,
            header_html=self.header_html,
            footer_html=self.footer_html,
            wkhtmltopdf_filename=self.wkhtmltopdf_filename,
            wkhtmltopdf_options=self.wkhtmltopdf_options)
        append_memory_pdf_to_writer(rendered, writer, start_recto=start_recto)
        return

    if not self.is_filename:
        raise AssertionError("PdfPlan: shouldn't get here!")

    # Pad with a blank page when an odd number of pages has been written.
    odd_page_count = writer.getNumPages() % 2 != 0
    if start_recto and odd_page_count:
        writer.addBlankPage()
    # The file handle is deliberately left open here; it is handed to the
    # reader, which is in turn handed to the writer.
    source = open(self.filename, 'rb')
    writer.appendPagesFromReader(PdfFileReader(source))

# =============================================================================
# Ancillary functions for PDFs
# =============================================================================

Example 61

Project: pythonlib Author: RudolfCardinal File: pdf.py Apache License 2.0 5 votes

def pdf_from_writer(writer: Union[PdfFileWriter, PdfFileMerger]) -> bytes:
    """
    Serialise a PyPDF2 writer or merger object and return the PDF as bytes.
    """
    buffer = io.BytesIO()
    writer.write(buffer)
    return buffer.getvalue()

15 of 24 7/5/20, 9:10 PM
PyPDF2.PdfFileWriter Python Example https://www.programcreek.com/python/example/...

Example 62

Project: pythonlib Author: RudolfCardinal File: pdf.py Apache License 2.0 5 votes

def make_pdf_writer() -> PdfFileWriter:
    """
    Build a new, empty PyPDF2 writer and hand it back.
    """
    new_writer = PdfFileWriter()
    return new_writer

Example 63

Project: pythonlib Author: RudolfCardinal File: pdf.py Apache License 2.0 5 votes

def append_pdf(input_pdf: bytes, output_writer: PdfFileWriter):
    """
    Legacy interface: forwards to ``append_memory_pdf_to_writer`` to append
    an in-memory PDF to a pyPDF writer.
    """
    forwarded = dict(input_pdf=input_pdf, writer=output_writer)
    append_memory_pdf_to_writer(**forwarded)

# =============================================================================
# Serve concatenated PDFs
# =============================================================================
# Two ways in principle to do this:
# (1) Load data from each PDF into memory; concatenate; serve the result.
# (2) With each PDF on disk, create a temporary file (e.g. with pdftk),
# serve the result (e.g. in one go), then delete the temporary file.
# This may be more memory-efficient.
# However, there can be problems:
# http://stackoverflow.com/questions/7543452/how-to-launch-a-pdftk-subprocess-while-in-wsgi # noqa
# Others' examples:
# https://gist.github.com/zyegfryed/918403
# https://gist.github.com/grantmcconnaughey/ce90a689050c07c61c96
# http://stackoverflow.com/questions/3582414/removing-tmp-file-after-return-httpresponse-in-django # noqa

# def append_disk_pdf_to_writer(filename, writer):


# """Appends a PDF from disk to a pyPDF writer."""
# if writer.getNumPages() % 2 != 0:
# writer.addBlankPage()
# # ... keeps final result suitable for double-sided printing
# with open(filename, mode='rb') as infile:
# reader = PdfFileReader(infile)
# for page_num in range(reader.numPages):
# writer.addPage(reader.getPage(page_num))

Example 64

Project: python-drafthorse Author: pretix File: pdf.py Apache License 2.0 5 votes

def attach_xml(original_pdf, xml_data, level='BASIC'):
    """Return a copy of ``original_pdf`` with ``xml_data`` attached.

    Args:
        original_pdf: The source PDF as bytes.
        xml_data: The XML payload as bytes.
        level: Forwarded to ``_facturx_update_metadata_add_attachment``.

    Returns:
        The new PDF as bytes.

    Raises:
        TypeError: If either ``original_pdf`` or ``xml_data`` is not bytes.
    """
    if not isinstance(original_pdf, bytes):
        raise TypeError("Please supply original PDF as bytes.")
    if not isinstance(xml_data, bytes):
        raise TypeError("Please supply XML data as bytes.")

    reader = PdfFileReader(BytesIO(original_pdf))
    output = PdfFileWriter()

    # Override the writer's header before the pages are copied over.
    output._header = "%PDF-1.6\r\n%\xc7\xec\x8f\xa2".encode()
    output.appendPagesFromReader(reader)

    # Carry over the source document's /ID when it has one.
    original_pdf_id = reader.trailer.get('/ID')
    if original_pdf_id:
        output._ID = original_pdf_id

    intents = _get_original_output_intents(reader)
    _facturx_update_metadata_add_attachment(
        output, xml_data, {}, level,
        output_intents=intents,
    )

    outbuffer = BytesIO()
    output.write(outbuffer)
    return outbuffer.getvalue()

Example 65

Project: paper2remarkable Author: GjjvdBurg File: pdf_ops.py MIT License 5 votes

def blank_pdf(filepath):
    """Write a copy of the PDF with a blank page after every page.

    The copy is stored next to the input as ``<stem>-blank.pdf`` and its
    path is returned.
    """
    logger.info("Adding blank pages")
    source = PyPDF2.PdfFileReader(filepath)
    interleaved = PyPDF2.PdfFileWriter()
    for original_page in source.pages:
        interleaved.addPage(original_page)
        interleaved.addBlankPage()

    stem, _extension = os.path.splitext(filepath)
    output_file = stem + "-blank.pdf"
    with open(output_file, "wb") as fp:
        interleaved.write(fp)
    return output_file

Example 66

Project: paper2remarkable Author: GjjvdBurg File: crop.py MIT License 5 votes

16 of 24 7/5/20, 9:10 PM
PyPDF2.PdfFileWriter Python Example https://www.programcreek.com/python/example/...

def __init__(
    self, input_file=None, output_file=None, pdfcrop_path="pdfcrop"
):
    """Store absolute input/output paths and create the reader and writer.

    ``input_file``/``reader`` and ``output_file`` are only set when the
    corresponding argument is not ``None``.
    """
    if input_file is not None:
        self.input_file = os.path.abspath(input_file)
        self.reader = PyPDF2.PdfFileReader(self.input_file)
    if output_file is not None:
        self.output_file = os.path.abspath(output_file)
    self.pdfcrop_path = pdfcrop_path

    self.writer = PyPDF2.PdfFileWriter()

Example 67

Project: paper2remarkable Author: GjjvdBurg File: crop.py MIT License 5 votes

def export_page(self, page_idx):
    """Write the page at ``page_idx`` to ``./page.pdf`` and return that path."""
    single_page_writer = PyPDF2.PdfFileWriter()
    single_page_writer.addPage(self.reader.getPage(page_idx))
    tmpfname = "./page.pdf"
    with open(tmpfname, "wb") as fp:
        single_page_writer.write(fp)
    return tmpfname

Example 68

Project: PDF-Covering Author: Jinqihuang File: pdfcovering.py MIT License 5 votes

def create_watermark_pdf_add(input_pdf, input_name, output):
    """Merge ``mark/<n>.pdf`` onto page ``n`` of ``input_pdf`` and save to ``output``.

    Progress is printed after each page, and a final message built from
    ``file_name[input_name]`` is printed after the file is written.
    """
    # Source PDF that receives the watermarks.
    pdf_reader = PdfFileReader(input_pdf)
    pdf_writer = PdfFileWriter()
    done = 0
    for page_index in range(pdf_reader.getNumPages()):
        current_page = pdf_reader.getPage(page_index)
        # The watermark for page n is the first page of mark/<n>.pdf.
        watermark_obj = PdfFileReader('mark/' + str(done) + '.pdf')
        current_page.mergePage(watermark_obj.getPage(0))
        pdf_writer.addPage(current_page)
        done += 1
        # Share of pages processed so far.
        rate = float(done) / float(pdf_reader.getNumPages())
        percent = '%.2f%%' % (rate * 100)
        print('File conversion completed :' + percent + '.')

    with open(output, 'wb') as out:
        pdf_writer.write(out)
    print('The '+file_name[input_name]+' File second add watermark successful.')

Example 69

Project: PDF_Merge_and_Edit Author: simonwongwong File: PDF_Merge_and_Edit.py MIT License 4 votes

def insertPage():
    """Show a dialog and insert one page from a PDF into another PDF.

    The user picks the PDF to edit, the 1-based position for the new page,
    the PDF that supplies the page, and the 1-based number of that page.
    The result is written next to the edited file as ``<name>-updated.pdf``
    (the last four characters of the chosen name are replaced).
    """
    inserterWindow = tk.Tk()
    inserterWindow.title("PDF page inserter")

    tk.Label(inserterWindow, text="Inserts a single page inside an existing PDF").grid(row=0, column=0, columnspan=3, padx=10, pady=3, sticky=stickyFill)

    tk.Label(inserterWindow, text="Select PDF file to edit:").grid(row=1, column=0, padx=10, pady=3)
    updateFile = tk.Entry(inserterWindow)
    updateFile.grid(row=1, column=1, sticky=stickyFill, pady=5, padx=5)
    # NOTE(review): the tail of this .grid(...) call was cut off in the
    # source; padx=5 is assumed from the neighbouring widgets -- confirm.
    tk.Button(inserterWindow, text="Browse...", command=lambda entry=updateFile, window=inserterWindow: filePicker(entry, window)).grid(row=1, column=2, pady=5, padx=5)

    tk.Label(inserterWindow, text="Page number where new page will be inserted:").grid(row=2, column=0, padx=10, pady=3)
    pageToInsert = tk.Entry(inserterWindow)
    pageToInsert.grid(row=2, column=1, sticky=stickyFill, pady=5, padx=5)

    tk.Label(inserterWindow, text="Select PDF file with page to be inserted:").grid(row=3, column=0, padx=10, pady=3)
    fileWithInsert = tk.Entry(inserterWindow)
    fileWithInsert.grid(row=3, column=1, sticky=stickyFill, pady=5, padx=5)
    # NOTE(review): also truncated in the source; pady=5, padx=5 assumed.
    tk.Button(inserterWindow, text="Browse...", command=lambda entry=fileWithInsert, window=inserterWindow: filePicker(entry, window)).grid(row=3, column=2, pady=5, padx=5)

    tk.Label(inserterWindow, text="Page number of page to be inserted:").grid(row=4, column=0, padx=10, pady=3)
    pageWithInsert = tk.Entry(inserterWindow)
    pageWithInsert.grid(row=4, column=1, sticky=stickyFill, pady=5, padx=5)
    pageWithInsert.insert(0, "1")

    tk.Button(inserterWindow, text="Insert!", command=lambda: inserterWindow.quit()).grid(row=5, column=0, columnspan=3, padx=5, pady=10, sticky=stickyFill)

    inserterWindow.mainloop()

    # Read the form values once the main loop has been quit.
    filename = updateFile.get()
    filename = filename[:-4] + '-updated.pdf'
    updateFile = checkExist(updateFile.get())
    pageToInsert = int(pageToInsert.get())
    fileWithInsert = checkExist(fileWithInsert.get())
    pageWithInsert = int(pageWithInsert.get())

    if pageToInsert == 0 or pageWithInsert == 0:
        popup("invalid page number, must be greater than 0")

    originalPDF = PyPDF2.PdfFileReader(updateFile)
    PDFwithInsert = PyPDF2.PdfFileReader(fileWithInsert)

    updatedPDF = PyPDF2.PdfFileWriter()
    updatedPDF.cloneDocumentFromReader(originalPDF)
    try:
        # Page numbers in the dialog are 1-based; PyPDF2 indices are 0-based.
        updatedPDF.insertPage(PDFwithInsert.getPage(pageWithInsert - 1), pageToInsert - 1)
    except IndexError:
        popup("Please check if page number is within range")

    # Copy the pages into a second writer and save it; ``with`` closes the
    # output file even if writing fails.
    with open(filename, 'wb') as outputFile:
        pdfOut = PyPDF2.PdfFileWriter()
        for i in range(updatedPDF.getNumPages()):
            pdfOut.addPage(updatedPDF.getPage(i))
        pdfOut.write(outputFile)

    inserterWindow.destroy()
    finished(filename, "Page insert", inserterWindow)

Example 70

Project: PDF_Merge_and_Edit Author: simonwongwong File: PDF_Merge_and_Edit.py MIT License 4 votes

def deletePage():
    """Show a dialog and delete one page from a PDF.

    The user picks the PDF to edit and the 1-based number of the page to
    drop. The result is written next to the edited file as
    ``<name>-updated.pdf`` (the last four characters of the chosen name are
    replaced).
    """
    deleterWindow = tk.Tk()
    deleterWindow.title("PDF page deleter")

    tk.Label(deleterWindow, text="Deletes a single page inside an existing PDF").grid(row=0, column=0, columnspan=3, padx=10, pady=3, sticky=stickyFill)

    tk.Label(deleterWindow, text="Select PDF file to edit:").grid(row=1, column=0, padx=10, pady=3)
    updateFile = tk.Entry(deleterWindow)
    updateFile.grid(row=1, column=1, sticky=stickyFill, pady=5, padx=5)
    # NOTE(review): the tail of this .grid(...) call was cut off in the
    # source; padx=5 is assumed from the neighbouring widgets -- confirm.
    tk.Button(deleterWindow, text="Browse...", command=lambda entry=updateFile, window=deleterWindow: filePicker(entry, window)).grid(row=1, column=2, pady=5, padx=5)

    tk.Label(deleterWindow, text="Page to delete:").grid(row=2, column=0, padx=10, pady=3)
    pageToDelete = tk.Entry(deleterWindow)
    pageToDelete.grid(row=2, column=1, sticky=stickyFill, pady=5, padx=5)

    tk.Button(deleterWindow, text="Delete!", command=lambda: deleterWindow.quit()).grid(row=3, column=0, columnspan=3, padx=5, pady=10, sticky=stickyFill)

    deleterWindow.mainloop()

    # Read the form values once the main loop has been quit.
    filename = updateFile.get()
    filename = filename[:-4] + '-updated.pdf'
    updateFile = checkExist(updateFile.get())
    pageToDelete = int(pageToDelete.get())

    if pageToDelete == 0:
        popup("invalid page number, must be greater than 0")

    originalPDF = PyPDF2.PdfFileReader(updateFile)

    updatedPDF = PyPDF2.PdfFileWriter()
    updatedPDF.cloneDocumentFromReader(originalPDF)
    try:
        # Range check only: fetching the page raises if it does not exist.
        updatedPDF.getPage(pageToDelete - 1)
    except IndexError:
        popup("Please check if page number is within range")

    # Copy every page except the one to delete; ``with`` closes the output
    # file even if writing fails.
    with open(filename, 'wb') as outputFile:
        pdfOut = PyPDF2.PdfFileWriter()
        for i in range(updatedPDF.getNumPages()):
            if i != pageToDelete - 1:
                pdfOut.addPage(updatedPDF.getPage(i))
        pdfOut.write(outputFile)

    deleterWindow.destroy()
    finished(filename, "Page delete", deleterWindow)

Example 71

Project: ampscan Author: abel-research File: output.py MIT License 4 votes

def getPDF(lngths, perimeters, CSA, APW, MLW):
    """
    creates a PDF file containing information about the limb in correct
    locations on the page
    then merges the PDF file with the existing template to create the output
    file

    Parameters
    ----------
    lngths, perimeters
        Sequences indexed in step; entries 1 .. len(lngths)-2 are drawn as
        value pairs and the last entry of ``lngths`` is drawn on its own.
    CSA, APW, MLW
        Sequences indexed in step; every second entry starting at index 1
        is drawn.

    Returns
    -------
    The file path to the PDF
    """
    packet = io.BytesIO()
    c = canvas.Canvas(packet)

    # First canvas page: distance and perimeter values.
    for i in range(1, len(lngths)-1):
        stringl = "{}".format(abs(round(lngths[i],1)))
        stringp = "{}".format(abs(round(perimeters[i],1)))
        c.drawString(360+((i-1)*27), 474-((i-1)*41.5), stringl)
        c.drawString(88, 524.5- ((i-1)*74.5), stringp)
    stringmaxZ = "{}".format(abs(round(lngths[len(lngths)-1],1)))
    c.drawString(514, 419, stringmaxZ)
    c.setFont("Courier-Bold", 12)
    c.drawString(65, 575, "Perimeter / cm")
    c.drawString(400, 520, "Distance / cm")
    c.showPage()

    # Second canvas page: images plus the CSA / APW / MLW values.
    c.drawImage("ant.png", 38,225, 256,256)
    c.drawImage("lat.png", 300,225,256,256)
    c.drawImage("figure.png", -2.5,-50, 334,200)
    for i in range(1,len(CSA),2):
        sCSA = "{}".format(round(CSA[i],1))
        sAPW = "{}".format(round(APW[i],1))
        sMLW = "{}".format(round(MLW[i],1))
        c.drawString(403, 145-((i-1)*11.5), sCSA)
        c.drawString(465, 145-((i-1)*11.5), sAPW)
        c.drawString(520, 145-((i-1)*11.5), sMLW)
    c.save()
    packet.seek(0)
    newpdf = PdfFileReader(packet)

    output_file_path = os.path.join(get_downloads_folder(), "ampscanReport.pdf")

    # Both templates stay open until the report is written and are then
    # closed; the original never closed them.
    with open(os.path.join("res", "Measurements Template.pdf"), "rb") as template_file, \
            open(os.path.join("res", "Output Template.pdf"), "rb") as t2_file:
        template = PdfFileReader(template_file)
        t2 = PdfFileReader(t2_file)
        output = PdfFileWriter()
        # The second canvas page goes onto the output template, the first
        # onto the measurements template.
        page = t2.getPage(0)
        page.mergePage(newpdf.getPage(1))
        page2 = template.getPage(0)
        page2.mergePage(newpdf.getPage(0))
        output.addPage(page)
        output.addPage(page2)

        with open(output_file_path, "wb") as outputStream:
            output.write(outputStream)

    return output_file_path

Example 72

Project: pdf2pdfocr Author: LeoFCardoso File: pdf2pdfocr.py Apache License 2.0 4 votes

def do_ocr_tesseract(param_image_file, param_extra_ocr_flag, param_tess_lang, param_tess_psm, param_temp_dir, param_shell_mode, param_path_tesseract,
                     param_text_generation_strategy, param_delete_temps, param_tess_can_textonly_pdf):
    """
    Will be called from multiprocessing, so no global variables are allowed.
    Do OCR of image with tesseract

    Outputs are written into ``param_temp_dir`` under the image's base name
    (without extension). Tesseract's stderr goes to ``tess_err_<name>.log``
    and a ``<name>.tmp`` marker file is touched at the end.
    """
    param_image_no_ext = os.path.splitext(os.path.basename(param_image_file))[0]
    tess_command_line = [param_path_tesseract]
    if isinstance(param_extra_ocr_flag, str):
        tess_command_line.extend(param_extra_ocr_flag.split(" "))
    tess_command_line.extend(['-l', param_tess_lang])
    if param_text_generation_strategy == "tesseract":
        tess_command_line += ['-c', 'tessedit_create_pdf=1']
        if param_tess_can_textonly_pdf:
            tess_command_line += ['-c', 'textonly_pdf=1']
    #
    if param_text_generation_strategy == "native":
        tess_command_line += ['-c', 'tessedit_create_hocr=1']
    #
    tess_command_line += [
        '-c', 'tessedit_create_txt=1',
        '-c', 'tessedit_pageseg_mode=' + param_tess_psm,
        param_image_file, param_temp_dir + param_image_no_ext]
    # ``with`` closes the stderr log once tesseract has finished; the
    # original left this handle open.
    with open(param_temp_dir + "tess_err_{0}.log".format(param_image_no_ext), "wb") as tess_err_log:
        pocr = subprocess.Popen(tess_command_line,
                                stdout=subprocess.DEVNULL,
                                stderr=tess_err_log,
                                shell=param_shell_mode)
        pocr.wait()
    if param_text_generation_strategy == "tesseract" and (not param_tess_can_textonly_pdf):
        pdf_file = param_temp_dir + param_image_no_ext + ".pdf"
        pdf_file_tmp = param_temp_dir + param_image_no_ext + ".tesspdf"
        os.rename(pdf_file, pdf_file_tmp)
        output_pdf = PyPDF2.PdfFileWriter()
        with open(pdf_file_tmp, 'rb') as desc_pdf_file_tmp:
            tess_pdf = PyPDF2.PdfFileReader(desc_pdf_file_tmp, strict=False)
            for i in range(tess_pdf.getNumPages()):
                imagepage = tess_pdf.getPage(i)
                output_pdf.addPage(imagepage)
            #
            output_pdf.removeImages(ignoreByteStringObject=False)
            out_page = output_pdf.getPage(0)  # Tesseract PDF is always one page in this software
            # Hack to obtain smaller file (delete the image reference)
            out_page["/Resources"][PyPDF2.generic.createStringObject("/XObject")] = PyPDF2.generic.ArrayObject()
            out_page.compressContentStreams()
            with open(pdf_file, 'wb') as f:
                output_pdf.write(f)
        # Try to save some temp space as tesseract generate PDF with same size of image
        if param_delete_temps:
            os.remove(pdf_file_tmp)
    #
    if param_text_generation_strategy == "native":
        hocr = HocrTransform(param_temp_dir + param_image_no_ext + ".hocr", 300)
        hocr.to_pdf(param_temp_dir + param_image_no_ext + ".pdf", image_file_name=None, show_bounding_boxes=False,
                    invisible_text=True)
    # Track progress in all situations
    Path(param_temp_dir + param_image_no_ext + ".tmp").touch()  # .tmp files are used to track overall progress

Example 73

Project: pdf2pdfocr Author: LeoFCardoso File: pdf2pdfocr.py Apache License 2.0 4 votes

def autorotate_final_output(self):
    """Rotate each page of the OCR output by the angle found in its OSD file.

    Reads ``<tmp_dir><prefix>-OUTPUT.pdf`` and writes
    ``<tmp_dir><prefix>-OUTPUT-ROTATED.pdf``. When autorotation is disabled,
    or the number of OSD files differs from the input page count, the source
    file is simply renamed to the destination name.
    """
    param_source_file = self.tmp_dir + self.prefix + "-OUTPUT.pdf"
    param_dest_file = self.tmp_dir + self.prefix + "-OUTPUT-ROTATED.pdf"
    # method "autorotate_info" generated these OSD files
    list_osd = sorted(glob.glob(self.tmp_dir + "{0}*.{1}".format(self.prefix, "osd")))
    skip_autorotate = False
    if self.use_autorotate and (len(list_osd) != self.input_file_number_of_pages):
        eprint("Skipping autorotation because OSD files were not correctly generated. Check input file and "
               "tesseract logs")
        skip_autorotate = True
    #
    if self.use_autorotate and not skip_autorotate:
        self.debug("Autorotate final output")
        # ``with`` closes the source even if reading or writing fails.
        with open(param_source_file, 'rb') as file_source:
            pre_output_pdf = PyPDF2.PdfFileReader(file_source, strict=False)
            final_output_pdf = PyPDF2.PdfFileWriter()
            rotation_angles = []
            osd_page_num = 0
            for osd_information_file in list_osd:
                with open(osd_information_file, 'r') as f:
                    osd_information_string = '[root]\n' + f.read()  # A dummy section to satisfy ConfigParser
                osd_page_num += 1
                config_osd = configparser.ConfigParser()
                config_osd.read_file(io.StringIO(osd_information_string))
                try:
                    rotate_value = config_osd.getint('root', 'Rotate')
                except configparser.NoOptionError:
                    eprint("Error reading rotate page value from page {0}. Assuming zero as rotation angle.".format(
                        osd_page_num))
                    rotate_value = 0
                rotation_angles.append(rotate_value)
            #
            for i in range(pre_output_pdf.getNumPages()):
                page = pre_output_pdf.getPage(i)
                page.rotateClockwise(rotation_angles[i])
                final_output_pdf.addPage(page)
            #
            with open(param_dest_file, 'wb') as f:
                final_output_pdf.write(f)
    else:
        # No autorotate, just rename the file to next method process correctly
        os.rename(param_source_file, param_dest_file)

Example 74

Project: pdf2pdfocr Author: LeoFCardoso File: pdf2pdfocr.py Apache License 2.0 4 votes

def edit_producer(self):
    """Copy the rotated PDF to ``self.output_file`` with updated metadata.

    Entries of ``self.input_file_metadata`` are carried over when
    ``createStringObject`` accepts their value. This tool's signature is
    appended to an existing ``/Producer`` entry, or set as ``/Producer``
    when the input has none.
    """
    self.debug("Editing producer")
    param_source_file = self.tmp_dir + self.prefix + "-OUTPUT-ROTATED.pdf"
    # ``with`` closes the source even if copying or writing fails.
    with open(param_source_file, 'rb') as file_source:
        pre_output_pdf = PyPDF2.PdfFileReader(file_source, strict=False)
        final_output_pdf = PyPDF2.PdfFileWriter()
        for i in range(pre_output_pdf.getNumPages()):
            page = pre_output_pdf.getPage(i)
            final_output_pdf.addPage(page)
        info_dict_output = dict()
        # Our signature as a producer
        our_name = "PDF2PDFOCR(github.com/LeoFCardoso/pdf2pdfocr)"
        read_producer = False
        producer_key = "/Producer"
        if self.input_file_metadata is not None:
            for key in self.input_file_metadata:
                value = self.input_file_metadata[key]
                if key == producer_key:
                    if type(value) == ByteStringObject:
                        value = str(value, errors="ignore")
                        value = "".join(filter(lambda x: x in string.printable, value))  # Try to remove unprintable
                    value = value + "; " + our_name
                    read_producer = True
                #
                try:
                    # Check if value can be accepted by pypdf API
                    PyPDF2.generic.createStringObject(value)
                    info_dict_output[key] = value
                except TypeError:
                    # This can happen with some array properties.
                    eprint("Warning: property " + key + " not copied to final PDF")
            #
        if not read_producer:
            info_dict_output[producer_key] = our_name
        #
        final_output_pdf.addMetadata(info_dict_output)
        #
        with open(self.output_file, 'wb') as f:
            final_output_pdf.write(f)

Example 75

Project: Sudoku Author: BurnYourPc File: burnSudo2Img.py GNU General Public License v3.0 4 votes

def BurnSudoOnPdf(path,numpage,diffarray):
    """Draw sudoku images on one A4 page and save it as ``output<numpage>.pdf``.

    path      -- sequence of images, each passed to ``Canvas.drawImage``
    numpage   -- page number; printed on the page and used in the file name
    diffarray -- one difficulty value per image, rendered via ``getStrDiff``

    Images are placed in a two-column grid (x = 55 for even indices, 345 for
    odd ones) with rows at y = 590 (indices 0-1), 320 (indices 2-3) and 50
    (every index from 4 on).  The file is written to the current directory.
    """
    # Register every font once, before anything is drawn with it.
    for font_name in ('Vera', 'VeraBd', 'VeraIt'):
        pdfmetrics.registerFont(TTFont(font_name, font_name + '.ttf'))

    # Using ReportLab Canvas to insert image into PDF
    imgTemp = BytesIO()
    imgDoc = canvas.Canvas(imgTemp, pagesize=A4)

    # Draw each image with its difficulty label just above it
    for i, image in enumerate(path):
        x = 345 if (i + 1) % 2 == 0 else 55
        if i < 2:
            y = 590
        elif i < 4:
            y = 320
        else:
            y = 50
        imgDoc.drawImage(image, x, y, 200, 200)
        imgDoc.setFont('VeraIt', 9)
        imgDoc.drawString(x + 2, y + 203, getStrDiff(diffarray[i]))

    # Page header, credits and page number
    imgDoc.setFont('Vera', 13)
    imgDoc.drawString(30, 820, "BurnYourPc Organization/")
    imgDoc.setFont('VeraBd', 9)
    imgDoc.drawString(197, 820, "Sudoku Project")
    imgDoc.setFont('VeraIt', 8)
    imgDoc.drawString(430, 20, "By PantelisPanka, nikfot, TolisChal")
    imgDoc.setFont('Vera', 8)
    imgDoc.drawString(550, 820, str(numpage))

    imgDoc.save()

    # Use PyPDF to copy the canvas page into the output document
    pdf = PdfFileWriter()
    pdf.addPage(PdfFileReader(BytesIO(imgTemp.getvalue())).getPage(0))

    # Close the output explicitly so the data is flushed to disk on return.
    with open("output" + str(numpage) + ".pdf", "wb") as out_file:
        pdf.write(out_file)

Example 76

Project: everest Author: rodluger File: detrender.py MIT License 4 votes

def publish(self, **kwargs):
    '''
    Apply the CBV correction, save the CBV figure as a cover page, merge it
    with the last page of the existing ``<name>.pdf`` and finally call
    ``MakeFITS``.

    Any exception raised along the way is handed to
    ``self.exception_handler(self.debug)``.

    '''

    try:

        # HACK: publication always runs with these CBV parameters
        self.cbv_win = 999
        self.cbv_order = 3
        self.cbv_num = 1

        # Fetch the CBVs for this target
        self._mission.GetTargetCBVs(self)

        # Draw the corrected, de-trended and raw light curves
        cbv = CBV()
        self.plot_info(cbv)
        self.plot_cbv(cbv.body(), self.fcor, 'Corrected')
        self.plot_cbv(cbv.body(), self.flux, 'De-trended', show_cbv=True)
        self.plot_cbv(cbv.body(), self.fraw, 'Raw')

        # Write the figure to a temporary single-page PDF
        cbv_path = os.path.join(self.dir, 'cbv.pdf')
        pdf = PdfPages(cbv_path)
        pdf.savefig(cbv.fig)
        pl.close(cbv.fig)
        info = pdf.infodict()
        info['Title'] = 'EVEREST: %s de-trending of %s %d' % (
            self.name, self._mission.IDSTRING, self.ID)
        info['Author'] = 'Rodrigo Luger'
        pdf.close()

        # Merge: CBV page first, then the last page of the existing PDF
        dvs_path = os.path.join(self.dir, self.name + '.pdf')
        assert os.path.exists(dvs_path), \
            "Unable to locate %s.pdf." % self.name
        output = PdfFileWriter()
        cover_reader = PdfFileReader(cbv_path)
        dvs_reader = PdfFileReader(dvs_path)
        output.addPage(cover_reader.getPage(0))
        output.addPage(dvs_reader.getPage(dvs_reader.numPages - 1))

        # Write the merged file and drop the temporary cover page
        merged_path = os.path.join(self.dir, self._mission.DVSFile(
            self.ID, self.season, self.cadence))
        outputStream = open(merged_path, "wb")
        output.write(outputStream)
        outputStream.close()
        os.remove(cbv_path)

        # Make the FITS file
        MakeFITS(self)

    except:

        self.exception_handler(self.debug)

Example 77

Project: datasheet-scrubber Author: idea-fasoc File: pdf_cropper_title.py MIT License 4 votes

def pdf_cropper_title(sourth_path,destin_path):
    """Reduce every PDF in a source folder to its description page(s).

    Both folder arguments are joined onto the first line of ``Address(1)``.
    For each file found in the source folder:

    * if some page's extracted text contains a "product description" or
      "general description" heading, the first such page is written alone
      to the destination folder under the same file name;
    * otherwise, when the document has at least two pages, those of the
      first two pages that do not mention a table of contents are written;
    * otherwise the file is copied unchanged.

    Encrypted files are first decrypted with an empty password; if PyPDF2
    fails, ``qpdf`` is run through the shell to rewrite the source in place.
    """
    import shlex  # local import: only needed for the qpdf fallback

    Path_extracted1 = Address(1).split("\n")[0]
    source_pdf_Dir = os.path.join(Path_extracted1, sourth_path)
    destin_pdf_Dir = os.path.join(Path_extracted1, destin_path)
    # NOTE(review): the last alternative of this pattern was cut off in the
    # reviewed copy; "GENERAL DESCRIPTION" is restored by analogy with the
    # other case variants - confirm against the upstream file.
    basic_search_words = re.compile(
        r'(?:product description|Product description|Product Description|PRODUCT DESCRIPTION|general description|General description|General Description|GENERAL DESCRIPTION)')
    non_crop_words = re.compile(
        r'(?:TABLE OF CONTENTS|Table Of Contents|Table of Contents|Table of contents|table of contents)')
    for pdf in os.listdir(source_pdf_Dir):
        single_source_name = os.path.join(source_pdf_Dir, pdf)
        single_destin_name = os.path.join(destin_pdf_Dir, pdf)
        source_file = open(single_source_name, 'rb')
        try:
            pdf1 = PdfFileReader(source_file, strict=False)
            if pdf1.isEncrypted:
                try:
                    pdf1.decrypt('')
                except Exception:
                    # PyPDF2 could not decrypt it: let qpdf rewrite the
                    # source in place, then read the rewritten file.
                    source_file.close()
                    # Quote the path so spaces or shell metacharacters in a
                    # file name cannot alter the command.
                    quoted_source = shlex.quote(single_source_name)
                    command = ("cp " + quoted_source +
                               " temp.pdf; qpdf --password='' --decrypt temp.pdf " + quoted_source
                               + "; rm temp.pdf")
                    os.system(command)
                    source_file = open(single_source_name, 'rb')
                    pdf1 = PdfFileReader(source_file, strict=False)
            page_number = pdf1.getNumPages()
            pdf_writer = PdfFileWriter()
            # Index of the first page carrying a description heading, if any
            target_page = None
            for i in range(page_number):
                if basic_search_words.search(pdf1.getPage(i).extractText()):
                    target_page = i
                    break
            if target_page is not None:
                pdf_writer.addPage(pdf1.getPage(target_page))
                with open(single_destin_name, 'wb') as out:
                    pdf_writer.write(out)
            elif page_number >= 2:
                for page in range(2):
                    if not non_crop_words.search(pdf1.getPage(page).extractText()):
                        pdf_writer.addPage(pdf1.getPage(page))
                with open(single_destin_name, 'wb') as out:
                    pdf_writer.write(out)
            else:
                copyfile(single_source_name, single_destin_name)
        finally:
            # The reader pulls pages from this handle while writing, so it
            # can only be released once the file has been fully processed.
            source_file.close()

Example 78

Project: deda Author: dfd-tud File: privacy.py GNU General Public License v3.0 4 votes

def _createMask(self,page=None):
    """Render the dot mask for one page and return it as PDF bytes.

    The dot offsets in ``self.proto`` are scaled by ``self.scale`` and tiled
    over the page every ``self.hps * self.scale`` horizontally and
    ``self.vps * self.scale`` vertically; dots falling beyond the page
    width or height are skipped.  Coordinates are multiplied by 72 before
    being drawn (presumably inches to points - confirm).

    page -- optional PyPDF2 page.  When given and a global named ``wand``
            exists, the page is rasterised at 300 dpi and dots that land
            on a pure black pixel are removed; dots outside the raster are
            kept.
    """
    w, h = self.pagesize
    w = float(w)
    h = float(h)
    shps = self.hps*self.scale
    svps = self.vps*self.scale
    # Named so that it does not shadow the standard ``io`` module.
    mask_buffer = BytesIO()
    c = canvas.Canvas(mask_buffer, pagesize=(w, h))
    c.setStrokeColorRGB(*self.colour)
    c.setFillColorRGB(*self.colour)

    allDots = []
    for x_ in range(int(w/shps/72+1)):
        for y_ in range(int(h/svps/72+1)):
            for xDot, yDot in self.proto:
                x = (x_*shps+xDot*self.scale)
                y = (y_*svps+yDot*self.scale)
                if x*72 > w or y*72 > h:
                    continue
                allDots.append((x, y))

    if "wand" in globals() and page is not None:
        # PyPDF2 page to cv2 image
        dpi = 300
        pageio = BytesIO()
        pageWriter = PdfFileWriter()
        pageWriter.addPage(page)
        pageWriter.write(pageio)
        pageio.seek(0)
        with WandImage(file=pageio, format="pdf", resolution=dpi) as wim:
            imbin = wim.make_blob("png")
        file_bytes = np.asarray(bytearray(imbin), dtype=np.uint8)
        im = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)

        # remove dots on black spots
        allDots = [(x, y) for x, y in allDots
                   if int(y*dpi) >= im.shape[0] or int(x*dpi) >= im.shape[1]
                   or (im[-int(y*dpi), int(x*dpi)] != (0, 0, 0)).any()]

    for x, y in allDots:
        c.circle(x*72, h-y*72, self.dotRadius*72, stroke=0, fill=1)
    c.showPage()
    c.save()
    return mask_buffer.getvalue()

Example 79

Project: signpdf Author: yourcelf File: signpdf.py MIT License 4 votes

def sign_pdf(args):
    """Stamp a signature image (and optionally today's date) on one PDF page.

    Attributes read from ``args``:

    coords    -- ``"<page>x<x>x<y>x<width>x<height>"``, five integers joined
                 by ``x``; the page number is 1-based
    pdf       -- path of the PDF to sign
    signature -- image passed to ``Canvas.drawImage``
    date      -- when truthy, the current date (YYYY-MM-DD) is drawn to the
                 right of the signature
    output    -- output path; when falsy, ``<stem>_signed<ext>`` derived
                 from ``args.pdf`` is used
    """
    #TODO: use a gui or something.... for now, just trial-and-error the coords
    page_num, x1, y1, width, height = [int(a) for a in args.coords.split("x")]
    page_num -= 1

    output_filename = args.output or "{}_signed{}".format(
        *os.path.splitext(args.pdf)
    )

    sig_tmp_fh = None
    sig_tmp_filename = None
    pdf_fh = open(args.pdf, 'rb')
    try:
        pdf = PyPDF2.PdfFileReader(pdf_fh)
        writer = PyPDF2.PdfFileWriter()

        for i in range(pdf.getNumPages()):
            page = pdf.getPage(i)

            if i == page_num:
                # Create PDF for signature
                sig_tmp_filename = _get_tmp_filename()
                c = canvas.Canvas(sig_tmp_filename, pagesize=page.cropBox)
                c.drawImage(args.signature, x1, y1, width, height, mask='auto')
                if args.date:
                    c.drawString(x1 + width, y1, datetime.datetime.now().strftime("%Y-%m-%d"))
                c.showPage()
                c.save()

                # Merge PDF in to original page
                sig_tmp_fh = open(sig_tmp_filename, 'rb')
                sig_tmp_pdf = PyPDF2.PdfFileReader(sig_tmp_fh)
                sig_page = sig_tmp_pdf.getPage(0)
                sig_page.mediaBox = page.mediaBox
                page.mergePage(sig_page)

            writer.addPage(page)

        with open(output_filename, 'wb') as fh:
            writer.write(fh)
    finally:
        # Release the handles and the temporary signature PDF even when
        # reading, drawing or writing failed part-way through.
        pdf_fh.close()
        if sig_tmp_fh is not None:
            sig_tmp_fh.close()
        if sig_tmp_filename and os.path.exists(sig_tmp_filename):
            os.remove(sig_tmp_filename)

Example 80

Project: cryptoluggage Author: miguelinux314 File: pypdfocr_pdf.py GNU General Public License v3.0 4 votes

def overlay_hocr_pages(self, dpi, hocr_filenames, orig_pdf_filename):
    """Overlay per-page OCR text onto the original PDF.

    dpi               -- passed through to ``overlay_hocr_page``
    hocr_filenames    -- list of ``(image_filename, hocr_filename)`` pairs;
                         sorted in place by the natural key of the image
                         file name
    orig_pdf_filename -- path of the PDF to receive the text layer

    Returns the path of the created file, ``<basename>_ocr.pdf`` in the same
    directory as the original.  The per-page text PDFs and the combined
    ``<basename>_text.pdf`` are deleted before returning.
    """
    logging.debug("Going to overlay following files onto %s" % orig_pdf_filename)

    # Sort the hocr_filenames into natural keys!
    hocr_filenames.sort(key=lambda x: self.natural_keys(x[0]))
    logging.debug(hocr_filenames)

    pdf_dir, pdf_basename = os.path.split(orig_pdf_filename)
    basename = os.path.splitext(pdf_basename)[0]
    pdf_filename = os.path.join(pdf_dir, "%s_ocr.pdf" % (basename))

    text_pdf_filenames = []
    for img_filename, hocr_filename in hocr_filenames:
        text_pdf_filename = self.overlay_hocr_page(dpi, hocr_filename, img_filename)
        logging.info("Created temp OCR'ed pdf containing only the text as %s" % (text_pdf_filename))
        text_pdf_filenames.append(text_pdf_filename)

    # Now, concatenate this text_pdfs into one single file.
    # This is a hack to save memory/running time when we have to do the actual merge with a writer
    all_text_filename = os.path.join(pdf_dir, "%s_text.pdf" % (basename))

    merger = PdfFileMerger()
    # ``open`` replaces the Python-2-only ``file`` builtin.  The handles are
    # tracked so they can be closed once the merged file is on disk; they
    # were previously left open while the files were being deleted below.
    text_handles = []
    try:
        for text_pdf_filename in text_pdf_filenames:
            text_handle = open(text_pdf_filename, 'rb')
            text_handles.append(text_handle)
            merger.append(PdfFileReader(text_handle))
        merger.write(all_text_filename)
        merger.close()
    finally:
        for text_handle in text_handles:
            text_handle.close()
    del merger

    writer = PdfFileWriter()
    with open(orig_pdf_filename, 'rb') as orig, open(all_text_filename, 'rb') as text_file:
        for orig_pg, text_pg in zip(self.iter_pdf_page(orig), self.iter_pdf_page(text_file)):
            orig_pg = self._get_merged_single_page(orig_pg, text_pg)
            writer.addPage(orig_pg)

        # Flush out this page merge so we can close the text_file
        with open(pdf_filename, 'wb') as f:
            writer.write(f)

    # Windows sometimes locks the temp text file for no reason, so we need to retry a few times to delete
    for fn in text_pdf_filenames:
        Retry(partial(os.remove, fn), tries=10, pause=3).call_with_retry()

    os.remove(all_text_filename)
    logging.info("Created OCR'ed pdf as %s" % (pdf_filename))

    return pdf_filename

Terms of Use Privacy Support & Contact

24 of 24 7/5/20, 9:10 PM

You might also like