Better Data Science | Generate PDF Reports with
Python
● Install any library you don't have with the pip install <libraryname> command
● It's likely you won't have FPDF installed, so install it with:
○ pip install fpdf
In [1]:
import os
import shutil
import numpy as np
import pandas as pd
import calendar
from datetime import datetime
from fpdf import FPDF
import matplotlib.pyplot as plt
from matplotlib import rcParams
# Hide the top and right axes borders on every figure created afterwards
rcParams.update({
    'axes.spines.top': False,
    'axes.spines.right': False,
})
Data generation
● The generate_sales_data() function returns a Pandas DataFrame with dummy data for
a given month
● The month is passed as an integer
In [2]:
def generate_sales_data(month: int, year: int = 2020) -> pd.DataFrame:
    """Return dummy daily sales figures for one calendar month.

    Args:
        month: Month number, 1 (January) through 12 (December).
        year: Calendar year the month belongs to. Defaults to 2020, the
            value that used to be hard-coded.

    Returns:
        A DataFrame with one row per day of the month and two columns:
        ``Date`` (the day) and ``ItemsSold`` (a random integer from
        1000 to 1999).

    Raises:
        ValueError: If ``month`` is outside 1-12.
    """
    if not 1 <= month <= 12:
        raise ValueError(f'month must be in 1..12, got {month}')

    # calendar.monthrange(year, month) returns
    # (weekday of the first day, number of days in the month)
    last_day = calendar.monthrange(year, month)[1]

    # Date range from the first day of the month until the last
    dates = pd.date_range(
        start=datetime(year=year, month=month, day=1),
        end=datetime(year=year, month=month, day=last_day),
    )

    # `high` is exclusive, so the sales numbers fall in [1000, 2000)
    sales = np.random.randint(low=1000, high=2000, size=len(dates))

    # Combine into a single dataframe
    return pd.DataFrame({
        'Date': dates,
        'ItemsSold': sales,
    })
In [10]:
# Preview the dummy data generated for March
generate_sales_data(3)
Visualizing sales data
● plot() function visualizes a single sales month (time series)
● Instead of showing the figure, the function saves it to a file
○ Filename is specified by a parameter
In [3]:
def plot(data: pd.DataFrame, filename: str) -> None:
    """Save a line chart of one month of sales to an image file.

    The figure is written to disk instead of being shown.

    Args:
        data: DataFrame with a datetime ``Date`` column and a numeric
            ``ItemsSold`` column, e.g. the output of
            generate_sales_data().
        filename: Path the figure is saved to.

    Raises:
        ValueError: If ``data`` has no rows.
    """
    if data.empty:
        raise ValueError('data must contain at least one row')

    # Positional access: `data["Date"].dt.month[0]` is a label lookup and
    # raises KeyError when the index does not contain the label 0
    # (e.g. after filtering or slicing the frame).
    first_day = data['Date'].iloc[0]

    plt.figure(figsize=(12, 4))
    plt.grid(color='#F2F2F2', alpha=1, zorder=0)
    plt.plot(data['Date'], data['ItemsSold'], color='#087E8B', lw=3, zorder=5)
    # Year and month both come from the data, so the title stays correct
    # for data outside 2020 as well
    plt.title(f'Sales {first_day.year}/{first_day.month}', fontsize=17)
    plt.xlabel('Period', fontsize=13)
    plt.xticks(fontsize=9)
    plt.ylabel('Number of items sold', fontsize=13)
    plt.yticks(fontsize=9)
    plt.savefig(filename, dpi=300, bbox_inches='tight', pad_inches=0)
    # Close the figure so repeated calls do not accumulate open figures
    plt.close()
In [4]:
# Try the two helpers together: chart December's dummy data and save it
december = generate_sales_data(12)
plot(december, 'december.png')
Construct page elements
● The construct() function makes a directory for plots and then makes a sales chart for
every month in 2020 except January
○ January was excluded because we want to show how you can
have a different number of elements on a report page
○ Feel free to include it
■ Change for i in range(2, 13) to for i in range(1, 13)
● Once the visualizations are saved, they are appended to a list-of-lists structure
(matrix)
○ Max of 3 elements per row
○ Can be lower
○ A single row in this matrix represents a single page
■ If the row has 3 elements, the report page will
have 3 visualizations
In [5]:
PLOT_DIR = 'plots'


def construct():
    """Render the monthly sales charts and group their paths into pages.

    Recreates the PLOT_DIR directory from scratch, saves one chart per
    month of 2020 (February through December) into it, and splits the
    chart paths, in month order, into chunks of at most three.

    Returns:
        A list of pages. Each page is a list of up to three image paths
        of the form ``'<PLOT_DIR>/<month>.png'``. The list is empty when
        no charts were generated.
    """
    per_page = 3

    # Delete the folder if it exists and create it again, so charts left
    # over from an earlier run never end up in the report
    try:
        shutil.rmtree(PLOT_DIR)
    except FileNotFoundError:
        pass
    os.mkdir(PLOT_DIR)

    # Remember each path as it is written instead of listing the directory
    # afterwards: the order is already by month, and stray files (which
    # would break a numeric sort on the file name) cannot interfere.
    paths = []
    # Iterate over all months in 2020 except January
    for i in range(2, 13):
        path = f'{PLOT_DIR}/{i}.png'
        # Save visualization
        plot(data=generate_sales_data(month=i), filename=path)
        paths.append(path)

    # We want 3 per page; the last page may hold fewer
    return [
        paths[start:start + per_page]
        for start in range(0, len(paths), per_page)
    ]
In [6]:
# Generate the charts and collect their file paths, grouped by report page
plots_per_page = construct()
● 4 pages in total
● First 3 have 3 plots per page
● The last one has only 2
In [7]:
# Display the page layout returned by construct()
plots_per_page
PDF class
● Inherits from FPDF
○ All methods and properties are inherited
○ Don't forget to call super() in the constructor
● Class is used to generate report from the plots_per_page matrix
In [8]:
class PDF(FPDF):
    """FPDF document that stacks up to three chart images on each page.

    Defines a header (logo plus report title) and a footer (page number)
    by overriding the FPDF header() and footer() hooks.
    """

    def __init__(self):
        super().__init__()
        # Page dimensions used for layout (A4 portrait, in millimetres)
        self.WIDTH = 210
        self.HEIGHT = 297

    def header(self):
        """Draw the logo on the left and the report title on the right."""
        # Custom logo and positioning
        # Create an `assets` folder and put any wide and short image inside
        # Name the image `logo.png`
        self.image('assets/logo.png', 10, 8, 33)
        self.set_font('Arial', 'B', 11)
        # Text-less cell: only moves the cursor to the right so the title
        # cell that follows sits near the right edge
        self.cell(self.WIDTH - 80)
        self.cell(60, 1, 'Sales report', 0, 0, 'R')
        self.ln(20)

    def footer(self):
        """Draw the centered page number at the bottom of the page."""
        # Page numbers in the footer
        # A negative y position is measured from the bottom of the page
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.set_text_color(128)
        self.cell(0, 10, 'Page ' + str(self.page_no()), 0, 0, 'C')

    def page_body(self, images):
        """Place the given images one below the other on the current page.

        Args:
            images: Sequence of image file paths. At most three fit on a
                page; any further entries are not drawn. An empty
                sequence leaves the page body blank.
        """
        left = 15
        # Same margin on both sides of the page
        image_width = self.WIDTH - 2 * left
        # Top edge of each of the three slots (25, 110 and 195 with the
        # page width set in __init__)
        slot_tops = (25, self.WIDTH / 2 + 5, self.WIDTH / 2 + 90)
        # zip() stops at the shorter sequence, so a page with one or two
        # images simply fills the first slots
        for path, top in zip(images, slot_tops):
            self.image(path, left, top, image_width)

    def print_page(self, images):
        """Add a new page to the report and draw the given images on it."""
        # Generates the report
        self.add_page()
        self.page_body(images)
● Instantiate it and create a report:
In [9]:
# Build the report: one PDF page per row of the plots_per_page matrix
pdf = PDF()
for elem in plots_per_page:
    pdf.print_page(elem)
# 'F' saves the document to a local file with the given name
# (the file name previously contained a typo: 'SalesRepot.pdf')
pdf.output('SalesReport.pdf', 'F')