Time Series Visualization From Raw Data To Insights
Time Series Visualization From Raw Data To Insights
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
# Load the electricity dataset and preview its first rows as a styled table.
df = pd.read_csv("Electricity.csv")
df.head().style.set_properties(
    **{
        'background-color': 'OliveDrab',
        'color': 'white',
        # 'darkblack' is not a CSS colour name, so that declaration was ignored
        # by the renderer; 'black' is the valid equivalent.
        'border-color': 'black',
    })
DateTime Consumption Production Nuclear Wind Hydroelectric Oil and Gas Coal Solar Biomass
# Convert the DateTime column to pandas datetime values so it can be used as a time index later.
df['DateTime'] = pd.to_datetime(df['DateTime'])
# Summary statistics for every column, rendered with a 'rainbow' background gradient.
df.describe().style.background_gradient(cmap='rainbow')
DateTime Consumption Production Nuclear Wind Hydroelectric Oil and Gas Coal Solar
count 46011 46011.000000 46011.000000 46011.000000 46011.000000 46011.000000 46011.000000 46011.000000 46011.000000 46011.
2021-08-16
mean 6587.616440 6518.645628 1291.177501 792.310882 1857.052444 1171.890418 1193.157332 156.688031
11:19:47.715981056
2019-01-01
min 3889.000000 3315.000000 562.000000 -26.000000 175.000000 198.000000 279.000000 0.000000
00:00:00
2020-04-24
25% 5773.000000 5814.000000 1347.000000 236.000000 1347.000000 858.000000 962.000000 0.000000
06:30:00
2021-08-16
50% 6552.000000 6462.000000 1383.000000 592.000000 1747.000000 1211.000000 1172.000000 2.000000
12:00:00
2022-12-08
75% 7321.000000 7176.000000 1405.000000 1205.000000 2265.000000 1511.000000 1406.000000 280.000000
15:30:00
2024-03-31
max 9615.000000 9886.000000 1457.000000 2811.000000 4434.000000 2141.000000 2537.000000 1137.000000
23:00:00
std nan 1043.654923 986.805018 236.549637 675.812712 692.592157 434.748917 320.449368 229.502650
plt.style.use('fivethirtyeight')

# Work on an independent, timestamp-indexed copy so `df` keeps its DateTime column.
# `copy` takes a boolean `deep` flag; the string 'Deep' only worked because any
# non-empty string is truthy.
df2 = df.copy(deep=True)
df2 = df2.set_index('DateTime')

# Facet plots: one panel per column on a 3x3 grid, each panel with its own axes.
df2.plot(
    subplots=True,
    linewidth=0.5,
    layout=(3, 3),
    figsize=(20, 15),
    sharex=False,
    sharey=False,
)
plt.show()
# Keep only the timestamp and the consumption reading, then preview the result.
df_consumption = df.loc[:, ['DateTime', 'Consumption']]
df_consumption.head()
DateTime Consumption
# Timestamp-indexed copy of the consumption readings, plotted over the full range.
# `deep` is a boolean flag; the string 'Deep' only worked because it is truthy.
df5 = df_consumption.copy(deep=True)
df5 = df5.set_index('DateTime')
df5['Consumption'].plot(figsize=(20, 5))
plt.show()

# Slice the 2023-01-01 .. 2023-09-01 window once and derive both the index and
# the values from it, instead of running the same label lookup twice.
window = df5.loc['2023-01-01':'2023-09-01']
index = window.index
Consumption = window['Consumption']
Consumption
DateTime
2023-01-01 00:00:00 4996
2023-01-01 01:00:00 4995
2023-01-01 02:00:00 4816
2023-01-01 03:00:00 4627
2023-01-01 04:00:00 4581
...
2023-09-01 19:00:00 6704
2023-09-01 20:00:00 6932
2023-09-01 21:00:00 6712
2023-09-01 22:00:00 6090
2023-09-01 23:00:00 5582
Name: Consumption, Length: 5855, dtype: int64
# List the matplotlib style sheets that can be passed to plt.style.use.
print(plt.style.available)
plt.style.use('fivethirtyeight')

# Rebuild the timestamp-indexed consumption frame and redraw it with the chosen style.
# `deep` is a boolean flag; the string 'Deep' only worked because it is truthy.
df5 = df_consumption.copy(deep=True)
df5 = df5.set_index('DateTime')
df5['Consumption'].plot(figsize=(20, 5))
plt.show()
# Maximum consumption per 'QS' resampling bucket, drawn as a line.
df5.resample(rule='QS').max()['Consumption'].plot(figsize=(20,5));
##plotting
# Mean consumption per 'A' bucket and maximum per 'M' bucket, drawn as bar charts.
# NOTE(review): newer pandas releases deprecate the 'A' and 'M' aliases (reportedly in
# favour of 'YE' and 'ME'); confirm against the pandas version this notebook targets.
df5['Consumption'].resample(rule='A').mean().plot(kind='bar',figsize=(20,5));
df5['Consumption'].resample(rule='M').max().plot(kind='bar',figsize=(20,5));
# Rolling maximum over a window of 5 rows; the first 4 rows have no full window yet.
df5['Consumption'].rolling(5).max().head(8)
DateTime
2019-01-01 00:00:00 NaN
2019-01-01 01:00:00 NaN
2019-01-01 02:00:00 NaN
2019-01-01 03:00:00 NaN
2019-01-01 04:00:00 6352.0
2019-01-01 05:00:00 6116.0
2019-01-01 06:00:00 5873.0
2019-01-01 07:00:00 5682.0
Name: Consumption, dtype: float64
# Sum consumption per calendar date, then expose the totals as a one-column
# frame whose index is a proper DatetimeIndex named 'DateTime'.
count_date = df5['Consumption'].groupby(df5.index.date).sum()
pw_clean = count_date.to_frame()
pw_clean.index = pd.to_datetime(pw_clean.index).rename('DateTime')
pw_clean.head()
Consumption
DateTime
2019-01-01 142984
2019-01-02 151729
2019-01-03 174098
2019-01-04 183242
2019-01-05 177114
DateTime
# Timestamp-indexed copy holding only the consumption readings.
df1 = df[['DateTime','Consumption']].copy('Deep')
df1 = df1.set_index('DateTime')
# Add legend
# NOTE(review): `axs` is not created anywhere in the visible code; presumably a
# subplots call belongs before this point. Confirm against the original notebook.
handles, labels = axs[-1].get_legend_handles_labels() # Get labels from last subplot
plt.legend(handles, labels, loc='upper left', bbox_to_anchor=(1, 1.15), fontsize=12) # Add legend outside plot
plt.tight_layout()
plt.show()
df.head(2)
DateTime Consumption Production Nuclear Wind Hydroelectric Oil and Gas Coal Solar Biomass
# Timestamp-indexed copy of the full dataset.
df2 = df.copy('Deep')
df2 = df2.set_index('DateTime')
# Line chart of the 'Consumption' column of `data1` against its 'DateTime' column.
# NOTE(review): the only visible definition of `data1` (further down the file) moves
# DateTime into the index, so `data1['DateTime']` would not exist for that frame;
# confirm which frame this cell is meant to plot.
plt.figure(figsize=(20,6))
plt.plot(data1['DateTime'], data1['Consumption'], color='Gold')
plt.ylabel('Consumption', fontsize=12)
plt.xlabel('DateTime', fontsize=12)
plt.title('Electricity Consumption for a Particular Time Frame', fontsize=14)
plt.tight_layout()
plt.grid(True)
# Remove the bottom and left axes spines.
sns.despine(bottom=True, left=True)
plt.show()
# Timestamp-indexed copy of the full dataset for the decomposition.
df1 = df.copy(deep=True)
df1 = df1.set_index('DateTime')
plt.rcParams["figure.figsize"] = (15, 8)
from statsmodels.tsa.seasonal import seasonal_decompose

# `period` is the number of observations per seasonal cycle. With period=1 the
# moving-average trend equals the series itself and the seasonal component is
# identically zero, so the decomposition shows nothing. The readings are hourly,
# so 24 captures the daily cycle.
decomp = seasonal_decompose(df1['Consumption'], model='additive', period=24)
decomp.plot()
plt.show()
# Build a March 2024 working set with separate calendar-date and clock-time columns.
df_con = df.copy(deep=True)
df_con["DATE"] = pd.to_datetime(df_con["DateTime"]).dt.date
df_con["TIME"] = pd.to_datetime(df_con["DateTime"]).dt.time
df_con = df_con.set_index('DateTime')
# One .loc call selects the rows (label slice on the DatetimeIndex) and the
# columns together; the previous chained form was missing its closing bracket.
# NOTE(review): the window ends on 30 March although March has 31 days; confirm
# that leaving out the 31st is intentional.
df_con = df_con.loc[
    "2024-03-01 00:00:00":"2024-03-30 23:59:59",
    ["Consumption", "Production", "Solar", "DATE", "TIME"],
]
df_con.head()
Consumption Production Solar DATE TIME
DateTime
plt.show()
# Total consumption per DATE value, drawn as red bars from the largest total down.
daily_Consumption = df_con['Consumption'].groupby(df_con['DATE']).sum()
ax = daily_Consumption.sort_values(ascending=False).plot(
    kind='bar', figsize=(20, 5), legend=True, color='red')
ax.set_title('Daily Consumption')
plt.show()
# Copy of the dataset with calendar-date, clock-time and date-as-text helper columns.
df_solar = df.copy(deep=True)
# Parse the timestamps once; DATE was previously computed twice with identical results.
stamps = pd.to_datetime(df_solar["DateTime"])
df_solar["DATE"] = stamps.dt.date
df_solar["TIME"] = stamps.dt.time
df_solar["DATE_STRING"] = df_solar["DATE"].astype(str)  # add column with date as string
df_solar.head()
Oil
DateTime Consumption Production Nuclear Wind Hydroelectric and Coal Solar Biomass DATE TIME DATE_STRING
Gas
2019-01-01 2019-
0 6352 6527 1395 79 1383 1896 1744 0 30 00:00:00 2019-01-01
00:00:00 01-01
2019-01-01 2019-
1 6116 5701 1393 96 1112 1429 1641 0 30 01:00:00 2019-01-01
01:00:00 01-01
2019-01-01 2019-
2 5873 5676 1393 142 1030 1465 1616 0 30 02:00:00 2019-01-01
02:00:00 01-01
2019-01-01 2019-
3 5682 5603 1397 191 972 1455 1558 0 30 03:00:00 2019-01-01
03:00:00 01-01
2019-01-01 2019-
4 5557 5454 1393 159 960 1454 1458 0 30 04:00:00 2019-01-01
04:00:00 01-01
# Assuming 'df_solar' has columns 'DateTime', 'Consumption', 'Production', 'Solar', and 'Wind'
# NOTE(review): `filtered_data` and `date` are not defined in the visible code; presumably
# `filtered_data` is the subset of df_solar for the day(s) listed in `date`. Confirm.
# Subplot 1: Consumption
# 411 = first slot of a 4-row, 1-column grid; only slots 1 and 2 are filled in this cell.
plt.subplot(411)
sns.lineplot(data=filtered_data, x="DateTime", y="Consumption", label="Consumption", color='green')
plt.title("Consumption: {}".format(date[0]))
# Subplot 2: Production
plt.subplot(412)
sns.lineplot(data=filtered_data, x="DateTime", y="Production", label="Production", color='green')
plt.title("Production: {}".format(date[0]))
plt.tight_layout()
plt.show()
2024- 2024- 2024- 2024- 2024- 2024- 2024- 2024- 2024- 2024- 2024- 2024- 2024- 2024- 2024- 2024- 2024-
DATE ...
03-01 03-02 03-03 03-04 03-05 03-06 03-07 03-08 03-09 03-10 03-21 03-22 03-23 03-24 03-25 03-26 03-27
TIME
00:00:00 5830.0 5774.0 5375.0 5331.0 5813.0 5889.0 5973.0 6064.0 6079.0 5516.0 ... 6479.0 6215.0 5907.0 6551.0 6202.0 7654.0 6539.0
01:00:00 5780.0 5560.0 5186.0 5236.0 5625.0 5659.0 5821.0 5863.0 5834.0 5366.0 ... 6624.0 6106.0 5281.0 6574.0 6780.0 7670.0 6544.0
02:00:00 5617.0 5475.0 5115.0 5126.0 5473.0 5582.0 5664.0 5743.0 5646.0 5244.0 ... 6622.0 6250.0 5301.0 6400.0 6706.0 7724.0 6687.0
03:00:00 5600.0 5415.0 5069.0 5070.0 5478.0 5561.0 5600.0 5680.0 5634.0 5211.0 ... 6599.0 6202.0 5295.0 6252.0 6893.0 7786.0 6828.0
04:00:00 5656.0 5418.0 5113.0 5189.0 5615.0 5559.0 5736.0 5675.0 5677.0 5161.0 ... 6644.0 6162.0 5348.0 6244.0 6900.0 7688.0 6904.0
5 rows × 60 columns
sns.set_theme(style="white")
df_con_pro = df_con.pivot_table(values=['Consumption', 'Production'], index='TIME', columns='DATE')
Args:
data: A pandas DataFrame containing the timeseries data.
row: Number of rows in the grid layout (optional).
col: Number of columns in the grid layout (optional).
title: Title for the overall plot (optional).
ylim_top: Upper limit for the y-axis (optional, default: 8000).
"""
cols = data.columns.levels[1] # Get all column names
# Call the function with desired parameters and new y-axis limit
Daywise_plot(data=df_con_pro, row=12, col=3, ylim_top=8000)
plt.tight_layout()
plt.show()
<Figure size 1500x800 with 0 Axes>
The figure is a time-series visualization of electricity consumption and generation for the entire month of March
2024; the y-axis is in MW (megawatts).
1. Red Line: This line represents the electricity consumption trend over the month. It
appears to fluctuate, potentially indicating higher consumption during daytime hours and
lower consumption at night.
2. Blue Line: This line represents the electricity generation trend over the month. It also
seems to fluctuate, and it's possible that the generation may not always keep up with
consumption (indicated by the red line going above the blue line in some areas).
plt.style.use('fivethirtyeight')
df_con_pro = df_con.pivot_table(values=['Solar'], index='TIME', columns='DATE')
Args:
data: A pandas DataFrame containing the timeseries data.
row: Number of rows in the grid layout (optional).
col: Number of columns in the grid layout (optional).
title: Title for the overall plot (optional).
ylim_top: Upper limit for the y-axis (optional, default: 8000).
"""
cols = data.columns.levels[1] # Get all column names
# Call the function with desired parameters and new y-axis limit
Daywise_plot(data=df_con_pro, row=12, col=3, ylim_top=1000)
from statsmodels.tsa.seasonal import seasonal_decompose
Parameters:
df: DataFrame with time series data.
col: Column name for data to decompose. Default is 'sqrt(O3 AQI)'.
date_col: Column name for datetime values. Default is 'Date'.
period: Seasonality period. Default is 12.
Returns:
A DecomposeResult object with seasonal, trend, and residual components.
"""
# Decompostion
decomposition = seasonal_decompose(df.values, period=period)
de_season = decomposition.seasonal
de_resid = decomposition.resid
de_trend = decomposition.trend
ax[0].set_title(title)
ax[0].plot(df.index, df.values, color='C3')
ax[0].set_ylabel(df.keys()[0])
ax[0].grid(alpha=0.25)
plt.tight_layout(h_pad=0)
plt.show()
return decomposition
# Timestamp-indexed frames: `data1` holds consumption only, `data` holds every column.
data1 = df[['DateTime', 'Consumption']].copy(deep=True)
data1 = data1.set_index('DateTime')
data = df.copy(deep=True)
data = data.set_index('DateTime')

# Overlay consumption (blue) and production (yellow) on one axis. The lines are
# labelled and a legend is drawn so the two series can be told apart.
plt.figure(figsize=(20, 6))
plt.plot(data.index, data['Consumption'], 'b', label='Consumption')
plt.plot(data.index, data['Production'], 'y', label='Production')
plt.title('Power Production vs Consumption over time')
plt.xlabel('Date')
plt.ylabel('MW')
plt.legend()
# A single show() is enough; the two extra calls had no figure left to display.
plt.show()
# The tick helpers are imported here because no import for them is visible in the file.
from matplotlib.dates import DateFormatter, MonthLocator

# One-year window to zoom into; the title is derived from it so the two cannot
# drift apart (the title previously said 2023 while the axis showed 2020).
window_start = pd.Timestamp('2020-01-01')
window_end = pd.Timestamp('2021-01-01')

ax = data.plot(
    figsize=(20, 5), use_index=True, grid=False,
    y='Consumption',
    title='{} Energy Consumption in MWs Unit'.format(window_start.year),
    ylim=(4000, 10000),
    xlim=(window_start, window_end),
    style='b.-', lw=0.3,
    # Keep the x axis in matplotlib date units so the matplotlib.dates locator
    # and formatter below interpret the tick positions correctly.
    x_compat=True,
)
# A tick every second month, labelled "Mon" over "YYYY". The former
# set_xticklabels(data.index) call tried to attach one label per data row and
# was overridden by the formatter anyway.
ax.xaxis.set_major_locator(MonthLocator(interval=2))
ax.xaxis.set_major_formatter(DateFormatter('%b\n%Y'))
plt.setp(ax.get_xticklabels(), rotation=0, ha='center')
plt.show()
# Collect the column names of `data` as a list.
# NOTE(review): an earlier comment said "from index 2 onwards (skipping the first two)",
# but this expression keeps every column; confirm whether the first two columns
# should be dropped (e.g. data.columns[2:]).
production_types = list(data.columns)
df3 = df3.set_index('DateTime')
daily_data = df3.resample('24h').mean()
display(daily_data.head(2))
Consumption Production
DateTime
ax2 = ax1.twinx()
ax.set_xlabel('Hour of Year')
ax.set_ylabel('Consumption')
ax.set_title(name.week)
plt.subplots_adjust(hspace=0.5)
Power Consumption by Day of the Week
# Consumption and production readings keyed by timestamp.
df6 = df.loc[:, ['DateTime', 'Consumption', 'Production']].set_index('DateTime')
df6.head()
Consumption Production
DateTime
# Derive calendar fields from the DatetimeIndex: day of month, year, month name, weekday name.
df6 = df6.assign(
    Day=df6.index.day,
    Year=df6.index.year,
    Month=df6.index.month_name(),
    WeekDay=df6.index.day_name(),
)
df6.head()
DateTime
# Average every numeric column per weekday. groupby sorts its keys alphabetically
# (Friday, Monday, Saturday, ...), which scrambles the x axis of the weekday plot,
# so the rows are put back into calendar order.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday']
dfbyDay = (
    df6.drop('Month', axis=1)
    .groupby('WeekDay')
    .mean()
    .reindex(weekday_order)
)
dfbyDay.head()
WeekDay
# Mean consumption (red) and production (green) for each weekday.
plt.figure(figsize=(20, 4))
plt.plot(dfbyDay.index, dfbyDay['Consumption'], color='red')
plt.plot(dfbyDay.index, dfbyDay['Production'], color='green')
# Axis label previously misspelled "Production" as "Porduction".
plt.xlabel('Power Consumption and Production to Day of the Week')
plt.ylabel('MW')
plt.legend(['Consumption', 'Production'])
plt.show()
Month
# Mean consumption (red) and production (green) for each month.
# Only one figure is opened; the duplicated plt.figure call left an empty
# figure behind in the output.
plt.figure(figsize=(20, 5))
plt.plot(dfbyMonth.index, dfbyMonth['Consumption'], color='red')
plt.plot(dfbyMonth.index, dfbyMonth['Production'], color='green')
# Axis label previously misspelled "Production" as "Porduction".
plt.xlabel('Power Consumption and Production to Month')
plt.ylabel('MW')
plt.legend(['Consumption', 'Production'])
plt.show()
plt.rcParams["figure.figsize"] = (20,5)
# One bar per row of df6: red where 'delta' is positive, green otherwise.
# NOTE(review): the 'delta' column is not created in the visible code; presumably it is
# the difference between Consumption and Production. Confirm its definition and sign.
plt.bar(
df6.index, df6['delta'],
color=np.where(df6['delta'] > 0, 'Red', 'Green')
)
DateTime
# Span
# Exponentially weighted means of Consumption with spans of 5 and 10 rows.
# NOTE(review): `span` counts rows, not days. The "_5Days"/"_10Days" names are only
# accurate if df7 holds one row per day; df7 is not defined in the visible code, so confirm.
df7['EMA_5Days'] = df7['Consumption'].ewm(span=5,adjust=False).mean()
df7['EMA_10Days'] = df7['Consumption'].ewm(span=10,adjust=False).mean()
# Compare both averages with the raw series, zoomed to January 2021.
df7[['EMA_5Days','EMA_10Days','Consumption']].plot(figsize=(20,5),xlim=['2021-01-01','2021-01-31']);
Augmented Dickey-Fuller (ADF) Test
from statsmodels.tsa.stattools import adfuller
def adf_test(series):
    """Run the Augmented Dickey-Fuller test on *series* and print the outcome.

    Prints the test statistic and the p-value returned by ``adfuller``, followed
    by a verdict: a p-value of at most 0.05 is reported as stationary, anything
    larger as non-stationary.

    Args:
        series: The observations to test; passed to ``adfuller`` unchanged.

    Returns:
        None. The results are only printed.
    """
    result = adfuller(series)
    # adfuller returns a tuple; element 0 is the statistic, element 1 the p-value.
    print('ADF Statistics: {}'.format(result[0]))
    print('p- value: {}'.format(result[1]))
    if result[1] <= 0.05:
        # Both messages were cut off mid-string in the file (unterminated
        # literals); they are completed and closed here.
        print("strong evidence against the null hypothesis, reject the null hypothesis. "
              "Data has no unit root and is stationary")
    else:
        print("weak evidence against null hypothesis, time series has a unit root, "
              "indicating it is non-stationary ")
adf_test(df7['Consumption'])
df2.head(2)
Consumption Production Nuclear Wind Hydroelectric Oil and Gas Coal Solar Biomass
DateTime
Correlation
# Computing Correlation Matrices
# Import from the public scipy.stats namespace; scipy.stats.stats is a
# deprecated private module path.
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from scipy.stats import kendalltau
df2.head()
Consumption Production Nuclear Wind Hydroelectric Oil and Gas Coal Solar Biomass
DateTime
# Pearson correlation between every pair of columns in df2.
# NOTE(review): nothing is drawn between computing corr_mat and plt.show() in the visible
# code; presumably a heatmap call was lost from this cell. Confirm against the notebook.
corr_mat = df2.corr(method='pearson')
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
# Same computation as above, repeated.
corr_mat = df2.corr(method='pearson')
plt.show()
# Histogram (100 bins) with a KDE curve of the Production column, drawn in blue.
# NOTE(review): the earlier wording said this adds a second subplot to the same figure;
# displot appears to be a figure-level seaborn function that opens its own figure and
# returns a grid object rather than an Axes. Confirm before relying on `ax2`.
ax2 = sns.displot(data=df, x="Production", kde=True, bins=100, color="blue",
facecolor="#0000FF", height=5, aspect=3.5)
Differencing
# Mean consumption per 'M' resampling bucket: first as a flat frame with a
# DateTime column (monthly_data), then re-indexed by that column (df8).
monthly_data = df7['Consumption'].resample('M').mean().reset_index()
df8 = monthly_data.set_index('DateTime')
df8.head()
Consumption
DateTime
2019-01-31 7752.168011
2019-02-28 7605.071429
2019-03-31 6992.792732
2019-04-30 6652.491667
2019-05-31 6379.282258
DateTime
DateTime
adf_test(df8['Consumption 12 Difference'].dropna())
DateTime
Consumption
DateTime
2019-01-31 7752.168011
2019-02-28 7605.071429
2019-03-31 6992.792732
2019-04-30 6652.491667
2019-05-31 6379.282258
# Split df9 at train_dataset_end: training rows up to and including it, test
# rows from the following day up to test_dataset_end. Each part is copied
# because prediction columns are later assigned into test_data, and assigning
# into a slice of df9 is ambiguous (SettingWithCopy).
train_data = df9[:train_dataset_end].copy()
test_data = df9[train_dataset_end + timedelta(days=1):test_dataset_end].copy()
##prediction
# First and last timestamps of the test window, used as forecast bounds.
pred_start_date = test_data.index[0]
pred_end_date = test_data.index[-1]
train_data.head()
Consumption
DateTime
2019-01-31 7752.168011
2019-02-28 7605.071429
2019-03-31 6992.792732
2019-04-30 6652.491667
2019-05-31 6379.282258
test_data.head()
Consumption
DateTime
2023-06-30 5651.966667
2023-07-31 6053.303763
2023-08-31 5929.065860
2023-09-30 5678.083333
2023-10-31 5758.722148
import warnings
# Silence all warnings for the rest of the session.
warnings.filterwarnings("ignore")
# Fit an ARIMA model with order=(10,2,0) to the training consumption series.
# NOTE(review): `ARIMA` is not imported in the visible code; presumably it comes from
# statsmodels.tsa.arima.model, where `order` would be (p, d, q). Confirm.
model_ARIMA=ARIMA(train_data['Consumption'],order=(10,2,0))
model_Arima_fit=model_ARIMA.fit()
model_Arima_fit.summary()
SARIMAX Results
Dep. Variable: Consumption No. Observations: 53
- 05-31-2023
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
##prediction
# Forecast bounds: first and last timestamps of the test window, echoed one per line.
pred_start_date, pred_end_date = test_data.index[0], test_data.index[-1]
print(pred_start_date, pred_end_date, sep='\n')
2023-06-30 00:00:00
2024-03-31 00:00:00
# Forecast over the test window with the fitted ARIMA model.
pred=model_Arima_fit.predict(start=pred_start_date,end=pred_end_date)
# Forecast error: actual minus predicted consumption.
residuals=test_data['Consumption']-pred
pred.to_frame()
predicted_mean
2023-06-30 5782.510258
2023-07-31 5941.097195
2023-08-31 5762.945329
2023-09-30 5441.840938
2023-10-31 5659.123037
2023-11-30 5858.474618
2023-12-31 5870.687607
2024-01-31 6003.153652
2024-02-29 5911.563521
2024-03-31 5778.196544
residuals.to_frame()
0
DateTime
2023-06-30 -130.543592
2023-07-31 112.206569
2023-08-31 166.120531
2023-09-30 236.242395
2023-10-31 99.599110
2023-11-30 546.747604
2023-12-31 752.983092
2024-01-31 832.097692
2024-02-29 566.848835
2024-03-31 496.099553
# Density estimate of the fitted model's residuals.
model_Arima_fit.resid.plot(kind='kde',figsize=(20,5));
# Store the forecast next to the actual values and plot both series.
test_data['Predicted_ARIMA']=pred
test_data[['Consumption','Predicted_ARIMA']].plot(figsize=(20,5));
- 05-31-2023
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 6.26e+25. Standard errors may be unstable.
##prediction
# Forecast bounds: first and last timestamps of the test window, echoed one per line.
pred_start_date, pred_end_date = test_data.index[0], test_data.index[-1]
print(pred_start_date, pred_end_date, sep='\n')
2023-06-30 00:00:00
2024-03-31 00:00:00
# Forecast over exactly the test window, using the same bounds as the ARIMA
# forecast. The hard-coded datetime(2023,6,1)..datetime(2024,4,1) range did not
# line up with the test timestamps and depended on a `datetime` import that is
# not visible in the file.
pred_Sarima = model_SARIMA_fit.predict(start=pred_start_date, end=pred_end_date)
# Forecast error: actual minus predicted consumption.
residuals = test_data['Consumption'] - pred_Sarima
model_SARIMA_fit.resid.plot(figsize=(20, 5));
model_SARIMA_fit.resid.plot(kind='kde', figsize=(20, 5));
# The comparison plot reads a 'Predicted_SARIMA' column that no visible line
# creates, so the forecast is stored here (mirroring the ARIMA cell).
test_data = test_data.assign(Predicted_SARIMA=pred_Sarima)
test_data[['Consumption', 'Predicted_SARIMA', 'Predicted_ARIMA']].plot(figsize=(20, 5));
Loading [MathJax]/extensions/Safe.js