from datetime import datetime
from decimal import *
import glob
import os
import pandas as pd #pip install this first before executing
#4-significant decimal digits precision and rounding
getcontext().rounding = ROUND_HALF_EVEN
DECIMAL_DIGITS = "1.0000"
MAX_FILES = 205 #1611 original for 22 houses
H_BASE_DIR = "D:/DATASETS/Disaggregation/BY HOUSEHOLD/"
TC_BASE_DIR = "D:/DATASETS/Disaggregation/TOTAL CONSUMPTIONS/"
NEW_FILE = "D:/DATASETS/Disaggregation/TOTAL CONSUMPTIONS/tc_temp_dataset.csv"
def listdirs(rootdir):
for file in os.listdir(rootdir):
d = os.path.join(rootdir, file)
if os.path.isdir(d):
listdirs(d)
#MAIN
#vars
totalFiles = 0
totalRows = 0
newRows = 0
dt_min = -1
dt_hr = -1
dt_day = ""
old_dt = datetime.now()
ap_sum = 0.0
rp_sum = 0.0
min_ctr = 0
#prepare new dataset file
ds = open(NEW_FILE, "a")
#loop by house
for h in os.scandir(TC_BASE_DIR):
if h.is_dir():
HDir = h.path + "/"
HNo = h.path[-2:]
#loop by dates
for d in os.scandir(HDir):
if d.is_dir():
DateDir = d.path
path = DateDir + "/"
dt_min = -1
dt_hr = -1
dt_day = ""
#get csv file
print(path)
csv_files = glob.glob(path + "*.csv")
for file in csv_files:
#read content into data frame
df = pd.read_csv(file)
#iterate every row of csv file, skip first row (header row)
for i in range(0, len(df)):
ap = df.iloc[i, 0] #Active Power
rp = df.iloc[i, 1] #Reactive Power
tse = str(df.iloc[i, 2]) #timestamp in scientific notation
#change big E to small e
tse = tse.replace("E", "e")
#convert tse
sts = str(int(float(tse)))
ts10 = sts[0:10]
ts = int(ts10)
dt = datetime.fromtimestamp(ts)
#print(dt, "\nMIN: ", dt.minute, " | HR: ", dt.hour, " |
DATE: ", dt.day)
totalRows += 1
if dt_min == dt.minute and dt_hr == dt.hour and dt_day ==
dt.day:
#add for average
ap_sum += float(ap)
rp_sum = float(rp)
min_ctr += 1
else:
if ap_sum != 0.0 and rp_sum != 0.0:
ap_avg = ap_sum/min_ctr
rp_avg = rp_sum/min_ctr
#sdate = str(dt.year) + "-" +
str(dt.month).zfill(2) + "-" + str(dt.day).zfill(2) + " " + str(dt.hour).zfill(2) +
":" + str(dt.minute).zfill(2) + ":00"
sdate = old_dt.strftime("%Y-%m-%d %H:%M:00")
#append to new file
ds.writelines(sdate + ",H" + HNo + "," +
str(Decimal(ap_avg).quantize(Decimal(DECIMAL_DIGITS))) + "," +
str(Decimal(rp_avg).quantize(Decimal(DECIMAL_DIGITS))) + "," + old_dt.strftime("%w,
%H") + "\n") #DATETIME, HOUSE_ID, AVG_ACTIVE_POWER, AVG_REACTIVE_POWER, WEEKDAY,
HOUR
newRows += 1
# set as new vars
dt_min = dt.minute
dt_hr = dt.hour
dt_day = dt.day
min_ctr = 1
ap_sum = float(ap)
rp_sum = float(rp)
old_dt = dt
totalFiles += 1
print("Total Files Processed = ", totalFiles, "/", MAX_FILES, "
: {:.2f}".format((totalFiles/MAX_FILES)*100), "%")
ds.close()
print("Total Rows Processed = {:,}".format(totalRows))
print("Total New Rows Created = {:,}".format(newRows))