Commit ca6f4c1

Loop train and test/evaluate over 30 different contiguous lookup date values (1-30). Put results in a CSV file.
1 parent 1d88bb1 commit ca6f4c1

3 files changed: 201 additions & 3 deletions

Lines changed: 148 additions & 0 deletions
@@ -0,0 +1,148 @@
import os

import numpy as np

import matplotlib.pyplot as plt

from stock_prediction import create_model, load_data
from parameters import *


def plot_graph(test_df, MY_LOOKUP_STEP):
    """
    This function plots the true close price along with the predicted close price,
    in black and blue respectively
    """
    plt.plot(test_df[f'true_adjclose_{MY_LOOKUP_STEP}'], c='k')
    plt.plot(test_df[f'adjclose_{MY_LOOKUP_STEP}'], c='b')
    plt.xlabel("Days")
    plt.ylabel("Price")
    plt.legend(["Actual Price", "Predicted Price"])
    plt.show()


def get_final_df(model, data, MY_LOOKUP_STEP):
    """
    This function takes the `model` and `data` dict to
    construct a final dataframe that includes the features along
    with the true and predicted prices of the testing dataset
    """
    # if the predicted future price is higher than the current price,
    # then calculate the true future price minus the current price, to get the buy profit
    buy_profit = lambda current, pred_future, true_future: true_future - current if pred_future > current else 0
    # if the predicted future price is lower than the current price,
    # then subtract the true future price from the current price, to get the sell profit
    sell_profit = lambda current, pred_future, true_future: current - true_future if pred_future < current else 0
    X_test = data["X_test"]
    y_test = data["y_test"]
    # perform prediction and get prices
    y_pred = model.predict(X_test)
    if SCALE:
        y_test = np.squeeze(data["column_scaler"]["adjclose"].inverse_transform(np.expand_dims(y_test, axis=0)))
        y_pred = np.squeeze(data["column_scaler"]["adjclose"].inverse_transform(y_pred))
    test_df = data["test_df"]
    # add predicted future prices to the dataframe
    test_df[f"adjclose_{MY_LOOKUP_STEP}"] = y_pred
    # add true future prices to the dataframe
    test_df[f"true_adjclose_{MY_LOOKUP_STEP}"] = y_test
    # sort the dataframe by date
    test_df.sort_index(inplace=True)
    final_df = test_df
    # add the buy profit column
    final_df["buy_profit"] = list(map(buy_profit,
                                      final_df["adjclose"],
                                      final_df[f"adjclose_{MY_LOOKUP_STEP}"],
                                      final_df[f"true_adjclose_{MY_LOOKUP_STEP}"])
                                  # since we don't have profit for the last sequence, add 0's
                                  )
    # add the sell profit column
    final_df["sell_profit"] = list(map(sell_profit,
                                       final_df["adjclose"],
                                       final_df[f"adjclose_{MY_LOOKUP_STEP}"],
                                       final_df[f"true_adjclose_{MY_LOOKUP_STEP}"])
                                   # since we don't have profit for the last sequence, add 0's
                                   )
    return final_df


def predict(model, data):
    # retrieve the last sequence from the data
    last_sequence = data["last_sequence"][-N_STEPS:]
    # expand dimension
    last_sequence = np.expand_dims(last_sequence, axis=0)
    # get the prediction (scaled from 0 to 1)
    prediction = model.predict(last_sequence)
    # get the price (by inverting the scaling)
    if SCALE:
        predicted_price = data["column_scaler"]["adjclose"].inverse_transform(prediction)[0][0]
    else:
        predicted_price = prediction[0][0]
    return predicted_price


predictions = []
# predictions = np.array(float)
# predictions.resize(30)
for MY_LOOKUP_STEP in range(1, 30):

    # load the data
    data = load_data(ticker, N_STEPS, scale=SCALE, split_by_date=SPLIT_BY_DATE,
                     shuffle=SHUFFLE, lookup_step=MY_LOOKUP_STEP, test_size=TEST_SIZE,
                     feature_columns=FEATURE_COLUMNS)

    # construct the model
    model = create_model(N_STEPS, len(FEATURE_COLUMNS), loss=LOSS, units=UNITS, cell=CELL, n_layers=N_LAYERS,
                         dropout=DROPOUT, optimizer=OPTIMIZER, bidirectional=BIDIRECTIONAL)

    my_model_name = f"{date_now}_{ticker}-{shuffle_str}-{scale_str}-{split_by_date_str}-\
{LOSS}-{OPTIMIZER}-{CELL.__name__}-seq-{N_STEPS}-step-{MY_LOOKUP_STEP}-layers-{N_LAYERS}-units-{UNITS}"
    if BIDIRECTIONAL:
        my_model_name += "-b"

    # load the optimal model weights from the results folder
    model_path = os.path.join("results", my_model_name) + ".h5"
    model.load_weights(model_path)

    # evaluate the model
    loss, mae = model.evaluate(data["X_test"], data["y_test"], verbose=0)
    # calculate the mean absolute error (inverse scaling)
    if SCALE:
        mean_absolute_error = data["column_scaler"]["adjclose"].inverse_transform([[mae]])[0][0]
    else:
        mean_absolute_error = mae

    # get the final dataframe for the testing set
    final_df = get_final_df(model, data, MY_LOOKUP_STEP)
    # predict the future price
    future_price = predict(model, data)
    # we calculate the accuracy by counting the number of positive profits
    accuracy_score = (len(final_df[final_df['sell_profit'] > 0]) + len(final_df[final_df['buy_profit'] > 0])) / len(final_df)
    # calculating total buy & sell profit
    total_buy_profit = final_df["buy_profit"].sum()
    total_sell_profit = final_df["sell_profit"].sum()
    # total profit by adding sell & buy together
    total_profit = total_buy_profit + total_sell_profit
    # dividing total profit by number of testing samples (number of trades)
    profit_per_trade = total_profit / len(final_df)
    # append the new prediction to the list
    # predictions[MY_LOOKUP_STEP] = future_price
    predictions.append(future_price)
    # printing metrics
    print(f"Future price after {MY_LOOKUP_STEP} days is {future_price:.2f}$")
    print(f"{LOSS} loss:", loss)
    print("Mean Absolute Error:", mean_absolute_error)
    print("Accuracy score:", accuracy_score)
    print("Total buy profit:", total_buy_profit)
    print("Total sell profit:", total_sell_profit)
    print("Total profit:", total_profit)
    print("Profit per trade:", profit_per_trade)
    # plot the true/predicted prices graph
    plot_graph(final_df, MY_LOOKUP_STEP)
    print(final_df.tail(10))
    print(f"Future price after {MY_LOOKUP_STEP} days is {future_price:.2f}$")
    # save the final dataframe to the csv-results folder
    csv_results_folder = "csv-results"
    if not os.path.isdir(csv_results_folder):
        os.mkdir(csv_results_folder)
    csv_filename = os.path.join(csv_results_folder, my_model_name + ".csv")
    final_df.to_csv(csv_filename)

# after the loop, save all future-price predictions (one per lookup step) as a flat CSV
print(predictions)
predictions = np.array(predictions)
predictions.tofile(os.path.join(csv_results_folder, f"evaluate30_{date_now}_{ticker}.csv"), ",")
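
Because `ndarray.tofile(..., ",")` writes a flat, headerless comma-separated file, the per-step predictions can be read back with `np.fromfile`. A minimal reader sketch (not part of this commit; it assumes it runs on the same date, since `date_now` from parameters is embedded in the filename):

import os

import numpy as np

from parameters import *

# read back the flat comma-separated predictions written by the evaluation loop above
preds = np.fromfile(os.path.join("csv-results", f"evaluate30_{date_now}_{ticker}.csv"), sep=",")
# positions map to lookup steps 1, 2, ... in loop order
for step, price in enumerate(preds, start=1):
    print(f"lookup step {step}: predicted price {price:.2f}")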

machine-learning/stock-prediction/parameters.py

Lines changed: 5 additions & 3 deletions
@@ -5,10 +5,10 @@
 # Window size or the sequence length
 N_STEPS = 50
 # Lookup step, 1 is the next day
-LOOKUP_STEP = 15
+LOOKUP_STEP = 1 #15

 # whether to scale feature columns & output price as well
-SCALE = True
+SCALE = True #False #True
 scale_str = f"sc-{int(SCALE)}"
 # whether to shuffle the dataset
 SHUFFLE = True
@@ -46,7 +46,9 @@
 EPOCHS = 500

 # Amazon stock market
-ticker = "AMZN"
+# ticker = "AMZN"
+# Oxford Nanopore stock market
+ticker = "ONTTF"
 ticker_data_filename = os.path.join("data", f"{ticker}_{date_now}.csv")
 # model name to save, making it as unique as possible based on parameters
 model_name = f"{date_now}_{ticker}-{shuffle_str}-{scale_str}-{split_by_date_str}-\
Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
from stock_prediction import create_model, load_data
from tensorflow.keras.layers import LSTM
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
import os
import pandas as pd
from parameters import *


# create these folders if they do not exist
if not os.path.isdir("results"):
    os.mkdir("results")

if not os.path.isdir("logs"):
    os.mkdir("logs")

if not os.path.isdir("data"):
    os.mkdir("data")

# train one model for each lookup step
for MY_LOOKUP_STEP in range(1, 30):

    # load the data
    data = load_data(ticker, N_STEPS, scale=SCALE, split_by_date=SPLIT_BY_DATE,
                     shuffle=SHUFFLE, lookup_step=MY_LOOKUP_STEP, test_size=TEST_SIZE,
                     feature_columns=FEATURE_COLUMNS)

    # save the dataframe
    data["df"].to_csv(ticker_data_filename)

    # construct the model
    model = create_model(N_STEPS, len(FEATURE_COLUMNS), loss=LOSS, units=UNITS, cell=CELL, n_layers=N_LAYERS,
                         dropout=DROPOUT, optimizer=OPTIMIZER, bidirectional=BIDIRECTIONAL)

    my_model_name = f"{date_now}_{ticker}-{shuffle_str}-{scale_str}-{split_by_date_str}-\
{LOSS}-{OPTIMIZER}-{CELL.__name__}-seq-{N_STEPS}-step-{MY_LOOKUP_STEP}-layers-{N_LAYERS}-units-{UNITS}"
    if BIDIRECTIONAL:
        my_model_name += "-b"

    # some tensorflow callbacks
    checkpointer = ModelCheckpoint(os.path.join("results", my_model_name + ".h5"), save_weights_only=True, save_best_only=True, verbose=1)
    tensorboard = TensorBoard(log_dir=os.path.join("logs", my_model_name))
    # train the model and save the weights whenever we see
    # a new optimal model using ModelCheckpoint
    history = model.fit(data["X_train"], data["y_train"],
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        validation_data=(data["X_test"], data["y_test"]),
                        callbacks=[checkpointer, tensorboard],
                        verbose=1)
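
The evaluation script above reloads weights by rebuilding the same `my_model_name` for each `MY_LOOKUP_STEP`, so the training and evaluation loops must use the same range and parameters, and (since `date_now` is part of the name) run on the same date. A hypothetical pre-flight check along those lines, not part of this commit:

import os

from parameters import *

# verify that a checkpoint exists for every lookup step before evaluating
missing = []
for MY_LOOKUP_STEP in range(1, 30):
    name = f"{date_now}_{ticker}-{shuffle_str}-{scale_str}-{split_by_date_str}-\
{LOSS}-{OPTIMIZER}-{CELL.__name__}-seq-{N_STEPS}-step-{MY_LOOKUP_STEP}-layers-{N_LAYERS}-units-{UNITS}"
    if BIDIRECTIONAL:
        name += "-b"
    if not os.path.isfile(os.path.join("results", name + ".h5")):
        missing.append(MY_LOOKUP_STEP)
print("Missing checkpoints for lookup steps:", missing)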
