Skip to content

Commit fa6480b

Browse files
add test
1 parent 29d3a0e commit fa6480b

File tree

5 files changed

+214
-14
lines changed

5 files changed

+214
-14
lines changed

序列预测/PCA去趋势化/dev2/.idea/dev2.iml

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

序列预测/PCA去趋势化/dev2/.idea/misc.xml

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

序列预测/PCA去趋势化/dev2/.idea/modules.xml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

序列预测/PCA去趋势化/dev2/.idea/workspace.xml

Lines changed: 159 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

序列预测/PCA去趋势化/dev2/main.py

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,9 @@ def createdataset(data):
6464

6565
def myload():
6666
filename = 'dump2.txt'
67-
f = open('data/' + filename, 'rb')
67+
#f = open('./data/' + filename, 'rb')
68+
abspath='C:/Users/wwwa8/Documents/GitHub/Machine-Learning/序列预测/PCA去趋势化/dev2/data/'
69+
f = open(abspath + filename, 'rb')
6870
data = pickle.load(f)
6971
f.close()
7072
# print (data) # 路段数 * 每个路段的信息(df的数据结构)
@@ -102,14 +104,17 @@ def use_pca(dataset):
102104
time_step = 2
103105
data = myload() # data是一个list,里面是df格式的数据
104106
# transfer
105-
dataset = createdataset(data) # dataset 的格式是 (路段 * 每一天 * 一天内的数据)20 * 7 * 480
107+
dataset = createdataset(data) # dataset 的格式是 (路段 * 每一天 * 一天内的数据)20 * 14 * 480
106108
dataset_main, dataset_rest = use_pca(dataset) # dataset_main = 路段 * 每一个路段里面的主成分 ; dataset_rest = 路段 * 每一个路段里面的偏差
107109
days, dnum = dataset_main[0].shape
108-
y_main = dataset_main[0][time_step:days, :]
109-
trainX, trainY = split_dataset(dataset_rest)
110-
train_x_raw = trainX[0]
111-
train_y_raw = np.reshape(trainY[0], (days - time_step, 480))
112110

111+
y_main = dataset_main[0][time_step:days, :] # y的主成分[2~14] shape 12 * 2 * 480
112+
#将偏差数据 分割 成输入和输出
113+
trainX, trainY = split_dataset(dataset_rest) #trainX 的shape 20 * 12 * 2 *480 ; trainY 的shape 20 * 12 * 1 *480
114+
115+
train_x_raw = trainX[0] #取了第一条路段来进行预测 train_x_raw的shape :12 * 2 *480
116+
train_y_raw = np.reshape(trainY[0], (days - time_step, 480)) # train_y_raw的shape 12 * 480
117+
# 归一化
113118
x_max = train_x_raw.max()
114119
x_min = train_x_raw.min()
115120
y_max = train_y_raw.max()
@@ -118,6 +123,13 @@ def use_pca(dataset):
118123
train_x = (train_x_raw - x_min) / (x_max - x_min)
119124
train_y = (train_y_raw - y_min) / (y_max - y_min)
120125

126+
# test
127+
test_num = 3
128+
test_x = train_x[0:test_num]
129+
test_y = train_y[0:test_num]
130+
131+
132+
# 放入lstm训练
121133
# lstm的hyper-parameter
122134

123135
hidden_size = 400
@@ -126,7 +138,7 @@ def use_pca(dataset):
126138
dropout_keep_rate = 1
127139

128140
# 根据输入数据来决定,train_num训练集大小,input_size输入维度
129-
train_num, time_step_size, input_size = train_x.shape
141+
train_num, time_step_size, input_size = train_x.shape # shape :12 * 2 *480
130142
# output_size输出的结点个数
131143
_, output_size = train_y.shape
132144

@@ -160,19 +172,19 @@ def use_pca(dataset):
160172
mae_result = []
161173
rmse_result = []
162174

163-
175+
# 获得训练的指标
164176
def get_metrics(y, pred_y):
    """Compute error metrics between ground-truth and predicted values.

    Args:
        y: Ground-truth values (array-like).
        pred_y: Predicted values, broadcastable against ``y``.

    Returns:
        A tuple ``(mre, mae, rmse)``:
        mean relative error, mean absolute error and root-mean-square error.
        ``mre`` is ``nan`` when every target is exactly zero.
    """
    y = np.asarray(y, dtype=float)
    pred_y = np.asarray(pred_y, dtype=float)

    diff = y - pred_y
    abs_err = np.abs(diff)

    # Relative error is taken against the magnitude of the target. Dividing by
    # the signed value lets negative targets cancel positive terms, and zero
    # targets would turn the whole mean into inf/nan.
    denom = np.broadcast_to(np.abs(y), abs_err.shape)
    valid = denom > 0
    if valid.any():
        mre = np.mean(abs_err[valid] / denom[valid])
    else:
        mre = float('nan')

    mae = np.mean(abs_err)
    rmse = np.sqrt(np.mean(np.square(diff)))
    return mre, mae, rmse
169181

170182

171-
def print_to_console(i, train_y_pred):
172-
train_y_pred_real = train_y_pred * (y_max - y_min) + y_min
173-
train_y_real = train_y * (y_max - y_min) + y_min
174-
plt.plot(range(dnum), train_y_real[0], 'b-')
175-
plt.plot(range(dnum), train_y_pred_real[0], 'r-')
183+
def print_to_console(i, train_y ,train_y_pred):
184+
train_y_pred_real = train_y_pred * (y_max - y_min) + y_min #反归一化
185+
train_y_real = train_y * (y_max - y_min) + y_min #train_y 是真实的y值,对train_y 进行反归一化
186+
plt.plot(range(dnum), train_y_real[0], 'b-') #实际用蓝色
187+
plt.plot(range(dnum), train_y_pred_real[0], 'r-') #预测用红色
176188
plt.show()
177189
train_mre, train_mae, train_rmse = get_metrics(train_y_real, train_y_pred_real)
178190
print('epoch %d train %.4f %.2f %.2f' % (i, train_mre, train_mae, train_rmse))
@@ -184,7 +196,13 @@ def print_to_console(i, train_y_pred):
184196
if i % 50 == 0:
185197
feed_dict = {x_input: train_x, y_real: train_y, keep_prob: 1.0, batch_size: train_num}
186198
train_y_pred = sess.run(y_pred, feed_dict=feed_dict)
187-
print_to_console(i, train_y_pred)
199+
print_to_console(i,train_y, train_y_pred)
200+
if i % 50 ==0:
201+
print ("test : ")
202+
feed_dict = {x_input: test_x, y_real: test_y, keep_prob: 1.0, batch_size: test_num}
203+
test_y_pred = sess.run(y_pred, feed_dict=feed_dict)
204+
print_to_console(i, test_y,test_y_pred)
205+
print ("--- test end ---")
188206

189207
y_main = dataset_main[0][time_step:days, :]
190208
y_pre_real = y_main + train_y_pred * (y_max - y_min) + y_min

0 commit comments

Comments
 (0)