@@ -64,7 +64,9 @@ def createdataset(data):
64
64
65
65
def myload ():
66
66
filename = 'dump2.txt'
67
- f = open ('data/' + filename , 'rb' )
67
+ #f = open('./data/' + filename, 'rb')
68
+ abspath = 'C:/Users/wwwa8/Documents/GitHub/Machine-Learning/序列预测/PCA去趋势化/dev2/data/'
69
+ f = open (abspath + filename , 'rb' )
68
70
data = pickle .load (f )
69
71
f .close ()
70
72
# print (data) # 路段数 * 每个路段的信息(df的数据结构)
@@ -102,14 +104,17 @@ def use_pca(dataset):
102
104
time_step = 2
103
105
data = myload () # data是一个list,里面是df格式的数据
104
106
# transfer
105
- dataset = createdataset (data ) # dataset 的格式是 (路段 * 每一天 * 一天内的数据)20 * 7 * 480
107
+ dataset = createdataset (data ) # dataset 的格式是 (路段 * 每一天 * 一天内的数据)20 * 14 * 480
106
108
dataset_main , dataset_rest = use_pca (dataset ) # dataset_main = 路段 * 每一个路段里面的主成分 ; dataset_rest = 路段 * 每一个路段里面的偏差
107
109
days , dnum = dataset_main [0 ].shape
108
- y_main = dataset_main [0 ][time_step :days , :]
109
- trainX , trainY = split_dataset (dataset_rest )
110
- train_x_raw = trainX [0 ]
111
- train_y_raw = np .reshape (trainY [0 ], (days - time_step , 480 ))
112
110
111
+ y_main = dataset_main [0 ][time_step :days , :] # y的主成分[2~14] shape 12 * 480
112
+ #将偏差数据 分割 成输入和输出
113
+ trainX , trainY = split_dataset (dataset_rest ) #trainX 的shape 20 * 12 * 2 *480 ; trainY 的shape 20 * 12 * 1 *480
114
+
115
+ train_x_raw = trainX [0 ] #取了第一条路段来进行预测 train_x_raw的shape :12 * 2 *480
116
+ train_y_raw = np .reshape (trainY [0 ], (days - time_step , 480 )) # train_y_raw的shape 12 * 480
117
+ # 归一化
113
118
x_max = train_x_raw .max ()
114
119
x_min = train_x_raw .min ()
115
120
y_max = train_y_raw .max ()
@@ -118,6 +123,13 @@ def use_pca(dataset):
118
123
train_x = (train_x_raw - x_min ) / (x_max - x_min )
119
124
train_y = (train_y_raw - y_min ) / (y_max - y_min )
120
125
126
+ # test
127
+ test_num = 3
128
+ test_x = train_x [0 :test_num ]
129
+ test_y = train_y [0 :test_num ]
130
+
131
+
132
+ # 放入lstm训练
121
133
# lstm的hyper-parameter
122
134
123
135
hidden_size = 400
@@ -126,7 +138,7 @@ def use_pca(dataset):
126
138
dropout_keep_rate = 1
127
139
128
140
# 根据输入数据来决定,train_num训练集大小,input_size输入维度
129
- train_num , time_step_size , input_size = train_x .shape
141
+ train_num , time_step_size , input_size = train_x .shape # shape :12 * 2 *480
130
142
# output_size输出的结点个数
131
143
_ , output_size = train_y .shape
132
144
@@ -160,19 +172,19 @@ def use_pca(dataset):
160
172
mae_result = []
161
173
rmse_result = []
162
174
163
-
175
+ # 获得训练的指标
164
176
def get_metrics(y, pred_y):
    """Compute error metrics between ground-truth values and predictions.

    Args:
        y: Ground-truth values (array-like).
        pred_y: Predicted values, broadcastable against ``y``.

    Returns:
        A tuple ``(mre, mae, rmse)``: mean relative error, mean absolute
        error and root-mean-square error. ``mre`` is averaged only over
        entries whose ground truth is non-zero and is NaN when there are
        no such entries.
    """
    y = np.asarray(y, dtype=float)
    pred_y = np.asarray(pred_y, dtype=float)

    abs_err = np.abs(y - pred_y)

    # Relative error is measured against the magnitude of the truth value.
    # Dividing by the signed value lets negative targets contribute negative
    # terms that cancel positive ones in the mean.
    denom = np.broadcast_to(np.abs(y), abs_err.shape)

    # Relative error is undefined where the truth is exactly zero; skip
    # those entries instead of letting inf/NaN poison the average.
    valid = denom > 0
    rel_err = abs_err[valid] / denom[valid]
    mre = np.mean(rel_err) if rel_err.size else np.nan

    mae = np.mean(abs_err)
    rmse = np.sqrt(np.mean(np.square(abs_err)))
    return mre, mae, rmse
169
181
170
182
171
- def print_to_console (i , train_y_pred ):
172
- train_y_pred_real = train_y_pred * (y_max - y_min ) + y_min
173
- train_y_real = train_y * (y_max - y_min ) + y_min
174
- plt .plot (range (dnum ), train_y_real [0 ], 'b-' )
175
- plt .plot (range (dnum ), train_y_pred_real [0 ], 'r-' )
183
+ def print_to_console (i , train_y , train_y_pred ):
184
+ train_y_pred_real = train_y_pred * (y_max - y_min ) + y_min #反归一化
185
+ train_y_real = train_y * (y_max - y_min ) + y_min #train_y 是真实的y值,对train_y 进行反归一化
186
+ plt .plot (range (dnum ), train_y_real [0 ], 'b-' ) #实际用蓝色
187
+ plt .plot (range (dnum ), train_y_pred_real [0 ], 'r-' ) #预测用红色
176
188
plt .show ()
177
189
train_mre , train_mae , train_rmse = get_metrics (train_y_real , train_y_pred_real )
178
190
print ('epoch %d train %.4f %.2f %.2f' % (i , train_mre , train_mae , train_rmse ))
@@ -184,7 +196,13 @@ def print_to_console(i, train_y_pred):
184
196
if i % 50 == 0 :
185
197
feed_dict = {x_input : train_x , y_real : train_y , keep_prob : 1.0 , batch_size : train_num }
186
198
train_y_pred = sess .run (y_pred , feed_dict = feed_dict )
187
- print_to_console (i , train_y_pred )
199
+ print_to_console (i ,train_y , train_y_pred )
200
+ if i % 50 == 0 :
201
+ print ("test : " )
202
+ feed_dict = {x_input : test_x , y_real : test_y , keep_prob : 1.0 , batch_size : test_num }
203
+ test_y_pred = sess .run (y_pred , feed_dict = feed_dict )
204
+ print_to_console (i , test_y ,test_y_pred )
205
+ print ("--- test end ---" )
188
206
189
207
y_main = dataset_main [0 ][time_step :days , :]
190
208
y_pre_real = y_main + train_y_pred * (y_max - y_min ) + y_min
0 commit comments