1"""see https://machinelearningmastery.com/multivariate-time-series-forecasting-lstms-keras/"""
2
import os

import matplotlib
matplotlib.use('TkAgg')
import numpy as np
from matplotlib import pyplot
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dropout
from keras.layers import Dense
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from matplotlib import pyplot

from pandas import read_csv
21
22
23# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
  """Frame a time series as a table of lagged and future copies of itself.

  Args:
    data: Observations ordered in time; anything `DataFrame` accepts, e.g. a
      flat list, a list of rows, a 1-D or 2-D array, or a DataFrame. Each
      resulting column is treated as one variable.
    n_in: Number of lag steps (t-n_in ... t-1) to add as columns.
    n_out: Number of forecast steps (t ... t+n_out-1) to add as columns.
    dropnan: If true, drop every row that contains a NaN. This removes the
      edge rows that shifting could not fill, and also any row that already
      had a NaN in the input.

  Returns:
    A DataFrame with one column per (variable, step) pair, named
    'var<j>(t-<i>)', 'var<j>(t)' or 'var<j>(t+<i>)', ordered from the oldest
    lag to the furthest forecast step.
  """
  frame = DataFrame(data)
  # Count the variables on the frame rather than on the input's type: a list
  # of rows is multivariate, and a 1-D array has no second dimension to read.
  n_vars = frame.shape[1]
  var_ids = range(1, n_vars + 1)
  cols, names = [], []
  # input sequence (t-n_in, ... t-1)
  for lag in range(n_in, 0, -1):
    cols.append(frame.shift(lag))
    names += ['var%d(t-%d)' % (j, lag) for j in var_ids]
  # forecast sequence (t, t+1, ... t+n_out-1)
  for step in range(n_out):
    cols.append(frame.shift(-step))
    if step == 0:
      names += ['var%d(t)' % j for j in var_ids]
    else:
      names += ['var%d(t+%d)' % (j, step) for j in var_ids]
  # put it all together
  agg = concat(cols, axis=1)
  agg.columns = names
  # drop rows with NaN values
  if dropnan:
    agg.dropna(inplace=True)
  return agg
46
47
# Small demonstration: frame the integers 0..9 with two lag steps and print
# the resulting table.
values = list(range(10))
data = series_to_supervised(values, n_in=2)
print(data)
51
# load dataset
# Other input paths that were used here before (swap the path to switch):
#   './data/minitaur_log_latency_0.01.csv'
#   './data/minitaur_log_latency_0.003.csv'
dataset = read_csv('./data/minitaur_log_latency_0.006.csv')

# Take the raw table and ensure all data is float.
# (A LabelEncoder pass over column 3 was tried at this point and is disabled.)
values = dataset.values.astype('float32')

# normalize features: optional min-max scaling into [0, 1]; when switched
# off, the float values are passed through unchanged.
useNormalization = False
if not useNormalization:
  scaled = values
else:
  scaler = MinMaxScaler(feature_range=(0, 1))
  scaled = scaler.fit_transform(values)
71
# frame as supervised learning: lag_steps past rows plus the current row
lag_steps = 5
reframed = series_to_supervised(scaled, n_in=lag_steps, n_out=1)
print("reframed before drop=", reframed)

# drop columns we don't want to predict
# Positions 3, 7, 11, 15 and 19 are every fourth column among the first twenty.
# NOTE(review): this presumably removes the last variable at each of the five
# lags and so assumes the CSV has exactly four columns -- confirm.
unwanted = reframed.columns[[3, 7, 11, 15, 19]]
reframed = reframed.drop(unwanted, axis=1)
print("after drop=", reframed.head())
80
81#dummy = scaler.inverse_transform(reframed)
82#print(dummy)
83
# Column positions to plot, one stacked subplot per entry.
groups = [0, 1, 2, 3]

# plot each column (first 25 rows only); switched off by default
doPlot = False
if doPlot:
  pyplot.figure()
  n_rows = len(groups)
  # subplot positions are 1-based, hence start=1
  for row, group in enumerate(groups, start=1):
    pyplot.subplot(n_rows, 1, row)
    pyplot.plot(values[0:25, group])
    pyplot.title(dataset.columns[group], y=0.5, loc='right')
  pyplot.show()
97
# split into train and test sets: the first n_train_hours rows are used for
# training, every row after that is held out.
# NOTE(review): the name says "hours", but nothing here establishes what time
# span one row covers -- confirm before relying on the unit.
values = reframed.values
n_train_hours = 6000
train, test = values[:n_train_hours], values[n_train_hours:]

# split into input and outputs: the last column is the target, all columns
# before it are the inputs.
train_X, test_X = train[:, :-1], test[:, :-1]
train_y, test_y = train[:, -1], test[:, -1]

print("train_X.shape[1]=", train_X.shape[1])
108
# design network
useLSTM = True
if useLSTM:
  # reshape input to be 3D [samples, timesteps, features]; the flat input
  # columns are split evenly over lag_steps + 1 time steps.
  n_timesteps = lag_steps + 1
  train_X = train_X.reshape((train_X.shape[0], n_timesteps, train_X.shape[1] // n_timesteps))
  test_X = test_X.reshape((test_X.shape[0], n_timesteps, test_X.shape[1] // n_timesteps))
  # recurrent model: LSTM -> two small sigmoid layers -> one linear output
  model = Sequential([
      LSTM(40, input_shape=(train_X.shape[1], train_X.shape[2])),
      Dropout(0.05),
      Dense(8, activation='sigmoid'),
      Dense(8, activation='sigmoid'),
      Dropout(0.05),
      Dense(1, activation='linear'),
  ])
else:
  # plain feed-forward model on the flat 2D input
  model = Sequential([
      Dense(12, input_dim=train_X.shape[1], activation="relu"),
      Dense(8, activation="sigmoid"),
      Dense(1, activation="linear"),
  ])

# Train on mean squared error and report it as a metric as well.
# (loss='mae' was tried earlier and is disabled.)
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error'])
132
# checkpoint: after an epoch, write a checkpoint whenever val_loss reaches a
# new minimum; the epoch number and val_loss are formatted into the file name.
filepath = '/tmp/keras/weights-improvement-{epoch:02d}-{val_loss:.2f}.hdf5'
# Create the target directory up front: the first save fails if it is missing,
# and Keras versions differ on whether the callback creates it.
os.makedirs(os.path.dirname(filepath), exist_ok=True)
checkpoint = ModelCheckpoint(filepath,
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')
callbacks_list = [checkpoint]

# fit network
# shuffle=False keeps the samples in their original order; the held-out rows
# serve as validation data, so they also decide which checkpoints are kept.
history = model.fit(train_X,
                    train_y,
                    epochs=1500,
                    batch_size=32,
                    callbacks=callbacks_list,
                    validation_data=(test_X, test_y),
                    verbose=2,
                    shuffle=False)
# One hand-written input sample of shape (1, 1, 7). It is only referenced by
# the prediction lines below, which are commented out.
# NOTE(review): a single time step does not match the lag_steps + 1 time steps
# the LSTM input is reshaped to -- confirm before re-enabling the prediction.
data = np.array([[[
    1.513535008329887299, 3.234624992847829894e-01, 1.731481043119239782, 1.741165415165205399,
    1.534267104753672228e+00, 1.071354965017878635e+00, 1.712386127673626302e+00
]]])

#prediction = model.predict(data)
#print("prediction=",prediction)

# plot history: training loss and validation loss per epoch
for key, label in (('loss', 'train'), ('val_loss', 'test')):
  pyplot.plot(history.history[key], label=label)
pyplot.legend()
pyplot.show()
165