"""Multivariate time-series forecasting with an LSTM (Keras).

Based on:
https://machinelearningmastery.com/multivariate-time-series-forecasting-lstms-keras/
"""

import numpy as np
from pandas import DataFrame, concat, read_csv


def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning dataset.

    Args:
        data: Observations — a plain list (treated as univariate) or a
            2-D array-like of shape (n_samples, n_features).
        n_in: Number of lag observations (t-n_in .. t-1) used as input.
        n_out: Number of forecast observations (t .. t+n_out-1) produced.
        dropnan: Whether to drop rows containing NaNs introduced by shifting.

    Returns:
        DataFrame whose columns are named 'var<j>(t-<i>)', 'var<j>(t)' and
        'var<j>(t+<i>)' for each variable j and time offset i.
    """
    n_vars = 1 if isinstance(data, list) else data.shape[1]
    df = DataFrame(data)
    cols, names = [], []
    # input sequence (t-n, ..., t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # forecast sequence (t, t+1, ..., t+n)
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values created by the shifts
    if dropnan:
        agg.dropna(inplace=True)
    return agg


def main():
    """Load the minitaur log, frame it for supervised learning, train, plot."""
    # Heavy / GUI-dependent imports are deferred here so that
    # series_to_supervised() can be imported without matplotlib, sklearn
    # or keras being installed.
    import matplotlib
    matplotlib.use('TkAgg')
    from matplotlib import pyplot
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.preprocessing import LabelEncoder  # noqa: F401 (for the commented-out encoding step below)
    from keras.models import Sequential
    from keras.layers import Dropout, Dense, LSTM
    from keras.callbacks import ModelCheckpoint

    # quick sanity check of the framing helper
    values = list(range(10))
    data = series_to_supervised(values, 2)
    print(data)

    # load dataset
    #dataset = read_csv('./data/minitaur_log_latency_0.01.csv')
    #dataset = read_csv('./data/minitaur_log_latency_0.003.csv')
    dataset = read_csv('./data/minitaur_log_latency_0.006.csv')

    values = dataset.values
    # integer encode direction
    #encoder = LabelEncoder()
    #values[:,3] = encoder.fit_transform(values[:,3])
    # ensure all data is float
    values = values.astype('float32')

    # normalize features
    useNormalization = False
    if useNormalization:
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled = scaler.fit_transform(values)
    else:
        scaled = values

    # frame as supervised learning
    lag_steps = 5
    reframed = series_to_supervised(scaled, lag_steps, 1)
    print("reframed before drop=", reframed)

    # drop columns we don't want to predict
    # NOTE(review): the hard-coded indices [3, 7, 11, 15, 19] look like they
    # remove the lagged copies of column 3 (the target), which assumes the
    # CSV has exactly 4 columns — confirm against the data file.
    reframed.drop(reframed.columns[[3, 7, 11, 15, 19]], axis=1, inplace=True)
    print("after drop=", reframed.head())

    #dummy = scaler.inverse_transform(reframed)
    #print(dummy)

    groups = [0, 1, 2, 3]

    # plot each column
    doPlot = False
    if doPlot:
        pyplot.figure()
        for i, group in enumerate(groups, start=1):
            pyplot.subplot(len(groups), 1, i)
            pyplot.plot(values[0:25, group])
            pyplot.title(dataset.columns[group], y=0.5, loc='right')
        pyplot.show()

    # split into train and test sets
    values = reframed.values
    n_train_hours = 6000
    train = values[:n_train_hours, :]
    test = values[n_train_hours:, :]
    # split into input and outputs: last column is the prediction target
    train_X, train_y = train[:, :-1], train[:, -1]
    test_X, test_y = test[:, :-1], test[:, -1]

    print("train_X.shape[1]=", train_X.shape[1])

    # design network
    useLSTM = True
    if useLSTM:
        # reshape input to be 3D [samples, timesteps, features]
        n_steps = lag_steps + 1
        n_features = train_X.shape[1] // n_steps
        train_X = train_X.reshape((train_X.shape[0], n_steps, n_features))
        test_X = test_X.reshape((test_X.shape[0], n_steps, n_features))
        model = Sequential()
        model.add(LSTM(40, input_shape=(train_X.shape[1], train_X.shape[2])))
        model.add(Dropout(0.05))
        model.add(Dense(8, activation='sigmoid'))
        model.add(Dense(8, activation='sigmoid'))
        model.add(Dropout(0.05))
        model.add(Dense(1, activation='linear'))
    else:
        # simple MLP baseline on the flattened lag window
        model = Sequential()
        model.add(Dense(12, input_dim=train_X.shape[1], activation="relu"))
        model.add(Dense(8, activation="sigmoid"))
        model.add(Dense(1, activation="linear"))

    #model.compile(loss='mae', optimizer='adam')
    model.compile(loss='mean_squared_error', optimizer='adam',
                  metrics=['mean_squared_error'])

    # checkpoint: keep only the best-validation weights seen so far
    # NOTE(review): ModelCheckpoint does not create /tmp/keras/ — make sure
    # the directory exists before training, or saving will fail.
    filepath = '/tmp/keras/weights-improvement-{epoch:02d}-{val_loss:.2f}.hdf5'
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')
    callbacks_list = [checkpoint]

    # fit network
    history = model.fit(train_X,
                        train_y,
                        epochs=1500,
                        batch_size=32,
                        callbacks=callbacks_list,
                        validation_data=(test_X, test_y),
                        verbose=2,
                        shuffle=False)

    # single hand-picked sample for a spot-check prediction (disabled)
    data = np.array([[[
        1.513535008329887299, 3.234624992847829894e-01, 1.731481043119239782,
        1.741165415165205399, 1.534267104753672228e+00,
        1.071354965017878635e+00, 1.712386127673626302e+00
    ]]])

    #prediction = model.predict(data)
    #print("prediction=",prediction)

    # plot training history
    pyplot.plot(history.history['loss'], label='train')
    pyplot.plot(history.history['val_loss'], label='test')
    pyplot.legend()
    pyplot.show()


if __name__ == '__main__':
    main()