1from itertools import chain 2 3import numpy as np 4from scipy.stats import norm 5import statsmodels.api as sm 6 7from Orange.data import Table 8from orangecontrib.timeseries import Timeseries, rmse, mape, mae, pocid, r2 9 10 11IC_MAGIC = 'magic' 12 13 14class NotFittedError(ValueError, AttributeError): 15 """Raised when model predictions made without fitting""" 16 17 18class _BaseModel: 19 REQUIRES_STATIONARY = True 20 SUPPORTS_VECTOR = False 21 22 _NOT_FITTED = NotFittedError('Model must be fitted first (see fit() method)') 23 24 __wrapped__ = None 25 26 def __init__(self): 27 self.model = None 28 self.results = None 29 self.order = () 30 self._model_kwargs = dict(missing='raise') 31 self._fit_kwargs = dict() 32 self._endog = None 33 34 self._table_var_names = None 35 self._table_name = None 36 self._table_timevar = None 37 self._table_timevals = None 38 39 def _before_init(self, endog, exog): 40 """ 41 This method is called before the statsmodels model is init. It can 42 last-minute transform the endog and exog variables, or it can set 43 constructor parameters depending on their values. 44 """ 45 return endog, exog 46 47 def _before_fit(self, endog, exog): 48 """ 49 This method is called before fit() with the same parameters. It 50 can be used to set last-minute, endog or exog-dependent parameters. 51 Override it if you need it. 52 """ 53 54 def _fittedvalues(self): 55 """ 56 This was needed to override for ARIMA as its fittedvalues returned 57 unintegraded series instead. 58 """ 59 return self.results.fittedvalues 60 61 def fittedvalues(self, as_table=False): 62 """ 63 Return predictions for in-sample observations, i.e. the model's 64 approximations of the original training values. 65 66 Parameters 67 ---------- 68 as_table : bool 69 If True, return results as an Orange.data.Table. 70 71 Returns 72 ------- 73 fitted_values : array_like 74 """ 75 if self.results is None: 76 raise self._NOT_FITTED 77 values = self._fittedvalues() 78 if as_table: 79 values = self._as_table(values, 'fitted') 80 return values 81 82 def _as_table(self, values, what): 83 """Used for residuals() and fittedvalues() methods.""" 84 from Orange.data import Domain, ContinuousVariable 85 attrs = [] 86 n_vars = values.shape[1] if values.ndim == 2 else 1 87 if n_vars == 1: 88 values = np.atleast_2d(values).T 89 tvar = None 90 # If 1d, time var likely not already present, so lets add it if possible 91 if n_vars == 1 and self._table_timevar: 92 values = np.column_stack((self._table_timevals[-values.shape[0]:], 93 values)) 94 tvar = self._table_timevar 95 attrs.append(tvar) 96 for i, name in zip(range(n_vars), 97 self._table_var_names or range(n_vars)): 98 attrs.append(ContinuousVariable('{} ({})'.format(name, what))) 99 100 # Make the fitted time variable time variable 101 if self._table_timevar and self._table_timevar.name == name: 102 tvar = attrs[-1] 103 104 table = Timeseries.from_numpy(Domain(attrs), values) 105 table.time_variable = tvar 106 table.name = (self._table_name or '') + '({} {})'.format(self, what) 107 return table 108 109 def residuals(self, as_table=True): 110 """ 111 Return residuals (prediction errors) for in-sample observations, 112 113 Parameters 114 ---------- 115 as_table : bool 116 If True, return results as an Orange.data.Table. 117 118 Returns 119 ------- 120 residuals : array_like 121 """ 122 if self.results is None: 123 raise self._NOT_FITTED 124 resid = self.results.resid 125 if as_table: 126 resid = self._as_table(resid, 'residuals') 127 return resid 128 129 def _predict(self, steps, exog, alpha): 130 """ 131 Return forecast predictions (along with confidence intervals) 132 for steps ahead given exog values (or None if not needed). 133 """ 134 raise NotImplementedError 135 136 def _orange_arrays(self, table): 137 self._table_var_names = [v.name for v in chain(table.domain.class_vars, 138 table.domain.attributes)] 139 self._table_name = table.name 140 if getattr(table, 'time_variable', None): 141 self._table_timevar = table.time_variable 142 self._table_timevals = table.time_values 143 y = table.Y.ravel() 144 X = table.X 145 if y.size: 146 defined_range = (np.arange(1, len(y) + 1) * ~np.isnan(y)).max() 147 y = y[:defined_range] 148 X = X[:defined_range] 149 return y, X 150 151 def fit(self, endog, exog=None): 152 """ 153 Fit the model to endogenous variable endog, optionally given 154 exogenous column variables exog. 155 156 Parameters 157 ---------- 158 endog : array_like 159 Dependent variable (y) of shape ``[nobs, k]`` 160 (``k = 1`` for a single variable; ``k > 1`` for vector models). 161 exog : array_like 162 If model supports it, the additional independent variables (X) of 163 shape ``[nobs, k_vars]``. 164 165 Returns 166 ------- 167 fitted_model 168 """ 169 if isinstance(endog, Table): 170 assert exog is None 171 endog, exog = self._orange_arrays(endog) 172 173 if not endog.size: 174 if not exog.size: 175 raise ValueError('Input series are empty. Nothing to learn.') 176 endog, exog = exog, None 177 178 endog, exog = self._before_init(endog, exog) 179 self._endog = endog 180 kwargs = self._model_kwargs.copy() 181 kwargs.update(endog=endog) 182 if exog is not None: 183 kwargs.update(exog=exog) 184 model = self.model = self.__wrapped__(**kwargs) 185 186 self._before_fit(endog, exog) 187 kwargs = self._fit_kwargs.copy() 188 self.results = model.fit(**kwargs) 189 return self 190 191 def errors(self): 192 """Return dict of RMSE/MAE/MAPE/POCID/R² errors on in-sample, fitted values 193 194 Returns 195 ------- 196 errors : dict 197 Mapping of error measure str -> error value. 198 """ 199 if self.results is None: 200 raise self._NOT_FITTED 201 true = self._endog 202 pred = self._fittedvalues() 203 return dict(r2=r2(true, pred), 204 mae=mae(true, pred), 205 rmse=rmse(true, pred), 206 mape=mape(true, pred), 207 pocid=pocid(true, pred)) 208 209 def _predict_as_table(self, prediction, confidence): 210 from Orange.data import Domain, ContinuousVariable 211 means, lows, highs = [], [], [] 212 n_vars = prediction.shape[2] if len(prediction.shape) > 2 else 1 213 for i, name in zip(range(n_vars), 214 self._table_var_names or range(n_vars)): 215 mean = ContinuousVariable('{} (forecast)'.format(name)) 216 low = ContinuousVariable('{} ({:d}%CI low)'.format(name, confidence)) 217 high = ContinuousVariable('{} ({:d}%CI high)'.format(name, confidence)) 218 low.ci_percent = high.ci_percent = confidence 219 mean.ci_attrs = (low, high) 220 means.append(mean) 221 lows.append(low) 222 highs.append(high) 223 domain = Domain(means + lows + highs) 224 X = np.column_stack(prediction) 225 table = Timeseries.from_numpy(domain, X) 226 table.name = (self._table_name or '') + '({} forecast)'.format(self) 227 return table 228 229 def predict(self, steps=1, exog=None, *, alpha=.05, as_table=False): 230 """Make the forecast of future values. 231 232 Parameters 233 ---------- 234 steps : int 235 The number of steps to make forecast for. 236 exog : array_like 237 The exogenous variables some models require. 238 alpha : float 239 Calculate and return (1-alpha)100% confidence intervals. 240 as_table : bool 241 If True, return results as an Orange.data.Table. 242 243 Returns 244 ------- 245 forecast : array_like 246 (forecast, low, high) 247 """ 248 if self.results is None: 249 raise self._NOT_FITTED 250 prediction = self._predict(steps, exog, alpha) 251 if as_table: 252 prediction = self._predict_as_table(prediction, int((1 - alpha) * 100)) 253 return prediction 254 255 def __str__(self): 256 return str(self.__wrapped__) 257 258 @property 259 def max_order(self): 260 return max(self.order, default=0) 261 262 def clear(self): 263 """Reset (unfit) the current model""" 264 self.model = None 265 self.results = None 266 self._endog = None 267 self._table_var_names = None 268 self._table_name = None 269 self._table_timevar = None 270 self._table_timevals = None 271 272 def copy(self): 273 """Copy the current model""" 274 from copy import deepcopy 275 return deepcopy(self) 276 277 278class ARIMA(_BaseModel): 279 """Autoregressive integrated moving average (ARIMA) model 280 281 An auto regression (AR) and moving average (MA) model with differencing. 282 283 If exogenous variables are provided in fit() method, this becomes an 284 ARIMAX model. 285 286 Parameters 287 ---------- 288 order : tuple (p, d, q) 289 Tuple of three non-negative integers: (p) the AR order, (d) the 290 degree of differencing, and (q) the order of MA model. 291 If d = 0, this becomes an ARMA model. 292 293 Returns 294 ------- 295 unfitted_model 296 """ 297 REQUIRES_STATIONARY = False 298 __wrapped__ = sm.tsa.ARIMA 299 300 def __init__(self, order=(1, 0, 0), use_exog=False): 301 super().__init__() 302 self.order = order 303 self.use_exog = use_exog 304 self._model_kwargs.update(order=order) 305 self._fit_kwargs.update(disp=0, # Don't print shit 306 verbose=False) 307 308 def __str__(self): 309 return '{}({})'.format('AR{}MA{}'.format('I' if self.order[1] else '', 310 'X' if self.use_exog else ''), 311 ','.join(map(str, self.order))) 312 313 def _predict(self, steps, exog, alpha): 314 forecast, _, confint = self.results.forecast(steps, exog, alpha) 315 return np.c_[forecast, confint].T 316 317 def _before_init(self, endog, exog): 318 exog = exog if self.use_exog else None 319 if len(endog) == 0: 320 raise ValueError('Need an endogenous (target) variable to fit') 321 return endog, exog 322 323 def _fittedvalues(self): 324 # Statsmodels supports different args whether series is 325 # differentiated (order has d) or not. -- stupid statsmodels 326 kwargs = dict(typ='levels') if self.order[1] > 0 else {} 327 return self.results.predict(**kwargs) 328 329 330class VAR(_BaseModel): 331 """Vector auto-regression (VAR) model 332 333 A multivariate auto regression model of multiple inter-dependent variables. 334 335 Parameters 336 ---------- 337 maxlags : int 338 The exact number of lags (order) to construct the model with or 339 the maximum number of lags to check for order selection (see `ic` 340 parameter). Defaults to ``12*(nobs/10)**.5``. 341 ic : {‘aic’, ‘fpe’, ‘hqic’, ‘bic’, 'magic', None} 342 The information criterion to optimize order (`maxlags`) on. 343 trend : {'c', 'ct', 'ctt', 'nc'} 344 Constant (c); constant and trend (ct); constant, linear and 345 quadratic trend (ctt); no constant, no trend (nc). These are 346 prepended to the columns of the data set. 347 348 Returns 349 ------- 350 unfitted_model 351 """ 352 SUPPORTS_VECTOR = True 353 __wrapped__ = sm.tsa.VAR 354 355 MAX_LAGS = lambda arr: 12 * (len(arr) / 10) ** .5 356 357 def __init__(self, maxlags=None, ic=None, trend='c'): 358 super().__init__() 359 self.ic = ic 360 self.trend = trend 361 self._ic_magic = ic == IC_MAGIC 362 self.order = (maxlags,) 363 self._maxlags = self.MAX_LAGS if maxlags is None else maxlags 364 self._fit_kwargs.update(maxlags=maxlags, trend=trend, ic=ic) 365 366 def __str__(self): 367 args = ('auto' if callable(self._maxlags) else self._maxlags, 368 self.ic, 369 self.trend if self.trend != 'c' else None) 370 return '{}({})'.format(self.__wrapped__.__name__, 371 ','.join(map(str, filter(None, args)))) 372 373 def _before_init(self, endog, exog): 374 if exog is not None: 375 endog = np.column_stack((endog, exog)) if endog.size else exog 376 return endog, None 377 378 def _before_fit(self, endog, exog=None): 379 maxlags = self._maxlags 380 381 if callable(maxlags): 382 maxlags = maxlags(endog) 383 self._fit_kwargs.update(maxlags=maxlags) 384 self.order = (maxlags,) 385 386 if self._ic_magic: 387 ic_results = self.model.select_order(maxlags) 388 lags = sum(ic_results.values()) // len(ic_results) 389 self.order = (lags,) 390 self._fit_kwargs.update(maxlags=lags, ic=None) 391 392 def _predict(self, steps, exog, alpha): 393 assert 0 < alpha < 1 394 y = (exog if exog is not None else self._endog)[-self.results.k_ar:] 395 forecast = self.results.forecast(y, steps) 396 # FIXME: The following is adapted from statsmodels's 397 # VAR.forecast_interval() as the original doesn't work 398 q = norm.ppf(1 - alpha / 2) 399 sigma = np.sqrt(np.abs(np.diagonal(self.results.mse(steps), axis1=2))) 400 err = q * sigma 401 return np.asarray([forecast, forecast - err, forecast + err]) 402 403