1# -*- coding: utf-8 -*- 2""" 3Created on Fri Dec 19 11:29:18 2014 4 5Author: Josef Perktold 6License: BSD-3 7 8""" 9 10import numpy as np 11from scipy import stats 12import pandas as pd 13 14 15# this is similar to ContrastResults after t_test, copied and adjusted 16class PredictionResults(object): 17 """ 18 Results class for predictions. 19 20 Parameters 21 ---------- 22 predicted_mean : ndarray 23 The array containing the prediction means. 24 var_pred_mean : ndarray 25 The array of the variance of the prediction means. 26 var_resid : ndarray 27 The array of residual variances. 28 df : int 29 The degree of freedom used if dist is 't'. 30 dist : {'norm', 't', object} 31 Either a string for the normal or t distribution or another object 32 that exposes a `ppf` method. 33 row_labels : list[str] 34 Row labels used in summary frame. 35 """ 36 37 def __init__(self, predicted_mean, var_pred_mean, var_resid, 38 df=None, dist=None, row_labels=None): 39 self.predicted_mean = predicted_mean 40 self.var_pred_mean = var_pred_mean 41 self.df = df 42 self.var_resid = var_resid 43 self.row_labels = row_labels 44 45 if dist is None or dist == 'norm': 46 self.dist = stats.norm 47 self.dist_args = () 48 elif dist == 't': 49 self.dist = stats.t 50 self.dist_args = (self.df,) 51 else: 52 self.dist = dist 53 self.dist_args = () 54 55 @property 56 def se_obs(self): 57 return np.sqrt(self.var_pred_mean + self.var_resid) 58 59 @property 60 def se_mean(self): 61 return np.sqrt(self.var_pred_mean) 62 63 def conf_int(self, obs=False, alpha=0.05): 64 """ 65 Returns the confidence interval of the value, `effect` of the 66 constraint. 67 68 This is currently only available for t and z tests. 69 70 Parameters 71 ---------- 72 alpha : float, optional 73 The significance level for the confidence interval. 74 ie., The default `alpha` = .05 returns a 95% confidence interval. 75 76 Returns 77 ------- 78 ci : ndarray, (k_constraints, 2) 79 The array has the lower and the upper limit of the confidence 80 interval in the columns. 81 """ 82 83 se = self.se_obs if obs else self.se_mean 84 85 q = self.dist.ppf(1 - alpha / 2., *self.dist_args) 86 lower = self.predicted_mean - q * se 87 upper = self.predicted_mean + q * se 88 return np.column_stack((lower, upper)) 89 90 def summary_frame(self, alpha=0.05): 91 # TODO: finish and cleanup 92 ci_obs = self.conf_int(alpha=alpha, obs=True) # need to split 93 ci_mean = self.conf_int(alpha=alpha, obs=False) 94 to_include = {} 95 to_include['mean'] = self.predicted_mean 96 to_include['mean_se'] = self.se_mean 97 to_include['mean_ci_lower'] = ci_mean[:, 0] 98 to_include['mean_ci_upper'] = ci_mean[:, 1] 99 to_include['obs_ci_lower'] = ci_obs[:, 0] 100 to_include['obs_ci_upper'] = ci_obs[:, 1] 101 102 self.table = to_include 103 # pandas dict does not handle 2d_array 104 # data = np.column_stack(list(to_include.values())) 105 # names = .... 106 res = pd.DataFrame(to_include, index=self.row_labels, 107 columns=to_include.keys()) 108 return res 109 110 111def get_prediction(self, exog=None, transform=True, weights=None, 112 row_labels=None, pred_kwds=None): 113 """ 114 Compute prediction results. 115 116 Parameters 117 ---------- 118 exog : array_like, optional 119 The values for which you want to predict. 120 transform : bool, optional 121 If the model was fit via a formula, do you want to pass 122 exog through the formula. Default is True. E.g., if you fit 123 a model y ~ log(x1) + log(x2), and transform is True, then 124 you can pass a data structure that contains x1 and x2 in 125 their original form. Otherwise, you'd need to log the data 126 first. 127 weights : array_like, optional 128 Weights interpreted as in WLS, used for the variance of the predicted 129 residual. 130 row_labels : list 131 A list of row labels to use. If not provided, read `exog` is 132 available. 133 **kwargs 134 Some models can take additional keyword arguments, see the predict 135 method of the model for the details. 136 137 Returns 138 ------- 139 linear_model.PredictionResults 140 The prediction results instance contains prediction and prediction 141 variance and can on demand calculate confidence intervals and summary 142 tables for the prediction of the mean and of new observations. 143 """ 144 145 # prepare exog and row_labels, based on base Results.predict 146 if transform and hasattr(self.model, 'formula') and exog is not None: 147 from patsy import dmatrix 148 if isinstance(exog, pd.Series): 149 # GH-6509 150 exog = pd.DataFrame(exog) 151 exog = dmatrix(self.model.data.design_info, exog) 152 153 if exog is not None: 154 if row_labels is None: 155 row_labels = getattr(exog, 'index', None) 156 if callable(row_labels): 157 row_labels = None 158 159 exog = np.asarray(exog) 160 if exog.ndim == 1: 161 # Params informs whether a row or column vector 162 if self.params.shape[0] > 1: 163 exog = exog[None, :] 164 else: 165 exog = exog[:, None] 166 exog = np.atleast_2d(exog) # needed in count model shape[1] 167 else: 168 exog = self.model.exog 169 if weights is None: 170 weights = getattr(self.model, 'weights', None) 171 172 if row_labels is None: 173 row_labels = getattr(self.model.data, 'row_labels', None) 174 175 # need to handle other arrays, TODO: is delegating to model possible ? 176 if weights is not None: 177 weights = np.asarray(weights) 178 if (weights.size > 1 and 179 (weights.ndim != 1 or weights.shape[0] == exog.shape[1])): 180 raise ValueError('weights has wrong shape') 181 182 if pred_kwds is None: 183 pred_kwds = {} 184 predicted_mean = self.model.predict(self.params, exog, **pred_kwds) 185 186 covb = self.cov_params() 187 var_pred_mean = (exog * np.dot(covb, exog.T).T).sum(1) 188 var_resid = self.scale # self.mse_resid / weights 189 190 # TODO: check that we have correct scale, Refactor scale #??? 191 # special case for now: 192 if self.cov_type == 'fixed scale': 193 var_resid = self.cov_kwds['scale'] 194 195 if weights is not None: 196 var_resid /= weights 197 198 dist = ['norm', 't'][self.use_t] 199 return PredictionResults(predicted_mean, var_pred_mean, var_resid, 200 df=self.df_resid, dist=dist, 201 row_labels=row_labels) 202