1# -*- coding: utf-8 -*-
2"""
3Created on Fri Dec 19 11:29:18 2014
4
5Author: Josef Perktold
6License: BSD-3
7
8"""
9
10import numpy as np
11from scipy import stats
12import pandas as pd
13
14
15# this is similar to ContrastResults after t_test, copied and adjusted
16class PredictionResults(object):
17    """
18    Results class for predictions.
19
20    Parameters
21    ----------
22    predicted_mean : ndarray
23        The array containing the prediction means.
24    var_pred_mean : ndarray
25        The array of the variance of the prediction means.
26    var_resid : ndarray
27        The array of residual variances.
28    df : int
29        The degree of freedom used if dist is 't'.
30    dist : {'norm', 't', object}
31        Either a string for the normal or t distribution or another object
32        that exposes a `ppf` method.
33    row_labels : list[str]
34        Row labels used in summary frame.
35    """
36
37    def __init__(self, predicted_mean, var_pred_mean, var_resid,
38                 df=None, dist=None, row_labels=None):
39        self.predicted_mean = predicted_mean
40        self.var_pred_mean = var_pred_mean
41        self.df = df
42        self.var_resid = var_resid
43        self.row_labels = row_labels
44
45        if dist is None or dist == 'norm':
46            self.dist = stats.norm
47            self.dist_args = ()
48        elif dist == 't':
49            self.dist = stats.t
50            self.dist_args = (self.df,)
51        else:
52            self.dist = dist
53            self.dist_args = ()
54
55    @property
56    def se_obs(self):
57        return np.sqrt(self.var_pred_mean + self.var_resid)
58
59    @property
60    def se_mean(self):
61        return np.sqrt(self.var_pred_mean)
62
63    def conf_int(self, obs=False, alpha=0.05):
64        """
65        Returns the confidence interval of the value, `effect` of the
66        constraint.
67
68        This is currently only available for t and z tests.
69
70        Parameters
71        ----------
72        alpha : float, optional
73            The significance level for the confidence interval.
74            ie., The default `alpha` = .05 returns a 95% confidence interval.
75
76        Returns
77        -------
78        ci : ndarray, (k_constraints, 2)
79            The array has the lower and the upper limit of the confidence
80            interval in the columns.
81        """
82
83        se = self.se_obs if obs else self.se_mean
84
85        q = self.dist.ppf(1 - alpha / 2., *self.dist_args)
86        lower = self.predicted_mean - q * se
87        upper = self.predicted_mean + q * se
88        return np.column_stack((lower, upper))
89
90    def summary_frame(self, alpha=0.05):
91        # TODO: finish and cleanup
92        ci_obs = self.conf_int(alpha=alpha, obs=True)  # need to split
93        ci_mean = self.conf_int(alpha=alpha, obs=False)
94        to_include = {}
95        to_include['mean'] = self.predicted_mean
96        to_include['mean_se'] = self.se_mean
97        to_include['mean_ci_lower'] = ci_mean[:, 0]
98        to_include['mean_ci_upper'] = ci_mean[:, 1]
99        to_include['obs_ci_lower'] = ci_obs[:, 0]
100        to_include['obs_ci_upper'] = ci_obs[:, 1]
101
102        self.table = to_include
103        # pandas dict does not handle 2d_array
104        # data = np.column_stack(list(to_include.values()))
105        # names = ....
106        res = pd.DataFrame(to_include, index=self.row_labels,
107                           columns=to_include.keys())
108        return res
109
110
111def get_prediction(self, exog=None, transform=True, weights=None,
112                   row_labels=None, pred_kwds=None):
113    """
114    Compute prediction results.
115
116    Parameters
117    ----------
118    exog : array_like, optional
119        The values for which you want to predict.
120    transform : bool, optional
121        If the model was fit via a formula, do you want to pass
122        exog through the formula. Default is True. E.g., if you fit
123        a model y ~ log(x1) + log(x2), and transform is True, then
124        you can pass a data structure that contains x1 and x2 in
125        their original form. Otherwise, you'd need to log the data
126        first.
127    weights : array_like, optional
128        Weights interpreted as in WLS, used for the variance of the predicted
129        residual.
130    row_labels : list
131        A list of row labels to use.  If not provided, read `exog` is
132        available.
133    **kwargs
134        Some models can take additional keyword arguments, see the predict
135        method of the model for the details.
136
137    Returns
138    -------
139    linear_model.PredictionResults
140        The prediction results instance contains prediction and prediction
141        variance and can on demand calculate confidence intervals and summary
142        tables for the prediction of the mean and of new observations.
143    """
144
145    # prepare exog and row_labels, based on base Results.predict
146    if transform and hasattr(self.model, 'formula') and exog is not None:
147        from patsy import dmatrix
148        if isinstance(exog, pd.Series):
149            # GH-6509
150            exog = pd.DataFrame(exog)
151        exog = dmatrix(self.model.data.design_info, exog)
152
153    if exog is not None:
154        if row_labels is None:
155            row_labels = getattr(exog, 'index', None)
156            if callable(row_labels):
157                row_labels = None
158
159        exog = np.asarray(exog)
160        if exog.ndim == 1:
161            # Params informs whether a row or column vector
162            if self.params.shape[0] > 1:
163                exog = exog[None, :]
164            else:
165                exog = exog[:, None]
166        exog = np.atleast_2d(exog)  # needed in count model shape[1]
167    else:
168        exog = self.model.exog
169        if weights is None:
170            weights = getattr(self.model, 'weights', None)
171
172        if row_labels is None:
173            row_labels = getattr(self.model.data, 'row_labels', None)
174
175    # need to handle other arrays, TODO: is delegating to model possible ?
176    if weights is not None:
177        weights = np.asarray(weights)
178        if (weights.size > 1 and
179                (weights.ndim != 1 or weights.shape[0] == exog.shape[1])):
180            raise ValueError('weights has wrong shape')
181
182    if pred_kwds is None:
183        pred_kwds = {}
184    predicted_mean = self.model.predict(self.params, exog, **pred_kwds)
185
186    covb = self.cov_params()
187    var_pred_mean = (exog * np.dot(covb, exog.T).T).sum(1)
188    var_resid = self.scale  # self.mse_resid / weights
189
190    # TODO: check that we have correct scale, Refactor scale #???
191    # special case for now:
192    if self.cov_type == 'fixed scale':
193        var_resid = self.cov_kwds['scale']
194
195    if weights is not None:
196        var_resid /= weights
197
198    dist = ['norm', 't'][self.use_t]
199    return PredictionResults(predicted_mean, var_pred_mean, var_resid,
200                             df=self.df_resid, dist=dist,
201                             row_labels=row_labels)
202