1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3"""
4
5   Copyright 2014-2019 OpenEEmeter contributors
6
7   Licensed under the Apache License, Version 2.0 (the "License");
8   you may not use this file except in compliance with the License.
9   You may obtain a copy of the License at
10
11       http://www.apache.org/licenses/LICENSE-2.0
12
13   Unless required by applicable law or agreed to in writing, software
14   distributed under the License is distributed on an "AS IS" BASIS,
15   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   See the License for the specific language governing permissions and
17   limitations under the License.
18
19"""
20from scipy.stats import t
21
22from .caltrack.usage_per_day import CalTRACKUsagePerDayModelResults
23
24
25__all__ = ("metered_savings", "modeled_savings")
26
27
28def _compute_ols_error(
29    t_stat,
30    rmse_base_residuals,
31    post_obs,
32    base_obs,
33    base_avg,
34    post_avg,
35    base_var,
36    nprime,
37):
38    ols_model_agg_error = (
39        (t_stat * rmse_base_residuals * post_obs)
40        / (base_obs ** 0.5)
41        * (1.0 + ((base_avg - post_avg) ** 2.0 / base_var)) ** 0.5
42    )
43
44    ols_noise_agg_error = (
45        t_stat * rmse_base_residuals * (post_obs * base_obs / nprime) ** 0.5
46    )
47
48    ols_total_agg_error = (
49        ols_model_agg_error ** 2.0 + ols_noise_agg_error ** 2.0
50    ) ** 0.5
51
52    return ols_total_agg_error, ols_model_agg_error, ols_noise_agg_error
53
54
55def _compute_fsu_error(
56    t_stat,
57    interval,
58    post_obs,
59    total_base_energy,
60    rmse_base_residuals,
61    base_avg,
62    base_obs,
63    nprime,
64):
65    if interval.startswith("billing"):
66        a_coeff = -0.00022
67        b_coeff = 0.03306
68        c_coeff = 0.94054
69        months_reporting = float(post_obs)
70    else:  # daily
71        a_coeff = -0.00024
72        b_coeff = 0.03535
73        c_coeff = 1.00286
74        months_reporting = float(post_obs) / 30.0
75
76    fsu_error_band = total_base_energy * (
77        t_stat
78        * (a_coeff * months_reporting ** 2.0 + b_coeff * months_reporting + c_coeff)
79        * (rmse_base_residuals / base_avg)
80        * ((base_obs / nprime) * (1.0 + (2.0 / nprime)) * (1.0 / post_obs)) ** 0.5
81    )
82
83    return fsu_error_band
84
85
86def _compute_error_bands_metered_savings(
87    totals_metrics, results, interval, confidence_level
88):
89    num_parameters = float(totals_metrics.num_parameters)
90
91    base_obs = float(totals_metrics.observed_length)
92    if (interval.startswith("billing")) & (len(results.dropna().index) > 0):
93        post_obs = float(round((results.index[-1] - results.index[0]).days / 30.0))
94    else:
95        post_obs = float(results["reporting_observed"].dropna().shape[0])
96
97    degrees_of_freedom = float(base_obs - num_parameters)
98    single_tailed_confidence_level = 1 - ((1 - confidence_level) / 2)
99    t_stat = t.ppf(single_tailed_confidence_level, degrees_of_freedom)
100
101    rmse_base_residuals = float(totals_metrics.rmse_adj)
102    autocorr_resid = totals_metrics.autocorr_resid
103
104    base_avg = float(totals_metrics.observed_mean)
105    post_avg = float(results["reporting_observed"].mean())
106
107    base_var = float(totals_metrics.observed_variance)
108
109    # these result in division by zero error for fsu_error_band
110    if (
111        post_obs == 0
112        or autocorr_resid is None
113        or abs(autocorr_resid) == 1
114        or base_obs == 0
115        or base_avg == 0
116        or base_var == 0
117    ):
118        return None
119
120    autocorr_resid = float(autocorr_resid)
121
122    nprime = float(base_obs * (1 - autocorr_resid) / (1 + autocorr_resid))
123
124    total_base_energy = float(base_avg * base_obs)
125
126    ols_total_agg_error, ols_model_agg_error, ols_noise_agg_error = _compute_ols_error(
127        t_stat,
128        rmse_base_residuals,
129        post_obs,
130        base_obs,
131        base_avg,
132        post_avg,
133        base_var,
134        nprime,
135    )
136
137    fsu_error_band = _compute_fsu_error(
138        t_stat,
139        interval,
140        post_obs,
141        total_base_energy,
142        rmse_base_residuals,
143        base_avg,
144        base_obs,
145        nprime,
146    )
147
148    return {
149        "FSU Error Band": fsu_error_band,
150        "OLS Error Band": ols_total_agg_error,
151        "OLS Error Band: Model Error": ols_model_agg_error,
152        "OLS Error Band: Noise": ols_noise_agg_error,
153    }
154
155
156def metered_savings(
157    baseline_model,
158    reporting_meter_data,
159    temperature_data,
160    with_disaggregated=False,
161    confidence_level=0.90,
162    predict_kwargs=None,
163):
164    """Compute metered savings, i.e., savings in which the baseline model
165    is used to calculate the modeled usage in the reporting period. This
166    modeled usage is then compared to the actual usage from the reporting period.
167    Also compute two measures of the uncertainty of the aggregate savings estimate,
168    a fractional savings uncertainty (FSU) error band and an OLS error band. (To convert
169    the FSU error band into FSU, divide by total estimated savings.)
170
171    Parameters
172    ----------
173    baseline_model : :any:`eemeter.CalTRACKUsagePerDayModelResults`
174        Object to use for predicting pre-intervention usage.
175    reporting_meter_data : :any:`pandas.DataFrame`
176        The observed reporting period data (totals). Savings will be computed for the
177        periods supplied in the reporting period data.
178    temperature_data : :any:`pandas.Series`
179        Hourly-frequency timeseries of temperature data during the reporting
180        period.
181    with_disaggregated : :any:`bool`, optional
182        If True, calculate baseline counterfactual disaggregated usage
183        estimates. Savings cannot be disaggregated for metered savings. For
184        that, use :any:`eemeter.modeled_savings`.
185    confidence_level : :any:`float`, optional
186        The two-tailed confidence level used to calculate the t-statistic used
187        in calculation of the error bands.
188
189        Ignored if not computing error bands.
190    predict_kwargs : :any:`dict`, optional
191        Extra kwargs to pass to the baseline_model.predict method.
192
193    Returns
194    -------
195    results : :any:`pandas.DataFrame`
196        DataFrame with metered savings, indexed with
197        ``reporting_meter_data.index``. Will include the following columns:
198
199        - ``counterfactual_usage`` (baseline model projected into reporting period)
200        - ``reporting_observed`` (given by reporting_meter_data)
201        - ``metered_savings``
202
203        If `with_disaggregated` is set to True, the following columns will also
204        be in the results DataFrame:
205
206        - ``counterfactual_base_load``
207        - ``counterfactual_heating_load``
208        - ``counterfactual_cooling_load``
209
210    error_bands : :any:`dict`, optional
211        If baseline_model is an instance of CalTRACKUsagePerDayModelResults,
212        will also return a dictionary of FSU and OLS error bands for the
213        aggregated energy savings over the post period.
214    """
215    if predict_kwargs is None:
216        predict_kwargs = {}
217
218    model_type = None  # generic
219    if isinstance(baseline_model, CalTRACKUsagePerDayModelResults):
220        model_type = "usage_per_day"
221
222    if model_type == "usage_per_day" and with_disaggregated:
223        predict_kwargs["with_disaggregated"] = True
224
225    prediction_index = reporting_meter_data.index
226    model_prediction = baseline_model.predict(
227        prediction_index, temperature_data, **predict_kwargs
228    )
229
230    predicted_baseline_usage = model_prediction.result
231
232    # CalTrack 3.5.1
233    counterfactual_usage = predicted_baseline_usage["predicted_usage"].to_frame(
234        "counterfactual_usage"
235    )
236
237    reporting_observed = reporting_meter_data["value"].to_frame("reporting_observed")
238
239    def metered_savings_func(row):
240        return row.counterfactual_usage - row.reporting_observed
241
242    results = reporting_observed.join(counterfactual_usage).assign(
243        metered_savings=metered_savings_func
244    )
245
246    if model_type == "usage_per_day" and with_disaggregated:
247        counterfactual_usage_disaggregated = predicted_baseline_usage[
248            ["base_load", "heating_load", "cooling_load"]
249        ].rename(
250            columns={
251                "base_load": "counterfactual_base_load",
252                "heating_load": "counterfactual_heating_load",
253                "cooling_load": "counterfactual_cooling_load",
254            }
255        )
256        results = results.join(counterfactual_usage_disaggregated)
257
258    results = results.dropna().reindex(results.index)  # carry NaNs
259
260    # compute t-statistic associated with n degrees of freedom
261    # and a two-tailed confidence level.
262    error_bands = None
263    if model_type == "usage_per_day":  # has totals_metrics
264        error_bands = _compute_error_bands_metered_savings(
265            baseline_model.totals_metrics,
266            results,
267            baseline_model.interval,
268            confidence_level,
269        )
270    return results, error_bands
271
272
273def _compute_error_bands_modeled_savings(
274    totals_metrics_baseline,
275    totals_metrics_reporting,
276    results,
277    interval_baseline,
278    interval_reporting,
279    confidence_level,
280):
281    num_parameters_baseline = float(totals_metrics_baseline.num_parameters)
282    num_parameters_reporting = float(totals_metrics_reporting.num_parameters)
283
284    base_obs_baseline = float(totals_metrics_baseline.observed_length)
285    base_obs_reporting = float(totals_metrics_reporting.observed_length)
286
287    if (interval_baseline.startswith("billing")) & (len(results.dropna().index) > 0):
288        post_obs_baseline = float(
289            round((results.index[-1] - results.index[0]).days / 30.0)
290        )
291    else:
292        post_obs_baseline = float(results["modeled_baseline_usage"].dropna().shape[0])
293
294    if (interval_reporting.startswith("billing")) & (len(results.dropna().index) > 0):
295        post_obs_reporting = float(
296            round((results.index[-1] - results.index[0]).days / 30.0)
297        )
298    else:
299        post_obs_reporting = float(results["modeled_reporting_usage"].dropna().shape[0])
300
301    degrees_of_freedom_baseline = float(base_obs_baseline - num_parameters_baseline)
302    degrees_of_freedom_reporting = float(base_obs_reporting - num_parameters_reporting)
303    single_tailed_confidence_level = 1 - ((1 - confidence_level) / 2)
304    t_stat_baseline = t.ppf(single_tailed_confidence_level, degrees_of_freedom_baseline)
305    t_stat_reporting = t.ppf(
306        single_tailed_confidence_level, degrees_of_freedom_reporting
307    )
308
309    rmse_base_residuals_baseline = float(totals_metrics_baseline.rmse_adj)
310    rmse_base_residuals_reporting = float(totals_metrics_reporting.rmse_adj)
311    autocorr_resid_baseline = totals_metrics_baseline.autocorr_resid
312    autocorr_resid_reporting = totals_metrics_reporting.autocorr_resid
313
314    base_avg_baseline = float(totals_metrics_baseline.observed_mean)
315    base_avg_reporting = float(totals_metrics_reporting.observed_mean)
316
317    # these result in division by zero error for fsu_error_band
318    if (
319        post_obs_baseline == 0
320        or autocorr_resid_baseline is None
321        or abs(autocorr_resid_baseline) == 1
322        or base_obs_baseline == 0
323        or base_avg_baseline == 0
324        or post_obs_reporting == 0
325        or autocorr_resid_reporting is None
326        or abs(autocorr_resid_reporting) == 1
327        or base_obs_reporting == 0
328        or base_avg_reporting == 0
329    ):
330        return None
331
332    autocorr_resid_baseline = float(autocorr_resid_baseline)
333    autocorr_resid_reporting = float(autocorr_resid_reporting)
334
335    nprime_baseline = float(
336        base_obs_baseline
337        * (1 - autocorr_resid_baseline)
338        / (1 + autocorr_resid_baseline)
339    )
340    nprime_reporting = float(
341        base_obs_reporting
342        * (1 - autocorr_resid_reporting)
343        / (1 + autocorr_resid_reporting)
344    )
345
346    total_base_energy_baseline = float(base_avg_baseline * base_obs_baseline)
347    total_base_energy_reporting = float(base_avg_reporting * base_obs_reporting)
348
349    fsu_error_band_baseline = _compute_fsu_error(
350        t_stat_baseline,
351        interval_baseline,
352        post_obs_baseline,
353        total_base_energy_baseline,
354        rmse_base_residuals_baseline,
355        base_avg_baseline,
356        base_obs_baseline,
357        nprime_baseline,
358    )
359
360    fsu_error_band_reporting = _compute_fsu_error(
361        t_stat_reporting,
362        interval_reporting,
363        post_obs_reporting,
364        total_base_energy_reporting,
365        rmse_base_residuals_reporting,
366        base_avg_reporting,
367        base_obs_reporting,
368        nprime_reporting,
369    )
370
371    return {
372        "FSU Error Band: Baseline": fsu_error_band_baseline,
373        "FSU Error Band: Reporting": fsu_error_band_reporting,
374        "FSU Error Band": (
375            fsu_error_band_baseline ** 2.0 + fsu_error_band_reporting ** 2.0
376        )
377        ** 0.5,
378    }
379
380
381def modeled_savings(
382    baseline_model,
383    reporting_model,
384    result_index,
385    temperature_data,
386    with_disaggregated=False,
387    confidence_level=0.90,
388    predict_kwargs=None,
389):
390    """Compute modeled savings, i.e., savings in which baseline and reporting
391    usage values are based on models. This is appropriate for annualizing or
392    weather normalizing models.
393
394    Parameters
395    ----------
396    baseline_model : :any:`eemeter.CalTRACKUsagePerDayCandidateModel`
397        Model to use for predicting pre-intervention usage.
398    reporting_model : :any:`eemeter.CalTRACKUsagePerDayCandidateModel`
399        Model to use for predicting post-intervention usage.
400    result_index : :any:`pandas.DatetimeIndex`
401        The dates for which usage should be modeled.
402    temperature_data : :any:`pandas.Series`
403        Hourly-frequency timeseries of temperature data during the modeled
404        period.
405    with_disaggregated : :any:`bool`, optional
406        If True, calculate modeled disaggregated usage estimates and savings.
407    confidence_level : :any:`float`, optional
408        The two-tailed confidence level used to calculate the t-statistic used
409        in calculation of the error bands.
410
411        Ignored if not computing error bands.
412    predict_kwargs : :any:`dict`, optional
413        Extra kwargs to pass to the baseline_model.predict and
414        reporting_model.predict methods.
415
416    Returns
417    -------
418    results : :any:`pandas.DataFrame`
419        DataFrame with modeled savings, indexed with the result_index. Will
420        include the following columns:
421
422        - ``modeled_baseline_usage``
423        - ``modeled_reporting_usage``
424        - ``modeled_savings``
425
426        If `with_disaggregated` is set to True, the following columns will also
427        be in the results DataFrame:
428
429        - ``modeled_baseline_base_load``
430        - ``modeled_baseline_cooling_load``
431        - ``modeled_baseline_heating_load``
432        - ``modeled_reporting_base_load``
433        - ``modeled_reporting_cooling_load``
434        - ``modeled_reporting_heating_load``
435        - ``modeled_base_load_savings``
436        - ``modeled_cooling_load_savings``
437        - ``modeled_heating_load_savings``
438    error_bands : :any:`dict`, optional
439        If baseline_model and reporting_model are instances of
440        CalTRACKUsagePerDayModelResults, will also return a dictionary of
441        FSU and error bands for the aggregated energy savings over the
442        normal year period.
443    """
444    prediction_index = result_index
445
446    if predict_kwargs is None:
447        predict_kwargs = {}
448
449    model_type = None  # generic
450    if isinstance(baseline_model, CalTRACKUsagePerDayModelResults):
451        model_type = "usage_per_day"
452
453    if model_type == "usage_per_day" and with_disaggregated:
454        predict_kwargs["with_disaggregated"] = True
455
456    def _predicted_usage(model):
457        model_prediction = model.predict(
458            prediction_index, temperature_data, **predict_kwargs
459        )
460        predicted_usage = model_prediction.result
461        return predicted_usage
462
463    predicted_baseline_usage = _predicted_usage(baseline_model)
464    predicted_reporting_usage = _predicted_usage(reporting_model)
465    modeled_baseline_usage = predicted_baseline_usage["predicted_usage"].to_frame(
466        "modeled_baseline_usage"
467    )
468    modeled_reporting_usage = predicted_reporting_usage["predicted_usage"].to_frame(
469        "modeled_reporting_usage"
470    )
471
472    def modeled_savings_func(row):
473        return row.modeled_baseline_usage - row.modeled_reporting_usage
474
475    results = modeled_baseline_usage.join(modeled_reporting_usage).assign(
476        modeled_savings=modeled_savings_func
477    )
478
479    if model_type == "usage_per_day" and with_disaggregated:
480        modeled_baseline_usage_disaggregated = predicted_baseline_usage[
481            ["base_load", "heating_load", "cooling_load"]
482        ].rename(
483            columns={
484                "base_load": "modeled_baseline_base_load",
485                "heating_load": "modeled_baseline_heating_load",
486                "cooling_load": "modeled_baseline_cooling_load",
487            }
488        )
489
490        modeled_reporting_usage_disaggregated = predicted_reporting_usage[
491            ["base_load", "heating_load", "cooling_load"]
492        ].rename(
493            columns={
494                "base_load": "modeled_reporting_base_load",
495                "heating_load": "modeled_reporting_heating_load",
496                "cooling_load": "modeled_reporting_cooling_load",
497            }
498        )
499
500        def modeled_base_load_savings_func(row):
501            return row.modeled_baseline_base_load - row.modeled_reporting_base_load
502
503        def modeled_heating_load_savings_func(row):
504            return (
505                row.modeled_baseline_heating_load - row.modeled_reporting_heating_load
506            )
507
508        def modeled_cooling_load_savings_func(row):
509            return (
510                row.modeled_baseline_cooling_load - row.modeled_reporting_cooling_load
511            )
512
513        results = (
514            results.join(modeled_baseline_usage_disaggregated)
515            .join(modeled_reporting_usage_disaggregated)
516            .assign(
517                modeled_base_load_savings=modeled_base_load_savings_func,
518                modeled_heating_load_savings=modeled_heating_load_savings_func,
519                modeled_cooling_load_savings=modeled_cooling_load_savings_func,
520            )
521        )
522
523    results = results.dropna().reindex(results.index)  # carry NaNs
524
525    error_bands = None
526    if model_type == "usage_per_day":  # has totals_metrics
527        error_bands = _compute_error_bands_modeled_savings(
528            baseline_model.totals_metrics,
529            reporting_model.totals_metrics,
530            results,
531            baseline_model.interval,
532            reporting_model.interval,
533            confidence_level,
534        )
535    return results, error_bands
536