1#!/usr/bin/env python 2# -*- coding: utf-8 -*- 3""" 4 5 Copyright 2014-2019 OpenEEmeter contributors 6 7 Licensed under the Apache License, Version 2.0 (the "License"); 8 you may not use this file except in compliance with the License. 9 You may obtain a copy of the License at 10 11 http://www.apache.org/licenses/LICENSE-2.0 12 13 Unless required by applicable law or agreed to in writing, software 14 distributed under the License is distributed on an "AS IS" BASIS, 15 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 See the License for the specific language governing permissions and 17 limitations under the License. 18 19""" 20from scipy.stats import t 21 22from .caltrack.usage_per_day import CalTRACKUsagePerDayModelResults 23 24 25__all__ = ("metered_savings", "modeled_savings") 26 27 28def _compute_ols_error( 29 t_stat, 30 rmse_base_residuals, 31 post_obs, 32 base_obs, 33 base_avg, 34 post_avg, 35 base_var, 36 nprime, 37): 38 ols_model_agg_error = ( 39 (t_stat * rmse_base_residuals * post_obs) 40 / (base_obs ** 0.5) 41 * (1.0 + ((base_avg - post_avg) ** 2.0 / base_var)) ** 0.5 42 ) 43 44 ols_noise_agg_error = ( 45 t_stat * rmse_base_residuals * (post_obs * base_obs / nprime) ** 0.5 46 ) 47 48 ols_total_agg_error = ( 49 ols_model_agg_error ** 2.0 + ols_noise_agg_error ** 2.0 50 ) ** 0.5 51 52 return ols_total_agg_error, ols_model_agg_error, ols_noise_agg_error 53 54 55def _compute_fsu_error( 56 t_stat, 57 interval, 58 post_obs, 59 total_base_energy, 60 rmse_base_residuals, 61 base_avg, 62 base_obs, 63 nprime, 64): 65 if interval.startswith("billing"): 66 a_coeff = -0.00022 67 b_coeff = 0.03306 68 c_coeff = 0.94054 69 months_reporting = float(post_obs) 70 else: # daily 71 a_coeff = -0.00024 72 b_coeff = 0.03535 73 c_coeff = 1.00286 74 months_reporting = float(post_obs) / 30.0 75 76 fsu_error_band = total_base_energy * ( 77 t_stat 78 * (a_coeff * months_reporting ** 2.0 + b_coeff * months_reporting + c_coeff) 79 * (rmse_base_residuals / base_avg) 80 * ((base_obs / nprime) * (1.0 + (2.0 / nprime)) * (1.0 / post_obs)) ** 0.5 81 ) 82 83 return fsu_error_band 84 85 86def _compute_error_bands_metered_savings( 87 totals_metrics, results, interval, confidence_level 88): 89 num_parameters = float(totals_metrics.num_parameters) 90 91 base_obs = float(totals_metrics.observed_length) 92 if (interval.startswith("billing")) & (len(results.dropna().index) > 0): 93 post_obs = float(round((results.index[-1] - results.index[0]).days / 30.0)) 94 else: 95 post_obs = float(results["reporting_observed"].dropna().shape[0]) 96 97 degrees_of_freedom = float(base_obs - num_parameters) 98 single_tailed_confidence_level = 1 - ((1 - confidence_level) / 2) 99 t_stat = t.ppf(single_tailed_confidence_level, degrees_of_freedom) 100 101 rmse_base_residuals = float(totals_metrics.rmse_adj) 102 autocorr_resid = totals_metrics.autocorr_resid 103 104 base_avg = float(totals_metrics.observed_mean) 105 post_avg = float(results["reporting_observed"].mean()) 106 107 base_var = float(totals_metrics.observed_variance) 108 109 # these result in division by zero error for fsu_error_band 110 if ( 111 post_obs == 0 112 or autocorr_resid is None 113 or abs(autocorr_resid) == 1 114 or base_obs == 0 115 or base_avg == 0 116 or base_var == 0 117 ): 118 return None 119 120 autocorr_resid = float(autocorr_resid) 121 122 nprime = float(base_obs * (1 - autocorr_resid) / (1 + autocorr_resid)) 123 124 total_base_energy = float(base_avg * base_obs) 125 126 ols_total_agg_error, ols_model_agg_error, ols_noise_agg_error = _compute_ols_error( 127 t_stat, 128 rmse_base_residuals, 129 post_obs, 130 base_obs, 131 base_avg, 132 post_avg, 133 base_var, 134 nprime, 135 ) 136 137 fsu_error_band = _compute_fsu_error( 138 t_stat, 139 interval, 140 post_obs, 141 total_base_energy, 142 rmse_base_residuals, 143 base_avg, 144 base_obs, 145 nprime, 146 ) 147 148 return { 149 "FSU Error Band": fsu_error_band, 150 "OLS Error Band": ols_total_agg_error, 151 "OLS Error Band: Model Error": ols_model_agg_error, 152 "OLS Error Band: Noise": ols_noise_agg_error, 153 } 154 155 156def metered_savings( 157 baseline_model, 158 reporting_meter_data, 159 temperature_data, 160 with_disaggregated=False, 161 confidence_level=0.90, 162 predict_kwargs=None, 163): 164 """Compute metered savings, i.e., savings in which the baseline model 165 is used to calculate the modeled usage in the reporting period. This 166 modeled usage is then compared to the actual usage from the reporting period. 167 Also compute two measures of the uncertainty of the aggregate savings estimate, 168 a fractional savings uncertainty (FSU) error band and an OLS error band. (To convert 169 the FSU error band into FSU, divide by total estimated savings.) 170 171 Parameters 172 ---------- 173 baseline_model : :any:`eemeter.CalTRACKUsagePerDayModelResults` 174 Object to use for predicting pre-intervention usage. 175 reporting_meter_data : :any:`pandas.DataFrame` 176 The observed reporting period data (totals). Savings will be computed for the 177 periods supplied in the reporting period data. 178 temperature_data : :any:`pandas.Series` 179 Hourly-frequency timeseries of temperature data during the reporting 180 period. 181 with_disaggregated : :any:`bool`, optional 182 If True, calculate baseline counterfactual disaggregated usage 183 estimates. Savings cannot be disaggregated for metered savings. For 184 that, use :any:`eemeter.modeled_savings`. 185 confidence_level : :any:`float`, optional 186 The two-tailed confidence level used to calculate the t-statistic used 187 in calculation of the error bands. 188 189 Ignored if not computing error bands. 190 predict_kwargs : :any:`dict`, optional 191 Extra kwargs to pass to the baseline_model.predict method. 192 193 Returns 194 ------- 195 results : :any:`pandas.DataFrame` 196 DataFrame with metered savings, indexed with 197 ``reporting_meter_data.index``. Will include the following columns: 198 199 - ``counterfactual_usage`` (baseline model projected into reporting period) 200 - ``reporting_observed`` (given by reporting_meter_data) 201 - ``metered_savings`` 202 203 If `with_disaggregated` is set to True, the following columns will also 204 be in the results DataFrame: 205 206 - ``counterfactual_base_load`` 207 - ``counterfactual_heating_load`` 208 - ``counterfactual_cooling_load`` 209 210 error_bands : :any:`dict`, optional 211 If baseline_model is an instance of CalTRACKUsagePerDayModelResults, 212 will also return a dictionary of FSU and OLS error bands for the 213 aggregated energy savings over the post period. 214 """ 215 if predict_kwargs is None: 216 predict_kwargs = {} 217 218 model_type = None # generic 219 if isinstance(baseline_model, CalTRACKUsagePerDayModelResults): 220 model_type = "usage_per_day" 221 222 if model_type == "usage_per_day" and with_disaggregated: 223 predict_kwargs["with_disaggregated"] = True 224 225 prediction_index = reporting_meter_data.index 226 model_prediction = baseline_model.predict( 227 prediction_index, temperature_data, **predict_kwargs 228 ) 229 230 predicted_baseline_usage = model_prediction.result 231 232 # CalTrack 3.5.1 233 counterfactual_usage = predicted_baseline_usage["predicted_usage"].to_frame( 234 "counterfactual_usage" 235 ) 236 237 reporting_observed = reporting_meter_data["value"].to_frame("reporting_observed") 238 239 def metered_savings_func(row): 240 return row.counterfactual_usage - row.reporting_observed 241 242 results = reporting_observed.join(counterfactual_usage).assign( 243 metered_savings=metered_savings_func 244 ) 245 246 if model_type == "usage_per_day" and with_disaggregated: 247 counterfactual_usage_disaggregated = predicted_baseline_usage[ 248 ["base_load", "heating_load", "cooling_load"] 249 ].rename( 250 columns={ 251 "base_load": "counterfactual_base_load", 252 "heating_load": "counterfactual_heating_load", 253 "cooling_load": "counterfactual_cooling_load", 254 } 255 ) 256 results = results.join(counterfactual_usage_disaggregated) 257 258 results = results.dropna().reindex(results.index) # carry NaNs 259 260 # compute t-statistic associated with n degrees of freedom 261 # and a two-tailed confidence level. 262 error_bands = None 263 if model_type == "usage_per_day": # has totals_metrics 264 error_bands = _compute_error_bands_metered_savings( 265 baseline_model.totals_metrics, 266 results, 267 baseline_model.interval, 268 confidence_level, 269 ) 270 return results, error_bands 271 272 273def _compute_error_bands_modeled_savings( 274 totals_metrics_baseline, 275 totals_metrics_reporting, 276 results, 277 interval_baseline, 278 interval_reporting, 279 confidence_level, 280): 281 num_parameters_baseline = float(totals_metrics_baseline.num_parameters) 282 num_parameters_reporting = float(totals_metrics_reporting.num_parameters) 283 284 base_obs_baseline = float(totals_metrics_baseline.observed_length) 285 base_obs_reporting = float(totals_metrics_reporting.observed_length) 286 287 if (interval_baseline.startswith("billing")) & (len(results.dropna().index) > 0): 288 post_obs_baseline = float( 289 round((results.index[-1] - results.index[0]).days / 30.0) 290 ) 291 else: 292 post_obs_baseline = float(results["modeled_baseline_usage"].dropna().shape[0]) 293 294 if (interval_reporting.startswith("billing")) & (len(results.dropna().index) > 0): 295 post_obs_reporting = float( 296 round((results.index[-1] - results.index[0]).days / 30.0) 297 ) 298 else: 299 post_obs_reporting = float(results["modeled_reporting_usage"].dropna().shape[0]) 300 301 degrees_of_freedom_baseline = float(base_obs_baseline - num_parameters_baseline) 302 degrees_of_freedom_reporting = float(base_obs_reporting - num_parameters_reporting) 303 single_tailed_confidence_level = 1 - ((1 - confidence_level) / 2) 304 t_stat_baseline = t.ppf(single_tailed_confidence_level, degrees_of_freedom_baseline) 305 t_stat_reporting = t.ppf( 306 single_tailed_confidence_level, degrees_of_freedom_reporting 307 ) 308 309 rmse_base_residuals_baseline = float(totals_metrics_baseline.rmse_adj) 310 rmse_base_residuals_reporting = float(totals_metrics_reporting.rmse_adj) 311 autocorr_resid_baseline = totals_metrics_baseline.autocorr_resid 312 autocorr_resid_reporting = totals_metrics_reporting.autocorr_resid 313 314 base_avg_baseline = float(totals_metrics_baseline.observed_mean) 315 base_avg_reporting = float(totals_metrics_reporting.observed_mean) 316 317 # these result in division by zero error for fsu_error_band 318 if ( 319 post_obs_baseline == 0 320 or autocorr_resid_baseline is None 321 or abs(autocorr_resid_baseline) == 1 322 or base_obs_baseline == 0 323 or base_avg_baseline == 0 324 or post_obs_reporting == 0 325 or autocorr_resid_reporting is None 326 or abs(autocorr_resid_reporting) == 1 327 or base_obs_reporting == 0 328 or base_avg_reporting == 0 329 ): 330 return None 331 332 autocorr_resid_baseline = float(autocorr_resid_baseline) 333 autocorr_resid_reporting = float(autocorr_resid_reporting) 334 335 nprime_baseline = float( 336 base_obs_baseline 337 * (1 - autocorr_resid_baseline) 338 / (1 + autocorr_resid_baseline) 339 ) 340 nprime_reporting = float( 341 base_obs_reporting 342 * (1 - autocorr_resid_reporting) 343 / (1 + autocorr_resid_reporting) 344 ) 345 346 total_base_energy_baseline = float(base_avg_baseline * base_obs_baseline) 347 total_base_energy_reporting = float(base_avg_reporting * base_obs_reporting) 348 349 fsu_error_band_baseline = _compute_fsu_error( 350 t_stat_baseline, 351 interval_baseline, 352 post_obs_baseline, 353 total_base_energy_baseline, 354 rmse_base_residuals_baseline, 355 base_avg_baseline, 356 base_obs_baseline, 357 nprime_baseline, 358 ) 359 360 fsu_error_band_reporting = _compute_fsu_error( 361 t_stat_reporting, 362 interval_reporting, 363 post_obs_reporting, 364 total_base_energy_reporting, 365 rmse_base_residuals_reporting, 366 base_avg_reporting, 367 base_obs_reporting, 368 nprime_reporting, 369 ) 370 371 return { 372 "FSU Error Band: Baseline": fsu_error_band_baseline, 373 "FSU Error Band: Reporting": fsu_error_band_reporting, 374 "FSU Error Band": ( 375 fsu_error_band_baseline ** 2.0 + fsu_error_band_reporting ** 2.0 376 ) 377 ** 0.5, 378 } 379 380 381def modeled_savings( 382 baseline_model, 383 reporting_model, 384 result_index, 385 temperature_data, 386 with_disaggregated=False, 387 confidence_level=0.90, 388 predict_kwargs=None, 389): 390 """Compute modeled savings, i.e., savings in which baseline and reporting 391 usage values are based on models. This is appropriate for annualizing or 392 weather normalizing models. 393 394 Parameters 395 ---------- 396 baseline_model : :any:`eemeter.CalTRACKUsagePerDayCandidateModel` 397 Model to use for predicting pre-intervention usage. 398 reporting_model : :any:`eemeter.CalTRACKUsagePerDayCandidateModel` 399 Model to use for predicting post-intervention usage. 400 result_index : :any:`pandas.DatetimeIndex` 401 The dates for which usage should be modeled. 402 temperature_data : :any:`pandas.Series` 403 Hourly-frequency timeseries of temperature data during the modeled 404 period. 405 with_disaggregated : :any:`bool`, optional 406 If True, calculate modeled disaggregated usage estimates and savings. 407 confidence_level : :any:`float`, optional 408 The two-tailed confidence level used to calculate the t-statistic used 409 in calculation of the error bands. 410 411 Ignored if not computing error bands. 412 predict_kwargs : :any:`dict`, optional 413 Extra kwargs to pass to the baseline_model.predict and 414 reporting_model.predict methods. 415 416 Returns 417 ------- 418 results : :any:`pandas.DataFrame` 419 DataFrame with modeled savings, indexed with the result_index. Will 420 include the following columns: 421 422 - ``modeled_baseline_usage`` 423 - ``modeled_reporting_usage`` 424 - ``modeled_savings`` 425 426 If `with_disaggregated` is set to True, the following columns will also 427 be in the results DataFrame: 428 429 - ``modeled_baseline_base_load`` 430 - ``modeled_baseline_cooling_load`` 431 - ``modeled_baseline_heating_load`` 432 - ``modeled_reporting_base_load`` 433 - ``modeled_reporting_cooling_load`` 434 - ``modeled_reporting_heating_load`` 435 - ``modeled_base_load_savings`` 436 - ``modeled_cooling_load_savings`` 437 - ``modeled_heating_load_savings`` 438 error_bands : :any:`dict`, optional 439 If baseline_model and reporting_model are instances of 440 CalTRACKUsagePerDayModelResults, will also return a dictionary of 441 FSU and error bands for the aggregated energy savings over the 442 normal year period. 443 """ 444 prediction_index = result_index 445 446 if predict_kwargs is None: 447 predict_kwargs = {} 448 449 model_type = None # generic 450 if isinstance(baseline_model, CalTRACKUsagePerDayModelResults): 451 model_type = "usage_per_day" 452 453 if model_type == "usage_per_day" and with_disaggregated: 454 predict_kwargs["with_disaggregated"] = True 455 456 def _predicted_usage(model): 457 model_prediction = model.predict( 458 prediction_index, temperature_data, **predict_kwargs 459 ) 460 predicted_usage = model_prediction.result 461 return predicted_usage 462 463 predicted_baseline_usage = _predicted_usage(baseline_model) 464 predicted_reporting_usage = _predicted_usage(reporting_model) 465 modeled_baseline_usage = predicted_baseline_usage["predicted_usage"].to_frame( 466 "modeled_baseline_usage" 467 ) 468 modeled_reporting_usage = predicted_reporting_usage["predicted_usage"].to_frame( 469 "modeled_reporting_usage" 470 ) 471 472 def modeled_savings_func(row): 473 return row.modeled_baseline_usage - row.modeled_reporting_usage 474 475 results = modeled_baseline_usage.join(modeled_reporting_usage).assign( 476 modeled_savings=modeled_savings_func 477 ) 478 479 if model_type == "usage_per_day" and with_disaggregated: 480 modeled_baseline_usage_disaggregated = predicted_baseline_usage[ 481 ["base_load", "heating_load", "cooling_load"] 482 ].rename( 483 columns={ 484 "base_load": "modeled_baseline_base_load", 485 "heating_load": "modeled_baseline_heating_load", 486 "cooling_load": "modeled_baseline_cooling_load", 487 } 488 ) 489 490 modeled_reporting_usage_disaggregated = predicted_reporting_usage[ 491 ["base_load", "heating_load", "cooling_load"] 492 ].rename( 493 columns={ 494 "base_load": "modeled_reporting_base_load", 495 "heating_load": "modeled_reporting_heating_load", 496 "cooling_load": "modeled_reporting_cooling_load", 497 } 498 ) 499 500 def modeled_base_load_savings_func(row): 501 return row.modeled_baseline_base_load - row.modeled_reporting_base_load 502 503 def modeled_heating_load_savings_func(row): 504 return ( 505 row.modeled_baseline_heating_load - row.modeled_reporting_heating_load 506 ) 507 508 def modeled_cooling_load_savings_func(row): 509 return ( 510 row.modeled_baseline_cooling_load - row.modeled_reporting_cooling_load 511 ) 512 513 results = ( 514 results.join(modeled_baseline_usage_disaggregated) 515 .join(modeled_reporting_usage_disaggregated) 516 .assign( 517 modeled_base_load_savings=modeled_base_load_savings_func, 518 modeled_heating_load_savings=modeled_heating_load_savings_func, 519 modeled_cooling_load_savings=modeled_cooling_load_savings_func, 520 ) 521 ) 522 523 results = results.dropna().reindex(results.index) # carry NaNs 524 525 error_bands = None 526 if model_type == "usage_per_day": # has totals_metrics 527 error_bands = _compute_error_bands_modeled_savings( 528 baseline_model.totals_metrics, 529 reporting_model.totals_metrics, 530 results, 531 baseline_model.interval, 532 reporting_model.interval, 533 confidence_level, 534 ) 535 return results, error_bands 536