import numpy as np
from scipy import optimize
from numpy.testing import assert_allclose
from scipy.special import factorial, xlogy
from itertools import product
import pytest

from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.dummy import DummyRegressor
from sklearn.model_selection import GridSearchCV

from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import max_error
from sklearn.metrics import mean_pinball_loss
from sklearn.metrics import r2_score
from sklearn.metrics import mean_tweedie_deviance
from sklearn.metrics import d2_tweedie_score
from sklearn.metrics import make_scorer

from sklearn.metrics._regression import _check_reg_targets

from sklearn.exceptions import UndefinedMetricWarning


def test_regression_metrics(n_samples=50):
    y_true = np.arange(n_samples)
    y_pred = y_true + 1
    y_pred_2 = y_true - 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.0)
    assert_almost_equal(
        mean_squared_log_error(y_true, y_pred),
        mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred)),
    )
    assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.0)
    assert_almost_equal(mean_pinball_loss(y_true, y_pred), 0.5)
    assert_almost_equal(mean_pinball_loss(y_true, y_pred_2), 0.5)
    assert_almost_equal(mean_pinball_loss(y_true, y_pred, alpha=0.4), 0.6)
    assert_almost_equal(mean_pinball_loss(y_true, y_pred_2, alpha=0.4), 0.4)
    assert_almost_equal(median_absolute_error(y_true, y_pred), 1.0)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    assert np.isfinite(mape)
    assert mape > 1e6
    assert_almost_equal(max_error(y_true, y_pred), 1.0)
    assert_almost_equal(r2_score(y_true, y_pred), 0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.0)
    assert_almost_equal(
        mean_tweedie_deviance(y_true, y_pred, power=0),
        mean_squared_error(y_true, y_pred),
    )
    assert_almost_equal(
        d2_tweedie_score(y_true, y_pred, power=0), r2_score(y_true, y_pred)
    )

    # Tweedie deviance needs strictly positive y_pred, except for p=0;
    # p >= 2 additionally needs strictly positive y_true.
    # The closed-form results below were evaluated with sympy (see the
    # illustrative sketch after this test).
    y_true = np.arange(1, 1 + n_samples)
    y_pred = 2 * y_true
    n = n_samples
    assert_almost_equal(
        mean_tweedie_deviance(y_true, y_pred, power=-1),
        5 / 12 * n * (n ** 2 + 2 * n + 1),
    )
    assert_almost_equal(
        mean_tweedie_deviance(y_true, y_pred, power=1), (n + 1) * (1 - np.log(2))
    )
    assert_almost_equal(
        mean_tweedie_deviance(y_true, y_pred, power=2), 2 * np.log(2) - 1
    )
    assert_almost_equal(
        mean_tweedie_deviance(y_true, y_pred, power=3 / 2),
        ((6 * np.sqrt(2) - 8) / n) * np.sqrt(y_true).sum(),
    )
    assert_almost_equal(
        mean_tweedie_deviance(y_true, y_pred, power=3), np.sum(1 / y_true) / (4 * n)
    )

    dev_mean = 2 * np.mean(xlogy(y_true, 2 * y_true / (n + 1)))
    assert_almost_equal(
        d2_tweedie_score(y_true, y_pred, power=1),
        1 - (n + 1) * (1 - np.log(2)) / dev_mean,
    )

    dev_mean = 2 * np.log((n + 1) / 2) - 2 / n * np.log(factorial(n))
    assert_almost_equal(
        d2_tweedie_score(y_true, y_pred, power=2), 1 - (2 * np.log(2) - 1) / dev_mean
    )
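

# Hedged sketch (not part of the original test suite): one way the closed-form
# Tweedie deviance values asserted above could be re-derived symbolically with
# sympy.  The helper name and the choice of power=-1 are illustrative
# assumptions; for power p not in {0, 1, 2} the unit deviance is
#     d(y, mu) = 2 * (y**(2-p) / ((1-p) * (2-p))
#                     - y * mu**(1-p) / (1-p)
#                     + mu**(2-p) / (2-p)).
def _sympy_tweedie_closed_form_sketch():  # pragma: no cover - illustrative only
    import sympy

    y, n = sympy.symbols("y n", positive=True, integer=True)
    p = -1
    mu = 2 * y  # same y_pred = 2 * y_true setup as in test_regression_metrics
    unit_dev = 2 * (
        y ** (2 - p) / ((1 - p) * (2 - p))
        - y * mu ** (1 - p) / (1 - p)
        + mu ** (2 - p) / (2 - p)
    )
    mean_dev = sympy.summation(unit_dev, (y, 1, n)) / n
    expected = sympy.Rational(5, 12) * n * (n ** 2 + 2 * n + 1)
    assert sympy.simplify(mean_dev - expected) == 0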


def test_mean_squared_error_multioutput_raw_value_squared():
    # non-regression test for
    # https://github.com/scikit-learn/scikit-learn/pull/16323
    mse1 = mean_squared_error([[1]], [[10]], multioutput="raw_values", squared=True)
    mse2 = mean_squared_error([[1]], [[10]], multioutput="raw_values", squared=False)
    assert np.sqrt(mse1) == pytest.approx(mse2)


def test_multioutput_regression():
    y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
    y_pred = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])

    error = mean_squared_error(y_true, y_pred)
    assert_almost_equal(error, (1.0 / 3 + 2.0 / 3 + 2.0 / 3) / 4.0)

    error = mean_squared_error(y_true, y_pred, squared=False)
    assert_almost_equal(error, 0.454, decimal=2)

    error = mean_squared_log_error(y_true, y_pred)
    assert_almost_equal(error, 0.200, decimal=2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    error = mean_absolute_error(y_true, y_pred)
    assert_almost_equal(error, (1.0 + 2.0 / 3) / 4.0)

    error = mean_pinball_loss(y_true, y_pred)
    assert_almost_equal(error, (1.0 + 2.0 / 3) / 8.0)

    error = np.around(mean_absolute_percentage_error(y_true, y_pred), decimals=2)
    assert np.isfinite(error)
    assert error > 1e6
    error = median_absolute_error(y_true, y_pred)
    assert_almost_equal(error, (1.0 + 1.0) / 4.0)

    error = r2_score(y_true, y_pred, multioutput="variance_weighted")
    assert_almost_equal(error, 1.0 - 5.0 / 2)
    error = r2_score(y_true, y_pred, multioutput="uniform_average")
    assert_almost_equal(error, -0.875)


def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.0], [0.0]), 0.0)
    assert_almost_equal(mean_squared_error([0.0], [0.0], squared=False), 0.0)
    assert_almost_equal(mean_squared_log_error([0.0], [0.0]), 0.0)
    assert_almost_equal(mean_absolute_error([0.0], [0.0]), 0.0)
    assert_almost_equal(mean_pinball_loss([0.0], [0.0]), 0.0)
    assert_almost_equal(mean_absolute_percentage_error([0.0], [0.0]), 0.0)
    assert_almost_equal(median_absolute_error([0.0], [0.0]), 0.0)
    assert_almost_equal(max_error([0.0], [0.0]), 0.0)
    assert_almost_equal(explained_variance_score([0.0], [0.0]), 1.0)
    assert_almost_equal(r2_score([0.0, 1], [0.0, 1]), 1.0)
    msg = (
        "Mean Squared Logarithmic Error cannot be used when targets "
        "contain negative values."
    )
    with pytest.raises(ValueError, match=msg):
        mean_squared_log_error([-1.0], [-1.0])
    msg = (
        "Mean Squared Logarithmic Error cannot be used when targets "
        "contain negative values."
    )
    with pytest.raises(ValueError, match=msg):
        mean_squared_log_error([1.0, 2.0, 3.0], [1.0, -2.0, 3.0])
    msg = (
        "Mean Squared Logarithmic Error cannot be used when targets "
        "contain negative values."
    )
    with pytest.raises(ValueError, match=msg):
        mean_squared_log_error([1.0, -2.0, 3.0], [1.0, 2.0, 3.0])

    # Tweedie deviance error
    power = -1.2
    assert_allclose(
        mean_tweedie_deviance([0], [1.0], power=power), 2 / (2 - power), rtol=1e-3
    )
    msg = "can only be used on strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError, match=msg):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)

    assert_almost_equal(mean_tweedie_deviance([0.0], [0.0], power=0), 0.0, 2)

    power = 1.0
    msg = "only be used on non-negative y and strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError, match=msg):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)

    power = 1.5
    assert_allclose(mean_tweedie_deviance([0.0], [1.0], power=power), 2 / (2 - power))
    msg = "only be used on non-negative y and strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError, match=msg):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)

    power = 2.0
    assert_allclose(mean_tweedie_deviance([1.0], [1.0], power=power), 0.00, atol=1e-8)
    msg = "can only be used on strictly positive y and y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError, match=msg):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)

    power = 3.0
    assert_allclose(mean_tweedie_deviance([1.0], [1.0], power=power), 0.00, atol=1e-8)
    msg = "can only be used on strictly positive y and y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError, match=msg):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)

    power = 0.5
    with pytest.raises(ValueError, match="is only defined for power<=0 and power>=1"):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError, match="is only defined for power<=0 and power>=1"):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)
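

# Hedged note (added for clarity, not part of the original file): the
# 2 / (2 - power) values asserted in test_regression_metrics_at_limits follow
# from the Tweedie unit deviance for power < 1 or 1 < power < 2,
#     d(y, mu) = 2 * (y**(2-p) / ((1-p) * (2-p)) - y * mu**(1-p) / (1-p)
#                     + mu**(2-p) / (2-p)),
# which at y = 0 and mu = 1 reduces to 2 / (2 - p).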


def test__check_reg_targets():
    # All of length 3
    EXAMPLES = [
        ("continuous", [1, 2, 3], 1),
        ("continuous", [[1], [2], [3]], 1),
        ("continuous-multioutput", [[1, 1], [2, 2], [3, 1]], 2),
        ("continuous-multioutput", [[5, 1], [4, 2], [3, 1]], 2),
        ("continuous-multioutput", [[1, 3, 4], [2, 2, 2], [3, 1, 1]], 3),
    ]

    for (type1, y1, n_out1), (type2, y2, n_out2) in product(EXAMPLES, repeat=2):

        if type1 == type2 and n_out1 == n_out2:
            y_type, y_check1, y_check2, multioutput = _check_reg_targets(y1, y2, None)
            assert type1 == y_type
            if type1 == "continuous":
                assert_array_equal(y_check1, np.reshape(y1, (-1, 1)))
                assert_array_equal(y_check2, np.reshape(y2, (-1, 1)))
            else:
                assert_array_equal(y_check1, y1)
                assert_array_equal(y_check2, y2)
        else:
            with pytest.raises(ValueError):
                _check_reg_targets(y1, y2, None)


def test__check_reg_targets_exception():
    invalid_multioutput = "this_value_is_not_valid"
    expected_message = (
        "Allowed 'multioutput' string values are.+You provided multioutput={!r}".format(
            invalid_multioutput
        )
    )
    with pytest.raises(ValueError, match=expected_message):
        _check_reg_targets([1, 2, 3], [[1], [2], [3]], invalid_multioutput)


def test_regression_multioutput_array():
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    mse = mean_squared_error(y_true, y_pred, multioutput="raw_values")
    mae = mean_absolute_error(y_true, y_pred, multioutput="raw_values")
    err_msg = (
        "multioutput is expected to be 'raw_values' "
        "or 'uniform_average' but we got 'variance_weighted' instead."
    )
    with pytest.raises(ValueError, match=err_msg):
        mean_pinball_loss(y_true, y_pred, multioutput="variance_weighted")
    pbl = mean_pinball_loss(y_true, y_pred, multioutput="raw_values")
    mape = mean_absolute_percentage_error(y_true, y_pred, multioutput="raw_values")
    r = r2_score(y_true, y_pred, multioutput="raw_values")
    evs = explained_variance_score(y_true, y_pred, multioutput="raw_values")

    assert_array_almost_equal(mse, [0.125, 0.5625], decimal=2)
    assert_array_almost_equal(mae, [0.25, 0.625], decimal=2)
    assert_array_almost_equal(pbl, [0.25 / 2, 0.625 / 2], decimal=2)
    assert_array_almost_equal(mape, [0.0778, 0.2262], decimal=2)
    assert_array_almost_equal(r, [0.95, 0.93], decimal=2)
    assert_array_almost_equal(evs, [0.95, 0.93], decimal=2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    y_true = [[0, 0]] * 4
    y_pred = [[1, 1]] * 4
    mse = mean_squared_error(y_true, y_pred, multioutput="raw_values")
    mae = mean_absolute_error(y_true, y_pred, multioutput="raw_values")
    pbl = mean_pinball_loss(y_true, y_pred, multioutput="raw_values")
    r = r2_score(y_true, y_pred, multioutput="raw_values")
    assert_array_almost_equal(mse, [1.0, 1.0], decimal=2)
    assert_array_almost_equal(mae, [1.0, 1.0], decimal=2)
    assert_array_almost_equal(pbl, [0.5, 0.5], decimal=2)
    assert_array_almost_equal(r, [0.0, 0.0], decimal=2)

    r = r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], multioutput="raw_values")
    assert_array_almost_equal(r, [0, -3.5], decimal=2)
    assert np.mean(r) == r2_score(
        [[0, -1], [0, 1]], [[2, 2], [1, 1]], multioutput="uniform_average"
    )
    evs = explained_variance_score(
        [[0, -1], [0, 1]], [[2, 2], [1, 1]], multioutput="raw_values"
    )
    assert_array_almost_equal(evs, [0, -1.25], decimal=2)

    # Checking for the condition in which both the numerator and the
    # denominator are zero.
    y_true = [[1, 3], [-1, 2]]
    y_pred = [[1, 4], [-1, 1]]
    r2 = r2_score(y_true, y_pred, multioutput="raw_values")
    assert_array_almost_equal(r2, [1.0, -3.0], decimal=2)
    assert np.mean(r2) == r2_score(y_true, y_pred, multioutput="uniform_average")
    evs = explained_variance_score(y_true, y_pred, multioutput="raw_values")
    assert_array_almost_equal(evs, [1.0, -3.0], decimal=2)
    assert np.mean(evs) == explained_variance_score(y_true, y_pred)

    # Handling msle separately as it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput="raw_values")
    msle2 = mean_squared_error(
        np.log(1 + y_true), np.log(1 + y_pred), multioutput="raw_values"
    )
    assert_array_almost_equal(msle, msle2, decimal=2)
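

# Hedged worked example (added for clarity, not in the original file): for the
# first y_true/y_pred pair in test_regression_multioutput_array, the per-output
# squared errors are [0, 0.25, 0.25, 0] and [1, 0, 1, 0.25], so the
# "raw_values" MSEs are [0.5 / 4, 2.25 / 4] = [0.125, 0.5625], matching the
# assertion above.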


def test_regression_custom_weights():
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    msew = mean_squared_error(y_true, y_pred, multioutput=[0.4, 0.6])
    rmsew = mean_squared_error(y_true, y_pred, multioutput=[0.4, 0.6], squared=False)
    maew = mean_absolute_error(y_true, y_pred, multioutput=[0.4, 0.6])
    mapew = mean_absolute_percentage_error(y_true, y_pred, multioutput=[0.4, 0.6])
    rw = r2_score(y_true, y_pred, multioutput=[0.4, 0.6])
    evsw = explained_variance_score(y_true, y_pred, multioutput=[0.4, 0.6])

    assert_almost_equal(msew, 0.39, decimal=2)
    assert_almost_equal(rmsew, 0.59, decimal=2)
    assert_almost_equal(maew, 0.475, decimal=3)
    assert_almost_equal(mapew, 0.1668, decimal=2)
    assert_almost_equal(rw, 0.94, decimal=2)
    assert_almost_equal(evsw, 0.94, decimal=2)

    # Handling msle separately as it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
    msle2 = mean_squared_error(
        np.log(1 + y_true), np.log(1 + y_pred), multioutput=[0.3, 0.7]
    )
    assert_almost_equal(msle, msle2, decimal=2)
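

# Hedged sketch (assumption, not part of the original suite): list-valued
# `multioutput` weights behave like a weighted average of the per-output
# "raw_values" scores, e.g. 0.4 * 0.125 + 0.6 * 0.5625 = 0.3875 for the MSE
# asserted as 0.39 above.  The helper name is illustrative.
def _example_multioutput_weights_are_weighted_average():  # pragma: no cover
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]
    raw = mean_squared_error(y_true, y_pred, multioutput="raw_values")
    weighted = mean_squared_error(y_true, y_pred, multioutput=[0.4, 0.6])
    assert weighted == pytest.approx(np.average(raw, weights=[0.4, 0.6]))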


@pytest.mark.parametrize("metric", [r2_score, d2_tweedie_score])
def test_regression_single_sample(metric):
    y_true = [0]
    y_pred = [1]
    warning_msg = "not well-defined with less than two samples."

    # Trigger the warning
    with pytest.warns(UndefinedMetricWarning, match=warning_msg):
        score = metric(y_true, y_pred)
        assert np.isnan(score)


def test_deprecation_positional_arguments_mape():
    y_true = [1, 1, 1]
    y_pred = [1, 0, 1]
    sample_weights = [0.5, 0.1, 0.2]
    multioutput = "raw_values"

    warning_msg = "passing these as positional arguments will result in an error"

    # Trigger the warning
    with pytest.warns(FutureWarning, match=warning_msg):
        mean_absolute_percentage_error(y_true, y_pred, sample_weights, multioutput)


def test_tweedie_deviance_continuity():
    n_samples = 100

    y_true = np.random.RandomState(0).rand(n_samples) + 0.1
    y_pred = np.random.RandomState(1).rand(n_samples) + 0.1

    assert_allclose(
        mean_tweedie_deviance(y_true, y_pred, power=0 - 1e-10),
        mean_tweedie_deviance(y_true, y_pred, power=0),
    )

    # As we get closer to the limit (e.g. with a 1e-12 difference in power),
    # the absolute tolerance needed to pass the checks below increases. There
    # are likely numerical precision issues on the edges of the different
    # definition regions.
    assert_allclose(
        mean_tweedie_deviance(y_true, y_pred, power=1 + 1e-10),
        mean_tweedie_deviance(y_true, y_pred, power=1),
        atol=1e-6,
    )

    assert_allclose(
        mean_tweedie_deviance(y_true, y_pred, power=2 - 1e-10),
        mean_tweedie_deviance(y_true, y_pred, power=2),
        atol=1e-6,
    )

    assert_allclose(
        mean_tweedie_deviance(y_true, y_pred, power=2 + 1e-10),
        mean_tweedie_deviance(y_true, y_pred, power=2),
        atol=1e-6,
    )


def test_mean_absolute_percentage_error():
    random_number_generator = np.random.RandomState(42)
    y_true = random_number_generator.exponential(size=100)
    y_pred = 1.2 * y_true
    assert mean_absolute_percentage_error(y_true, y_pred) == pytest.approx(0.2)


@pytest.mark.parametrize(
    "distribution", ["normal", "lognormal", "exponential", "uniform"]
)
@pytest.mark.parametrize("target_quantile", [0.05, 0.5, 0.75])
def test_mean_pinball_loss_on_constant_predictions(distribution, target_quantile):
    if not hasattr(np, "quantile"):
        pytest.skip(
            "This test requires a more recent version of numpy "
            "with support for np.quantile."
        )

    # Check that the pinball loss is minimized by the empirical quantile.
    n_samples = 3000
    rng = np.random.RandomState(42)
    data = getattr(rng, distribution)(size=n_samples)

    # Compute the best possible pinball loss for any constant predictor:
    best_pred = np.quantile(data, target_quantile)
    best_constant_pred = np.full(n_samples, fill_value=best_pred)
    best_pbl = mean_pinball_loss(data, best_constant_pred, alpha=target_quantile)

    # Evaluate the loss on a grid of quantiles
    candidate_predictions = np.quantile(data, np.linspace(0, 1, 100))
    for pred in candidate_predictions:
        # Compute the pinball loss of a constant predictor:
        constant_pred = np.full(n_samples, fill_value=pred)
        pbl = mean_pinball_loss(data, constant_pred, alpha=target_quantile)

        # Check that the loss of this constant predictor is greater or equal
        # than the loss of using the optimal quantile (up to machine
        # precision):
        assert pbl >= best_pbl - np.finfo(best_pbl.dtype).eps

        # Check that the value of the pinball loss matches the analytical
        # formula.
        expected_pbl = (pred - data[data < pred]).sum() * (1 - target_quantile) + (
            data[data >= pred] - pred
        ).sum() * target_quantile
        expected_pbl /= n_samples
        assert_almost_equal(expected_pbl, pbl)

    # Check that we can actually recover the target_quantile by minimizing the
    # pinball loss w.r.t. the constant prediction quantile.
    def objective_func(x):
        constant_pred = np.full(n_samples, fill_value=x)
        return mean_pinball_loss(data, constant_pred, alpha=target_quantile)

    result = optimize.minimize(objective_func, data.mean(), method="Nelder-Mead")
    assert result.success
    # The minimum is not unique with limited data, hence the large tolerance.
    assert result.x == pytest.approx(best_pred, rel=1e-2)
    assert result.fun == pytest.approx(best_pbl)
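

# Hedged sketch (assumption, not part of the original suite): the pinball
# (quantile) loss exercised above can be written directly as
#     mean(alpha * max(y - q, 0) + (1 - alpha) * max(q - y, 0))
# for a constant prediction q; a direct numpy implementation should agree with
# mean_pinball_loss.  The helper name is illustrative.
def _example_pinball_loss_definition():  # pragma: no cover - illustrative only
    rng = np.random.RandomState(0)
    y = rng.exponential(size=50)
    q = np.full_like(y, np.median(y))
    alpha = 0.3
    diff = y - q
    expected = np.mean(
        alpha * np.maximum(diff, 0) + (1 - alpha) * np.maximum(-diff, 0)
    )
    assert mean_pinball_loss(y, q, alpha=alpha) == pytest.approx(expected)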


def test_dummy_quantile_parameter_tuning():
    # Integration test to check that it is possible to use the pinball loss to
    # tune the hyperparameter of a quantile regressor. This is conceptually
    # similar to the previous test but using the scikit-learn estimator and
    # scoring API instead.
    n_samples = 1000
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, 5))  # Ignored
    y = rng.exponential(size=n_samples)

    all_quantiles = [0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95]
    for alpha in all_quantiles:
        neg_mean_pinball_loss = make_scorer(
            mean_pinball_loss,
            alpha=alpha,
            greater_is_better=False,
        )
        regressor = DummyRegressor(strategy="quantile", quantile=0.25)
        grid_search = GridSearchCV(
            regressor,
            param_grid=dict(quantile=all_quantiles),
            scoring=neg_mean_pinball_loss,
        ).fit(X, y)

        assert grid_search.best_params_["quantile"] == pytest.approx(alpha)
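

# Hedged usage sketch (assumption, not part of the original suite): the
# make_scorer(..., greater_is_better=False) wrapper used above returns the
# negated pinball loss, so an estimator predicting a quantile close to alpha
# scores higher than one predicting a quantile far from it.  The helper name
# is illustrative.
def _example_neg_pinball_scorer():  # pragma: no cover - illustrative only
    rng = np.random.RandomState(0)
    X = rng.normal(size=(100, 2))  # Ignored by DummyRegressor
    y = rng.exponential(size=100)
    scorer = make_scorer(mean_pinball_loss, alpha=0.9, greater_is_better=False)
    good = DummyRegressor(strategy="quantile", quantile=0.9).fit(X, y)
    bad = DummyRegressor(strategy="quantile", quantile=0.1).fit(X, y)
    assert scorer(good, X, y) > scorer(bad, X, y)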