from functools import cached_property

import numpy as np
from scipy import stats

from .utils import check_args, seaborn_plt, call_shortcut

__all__ = ["CorrelatedTTest", "two_on_single"]


class Posterior:
    """
    The posterior distribution of differences on a single data set.

    Args:
        mean (float): the mean difference
        var (float): the variance
        df (float): degrees of freedom
        rope (float): rope (default: 0)
        meanx (float): mean score of the first classifier; shown in a plot
        meany (float): mean score of the second classifier; shown in a plot
        names (tuple of str): names of classifiers; shown in a plot
        nsamples (int): the number of samples; used only in property `sample`,
            not in computation of probabilities or plotting (default: 50000)

    """
    def __init__(self, mean, var, df, rope=0, meanx=None, meany=None,
                 *, names=None, nsamples=50000):
        self.meanx = meanx
        self.meany = meany
        self.mean = mean
        self.var = var
        self.df = df
        self.rope = rope
        self.names = names
        self.nsamples = nsamples

    @cached_property
    def sample(self):
        """
        A sample of differences as 1-dimensional array.

        Like posteriors for comparison on multiple data sets, an instance of
        this class will always return the same sample.

        This sample is not used by other methods.
        """
        # NOTE(fix): `cached_property` caches per instance. The previous
        # `@property` + `@lru_cache(1)` combination kept a single cache slot
        # shared by *all* instances, so alternating access on two instances
        # evicted each other's entry and re-drew the (random) sample,
        # breaking the "always return the same sample" contract above.
        if self.var == 0:
            # Degenerate posterior: all probability mass at `mean`.
            return np.full((self.nsamples, ), self.mean)
        return self.mean + \
            np.sqrt(self.var) * np.random.standard_t(self.df, self.nsamples)

    def probs(self):
        """
        Compute and return probabilities

        Probabilities are not computed from a sample posterior but
        from cumulative Student distribution.

        Returns:
            `(p_left, p_rope, p_right)` if `rope > 0`;
            otherwise `(p_left, p_right)`.
        """
        t_parameters = self.df, self.mean, np.sqrt(self.var)
        if self.rope == 0:
            if self.var == 0:
                # Point mass at `mean`: P(right) is 0 or 1, split evenly
                # when the mean is exactly zero.
                pr = (self.mean > 0) + 0.5 * (self.mean == 0)
            else:
                pr = 1 - stats.t.cdf(0, *t_parameters)
            return 1 - pr, pr
        else:
            if self.var == 0:
                # Point mass at `mean`: each probability is 0 or 1 depending
                # on where the mean lies relative to the rope.
                pl = float(self.mean < -self.rope)
                pr = float(self.mean > self.rope)
            else:
                pl = stats.t.cdf(-self.rope, *t_parameters)
                pr = 1 - stats.t.cdf(self.rope, *t_parameters)
            return pl, 1 - pl - pr, pr

    def plot(self, names=None):
        """
        Plot the posterior Student distribution as a histogram.

        Args:
            names (tuple of str): names of classifiers

        Returns:
            matplotlib figure
        """
        with seaborn_plt() as plt:
            names = names or self.names or ("C1", "C2")

            fig, ax = plt.subplots()
            ax.grid(True)
            label = "difference"
            if self.meanx is not None and self.meany is not None:
                label += " ({}: {:.3f}, {}: {:.3f})".format(
                    names[0], self.meanx, names[1], self.meany)
            ax.set_xlabel(label)
            ax.get_yaxis().set_ticklabels([])
            ax.axvline(x=-self.rope, color="#ffad2f", linewidth=2, label="rope")
            ax.axvline(x=self.rope, color="#ffad2f", linewidth=2)

            targs = (self.df, self.mean, np.sqrt(self.var))
            # Span at least the central 99% of the posterior, widened so the
            # rope boundaries are always visible.
            xs = np.linspace(min(stats.t.ppf(0.005, *targs), -1.05 * self.rope),
                             max(stats.t.ppf(0.995, *targs), 1.05 * self.rope),
                             100)
            ys = stats.t.pdf(xs, *targs)
            ax.plot(xs, ys, color="#2f56e0", linewidth=2, label="pdf")
            ax.fill_between(xs, ys, np.zeros(100), color="#34ccff")
            ax.legend()
            return fig


class CorrelatedTTest:
    """
    Compute and plot a Bayesian correlated t-test
    """
    def __new__(cls, x, y, rope=0, runs=1, *, names=None, nsamples=50000):
        check_args(x, y, rope)
        if not int(runs) == runs > 0:
            raise ValueError('Number of runs must be a positive integer')
        if len(x) % round(runs) != 0:
            raise ValueError("Number of measurements is not divisible by number of runs")

        mean, var, df = cls.compute_statistics(x, y, runs)
        return Posterior(mean, var, df, rope,
                         np.mean(x), np.mean(y), names=names,
                         nsamples=nsamples)

    @classmethod
    def compute_statistics(cls, x, y, runs=1):
        """
        Compute statistics (mean, variance) from the differences.

        The number of runs is needed to compute the Nadeau-Bengio correction
        for underestimated variance.

        Args:
            x (np.array): a vector of scores for the first model
            y (np.array): a vector of scores for the second model
            runs (int): number of repetitions of cross validation (default: 1)

        Returns:
            mean, var, degrees_of_freedom
        """
        diff = y - x
        n = len(diff)
        nfolds = n / runs
        mean = np.mean(diff)
        var = np.var(diff, ddof=1)
        var *= 1 / n + 1 / (nfolds - 1)  # Nadeau-Bengio's correction
        return mean, var, n - 1

    @classmethod
    def sample(cls, x, y, runs=1, *, nsamples=50000):
        """
        Return a sample of posterior distribution for the given data

        Args:
            x (np.array): a vector of scores for the first model
            y (np.array): a vector of scores for the second model
            runs (int): number of repetitions of cross validation (default: 1)
            nsamples (int): the number of samples (default: 50000)

        Returns:
            np.array of sampled differences (1-dimensional, `nsamples` long)
        """
        return cls(x, y, runs=runs, nsamples=nsamples).sample

    @classmethod
    def probs(cls, x, y, rope=0, runs=1):
        """
        Compute and return probabilities

        Probabilities are not computed from a sample posterior but
        from cumulative Student distribution.

        Args:
            x (np.array): a vector of scores for the first model
            y (np.array): a vector of scores for the second model
            rope (float): the width of the region of practical equivalence (default: 0)
            runs (int): number of repetitions of cross validation (default: 1)

        Returns:
            `(p_left, p_rope, p_right)` if `rope > 0`;
            otherwise `(p_left, p_right)`.
        """
        # new returns an instance of Test, not CorrelatedTTest
        # pylint: disable=no-value-for-parameter
        return cls(x, y, rope, runs).probs()

    @classmethod
    def plot(cls, x, y, rope=0, runs=1, *, names=None):
        """
        Plot the posterior Student distribution as a histogram.

        Args:
            x (np.array): a vector of scores for the first model
            y (np.array): a vector of scores for the second model
            rope (float): the width of the region of practical equivalence (default: 0)
            runs (int): number of repetitions of cross validation (default: 1)
            names (tuple of str): names of classifiers

        Returns:
            matplotlib figure
        """
        # new returns an instance of Test, not CorrelatedTTest
        # pylint: disable=no-value-for-parameter
        return cls(x, y, rope, runs).plot(names)


def two_on_single(x, y, rope=0, runs=1, *, names=None, plot=False):
    """
    Compute probabilities using a Bayesian correlated t-test and,
    optionally, draw a histogram.

    The test assumes that the classifiers were evaluated using cross
    validation. Argument `runs` gives the number of repetitions of
    cross-validation.

    For more details, see :obj:`CorrelatedTTest`

    Args:
        x (np.array): a vector of scores for the first model
        y (np.array): a vector of scores for the second model
        rope (float): the width of the region of practical equivalence (default: 0)
        runs (int): the number of repetitions of cross validation (default: 1)
        plot (bool): if `True`, the function also return a histogram (default: False)
        names (tuple of str): names of classifiers (ignored if `plot` is `False`)

    Returns:
        `(p_left, p_rope, p_right)` if `rope > 0`; otherwise `(p_left, p_right)`.

        If `plot=True`, the function also returns a matplotlib figure,
        that is, `((p_left, p_rope, p_right), fig)`
    """
    return call_shortcut(CorrelatedTTest, x, y, rope,
                         plot=plot, names=names, runs=runs)