from functools import cached_property

import numpy as np
from scipy import stats

from .utils import check_args, seaborn_plt, call_shortcut

__all__ = ["CorrelatedTTest", "two_on_single"]


class Posterior:
    """
    The posterior distribution of differences on a single data set.

    Args:
        mean (float): the mean difference
        var (float): the variance
        df (float): degrees of freedom
        rope (float): the width of the region of practical equivalence
            (default: 0)
        meanx (float): mean score of the first classifier; shown in a plot
        meany (float): mean score of the second classifier; shown in a plot
        names (tuple of str): names of classifiers; shown in a plot
        nsamples (int): the number of samples; used only in property `sample`,
            not in computation of probabilities or plotting (default: 50000)
    """
    def __init__(self, mean, var, df, rope=0, meanx=None, meany=None,
                 *, names=None, nsamples=50000):
        self.meanx = meanx
        self.meany = meany
        self.mean = mean
        self.var = var
        self.df = df
        self.rope = rope
        self.names = names
        self.nsamples = nsamples

    @cached_property
    def sample(self):
        """
        A sample of differences as a 1-dimensional array.

        Like posteriors for comparison on multiple data sets, an instance of
        this class will always return the same sample.

        This sample is not used by other methods.
        """
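        # A zero-variance posterior is a point mass at `mean`; return a
        # constant sample rather than scaled Student-t draws.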
        if self.var == 0:
            return np.full((self.nsamples,), self.mean)
        return self.mean + np.sqrt(self.var) * np.random.standard_t(
            self.df, self.nsamples)

    def probs(self):
        """
        Compute and return probabilities.

        Probabilities are computed from the cumulative Student distribution,
        not from a posterior sample.

        Returns:
            `(p_left, p_rope, p_right)` if `rope > 0`;
            otherwise `(p_left, p_right)`.
        """
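        # With zero variance the posterior is degenerate, so the
        # probabilities are plain indicators (a tie splits evenly when
        # rope == 0); otherwise they come from the Student-t CDF.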
        t_parameters = self.df, self.mean, np.sqrt(self.var)
        if self.rope == 0:
            if self.var == 0:
                pr = (self.mean > 0) + 0.5 * (self.mean == 0)
            else:
                pr = 1 - stats.t.cdf(0, *t_parameters)
            return 1 - pr, pr
        else:
            if self.var == 0:
                pl = float(self.mean < -self.rope)
                pr = float(self.mean > self.rope)
            else:
                pl = stats.t.cdf(-self.rope, *t_parameters)
                pr = 1 - stats.t.cdf(self.rope, *t_parameters)
        return pl, 1 - pl - pr, pr

    def plot(self, names=None):
        """
        Plot the density of the posterior Student distribution.

        Args:
            names (tuple of str): names of classifiers

        Returns:
            matplotlib figure
        """
        with seaborn_plt() as plt:
            names = names or self.names or ("C1", "C2")

            fig, ax = plt.subplots()
            ax.grid(True)
            label = "difference"
            if self.meanx is not None and self.meany is not None:
                label += " ({}: {:.3f}, {}: {:.3f})".format(
                    names[0], self.meanx, names[1], self.meany)
            ax.set_xlabel(label)
            ax.get_yaxis().set_ticklabels([])
            ax.axvline(x=-self.rope, color="#ffad2f", linewidth=2, label="rope")
            ax.axvline(x=self.rope, color="#ffad2f", linewidth=2)

            targs = (self.df, self.mean, np.sqrt(self.var))
            xs = np.linspace(min(stats.t.ppf(0.005, *targs), -1.05 * self.rope),
                             max(stats.t.ppf(0.995, *targs), 1.05 * self.rope),
                             100)
            ys = stats.t.pdf(xs, *targs)
            ax.plot(xs, ys, color="#2f56e0", linewidth=2, label="pdf")
            ax.fill_between(xs, ys, np.zeros(100), color="#34ccff")
            ax.legend()
            return fig


class CorrelatedTTest:
    """
    Compute and plot a Bayesian correlated t-test.
    """
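    # Note: constructing this class returns a `Posterior` (see `__new__`);
    # the classmethods below are one-call shortcuts around it.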
    def __new__(cls, x, y, rope=0, runs=1, *, names=None, nsamples=50000):
        check_args(x, y, rope)
        if int(runs) != runs or runs <= 0:
            raise ValueError("Number of runs must be a positive integer")
        if len(x) % int(runs) != 0:
            raise ValueError(
                "Number of measurements is not divisible by number of runs")

        mean, var, df = cls.compute_statistics(x, y, runs)
        return Posterior(mean, var, df, rope,
                         np.mean(x), np.mean(y), names=names,
                         nsamples=nsamples)

    @classmethod
    def compute_statistics(cls, x, y, runs=1):
        """
        Compute the mean, the variance and the degrees of freedom of the
        differences.

        The number of runs is needed to compute the Nadeau-Bengio correction
        for the otherwise underestimated variance.

        Args:
            x (np.array): a vector of scores for the first model
            y (np.array): a vector of scores for the second model
            runs (int): number of repetitions of cross validation (default: 1)

        Returns:
            mean, var, degrees_of_freedom
        """
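        # Differences from (repeated) cross validation are correlated because
        # training sets overlap, so the plain sample variance underestimates
        # the true variance; the Nadeau-Bengio correction replaces the factor
        # 1/n with 1/n + 1/(nfolds - 1).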
        diff = y - x
        n = len(diff)
        nfolds = n / runs
        mean = np.mean(diff)
        var = np.var(diff, ddof=1)
        var *= 1 / n + 1 / (nfolds - 1)  # Nadeau-Bengio's correction
        return mean, var, n - 1

    @classmethod
    def sample(cls, x, y, runs=1, *, nsamples=50000):
        """
        Return a sample of the posterior distribution for the given data.

        Args:
            x (np.array): a vector of scores for the first model
            y (np.array): a vector of scores for the second model
            runs (int): number of repetitions of cross validation (default: 1)
            nsamples (int): the number of samples (default: 50000)

        Returns:
            np.array: a sample of differences drawn from the posterior
        """
        return cls(x, y, runs=runs, nsamples=nsamples).sample
    @classmethod
    def probs(cls, x, y, rope=0, runs=1):
        """
        Compute and return probabilities.

        Probabilities are computed from the cumulative Student distribution,
        not from a posterior sample.

        Args:
            x (np.array): a vector of scores for the first model
            y (np.array): a vector of scores for the second model
            rope (float): the width of the region of practical equivalence
                (default: 0)
            runs (int): number of repetitions of cross validation (default: 1)

        Returns:
            `(p_left, p_rope, p_right)` if `rope > 0`;
            otherwise `(p_left, p_right)`.
        """
        # __new__ returns an instance of Posterior, not CorrelatedTTest
        # pylint: disable=no-value-for-parameter
        return cls(x, y, rope, runs).probs()

    @classmethod
    def plot(cls, x, y, rope=0, runs=1, *, names=None):
        """
        Plot the density of the posterior Student distribution.

        Args:
            x (np.array): a vector of scores for the first model
            y (np.array): a vector of scores for the second model
            rope (float): the width of the region of practical equivalence
                (default: 0)
            runs (int): number of repetitions of cross validation (default: 1)
            names (tuple of str): names of classifiers

        Returns:
            matplotlib figure
        """
        # __new__ returns an instance of Posterior, not CorrelatedTTest
        # pylint: disable=no-value-for-parameter
        return cls(x, y, rope, runs).plot(names)


def two_on_single(x, y, rope=0, runs=1, *, names=None, plot=False):
    """
    Compute probabilities using a Bayesian correlated t-test and, optionally,
    plot the posterior distribution.

    The test assumes that the classifiers were evaluated using cross
    validation. Argument `runs` gives the number of repetitions of cross
    validation.

    For more details, see :obj:`CorrelatedTTest`.

    Args:
        x (np.array): a vector of scores for the first model
        y (np.array): a vector of scores for the second model
        rope (float): the width of the region of practical equivalence
            (default: 0)
        runs (int): the number of repetitions of cross validation (default: 1)
        plot (bool): if `True`, the function also returns the plotted figure
            (default: False)
        names (tuple of str): names of classifiers (ignored if `plot` is
            `False`)

    Returns:
        `(p_left, p_rope, p_right)` if `rope > 0`; otherwise
        `(p_left, p_right)`.

        If `plot=True`, the function also returns a matplotlib figure,
        that is, `((p_left, p_rope, p_right), fig)`.
    """
    return call_shortcut(CorrelatedTTest, x, y, rope,
                         plot=plot, names=names, runs=runs)
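

if __name__ == "__main__":
    # A minimal usage sketch on synthetic scores (illustrative only, not real
    # benchmark results). Since this module uses relative imports, run it in
    # its package context, e.g. with `python -m <package>.<module>`.
    rng = np.random.default_rng(0)
    scores_a = 0.80 + 0.02 * rng.standard_normal(10)  # e.g. 10-fold CV scores
    scores_b = 0.82 + 0.02 * rng.standard_normal(10)
    # Returns (p_left, p_rope, p_right): the probability that the first model
    # is better, that the difference lies within the rope, and that the
    # second model is better
    print(two_on_single(scores_a, scores_b, rope=0.01))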