1# Author: Nelle Varoquaux, Andrew Tulloch, Antony Lee
2
3# Uses the pool adjacent violators algorithm (PAVA), with the
4# enhancement of searching for the longest decreasing subsequence to
5# pool at each step.
6
7import numpy as np
8cimport numpy as np
9cimport cython
10from cython cimport floating
11
12np.import_array()
13
14
15def _inplace_contiguous_isotonic_regression(floating[::1] y, floating[::1] w):
16    cdef:
17        Py_ssize_t n = y.shape[0], i, k
18        floating prev_y, sum_wy, sum_w
19        Py_ssize_t[::1] target = np.arange(n, dtype=np.intp)
20
21    # target describes a list of blocks.  At any time, if [i..j] (inclusive) is
22    # an active block, then target[i] := j and target[j] := i.
23
24    # For "active" indices (block starts):
25    # w[i] := sum{w_orig[j], j=[i..target[i]]}
26    # y[i] := sum{y_orig[j]*w_orig[j], j=[i..target[i]]} / w[i]
27
28    with nogil:
29        i = 0
30        while i < n:
31            k = target[i] + 1
32            if k == n:
33                break
34            if y[i] < y[k]:
35                i = k
36                continue
37            sum_wy = w[i] * y[i]
38            sum_w = w[i]
39            while True:
40                # We are within a decreasing subsequence.
41                prev_y = y[k]
42                sum_wy += w[k] * y[k]
43                sum_w += w[k]
44                k = target[k] + 1
45                if k == n or prev_y < y[k]:
46                    # Non-singleton decreasing subsequence is finished,
47                    # update first entry.
48                    y[i] = sum_wy / sum_w
49                    w[i] = sum_w
50                    target[i] = k - 1
51                    target[k - 1] = i
52                    if i > 0:
53                        # Backtrack if we can.  This makes the algorithm
54                        # single-pass and ensures O(n) complexity.
55                        i = target[i - 1]
56                    # Otherwise, restart from the same point.
57                    break
58        # Reconstruct the solution.
59        i = 0
60        while i < n:
61            k = target[i] + 1
62            y[i + 1 : k] = y[i]
63            i = k
64
65
66def _make_unique(np.ndarray[dtype=floating] X,
67                 np.ndarray[dtype=floating] y,
68                 np.ndarray[dtype=floating] sample_weights):
69    """Average targets for duplicate X, drop duplicates.
70
71    Aggregates duplicate X values into a single X value where
72    the target y is a (sample_weighted) average of the individual
73    targets.
74
75    Assumes that X is ordered, so that all duplicates follow each other.
76    """
77    unique_values = len(np.unique(X))
78
79    cdef np.ndarray[dtype=floating] y_out = np.empty(unique_values,
80                                                     dtype=X.dtype)
81    cdef np.ndarray[dtype=floating] x_out = np.empty_like(y_out)
82    cdef np.ndarray[dtype=floating] weights_out = np.empty_like(y_out)
83
84    cdef floating current_x = X[0]
85    cdef floating current_y = 0
86    cdef floating current_weight = 0
87    cdef floating y_old = 0
88    cdef int i = 0
89    cdef int j
90    cdef floating x
91    cdef int n_samples = len(X)
92    cdef floating eps = np.finfo(X.dtype).resolution
93
94    for j in range(n_samples):
95        x = X[j]
96        if x - current_x >= eps:
97            # next unique value
98            x_out[i] = current_x
99            weights_out[i] = current_weight
100            y_out[i] = current_y / current_weight
101            i += 1
102            current_x = x
103            current_weight = sample_weights[j]
104            current_y = y[j] * sample_weights[j]
105        else:
106            current_weight += sample_weights[j]
107            current_y += y[j] * sample_weights[j]
108
109    x_out[i] = current_x
110    weights_out[i] = current_weight
111    y_out[i] = current_y / current_weight
112    return x_out[:i+1], y_out[:i+1], weights_out[:i+1]
113