1# Author: Nelle Varoquaux, Andrew Tulloch, Antony Lee 2 3# Uses the pool adjacent violators algorithm (PAVA), with the 4# enhancement of searching for the longest decreasing subsequence to 5# pool at each step. 6 7import numpy as np 8cimport numpy as np 9cimport cython 10from cython cimport floating 11 12np.import_array() 13 14 15def _inplace_contiguous_isotonic_regression(floating[::1] y, floating[::1] w): 16 cdef: 17 Py_ssize_t n = y.shape[0], i, k 18 floating prev_y, sum_wy, sum_w 19 Py_ssize_t[::1] target = np.arange(n, dtype=np.intp) 20 21 # target describes a list of blocks. At any time, if [i..j] (inclusive) is 22 # an active block, then target[i] := j and target[j] := i. 23 24 # For "active" indices (block starts): 25 # w[i] := sum{w_orig[j], j=[i..target[i]]} 26 # y[i] := sum{y_orig[j]*w_orig[j], j=[i..target[i]]} / w[i] 27 28 with nogil: 29 i = 0 30 while i < n: 31 k = target[i] + 1 32 if k == n: 33 break 34 if y[i] < y[k]: 35 i = k 36 continue 37 sum_wy = w[i] * y[i] 38 sum_w = w[i] 39 while True: 40 # We are within a decreasing subsequence. 41 prev_y = y[k] 42 sum_wy += w[k] * y[k] 43 sum_w += w[k] 44 k = target[k] + 1 45 if k == n or prev_y < y[k]: 46 # Non-singleton decreasing subsequence is finished, 47 # update first entry. 48 y[i] = sum_wy / sum_w 49 w[i] = sum_w 50 target[i] = k - 1 51 target[k - 1] = i 52 if i > 0: 53 # Backtrack if we can. This makes the algorithm 54 # single-pass and ensures O(n) complexity. 55 i = target[i - 1] 56 # Otherwise, restart from the same point. 57 break 58 # Reconstruct the solution. 59 i = 0 60 while i < n: 61 k = target[i] + 1 62 y[i + 1 : k] = y[i] 63 i = k 64 65 66def _make_unique(np.ndarray[dtype=floating] X, 67 np.ndarray[dtype=floating] y, 68 np.ndarray[dtype=floating] sample_weights): 69 """Average targets for duplicate X, drop duplicates. 70 71 Aggregates duplicate X values into a single X value where 72 the target y is a (sample_weighted) average of the individual 73 targets. 74 75 Assumes that X is ordered, so that all duplicates follow each other. 76 """ 77 unique_values = len(np.unique(X)) 78 79 cdef np.ndarray[dtype=floating] y_out = np.empty(unique_values, 80 dtype=X.dtype) 81 cdef np.ndarray[dtype=floating] x_out = np.empty_like(y_out) 82 cdef np.ndarray[dtype=floating] weights_out = np.empty_like(y_out) 83 84 cdef floating current_x = X[0] 85 cdef floating current_y = 0 86 cdef floating current_weight = 0 87 cdef floating y_old = 0 88 cdef int i = 0 89 cdef int j 90 cdef floating x 91 cdef int n_samples = len(X) 92 cdef floating eps = np.finfo(X.dtype).resolution 93 94 for j in range(n_samples): 95 x = X[j] 96 if x - current_x >= eps: 97 # next unique value 98 x_out[i] = current_x 99 weights_out[i] = current_weight 100 y_out[i] = current_y / current_weight 101 i += 1 102 current_x = x 103 current_weight = sample_weights[j] 104 current_y = y[j] * sample_weights[j] 105 else: 106 current_weight += sample_weights[j] 107 current_y += y[j] * sample_weights[j] 108 109 x_out[i] = current_x 110 weights_out[i] = current_weight 111 y_out[i] = current_y / current_weight 112 return x_out[:i+1], y_out[:i+1], weights_out[:i+1] 113