1import numpy as np 2from statsmodels.duration.hazard_regression import PHReg 3 4 5def _kernel_cumincidence(time, status, exog, kfunc, freq_weights, 6 dimred=True): 7 """ 8 Calculates cumulative incidence functions using kernels. 9 10 Parameters 11 ---------- 12 time : array_like 13 The observed time values 14 status : array_like 15 The status values. status == 0 indicates censoring, 16 status == 1, 2, ... are the events. 17 exog : array_like 18 Covariates such that censoring becomes independent of 19 outcome times conditioned on the covariate values. 20 kfunc : function 21 A kernel function 22 freq_weights : array_like 23 Optional frequency weights 24 dimred : bool 25 If True, proportional hazards regression models are used to 26 reduce exog to two columns by predicting overall events and 27 censoring in two separate models. If False, exog is used 28 directly for calculating kernel weights without dimension 29 reduction. 30 """ 31 32 # Reorder so time is ascending 33 ii = np.argsort(time) 34 time = time[ii] 35 status = status[ii] 36 exog = exog[ii, :] 37 nobs = len(time) 38 39 # Convert the unique times to ranks (0, 1, 2, ...) 40 utime, rtime = np.unique(time, return_inverse=True) 41 42 # Last index where each unique time occurs. 43 ie = np.searchsorted(time, utime, side='right') - 1 44 45 ngrp = int(status.max()) 46 47 # All-cause status 48 statusa = (status >= 1).astype(np.float64) 49 50 if freq_weights is not None: 51 freq_weights = freq_weights / freq_weights.sum() 52 53 ip = [] 54 sp = [None] * nobs 55 n_risk = [None] * nobs 56 kd = [None] * nobs 57 for k in range(ngrp): 58 status0 = (status == k + 1).astype(np.float64) 59 60 # Dimension reduction step 61 if dimred: 62 sfe = PHReg(time, exog, status0).fit() 63 fitval_e = sfe.predict().predicted_values 64 sfc = PHReg(time, exog, 1 - status0).fit() 65 fitval_c = sfc.predict().predicted_values 66 exog2d = np.hstack((fitval_e[:, None], fitval_c[:, None])) 67 exog2d -= exog2d.mean(0) 68 exog2d /= exog2d.std(0) 69 else: 70 exog2d = exog 71 72 ip0 = 0 73 for i in range(nobs): 74 75 if k == 0: 76 kd1 = exog2d - exog2d[i, :] 77 kd1 = kfunc(kd1) 78 kd[i] = kd1 79 80 # Get the local all-causes survival function 81 if k == 0: 82 denom = np.cumsum(kd[i][::-1])[::-1] 83 num = kd[i] * statusa 84 rat = num / denom 85 tr = 1e-15 86 ii = np.flatnonzero((denom < tr) & (num < tr)) 87 rat[ii] = 0 88 ratc = 1 - rat 89 ratc = np.clip(ratc, 1e-10, np.inf) 90 lrat = np.log(ratc) 91 prat = np.cumsum(lrat)[ie] 92 sf = np.exp(prat) 93 sp[i] = np.r_[1, sf[:-1]] 94 n_risk[i] = denom[ie] 95 96 # Number of cause-specific deaths at each unique time. 97 d0 = np.bincount(rtime, weights=status0*kd[i], 98 minlength=len(utime)) 99 100 # The cumulative incidence function probabilities. Carry 101 # forward once the effective sample size drops below 1. 102 ip1 = np.cumsum(sp[i] * d0 / n_risk[i]) 103 jj = len(ip1) - np.searchsorted(n_risk[i][::-1], 1) 104 if jj < len(ip1): 105 ip1[jj:] = ip1[jj - 1] 106 if freq_weights is None: 107 ip0 += ip1 108 else: 109 ip0 += freq_weights[i] * ip1 110 111 if freq_weights is None: 112 ip0 /= nobs 113 114 ip.append(ip0) 115 116 return utime, ip 117 118 119def _kernel_survfunc(time, status, exog, kfunc, freq_weights): 120 """ 121 Estimate the marginal survival function under dependent censoring. 122 123 Parameters 124 ---------- 125 time : array_like 126 The observed times for each subject 127 status : array_like 128 The status for each subject (1 indicates event, 0 indicates 129 censoring) 130 exog : array_like 131 Covariates such that censoring is independent conditional on 132 exog 133 kfunc : function 134 Kernel function 135 freq_weights : array_like 136 Optional frequency weights 137 138 Returns 139 ------- 140 probs : array_like 141 The estimated survival probabilities 142 times : array_like 143 The times at which the survival probabilities are estimated 144 145 References 146 ---------- 147 Zeng, Donglin 2004. Estimating Marginal Survival Function by 148 Adjusting for Dependent Censoring Using Many Covariates. The 149 Annals of Statistics 32 (4): 1533 55. 150 doi:10.1214/009053604000000508. 151 https://arxiv.org/pdf/math/0409180.pdf 152 """ 153 154 # Dimension reduction step 155 sfe = PHReg(time, exog, status).fit() 156 fitval_e = sfe.predict().predicted_values 157 sfc = PHReg(time, exog, 1 - status).fit() 158 fitval_c = sfc.predict().predicted_values 159 exog2d = np.hstack((fitval_e[:, None], fitval_c[:, None])) 160 161 n = len(time) 162 ixd = np.flatnonzero(status == 1) 163 164 # For consistency with standard KM, only compute the survival 165 # function at the times of observed events. 166 utime = np.unique(time[ixd]) 167 168 # Reorder everything so time is ascending 169 ii = np.argsort(time) 170 time = time[ii] 171 status = status[ii] 172 exog2d = exog2d[ii, :] 173 174 # Last index where each evaluation time occurs. 175 ie = np.searchsorted(time, utime, side='right') - 1 176 177 if freq_weights is not None: 178 freq_weights = freq_weights / freq_weights.sum() 179 180 sprob = 0. 181 for i in range(n): 182 183 kd = exog2d - exog2d[i, :] 184 kd = kfunc(kd) 185 186 denom = np.cumsum(kd[::-1])[::-1] 187 num = kd * status 188 rat = num / denom 189 tr = 1e-15 190 ii = np.flatnonzero((denom < tr) & (num < tr)) 191 rat[ii] = 0 192 ratc = 1 - rat 193 ratc = np.clip(ratc, 1e-12, np.inf) 194 lrat = np.log(ratc) 195 prat = np.cumsum(lrat)[ie] 196 prat = np.exp(prat) 197 198 if freq_weights is None: 199 sprob += prat 200 else: 201 sprob += prat * freq_weights[i] 202 203 if freq_weights is None: 204 sprob /= n 205 206 return sprob, utime 207