1 2bandwidth.rq <- function(p, n, hs = TRUE, alpha = 0.05) 3{ 4 # Bandwidth selection for sparsity estimation two flavors: 5 # Hall and Sheather(1988, JRSS(B)) rate = O(n^{-1/3}) 6 # Bofinger (1975, Aus. J. Stat) -- rate = O(n^{-1/5}) 7 # Generally speaking, default method, hs=TRUE is preferred. 8 9 x0 <- qnorm(p) 10 f0 <- dnorm(x0) 11 if(hs) 12 n^(-1/3) * qnorm(1 - alpha/2)^(2/3) * 13 ((1.5 * f0^2)/(2 * x0^2 + 1))^(1/3) 14 else n^-0.2 * ((4.5 * f0^4)/(2 * x0^2 + 1)^2)^ 0.2 15} 16 17 18plot.rq.process <- function(x, nrow = 3, ncol = 2, ...) 19{ 20 ## Function to plot estimated quantile regression process 21 tdim <- dim(x$sol) 22 p <- tdim[1] - 3 23 m <- tdim[2] 24 oldpar <- par(no.readonly=TRUE) 25 par(mfrow = c(nrow, ncol)) 26 ylab <- dimnames(x$sol)[[1]] 27 for(i in 1:p) { 28 plot(x$sol[1,], x$sol[3 + i, ], xlab = "tau", 29 ylab = ylab[3 + i], type = "l") 30 } 31par(oldpar) 32} 33 34print.rqs <- function (x, ...) 35{ 36 if (!is.null(cl <- x$call)) { 37 cat("Call:\n") 38 dput(cl) 39 } 40 coef <- coef(x) 41 cat("\nCoefficients:\n") 42 print(coef, ...) 43 rank <- x$rank 44 nobs <- nrow(residuals(x)) 45 p <- nrow(coef) 46 rdf <- nobs - p 47 cat("\nDegrees of freedom:", nobs, "total;", rdf, "residual\n") 48 if (!is.null(attr(x, "na.message"))) 49 cat(attr(x, "na.message"), "\n") 50 invisible(x) 51} 52 53"print.rq" <- 54function(x, ...) 55{ 56 if(!is.null(cl <- x$call)) { 57 cat("Call:\n") 58 dput(cl) 59 } 60 coef <- coef(x) 61 cat("\nCoefficients:\n") 62 print(coef, ...) 63 rank <- x$rank 64 nobs <- length(residuals(x)) 65 if(is.matrix(coef)) 66 p <- dim(coef)[1] 67 else p <- length(coef) 68 rdf <- nobs - p 69 cat("\nDegrees of freedom:", nobs, "total;", rdf, "residual\n") 70 if(!is.null(attr(x, "na.message"))) 71 cat(attr(x, "na.message"), "\n") 72 invisible(x) 73} 74 75print.summary.rqs <- function(x, ...) { 76 lapply(x, print.summary.rq) 77 invisible(x) 78} 79 80"print.summary.rq" <- 81function(x, digits = max(5, .Options$digits - 2), ...) 82{ 83 cat("\nCall: ") 84 dput(x$call) 85 coef <- x$coef 86 ## df <- x$df 87 ## rdf <- x$rdf 88 tau <- x$tau 89 cat("\ntau: ") 90 print(format(round(tau,digits = digits)), quote = FALSE, ...) 91 cat("\nCoefficients:\n") 92 print(format(round(coef, digits = digits)), quote = FALSE, ...) 93 invisible(x) 94} 95 96"rq" <- 97function (formula, tau = 0.5, data, subset, weights, na.action, method = "br", 98 model = TRUE, contrasts = NULL, ...) 99{ 100 call <- match.call() 101 mf <- match.call(expand.dots = FALSE) 102 m <- match(c("formula", "data", "subset", "weights", "na.action"), names(mf), 0) 103 mf <- mf[c(1,m)] 104 mf$drop.unused.levels <- TRUE 105 mf[[1]] <- as.name("model.frame") 106 mf <- eval.parent(mf) 107 if(method == "model.frame")return(mf) 108 mt <- attr(mf, "terms") 109 weights <- as.vector(model.weights(mf)) 110 tau <- sort(unique(tau)) 111 eps <- .Machine$double.eps^(2/3) 112 if (any(tau == 0)) tau[tau == 0] <- eps 113 if (any(tau == 1)) tau[tau == 1] <- 1 - eps 114 Y <- model.response(mf) 115 if(method == "sfn"){ 116 if(requireNamespace("MatrixModels") && requireNamespace("Matrix")){ 117 X <- MatrixModels::model.Matrix(mt, data, sparse = TRUE) 118 vnames <- dimnames(X)[[2]] 119 X <- as(X ,"matrix.csr") 120 mf$x <- X 121 } 122 } 123 else{ 124 X <- model.matrix(mt, mf, contrasts) 125 vnames <- dimnames(X)[[2]] 126 } 127 Rho <- function(u,tau) u * (tau - (u < 0)) 128 if (length(tau) > 1) { 129 if (any(tau < 0) || any(tau > 1)) 130 stop("invalid tau: taus should be >= 0 and <= 1") 131 coef <- matrix(0, ncol(X), length(tau)) 132 rho <- rep(0, length(tau)) 133 if(!(method %in% c("ppro","qfnb","pfnb"))){ 134 fitted <- resid <- matrix(0, nrow(X), length(tau)) 135 for(i in 1:length(tau)){ 136 z <- {if (length(weights)) 137 rq.wfit(X, Y, tau = tau[i], weights, method, ...) 138 else rq.fit(X, Y, tau = tau[i], method, ...) 139 } 140 coef[,i] <- z$coefficients 141 resid[,i] <- z$residuals 142 rho[i] <- sum(Rho(z$residuals,tau[i])) 143 fitted[,i] <- Y - z$residuals 144 } 145 taulabs <- paste("tau=",format(round(tau,3))) 146 dimnames(coef) <- list(vnames, taulabs) 147 dimnames(resid)[[2]] <- taulabs 148 fit <- z 149 fit$coefficients <- coef 150 fit$residuals <- resid 151 fit$fitted.values <- fitted 152 if(method == "lasso") class(fit) <- c("lassorqs","rqs") 153 else if(method == "scad") class(fit) <- c("scadrqs","rqs") 154 else class(fit) <- "rqs" 155 } 156 else if(method == "pfnb"){ # Preprocessing in fortran loop 157 fit <- rq.fit.pfnb(X, Y, tau) 158 class(fit) = "rqs" 159 } 160 else if(method == "qfnb"){ # simple fortran loop method 161 fit <- rq.fit.qfnb(X, Y, tau) 162 class(fit) = ifelse(length(tau) == 1,"rq","rqs") 163 } 164 else if(method == "ppro"){ # Preprocessing method in R 165 fit <- rq.fit.ppro(X, Y, tau, ...) 166 class(fit) = ifelse(length(tau) == 1,"rq","rqs") 167 } 168 } 169 else{ 170 process <- (tau < 0 || tau > 1) 171 if(process && method != "br") 172 stop("when tau not in [0,1] method br must be used") 173 fit <- { 174 if(length(weights)) 175 rq.wfit(X, Y, tau = tau, weights, method, ...) 176 else rq.fit(X, Y, tau = tau, method, ...) 177 } 178 if(process) 179 rho <- list(tau = fit$sol[1,], rho = fit$sol[3,]) 180 else { 181 if(length(dim(fit$residuals))) 182 dimnames(fit$residuals) <- list(dimnames(X)[[1]],NULL) 183 rho <- sum(Rho(fit$residuals,tau)) 184 } 185 if(method == "lasso") class(fit) <- c("lassorq","rq") 186 else if(method == "scad") class(fit) <- c("scadrq","rq") 187 else class(fit) <- ifelse(process, "rq.process", "rq") 188 } 189 fit$na.action <- attr(mf, "na.action") 190 fit$formula <- formula 191 fit$terms <- mt 192 fit$xlevels <- .getXlevels(mt,mf) 193 fit$call <- call 194 fit$tau <- tau 195 fit$weights <- weights 196 fit$residuals <- drop(fit$residuals) 197 fit$rho <- rho 198 fit$method <- method 199 fit$fitted.values <- drop(fit$fitted.values) 200 201 attr(fit, "na.message") <- attr(m, "na.message") 202 if(model) fit$model <- mf 203 fit 204} 205"rq.fit" <- 206function(x, y, tau = 0.5, method = "br", ...) 207{ 208 if(length(tau) > 1 && method != "ppro") { 209 tau <- tau[1] 210 warning("Multiple taus not allowed in rq.fit: solution restricted to first element") 211 } 212 213 fit <- switch(method, 214 fn = rq.fit.fnb(x, y, tau = tau, ...), 215 fnb = rq.fit.fnb(x, y, tau = tau, ...), 216 fnc = rq.fit.fnc(x, y, tau = tau, ...), 217 sfn = rq.fit.sfn(x, y, tau = tau, ...), 218 conquer = rq.fit.conquer(x, y, tau = tau, ...), 219 pfn = rq.fit.pfn(x, y, tau = tau, ...), 220 pfnb = rq.fit.pfnb(x, y, tau = tau, ...), 221 ppro= rq.fit.ppro(x, y, tau = tau, ...), 222 br = rq.fit.br(x, y, tau = tau, ...), 223 lasso = rq.fit.lasso(x, y, tau = tau, ...), 224 scad = rq.fit.scad(x, y, tau = tau, ...), 225 { 226 what <- paste("rq.fit.", method, sep = "") 227 if(exists(what, mode = "function")) 228 (get(what, mode = "function"))(x, y, ...) 229 else stop(paste("unimplemented method:", method)) 230 } 231 ) 232 fit$fitted.values <- y - fit$residuals 233 fit$contrasts <- attr(x, "contrasts") 234 fit 235} 236"rqs.fit"<- 237function(x, y, tau = 0.5, tol = 0.0001) 238{ 239# function to compute rq fits for multiple y's 240 x <- as.matrix(x) 241 p <- ncol(x) 242 n <- nrow(x) 243 m <- ncol(y) 244 z <- .Fortran("rqs", 245 as.integer(n), 246 as.integer(p), 247 as.integer(m), 248 as.integer(n + 5), 249 as.integer(p + 2), 250 as.double(x), 251 as.double(y), 252 as.double(tau), 253 as.double(tol), 254 flag = integer(m), 255 coef = double(p * m), 256 resid = double(n), 257 integer(n), 258 double((n + 5) * (p + 2)), 259 double(n)) 260 if(sum(z$flag)>0){ 261 if(any(z$flag)==2) 262 warning(paste(sum(z$flag==2),"out of",m, 263 "BS replications have near singular design")) 264 if(any(z$flag)==1) 265 warning(paste(sum(z$flag==1),"out of",m,"may be nonunique")) 266 } 267 return(t(matrix(z$coef, p, m))) 268} 269"formula.rq" <- 270function (x, ...) 271{ 272 form <- x$formula 273 if (!is.null(form)) { 274 form <- formula(x$terms) 275 environment(form) <- environment(x$formula) 276 form 277 } 278 else formula(x$terms) 279} 280 281"predict.rq" <- 282function (object, newdata, type = "none", interval = c("none", 283 "confidence"), level = 0.95, na.action = na.pass, ...) 284{ 285 if (missing(newdata)) 286 return(object$fitted) 287 else { 288 tt <- terms(object) 289 Terms <- delete.response(tt) 290 m <- model.frame(Terms, newdata, na.action = na.action, 291 xlev = object$xlevels) 292 if (!is.null(cl <- attr(Terms, "dataClasses"))) 293 .checkMFClasses(cl, m) 294 X <- model.matrix(Terms, m, contrasts.arg = object$contrasts) 295 } 296 pred <- drop(X %*% object$coefficients) 297 dots <- list(...) 298 if (length(dots$se)) 299 boot <- (dots$se == "boot") 300 else boot <- FALSE 301 if (length(dots$mofn)) 302 mofn <- dots$mofn 303 interval <- match.arg(interval) 304 if (!interval == "none") { 305 if (interval == "confidence") { 306 if (type == "percentile") { 307 if (boot) { 308 if(exists("mofn")) {# Rescale and recenter!! 309 n <- length(object$fitted) 310 factor <- ifelse(mofn < n, sqrt(mofn/n), 1) 311 XB <- X %*% t(summary(object, cov = TRUE, ...)$B)/factor 312 pl <- apply(XB, 1, function(x) quantile(x, (1 - level)/2)) 313 pu <- apply(XB, 1, function(x) quantile(x, 1 - (1 - level)/2)) 314 pl <- pred + factor * (pl - pred) 315 pu <- pred + factor * (pu - pred) 316 } 317 else { 318 XB <- X %*% t(summary(object, cov = TRUE, ...)$B) 319 pl <- apply(XB, 1, function(x) quantile(x, (1 - level)/2)) 320 pu <- apply(XB, 1, function(x) quantile(x, 1 - (1 - level)/2)) 321 } 322 pred <- cbind(pred, pl, pu) 323 colnames(pred) <- c("fit", "lower", "higher") 324 } 325 else stop("Percentile method requires se = \"boot\".") 326 } 327 else if (type == "direct") { 328 if (boot) 329 stop("Direct method incompatible with bootstrap covariance matrix estimation") 330 Z <- rq.fit(object$x, object$y, tau = -1)$sol 331 V <- summary(object, cov = TRUE, ...) 332 df <- V$rdf 333 tfrac <- qt(1 - (1 - level)/2, df) 334 Vun <- V$cov * V$scale^2 335 tau <- object$tau 336 bn <- tfrac * sqrt(diag(X %*% Vun %*% t(X))) 337 tauU <- pmin(tau + bn, 1 - 1/df) 338 tauL <- pmax(tau - bn, 1/df) 339 tauhat <- Z[1, ] 340 yhat <- X %*% Z[-(1:3), ] 341 n <- nrow(X) 342 pl <- yhat[cbind(1:n, cut(tauL, tauhat, label = FALSE))] 343 pu <- yhat[cbind(1:n, cut(tauU, tauhat, label = FALSE))] 344 pred <- cbind(pred, pl, pu) 345 colnames(pred) <- c("fit", "lower", "higher") 346 } 347 else { 348 V <- summary(object, cov = TRUE, ...) 349 df <- V$rdf 350 tfrac <- qt((1 - level)/2, df) 351 sdpred <- sqrt(diag(X %*% V$cov %*% t(X))) 352 pred <- cbind(pred, pred + tfrac * sdpred %o% 353 c(1, -1)) 354 colnames(pred) <- c("fit", "lower", "higher") 355 } 356 } 357 else stop(paste("No interval method for", interval)) 358 } 359 pred 360} 361 362"predict.rqs" <- 363function (object, newdata, type = "Qhat", stepfun = FALSE, na.action = na.pass, ...) 364{ 365 ## with all defaults 366 if(missing(newdata) && !stepfun && (type == "Qhat")) return(object$fitted) 367 368 ## otherwise 369 tt <- delete.response(terms(object)) 370 m <- if(missing(newdata)) model.frame(object) else model.frame(tt, newdata, 371 na.action = na.action, xlev = object$xlevels) 372 if(!is.null(cl <- attr(tt, "dataClasses"))) .checkMFClasses(cl, m) 373 X <- model.matrix(tt, m, contrasts = object$contrasts) 374 pred <- t(X %*% object$coefficients) 375 taus <- object$tau 376 M <- NCOL(pred) 377 378 ## return stepfun or matrix 379 if(stepfun) { 380 if(type == "Qhat"){ 381 pred <- rbind(pred[1,],pred) 382 if(M > 1) 383 f <- apply(pred, 2, function(y) stepfun(taus, y)) 384 else 385 f <- stepfun(taus, c(pred[1,1], pred[,1])) 386 } 387 else if(type == "Fhat"){ 388 taus <- c(taus[1], taus) 389 if(M > 1) 390 f <- apply(pred, 2, function(y) { 391 o <- order(y) 392 stepfun(y[o], taus[c(1,o)])}) 393 else 394 f <- stepfun(pred[,1],taus) 395 } 396 else stop("Stepfuns must be either 'Qhat' or 'Fhat'\n") 397 return(f) 398 } 399 else if(type == "fhat"){ 400 akjfun <- function(z, p, d = 10, g = 300, ...) { 401 mz <- sum(z * p) 402 sz <- sqrt(sum((z - mz)^2 * p)) 403 hz <- seq(mz - d * sz, mz + d * sz, length = g) 404 fz <- akj(z, hz, p = p, ...)$dens 405 approxfun(hz, fz) 406 } 407 p <- diff(taus) 408 if (M > 1) 409 f <- apply(pred[-1, ], 2, function(z) akjfun(z, p, ...)) 410 else akjfun(pred[, 1], p, ...) 411 return(f) 412 } 413 else return(t(pred)) 414} 415 416 417"predict.rq.process" <- 418function (object, newdata, type = "Qhat", stepfun = FALSE, na.action = na.pass, ...) 419{ 420 if(missing(newdata) && !stepfun && (type == "Qhat")) return(object$fitted) 421 tt <- terms(object) 422 Terms <- delete.response(tt) 423 m <- model.frame(Terms, newdata, na.action = na.action, 424 xlev = object$xlevels) 425 if (!is.null(cl <- attr(Terms, "dataClasses"))) .checkMFClasses(cl, m) 426 X <- model.matrix(Terms, m, contrasts = object$contrasts) 427 if(!length(X)) X <- rep(1, NROW(object$dsol)) # intercept only hack 428 pred <- t(X %*% object$sol[-(1:3),, drop = FALSE]) 429 taus <- object$sol[1,] 430 M <- NCOL(pred) 431 if(stepfun){ 432 if(type == "Qhat"){ 433 pred <- rbind(pred[1,], pred) 434 if(M > 1) 435 f <- apply(pred,2,function(y) stepfun(taus, y)) 436 else 437 f <- stepfun(taus, pred[,1]) 438 } 439 else if(type == "Fhat"){ 440 taus <- c(taus[1],taus) 441 if(M > 1) 442 f <- apply(pred,2,function(y) stepfun(y,taus)) 443 else 444 f <- stepfun(pred[,1],taus) 445 } 446 else stop("Stepfuns must be either 'Qhat' or 'Fhat'") 447 return(f) 448 } 449 else if(type == "fhat"){ 450 akjfun <- function(z, p, d = 10, g = 300, ...){ 451 mz <- sum(z * p) 452 sz <- sqrt(sum((z - mz)^2 * p)) 453 hz <- seq(mz - d * sz, mz+ d * sz, length = g) 454 fz <- akj(z, hz, p = p, ...)$dens 455 approxfun(hz,fz) 456 } 457 p <- diff(taus) 458 if(M > 1) 459 f <- apply(pred[-1,], 2, function(z) akjfun(z, p, ...)) 460 else 461 f = akjfun(pred[,1], p, ...) 462 return(f) 463 } 464 else return(t(pred)) 465} 466"rearrange" <- function (f, xmin, xmax) 467# Revised Version September 11 2007. 468{ 469 if (is.list(f)) 470 lapply(f, rearrange) 471 else { 472 if (!is.stepfun(f)) 473 stop("Only stepfuns can be rearranged.\n") 474 call <- attributes(f)$call; 475 right <- call[match("right",names(call))]=="TRUE()" 476 x <- knots(f) 477 n <- length(x) 478 if(missing(xmin)) xmin = x[1] 479 if(missing(xmax)) xmax = x[n] 480 x <- x[(x >= xmin) & (x <= xmax)] 481 x <- c(xmin, x, xmax) 482 n <- length(x) 483 y <- f(x) 484 o <- ifelse(rep(right,n-1), order(y[-1])+1, order(y[-n])) 485 x <- cumsum(c(x[1], diff(x)[o - right])) 486 y <- y[o] 487 y <- c(y[1], y, max(y)) 488 stepfun(x, y, right = right) 489 } 490 491} 492 493 494 495# Function to compute regression quantiles using original simplex approach 496# of Barrodale-Roberts/Koenker-d'Orey. There are several options. 497# The options are somewhat different than those available for the Frisch- 498# Newton version of the algorithm, reflecting the different natures of the 499# problems typically solved. Succintly BR for "small" problems, FN for 500# "large" ones. Obviously, these terms are conditioned by available hardware. 501# 502# Basically there are two modes of use: 503# 1. For Single Quantiles: 504# 505# if tau is between 0 and 1 then only one quantile solution is computed. 506# 507# if ci = FALSE then just the point estimate and residuals are returned 508# If the column dimension of x is 1 then ci is set to FALSE since 509# since the rank inversion method has no proper null model. 510# if ci = TRUE then there are two options for confidence intervals: 511# 512# 1. if iid = TRUE we get the original version of the rank 513# inversion intervals as in Koenker (1994) 514# 2. if iid = FALSE we get the new version of the rank inversion 515# intervals which accounts for heterogeneity across 516# observations in the conditional density of the response. 517# The theory of this is described in Koenker-Machado(1999) 518# Both approaches involve solving a parametric linear programming 519# problem, the difference is only in the factor qn which 520# determines how far the PP goes. In either case one can 521# specify two other options: 522# 1. interp = FALSE returns two intervals an upper and a 523# lower corresponding to a level slightly 524# above and slightly below the one specified 525# by the parameter alpha and dictated by the 526# essential discreteness in the test statistic. 527# interp = TRUE returns a single interval based on 528# linear interpolation of the two intervals 529# returned: c.values and p.values which give 530# the critical values and p.values of the 531# upper and lower intervals. Default: interp = TRUE. 532# 2. tcrit = TRUE uses Student t critical values while 533# tcrit = FALSE uses normal theory ones. 534# 2. For Multiple Quantiles: 535# 536# if tau < 0 or tau >1 then it is presumed that the user wants to find 537# all of the rq solutions in tau, and the program computes the whole 538# quantile regression solution as a process in tau, the resulting arrays 539# containing the primal and dual solutions, betahat(tau), ahat(tau) 540# are called sol and dsol. These arrays aren't printed by the default 541# print function but they are available as attributes. 542# It should be emphasized that this form of the solution can be 543# both memory and cpu quite intensive. On typical machines it is 544# not recommended for problems with n > 10,000. 545# In large problems a grid of solutions is probably sufficient. 546# 547rq.fit.br <- 548function (x, y, tau = 0.5, alpha = 0.1, ci = FALSE, iid = TRUE, 549 interp = TRUE, tcrit = TRUE) 550{ 551 tol <- .Machine$double.eps^(2/3) 552 eps <- tol 553 big <- .Machine$double.xmax 554 x <- as.matrix(x) 555 p <- ncol(x) 556 n <- nrow(x) 557 ny <- NCOL(y) 558 nsol <- 2 559 ndsol <- 2 560 # Check for Singularity of X since br fortran isn't very reliable about this 561 if (qr(x)$rank < p) 562 stop("Singular design matrix") 563 if (tau < 0 || tau > 1) { 564 nsol <- 3 * n 565 ndsol <- 3 * n 566 lci1 <- FALSE 567 qn <- rep(0, p) 568 cutoff <- 0 569 tau <- -1 570 } 571 else { 572 if (p == 1) 573 ci <- FALSE 574 if (ci) { 575 lci1 <- TRUE 576 if (tcrit) 577 cutoff <- qt(1 - alpha/2, n - p) 578 else cutoff <- qnorm(1 - alpha/2) 579 if (!iid) { 580 h <- bandwidth.rq(tau, n, hs = TRUE) 581 bhi <- rq.fit.br(x, y, tau + h, ci = FALSE) 582 bhi <- coefficients(bhi) 583 blo <- rq.fit.br(x, y, tau - h, ci = FALSE) 584 blo <- coefficients(blo) 585 dyhat <- x %*% (bhi - blo) 586 if (any(dyhat <= 0)) { 587 pfis <- (100 * sum(dyhat <= 0))/n 588 warning(paste(pfis, "percent fis <=0")) 589 } 590 f <- pmax(eps, (2 * h)/(dyhat - eps)) 591 qn <- rep(0, p) 592 for (j in 1:p) { 593 qnj <- lm(x[, j] ~ x[, -j] - 1, weights = f)$resid 594 qn[j] <- sum(qnj * qnj) 595 } 596 } 597 else qn <- 1/diag(solve(crossprod(x))) 598 } 599 else { 600 lci1 <- FALSE 601 qn <- rep(0, p) 602 cutoff <- 0 603 } 604 } 605 z <- .Fortran("rqbr", as.integer(n), as.integer(p), as.integer(n + 606 5), as.integer(p + 3), as.integer(p + 4), as.double(x), 607 as.double(y), as.double(tau), as.double(tol), flag = as.integer(1), 608 coef = double(p), resid = double(n), integer(n), double((n + 609 5) * (p + 4)), double(n), as.integer(nsol), as.integer(ndsol), 610 sol = double((p + 3) * nsol), dsol = double(n * ndsol), 611 lsol = as.integer(0), h = integer(p * nsol), qn = as.double(qn), 612 cutoff = as.double(cutoff), ci = double(4 * p), tnmat = double(4 * 613 p), as.double(big), as.logical(lci1)) 614 if (z$flag != 0) 615 warning(switch(z$flag, "Solution may be nonunique", "Premature end - possible conditioning problem in x")) 616 if (tau < 0 || tau > 1) { 617 sol <- matrix(z$sol[1:((p + 3) * z$lsol)], p + 3) 618 dsol <- matrix(z$dsol[1:(n * z$lsol)], n) 619 vnames <- dimnames(x)[[2]] 620 dimnames(sol) <- list(c("tau", "Qbar", "Obj.Fun", vnames), 621 NULL) 622 return(list(sol = sol, dsol = dsol)) 623 } 624 if (!ci) { 625 coef <- z$coef 626 dual <- z$dsol[1:n] 627 names(coef) <- dimnames(x)[[2]] 628 return(list(coefficients = coef, x = x, y = y, residuals = y - x %*% z$coef, 629 dual = dual)) 630 } 631 if (interp) { 632 Tn <- matrix(z$tnmat, nrow = 4) 633 Tci <- matrix(z$ci, nrow = 4) 634 Tci[3, ] <- Tci[3, ] + (abs(Tci[4, ] - Tci[3, ]) * (cutoff - 635 abs(Tn[3, ])))/abs(Tn[4, ] - Tn[3, ]) 636 Tci[2, ] <- Tci[2, ] - (abs(Tci[1, ] - Tci[2, ]) * (cutoff - 637 abs(Tn[2, ])))/abs(Tn[1, ] - Tn[2, ]) 638 Tci[2, ][is.na(Tci[2, ])] <- -big 639 Tci[3, ][is.na(Tci[3, ])] <- big 640 coefficients <- cbind(z$coef, t(Tci[2:3, ])) 641 vnames <- dimnames(x)[[2]] 642 cnames <- c("coefficients", "lower bd", "upper bd") 643 dimnames(coefficients) <- list(vnames, cnames) 644 residuals <- y - drop(x %*% z$coef) 645 return(list(coefficients = coefficients, residuals = residuals)) 646 } 647 else { 648 Tci <- matrix(z$ci, nrow = 4) 649 coefficients <- cbind(z$coef, t(Tci)) 650 residuals <- y - drop(x %*% z$coef) 651 vnames <- dimnames(x)[[2]] 652 cnames <- c("coefficients", "lower bound", "Lower Bound", 653 "upper bd", "Upper Bound") 654 dimnames(coefficients) <- list(vnames, cnames) 655 c.values <- t(matrix(z$tnmat, nrow = 4)) 656 c.values <- c.values[, 4:1] 657 dimnames(c.values) <- list(vnames, cnames[-1]) 658 p.values <- if (tcrit) 659 matrix(pt(c.values, n - p), ncol = 4) 660 else matrix(pnorm(c.values), ncol = 4) 661 dimnames(p.values) <- list(vnames, cnames[-1]) 662 list(coefficients = coefficients, residuals = residuals, 663 c.values = c.values, p.values = p.values) 664 } 665} 666 667"rq.fit.conquer" <- function(x, y, tau = 0.5, kernel = c("Gaussian", "uniform", 668 "parabolic", "triangular"), h = 0, tol = 1e-04, 669 iteMax = 5000, ci = FALSE, alpha = 0.05, B = 200) 670{ 671 if(!requireNamespace("conquer", quietly = TRUE)) 672 stop("method conquer requires package conquer") 673 fit = conquer::conquer(x[,-1], y, tau = tau, kernel = kernel, h = h, 674 tol = tol, iteMax = iteMax, ci = FALSE, alpha = alpha, B = 1000) 675 coefficients = fit$coeff 676 names(coefficients) = dimnames(x)[[2]] 677 residuals = fit$residual 678 list(coefficients = coefficients, tau = tau, residuals = residuals) 679} 680"rq.fit.fnb" <- 681function (x, y, tau = 0.5, rhs = (1-tau)*apply(x,2,sum), beta = 0.99995, eps = 1e-06) 682{ 683 n <- length(y) 684 p <- ncol(x) 685 if (n != nrow(x)) 686 stop("x and y don't match n") 687 if (tau < eps || tau > 1 - eps) 688 stop("No parametric Frisch-Newton method. Set tau in (0,1)") 689 d <- rep(1,n) 690 u <- rep(1,n) 691 wn <- rep(0,10*n) 692 wn[1:n] <- (1-tau) #initial value of dual solution 693 z <- .Fortran("rqfnb", as.integer(n), as.integer(p), a = as.double(t(as.matrix(x))), 694 c = as.double(-y), rhs = as.double(rhs), d = as.double(d),as.double(u), 695 beta = as.double(beta), eps = as.double(eps), 696 wn = as.double(wn), wp = double((p + 3) * p), 697 nit = integer(3), info = integer(1)) 698 if (z$info != 0) 699 warning(paste("Error info = ", z$info, "in stepy: possibly singular design")) 700 coefficients <- -z$wp[1:p] 701 names(coefficients) <- dimnames(x)[[2]] 702 residuals <- y - x %*% coefficients 703 list(coefficients=coefficients, tau=tau, residuals=residuals, nit = z$nit) 704} 705 706"rq.fit.fnc" <- 707function (x, y, R, r, tau = 0.5, beta = 0.9995, eps = 1e-06) 708{ 709 n1 <- length(y) 710 n2 <- length(r) 711 p <- ncol(x) 712 if (n1 != nrow(x)) 713 stop("x and y don't match n1") 714 if (n2 != nrow(R)) 715 stop("R and r don't match n2") 716 if (p != ncol(R)) 717 stop("R and x don't match p") 718 if (tau < eps || tau > 1 - eps) 719 stop("No parametric Frisch-Newton method. Set tau in (0,1)") 720 rhs <- (1 - tau) * apply(x, 2, sum) 721 u <- rep(1, max(n1,n2)) #upper bound vector and scratch vector 722 wn1 <- rep(0, 9 * n1) 723 wn1[1:n1] <- (1 - tau) #store the values of x1 724 wn2 <- rep(0, 6 * n2) 725 wn2[1:n2] <- 1 #store the values of x2 726 z <- .Fortran("rqfnc", as.integer(n1), as.integer(n2), as.integer(p), 727 a1 = as.double(t(as.matrix(x))), c1 = as.double(-y), 728 a2 = as.double(t(as.matrix(R))), c2 = as.double(-r), 729 rhs = as.double(rhs), d1 = double(n1), d2 = double(n2), 730 as.double(u), beta = as.double(beta), eps = as.double(eps), 731 wn1 = as.double(wn1), wn2 = as.double(wn2), wp = double((p + 3) * p), 732 it.count = integer(3), info = integer(1)) 733 if (z$info != 0) 734 stop(paste("Error info = ", z$info, "in stepy2: singular design")) 735 coefficients <- -z$wp[1:p] 736 names(coefficients) <- dimnames(x)[[2]] 737 residuals <- y - x %*% coefficients 738 it.count <- z$it.count 739 list(coefficients=coefficients, tau=tau, residuals=residuals, it = it.count) 740} 741"rq.fit.scad" <- 742function (x, y, tau = 0.5, alpha = 3.2, lambda = 1, start = "rq", beta = 0.9995, eps = 1e-06) 743{ 744 n <- length(y) 745 p <- ncol(x) 746 if (n != nrow(x)) 747 stop("x and y don't match n") 748 if (tau < eps || tau > 1 - eps) 749 stop("No parametric Frisch-Newton method. Set tau in (0,1)") 750 if(length(lambda) == 1) 751 lambda <- c(0,rep(lambda,p-1)) 752 if(length(lambda) != p) 753 stop(paste("lambda must be either of length ",p," or length one")) 754 if(any(lambda < 0)) 755 stop("negative lambdas disallowed") 756 R <- diag(lambda,nrow = length(lambda)) 757 R <- R[which(lambda != 0),, drop = FALSE] 758 r <- rep(0,nrow(R)) 759 X <- rbind(x, R) 760 Y <- c(y, r) 761 N <- length(Y) 762 rhs <- (1 - tau) * apply(x, 2, sum) + apply(R,2,sum) 763 dscad <- function(x, a = 3.7, lambda = 2){ 764 lambda * sign(x) * (abs(x) <= lambda) + 765 sign(x) * (a * lambda - abs(x)) / (a - 1) * 766 (abs(x) <= a * lambda) * (abs(x) > lambda) 767 } 768 binit <- switch(start, 769 rq = rq.fit.fnb(x, y, tau = tau)$coef[-1], 770 lasso = rq.fit.lasso(x, y, tau = tau, lambda = lambda)$coef[-1] 771 ) 772 coef <- rep(.Machine$double.xmax,p) 773 vscad <- rhs - c(0,dscad(binit) * sign(binit)) 774 it <- 0 775 while(sum(abs(binit - coef[-1])) > eps){ 776 it <- it + 1 777 d <- rep(1, N) 778 u <- rep(1, N) 779 wn <- rep(0, 10 * N) 780 wn[1:N] <- c(rep((1 - tau),n),rep(.5,nrow(R))) 781 vrhs <- rhs - vscad 782 binit <- coef[-1] 783 z <- .Fortran("rqfnb", as.integer(N), as.integer(p), a = as.double(t(as.matrix(X))), 784 c = as.double(-Y), vrhs = as.double(vrhs), d = as.double(d), 785 as.double(u), beta = as.double(beta), eps = as.double(eps), 786 wn = as.double(wn), wp = double((p + 3) * p), 787 it.count = integer(3), info = integer(1)) 788 coef <- -z$wp[1:p] 789 vscad <- c(0,dscad(coef[2:p]) * sign(coef[2:p])) 790 } 791 if (z$info != 0) 792 stop(paste("Error info = ", z$info, "in stepy2: singular design")) 793 coefficients <- -z$wp[1:p] 794 names(coefficients) <- dimnames(x)[[2]] 795 residuals <- y - x %*% coefficients 796 it.count <- z$it.count 797 list(coefficients=coefficients, residuals=residuals, tau = tau, 798 lambda = lambda, it = it.count) 799} 800 801 802"rq.fit.lasso" <- 803function (x, y, tau = 0.5, lambda = NULL, beta = 0.99995, eps = 1e-06) 804{ 805 n <- length(y) 806 p <- ncol(x) 807 if (n != nrow(x)) 808 stop("x and y don't match n") 809 if(!length(lambda)) 810 lambda <- LassoLambdaHat(x, tau = tau) 811 else if(length(lambda) == 1) 812 lambda <- c(0,rep(lambda,p-1)) 813 else if(length(lambda) != p) 814 stop(paste("lambda must be either of length ",p," or length one")) 815 if(any(lambda < 0)) 816 stop("negative lambdas disallowed") 817 R <- diag(lambda,nrow = length(lambda)) 818 R <- R[which(lambda != 0),, drop = FALSE] 819 r <- rep(0,nrow(R)) 820 if (tau < eps || tau > 1 - eps) 821 stop("No parametric Frisch-Newton method. Set tau in (0,1)") 822 X <- rbind(x, R) 823 Y <- c(y, r) 824 N <- length(Y) 825 rhs <- (1 - tau) * apply(x, 2, sum) + 0.5 * apply(R,2,sum) 826 d <- rep(1, N) 827 u <- rep(1, N) 828 wn <- rep(0, 10 * N) 829 wn[1:N] <- 0.5 830 z <- .Fortran("rqfnb", as.integer(N), as.integer(p), a = as.double(t(as.matrix(X))), 831 c = as.double(-Y), rhs = as.double(rhs), d = as.double(d), 832 as.double(u), beta = as.double(beta), eps = as.double(eps), 833 wn = as.double(wn), wp = double((p + 3) * p), 834 it.count = integer(3), info = integer(1)) 835 if (z$info != 0) 836 stop(paste("Error info = ", z$info, "in stepy2: singular design")) 837 coefficients <- -z$wp[1:p] 838 names(coefficients) <- dimnames(x)[[2]] 839 residuals <- y - x %*% coefficients 840 it.count <- z$it.count 841 list(coefficients=coefficients, residuals=residuals, tau = tau, 842 lambda = lambda, it = it.count) 843} 844 845"rq.fit.pfn" <- 846# This is an implementation (purely in R) of the preprocessing phase 847# of the rq algorithm described in Portnoy and Koenker, Statistical 848# Science, (1997) 279-300. In this implementation it can be used 849# as an alternative method for rq() by specifying method="pfn" 850# It should probably be used only on very large problems and then 851# only with some caution. Very large in this context means roughly 852# n > 100,000. The options are described in the paper, and more 853# explicitly in the code. Again, it would be nice perhaps to have 854# this recoded in a lower level language, but in fact this doesn't 855# seem to make a huge difference in this case since most of the work 856# is already done in the rq.fit.fnb calls. 857# 858function(x, y, tau = 0.5, Mm.factor = 0.8, 859 max.bad.fixups = 3, eps = 1e-6) 860{ 861 #rq function for n large -- 862 n <- length(y) 863 if(nrow(x) != n) 864 stop("x and y don't match n") 865 if(tau < 0 | tau > 1) 866 stop("tau outside (0,1)") 867 p <- ncol(x) 868 m <- round(sqrt(p) * n^(2/3)) 869 not.optimal <- TRUE 870 ifix = 0 871 ibad = 0 872 while(not.optimal) { 873 ibad = ibad + 1 874 if(m < n) 875 s <- sample(n, m) 876 else { 877 z <- rq.fit.fnb(x, y, tau = tau, eps = eps) 878 break 879 } 880 xx <- x[s, ] 881 yy <- y[s] 882 z <- rq.fit.fnb(xx, yy, tau = tau, eps = eps) 883 xxinv <- solve(chol(crossprod(xx))) 884 band <- sqrt(((x %*% xxinv)^2) %*% rep(1, p)) 885 #sqrt(h<-ii) 886 r <- y - x %*% z$coef 887 M <- Mm.factor * m 888 lo.q <- max(1/n, tau - M/(2 * n)) 889 hi.q <- min(tau + M/(2 * n), (n - 1)/n) 890 kappa <- quantile(r/pmax(eps, band), c(lo.q, hi.q)) 891 sl <- r < band * kappa[1] 892 su <- r > band * kappa[2] 893 bad.fixups <- 0 894 while(not.optimal & (bad.fixups < max.bad.fixups)) { 895 ifix = ifix + 1 896 xx <- x[!su & !sl, ] 897 yy <- y[!su & !sl] 898 if(any(sl)) { 899 glob.x <- c(t(x[sl, , drop = FALSE]) %*% rep( 900 1, sum(sl))) 901 glob.y <- sum(y[sl]) 902 xx <- rbind(xx, glob.x) 903 yy <- c(yy, glob.y) 904 } 905 if(any(su)) { 906 ghib.x <- c(t(x[su, , drop = FALSE]) %*% rep( 907 1, sum(su))) 908 ghib.y <- sum(y[su]) 909 xx <- rbind(xx, ghib.x) 910 yy <- c(yy, ghib.y) 911 } 912 z <- rq.fit.fnb(xx, yy, tau = tau, eps = eps) 913 b <- z$coef 914 r <- y - x %*% b 915 su.bad <- (r < 0) & su 916 sl.bad <- (r > 0) & sl 917 if(any(c(su.bad, sl.bad))) { 918 if(sum(su.bad | sl.bad) > 0.1 * M) { 919 # This warning may get annoying? 920 warning("Too many fixups: doubling m") 921 bad.fixups <- bad.fixups + 1 922 m <- 2 * m 923 break 924 } 925 su <- su & !su.bad 926 sl <- sl & !sl.bad 927 } 928 else not.optimal <- FALSE 929 } 930 } 931 nit <- c(z$nit,ifix,ibad) 932 coefficients <- z$coef 933 names(coefficients) <- dimnames(x)[[2]] 934 list(coefficients=coefficients, tau=tau, nit = nit) 935} 936 937"rq.wfit" <- 938function(x, y, tau = 0.5, weights, method = "br", ...) 939{ 940 if(any(weights < 0)) 941 stop("negative weights not allowed") 942 if(length(tau) > 1) { 943 tau <- tau[1] 944 warning("Multiple taus not allowed in rq.wfit: solution restricted to first element") 945 } 946 contr <- attr(x, "contrasts") 947 wx <- x * weights 948 wy <- y * weights 949 fit <- switch(method, 950 fn = rq.fit.fnb(wx, wy, tau = tau, ...), 951 fnb = rq.fit.fnb(wx, wy, tau = tau, ...), 952 br = rq.fit.br(wx, wy, tau = tau, ...), 953 fnc = rq.fit.fnc(wx, wy, tau = tau, ...), 954 sfn = rq.fit.sfn(wx, wy, tau = tau, ...), 955 conquer = rq.fit.conquer(wx, wy, tau = tau, ...), 956 ppro = rq.fit.ppro(wx, wy, tau = tau, ...), 957 pfn = rq.fit.pfn(wx, wy, tau = tau, ...), 958 pfnb = rq.fit.pfnb(wx, wy, tau = tau, ...), { 959 what <- paste("rq.fit.", method, sep = "") 960 if(exists(what, mode = "function")) 961 (get(what, mode = "function"))(x, y, ...) 962 else stop(paste("unimplemented method:", method)) 963 } 964 ) 965 if(length(fit$sol)) 966 fit$fitted.values <- x %*% fit$sol[-(1:3),] 967 else 968 fit$fitted.values <- x %*% fit$coef 969 fit$residuals <- y - fit$fitted.values 970 fit$contrasts <- attr(x, "contrasts") 971 fit$weights <- weights 972 fit 973} 974 975"summary.rqs" <- 976function (object, ...) { 977 taus <- object$tau 978 xsum <- as.list(taus) 979 dots <- list(...) 980 U <- NULL # Reuse bootstrap randomization 981 for(i in 1:length(taus)){ 982 xi <- object 983 xi$coefficients <- xi$coefficients[,i] 984 xi$residuals <- xi$residuals[,i] 985 xi$tau <- xi$tau[i] 986 class(xi) <- "rq" 987 xsum[[i]] <- summary(xi, U = U, ...) 988 if(i == 1 && length(xsum[[i]]$U)) U <- xsum[[i]]$U 989 if(class(object)[1] == "dynrqs"){ 990 class(xsum[[1]]) <- c("summary.dynrq", "summary.rq") 991 if(i == 1) xsum[[1]]$model <- object$model 992 } 993 } 994 class(xsum) <- "summary.rqs" 995 if(class(object)[1] == "dynrqs") 996 class(xsum) <- c("summary.dynrqs", "summary.rqs") 997 xsum 998 } 999"logLik.rq" <- function(object, ...){ 1000 n <- length(object$residuals) 1001 p <- length(object$coefficients) 1002 pen <- (length(object$lambda) > 0) 1003 tau <- object$tau 1004 fid <- object$rho 1005 val <- n * (log(tau * (1-tau)) - 1 - log(fid/n)) 1006 attr(val,"n") <- n 1007 if(pen){ 1008 if(!hasArg(edfThresh)) edfThresh <- 0.0001 1009 attr(val,"df") <- sum(abs(object$coefficients) > edfThresh) 1010 } 1011 else attr(val,"df") <- p 1012 class(val) <- "logLik" 1013 val 1014 } 1015"logLik.rqs" <- function(object, ...){ 1016 n <- nrow(object$residuals) 1017 p <- nrow(object$coefficients) 1018 pen <- (length(object$lambda) > 0) 1019 tau <- object$tau 1020 fid <- object$rho 1021 val <- n * (log(tau * (1-tau)) - 1 - log(fid/n)) 1022 attr(val,"n") <- n 1023 if(pen){ 1024 if(!hasArg(edfThresh)) edfThresh <- 0.0001 1025 attr(val,"df") <- apply(abs(object$coefficients) > edfThresh,2,sum) 1026 } 1027 else attr(val,"df") <- p 1028 class(val) <- "logLik" 1029 val 1030 } 1031"AIC.rq" <- function(object, ... , k = 2){ 1032 v <- logLik(object) 1033 if(k <= 0) 1034 k <- log(attr(v,"n")) 1035 val <- AIC(v, k = k) 1036 attr(val,"edf") <- attr(v,"df") 1037 val 1038 } 1039"extractAIC.rq" <- function(fit, scale, k=2, ...){ 1040aic <- AIC(fit,k) 1041edf <- attr(aic, "edf") 1042c(edf, aic) 1043} 1044 1045"AIC.rqs" <- function(object, ... , k = 2){ 1046 v <- logLik(object) 1047 if(k <= 0) 1048 k <- log(attr(v,"n")) 1049 val <- AIC(v, k = k) 1050 attr(val,"edf") <- attr(v,"df") 1051 val 1052 } 1053 1054 1055"summary.rq" <- 1056# This function provides methods for summarizing the output of the 1057# rq command. In this instance, "summarizing" means essentially provision 1058# of either standard errors, or confidence intervals for the rq coefficents. 1059# Since the preferred method for confidence intervals is currently the 1060# rank inversion method available directly from rq() by setting ci=TRUE, with br=TRUE. 1061# these summary methods are intended primarily for comparison purposes 1062# and for use on large problems where the parametric programming methods 1063# of rank inversion are prohibitively memory/time consuming. Eventually 1064# iterative versions of rank inversion should be developed that would 1065# employ the Frisch-Newton approach. 1066# 1067# Object is the result of a call to rq(), and the function returns a 1068# table of coefficients, standard errors, "t-statistics", and p-values, and, if 1069# covariance=TRUE a structure describing the covariance matrix of the coefficients, 1070# i.e. the components of the Huber sandwich. 1071# 1072# There are five options for "se": 1073# 1074# 1. "rank" strictly speaking this doesn't produce a "standard error" 1075# at all instead it produces a coefficient table with confidence 1076# intervals for the coefficients based on inversion of the 1077# rank test described in GJKP and Koenker (1994). 1078# 2. "iid" which presumes that the errors are iid and computes 1079# an estimate of the asymptotic covariance matrix as in KB(1978). 1080# 3. "nid" which presumes local (in tau) linearity (in x) of the 1081# the conditional quantile functions and computes a Huber 1082# sandwich estimate using a local estimate of the sparsity. 1083# 4. "ker" which uses a kernel estimate of the sandwich as proposed 1084# by Powell. 1085# 5. "boot" which uses one of several flavors of bootstrap methods: 1086# "xy" uses xy-pair method 1087# "wxy" uses weighted (generalized) method 1088# "pwy" uses the parzen-wei-ying method 1089# "pxy" uses the preprocessing method 1090# "mcmb" uses the Markov chain marginal bootstrap method 1091# "cluster" uses the Hagemann clustered wild gradient method 1092# "conquer" uses the He et al multiplier bootstrap 1093# "BLB" uses the Bag of Little Bootstraps method 1094# 1095# 1096function (object, se = NULL, covariance = FALSE, hs = TRUE, U = NULL, gamma = 0.7, ...) 1097{ 1098 if(object$method == "lasso") 1099 stop("no inference for lasso'd rq fitting: try rqss (if brave, or credulous)") 1100 if(object$method == "conquer") se = "conquer" 1101 mt <- terms(object) 1102 m <- model.frame(object) 1103 y <- model.response(m) 1104 dots <- list(...) 1105 method <- object$method 1106 if(object$method == "sfn"){ 1107 x <- object$model$x 1108 vnames <- names(object$coef) 1109 ctrl <- object$control 1110 } 1111 else{ 1112 x <- model.matrix(mt, m, contrasts = object$contrasts) 1113 vnames <- dimnames(x)[[2]] 1114 } 1115 wt <- as.vector(model.weights(object$model)) 1116 tau <- object$tau 1117 eps <- .Machine$double.eps^(1/2) 1118 coef <- coefficients(object) 1119 if (is.matrix(coef)) 1120 coef <- coef[, 1] 1121 resid <- object$residuals 1122 n <- length(y) 1123 p <- length(coef) 1124 rdf <- n - p 1125 if (!is.null(wt)) { 1126 resid <- resid * wt 1127 x <- x * wt 1128 y <- y * wt 1129 } 1130 if (is.null(se)) { 1131 if (n < 1001 & covariance == FALSE) 1132 se <- "rank" 1133 else se <- "nid" 1134 } 1135 if (se == "rank") { 1136 f <- rq.fit.br(x, y, tau = tau, ci = TRUE, ...) 1137 } 1138 if (se == "iid") { 1139 xxinv <- diag(p) 1140 xxinv <- backsolve(qr(x)$qr[1:p, 1:p,drop=FALSE], xxinv) 1141 xxinv <- xxinv %*% t(xxinv) 1142 pz <- sum(abs(resid) < eps) 1143 h <- max(p + 1, ceiling(n * bandwidth.rq(tau, n, hs = hs))) 1144 ir <- (pz + 1):(h + pz + 1) 1145 ord.resid <- sort(resid[order(abs(resid))][ir]) 1146 xt <- ir/(n - p) 1147 sparsity <- rq(ord.resid ~ xt)$coef[2] 1148 cov <- sparsity^2 * xxinv * tau * (1 - tau) 1149 scale <- 1/sparsity 1150 serr <- sqrt(diag(cov)) 1151 } 1152 else if (se == "nid") { 1153 h <- bandwidth.rq(tau, n, hs = hs) 1154 while((tau - h < 0) || (tau + h > 1)) h <- h/2 1155 bhi <- rq.fit(x, y, tau = tau + h, method = method)$coef 1156 blo <- rq.fit(x, y, tau = tau - h, method = method)$coef 1157 dyhat <- x %*% (bhi - blo) 1158 if (any(dyhat <= 0)) 1159 warning(paste(sum(dyhat <= 0), "non-positive fis")) 1160 f <- pmax(0, (2 * h)/(dyhat - eps)) 1161 fxxinv <- diag(p) 1162 if(method == "sfn"){ 1163 D <- t(x) %*% (f * x) 1164 D <- chol(0.5 * (D + t(D)), nsubmax = ctrl$nsubmax, 1165 nnzlmax = ctrl$nnzlmax, tmpmax = ctrl$tmpmax) 1166 fxxinv <- backsolve(D, fxxinv) 1167 } 1168 else{ 1169 fxxinv <- backsolve(qr(sqrt(f) * x)$qr[1:p, 1:p,drop=FALSE], fxxinv) 1170 fxxinv <- fxxinv %*% t(fxxinv) 1171 } 1172 xx <- t(x) %*% x 1173 cov <- tau * (1 - tau) * fxxinv %*% xx %*% fxxinv 1174 scale <- mean(f) 1175 serr <- sqrt(diag(cov)) 1176 } 1177 else if (se == "ker") { 1178 h <- bandwidth.rq(tau, n, hs = hs) 1179 while((tau - h < 0) || (tau + h > 1)) h <- h/2 1180 uhat <- c(y - x %*% coef) 1181 h <- (qnorm(tau + h) - qnorm(tau - h))* 1182 min(sqrt(var(uhat)), ( quantile(uhat,.75)- quantile(uhat, .25))/1.34 ) 1183 f <- dnorm(uhat/h)/h 1184 fxxinv <- diag(p) 1185 fxxinv <- backsolve(qr(sqrt(f) * x)$qr[1:p, 1:p,drop=FALSE], fxxinv) 1186 fxxinv <- fxxinv %*% t(fxxinv) 1187 cov <- tau * (1 - tau) * fxxinv %*% crossprod(x) %*% 1188 fxxinv 1189 scale <- mean(f) 1190 serr <- sqrt(diag(cov)) 1191 } 1192 else if (se == "boot") { 1193 if("cluster" %in% names(dots)) { 1194 bargs <- modifyList(list(x = x, y = y, tau = tau), dots) 1195 if(length(object$na.action)) { 1196 cluster <- dots$cluster[-object$na.action] 1197 bargs <- modifyList(bargs, list(cluster = cluster)) 1198 } 1199 if(class(bargs$x)[1] == "matrix.csr") 1200 bargs <- modifyList(bargs, list(control = ctrl)) 1201 B <- do.call(boot.rq, bargs) 1202 } 1203 else 1204 B <- boot.rq(x, y, tau, coef = coef, ...) 1205 cov <- cov(B$B) 1206 serr <- sqrt(diag(cov)) 1207 } 1208 else if (se == "BLB"){ # Bag of Little Bootstraps 1209 n <- length(y) 1210 b <- ceiling(n^gamma) 1211 S <- n %/% b 1212 V <- matrix(sample(1:n, b * S), b, S) 1213 Z <- matrix(0, NCOL(x), S) 1214 for(i in 1:S){ 1215 v <- V[,i] 1216 B <- boot.rq(x[v,], y[v], tau, bsmethod = "BLB", blbn = n, ...) 1217 Z[,i] <- sqrt(diag(cov(B$B))) 1218 } 1219 cov <- cov(B$B) 1220 serr <- apply(Z, 1, mean) 1221 } 1222 else if(se == "extreme"){ 1223 tau0 <- tau 1224 if(tau > 0.5) { 1225 y <- -y 1226 tau <- 1 - tau 1227 } 1228 if(length(dots$mofn)) mofn = dots$mofn 1229 else mofn = floor(n/5) 1230 if(length(dots$mofn)) kex = dots$kex 1231 else kex = 20 1232 if(length(dots$alpha)) alpha = dots$alpha 1233 else alpha = 0.1 1234 if(length(dots$R)) R = dots$R 1235 else R = 200 1236 m <- (tau * n + kex)/(tau * n) 1237 taub <- min(tau * n/mofn, tau + (.5 - tau)/3) 1238 # This warning is a bit different than Section 1.3.4 of the Handbook chapter 1239 #if (tau.b.e == tau.e + (.5 - tau.e) / 3 && b >= min(n / 3, 1000)) 1240 # warning("tau may be non-extremal results are not likely to differ from central inference"); 1241 xbar <- apply(x,2, mean) 1242 b0 <- rq.fit(x, y, tau, method = method)$coef 1243 bm <- rq.fit(x, y, tau = m*tau, method = method)$coef 1244 An <- (m-1)*tau * sqrt(n/(tau*(1-tau)))/c(crossprod(xbar,bm - b0)) 1245 bt <- rq.fit(x, y, tau=taub, method = method)$coef 1246 s <- matrix(sample(1:n, mofn * R, replace = T), mofn, R) 1247 mbe <- (taub * mofn + kex)/(taub * mofn) 1248 bmbeb <- rq.fit(x, y, tau = mbe*taub, method = method)$coef 1249 # Accelerated xy bootstrap 1250 B0 <- boot.rq.pxy(x, y, s, taub, bt, method = method) 1251 Bm <- boot.rq.pxy(x, y, s, tau = mbe * taub, bmbeb, method = method) 1252 B <- (mbe - 1) * taub * sqrt(mofn/(taub * (1-taub))) * 1253 (B0 - b0)/c((Bm - B0) %*% xbar) 1254 if (tau0 <= 0.5) { 1255 bbc <- b0 - apply(B, 2, quantile, .5, na.rm = TRUE)/An 1256 ciL <- b0 - apply(B, 2, quantile, 1 - alpha/2, na.rm = TRUE)/An 1257 ciU <- b0 - apply(B, 2, quantile, alpha/2, na.rm = TRUE)/An 1258 } else { 1259 bbc <- -(b0 - apply(B, 2, quantile, .5, na.rm = TRUE)/An) 1260 ciL <- -(b0 - apply(B, 2, quantile, alpha/2, na.rm = TRUE)/An) 1261 ciU <- -(b0 - apply(B, 2, quantile, 1 - alpha/2, na.rm = TRUE)/An) 1262 } 1263 B <- R-sum(is.na(B[,1])) 1264 coef <- cbind(b0, bbc, ciL, ciU) 1265 if(tau0 > 0.5) {coef <- -coef; tau <- tau0} 1266 dimnames(coef) = list(dimnames(x)[[2]], c("coef", "BCcoef","ciL", "ciU")) 1267} 1268 else if(se == "conquer"){ 1269 if(length(dots$R)) R = dots$R 1270 else R = 200 1271 Z <- conquer(x[,-1], y, tau, ci = TRUE, B = R) 1272 #Fixme: should have option to choose another bsmethod 1273 coef <- cbind(Z$coef, Z$perCI) 1274 cnames <- c("coefficients", "lower bd", "upper bd") 1275 dimnames(coef) <- list(vnames, cnames) 1276 resid <- y - x %*% Z$coef 1277 } 1278 1279 if( se == "rank"){ 1280 coef <- f$coef 1281 } 1282 else if(!(se %in% c("conquer", "extreme"))){ 1283 coef <- array(coef, c(p, 4)) 1284 dimnames(coef) <- list(vnames, c("Value", "Std. Error", "t value", 1285 "Pr(>|t|)")) 1286 coef[, 2] <- serr 1287 coef[, 3] <- coef[, 1]/coef[, 2] 1288 coef[, 4] <- if (rdf > 0) 1289 2 * (1 - pt(abs(coef[, 3]), rdf)) 1290 else NA 1291 } 1292 object <- object[c("call", "terms")] 1293 if (covariance == TRUE) { 1294 if(se != "rank") object$cov <- cov 1295 if(se == "iid") object$scale <- scale 1296 if(se %in% c("nid", "ker")) { 1297 object$Hinv <- fxxinv 1298 object$J <- crossprod(x) 1299 object$scale <- scale 1300 } 1301 else if (se == "boot") { 1302 object$B <- B$B 1303 object$U <- B$U 1304 } 1305 } 1306 object$coefficients <- coef 1307 object$residuals <- resid 1308 object$rdf <- rdf 1309 object$tau <- tau 1310 class(object) <- "summary.rq" 1311 object 1312} 1313 1314akj <- function(x, 1315 z = seq(min(x), max(x), length = 2 * length(x)), 1316 p = rep(1/length(x), length(x)), 1317 h = -1, alpha = 0.5, kappa = 0.9, iker1 = 0) 1318{ 1319 nx <- length(x) 1320 stopifnot(is.numeric(x), 1321 length(p) == nx, 1322 any((iker1 <- as.integer(iker1)) == 0:1)) 1323 nz <- length(z) 1324 if(is.unsorted(x)) 1325 x <- sort(x) 1326 .Fortran("sakj", 1327 as.double(x), 1328 as.double(z), 1329 as.double(p), 1330 iker1, 1331 dens = double(nz), 1332 psi = double(nz), 1333 score = double(nz), 1334 as.integer(nx), 1335 as.integer(nz), 1336 h = as.double(h), 1337 as.double(alpha), 1338 as.double(kappa), 1339 double(nx))[c("dens", "psi", "score", "h")] 1340} 1341 1342"lm.fit.recursive" <- 1343function(X, y, int = TRUE) 1344{ 1345 if(int) 1346 X <- cbind(1, X) 1347 p <- ncol(X) 1348 n <- nrow(X) 1349 D <- qr(X[1:p, ]) 1350 A <- qr.coef(D, diag(p)) 1351 A[is.na(A)] <- 0 1352 A <- crossprod(t(A)) 1353 Ax <- rep(0, p) 1354 b <- matrix(0, p, n) 1355 b[, p] <- qr.coef(D, y[1:p]) 1356 b[is.na(b)] <- 0 1357 z <- .Fortran( "rls", 1358 as.integer(n), 1359 as.integer(p), 1360 as.double(t(X)), 1361 as.double(y), 1362 b = as.double(b), 1363 as.double(A), 1364 as.double(Ax)) 1365 bhat <- matrix(z$b, p, n) 1366 return(bhat) 1367} 1368 1369"rq.fit.hogg" <- 1370function (x, y, taus = c(.1,.3,.5), weights = c(.7,.2,.1), 1371 R= NULL, r = NULL, beta = 0.99995, eps = 1e-06) 1372{ 1373 n <- length(y) 1374 n2 <- NROW(R) 1375 m <- length(taus) 1376 p <- ncol(x)+m 1377 if (n != nrow(x)) 1378 stop("x and y don't match n") 1379 if (m != length(weights)) 1380 stop("taus and weights differ in length") 1381 if (any(taus < eps) || any(taus > 1 - eps)) 1382 stop("taus outside (0,1)") 1383 W <- diag(weights) 1384 if(m == 1) W <- weights 1385 x <- as.matrix(x) 1386 X <- cbind(kronecker(W,rep(1,n)),kronecker(weights,x)) 1387 y <- kronecker(weights,y) 1388 rhs <- c(weights*(1 - taus)*n, sum(weights*(1-taus)) * apply(x, 2, sum)) 1389 if(n2!=length(r)) 1390 stop("R and r of incompatible dimension") 1391 if(!is.null(R)) 1392 if(ncol(R)!=p) 1393 stop("R and X of incompatible dimension") 1394 d <- rep(1, m*n) 1395 u <- rep(1, m*n) 1396 if(length(r)){ 1397 wn1 <- rep(0, 10 * m*n) 1398 wn1[1:(m*n)] <- .5 1399 wn2 <- rep(0,6*n2) 1400 wn2[1:n2] <- 1 1401 z <- .Fortran("rqfnc", as.integer(m*n), as.integer(n2), as.integer(p), 1402 a1 = as.double(t(as.matrix(X))), c1 = as.double(-y), 1403 a2 = as.double(t(as.matrix(R))), c2 = as.double(-r), 1404 rhs = as.double(rhs), d1 = double(m*n), d2 = double(n2), 1405 as.double(u), beta = as.double(beta), eps = as.double(eps), 1406 wn1 = as.double(wn1), wn2 = as.double(wn2), wp = double((p + 3) * p), 1407 it.count = integer(3), info = integer(1)) 1408 } 1409 else{ 1410 wn <- rep(0, 10 * m*n) 1411 wn[1:(m*n)] <- .5 1412 z <- .Fortran("rqfnb", as.integer(m*n), as.integer(p), a = as.double(t(as.matrix(X))), 1413 c = as.double(-y), rhs = as.double(rhs), d = as.double(d), as.double(u), 1414 beta = as.double(beta), eps = as.double(eps), wn = as.double(wn), 1415 wp = double((p + 3) * p), it.count = integer(2), info = integer(1)) 1416 } 1417 if (z$info != 0) 1418 warning(paste("Info = ", z$info, "in stepy: singular design: iterations ", z$it.count[1])) 1419 coefficients <- -z$wp[1:p] 1420 if(any(is.na(coefficients)))stop("NA coefs: infeasible problem?") 1421 list(coefficients = coefficients, nit = z$it.count, flag = z$info) 1422} 1423#preprocessing for the QR process 1424"rq.fit.ppro" <- function (x, y, tau, weights=NULL, Mm.factor = 0.8, eps = 1e-06, ...) 1425{ 1426 ntau <- length(tau) 1427 n <- length(y) 1428 if (nrow(x) != n) stop("x and y don't match n") 1429 p <- ncol(x) 1430 m <- n * sqrt(p) * max(diff(tau)) # check this length(tau) == 1 case? 1431 dots <- list(...) 1432 method <- ifelse(length(dots$pmethod), dots$pmethod, "fn") 1433 if(length(weights)){ 1434 if(any(weights < 0)) 1435 stop("negative weights not allowed") 1436 if(length(weights) != n) 1437 stop("weights not of length(y)") 1438 else { 1439 x <- x * weights 1440 y <- y * weights 1441 } 1442 } 1443 coef <- matrix(NA, p, ntau) 1444 resid <- matrix(NA, n, ntau) 1445 rho <- rep(0, ntau) 1446 Rho <- function(u, tau) u * (tau - (u < 0)) 1447 z <- rq.fit(x, y, tau=tau[1], method = method) 1448 r <- z$resid 1449 coef[,1] <- z$coef 1450 rho[1] <- sum(Rho(z$residuals, tau[1])) 1451 xxinv <- solve(chol(crossprod(x))) 1452 # Is pmax really necessary here? 1453 band <- pmax(eps, sqrt(((x %*% xxinv)^2) %*% rep(1, p))) 1454 for(i in 2:ntau){ 1455 not.optimal <- TRUE 1456 mm <- m 1457 while (not.optimal) { 1458 M <- Mm.factor * mm 1459 lo.q <- max(1/n, tau[i] - M/(2 * n)) 1460 hi.q <- min(tau[i] + M/(2 * n), (n - 1)/n) 1461 kappa <- quantile(r/band, c(lo.q, hi.q)) 1462 sl <- r < band * kappa[1] 1463 su <- r > band * kappa[2] 1464 while (not.optimal) { 1465 xx <- x[!su & !sl, ] 1466 yy <- y[!su & !sl] 1467 if (any(sl)) { 1468 glob.x <- c(t(x[sl, , drop = FALSE]) %*% rep(1, sum(sl))) 1469 glob.y <- sum(y[sl]) 1470 xx <- rbind(xx, glob.x) 1471 yy <- c(yy, glob.y) 1472 } 1473 if (any(su)) { 1474 ghib.x <- c(t(x[su, , drop = FALSE]) %*% rep(1, sum(su))) 1475 ghib.y <- sum(y[su]) 1476 xx <- rbind(xx, ghib.x) 1477 yy <- c(yy, ghib.y) 1478 } 1479 z <- rq.fit(xx, yy, tau = tau[i], method = method) 1480 b <- z$coef 1481 r <- y - x %*% b 1482 su.bad <- (r < 0) & su 1483 sl.bad <- (r > 0) & sl 1484 bad.signs <- sum(su.bad | sl.bad) 1485 if (bad.signs > 0) { 1486 if (bad.signs > 0.1 * M) { 1487 mm <- 2*mm 1488 warning("Too many fixups: doubling m") 1489 break 1490 } 1491 su <- su & !su.bad 1492 sl <- sl & !sl.bad 1493 } 1494 else not.optimal <- FALSE 1495 } 1496 } 1497 coef[,i] <- b 1498 resid[,i] <- y - x %*% b 1499 rho[i] <- sum(Rho(resid, tau[i])) 1500 } 1501 dimnames(coef) <- list(dimnames(x)[[2]], tau) 1502 list(coefficients=coef, residuals = resid, rho=rho, weights = weights) 1503} 1504 1505# R function for fnb call for multiple taus 1506rq.fit.qfnb <- function(x,y,tau){ 1507 n <- nrow(x) 1508 p <- ncol(x) 1509 m <- length(tau) 1510 d <- rep(1, n) 1511 u <- rep(1, n) 1512 z <- .Fortran("qfnb", 1513 n = as.integer(n), 1514 p = as.integer(p), 1515 m = as.integer(m), 1516 a = as.double(t(x)), 1517 y = as.double(-y), 1518 t = as.double(tau), 1519 r = double(p), 1520 d = as.double(d), 1521 u = as.double(u), 1522 wn = double(n*9), 1523 wp = double(p*(p+3)), 1524 B = double(p*m), 1525 nit = integer(3), 1526 info = integer(1)) 1527 if(z$info != 0) 1528 warning(paste("Info = ", z$info, "in stepy: singular design: nit = ", z$nit[1])) 1529 coefficients <- matrix(-z$B, p, m) 1530 dimnames(coefficients) <- list(dimnames(x)[[2]],paste("tau = ",tau)) 1531 list(coefficients = coefficients, nit = z$nit, flag = z$info) 1532} 1533rq.fit.pfnb <- function (x, y, tau, m0 = NULL, eps = 1e-06) { 1534 m <- length(tau) 1535 n <- length(y) 1536 if(!is.matrix(x)) dim(x) <- c(n,1) 1537 if (nrow(x) != n) 1538 stop("x and y don't match n") 1539 p <- ncol(x) 1540 if(!length(m0)) 1541 m0 <- floor(n^(2/3) * sqrt(p)) # Needs testing! 1542 s <- sample(n,m0) 1543 xs <- x[s,,drop = FALSE] 1544 ys <- y[s] 1545 z <- rq.fit(xs, ys, tau = tau[1], method = "fn") 1546 r <- y - x %*% z$coef 1547 b <- matrix(0,p,m) 1548 nit <- matrix(0,5,m) 1549 xxinv <- solve(chol(crossprod(xs))) 1550 band <- pmax(eps, sqrt(((x %*% xxinv)^2) %*% rep(1, p))) 1551 z <- .Fortran("pfnb", 1552 as.integer(n), 1553 as.integer(p), 1554 as.integer(m), 1555 as.double(t(x)), 1556 as.double(y), 1557 as.double(tau), 1558 as.double(r), 1559 b = as.double(-b), 1560 as.double(band), 1561 as.integer(m0), 1562 double(n), 1563 double(n), 1564 double(n*9), 1565 double(p*(p+3)), 1566 double(p*n), 1567 double(n), 1568 integer(n), 1569 integer(n), 1570 double(p), 1571 double(p), 1572 double(p), 1573 nit = as.integer(nit), 1574 info = integer(m)) 1575 coefficients <- matrix(-z$b,p,m) 1576 nit <- matrix(z$nit,5,m) 1577 dimnames(coefficients) <- list(dimnames(x)[[2]],paste("tau = ",tau)) 1578 list(coefficients = coefficients, nit = nit, flag = z$info) 1579} 1580LassoLambdaHat <- function(X, R = 1000, tau = 0.5, C = 1, alpha = 0.95){ 1581 # Chernozhukov and Belloni default lasso lambda proposal: 1582 n <- nrow(X) 1583 sigs <- apply(X^2,2,mean) 1584 U <- matrix(runif(n * R),n) 1585 R <- (t(X) %*% (tau - (U < tau)))/sigs 1586 r <- apply(abs(R),2,max) 1587 C * quantile(r, 1 - alpha) * sigs 1588 } 1589 1590