## Gaussian Processes implementation. Laplace approximation for classification.
## author : alexandros karatzoglou

setGeneric("gausspr", function(x, ...) standardGeneric("gausspr"))
setMethod("gausspr",signature(x="formula"),
function (x, data=NULL, ..., subset, na.action = na.omit, scaled = TRUE){
  cl <- match.call()
  m <- match.call(expand.dots = FALSE)
  if (is.matrix(eval(m$data, parent.frame())))
    m$data <- as.data.frame(data)
  m$... <- NULL
  m$formula <- m$x
  m$x <- NULL
  m[[1L]] <- quote(stats::model.frame)
  m <- eval(m, parent.frame())
  Terms <- attr(m, "terms")
  attr(Terms, "intercept") <- 0
  x <- model.matrix(Terms, m)
  y <- model.extract(m, "response")

  if (length(scaled) == 1)
    scaled <- rep(scaled, ncol(x))
  if (any(scaled)) {
    remove <- unique(c(which(labels(Terms) %in% names(attr(x, "contrasts"))),
                       which(!scaled)
                       )
                     )
    scaled <- !attr(x, "assign") %in% remove
  }

  ret <- gausspr(x, y, scaled = scaled, ...)
  kcall(ret) <- cl
  terms(ret) <- Terms
  if (!is.null(attr(m, "na.action")))
    n.action(ret) <- attr(m, "na.action")
  return (ret)
})

setMethod("gausspr",signature(x="vector"),
function(x,...)
  {
    x <- t(t(x))
    ret <- gausspr(x, ...)
    ret
  })
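
## Minimal usage sketch (not run): the formula interface builds the model
## matrix and dispatches to the matrix method below. The data set and kernel
## settings here are only illustrative choices.
## data(iris)
## gp <- gausspr(Species ~ ., data = iris, kernel = "rbfdot", kpar = "automatic")
## predict(gp, iris[c(1, 51, 101), -5], type = "probabilities")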
setMethod("gausspr",signature(x="matrix"),
function (x,
          y,
          scaled = TRUE,
          type = NULL,
          kernel = "rbfdot",
          kpar = "automatic",
          var = 1,
          variance.model = FALSE,
          tol = 0.0005,
          cross = 0,
          fit = TRUE,
          ...
          ,subset
          ,na.action = na.omit)
{

## should become an option
  reduced <- FALSE
## subsetting and na-handling for matrices
  ret <- new("gausspr")
  if (!missing(subset)) x <- x[subset,]
  if (is.null(y))
    x <- na.action(x)
  else {
    df <- na.action(data.frame(y, x))
    y <- df[,1]
    x <- as.matrix(df[,-1])
  }
  ncols <- ncol(x)
  m <- nrows <- nrow(x)

  if (is.null(type))
    type(ret) <- if (is.factor(y)) "classification" else "regression"
  else
    type(ret) <- type

  x.scale <- y.scale <- NULL
  ## scaling
  if (length(scaled) == 1)
    scaled <- rep(scaled, ncol(x))
  if (any(scaled)) {
    co <- !apply(x[,scaled, drop = FALSE], 2, var)
    if (any(co)) {
      ## record the offending column names before scaled is overwritten
      const.names <- colnames(x[,scaled, drop = FALSE])[co]
      scaled <- rep(FALSE, ncol(x))
      warning(paste("Variable(s)",
                    paste("`", const.names, "'", sep="", collapse=" and "),
                    "constant. Cannot scale data.")
              )
    } else {
      xtmp <- scale(x[,scaled])
      x[,scaled] <- xtmp
      x.scale <- attributes(xtmp)[c("scaled:center","scaled:scale")]
      if (is.numeric(y)&&(type(ret)!="classification")) {
        y <- scale(y)
        y.scale <- attributes(y)[c("scaled:center","scaled:scale")]
        y <- as.vector(y)
      }
      tmpsc <- list(scaled = scaled, x.scale = x.scale, y.scale = y.scale)
    }
  }

  if (var < 10^-3)
    stop("Noise variance parameter var has to be greater than 10^-3")

  ## in case of classification: transform factors into integers
  if (is.factor(y)) {
    lev(ret) <- levels(y)
    y <- as.integer(y)
  }
  else {
    if (type(ret) == "classification" && any(as.integer(y) != y))
      stop("dependent variable has to be of factor or integer type for classification mode.")
    if (type(ret) == "classification")
      lev(ret) <- unique(y)
  }
  ## initialize
  nclass(ret) <- length(lev(ret))

  if (!is.null(type))
    type(ret) <- match.arg(type, c("classification", "regression"))

  if (is.character(kernel)) {
    kernel <- match.arg(kernel, c("rbfdot","polydot","tanhdot","vanilladot","laplacedot","besseldot","anovadot","splinedot"))

    if (is.character(kpar))
      if ((kernel == "tanhdot" || kernel == "vanilladot" || kernel == "polydot" || kernel == "besseldot" || kernel == "anovadot" || kernel == "splinedot") && kpar == "automatic")
        {
          cat(" Setting default kernel parameters ","\n")
          kpar <- list()
        }
  }

  if (!is.function(kernel))
    if (!is.list(kpar) && is.character(kpar) && (is(kernel, "rbfkernel") || is(kernel, "laplacekernel") || kernel == "laplacedot" || kernel == "rbfdot")) {
      kp <- match.arg(kpar, "automatic")
      if (kp == "automatic")
        kpar <- list(sigma = mean(sigest(x, scaled = FALSE)[c(1,3)]))
      cat("Using automatic sigma estimation (sigest) for RBF or laplace kernel","\n")
    }

  if (!is(kernel,"kernel"))
    {
      if (is(kernel,"function")) kernel <- deparse(substitute(kernel))
      kernel <- do.call(kernel, kpar)
    }
  if (!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'")

  p <- 0

  if (type(ret) == "classification")
    {
      indexes <- lapply(1:nclass(ret), function(kk) which(y == kk))
      for (i in 1:(nclass(ret)-1)) {
        jj <- i+1
        for (j in jj:nclass(ret)) {
          p <- p+1
          ## prepare data: stack the observations of classes i and j
          li <- length(indexes[[i]])
          lj <- length(indexes[[j]])
          xd <- x[c(indexes[[i]], indexes[[j]]), , drop = FALSE]
          if (y[indexes[[i]][1]] < y[indexes[[j]][1]])
            yd <- c(rep(1,li), rep(-1,lj))
          else
            yd <- c(rep(-1,li), rep(1,lj))
          if (reduced == FALSE) {
            K <- kernelMatrix(kernel, xd)
            gradnorm <- 1
            alphag <- solut <- rep(0, li+lj)
            while (gradnorm > tol)
              {
                f <- crossprod(K, alphag)
                grad <- -yd/(1 + exp(yd*f))
                hess <- exp(yd*f)
                hess <- hess / ((1 + hess)^2)

                ## We use solveiter instead of solve to speed up things
                ## A <- t(t(K)*as.vector(hess))
                ## diag(A) <- diag(A) + 1
                ## alphag <- alphag - solve(A,(grad + alphag))

                solut <- solveiter(K, hess, (grad + alphag), solut)
                alphag <- alphag - solut
                gradnorm <- sqrt(sum((grad + alphag)^2))
              }
          }
          else if (reduced == TRUE)
            {
              yind <- t(matrix(unique(yd),2,length(yd)))
              ymat <- matrix(0, length(yd), 2)
              ymat[yind==yd] <- 1
              ##Z <- csi(xd, ymat, kernel = kernel, rank = dim(yd)[1])
              ##Z <- Z[sort(pivots(Z),index.return = TRUE)$ix, ,drop=FALSE]
              Z <- inchol(xd, kernel = kernel)
              gradnorm <- 1
              alphag <- rep(0, li+lj)
              n1 <- dim(Z)[2]
              Ksub <- diag(rep(1,n1))

              while (gradnorm > tol)
                {
                  f <- drop(Z%*%crossprod(Z,alphag))
                  f[f > 20] <- 20
                  grad <- -yd/(1 + exp(yd*f))
                  hess <- exp(yd*f)
                  hess <- as.vector(hess / ((1 + hess)^2))

                  ## Newton step through the Woodbury identity with K ~ Z %*% t(Z):
                  ## (I + H Z Z')^{-1} = I - H Z (I + Z' H Z)^{-1} Z'
                  solut <- (grad + alphag) -
                    hess * (Z %*% solve(Ksub + crossprod(Z, hess * Z),
                                        crossprod(Z, grad + alphag)))
                  alphag <- alphag - solut

                  gradnorm <- sqrt(sum((grad + alphag)^2))
                }
            }
          alpha(ret)[[p]] <- alphag
          alphaindex(ret)[[p]] <- c(indexes[[i]],indexes[[j]])
        }
      }
    }
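
## Note (editorial summary of the step above, not original package text): the
## Laplace approximation finds the posterior mode by minimising
##   psi(alpha) = t(alpha) %*% K %*% alpha / 2 + sum(log(1 + exp(-yd * f))),
## with f = K %*% alpha. Multiplying the Newton system by K^{-1} gives the step
##   alpha <- alpha - solve(I + H %*% K, grad + alpha),
## where grad = -yd / (1 + exp(yd * f)) and H = diag(hess) is the logistic-loss
## curvature. solveiter(), defined at the bottom of this file, solves that
## system iteratively, and the reduced branch applies the same step through an
## incomplete Cholesky factor Z with K approximated by Z %*% t(Z).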
  if (type(ret) == "regression")
    {
      K <- kernelMatrix(kernel, x)
      if (variance.model)
        {
          sol <- solve(K + diag(rep(var, length = m)))
          rm(K)
          alpha(ret) <- sol%*%y
        }
      else
        alpha(ret) <- solve(K + diag(rep(var, length = m))) %*% y
    }

  kcall(ret) <- match.call()
  kernelf(ret) <- kernel
  xmatrix(ret) <- x
  if (variance.model)
    sol(ret) <- sol

  fitted(ret) <- if (fit)
    predict(ret, x) else NA

  if (fit) {
    if (type(ret)=="classification")
      error(ret) <- 1 - .classAgreement(table(y, as.integer(fitted(ret))))
    if (type(ret)=="regression") {
      if (!is.null(scaling(ret)$y.scale))
        fitted(ret) <- fitted(ret) * tmpsc$y.scale$"scaled:scale" + tmpsc$y.scale$"scaled:center"

      error(ret) <- drop(crossprod(fitted(ret) - y)/m)
    }
  }
  if (any(scaled))
    scaling(ret) <- tmpsc

  cross(ret) <- -1
  if (cross == 1)
    cat("\n","cross should be >1 no cross-validation done!","\n","\n")
  else if (cross > 1)
    {
      cerror <- 0
      suppressWarnings(vgr <- split(sample(1:m,m),1:cross))
      for (i in 1:cross)
        {
          cind <- unsplit(vgr[-i], factor(rep((1:cross)[-i], unlist(lapply(vgr[-i], length)))))
          if (type(ret)=="classification")
            {
              cret <- gausspr(x[cind,], y[cind], scaled = FALSE, type=type(ret), kernel=kernel, var = var, cross = 0, fit = FALSE)
              cres <- predict(cret, x[vgr[[i]],])
              cerror <- (1 - .classAgreement(table(y[vgr[[i]]], as.integer(cres))))/cross + cerror
            }
          if (type(ret)=="regression")
            {
              cret <- gausspr(x[cind,], y[cind], type=type(ret), scaled = FALSE, kernel=kernel, var = var, tol=tol, cross = 0, fit = FALSE)
              cres <- predict(cret, x[vgr[[i]],])
              scal <- 1  ## default when the response was not scaled
              if (!is.null(scaling(ret)$y.scale))
                scal <- scaling(ret)$y.scale$"scaled:scale"
              cerror <- drop((scal^2)*crossprod(cres - y[vgr[[i]]])/m) + cerror
            }
        }
      cross(ret) <- cerror
    }

  return(ret)
})
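
## Regression usage sketch (not run): with variance.model = TRUE the inverse
## of K + var * I is kept in the sol slot, so predictive variances and
## standard deviations can be queried later. The toy data below are an
## illustrative assumption.
## x <- matrix(seq(-4, 4, length.out = 50))
## y <- sin(x[, 1]) + rnorm(50, sd = 0.1)
## gpr <- gausspr(x, y, variance.model = TRUE, var = 0.01)
## predict(gpr, x, type = "sdeviation")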
setMethod("predict", signature(object = "gausspr"),
function (object, newdata, type = "response", coupler = "minpair")
{
  sc <- 0
  type <- match.arg(type,c("response","probabilities","votes", "variance", "sdeviation"))
  if (missing(newdata) && type == "response")
    return(fitted(object))
  else if (missing(newdata))
    {
      newdata <- xmatrix(object)
      sc <- 1
    }

  ncols <- ncol(xmatrix(object))
  nrows <- nrow(xmatrix(object))
  oldco <- ncols

  if (!is.null(terms(object)))
    {
      newdata <- model.matrix(delete.response(terms(object)), as.data.frame(newdata), na.action = na.action)
    }
  else
    newdata <- if (is.vector(newdata)) t(t(newdata)) else as.matrix(newdata)

  newnrows <- nrow(newdata)
  newncols <- ncol(newdata)
  newco <- newncols

  if (oldco != newco) stop("test vector does not match model !")

  if (is.list(scaling(object)) && sc != 1)
    newdata[,scaling(object)$scaled] <-
      scale(newdata[,scaling(object)$scaled, drop = FALSE],
            center = scaling(object)$x.scale$"scaled:center",
            scale  = scaling(object)$x.scale$"scaled:scale"
            )

  p <- 0
  if (type == "response" || type == "votes")
    {
      if (type(object)=="classification")
        {
          predres <- 1:newnrows
          votematrix <- matrix(0, nclass(object), newnrows)
          for (i in 1:(nclass(object)-1))
            {
              jj <- i+1
              for (j in jj:nclass(object))
                {
                  p <- p+1
                  ret <- kernelMult(kernelf(object), newdata, xmatrix(object)[alphaindex(object)[[p]],], alpha(object)[[p]])
                  votematrix[i, ret>0] <- votematrix[i, ret>0] + 1
                  votematrix[j, ret<0] <- votematrix[j, ret<0] + 1
                }
            }
          predres <- sapply(predres, function(x) which.max(votematrix[,x]))
        }
    }

  if (type == "probabilities")
    {
      if (type(object)=="classification")
        {
          binprob <- matrix(0, newnrows, nclass(object)*(nclass(object) - 1)/2)
          for (i in 1:(nclass(object)-1))
            {
              jj <- i+1
              for (j in jj:nclass(object))
                {
                  p <- p+1
                  binprob[,p] <- 1/(1+exp(-kernelMult(kernelf(object), newdata, xmatrix(object)[alphaindex(object)[[p]],], alpha(object)[[p]])))
                }
            }
          ## multiprob <- sapply(1:newnrows, function(x) couple(binprob[x ,],coupler = coupler))
          multiprob <- couple(binprob, coupler = coupler)
        }
    }

  if (type(object) == "regression")
    {
      if (type == "variance" || type == "sdeviation")
        {
          Ktest <- kernelMatrix(kernelf(object), xmatrix(object), newdata)
          predres <- diag(kernelMatrix(kernelf(object), newdata) - t(Ktest) %*% sol(object) %*% Ktest)
          if (type == "sdeviation")
            predres <- sqrt(predres)
          if (!is.null(scaling(object)$y.scale)) {
            ## variances scale with the square of the scale factor, standard
            ## deviations with the factor itself; neither is shifted by the
            ## centering constant
            scal <- scaling(object)$y.scale$"scaled:scale"
            predres <- if (type == "variance") predres * scal^2 else predres * scal
          }
        }
      else
        {
          predres <- kernelMult(kernelf(object), newdata, xmatrix(object), as.matrix(alpha(object)))

          if (!is.null(scaling(object)$y.scale))
            predres <- predres * scaling(object)$y.scale$"scaled:scale" + scaling(object)$y.scale$"scaled:center"
        }
    }

  if (is.character(lev(object)))
    {
      ## classification & probabilities : return probability matrix
      if (type == "probabilities")
        {
          colnames(multiprob) <- lev(object)
          return(multiprob)
        }
      ## classification & type response : return factors
      if (type == "response")
        return(factor(lev(object)[predres], levels = lev(object)))
      ## classification & votes : return votematrix
      if (type == "votes")
        return(votematrix)
    }
  else
    ## else : return raw values
    return(predres)
})
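
## Editorial note on the paths above: multiclass responses come from
## one-against-one voting over the nclass*(nclass-1)/2 pairwise machines, and
## pairwise probabilities are combined by couple(); for regression fitted with
## variance.model = TRUE the predictive variance is the standard GP posterior
## expression diag(K(x*,x*) - t(k*) %*% solve(K + var * I) %*% k*), evaluated
## through the stored sol slot.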
setMethod("show","gausspr",
function(object){
  cat("Gaussian Processes object of class \"gausspr\"","\n")
  cat(paste("Problem type:", type(object),"\n"))
  cat("\n")
  show(kernelf(object))
  cat(paste("\nNumber of training instances learned :", dim(xmatrix(object))[1],"\n"))
  if (!is.null(fitted(object)))
    cat(paste("Train error :", round(error(object),9),"\n"))
  ## train error & loss
  if (cross(object)!=-1)
    cat("Cross validation error :",round(cross(object),9),"\n")
})


solveiter <- function(B,noiseproc,b,x,itmax = 50,tol = 1e-3 ,verbose = FALSE) {

## ----------------------------
## Preconditioned Biconjugate Gradient method
## solves the linear system Ax = b for general A
## ------------------------------------------
## x : initial guess
## itmax : max # iterations
## iterates while mean(abs(Ax-b)) > tol
##
## Simplified form of Numerical Recipes: linbcg
##
## The preconditioner is set to inv(diag(A))
## A defined through A <- I + N*B

diagA <- matrix(1,dim(B)[1],1) + colSums(B) + diag(B)*(noiseproc-1)
## approximate diagonal of A, used as the preconditioner

iter <- 0
r <- .Amul2(x,B,noiseproc)
r <- b - r
rr <- r
znrm <- 1

bnrm <- sqrt(sum((b)^2))
z <- r/diagA

err <- sqrt(sum((.Amul2(x,B,noiseproc) - b)^2))/bnrm

while (iter <= itmax){
  iter <- iter + 1
  zm1nrm <- znrm
  zz <- rr/diagA
  bknum <- drop(crossprod(z,rr))
  if (iter == 1)
    {
      p <- z
      pp <- zz
    }
  else
    {
      bk <- bknum/bkden
      p <- bk*p + z
      pp <- bk*pp + zz
    }

  bkden <- bknum
  z <- .Amul2(p,B,noiseproc)
  akden <- drop(crossprod(z,pp))
  ak <- bknum/akden
  zz <- .Amul2T(pp,B,noiseproc)

  x <- x + ak*p
  r <- r - ak*z
  rr <- rr - ak*zz
  z <- r/diagA
  znrm <- 1

  err <- mean(abs(r))

  if (err < tol)
    break
}
return(x)
}

## matrix-vector product with A = I + N*B, i.e. (I + diag(noiseproc) %*% B) %*% d
.Amul2 <- function(d, B, noiseproc){
  ee <- B%*%d
  return(d + noiseproc*ee)
}
## product with the transpose t(A) = I + B %*% diag(noiseproc)
.Amul2T <- function(d, B, noiseproc){
  ee <- noiseproc*d
  return(d + B%*%ee)
}
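
## Quick sanity check for solveiter (not run, editorial illustration): compare
## the iterative solution of (I + diag(h) %*% K) %*% x = b against a direct
## solve on a small random system.
## set.seed(1)
## K <- crossprod(matrix(rnorm(25), 5))      # symmetric positive semidefinite
## h <- runif(5); b <- rnorm(5)
## x1 <- solveiter(K, h, b, rep(0, 5))
## x2 <- solve(diag(5) + h * K, b)
## max(abs(x1 - x2))                         # small, up to the tolerance tol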