1/* hansl driver for gretl's C regls plugin */ 2 3function matrix unstdize (const matrix z, scalar my, scalar sy, 4 const matrix mx, const matrix sx) 5 matrix b = zeros(1 + nelem(z), 1) 6 # initialize intercept 7 b[1] = my 8 loop i=1..nelem(z) 9 if z[i] != 0 10 b[i+1] = (sy/sx[i]) * z[i] 11 b[1] -= b[i+1] * mx[i] 12 endif 13 endloop 14 return b 15end function 16 17function matrix unstdize_vcv (const matrix vcv, scalar sy, 18 const matrix sx) 19 return vcv * sy^2 ./ sx'sx 20end function 21 22function matrix m_standardize (const matrix X, matrix *mx, matrix *sx) 23 mx = meanc(X) 24 sx = sdc(X) 25 matrix S = zeros(rows(X), cols(X)) 26 loop j=1..cols(X) 27 S[,j] = sx[j] == 0 ? 0 : (X[,j] .- mx[j]) ./ sx[j] 28 endloop 29 return S 30end function 31 32function matrix s_standardize (const matrix y, scalar *my, scalar *sy) 33 my = meanc(y) 34 sy = sdc(y) 35 matrix S = zeros(rows(y), 1) 36 S = sy == 0 ? 0 : (y - my) / sy 37 return S 38end function 39 40function matrix lambda_sequence (scalar lmax, int K, scalar eps[0.0001]) 41 scalar decr = log(eps) / (K-1) 42 matrix Lam = log(lmax) + decr * seq(0, K-1)' 43 return exp(Lam) 44end function 45 46function scalar unscaled_lambda (const bundle b) 47 return inbundle(b, "lambda_scale") == 1 && b.lambda_scale == 0 48end function 49 50function scalar check_params (const bundle b, matrix *lfrac, 51 scalar *nlam, bundle *ret) 52 matrix m = {} 53 scalar scl = 1 54 if inbundle(b, "lambda_scale") == 1 55 scl = b.lambda_scale 56 if scl < 0 || scl > 2 || floor(scl) != scl 57 print "invalid lambda scale specification" 58 return 1 59 else 60 ret.lambda_scale = scl 61 endif 62 endif 63 if inbundle(b, "lfrac") == 1 || inbundle(b, "lfrac") == 3 64 m = b.lfrac 65 nlam = nelem(m) 66 endif 67 if nelem(m) == 0 && inbundle(b, "nlambda") == 1 68 # got a number of lambda values 69 if b.nlambda < 4 70 print "missing or invalid lambda specification" 71 return 1 72 else 73 nlam = b.nlambda 74 lfrac = lambda_sequence(1, nlam) 75 return 0 76 endif 77 elif nelem(m) == 0 78 # default to 25 automatic lambdas 79 nlam = 25 80 lfrac = lambda_sequence(1, nlam) 81 return 0 82 endif 83 lfrac = m 84 if scl == 0 # treat the lambdas as is, not as fractions 85 if min(m) < 0 86 print "missing or invalid lambda specification" 87 return 2 88 else 89 return 0 90 endif 91 endif 92 loop i=1..nlam 93 if m[i] < 0 || m[i] > 1 || (i > 1 && m[i] >= m[i-1]) 94 print "invalid lambda specification" 95 return 2 96 endif 97 endloop 98 return 0 99end function 100 101function void get_xvalidation_parms (bundle *targ, bundle *src) 102 # options specific to cross validation 103 targ.xvalidate = src.xvalidate 104 if targ.xvalidate 105 matrix deflts = {10, 0, 0, 0, 0, 0, 0} 106 ndef = nelem(deflts) 107 loop foreach i nfolds randfolds no_mpi mpi_np mpi_local use_1se single_b seed 108 if inbundle(src, "$i") 109 scalar targ.$i = src.$i 110 elif i <= ndef 111 scalar targ.$i = deflts[i] 112 endif 113 endloop 114 endif 115end function 116 117function void print_regls_header (const bundle b, int nlam, 118 int elnet, string yname) 119 120 string algo = b.ccd ? "CCD" : b.ridge ? "SVD" : "ADMM" 121 string t1str = obslabel($t1) 122 string t2str = obslabel($t2) 123 124 if elnet 125 printf "\n%s, alpha = %g, ", b.estimator, b.alpha 126 else 127 printf "\n%s (%s) ", b.estimator, algo 128 endif 129 printf "using observations %s to %s (n = %d)\n", t1str, t2str, b.nobs 130 printf "Dependent variable: %s\n", yname 131 if nlam == 1 132 if unscaled_lambda(b) 133 printf "single lambda value %g\n", b.lfrac[1] 134 else 135 printf "single lambda-fraction %g\n", b.lfrac[1] 136 endif 137 else 138 printf "%d values of lambda\n", nlam 139 endif 140end function 141 142function void gui_add_commands (bundle *b, const bundle parms, 143 const string yname, const string xname) 144 string cmdstr 145 set force_decpoint on 146 outfile --buffer=cmdstr --quiet 147 printf "include regls.gfn\n" 148 printf "bundle rb = regls(%s, %s, _(", yname, xname 149 if inbundle(parms, "nlambda") 150 printf "nlambda=%d", parms.nlambda 151 else 152 printf "lfrac=%g", parms.lfrac 153 endif 154 if inbundle(b, "ridge") && b.ridge 155 printf ", ridge=1" 156 endif 157 if inbundle(b, "alpha") 158 printf ", alpha=%g", b.alpha 159 endif 160 printf ", verbosity=3" 161 if inbundle(b, "xvalidate") && b.xvalidate 162 printf ",\n xvalidate=1" 163 printf ", randfolds=%d", b.randfolds 164 printf ", nfolds=%d", b.nfolds 165 endif 166 printf "))\n" 167 end outfile 168 set force_decpoint off 169 printf "Command line equivalent:\n" 170 printf "%s\n", cmdstr 171 b.commands = cmdstr 172end function 173 174function void regls_set_notes (bundle *b) 175 if inbundle(b, "nzb") 176 setnote(b, "nzb", "non-zero coefficients") 177 endif 178 if inbundle(b, "nzX") 179 setnote(b, "nzX", "included regressors") 180 endif 181 if inbundle(b, "B") 182 setnote(b, "B", "all coefficients") 183 endif 184 if inbundle(b, "XVC") 185 setnote(b, "XVC", "cross-validation info") 186 endif 187end function 188 189function void nz_print (const bundle b, int nlam) 190 if b.xvalidate 191 scalar lf = b.use_1se ? b.lf1se : b.lfmin 192 printf "\n%s coefficients (s = %g)\n\n", b.estimator, lf 193 elif nlam > 1 194 printf "\n%s minimum-BIC coefficients\n\n", b.estimator 195 else 196 printf "\n%s coefficients\n\n", b.estimator 197 endif 198 if !inbundle(b, "nzX") 199 if inbundle(b, "b") 200 eval b.b 201 else 202 eval b.B 203 endif 204 return 205 endif 206 strings S = varnames(b.nzX) 207 len = max(strlen(S)) 208 loop i=1..nelem(S) 209 printf "%*s %#12.6g\n", len, S[i], b.nzb[i] 210 endloop 211 printf "\n" 212end function 213 214function bundle regls (series depvar, list indeps, 215 const bundle parms[null]) 216 bundle ret 217 scalar nlam = 1 218 scalar timer = 0 219 matrix lfrac = {} 220 matrix mx = {} 221 matrix sx = {} 222 scalar my = 0 223 scalar sy = 1 224 scalar rank = $mpirank 225 matrix bsel = {} 226 scalar alpha_set = 0 227 scalar elnet = 0 228 scalar gui = 0 229 string yname 230 231 if !exists(parms) 232 # an empty bundle will do 233 bundle parms = null 234 endif 235 236 if inlist(indeps, 0) 237 indeps -= const 238 endif 239 if nelem(indeps) == 0 240 funcerr "no regressors were supplied" 241 endif 242 243 err = check_params(parms, &lfrac, &nlam, &ret) 244 if err 245 funcerr "invalid or missing parameter(s)" 246 endif 247 248 ret.stdize = 1 249 ret.verbosity = 1 250 ret.ridge = 0 251 ret.ccd = 0 252 ret.xvalidate = 0 253 ret.sample_t1 = $t1 254 ret.sample_t2 = $t2 255 256 # read optional parameters 257 if inbundle(parms, "stdize") == 1 258 ret.stdize = parms.stdize 259 endif 260 if inbundle(parms, "verbosity") == 1 261 ret.verbosity = parms.verbosity 262 endif 263 if inbundle(parms, "timer") == 1 264 timer = parms.timer 265 endif 266 if inbundle(parms, "gui") == 1 267 gui = parms.gui 268 if gui 269 ret.verbosity = 3 270 endif 271 endif 272 if inbundle(parms, "alpha") == 1 273 if parms.alpha < 0 || parms.alpha > 1 || isnan(parms.alpha) 274 funcerr "invalid alpha specification" 275 else 276 ret.alpha = parms.alpha 277 if ret.alpha == 0 278 ret.ridge = 1 279 elif ret.alpha < 1 280 ret.ccd = 1 281 elnet = 1 282 endif 283 alpha_set = 1 284 endif 285 endif 286 287 if !alpha_set && inbundle(parms, "ridge") == 1 288 ret.ridge = parms.ridge 289 endif 290 if !ret.ccd && inbundle(parms, "ccd") == 1 291 ret.ccd = parms.ccd 292 endif 293 if ret.ccd && inbundle(parms, "ccd_toler") 294 ret.ccd_toler = parms.ccd_toler 295 endif 296 297 # list of regressors with constant included, if wanted 298 list X0 = ret.stdize ? 0 indeps : indeps 299 300 # record upper limit of incoming sample range 301 my_tmax = $t2 302 303 # handle possible missing values 304 list All = depvar indeps 305 series okcheck = ok(All) 306 nskip = $nobs - sum(okcheck) 307 if nskip > 0 308 set matrix_mask okcheck 309 endif 310 311 if ret.stdize 312 matrix Y = s_standardize({depvar}, &my, &sy) 313 matrix X = m_standardize({indeps}, &mx, &sx) 314 else 315 matrix Y = {depvar} 316 matrix X = {indeps} 317 endif 318 319 scalar ret.nobs = rows(X) 320 321 if nelem(lfrac) > 0 322 matrix ret.lfrac = vec(lfrac) 323 endif 324 if inbundle(parms, "admmctrl") == 3 325 matrix ret.admmctrl = parms.admmctrl 326 endif 327 if inbundle(parms, "xvalidate") 328 get_xvalidation_parms(&ret, &parms) 329 endif 330 if inbundle(parms, "crit_plot") == 1 331 ret.crit_plot = parms.crit_plot 332 endif 333 334 string ret.estimator = elnet ? "Elastic net" : \ 335 ret.ridge ? "Ridge" : "LASSO" 336 337 if ret.verbosity > 0 && rank <= 0 338 yname = argname(depvar, "unnamed") 339 print_regls_header(ret, nlam, elnet, yname) 340 endif 341 342 # call plugin C code 343 if rank <= 0 && timer 344 set stopwatch 345 endif 346 err = _regls(X, Y, ret) 347 if err 348 funcerr "_regls failed" 349 endif 350 if rank <= 0 && timer 351 string mode = ret.ccd ? "CCD" : ret.ridge ? "SVD" : "ADMM" 352 printf "_regls (%s): %.3f seconds\n", mode, $stopwatch 353 endif 354 355 if !ret.xvalidate && ret.verbosity > 0 && inbundle(ret, "BIC") 356 string ss = ret.ridge && unscaled_lambda(ret) ? "lambda" : "s" 357 if nlam > 1 358 if inbundle(ret, "BIC") == 3 359 scalar cmin = ret.BIC[ret.idxmin] 360 else 361 scalar cmin = ret.BIC 362 endif 363 printf "\nBIC minimized at %#g with %s = %f\n", cmin, ss, ret.lfmin 364 else 365 printf "\nBIC = %#g for %s = %f\n", ret.BIC, ss, ret.lfrac 366 endif 367 endif 368 369 if rank > 0 370 # case where the user has invoked MPI 371 return ret 372 endif 373 374 if cols(ret.B) > 1 375 if ret.stdize 376 loop j=1..cols(ret.B) 377 ret.B[,j] = unstdize(ret.B[2:,j], my, sy, mx, sx) 378 endloop 379 endif 380 if inbundle(ret, "idxmin") 381 optcol = (ret.xvalidate && ret.use_1se)? ret.idx1se : ret.idxmin 382 matrix b = ret.B[,optcol] 383 if ret.ridge 384 ret.b = b 385 rnameset(ret.b, varnames(X0)) 386 else 387 matrix bsel = b .!= 0 388 matrix ret.nzb = selifr(b, bsel) 389 endif 390 endif 391 else 392 if ret.stdize 393 ret.B = unstdize(ret.B[2:], my, sy, mx, sx) 394 if inbundle(ret, "vcv") 395 ret.vcv = unstdize_vcv(ret.vcv, sy, sx) 396 endif 397 endif 398 if !ret.ridge 399 matrix bsel = ret.B .!= 0 400 matrix ret.nzb = selifr(ret.B, bsel) 401 endif 402 endif 403 404 rnameset(ret.B, varnames(X0)) 405 if !ret.ridge && nelem(bsel) > 0 406 matrix tmp = X0 407 tmp = selifr(tmp', bsel) 408 list ret.nzX = tmp 409 rnameset(ret.nzb, varnames(ret.nzX)) 410 endif 411 412 if inbundle(ret, "vcv") && ret.verbosity > 1 413 matrix bse = ret.B ~ (NA | sqrt(diag(ret.vcv))) 414 matrix addstats = {ret.edf, ret.BIC, ret.R2} 415 strings pnames = varnames(X0) + defarray("edf", "BIC", "R^2") 416 modprint bse pnames addstats 417 elif nlam == 1 && ret.verbosity > 1 418 nz_print(ret, 1) 419 elif nlam > 1 && ret.verbosity > 2 420 nz_print(ret, nlam) 421 endif 422 if ret.xvalidate && ret.verbosity > 2 423 training_R2(&ret, depvar, indeps) 424 endif 425 if inbundle(ret, "verbosity") 426 delete ret.verbosity 427 endif 428 string ret.depvar = argname(depvar, "y") 429 list ret.ylist = outer_series_id(ret.depvar) 430 list ret.xlist = indeps 431 string xname = argname(indeps, "X") 432 regls_set_notes(&ret) 433 if gui 434 gui_add_commands(&ret, parms, yname, xname) 435 endif 436 437 if inbundle(ret, "crit_plot") && ret.crit_plot 438 if inbundle(ret, "XVC") == 3 439 regls_mse_plot(ret) 440 elif inbundle(ret, "BIC") == 3 441 regls_bic_plot(ret) 442 else 443 delete ret.crit_plot 444 endif 445 endif 446 447 return ret 448end function 449 450function bundle regls_fcast (bundle *b, int t1, int t2) 451 strings S = defarray("R-squared", "Mean Error", 452 "Root Mean Squared Error", "Mean Absolute Error", 453 "Mean Percentage Error", "Mean Absolute Percentage Error", 454 "Theil's U", "Bias proportion", "Regression proportion", 455 "Disturbance proportion") 456 bundle ret 457 if t1 > b.sample_t2 458 string os = "Out-of-sample " 459 elif t1 == b.sample_t2 && t2 == b.sample_t2 460 string os = "Within-sample " 461 else 462 # mixed? 463 string os = "" 464 endif 465 smpl t1 t2 466 smpl # to get the info printed 467 series y = b.ylist[1] 468 series yhat = regls_yhat(b, b.xlist) 469 scalar ret.R2 = 1 - sst(y-yhat) / sst(y) 470 matrix stats = fcstats(y, yhat) 471 printf "%sforecast evaluation statistics for %s\n\n", os, b.depvar 472 if !ok(stats[4]) 473 sel = {1,2,3,4,7,8,9,10} 474 len = maxc(strlen(S[sel])) 475 else 476 len = maxc(strlen(S)) 477 endif 478 n = rows(stats) + 1 479 loop i=1..n 480 scalar x = i==1 ? ret.R2 : stats[i-1] 481 if ok(x) 482 fill = x < 0 ? " " : " " 483 printf "%-*s %s%g\n", len, S[i], fill, x 484 endif 485 endloop 486 printf "\n" 487 # plot: should be conditional? 488 regls_fcast_plot(y, yhat, b) 489 series ret.yhat = yhat 490 matrix ret.stats = stats 491 return ret 492end function 493 494include regls_helpers.inp 495include regls_plots.inp 496