# addons/regls/regls.inp

/* hansl driver for gretl's C regls plugin */

function matrix unstdize (const matrix z, scalar my, scalar sy,
                          const matrix mx, const matrix sx)
   # Convert coefficients obtained on standardized data back to the
   # scale of the original data.
   #
   # z:  coefficients on the standardized regressors (no intercept)
   # my: location of the dependent variable (regls passes its mean)
   # sy: scale of the dependent variable (regls passes its std. dev.)
   # mx: per-regressor locations, indexed like z
   # sx: per-regressor scales, indexed like z
   #
   # Returns a column vector with 1 + nelem(z) rows: the intercept
   # first, then the rescaled slopes. Zero coefficients stay zero and
   # do not touch the intercept (so sx[j] is never used for them).
   scalar k = nelem(z)
   matrix coef = zeros(k + 1, 1)
   # the intercept starts out at my and is adjusted per regressor
   scalar icept = my
   loop j=1..k
      if z[j] != 0
         scalar slope = (sy/sx[j]) * z[j]
         coef[j+1] = slope
         icept -= slope * mx[j]
      endif
   endloop
   coef[1] = icept
   return coef
end function

function matrix unstdize_vcv (const matrix vcv, scalar sy,
                              const matrix sx)
   # Rescale a covariance matrix estimated on standardized data.
   #
   # vcv: covariance matrix of the standardized coefficients
   # sy:  scale factor of the dependent variable
   # sx:  scale factors of the regressors; regls passes the row
   #      vector produced by sdc(), so that sx'sx is the matrix of
   #      pairwise products sx[i]*sx[j]
   #
   # Each element of vcv is multiplied by sy^2 and divided by the
   # corresponding element of sx'sx.
   matrix sxx = sx'sx
   return (vcv * sy^2) ./ sxx
end function

function matrix m_standardize (const matrix X, matrix *mx, matrix *sx)
   # Standardize the columns of X.
   #
   # X:  data matrix, one column per variable
   # mx: on return, holds meanc(X)
   # sx: on return, holds sdc(X)
   #
   # Returns a matrix of the same dimensions as X in which every
   # column has had its mean subtracted and been divided by its
   # standard deviation. A column whose standard deviation is zero
   # is returned as all zeros rather than being divided by zero.
   scalar n = rows(X)
   scalar k = cols(X)
   mx = meanc(X)
   sx = sdc(X)
   matrix Z = zeros(n, k)
   loop c=1..k
      scalar sd = sx[c]
      Z[,c] = sd == 0 ? 0 : (X[,c] .- mx[c]) ./ sd
   endloop
   return Z
end function

function matrix s_standardize (const matrix y, scalar *my, scalar *sy)
   # Standardize a single column of data.
   #
   # y:  data vector (regls passes the dependent variable as a
   #     one-column matrix)
   # my: on return, holds meanc(y)
   # sy: on return, holds sdc(y)
   #
   # Returns (y - my) / sy when sy is non-zero.
   my = meanc(y)
   sy = sdc(y)
   matrix S = zeros(rows(y), 1)
   # NOTE(review): when sy == 0 the scalar 0 is assigned to S as a
   # whole; whether S keeps its rows(y) x 1 shape in that case depends
   # on how hansl assigns a scalar to an existing matrix -- confirm.
   S = sy == 0 ? 0 : (y - my) / sy
   return S
end function

function matrix lambda_sequence (scalar lmax, int K, scalar eps[0.0001])
   # Build a log-linearly spaced, decreasing sequence of lambda values.
   #
   # lmax: first (largest) value of the sequence
   # K:    number of values wanted; must be at least 1
   # eps:  ratio of the last value to the first (default 0.0001)
   #
   # Returns a K x 1 matrix running from lmax down to eps*lmax, with
   # equal steps on the log scale.
   if K < 1
      funcerr "lambda_sequence: K must be at least 1"
   endif
   if K == 1
      # a one-point sequence is just lmax; the general formula would
      # divide by K-1 = 0 and produce NaN
      matrix single = {lmax}
      return single
   endif
   scalar decr = log(eps) / (K-1)
   matrix Lam = log(lmax) + decr * seq(0, K-1)'
   return exp(Lam)
end function

function scalar unscaled_lambda (const bundle b)
   # Returns 1 if @b holds a scalar member "lambda_scale" whose value
   # is 0 (lambda values are taken as given rather than as fractions),
   # otherwise 0.
   if inbundle(b, "lambda_scale") != 1
      # key absent, or present but not a scalar
      return 0
   endif
   return b.lambda_scale == 0
end function

function scalar check_params (const bundle b, matrix *lfrac,
                              scalar *nlam, bundle *ret)
   # Validate the lambda-related members of the parameter bundle @b
   # and work out the set of lambda values to use.
   #
   # b:     user-supplied parameters; the keys read here are
   #        "lambda_scale", "lfrac" and "nlambda"
   # lfrac: on success, receives the lambda values (or fractions)
   # nlam:  on success, receives the number of lambda values
   # ret:   receives "lambda_scale" when the caller supplied a valid one
   #
   # Returns 0 on success, 1 for an invalid lambda_scale or nlambda,
   # 2 for invalid values in lfrac.

   # lambda_scale, if given, must be an integer in 0..2
   scalar scale = 1
   if inbundle(b, "lambda_scale") == 1
      scale = b.lambda_scale
      if scale < 0 || scale > 2 || floor(scale) != scale
         print "invalid lambda scale specification"
         return 1
      endif
      ret.lambda_scale = scale
   endif

   # explicit lambda values may come as a scalar or a matrix
   matrix given = {}
   scalar lftype = inbundle(b, "lfrac")
   if lftype == 1 || lftype == 3
      given = b.lfrac
      nlam = nelem(given)
   endif

   if nelem(given) == 0
      # no explicit values: generate an automatic sequence, of a
      # user-specified length if available, otherwise of length 25
      if inbundle(b, "nlambda") == 1
         if b.nlambda < 4
            print "missing or invalid lambda specification"
            return 1
         endif
         nlam = b.nlambda
      else
         nlam = 25
      endif
      lfrac = lambda_sequence(1, nlam)
      return 0
   endif

   lfrac = given

   if scale == 0
      # the lambdas are taken as is, not as fractions: they only
      # have to be non-negative
      if min(given) < 0
         print "missing or invalid lambda specification"
         return 2
      endif
      return 0
   endif

   # fractions must lie in [0,1] and be strictly decreasing
   loop k=1..nlam
      if given[k] < 0 || given[k] > 1 || (k > 1 && given[k] >= given[k-1])
         print "invalid lambda specification"
         return 2
      endif
   endloop

   return 0
end function

function void get_xvalidation_parms (bundle *targ, bundle *src)
   # Transfer the cross-validation settings from @src to @targ.
   #
   # targ.xvalidate is always copied from src.xvalidate (regls() calls
   # this function only when that key is present in @src). The other
   # options are transferred only if xvalidate is non-zero.

   # options specific to cross validation
   targ.xvalidate = src.xvalidate
   if targ.xvalidate
      # default values for the first seven option names listed in the
      # loop below, in the same order; the eighth name, "seed", has no
      # default and is set in @targ only if @src supplies it
      matrix deflts = {10, 0, 0, 0, 0, 0, 0}
      ndef = nelem(deflts)
      loop foreach i nfolds randfolds no_mpi mpi_np mpi_local use_1se single_b seed
         # here "$i" is the current option name, while i is used as
         # its numeric position in the list
         if inbundle(src, "$i")
            # a value given by the caller takes precedence
            scalar targ.$i = src.$i
         elif i <= ndef
            # otherwise use the default, if there is one
            scalar targ.$i = deflts[i]
         endif
      endloop
   endif
end function

function void print_regls_header (const bundle b, int nlam,
                                  int elnet, string yname)
   # Print the heading that precedes regls output.
   #
   # b:     bundle under construction in regls(); the members read
   #        here are ccd, ridge, estimator, nobs, lfrac and (for
   #        elastic net only) alpha
   # nlam:  number of lambda values in use
   # elnet: non-zero if the estimator is elastic net
   # yname: name of the dependent variable

   string from_obs = obslabel($t1)
   string to_obs = obslabel($t2)

   # name of the algorithm, shown for all but elastic net
   string algo = "ADMM"
   if b.ccd
      algo = "CCD"
   elif b.ridge
      algo = "SVD"
   endif

   if elnet
      printf "\n%s, alpha = %g, ", b.estimator, b.alpha
   else
      printf "\n%s (%s) ", b.estimator, algo
   endif
   printf "using observations %s to %s (n = %d)\n", from_obs, to_obs, b.nobs
   printf "Dependent variable: %s\n", yname

   if nlam != 1
      printf "%d values of lambda\n", nlam
   elif unscaled_lambda(b)
      printf "single lambda value %g\n", b.lfrac[1]
   else
      printf "single lambda-fraction %g\n", b.lfrac[1]
   endif
end function

function void gui_add_commands (bundle *b, const bundle parms,
                                const string yname, const string xname)
   # Compose the script commands equivalent to the current regls()
   # call, print them, and store them in the bundle as b.commands.
   #
   # b:     results bundle; ridge, alpha, xvalidate, randfolds and
   #        nfolds are read from it, and "commands" is added to it
   # parms: the parameter bundle that was passed to regls()
   # yname: name of the dependent variable, as it should appear
   # xname: name of the list of regressors, as it should appear
   string cmdstr
   # force_decpoint is switched on while the command text is
   # generated, and switched off again afterwards
   set force_decpoint on
   # everything printed inside this block goes into cmdstr
   outfile --buffer=cmdstr --quiet
      printf "include regls.gfn\n"
      printf "bundle rb = regls(%s, %s, _(", yname, xname
      # NOTE(review): if @parms holds neither "nlambda" nor "lfrac"
      # the reference to parms.lfrac presumably fails -- confirm that
      # callers passing gui=1 always supply one of the two
      if inbundle(parms, "nlambda")
         printf "nlambda=%d", parms.nlambda
      else
         printf "lfrac=%g", parms.lfrac
      endif
      if inbundle(b, "ridge") && b.ridge
         printf ", ridge=1"
      endif
      if inbundle(b, "alpha")
         printf ", alpha=%g", b.alpha
      endif
      printf ", verbosity=3"
      if inbundle(b, "xvalidate") && b.xvalidate
         printf ",\n xvalidate=1"
         printf ", randfolds=%d", b.randfolds
         printf ", nfolds=%d", b.nfolds
      endif
      printf "))\n"
   end outfile
   set force_decpoint off
   printf "Command line equivalent:\n"
   printf "%s\n", cmdstr
   b.commands = cmdstr
end function

function void regls_set_notes (bundle *b)
   # Attach a short descriptive note to each of the main result
   # members of @b, skipping any member that is not present.
   strings keys = defarray("nzb", "nzX", "B", "XVC")
   strings notes = defarray("non-zero coefficients", \
     "included regressors", "all coefficients", \
     "cross-validation info")
   loop k=1..nelem(keys)
      if inbundle(b, keys[k])
         setnote(b, keys[k], notes[k])
      endif
   endloop
end function

function void nz_print (const bundle b, int nlam)
   # Print the selected coefficients held in the results bundle @b.
   #
   # b:    regls results; xvalidate and estimator are always read,
   #       use_1se, lf1se and lfmin under cross validation
   # nlam: number of lambda values that were used
   #
   # If @b has a list member nzX, the elements of b.nzb are printed
   # one per line alongside the names of the series in that list;
   # otherwise b.b (if present) or b.B is printed as it stands.

   # heading, which depends on how the coefficients were selected
   if b.xvalidate
      scalar lf = b.use_1se ? b.lf1se : b.lfmin
      printf "\n%s coefficients (s = %g)\n\n", b.estimator, lf
   elif nlam > 1
      printf "\n%s minimum-BIC coefficients\n\n", b.estimator
   else
      printf "\n%s coefficients\n\n", b.estimator
   endif

   if inbundle(b, "nzX")
      strings names = varnames(b.nzX)
      # width of the name column
      width = max(strlen(names))
      loop k=1..nelem(names)
         printf "%*s %#12.6g\n", width, names[k], b.nzb[k]
      endloop
      printf "\n"
   elif inbundle(b, "b")
      eval b.b
   else
      eval b.B
   endif
end function

function bundle regls (series depvar, list indeps,
                       const bundle parms[null])
   # Main entry point: regularized least squares (LASSO, ridge or
   # elastic net), computed by the _regls plugin function.
   #
   # depvar: the dependent variable
   # indeps: the regressors; a constant, if included, is removed
   # parms:  optional bundle of settings. The keys read in this
   #         function are stdize, verbosity, timer, gui, alpha, ridge,
   #         ccd, ccd_toler, admmctrl, xvalidate and crit_plot;
   #         lambda_scale, lfrac and nlambda are handled by
   #         check_params(), and the cross-validation options by
   #         get_xvalidation_parms().
   #
   # Returns a bundle holding the settings used plus the results.
   # Raises an error if no regressors remain, if the parameters are
   # invalid, or if the plugin reports failure.
   bundle ret
   scalar nlam = 1
   scalar timer = 0
   matrix lfrac = {}
   matrix mx = {}
   matrix sx = {}
   scalar my = 0
   scalar sy = 1
   scalar rank = $mpirank
   matrix bsel = {}
   scalar alpha_set = 0
   scalar elnet = 0
   scalar gui = 0
   string yname

   if !exists(parms)
      # an empty bundle will do
      bundle parms = null
   endif

   # drop the constant (series ID 0) from the regressors, if present
   if inlist(indeps, 0)
      indeps -= const
   endif
   if nelem(indeps) == 0
      funcerr "no regressors were supplied"
   endif

   # validate the lambda specification; sets lfrac and nlam, and
   # possibly ret.lambda_scale
   err = check_params(parms, &lfrac, &nlam, &ret)
   if err
      funcerr "invalid or missing parameter(s)"
   endif

   # defaults, which may be overridden via @parms below
   ret.stdize = 1
   ret.verbosity = 1
   ret.ridge = 0
   ret.ccd = 0
   ret.xvalidate = 0
   ret.sample_t1 = $t1
   ret.sample_t2 = $t2

   # read optional parameters
   if inbundle(parms, "stdize") == 1
      ret.stdize = parms.stdize
   endif
   if inbundle(parms, "verbosity") == 1
      ret.verbosity = parms.verbosity
   endif
   if inbundle(parms, "timer") == 1
      timer = parms.timer
   endif
   if inbundle(parms, "gui") == 1
      gui = parms.gui
      if gui
         # GUI mode forces verbosity to 3
         ret.verbosity = 3
      endif
   endif
   if inbundle(parms, "alpha") == 1
      if parms.alpha < 0 || parms.alpha > 1 || isnan(parms.alpha)
         funcerr "invalid alpha specification"
      else
         # alpha = 0 selects ridge; 0 < alpha < 1 selects elastic net,
         # computed via CCD; alpha = 1 leaves ridge and ccd unchanged
         ret.alpha = parms.alpha
         if ret.alpha == 0
            ret.ridge = 1
         elif ret.alpha < 1
            ret.ccd = 1
            elnet = 1
         endif
         alpha_set = 1
      endif
   endif

   # an explicit "ridge" setting is honored only if alpha was not
   # given, and "ccd" only if alpha has not already switched CCD on
   if !alpha_set && inbundle(parms, "ridge") == 1
      ret.ridge = parms.ridge
   endif
   if !ret.ccd && inbundle(parms, "ccd") == 1
      ret.ccd = parms.ccd
   endif
   if ret.ccd && inbundle(parms, "ccd_toler")
      ret.ccd_toler = parms.ccd_toler
   endif

   # list of regressors with constant included, if wanted
   list X0 = ret.stdize ? 0 indeps : indeps

   # record upper limit of incoming sample range
   my_tmax = $t2

   # handle possible missing values
   list All = depvar indeps
   series okcheck = ok(All)
   nskip = $nobs - sum(okcheck)
   if nskip > 0
      # use okcheck as a mask when the series are converted to
      # matrices below
      set matrix_mask okcheck
   endif

   # with stdize on, my/sy and mx/sx receive the means and standard
   # deviations that are needed to undo the standardization later
   if ret.stdize
      matrix Y = s_standardize({depvar}, &my, &sy)
      matrix X = m_standardize({indeps}, &mx, &sx)
   else
      matrix Y = {depvar}
      matrix X = {indeps}
   endif

   scalar ret.nobs = rows(X)

   if nelem(lfrac) > 0
      matrix ret.lfrac = vec(lfrac)
   endif
   if inbundle(parms, "admmctrl") == 3
      matrix ret.admmctrl = parms.admmctrl
   endif
   if inbundle(parms, "xvalidate")
      get_xvalidation_parms(&ret, &parms)
   endif
   if inbundle(parms, "crit_plot") == 1
      ret.crit_plot = parms.crit_plot
   endif

   string ret.estimator = elnet ? "Elastic net" : \
     ret.ridge ? "Ridge" : "LASSO"

   # printing is done only when rank <= 0, rank being $mpirank
   if ret.verbosity > 0 && rank <= 0
      yname = argname(depvar, "unnamed")
      print_regls_header(ret, nlam, elnet, yname)
   endif

   # call plugin C code
   # NOTE(review): the code that follows expects the plugin to have
   # added results to @ret (B always; BIC, idxmin, lfmin, vcv, edf, R2
   # and others depending on the case) -- these are set by _regls,
   # which is not visible in this file
   if rank <= 0 && timer
      set stopwatch
   endif
   err = _regls(X, Y, ret)
   if err
      funcerr "_regls failed"
   endif
   if rank <= 0 && timer
      string mode = ret.ccd ? "CCD" : ret.ridge ? "SVD" : "ADMM"
      printf "_regls (%s): %.3f seconds\n", mode, $stopwatch
   endif

   # report the BIC, if we are not cross validating and the bundle
   # holds one
   if !ret.xvalidate && ret.verbosity > 0 && inbundle(ret, "BIC")
      string ss = ret.ridge && unscaled_lambda(ret) ? "lambda" : "s"
      if nlam > 1
         # BIC may be a matrix (one value per lambda) or a scalar
         if inbundle(ret, "BIC") == 3
            scalar cmin = ret.BIC[ret.idxmin]
         else
            scalar cmin = ret.BIC
         endif
         printf "\nBIC minimized at %#g with %s = %f\n", cmin, ss, ret.lfmin
      else
         printf "\nBIC = %#g for %s = %f\n", ret.BIC, ss, ret.lfrac
      endif
   endif

   if rank > 0
      # case where the user has invoked MPI
      return ret
   endif

   if cols(ret.B) > 1
      # several sets of coefficients, one per column of B
      if ret.stdize
         # convert each column back to the scale of the original
         # data; the first row of B is skipped on input and replaced
         # by the recomputed intercept on output
         loop j=1..cols(ret.B)
            ret.B[,j] = unstdize(ret.B[2:,j], my, sy, mx, sx)
         endloop
      endif
      if inbundle(ret, "idxmin")
         # pick out the preferred column: the one at idx1se if cross
         # validation was done with use_1se set, else the one at idxmin
         optcol = (ret.xvalidate && ret.use_1se)? ret.idx1se : ret.idxmin
         matrix b = ret.B[,optcol]
         if ret.ridge
            ret.b = b
            rnameset(ret.b, varnames(X0))
         else
            # keep the non-zero coefficients only
            matrix bsel = b .!= 0
            matrix ret.nzb = selifr(b, bsel)
         endif
      endif
   else
      # a single set of coefficients
      if ret.stdize
         ret.B = unstdize(ret.B[2:], my, sy, mx, sx)
         if inbundle(ret, "vcv")
            ret.vcv = unstdize_vcv(ret.vcv, sy, sx)
         endif
      endif
      if !ret.ridge
         matrix bsel = ret.B .!= 0
         matrix ret.nzb = selifr(ret.B, bsel)
      endif
   endif

   rnameset(ret.B, varnames(X0))
   if !ret.ridge && nelem(bsel) > 0
      # build the list of regressors with non-zero coefficients by
      # applying the selection vector to the series IDs in X0
      matrix tmp = X0
      tmp = selifr(tmp', bsel)
      list ret.nzX = tmp
      rnameset(ret.nzb, varnames(ret.nzX))
   endif

   # print the results; what is shown depends on the verbosity level
   # and on whether a covariance matrix is available
   if inbundle(ret, "vcv") && ret.verbosity > 1
      # coefficients with standard errors; NA is used for the
      # standard error of the first coefficient
      matrix bse = ret.B ~ (NA | sqrt(diag(ret.vcv)))
      matrix addstats = {ret.edf, ret.BIC, ret.R2}
      strings pnames = varnames(X0) + defarray("edf", "BIC", "R^2")
      modprint bse pnames addstats
   elif nlam == 1 && ret.verbosity > 1
      nz_print(ret, 1)
   elif nlam > 1 && ret.verbosity > 2
      nz_print(ret, nlam)
   endif
   if ret.xvalidate && ret.verbosity > 2
      training_R2(&ret, depvar, indeps)
   endif
   # verbosity is not included in the bundle that is returned
   if inbundle(ret, "verbosity")
      delete ret.verbosity
   endif
   # record the names/IDs of the series used; regls_fcast() reads
   # depvar, ylist and xlist from the bundle
   string ret.depvar = argname(depvar, "y")
   list ret.ylist = outer_series_id(ret.depvar)
   list ret.xlist = indeps
   string xname = argname(indeps, "X")
   regls_set_notes(&ret)
   if gui
      gui_add_commands(&ret, parms, yname, xname)
   endif

   # produce a plot of the criterion if requested and if the required
   # matrix (XVC or BIC) is present; otherwise drop the request
   if inbundle(ret, "crit_plot") && ret.crit_plot
      if inbundle(ret, "XVC") == 3
         regls_mse_plot(ret)
      elif inbundle(ret, "BIC") == 3
         regls_bic_plot(ret)
      else
         delete ret.crit_plot
      endif
   endif

   return ret
end function

function bundle regls_fcast (bundle *b, int t1, int t2)
   # Compute fitted values from a regls results bundle over the
   # observation range t1 to t2, then print and return forecast
   # evaluation statistics.
   #
   # b:  bundle returned by regls(); the members read here are
   #     sample_t1, sample_t2, ylist, xlist and depvar
   # t1: first observation of the evaluation range
   # t2: last observation of the evaluation range
   #
   # Returns a bundle holding R2 (scalar), yhat (series) and stats
   # (the matrix produced by fcstats).

   # labels for R-squared followed by the rows of the fcstats result
   strings S = defarray("R-squared", "Mean Error",
     "Root Mean Squared Error", "Mean Absolute Error",
     "Mean Percentage Error", "Mean Absolute Percentage Error",
     "Theil's U", "Bias proportion", "Regression proportion",
     "Disturbance proportion")
   bundle ret
   # describe the evaluation range relative to the estimation sample,
   # whose limits regls() stored as sample_t1 and sample_t2
   if t1 > b.sample_t2
      string os = "Out-of-sample "
   elif t1 == b.sample_t1 && t2 == b.sample_t2
      # the range coincides with the estimation sample
      string os = "Within-sample "
   else
      # mixed?
      string os = ""
   endif
   smpl t1 t2
   smpl # to get the info printed
   series y = b.ylist[1]
   series yhat = regls_yhat(b, b.xlist)
   scalar ret.R2 = 1 - sst(y-yhat) / sst(y)
   matrix stats = fcstats(y, yhat)
   printf "%sforecast evaluation statistics for %s\n\n", os, b.depvar
   # if the fourth statistic (labeled "Mean Percentage Error") is
   # missing, the two percentage-error labels are left out when
   # working out the width of the label column
   if !ok(stats[4])
      sel = {1,2,3,4,7,8,9,10}
      len = maxc(strlen(S[sel]))
   else
      len = maxc(strlen(S))
   endif
   # line 1 shows R-squared, line i > 1 shows stats[i-1]; statistics
   # with missing values are skipped
   n = rows(stats) + 1
   loop i=1..n
      scalar x = i==1 ? ret.R2 : stats[i-1]
      if ok(x)
         # one space less for negative values, so that the digits
         # line up despite the minus sign
         fill = x < 0 ? " " : "  "
         printf "%-*s %s%g\n", len, S[i], fill, x
      endif
   endloop
   printf "\n"
   # plot: should be conditional?
   regls_fcast_plot(y, yhat, b)
   series ret.yhat = yhat
   matrix ret.stats = stats
   return ret
end function

include regls_helpers.inp
include regls_plots.inp