1/* hansl driver for gretl's C regls plugin */
2
3function matrix unstdize (const matrix z, scalar my, scalar sy,
4                          const matrix mx, const matrix sx)
5   matrix b = zeros(1 + nelem(z), 1)
6   # initialize intercept
7   b[1] = my
8   loop i=1..nelem(z)
9      if z[i] != 0
10         b[i+1] = (sy/sx[i]) * z[i]
11         b[1] -= b[i+1] * mx[i]
12      endif
13   endloop
14   return b
15end function
16
17function matrix unstdize_vcv (const matrix vcv, scalar sy,
18                              const matrix sx)
19   return vcv * sy^2 ./ sx'sx
20end function
21
22function matrix m_standardize (const matrix X, matrix *mx, matrix *sx)
23   mx = meanc(X)
24   sx = sdc(X)
25   matrix S = zeros(rows(X), cols(X))
26   loop j=1..cols(X)
27      S[,j] = sx[j] == 0 ? 0 : (X[,j] .- mx[j]) ./ sx[j]
28   endloop
29   return S
30end function
31
32function matrix s_standardize (const matrix y, scalar *my, scalar *sy)
33   my = meanc(y)
34   sy = sdc(y)
35   matrix S = zeros(rows(y), 1)
36   S = sy == 0 ? 0 : (y - my) / sy
37   return S
38end function
39
40function matrix lambda_sequence (scalar lmax, int K, scalar eps[0.0001])
41   scalar decr = log(eps) / (K-1)
42   matrix Lam = log(lmax) + decr * seq(0, K-1)'
43   return exp(Lam)
44end function
45
46function scalar unscaled_lambda (const bundle b)
47   return inbundle(b, "lambda_scale") == 1 && b.lambda_scale == 0
48end function
49
50function scalar check_params (const bundle b, matrix *lfrac,
51                              scalar *nlam, bundle *ret)
52   matrix m = {}
53   scalar scl = 1
54   if inbundle(b, "lambda_scale") == 1
55      scl = b.lambda_scale
56      if scl < 0 || scl > 2 || floor(scl) != scl
57         print "invalid lambda scale specification"
58         return 1
59      else
60         ret.lambda_scale = scl
61      endif
62   endif
63   if inbundle(b, "lfrac") == 1 || inbundle(b, "lfrac") == 3
64      m = b.lfrac
65      nlam = nelem(m)
66   endif
67   if nelem(m) == 0 && inbundle(b, "nlambda") == 1
68      # got a number of lambda values
69      if b.nlambda < 4
70         print "missing or invalid lambda specification"
71         return 1
72      else
73         nlam = b.nlambda
74         lfrac = lambda_sequence(1, nlam)
75         return 0
76      endif
77   elif nelem(m) == 0
78      # default to 25 automatic lambdas
79      nlam = 25
80      lfrac = lambda_sequence(1, nlam)
81      return 0
82   endif
83   lfrac = m
84   if scl == 0 # treat the lambdas as is, not as fractions
85      if min(m) < 0
86         print "missing or invalid lambda specification"
87         return 2
88      else
89         return 0
90      endif
91   endif
92   loop i=1..nlam
93      if m[i] < 0 || m[i] > 1 || (i > 1 && m[i] >= m[i-1])
94         print "invalid lambda specification"
95         return 2
96      endif
97   endloop
98   return 0
99end function
100
101function void get_xvalidation_parms (bundle *targ, bundle *src)
102   # options specific to cross validation
103   targ.xvalidate = src.xvalidate
104   if targ.xvalidate
105      matrix deflts = {10, 0, 0, 0, 0, 0, 0}
106      ndef = nelem(deflts)
107      loop foreach i nfolds randfolds no_mpi mpi_np mpi_local use_1se single_b seed
108         if inbundle(src, "$i")
109            scalar targ.$i = src.$i
110         elif i <= ndef
111            scalar targ.$i = deflts[i]
112         endif
113      endloop
114   endif
115end function
116
117function void print_regls_header (const bundle b, int nlam,
118                                  int elnet, string yname)
119
120   string algo = b.ccd ? "CCD" : b.ridge ? "SVD" : "ADMM"
121   string t1str = obslabel($t1)
122   string t2str = obslabel($t2)
123
124   if elnet
125      printf "\n%s, alpha = %g, ", b.estimator, b.alpha
126   else
127      printf "\n%s (%s) ", b.estimator, algo
128   endif
129   printf "using observations %s to %s (n = %d)\n", t1str, t2str, b.nobs
130   printf "Dependent variable: %s\n", yname
131   if nlam == 1
132      if unscaled_lambda(b)
133         printf "single lambda value %g\n", b.lfrac[1]
134      else
135         printf "single lambda-fraction %g\n", b.lfrac[1]
136      endif
137   else
138      printf "%d values of lambda\n", nlam
139   endif
140end function
141
142function void gui_add_commands (bundle *b, const bundle parms,
143                                const string yname, const string xname)
144   string cmdstr
145   set force_decpoint on
146   outfile --buffer=cmdstr --quiet
147      printf "include regls.gfn\n"
148      printf "bundle rb = regls(%s, %s, _(", yname, xname
149      if inbundle(parms, "nlambda")
150         printf "nlambda=%d", parms.nlambda
151      else
152         printf "lfrac=%g", parms.lfrac
153      endif
154      if inbundle(b, "ridge") && b.ridge
155         printf ", ridge=1"
156      endif
157      if inbundle(b, "alpha")
158         printf ", alpha=%g", b.alpha
159      endif
160      printf ", verbosity=3"
161      if inbundle(b, "xvalidate") && b.xvalidate
162         printf ",\n xvalidate=1"
163         printf ", randfolds=%d", b.randfolds
164         printf ", nfolds=%d", b.nfolds
165      endif
166      printf "))\n"
167   end outfile
168   set force_decpoint off
169   printf "Command line equivalent:\n"
170   printf "%s\n", cmdstr
171   b.commands = cmdstr
172end function
173
174function void regls_set_notes (bundle *b)
175   if inbundle(b, "nzb")
176      setnote(b, "nzb", "non-zero coefficients")
177   endif
178   if inbundle(b, "nzX")
179      setnote(b, "nzX", "included regressors")
180   endif
181   if inbundle(b, "B")
182      setnote(b, "B", "all coefficients")
183   endif
184   if inbundle(b, "XVC")
185      setnote(b, "XVC", "cross-validation info")
186   endif
187end function
188
189function void nz_print (const bundle b, int nlam)
190   if b.xvalidate
191      scalar lf = b.use_1se ? b.lf1se : b.lfmin
192      printf "\n%s coefficients (s = %g)\n\n", b.estimator, lf
193   elif nlam > 1
194      printf "\n%s minimum-BIC coefficients\n\n", b.estimator
195   else
196      printf "\n%s coefficients\n\n", b.estimator
197   endif
198   if !inbundle(b, "nzX")
199      if inbundle(b, "b")
200         eval b.b
201      else
202         eval b.B
203      endif
204      return
205   endif
206   strings S = varnames(b.nzX)
207   len = max(strlen(S))
208   loop i=1..nelem(S)
209      printf "%*s %#12.6g\n", len, S[i], b.nzb[i]
210   endloop
211   printf "\n"
212end function
213
214function bundle regls (series depvar, list indeps,
215                       const bundle parms[null])
216   bundle ret
217   scalar nlam = 1
218   scalar timer = 0
219   matrix lfrac = {}
220   matrix mx = {}
221   matrix sx = {}
222   scalar my = 0
223   scalar sy = 1
224   scalar rank = $mpirank
225   matrix bsel = {}
226   scalar alpha_set = 0
227   scalar elnet = 0
228   scalar gui = 0
229   string yname
230
231   if !exists(parms)
232      # an empty bundle will do
233      bundle parms = null
234   endif
235
236   if inlist(indeps, 0)
237      indeps -= const
238   endif
239   if nelem(indeps) == 0
240      funcerr "no regressors were supplied"
241   endif
242
243   err = check_params(parms, &lfrac, &nlam, &ret)
244   if err
245      funcerr "invalid or missing parameter(s)"
246   endif
247
248   ret.stdize = 1
249   ret.verbosity = 1
250   ret.ridge = 0
251   ret.ccd = 0
252   ret.xvalidate = 0
253   ret.sample_t1 = $t1
254   ret.sample_t2 = $t2
255
256   # read optional parameters
257   if inbundle(parms, "stdize") == 1
258      ret.stdize = parms.stdize
259   endif
260   if inbundle(parms, "verbosity") == 1
261      ret.verbosity = parms.verbosity
262   endif
263   if inbundle(parms, "timer") == 1
264      timer = parms.timer
265   endif
266   if inbundle(parms, "gui") == 1
267      gui = parms.gui
268      if gui
269         ret.verbosity = 3
270      endif
271   endif
272   if inbundle(parms, "alpha") == 1
273      if parms.alpha < 0 || parms.alpha > 1 || isnan(parms.alpha)
274         funcerr "invalid alpha specification"
275      else
276         ret.alpha = parms.alpha
277         if ret.alpha == 0
278            ret.ridge = 1
279         elif ret.alpha < 1
280            ret.ccd = 1
281            elnet = 1
282         endif
283         alpha_set = 1
284      endif
285   endif
286
287   if !alpha_set && inbundle(parms, "ridge") == 1
288      ret.ridge = parms.ridge
289   endif
290   if !ret.ccd && inbundle(parms, "ccd") == 1
291      ret.ccd = parms.ccd
292   endif
293   if ret.ccd && inbundle(parms, "ccd_toler")
294      ret.ccd_toler = parms.ccd_toler
295   endif
296
297   # list of regressors with constant included, if wanted
298   list X0 = ret.stdize ? 0 indeps : indeps
299
300   # record upper limit of incoming sample range
301   my_tmax = $t2
302
303   # handle possible missing values
304   list All = depvar indeps
305   series okcheck = ok(All)
306   nskip = $nobs - sum(okcheck)
307   if nskip > 0
308      set matrix_mask okcheck
309   endif
310
311   if ret.stdize
312      matrix Y = s_standardize({depvar}, &my, &sy)
313      matrix X = m_standardize({indeps}, &mx, &sx)
314   else
315      matrix Y = {depvar}
316      matrix X = {indeps}
317   endif
318
319   scalar ret.nobs = rows(X)
320
321   if nelem(lfrac) > 0
322      matrix ret.lfrac = vec(lfrac)
323   endif
324   if inbundle(parms, "admmctrl") == 3
325      matrix ret.admmctrl = parms.admmctrl
326   endif
327   if inbundle(parms, "xvalidate")
328      get_xvalidation_parms(&ret, &parms)
329   endif
330   if inbundle(parms, "crit_plot") == 1
331      ret.crit_plot = parms.crit_plot
332   endif
333
334   string ret.estimator = elnet ? "Elastic net" : \
335     ret.ridge ? "Ridge" : "LASSO"
336
337   if ret.verbosity > 0 && rank <= 0
338      yname = argname(depvar, "unnamed")
339      print_regls_header(ret, nlam, elnet, yname)
340   endif
341
342   # call plugin C code
343   if rank <= 0 && timer
344      set stopwatch
345   endif
346   err = _regls(X, Y, ret)
347   if err
348      funcerr "_regls failed"
349   endif
350   if rank <= 0 && timer
351      string mode = ret.ccd ? "CCD" : ret.ridge ? "SVD" : "ADMM"
352      printf "_regls (%s): %.3f seconds\n", mode, $stopwatch
353   endif
354
355   if !ret.xvalidate && ret.verbosity > 0 && inbundle(ret, "BIC")
356      string ss = ret.ridge && unscaled_lambda(ret) ? "lambda" : "s"
357      if nlam > 1
358         if inbundle(ret, "BIC") == 3
359            scalar cmin = ret.BIC[ret.idxmin]
360         else
361            scalar cmin = ret.BIC
362         endif
363         printf "\nBIC minimized at %#g with %s = %f\n", cmin, ss, ret.lfmin
364      else
365         printf "\nBIC = %#g for %s = %f\n", ret.BIC, ss, ret.lfrac
366      endif
367   endif
368
369   if rank > 0
370      # case where the user has invoked MPI
371      return ret
372   endif
373
374   if cols(ret.B) > 1
375      if ret.stdize
376         loop j=1..cols(ret.B)
377            ret.B[,j] = unstdize(ret.B[2:,j], my, sy, mx, sx)
378         endloop
379      endif
380      if inbundle(ret, "idxmin")
381         optcol = (ret.xvalidate && ret.use_1se)? ret.idx1se : ret.idxmin
382         matrix b = ret.B[,optcol]
383         if ret.ridge
384            ret.b = b
385            rnameset(ret.b, varnames(X0))
386         else
387            matrix bsel = b .!= 0
388            matrix ret.nzb = selifr(b, bsel)
389         endif
390      endif
391   else
392      if ret.stdize
393         ret.B = unstdize(ret.B[2:], my, sy, mx, sx)
394         if inbundle(ret, "vcv")
395            ret.vcv = unstdize_vcv(ret.vcv, sy, sx)
396         endif
397      endif
398      if !ret.ridge
399         matrix bsel = ret.B .!= 0
400         matrix ret.nzb = selifr(ret.B, bsel)
401      endif
402   endif
403
404   rnameset(ret.B, varnames(X0))
405   if !ret.ridge && nelem(bsel) > 0
406      matrix tmp = X0
407      tmp = selifr(tmp', bsel)
408      list ret.nzX = tmp
409      rnameset(ret.nzb, varnames(ret.nzX))
410   endif
411
412   if inbundle(ret, "vcv") && ret.verbosity > 1
413      matrix bse = ret.B ~ (NA | sqrt(diag(ret.vcv)))
414      matrix addstats = {ret.edf, ret.BIC, ret.R2}
415      strings pnames = varnames(X0) + defarray("edf", "BIC", "R^2")
416      modprint bse pnames addstats
417   elif nlam == 1 && ret.verbosity > 1
418      nz_print(ret, 1)
419   elif nlam > 1 && ret.verbosity > 2
420      nz_print(ret, nlam)
421   endif
422   if ret.xvalidate && ret.verbosity > 2
423      training_R2(&ret, depvar, indeps)
424   endif
425   if inbundle(ret, "verbosity")
426      delete ret.verbosity
427   endif
428   string ret.depvar = argname(depvar, "y")
429   list ret.ylist = outer_series_id(ret.depvar)
430   list ret.xlist = indeps
431   string xname = argname(indeps, "X")
432   regls_set_notes(&ret)
433   if gui
434      gui_add_commands(&ret, parms, yname, xname)
435   endif
436
437   if inbundle(ret, "crit_plot") && ret.crit_plot
438      if inbundle(ret, "XVC") == 3
439         regls_mse_plot(ret)
440      elif inbundle(ret, "BIC") == 3
441         regls_bic_plot(ret)
442      else
443         delete ret.crit_plot
444      endif
445   endif
446
447   return ret
448end function
449
450function bundle regls_fcast (bundle *b, int t1, int t2)
451   strings S = defarray("R-squared", "Mean Error",
452     "Root Mean Squared Error", "Mean Absolute Error",
453     "Mean Percentage Error", "Mean Absolute Percentage Error",
454     "Theil's U", "Bias proportion", "Regression proportion",
455     "Disturbance proportion")
456   bundle ret
457   if t1 > b.sample_t2
458      string os = "Out-of-sample "
459   elif t1 == b.sample_t2 && t2 == b.sample_t2
460      string os = "Within-sample "
461   else
462      # mixed?
463      string os = ""
464   endif
465   smpl t1 t2
466   smpl # to get the info printed
467   series y = b.ylist[1]
468   series yhat = regls_yhat(b, b.xlist)
469   scalar ret.R2 = 1 - sst(y-yhat) / sst(y)
470   matrix stats = fcstats(y, yhat)
471   printf "%sforecast evaluation statistics for %s\n\n", os, b.depvar
472   if !ok(stats[4])
473      sel = {1,2,3,4,7,8,9,10}
474      len = maxc(strlen(S[sel]))
475   else
476      len = maxc(strlen(S))
477   endif
478   n = rows(stats) + 1
479   loop i=1..n
480      scalar x = i==1 ? ret.R2 : stats[i-1]
481      if ok(x)
482         fill = x < 0 ? " " : "  "
483         printf "%-*s %s%g\n", len, S[i], fill, x
484      endif
485   endloop
486   printf "\n"
487   # plot: should be conditional?
488   regls_fcast_plot(y, yhat, b)
489   series ret.yhat = yhat
490   matrix ret.stats = stats
491   return ret
492end function
493
494include regls_helpers.inp
495include regls_plots.inp
496