1 /*
2     Genome-wide Efficient Mixed Model Association (GEMMA)
3     Copyright © 2011-2017, Xiang Zhou
4     Copyright © 2017, Peter Carbonetto
5     Copyright © 2017, Pjotr Prins
6 
7     This program is free software: you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation, either version 3 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include <cmath>
22 #include <cstring>
23 #include <ctime>
24 #include <fstream>
25 #include <iostream>
26 #include <iomanip>
27 #include <string>
28 #include <sys/stat.h>
29 #include <vector>
30 #include <fenv.h>
31 
32 #include "gsl/gsl_blas.h"
33 #include "gsl/gsl_cdf.h"
34 #include "gsl/gsl_eigen.h"
35 #include "gsl/gsl_linalg.h"
36 #include "gsl/gsl_matrix.h"
37 #include "gsl/gsl_vector.h"
38 
39 #include "debug.h"
40 #include "mathfunc.h"
41 
42 static bool debug_mode      = false;
43 static bool debug_data_mode = false;
44 static bool debug_check     = false;  // check data/algorithms
45 static bool debug_fpe_check = true;  // check floating point errors (intel hardware)
46 static bool debug_strict    = false; // fail on error, more rigorous checks
47 static bool debug_quiet     = false;
48 static uint debug_issue     = 0;     // track github issues
49 static bool debug_legacy    = false; // legacy mode
50 
debug_set_debug_mode(bool setting)51 void debug_set_debug_mode(bool setting) { debug_mode = setting; }
debug_set_debug_data_mode(bool setting)52 void debug_set_debug_data_mode(bool setting) { debug_data_mode = setting; }
debug_set_check_mode(bool setting)53 void debug_set_check_mode(bool setting) {debug_check = setting; }
debug_set_no_check_mode(bool setting)54 void debug_set_no_check_mode(bool setting) {debug_check = !setting; }
debug_set_no_fpe_check_mode(bool setting)55 void debug_set_no_fpe_check_mode(bool setting) {debug_fpe_check = !setting; }
debug_set_strict_mode(bool setting)56 void debug_set_strict_mode(bool setting) { debug_strict = setting; }
debug_set_quiet_mode(bool setting)57 void debug_set_quiet_mode(bool setting) { debug_quiet = setting; }
debug_set_issue(uint issue)58 void debug_set_issue(uint issue) { debug_issue = issue; }
debug_set_legacy_mode(bool setting)59 void debug_set_legacy_mode(bool setting) { debug_legacy = setting; }
60 
is_debug_mode()61 bool is_debug_mode() { return debug_mode; };
is_debug_data_mode()62 bool is_debug_data_mode() { return debug_data_mode; };
is_no_check_mode()63 bool is_no_check_mode() { return !debug_check; };
is_check_mode()64 bool is_check_mode() { return debug_check; };
is_fpe_check_mode()65 bool is_fpe_check_mode() { return debug_fpe_check; };
is_strict_mode()66 bool is_strict_mode() { return debug_strict; };
is_quiet_mode()67 bool is_quiet_mode() { return debug_quiet; };
is_issue(uint issue)68 bool is_issue(uint issue) { return issue == debug_issue; };
is_legacy_mode()69 bool is_legacy_mode() { return debug_legacy; };
70 
71 #include <stdio.h>
72 #include <sys/types.h>
73 #include <unistd.h>
74 #include <signal.h>
75 
sighandler(int signum)76 void sighandler(int signum)
77 {
78   cout << R"(
79 FATAL ERROR: GEMMA caused a floating point error which suggests machine boundaries were reached.
80 
81 You can disable floating point tests with the -no-check switch (use at your own risk!)
82 )" << endl;
83   signal(signum, SIG_DFL);
84   kill(getpid(), signum); // should force a core dump
85 }
86 
87 /*
88    Force the floating point processor to throw an exception when the result of
89    a double/float computation is overflow, underflow, NaN or inf. In principle
90    this is an Intel hardware feature that does not slow down computations.
91 */
92 
93 #if defined(__APPLE__) && defined(__MACH__)
94 
95 // Public domain polyfill for feenableexcept on OS X
96 // http://www-personal.umich.edu/~williams/archive/computation/fe-handling-example.c
97 
feenableexcept(unsigned int excepts)98 inline int feenableexcept(unsigned int excepts)
99 {
100     static fenv_t fenv;
101     unsigned int new_excepts = excepts & FE_ALL_EXCEPT;
102     // previous masks
103     unsigned int old_excepts;
104 
105     if (fegetenv(&fenv)) {
106         return -1;
107     }
108     old_excepts = fenv.__control & FE_ALL_EXCEPT;
109 
110     // unmask
111     fenv.__control &= ~new_excepts;
112     fenv.__mxcsr   &= ~(new_excepts << 7);
113 
114     return fesetenv(&fenv) ? -1 : old_excepts;
115 }
116 
fedisableexcept(unsigned int excepts)117 inline int fedisableexcept(unsigned int excepts)
118 {
119     static fenv_t fenv;
120     unsigned int new_excepts = excepts & FE_ALL_EXCEPT;
121     // all previous masks
122     unsigned int old_excepts;
123 
124     if (fegetenv(&fenv)) {
125         return -1;
126     }
127     old_excepts = fenv.__control & FE_ALL_EXCEPT;
128 
129     // mask
130     fenv.__control |= new_excepts;
131     fenv.__mxcsr   |= new_excepts << 7;
132 
133     return fesetenv(&fenv) ? -1 : old_excepts;
134 }
135 
136 #endif
137 
enable_segfpe()138 void enable_segfpe() {
139   if (!is_fpe_check_mode() || is_legacy_mode()) return;
140   #ifdef __GNUC__
141     #if defined(__x86_64__)
142   // debug_msg("enable segfpe hardware floating point error detection");
143        signal(SIGFPE, sighandler);
144        feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
145     #endif
146   #endif
147 }
148 
disable_segfpe()149 void disable_segfpe() {
150   if (!is_fpe_check_mode() || is_legacy_mode()) return;
151   #ifdef __GNUC__
152     #if defined(__x86_64__)
153   // debug_msg("disable segfpe");
154       fedisableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
155     #endif
156   #endif
157 }
158 
write(const char * s,const char * msg)159 void write(const char *s, const char *msg) {
160   if (!is_debug_data_mode()) return;
161   cout << s << ": " << msg << endl;
162 }
163 
write(const double d,const char * msg)164 void write(const double d, const char *msg) {
165   if (!is_debug_data_mode()) return;
166   cout << std::setprecision(6) << d << ": " << msg << endl;
167 }
168 
write(const gsl_vector * v,const char * msg)169 void write(const gsl_vector *v, const char *msg) {
170   if (!is_debug_data_mode()) return;
171   if (msg) cout << "// " << msg << endl;
172   cout << "// vector size: " << v->size << endl;
173   cout << "double " << msg << "[] = {";
174   for (size_t i=0; i < v->size; i++) {
175     cout << gsl_vector_get(v,i) << ",";
176   }
177   cout << "}" << endl;
178 }
179 
write(const gsl_matrix * m,const char * msg)180 void write(const gsl_matrix *m, const char *msg) {
181   if (!is_debug_data_mode()) return;
182   if (msg) cout << "// " << msg << endl;
183   // Matrices are stored in row-major order, meaning that each row of
184   // elements forms a contiguous block in memory. This is the standard
185   // “C-language ordering” of two-dimensional arrays. The number of
186   // rows is size1.
187   auto rows = m->size1; // see https://www.gnu.org/software/gsl/manual/html_node/Matrices.html#Matrices
188   auto cols = m->size2;
189   auto tda = m->tda;
190 
191   cout << "// matrix size: " << cols << " cols, " << rows << " rows," << tda << " tda" << endl;
192   cout << "double " << msg << "[] = {";
193   for (size_t row=0; row < rows; row++) {
194     for (size_t col=0; col < cols; col++) {
195       // cout << "(" << i << "," << j << ")";
196       cout << gsl_matrix_safe_get(m,row,col);
197       cout << ",";
198     }
199     cout << "// row " << row << endl;
200   }
201   cout << "}" << endl;
202 }
203 
204 /*
205   Helper function to make sure gsl allocations do their job because
206   gsl_matrix_alloc does not initiatize values (behaviour that changed
207   in GSL2) we introduced a 'strict mode' by initializing the buffer
208   with NaNs. This happens when NO-CHECKS is not set (default) and with
209   DEBUG (i.e. -debug option).
210 */
gsl_matrix_safe_alloc(size_t rows,size_t cols)211 gsl_matrix *gsl_matrix_safe_alloc(size_t rows,size_t cols) {
212   gsl_matrix *m = gsl_matrix_alloc(rows,cols);
213   enforce_msg(m,"Not enough memory"); // just to be sure when there is no error handler set
214   if (is_check_mode() && is_debug_mode()) {
215     gsl_matrix_set_all(m, nan(""));
216   }
217   return m;
218 }
219 
gsl_matrix_safe_memcpy(gsl_matrix * dest,const gsl_matrix * src)220 int gsl_matrix_safe_memcpy (gsl_matrix *dest, const gsl_matrix *src) {
221   enforce(dest->size1 == src->size1);
222   enforce(dest->size2 == src->size2);
223   return gsl_matrix_memcpy(dest,src);
224 }
225 
do_gsl_matrix_safe_free(gsl_matrix * m,const char * __pretty_function,const char * __file,int __line,bool warn_only)226 void do_gsl_matrix_safe_free (gsl_matrix *m, const char *__pretty_function, const char *__file, int __line, bool warn_only) {
227   enforce(m);
228   if (is_strict_mode() && is_check_mode() && is_debug_mode()) {
229     bool has_NaN = has_nan(m);
230     bool has_Inf = has_inf(m);
231     if (has_NaN || has_Inf) {
232       write(m);
233       std::string msg = "Matrix (size ";
234       msg += std::to_string(m->size1);
235       msg += "x";
236       msg += std::to_string(m->size2);
237       msg += ")";
238       if (warn_only) {
239         if (has_Inf)
240           warning_at_msg(__file,__line,(msg+" contains Infinite on free!").c_str());
241         if (has_NaN)
242           warning_at_msg(__file,__line,(msg+" contains NaN on free!").c_str());
243       }
244       else {
245         if (has_Inf)
246           warnfail_at_msg(is_strict_mode(),__pretty_function,__file,__line,(msg+" contains Infinite on free!").c_str());
247         if (has_NaN)
248           warnfail_at_msg(is_strict_mode(),__pretty_function,__file,__line,(msg+" contains NaN on free!").c_str());
249       }
250     }
251   }
252   return gsl_matrix_free(m);
253 }
254 
gsl_vector_safe_memcpy(gsl_vector * dest,const gsl_vector * src)255 int gsl_vector_safe_memcpy (gsl_vector *dest, const gsl_vector *src) {
256   enforce(dest->size == src->size);
257   return gsl_vector_memcpy(dest,src);
258 }
259 
do_gsl_vector_safe_free(gsl_vector * v,const char * __pretty_function,const char * __file,int __line)260 void do_gsl_vector_safe_free (gsl_vector *v, const char *__pretty_function, const char *__file, int __line) {
261   enforce(v);
262   if (is_strict_mode() && is_check_mode() && is_debug_mode()) {
263     bool has_NaN = has_nan(v);
264     bool has_Inf = has_inf(v);
265     if (has_NaN || has_Inf) {
266       write(v);
267       std::string msg = "Vector (size ";
268       msg += std::to_string(v->size);
269       msg += ")";
270       if (has_Inf)
271         warnfail_at_msg(is_strict_mode(),__pretty_function,__file,__line,(msg+" contains Infinite on free!").c_str());
272       if (has_NaN)
273         warnfail_at_msg(is_strict_mode(),__pretty_function,__file,__line,(msg+" contains NaN on free!").c_str());
274     }
275   }
276   return gsl_vector_free(v);
277 }
278 
279 /*
280   Helper function to make sure gsl allocations do their job because
281   gsl_vector_alloc does not initiatize values (behaviour that changed
282   in GSL2) we introduced a 'strict mode' by initializing the buffer
283   with NaNs. This happens when NO-CHECKS is not set and with DEBUG
284   (i.e. -debug option).
285 */
gsl_vector_safe_alloc(size_t n)286 gsl_vector *gsl_vector_safe_alloc(size_t n) {
287   gsl_vector *v = gsl_vector_alloc(n);
288   enforce_msg(v,"Not enough memory"); // just to be sure when there is no error handler set
289   if (is_check_mode() && is_debug_mode()) {
290     gsl_vector_set_all(v, nan(""));
291   }
292   return v;
293 }
294 
do_gsl_matrix_safe_get(const gsl_matrix * m,const size_t row,const size_t col,const char * __pretty_function,const char * __file,int __line)295 double do_gsl_matrix_safe_get(const gsl_matrix * m, const size_t row, const size_t col,
296                               const char *__pretty_function, const char *__file, int __line) {
297   enforce(m);
298   if (!is_legacy_mode() && (is_debug_mode() || is_check_mode() || is_strict_mode())) {
299     auto rows = m->size1; // see above write function
300     auto cols = m->size2;
301     if (col >= cols || row >= rows) {
302       std::string msg = "Matrix out of bounds (" + std::to_string(rows) + "," + std::to_string(cols) + ") ";
303       msg += std::to_string(row);
304       msg += "r,";
305       msg += std::to_string(col);
306       fail_at_msg(__file,__line,msg.c_str());
307     }
308   }
309   return gsl_matrix_get(m,row,col);
310 }
311 
312 
do_strtok_safe(char * tokenize,const char * delimiters,const char * __pretty_function,const char * __file,int __line,const char * infile)313 char *do_strtok_safe(char *tokenize, const char *delimiters, const char *__pretty_function, const char *__file, int __line,
314                      const char *infile) {
315   auto token = strtok(tokenize,delimiters);
316   if (token == NULL) {
317     if (infile)
318       fail_at_msg(__file,__line,string("Parsing input file '") + infile + "' failed in function " + __pretty_function);
319     else
320       fail_at_msg(__file,__line,string("Parsing input file failed in function ") + __pretty_function);
321   }
322   return token;
323 }
324 
325 // Helper function called by macro validate_K(K, check). K is validated
326 // unless -no-check option is used.
do_validate_K(const gsl_matrix * K,const char * __pretty_function,const char * __file,int __line)327 void do_validate_K(const gsl_matrix *K, const char *__pretty_function, const char *__file, int __line) {
328   if (is_check_mode()) {
329     // debug_msg("Validating K");
330     auto eigenvalues = getEigenValues(K);
331     const uint count_small = count_abs_small_values(eigenvalues,EIGEN_MINVALUE);
332     if (count_small>1) {
333       std::string msg = "K has ";
334       msg += std::to_string(count_small);
335       msg += " eigenvalues close to zero";
336       warning_at_msg(__file,__line,msg);
337     }
338     if (isMatrixIllConditioned(eigenvalues))
339       warning_at_msg(__file,__line,"K is ill conditioned!");
340     if (!isMatrixSymmetric(K))
341       warnfail_at_msg(is_strict_mode(),__pretty_function,__file,__line,"K is not symmetric!" );
342     const bool negative_eigen_values = has_negative_values_but_one(eigenvalues);
343     if (negative_eigen_values) {
344       warning_at_msg(__file,__line,"K has more than one negative eigenvalues!");
345     }
346     if (count_small>1 && negative_eigen_values && !isMatrixPositiveDefinite(K))
347       warnfail_at_msg(is_strict_mode(),__pretty_function,__file,__line,"K is not positive definite!");
348     gsl_vector_free(eigenvalues);
349   }
350 }
351