1 /*
2  *  Copyright (c) 2004-2010, Bruno Levy
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *  * Redistributions of source code must retain the above copyright notice,
9  *  this list of conditions and the following disclaimer.
10  *  * Redistributions in binary form must reproduce the above copyright notice,
11  *  this list of conditions and the following disclaimer in the documentation
12  *  and/or other materials provided with the distribution.
13  *  * Neither the name of the ALICE Project-Team nor the names of its
14  *  contributors may be used to endorse or promote products derived from this
15  *  software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
21  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27  *  POSSIBILITY OF SUCH DAMAGE.
28  *
29  *  If you modify this software, you should include a notice giving the
30  *  name of the person performing the modification, the date of modification,
31  *  and the reason for such modification.
32  *
33  *  Contact: Bruno Levy
34  *
35  *     levy@loria.fr
36  *
37  *     ALICE Project
38  *     LORIA, INRIA Lorraine,
39  *     Campus Scientifique, BP 239
40  *     54506 VANDOEUVRE LES NANCY CEDEX
41  *     FRANCE
42  *
43  */
44 
45 #include "nl_private.h"
46 #include "nl_matrix.h"
47 #include "nl_context.h"
48 #include "nl_iterative_solvers.h"
49 #include "nl_preconditioners.h"
50 #include "nl_superlu.h"
51 #include "nl_cholmod.h"
52 #include "nl_arpack.h"
53 #include "nl_mkl.h"
54 #include "nl_cuda.h"
55 
56 /*****************************************************************************/
57 
nlGetCurrentSparseMatrix()58 static NLSparseMatrix* nlGetCurrentSparseMatrix() {
59     NLSparseMatrix* result = NULL;
60     switch(nlCurrentContext->matrix_mode) {
61 	case NL_STIFFNESS_MATRIX: {
62 	    nl_assert(nlCurrentContext->M != NULL);
63 	    nl_assert(nlCurrentContext->M->type == NL_MATRIX_SPARSE_DYNAMIC);
64 	    result = (NLSparseMatrix*)(nlCurrentContext->M);
65 	} break;
66 	case NL_MASS_MATRIX: {
67 	    nl_assert(nlCurrentContext->B != NULL);
68 	    nl_assert(nlCurrentContext->B->type == NL_MATRIX_SPARSE_DYNAMIC);
69 	    result = (NLSparseMatrix*)(nlCurrentContext->B);
70 	} break;
71 	default:
72 	    nl_assert_not_reached;
73     }
74     return result;
75 }
76 
77 /*****************************************************************************/
78 
nlGetCurrentCRSMatrix()79 static NLCRSMatrix* nlGetCurrentCRSMatrix() {
80     NLCRSMatrix* result = NULL;
81     switch(nlCurrentContext->matrix_mode) {
82 	case NL_STIFFNESS_MATRIX: {
83 	    nl_assert(nlCurrentContext->M != NULL);
84 	    nl_assert(nlCurrentContext->M->type == NL_MATRIX_CRS);
85 	    result = (NLCRSMatrix*)(nlCurrentContext->M);
86 	} break;
87 	case NL_MASS_MATRIX: {
88 	    nl_assert(nlCurrentContext->B != NULL);
89 	    nl_assert(nlCurrentContext->B->type == NL_MATRIX_CRS);
90 	    result = (NLCRSMatrix*)(nlCurrentContext->B);
91 	} break;
92 	default:
93 	    nl_assert_not_reached;
94     }
95     return result;
96 }
97 
98 /*****************************************************************************/
99 
nlInitExtension(const char * extension)100 NLboolean nlInitExtension(const char* extension) {
101     if(!strcmp(extension, "SUPERLU")) {
102         return nlInitExtension_SUPERLU();
103     } else if(!strcmp(extension, "CHOLMOD")) {
104         return nlInitExtension_CHOLMOD();
105     } else if(!strcmp(extension, "ARPACK")) {
106 	/*
107 	 * SUPERLU is needed by OpenNL's ARPACK driver
108 	 * (factorizes the matrix for the shift-invert spectral
109 	 *  transform).
110 	 */
111 	return nlInitExtension_SUPERLU() && nlInitExtension_ARPACK();
112     } else if(!strcmp(extension, "MKL")) {
113 	return nlInitExtension_MKL();
114     } else if(!strcmp(extension, "CUDA")) {
115 	return nlInitExtension_CUDA();
116     }
117     return NL_FALSE;
118 }
119 
nlExtensionIsInitialized(const char * extension)120 NLboolean nlExtensionIsInitialized(const char* extension) {
121     if(!strcmp(extension, "SUPERLU")) {
122         return nlExtensionIsInitialized_SUPERLU();
123     } else if(!strcmp(extension, "CHOLMOD")) {
124         return nlExtensionIsInitialized_CHOLMOD();
125     } else if(!strcmp(extension, "ARPACK")) {
126 	/*
127 	 * SUPERLU is needed by OpenNL's ARPACK driver
128 	 * (factorizes the matrix for the shift-invert spectral
129 	 *  transform).
130 	 */
131 	return nlExtensionIsInitialized_SUPERLU() && nlExtensionIsInitialized_ARPACK();
132     } else if(!strcmp(extension, "MKL")) {
133 	return nlExtensionIsInitialized_MKL();
134     } else if(!strcmp(extension, "CUDA")) {
135 	return nlExtensionIsInitialized_CUDA();
136     }
137     return NL_FALSE;
138 }
139 
nlInitialize(int argc,char ** argv)140 void nlInitialize(int argc, char** argv) {
141     int i=0;
142     char* ptr=NULL;
143     char extension[255];
144     /* Find all the arguments with the form:
145      * nl:<extension>=true|false
146      * and try to activate the corresponding extensions.
147      */
148     for(i=1; i<argc; ++i) {
149 	ptr = strstr(argv[i],"=true");
150 	if(!strncmp(argv[i], "nl:", 3) &&
151 	   (strlen(argv[i]) > 3) &&
152 	   (ptr != NULL)) {
153 	    strncpy(extension, argv[i]+3, (size_t)(ptr-argv[i]-3));
154 	    extension[(size_t)(ptr-argv[i]-3)] = '\0';
155 	    if(nlInitExtension(extension)) {
156 		nl_fprintf(stdout,"OpenNL %s: initialized\n", extension);
157 	    } else {
158 		nl_fprintf(stderr,"OpenNL %s: could not initialize\n", extension);
159 	    }
160 	}
161     }
162 }
163 
164 /*****************************************************************************/
165 /* Get/Set parameters */
166 
nlSolverParameterd(NLenum pname,NLdouble param)167 void nlSolverParameterd(NLenum pname, NLdouble param) {
168     nlCheckState(NL_STATE_INITIAL);
169     switch(pname) {
170     case NL_THRESHOLD: {
171         nl_assert(param >= 0);
172         nlCurrentContext->threshold = (NLdouble)param;
173         nlCurrentContext->threshold_defined = NL_TRUE;
174     } break;
175     case NL_OMEGA: {
176         nl_range_assert(param,1.0,2.0);
177         nlCurrentContext->omega = (NLdouble)param;
178     } break;
179     default: {
180         nlError("nlSolverParameterd","Invalid parameter");
181         nl_assert_not_reached;
182     }
183     }
184 }
185 
nlSolverParameteri(NLenum pname,NLint param)186 void nlSolverParameteri(NLenum pname, NLint param) {
187     nlCheckState(NL_STATE_INITIAL);
188     switch(pname) {
189     case NL_SOLVER: {
190         nlCurrentContext->solver = (NLenum)param;
191     } break;
192     case NL_NB_VARIABLES: {
193         nl_assert(param > 0);
194         nlCurrentContext->nb_variables = (NLuint)param;
195     } break;
196     case NL_NB_SYSTEMS: {
197 	nl_assert(param > 0);
198 	nlCurrentContext->nb_systems = (NLuint)param;
199     } break;
200     case NL_LEAST_SQUARES: {
201         nlCurrentContext->least_squares = (NLboolean)param;
202     } break;
203     case NL_MAX_ITERATIONS: {
204         nl_assert(param > 0);
205         nlCurrentContext->max_iterations = (NLuint)param;
206         nlCurrentContext->max_iterations_defined = NL_TRUE;
207     } break;
208     case NL_SYMMETRIC: {
209         nlCurrentContext->symmetric = (NLboolean)param;
210     } break;
211     case NL_INNER_ITERATIONS: {
212         nl_assert(param > 0);
213         nlCurrentContext->inner_iterations = (NLuint)param;
214     } break;
215     case NL_PRECONDITIONER: {
216         nlCurrentContext->preconditioner = (NLuint)param;
217         nlCurrentContext->preconditioner_defined = NL_TRUE;
218     } break;
219     default: {
220         nlError("nlSolverParameteri","Invalid parameter");
221         nl_assert_not_reached;
222     }
223     }
224 }
225 
nlGetBooleanv(NLenum pname,NLboolean * params)226 void nlGetBooleanv(NLenum pname, NLboolean* params) {
227     switch(pname) {
228     case NL_LEAST_SQUARES: {
229         *params = nlCurrentContext->least_squares;
230     } break;
231     case NL_SYMMETRIC: {
232         *params = nlCurrentContext->symmetric;
233     } break;
234     default: {
235         nlError("nlGetBooleanv","Invalid parameter");
236         nl_assert_not_reached;
237     }
238     }
239 }
240 
nlGetDoublev(NLenum pname,NLdouble * params)241 void nlGetDoublev(NLenum pname, NLdouble* params) {
242     switch(pname) {
243     case NL_THRESHOLD: {
244         *params = nlCurrentContext->threshold;
245     } break;
246     case NL_OMEGA: {
247         *params = nlCurrentContext->omega;
248     } break;
249     case NL_ERROR: {
250         *params = nlCurrentContext->error;
251     } break;
252     case NL_ELAPSED_TIME: {
253         *params = nlCurrentContext->elapsed_time;
254     } break;
255     case NL_GFLOPS: {
256         if(nlCurrentContext->elapsed_time == 0) {
257             *params = 0.0;
258         } else {
259             *params = (NLdouble)(nlCurrentContext->flops) /
260                 (nlCurrentContext->elapsed_time * 1e9);
261         }
262     } break;
263     default: {
264         nlError("nlGetDoublev","Invalid parameter");
265         nl_assert_not_reached;
266     }
267     }
268 }
269 
nlGetIntegerv(NLenum pname,NLint * params)270 void nlGetIntegerv(NLenum pname, NLint* params) {
271     switch(pname) {
272     case NL_SOLVER: {
273         *params = (NLint)(nlCurrentContext->solver);
274     } break;
275     case NL_NB_VARIABLES: {
276         *params = (NLint)(nlCurrentContext->nb_variables);
277     } break;
278     case NL_NB_SYSTEMS: {
279 	*params = (NLint)(nlCurrentContext->nb_systems);
280     } break;
281     case NL_LEAST_SQUARES: {
282         *params = (NLint)(nlCurrentContext->least_squares);
283     } break;
284     case NL_MAX_ITERATIONS: {
285         *params = (NLint)(nlCurrentContext->max_iterations);
286     } break;
287     case NL_SYMMETRIC: {
288         *params = (NLint)(nlCurrentContext->symmetric);
289     } break;
290     case NL_USED_ITERATIONS: {
291         *params = (NLint)(nlCurrentContext->used_iterations);
292     } break;
293     case NL_PRECONDITIONER: {
294         *params = (NLint)(nlCurrentContext->preconditioner);
295     } break;
296     case NL_NNZ: {
297         *params = (NLint)(nlMatrixNNZ(nlCurrentContext->M));
298     } break;
299     default: {
300         nlError("nlGetIntegerv","Invalid parameter");
301         nl_assert_not_reached;
302     }
303     }
304 }
305 
306 
nlGetIntegervL(NLenum pname,NLlong * params)307 void nlGetIntegervL(NLenum pname, NLlong* params) {
308     switch(pname) {
309     case NL_SOLVER: {
310         *params = (NLlong)(nlCurrentContext->solver);
311     } break;
312     case NL_NB_VARIABLES: {
313         *params = (NLlong)(nlCurrentContext->nb_variables);
314     } break;
315     case NL_NB_SYSTEMS: {
316 	*params = (NLlong)(nlCurrentContext->nb_systems);
317     } break;
318     case NL_LEAST_SQUARES: {
319         *params = (NLlong)(nlCurrentContext->least_squares);
320     } break;
321     case NL_MAX_ITERATIONS: {
322         *params = (NLlong)(nlCurrentContext->max_iterations);
323     } break;
324     case NL_SYMMETRIC: {
325         *params = (NLlong)(nlCurrentContext->symmetric);
326     } break;
327     case NL_USED_ITERATIONS: {
328         *params = (NLlong)(nlCurrentContext->used_iterations);
329     } break;
330     case NL_PRECONDITIONER: {
331         *params = (NLlong)(nlCurrentContext->preconditioner);
332     } break;
333     case NL_NNZ: {
334         *params = (NLlong)(nlMatrixNNZ(nlCurrentContext->M));
335     } break;
336     default: {
337         nlError("nlGetIntegervL","Invalid parameter");
338         nl_assert_not_reached;
339     }
340     }
341 }
342 
343 /******************************************************************************/
344 /* Enable / Disable */
345 
nlEnable(NLenum pname)346 void nlEnable(NLenum pname) {
347     switch(pname) {
348 	case NL_NORMALIZE_ROWS: {
349 	    nl_assert(nlCurrentContext->state != NL_STATE_ROW);
350 	    nlCurrentContext->normalize_rows = NL_TRUE;
351 	} break;
352 	case NL_VERBOSE: {
353 	    nlCurrentContext->verbose = NL_TRUE;
354 	} break;
355 	case NL_VARIABLES_BUFFER: {
356 	    nlCurrentContext->user_variable_buffers = NL_TRUE;
357 	} break;
358 	case NL_NO_VARIABLES_INDIRECTION: {
359 	    nlCurrentContext->no_variables_indirection = NL_TRUE;
360 	} break;
361     default: {
362         nlError("nlEnable","Invalid parameter");
363         nl_assert_not_reached;
364     }
365     }
366 }
367 
nlDisable(NLenum pname)368 void nlDisable(NLenum pname) {
369     switch(pname) {
370 	case NL_NORMALIZE_ROWS: {
371 	    nl_assert(nlCurrentContext->state != NL_STATE_ROW);
372 	    nlCurrentContext->normalize_rows = NL_FALSE;
373 	} break;
374 	case NL_VERBOSE: {
375 	    nlCurrentContext->verbose = NL_FALSE;
376 	} break;
377 	case NL_VARIABLES_BUFFER: {
378 	    nlCurrentContext->user_variable_buffers = NL_FALSE;
379 	} break;
380 	case NL_NO_VARIABLES_INDIRECTION: {
381 	    nlCurrentContext->no_variables_indirection = NL_FALSE;
382 	} break;
383 	default: {
384 	    nlError("nlDisable","Invalid parameter");
385 	    nl_assert_not_reached;
386 	}
387     }
388 }
389 
nlIsEnabled(NLenum pname)390 NLboolean nlIsEnabled(NLenum pname) {
391     NLboolean result = NL_FALSE;
392     switch(pname) {
393 	case NL_NORMALIZE_ROWS: {
394 	    result = nlCurrentContext->normalize_rows;
395 	} break;
396 	case NL_VERBOSE: {
397 	    result = nlCurrentContext->verbose;
398 	} break;
399 	case NL_VARIABLES_BUFFER: {
400 	    result = nlCurrentContext->user_variable_buffers;
401 	} break;
402 	case NL_NO_VARIABLES_INDIRECTION: {
403 	    result = nlCurrentContext->no_variables_indirection;
404 	} break;
405 	default: {
406 	    nlError("nlIsEnables","Invalid parameter");
407 	    nl_assert_not_reached;
408 	}
409     }
410     return result;
411 }
412 
413 /******************************************************************************/
414 /* NL functions */
415 
nlSetFunction(NLenum pname,NLfunc param)416 void  nlSetFunction(NLenum pname, NLfunc param) {
417     switch(pname) {
418     case NL_FUNC_SOLVER:
419         nlCurrentContext->solver_func = (NLSolverFunc)(param);
420         nlCurrentContext->solver = NL_SOLVER_USER;
421         break;
422     case NL_FUNC_MATRIX:
423 	nlDeleteMatrix(nlCurrentContext->M);
424 	nlCurrentContext->M = nlMatrixNewFromFunction(
425 	    nlCurrentContext->n, nlCurrentContext->n,
426 	    (NLMatrixFunc)param
427 	);
428         break;
429     case NL_FUNC_PRECONDITIONER:
430 	nlDeleteMatrix(nlCurrentContext->P);
431 	nlCurrentContext->P = nlMatrixNewFromFunction(
432 	    nlCurrentContext->n, nlCurrentContext->n,
433 	    (NLMatrixFunc)param
434 	);
435         nlCurrentContext->preconditioner = NL_PRECOND_USER;
436         break;
437     case NL_FUNC_PROGRESS:
438         nlCurrentContext->progress_func = (NLProgressFunc)(param);
439         break;
440     default:
441         nlError("nlSetFunction","Invalid parameter");
442         nl_assert_not_reached;
443     }
444 }
445 
nlGetFunction(NLenum pname,NLfunc * param)446 void nlGetFunction(NLenum pname, NLfunc* param) {
447     switch(pname) {
448     case NL_FUNC_SOLVER:
449         *param = (NLfunc)(nlCurrentContext->solver_func);
450         break;
451     case NL_FUNC_MATRIX:
452         *param = (NLfunc)(nlMatrixGetFunction(nlCurrentContext->M));
453         break;
454     case NL_FUNC_PRECONDITIONER:
455         *param = (NLfunc)(nlMatrixGetFunction(nlCurrentContext->P));
456         break;
457     default:
458         nlError("nlGetFunction","Invalid parameter");
459         nl_assert_not_reached;
460     }
461 }
462 
463 /******************************************************************************/
464 /* Get/Set Lock/Unlock variables */
465 
nlSetVariable(NLuint index,NLdouble value)466 void nlSetVariable(NLuint index, NLdouble value) {
467     nlCheckState(NL_STATE_SYSTEM);
468     nl_debug_range_assert(index, 0, nlCurrentContext->nb_variables - 1);
469     NL_BUFFER_ITEM(nlCurrentContext->variable_buffer[0],index) = value;
470 }
471 
nlMultiSetVariable(NLuint index,NLuint system,NLdouble value)472 void nlMultiSetVariable(NLuint index, NLuint system, NLdouble value) {
473     nlCheckState(NL_STATE_SYSTEM);
474     nl_debug_range_assert(index, 0, nlCurrentContext->nb_variables-1);
475     nl_debug_range_assert(system, 0, nlCurrentContext->nb_systems-1);
476     NL_BUFFER_ITEM(nlCurrentContext->variable_buffer[system],index) = value;
477 }
478 
nlGetVariable(NLuint index)479 NLdouble nlGetVariable(NLuint index) {
480     nl_assert(nlCurrentContext->state != NL_STATE_INITIAL);
481     nl_debug_range_assert(index, 0, nlCurrentContext->nb_variables - 1);
482     return NL_BUFFER_ITEM(nlCurrentContext->variable_buffer[0],index);
483 }
484 
nlMultiGetVariable(NLuint index,NLuint system)485 NLdouble nlMultiGetVariable(NLuint index, NLuint system) {
486     nl_assert(nlCurrentContext->state != NL_STATE_INITIAL);
487     nl_debug_range_assert(index, 0, nlCurrentContext->nb_variables-1);
488     nl_debug_range_assert(system, 0, nlCurrentContext->nb_systems-1);
489     return NL_BUFFER_ITEM(nlCurrentContext->variable_buffer[system],index);
490 }
491 
492 
nlLockVariable(NLuint index)493 void nlLockVariable(NLuint index) {
494     nlCheckState(NL_STATE_SYSTEM);
495     nl_debug_range_assert(index, 0, nlCurrentContext->nb_variables - 1);
496     nlCurrentContext->variable_is_locked[index] = NL_TRUE;
497 }
498 
nlUnlockVariable(NLuint index)499 void nlUnlockVariable(NLuint index) {
500     nlCheckState(NL_STATE_SYSTEM);
501     nl_debug_range_assert(index, 0, nlCurrentContext->nb_variables - 1);
502     nlCurrentContext->variable_is_locked[index] = NL_FALSE;
503 }
504 
nlVariableIsLocked(NLuint index)505 NLboolean nlVariableIsLocked(NLuint index) {
506     nl_assert(nlCurrentContext->state != NL_STATE_INITIAL);
507     nl_debug_range_assert(index, 0, nlCurrentContext->nb_variables - 1);
508     return nlCurrentContext->variable_is_locked[index];
509 }
510 
511 /******************************************************************************/
512 /* System construction */
513 
nlVariablesToVector()514 static void nlVariablesToVector() {
515     NLuint n=nlCurrentContext->n;
516     NLuint k,i,index;
517     NLdouble value;
518 
519     nl_assert(nlCurrentContext->x != NULL);
520     for(k=0; k<nlCurrentContext->nb_systems; ++k) {
521 	for(i=0; i<nlCurrentContext->nb_variables; ++i) {
522 	    if(!nlCurrentContext->variable_is_locked[i]) {
523 		index = nlCurrentContext->variable_index[i];
524 		nl_assert(index < nlCurrentContext->n);
525 		value = NL_BUFFER_ITEM(nlCurrentContext->variable_buffer[k],i);
526 		nlCurrentContext->x[index+k*n] = value;
527 	    }
528 	}
529     }
530 }
531 
nlVectorToVariables()532 static void nlVectorToVariables() {
533     NLuint n=nlCurrentContext->n;
534     NLuint k,i,index;
535     NLdouble value;
536 
537     nl_assert(nlCurrentContext->x != NULL);
538     for(k=0; k<nlCurrentContext->nb_systems; ++k) {
539 	for(i=0; i<nlCurrentContext->nb_variables; ++i) {
540 	    if(!nlCurrentContext->variable_is_locked[i]) {
541 		index = nlCurrentContext->variable_index[i];
542 		nl_assert(index < nlCurrentContext->n);
543 		value = nlCurrentContext->x[index+k*n];
544 		NL_BUFFER_ITEM(nlCurrentContext->variable_buffer[k],i) = value;
545 	    }
546 	}
547     }
548 }
549 
550 
nlBeginSystem()551 static void nlBeginSystem() {
552     NLuint k;
553 
554     nlTransition(NL_STATE_INITIAL, NL_STATE_SYSTEM);
555     nl_assert(nlCurrentContext->nb_variables > 0);
556 
557     nlCurrentContext->variable_buffer = NL_NEW_ARRAY(
558 	NLBufferBinding, nlCurrentContext->nb_systems
559     );
560 
561     if(nlCurrentContext->user_variable_buffers) {
562 	nlCurrentContext->variable_value = NULL;
563     } else {
564 	nlCurrentContext->variable_value = NL_NEW_ARRAY(
565 	    NLdouble,
566 	    nlCurrentContext->nb_variables * nlCurrentContext->nb_systems
567 	);
568 	for(k=0; k<nlCurrentContext->nb_systems; ++k) {
569 	    nlCurrentContext->variable_buffer[k].base_address =
570 		nlCurrentContext->variable_value +
571 		k * nlCurrentContext->nb_variables;
572 	    nlCurrentContext->variable_buffer[k].stride = sizeof(NLdouble);
573 	}
574     }
575 
576     if(!nlCurrentContext->no_variables_indirection) {
577 	nlCurrentContext->variable_is_locked = NL_NEW_ARRAY(
578 	    NLboolean, nlCurrentContext->nb_variables
579 	);
580 	nlCurrentContext->variable_index = NL_NEW_ARRAY(
581 	    NLuint, nlCurrentContext->nb_variables
582 	);
583     }
584     nlCurrentContext->has_matrix_pattern = NL_FALSE;
585 }
586 
nlEndSystem()587 static void nlEndSystem() {
588     nlTransition(NL_STATE_MATRIX_CONSTRUCTED, NL_STATE_SYSTEM_CONSTRUCTED);
589 }
590 
nlInitializeMSystem()591 static void nlInitializeMSystem() {
592     NLuint i;
593     NLuint n = nlCurrentContext->nb_variables;
594 
595     if(!nlCurrentContext->no_variables_indirection) {
596 
597         /* Stores only 1 value per system (rhs of current row). */
598 	nlCurrentContext->right_hand_side = NL_NEW_ARRAY(
599 	    double, nlCurrentContext->nb_systems
600 	);
601 
602 	n = 0;
603 	for(i=0; i<nlCurrentContext->nb_variables; i++) {
604 	    if(!nlCurrentContext->variable_is_locked[i]) {
605 		nlCurrentContext->variable_index[i] = n;
606 		n++;
607 	    } else {
608 		nlCurrentContext->variable_index[i] = (NLuint)~0;
609 	    }
610 	}
611 
612 	nlCurrentContext->x = NL_NEW_ARRAY(
613 	    NLdouble, n*nlCurrentContext->nb_systems
614 	);
615 
616 	nlCurrentContext->n = n;
617 	nlVariablesToVector();
618 
619 	nlRowColumnConstruct(&nlCurrentContext->af);
620 	nlRowColumnConstruct(&nlCurrentContext->al);
621     }
622 
623     nlCurrentContext->b = NL_NEW_ARRAY(
624 	NLdouble, n*nlCurrentContext->nb_systems
625     );
626 
627     nlCurrentContext->n = n;
628     nlCurrentContext->current_row = 0;
629 
630     /*
631      * If the user trusts OpenNL and has left solver as NL_SOLVER_DEFAULT,
632      * then we setup reasonable parameters for him.
633      */
634     if(nlCurrentContext->solver == NL_SOLVER_DEFAULT) {
635         if(nlCurrentContext->least_squares || nlCurrentContext->symmetric) {
636             nlCurrentContext->solver = NL_CG;
637             if(!nlCurrentContext->preconditioner_defined) {
638                 nlCurrentContext->preconditioner = NL_PRECOND_JACOBI;
639             }
640         } else {
641             nlCurrentContext->solver = NL_BICGSTAB;
642         }
643         if(!nlCurrentContext->max_iterations_defined) {
644             nlCurrentContext->max_iterations = n*5;
645         }
646         if(!nlCurrentContext->threshold_defined) {
647             nlCurrentContext->threshold = 1e-6;
648         }
649     }
650 
651     /* a least squares problem results in a symmetric matrix */
652     if(nlCurrentContext->least_squares) {
653         nlCurrentContext->symmetric = NL_TRUE;
654     }
655 }
656 
nlInitializeMCRSMatrixPattern()657 static void nlInitializeMCRSMatrixPattern() {
658     NLuint n = nlCurrentContext->n;
659     nlCurrentContext->M = (NLMatrix)(NL_NEW(NLCRSMatrix));
660     if(nlCurrentContext->symmetric) {
661 	nlCRSMatrixConstructPatternSymmetric(
662 	    (NLCRSMatrix*)(nlCurrentContext->M), n
663 	);
664     } else {
665 	nlCRSMatrixConstructPattern(
666 	    (NLCRSMatrix*)(nlCurrentContext->M), n, n
667 	);
668     }
669     nlCurrentContext->has_matrix_pattern = NL_TRUE;
670 }
671 
nlInitializeMSparseMatrix()672 static void nlInitializeMSparseMatrix() {
673     NLuint n = nlCurrentContext->n;
674     NLenum storage = NL_MATRIX_STORE_ROWS;
675 
676     /* SSOR preconditioner requires rows and columns */
677     if(nlCurrentContext->preconditioner == NL_PRECOND_SSOR) {
678         storage = (storage | NL_MATRIX_STORE_COLUMNS);
679     }
680 
681     if(
682 	nlCurrentContext->symmetric &&
683         nlCurrentContext->preconditioner == NL_PRECOND_SSOR
684     ) {
685 	/*
686 	 * For now, only used with SSOR preconditioner, because
687 	 * for other modes it is either unsupported (SUPERLU) or
688 	 * causes performance loss (non-parallel sparse SpMV)
689 	 */
690         storage = (storage | NL_MATRIX_STORE_SYMMETRIC);
691     }
692 
693     nlCurrentContext->M = (NLMatrix)(NL_NEW(NLSparseMatrix));
694     nlSparseMatrixConstruct(
695 	     (NLSparseMatrix*)(nlCurrentContext->M), n, n, storage
696     );
697 }
698 
nlEndMatrix()699 static void nlEndMatrix() {
700 #ifdef NL_DEBUG
701     NLuint i;
702     NLuint_big jj;
703     NLCRSMatrix* M = NULL;
704 #endif
705     nlTransition(NL_STATE_MATRIX, NL_STATE_MATRIX_CONSTRUCTED);
706 
707     if(!nlCurrentContext->no_variables_indirection) {
708 	nlRowColumnClear(&nlCurrentContext->af);
709 	nlRowColumnClear(&nlCurrentContext->al);
710     }
711 
712     if(!nlCurrentContext->least_squares) {
713         nl_assert(
714             nlCurrentContext->ij_coefficient_called || (
715                 nlCurrentContext->current_row ==
716                 nlCurrentContext->n
717             )
718         );
719     }
720 
721 #ifdef NL_DEBUG
722     if(nlCurrentContext->has_matrix_pattern) {
723 	M = nlGetCurrentCRSMatrix();
724 	for(i=0; i<M->m; ++i) {
725 	    for(jj=M->rowptr[i]; jj<M->rowptr[i+1]; ++jj) {
726 		/*
727 		 * Test that all coefficients were created.
728 		 * This assertion test will fail whenever
729 		 * a length has fewer coefficient than specified
730 		 * with nlSetRowLength()
731 		 */
732 		nl_assert(M->colind[jj] != (NLuint)(-1));
733 	    }
734 	}
735     }
736 #endif
737 }
738 
nlBeginRow()739 static void nlBeginRow() {
740     nlTransition(NL_STATE_MATRIX, NL_STATE_ROW);
741     nlRowColumnZero(&nlCurrentContext->af);
742     nlRowColumnZero(&nlCurrentContext->al);
743 }
744 
nlScaleRow(NLdouble s)745 static void nlScaleRow(NLdouble s) {
746     NLRowColumn*    af = &nlCurrentContext->af;
747     NLRowColumn*    al = &nlCurrentContext->al;
748     NLuint nf            = af->size;
749     NLuint nl            = al->size;
750     NLuint i,k;
751     for(i=0; i<nf; i++) {
752         af->coeff[i].value *= s;
753     }
754     for(i=0; i<nl; i++) {
755         al->coeff[i].value *= s;
756     }
757     for(k=0; k<nlCurrentContext->nb_systems; ++k) {
758 	nlCurrentContext->right_hand_side[k] *= s;
759     }
760 }
761 
nlNormalizeRow(NLdouble weight)762 static void nlNormalizeRow(NLdouble weight) {
763     NLRowColumn*    af = &nlCurrentContext->af;
764     NLRowColumn*    al = &nlCurrentContext->al;
765     NLuint nf            = af->size;
766     NLuint nl            = al->size;
767     NLuint i;
768     NLdouble norm = 0.0;
769     for(i=0; i<nf; i++) {
770         norm += af->coeff[i].value * af->coeff[i].value;
771     }
772     for(i=0; i<nl; i++) {
773         norm += al->coeff[i].value * al->coeff[i].value;
774     }
775     norm = sqrt(norm);
776     nlScaleRow(weight / norm);
777 }
778 
nlEndRow()779 static void nlEndRow() {
780     NLRowColumn*    af = &nlCurrentContext->af;
781     NLRowColumn*    al = &nlCurrentContext->al;
782     NLSparseMatrix* M  = nlGetCurrentSparseMatrix();
783     NLdouble* b        = nlCurrentContext->b;
784     NLuint nf          = af->size;
785     NLuint nl          = al->size;
786     NLuint n           = nlCurrentContext->n;
787     NLuint current_row = nlCurrentContext->current_row;
788     NLuint i,j,jj;
789     NLdouble S;
790     NLuint k;
791     nlTransition(NL_STATE_ROW, NL_STATE_MATRIX);
792 
793     if(nlCurrentContext->normalize_rows) {
794         nlNormalizeRow(nlCurrentContext->row_scaling);
795     } else if(nlCurrentContext->row_scaling != 1.0) {
796         nlScaleRow(nlCurrentContext->row_scaling);
797     }
798     /*
799      * if least_squares : we want to solve
800      * A'A x = A'b
801      */
802 
803     if(nlCurrentContext->least_squares) {
804         for(i=0; i<nf; i++) {
805             for(j=0; j<nf; j++) {
806                 nlSparseMatrixAdd(
807                     M, af->coeff[i].index, af->coeff[j].index,
808                     af->coeff[i].value * af->coeff[j].value
809                 );
810             }
811         }
812 	for(k=0; k<nlCurrentContext->nb_systems; ++k) {
813 	    S = -nlCurrentContext->right_hand_side[k];
814 	    for(jj=0; jj<nl; ++jj) {
815 		j = al->coeff[jj].index;
816 		S += al->coeff[jj].value *
817 		    NL_BUFFER_ITEM(nlCurrentContext->variable_buffer[k],j);
818 	    }
819 	    for(jj=0; jj<nf; jj++) {
820 		b[ k*n+af->coeff[jj].index ] -= af->coeff[jj].value * S;
821 	    }
822 	}
823     } else {
824         for(jj=0; jj<nf; ++jj) {
825             nlSparseMatrixAdd(
826                 M, current_row, af->coeff[jj].index, af->coeff[jj].value
827             );
828         }
829 	for(k=0; k<nlCurrentContext->nb_systems; ++k) {
830 	    b[k*n+current_row] = nlCurrentContext->right_hand_side[k];
831 	    for(jj=0; jj<nl; ++jj) {
832 		j = al->coeff[jj].index;
833 		b[k*n+current_row] -= al->coeff[jj].value *
834 		    NL_BUFFER_ITEM(nlCurrentContext->variable_buffer[k],j);
835 	    }
836 	}
837     }
838     nlCurrentContext->current_row++;
839     for(k=0; k<nlCurrentContext->nb_systems; ++k) {
840 	nlCurrentContext->right_hand_side[k] = 0.0;
841     }
842     nlCurrentContext->row_scaling = 1.0;
843 }
844 
nlCoefficient(NLuint index,NLdouble value)845 void nlCoefficient(NLuint index, NLdouble value) {
846     nlCheckState(NL_STATE_ROW);
847     nl_debug_range_assert(index, 0, nlCurrentContext->nb_variables - 1);
848     if(nlCurrentContext->variable_is_locked[index]) {
849 	/*
850 	 * Note: in al, indices are NLvariable indices,
851 	 * within [0..nb_variables-1]
852 	 */
853         nlRowColumnAppend(&(nlCurrentContext->al), index, value);
854     } else {
855 	/*
856 	 * Note: in af, indices are system indices,
857 	 * within [0..n-1]
858 	 */
859         nlRowColumnAppend(
860 	    &(nlCurrentContext->af),
861 	    nlCurrentContext->variable_index[index], value
862 	);
863     }
864 }
865 
nlAddIJCoefficient(NLuint i,NLuint j,NLdouble value)866 void nlAddIJCoefficient(NLuint i, NLuint j, NLdouble value) {
867 #ifdef NL_DEBUG
868     NLuint kk;
869     if(nlCurrentContext->variable_is_locked != NULL) {
870 	for(kk=0; kk<nlCurrentContext->nb_variables; ++kk) {
871 	    nl_debug_assert(!nlCurrentContext->variable_is_locked[kk]);
872 	}
873     }
874 #endif
875     nlCheckState(NL_STATE_MATRIX);
876     nl_debug_range_assert(i, 0, nlCurrentContext->nb_variables - 1);
877     nl_debug_range_assert(j, 0, nlCurrentContext->nb_variables - 1);
878     if(nlCurrentContext->has_matrix_pattern) {
879 	nlCRSMatrixAdd(
880 	    nlGetCurrentCRSMatrix(), i, j, value
881         );
882     } else {
883 	nlSparseMatrixAdd(
884 	    nlGetCurrentSparseMatrix(), i, j, value
885         );
886     }
887     nlCurrentContext->ij_coefficient_called = NL_TRUE;
888 }
889 
nlAddIRightHandSide(NLuint i,NLdouble value)890 void nlAddIRightHandSide(NLuint i, NLdouble value) {
891 #ifdef NL_DEBUG
892     NLuint kk;
893     if(nlCurrentContext->variable_is_locked != NULL) {
894 	for(kk=0; kk<nlCurrentContext->nb_variables; ++kk) {
895 	    nl_debug_assert(!nlCurrentContext->variable_is_locked[kk]);
896 	}
897     }
898 #endif
899     nlCheckState(NL_STATE_MATRIX);
900     nl_debug_range_assert(i, 0, nlCurrentContext->nb_variables - 1);
901     nlCurrentContext->b[i] += value;
902     nlCurrentContext->ij_coefficient_called = NL_TRUE;
903 }
904 
nlMultiAddIRightHandSide(NLuint i,NLuint k,NLdouble value)905 void nlMultiAddIRightHandSide(NLuint i, NLuint k, NLdouble value) {
906     NLuint n = nlCurrentContext->n;
907 #ifdef NL_DEBUG
908     NLuint kk;
909     if(nlCurrentContext->variable_is_locked != NULL) {
910 	for(kk=0; kk<nlCurrentContext->nb_variables; ++kk) {
911 	    nl_debug_assert(!nlCurrentContext->variable_is_locked[kk]);
912 	}
913     }
914 #endif
915     nlCheckState(NL_STATE_MATRIX);
916     nl_debug_range_assert(i, 0, nlCurrentContext->nb_variables - 1);
917     nl_debug_range_assert(k, 0, nlCurrentContext->nb_systems - 1);
918     nlCurrentContext->b[i + k*n] += value;
919     nlCurrentContext->ij_coefficient_called = NL_TRUE;
920 }
921 
nlRightHandSide(NLdouble value)922 void nlRightHandSide(NLdouble value) {
923     nlCurrentContext->right_hand_side[0] = value;
924 }
925 
nlMultiRightHandSide(NLuint k,NLdouble value)926 void nlMultiRightHandSide(NLuint k, NLdouble value) {
927     nl_debug_range_assert(k, 0, nlCurrentContext->nb_systems - 1);
928     nlCurrentContext->right_hand_side[k] = value;
929 }
930 
nlRowScaling(NLdouble value)931 void nlRowScaling(NLdouble value) {
932     nlCheckState(NL_STATE_MATRIX);
933     nlCurrentContext->row_scaling = value;
934 }
935 
nlBegin(NLenum prim)936 void nlBegin(NLenum prim) {
937     switch(prim) {
938     case NL_SYSTEM: {
939         nlBeginSystem();
940     } break;
941     case NL_MATRIX_PATTERN: {
942 	nlTransition(NL_STATE_SYSTEM, NL_STATE_MATRIX_PATTERN);
943 	nlInitializeMSystem();
944 	nlInitializeMCRSMatrixPattern();
945     } break;
946     case NL_MATRIX: {
947 	nlTransition(NL_STATE_SYSTEM, NL_STATE_MATRIX);
948 	if(
949 	    nlCurrentContext->matrix_mode == NL_STIFFNESS_MATRIX &&
950 	    nlCurrentContext->M == NULL
951 	) {
952 	    nlInitializeMSystem();
953 	    nlInitializeMSparseMatrix();
954 	}
955     } break;
956     case NL_ROW: {
957         nlBeginRow();
958     } break;
959     default: {
960         nl_assert_not_reached;
961     }
962     }
963 }
964 
nlEnd(NLenum prim)965 void nlEnd(NLenum prim) {
966     switch(prim) {
967     case NL_SYSTEM: {
968         nlEndSystem();
969     } break;
970     case NL_MATRIX: {
971         nlEndMatrix();
972     } break;
973     case NL_MATRIX_PATTERN: {
974 	nlTransition(NL_STATE_MATRIX_PATTERN, NL_STATE_SYSTEM);
975 	nlCRSMatrixPatternCompile(nlGetCurrentCRSMatrix());
976     } break;
977     case NL_ROW: {
978         nlEndRow();
979     } break;
980     default: {
981         nl_assert_not_reached;
982     }
983     }
984 }
985 
986 /******************************************************************************/
987 /* nlSolve() driver routine */
988 
nlSolve()989 NLboolean nlSolve() {
990     NLboolean result;
991     nlCheckState(NL_STATE_SYSTEM_CONSTRUCTED);
992     nlCurrentContext->start_time = nlCurrentTime();
993     nlCurrentContext->elapsed_time = 0.0;
994     nlCurrentContext->flops = 0;
995     result = nlCurrentContext->solver_func();
996     if(!nlCurrentContext->no_variables_indirection) {
997 	nlVectorToVariables();
998     }
999     nlCurrentContext->elapsed_time = nlCurrentTime() - nlCurrentContext->start_time;
1000     nlTransition(NL_STATE_SYSTEM_CONSTRUCTED, NL_STATE_SOLVED);
1001     return result;
1002 }
1003 
nlUpdateRightHandSide(NLdouble * values)1004 void nlUpdateRightHandSide(NLdouble* values) {
1005     /*
1006      * If we are in the solved state, get back to the
1007      * constructed state.
1008      */
1009     nl_assert(nlCurrentContext->nb_systems == 1);
1010     if(nlCurrentContext->state == NL_STATE_SOLVED) {
1011         nlTransition(NL_STATE_SOLVED, NL_STATE_SYSTEM_CONSTRUCTED);
1012     }
1013     nlCheckState(NL_STATE_SYSTEM_CONSTRUCTED);
1014     memcpy(nlCurrentContext->x, values, nlCurrentContext->n * sizeof(double));
1015 }
1016 
1017 /******************************************************************************/
1018 /* Buffers management */
1019 
nlBindBuffer(NLenum buffer,NLuint k,void * addr,NLuint stride)1020 void nlBindBuffer(
1021     NLenum buffer, NLuint k, void* addr, NLuint stride
1022 ) {
1023     nlCheckState(NL_STATE_SYSTEM);
1024     nl_assert(nlIsEnabled(buffer));
1025     nl_assert(buffer == NL_VARIABLES_BUFFER);
1026     nl_assert(k<nlCurrentContext->nb_systems);
1027     if(stride == 0) {
1028 	stride = sizeof(NLdouble);
1029     }
1030     nlCurrentContext->variable_buffer[k].base_address = addr;
1031     nlCurrentContext->variable_buffer[k].stride = stride;
1032 }
1033 
1034 /******************************************************************************/
1035 /* Eigen solver */
1036 
nlMatrixMode(NLenum matrix)1037 void nlMatrixMode(NLenum matrix) {
1038     NLuint n = 0;
1039     NLuint i;
1040     nl_assert(
1041 	nlCurrentContext->state == NL_STATE_SYSTEM ||
1042 	nlCurrentContext->state == NL_STATE_MATRIX_CONSTRUCTED
1043     );
1044     nlCurrentContext->state = NL_STATE_SYSTEM;
1045     nlCurrentContext->matrix_mode = matrix;
1046     nlCurrentContext->current_row = 0;
1047     nlCurrentContext->ij_coefficient_called = NL_FALSE;
1048     switch(matrix) {
1049 	case NL_STIFFNESS_MATRIX: {
1050 	    /* Stiffness matrix is already constructed. */
1051 	} break ;
1052 	case NL_MASS_MATRIX: {
1053 	    if(nlCurrentContext->B == NULL) {
1054 		for(i=0; i<nlCurrentContext->nb_variables; ++i) {
1055 		    if(!nlCurrentContext->variable_is_locked[i]) {
1056 			++n;
1057 		    }
1058 		}
1059 		nlCurrentContext->B = (NLMatrix)(NL_NEW(NLSparseMatrix));
1060 		nlSparseMatrixConstruct(
1061 		    (NLSparseMatrix*)(nlCurrentContext->B),
1062 		    n, n, NL_MATRIX_STORE_ROWS
1063 		);
1064 	    }
1065 	} break ;
1066 	default:
1067 	    nl_assert_not_reached;
1068     }
1069 }
1070 
1071 
nlEigenSolverParameterd(NLenum pname,NLdouble val)1072 void nlEigenSolverParameterd(
1073     NLenum pname, NLdouble val
1074 ) {
1075     switch(pname) {
1076 	case NL_EIGEN_SHIFT: {
1077 	    nlCurrentContext->eigen_shift =  val;
1078 	} break;
1079 	case NL_EIGEN_THRESHOLD: {
1080 	    nlSolverParameterd(pname, val);
1081 	} break;
1082 	default:
1083 	    nl_assert_not_reached;
1084     }
1085 }
1086 
nlEigenSolverParameteri(NLenum pname,NLint val)1087 void nlEigenSolverParameteri(
1088     NLenum pname, NLint val
1089 ) {
1090     switch(pname) {
1091 	case NL_EIGEN_SOLVER: {
1092 	    nlCurrentContext->eigen_solver = (NLenum)val;
1093 	} break;
1094 	case NL_SYMMETRIC:
1095 	case NL_NB_VARIABLES:
1096 	case NL_NB_EIGENS:
1097 	case NL_EIGEN_MAX_ITERATIONS: {
1098 	    nlSolverParameteri(pname, val);
1099 	} break;
1100 	default:
1101 	    nl_assert_not_reached;
1102     }
1103 }
1104 
nlEigenSolve()1105 void nlEigenSolve() {
1106     if(nlCurrentContext->eigen_value == NULL) {
1107 	nlCurrentContext->eigen_value = NL_NEW_ARRAY(
1108 	    NLdouble,nlCurrentContext->nb_systems
1109 	);
1110     }
1111 
1112     nlMatrixCompress(&nlCurrentContext->M);
1113     if(nlCurrentContext->B != NULL) {
1114 	nlMatrixCompress(&nlCurrentContext->B);
1115     }
1116 
1117     switch(nlCurrentContext->eigen_solver) {
1118 	case NL_ARPACK_EXT:
1119 	    nlEigenSolve_ARPACK();
1120 	    break;
1121 	default:
1122 	    nl_assert_not_reached;
1123     }
1124 }
1125 
nlGetEigenValue(NLuint i)1126 double nlGetEigenValue(NLuint i) {
1127     nl_debug_assert(i < nlCurrentContext->nb_variables);
1128     return nlCurrentContext->eigen_value[i];
1129 }
1130 
nlSetRowLength(NLuint i,NLuint m)1131 void nlSetRowLength(NLuint i, NLuint m) {
1132     NLCRSMatrix* M = nlGetCurrentCRSMatrix();
1133     nl_assert(M != NULL);
1134     nlCRSMatrixPatternSetRowLength(M, i, m);
1135 }
1136