1 /*! \file MM-DissectionSolver.cpp
2 \brief test rouinte of dissection solver reading Matrix Market format
3 \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions
4 \date Jun. 20th 2014
5 \date Jul. 12th 2015
6 \date Feb. 29th 2016
7 */
8
9 // This file is part of Dissection
10 //
11 // Dissection is free software: you can redistribute it and/or modify
12 // it under the terms of the GNU General Public License as published by
13 // the Free Software Foundation, either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // Linking Dissection statically or dynamically with other modules is making
17 // a combined work based on Disssection. Thus, the terms and conditions of
18 // the GNU General Public License cover the whole combination.
19 //
20 // As a special exception, the copyright holders of Dissection give you
21 // permission to combine Dissection program with free software programs or
22 // libraries that are released under the GNU LGPL and with independent modules
23 // that communicate with Dissection solely through the Dissection-fortran
24 // interface. You may copy and distribute such a system following the terms of
25 // the GNU GPL for Dissection and the licenses of the other code concerned,
26 // provided that you include the source code of that other code when and as
27 // the GNU GPL requires distribution of source code and provided that you do
28 // not modify the Dissection-fortran interface.
29 //
30 // Note that people who make modified versions of Dissection are not obligated
31 // to grant this special exception for their modified versions; it is their
32 // choice whether to do so. The GNU General Public License gives permission to
33 // release a modified version without this exception; this exception also makes
34 // it possible to release a modified version which carries forward this
35 // exception. If you modify the Dissection-fortran interface, this exception
36 // does not apply to your modified version of Dissection, and you must remove
37 // this exception when you distribute your modified version.
38 //
39 // This exception is an additional permission under section 7 of the GNU
40 // General Public License, version 3 ("GPLv3")
41 //
42 // Dissection is distributed in the hope that it will be useful,
43 // but WITHOUT ANY WARRANTY; without even the implied warranty of
44 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 // GNU General Public License for more details.
46 //
47 // You should have received a copy of the GNU General Public License
48 // along with Dissection. If not, see <http://www.gnu.org/licenses/>.
49 //
50
51 #include <cstdio>
52 #include <cstdlib>
53 #include <cstring>
54 #include <fcntl.h>
55 #include <unistd.h>
56 #include <math.h>
57
58 #include "Driver/DissectionSolver.hpp"
59 #ifdef BLAS_MKL
60 #include <mkl_service.h>
61 #endif
62 #include <pthread.h>
63 #include <iostream>
64 #include <sstream>
65 #include <fstream>
66 #include <complex>
67 #include "Compiler/arithmetic.hpp"
68
69 using namespace std;
70
71 static int _stat = (-1);
72
thread_child(void * arg)73 void *thread_child(void *arg)
74 {
75 char buf[256];
76 int *pid = (int *)arg;
77 unsigned int mem_tmp, mem_min, mem_max;
78 double avg_mem;
79 avg_mem = 0.0;
80 mem_min = (1U << 31) - 1;
81 mem_max = 0U;
82 int stat0, stat1;
83 stat0 = _stat;
84 unsigned int count = 0U;
85 // fprintf(stderr, "thread_child forked\n");
86 while(_stat != 0) {
87 stat1 = _stat;
88 if (stat1 == 1) {
89 sprintf(buf, "/proc/%d/statm", *pid);
90 ifstream fin(buf);
91 fin >> mem_tmp;
92 fin.close();
93 if (mem_tmp > mem_max) {
94 mem_max = mem_tmp;
95 }
96 if (mem_tmp < mem_min) {
97 mem_min = mem_tmp;
98 }
99 avg_mem += (double)mem_tmp;
100 count++;
101 }
102 if ((stat1 == (-1)) && (stat0 == 1)) {
103 fprintf(stderr,
104 "used memory :min: %14.8e max: %14.8e avg: %14.8e count: %d\n",
105 (double)mem_min * 4.0 / (1024.0 * 1024.0),
106 (double)mem_max * 4.0 / (1024.0 * 1024.0),
107 (avg_mem / (double)count) * 4.0 / (1024.0 * 1024.0),
108 count);
109 count = 0U;
110 avg_mem = 0.0;
111 mem_min = (1U << 31) - 1;
112 mem_max = 0U;
113 }
114 stat0 = stat1;
115 usleep(1000);
116 }
117 // fprintf(stderr, "thread_child join\n count = %ld\n", count);
118 pthread_exit(arg);
119
120 return (void *)NULL;
121 }
122
123 template<typename T>
generate_CSR(std::list<int> * ind_cols_tmp,std::list<T> * val_tmp,int nrow,int * nnz,int * mask,int * old2new,int * irow,int * jcol,T * val,bool symmetrize)124 void generate_CSR(std::list<int>* ind_cols_tmp, std::list<T>* val_tmp,
125 int nrow, int *nnz, int *mask, int *old2new,
126 int *irow, int *jcol, T* val, bool symmetrize)
127 {
128 const T zero(0.0);
129 // ind_cols_tmp = new std::list<int>[nrow];
130 // val_tmp = new std::list<T>[nrow];
131 int nnz1 = *nnz;
132 for (int i = 0; i < *nnz; i++) {
133 const int i0 = irow[i];
134 const int j0 = jcol[i];
135 const int ii = old2new[i0];
136 const int jj = old2new[j0];
137 if ((mask[i0] != 1) || (mask[j0] != 1)) {
138 // fprintf(stderr, "%d %d\n", i0, j0);
139 nnz1--;
140 continue;
141 }
142 // fprintf(stderr, "%d %d -> %d %d \n", i0, j0, ii, jj);
143 if (ind_cols_tmp[ii].empty()) {
144 ind_cols_tmp[ii].push_back(jj);
145 val_tmp[ii].push_back(val[i]);
146 }
147 else {
148 if (ind_cols_tmp[ii].back() < jj) {
149 ind_cols_tmp[ii].push_back(jj);
150 val_tmp[ii].push_back(val[i]);
151 }
152 else {
153 typename std::list<T>::iterator iv = val_tmp[ii].begin();
154 std::list<int>::iterator it = ind_cols_tmp[ii].begin();
155 for ( ; it != ind_cols_tmp[ii].end(); ++it, ++iv) {
156 if (*it == jj) {
157 fprintf(stderr, "already exits? (%d %d)\n", ii, jj);
158 break;
159 }
160 if (*it > jj) {
161 ind_cols_tmp[ii].insert(it, jj);
162 val_tmp[ii].insert(iv, val[i]);
163 break;
164 }
165 }
166 }
167 }
168 }
169 // symmetrize
170 if (symmetrize) {
171 for (int i = 0; i < nrow; i++) {
172 std::list<int>::iterator jt = ind_cols_tmp[i].begin();
173 for ( ; jt != ind_cols_tmp[i].end(); ++jt) {
174 const int jj = (*jt);
175 bool flag = false;
176 std::list<int>::iterator it = ind_cols_tmp[jj].begin();
177 for ( ; it != ind_cols_tmp[jj].end(); ++it) {
178 if ((*it) == i) {
179 flag = true;
180 // fprintf(stderr, "%d %d symmetric position found\n", i, jj);
181 break;
182 }
183 }
184 if (!flag) {
185 if (ind_cols_tmp[jj].back() < i) {
186 ind_cols_tmp[jj].push_back(i);
187 val_tmp[jj].push_back(zero);
188 fprintf(stderr, "%d %d added for symmetry\n", i, jj);
189 nnz1++;
190 }
191 else {
192 typename std::list<T>::iterator iv = val_tmp[jj].begin();
193 std::list<int>::iterator it = ind_cols_tmp[jj].begin();
194 for ( ; it != ind_cols_tmp[jj].end(); ++it, ++iv) {
195 std::list<int>::iterator it1 = it;
196 ++it1;
197 if (((*it)) < i && ((*it1) > i)) {
198 ind_cols_tmp[jj].insert(it, i);
199 val_tmp[jj].insert(iv, zero);
200 nnz1++;
201 // fprintf(stderr, "%d %d inserted for symmetry\n", i, jj);
202 break;
203 }
204 }
205 }
206 } // if (!flag);
207 }
208 }
209 }
210 #if 0
211 {
212 for (int i = 0; i < nrow; i++) {
213 std::list<int>::iterator jt = ind_cols_tmp[i].begin();
214 for ( ; jt != ind_cols_tmp[i].end(); ++jt) {
215 const int jj = (*jt);
216 bool flag = false;
217 std::list<int>::iterator it = ind_cols_tmp[jj].begin();
218 for ( ; it != ind_cols_tmp[jj].end(); ++it) {
219 if ((*it) == i) {
220 flag = true;
221 break;
222 }
223 }
224 if (!flag) {
225 fprintf(stderr, "%d %d position is not symmetric\n", i, jj);
226 }
227 }
228 }
229 }
230 #endif
231 *nnz = nnz1;
232 }
233
234 template<typename T>
copy_CSR(int * indcols,int * ptrows,T * coefs,int nrow,bool upper_flag,bool isSym,std::list<int> * ind_cols_tmp,std::list<T> * val_tmp)235 void copy_CSR(int *indcols, int *ptrows, T* coefs, int nrow,
236 bool upper_flag, bool isSym,
237 std::list<int>* ind_cols_tmp, std::list<T>* val_tmp)
238 {
239 const T zero(0.0);
240 ptrows[0] = 0;
241 for (int i = 0; i < nrow; i++) {
242 int k;
243 int itmp = ind_cols_tmp[i].size();
244 if (upper_flag) {
245 if (ind_cols_tmp[i].front() == i) {
246 ptrows[i + 1] = ptrows[i] + itmp;
247 k = ptrows[i];
248 }
249 else {
250 fprintf(stderr, "zero is added to diagonal : %d\n", i);
251 ptrows[i + 1] = ptrows[i] + itmp + 1;
252 indcols[ptrows[i]] = i;
253 coefs[ptrows[i]] = zero;
254 k = ptrows[i] + 1;
255 }
256 }
257 else {
258 k = ptrows[i];
259 if (ind_cols_tmp[i].back() == i || (!isSym)) {
260 ptrows[i + 1] = ptrows[i] + itmp;
261 }
262 else {
263 fprintf(stderr, "zero is added to diagonal : %d\n", i);
264 ptrows[i + 1] = ptrows[i] + itmp + 1;
265 indcols[ptrows[i + 1] - 1] = i;
266 coefs[ptrows[i + 1] - 1] = zero;
267 }
268 }
269 std::list<int>::iterator it = ind_cols_tmp[i].begin();
270 typename std::list<T>::iterator iv = val_tmp[i].begin();
271 for ( ; it != ind_cols_tmp[i].end(); ++it, ++iv, k++) {
272 indcols[k] = *it;
273 coefs[k] = *iv;
274 }
275 } // loop : i
276 }
277
main(int argc,char ** argv)278 int main(int argc, char **argv)
279 {
280 int n, itmp, jtmp;
281 char fname[256], fname1[256];
282 char buf[1024];
283 int nrow, nnz, flag;
284 int *ptrows, *indcols;
285 int *irow, *jcol;
286 double *val, *coefs;
287 complex<double> *valc, *ccoefs;
288 quadruple *qcoefs;
289 complex<quadruple> *qccoefs;
290 int decomposer;
291 int num_threads;
292 int scaling = 1;
293 double eps_pivot;
294 int numlevels = -1;
295 int minNodes = 128;
296 std::list<int>* ind_cols_tmp;
297 std::list<double>* val_tmp;
298 std::list<complex<double> >* val_tmpc;
299 FILE *fp;
300 bool isSym, isComplex;
301 bool upper_flag = true;
302 bool isWhole = false;
303 bool kernel_detection_all = false;
304 int *indx_excl;
305 int nexcl = 0;
306 bool excl_flag = false;
307
308 if (argc < 6) {
309 fprintf(stderr, "MM-dissection [data file] [decomposer] [num_threads] [eps_pivot] [num_levels] [scaling] [kerner_detection_all] [upper_flag] [minNodes]\n");
310 exit(-1);
311 }
312 strcpy(fname, argv[1]);
313 decomposer = atoi(argv[2]);
314 num_threads = atoi(argv[3]);
315 eps_pivot = atof(argv[4]);
316 numlevels = atof(argv[5]);
317 if (argc >= 7) {
318 scaling = atoi(argv[6]);
319 }
320 if (argc >= 8) {
321 kernel_detection_all = (atoi(argv[7]) == 1);
322 }
323
324 if (argc >= 9) {
325 upper_flag = (atoi(argv[8]) == 1);
326 isWhole = (atoi(argv[8]) == (-1));
327 }
328 if (argc >= 10) {
329 strcpy(fname1, argv[9]);
330 excl_flag = true;
331 }
332 if (argc >= 11) {
333 minNodes = atoi(argv[10]);
334 }
335
336 // read from the file
337 if ((fp = fopen(fname, "r")) == NULL) {
338 fprintf(stderr, "fail to open %s\n", fname);
339 }
340 fgets(buf, 256, fp);
341 //
342 if (strstr(buf, "symmetric") != NULL) {
343 isSym = true;
344 }
345 else {
346 isSym = false;
347 upper_flag = false;
348 }
349 if (strstr(buf, "complex") != NULL) {
350 isComplex = true;
351 }
352 else {
353 isComplex = false;
354 }
355
356 fprintf(stderr, "symmetric = %s\n", isSym ? "true " : "false");
357 fprintf(stderr, "scaling = %d\n", scaling);
358 fprintf(stderr, "upper = %s\n", upper_flag ? "true" : "false");
359 if (kernel_detection_all) {
360 fprintf(stderr, "kernel detection is activated for all submatrices\n");
361 }
362 if (excl_flag) {
363 fprintf(stderr, "list of singular nodes %s\n", fname1);
364 }
365 while (1) {
366 fgets(buf, 256, fp);
367 if (buf[0] != '%') {
368 sscanf(buf, "%d %d %d", &nrow, &itmp, &nnz);
369 break;
370 }
371 }
372 irow = new int[nnz];
373 jcol = new int[nnz];
374
375 if (isComplex) {
376 double xreal, ximag;
377 valc = new complex<double>[nnz];
378 if (upper_flag) {
379 for (int i = 0; i < nnz; i++) {
380 fscanf(fp, "%d %d %lf %lf", &jcol[i], &irow[i], &xreal, &ximag);
381 valc[i] = complex<double>(xreal, ximag);
382 irow[i]--;
383 jcol[i]--;
384 if (isSym && irow[i] > jcol[i]) {
385 fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]);
386 itmp = irow[i];
387 irow[i] = jcol[i];
388 jcol[i] = itmp;
389 }
390 }
391 }
392 else {
393 for (int i = 0; i < nnz; i++) {
394 fscanf(fp, "%d %d %lf %lf", &irow[i], &jcol[i], &xreal, &ximag);
395 valc[i] = complex<double>(xreal, ximag);
396 irow[i]--;
397 jcol[i]--;
398 if (isSym && irow[i] < jcol[i]) {
399 fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]);
400 itmp = irow[i];
401 irow[i] = jcol[i];
402 jcol[i] = itmp;
403 }
404 }
405 }
406 }
407 else {
408 val = new double[nnz];
409 if (upper_flag) {
410 for (int i = 0; i < nnz; i++) {
411 fscanf(fp, "%d %d %lf", &jcol[i], &irow[i], &val[i]); // read lower
412 irow[i]--;
413 jcol[i]--;
414 if (isSym && irow[i] > jcol[i]) {
415 fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]);
416 itmp = irow[i];
417 irow[i] = jcol[i];
418 jcol[i] = itmp;
419 }
420 }
421 }
422 else {
423 for (int i = 0; i < nnz; i++) {
424 fscanf(fp, "%d %d %lf", &irow[i], &jcol[i], &val[i]); // read lower
425 irow[i]--;
426 jcol[i]--;
427 if (isSym && irow[i] < jcol[i]) {
428 fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]);
429 itmp = irow[i];
430 irow[i] = jcol[i];
431 jcol[i] = itmp;
432 }
433 }
434 }
435 }
436 fclose (fp);
437
438 if (excl_flag) {
439 if ((fp = fopen(fname1, "r")) == NULL) {
440 fprintf(stderr, "fail to open %s\n", fname1);
441 }
442 fgets(buf, 256, fp);
443 sscanf(buf, "# %d", &nexcl);
444 indx_excl = new int[nexcl];
445 for (int i = 0; i < nexcl; i++) {
446 fgets(buf, 256, fp);
447 sscanf(buf, "%d", &itmp);
448 indx_excl[i] = itmp;
449 }
450 fclose(fp);
451 }
452
453 int *mask = new int[nrow];
454 int *old2new = new int[nrow];
455 for (int i = 0; i < nrow; i++) {
456 mask[i] = 1;
457 }
458 for (int i = 0; i < nexcl; i++) {
459 mask[indx_excl[i]] = 0;
460 }
461 itmp = 0;
462 jtmp = nrow - nexcl;
463 for (int i = 0; i < nrow; i++) {
464 if (mask[i] == 1) {
465 old2new[i] = itmp++;
466 }
467 else {
468 old2new[i] = jtmp++;
469 }
470 }
471 #if 0
472 if ((fp = fopen("debug-index.data", "w")) != NULL) {
473 for (int i = 0; i < nrow; i++) {
474 fprintf(fp, "%d %d %d\n", i, old2new[i], mask[i]);
475 }
476 fclose(fp);
477 }
478 #endif
479 nrow -= nexcl;
480
481 ind_cols_tmp = new std::list<int>[nrow];
482 fprintf(stderr, "%s %d : getnerate_CSR\n", __FILE__, __LINE__);
483 if (isComplex) {
484 val_tmpc = new std::list<complex<double> >[nrow];
485 generate_CSR<complex<double> >(ind_cols_tmp, val_tmpc,
486 nrow, &nnz, mask, old2new,
487 irow, jcol, valc, (!isSym));
488 }
489 else {
490 val_tmp = new std::list<double>[nrow];
491 generate_CSR<double>(ind_cols_tmp, val_tmp,
492 nrow, &nnz, mask, old2new,
493 irow, jcol, val, (!isSym));
494 }
495 delete [] irow;
496 delete [] jcol;
497 delete [] mask;
498 delete [] old2new;
499 if (isComplex) {
500 delete [] valc;
501 }
502 else {
503 delete [] val;
504 }
505
506 if (upper_flag) {
507 for (int i = 0; i < nrow; i++) {
508 if (ind_cols_tmp[i].front() != i) {
509 nnz++;
510 }
511 }
512 }
513 else {
514 for (int i = 0; i < nrow; i++) {
515 if (ind_cols_tmp[i].back() != i) {
516 nnz++;
517 }
518 }
519 }
520 fprintf(stderr, "%s %d : copy_CSR\n", __FILE__, __LINE__);
521 ptrows = new int[nrow + 1];
522 indcols = new int[nnz];
523 if (isComplex) {
524 ccoefs = new complex<double>[nnz];
525 copy_CSR<complex<double> >(indcols, ptrows, ccoefs,
526 nrow, upper_flag, isSym,
527 ind_cols_tmp, val_tmpc);
528 qccoefs = new complex<quadruple>[nnz];
529 for (int i = 0; i < nnz; i++) {
530 qccoefs[i] = complex<quadruple>(quadruple(std::real(ccoefs[i])),
531 quadruple(std::imag(ccoefs[i])));
532 }
533 delete [] ccoefs;
534 }
535
536 else {
537 coefs = new double[nnz];
538 copy_CSR<double>(indcols, ptrows, coefs,
539 nrow, upper_flag, isSym,
540 ind_cols_tmp, val_tmp);
541 qcoefs = new quadruple[nnz];
542 for (int i = 0; i < nnz; i++) {
543 qcoefs[i] = quadruple(coefs[i]);
544 }
545 delete [] coefs;
546 }
547 delete [] ind_cols_tmp;
548
549 if (isComplex) {
550 delete [] val_tmpc;
551 }
552 else {
553 delete [] val_tmp;
554 }
555 #if 0
556 if ((fp = fopen("debug.matrix.data", "w")) != NULL) {
557 for (int i = 0; i < nrow; i++) {
558 fprintf(fp, "%d : %d :: ", i, (ptrows[i + 1] - ptrows[i]));
559 for (int k = ptrows[i]; k < ptrows[i + 1]; k++) {
560 fprintf(fp, "%d ", indcols[k]);
561 }
562 fprintf(fp, "\n");
563 }
564 }
565 fclose(fp);
566 #endif
567 int pid = (int)getpid();
568 #if 1
569 fprintf(stderr, "pid = %d\n", pid);
570 sprintf(fname, "dissection.%04d.log", pid);
571 fp = fopen(fname, "a");
572 #else
573 fp = stderr;
574 #endif
575 fprintf(stderr, "%s %d : before pthread_create\n", __FILE__, __LINE__);
576 void* results;
577 pthread_attr_t th_attr;
578 pthread_t thread;
579 pthread_attr_init(&th_attr);
580 pthread_attr_setdetachstate(&th_attr, PTHREAD_CREATE_JOINABLE);
581 int pthid = pthread_create(&thread, &th_attr,
582 &thread_child,
583 (void *)&pid);
584 if (pthid != 0) {
585 cout << "bad thread creation ? " << pid << endl;
586 exit(0);
587 }
588 fprintf(stderr, "%s %d : after pthread_create\n", __FILE__, __LINE__);
589
590 if (isWhole) {
591 isSym = true;
592 upper_flag = false;
593 }
594
595 if (isComplex) {
596 DissectionSolver<complex<quadruple>, quadruple>*dslv =
597 new DissectionSolver<complex<quadruple>, quadruple>(num_threads, true, 0,
598 fp);
599
600 int called = 0;
601 clock_t t0_cpu, t1_cpu, t2_cpu, t3_cpu, t4_cpu, t5_cpu;
602 elapsed_t t0_elapsed, t1_elapsed, t2_elapsed, t3_elapsed,
603 t4_elapsed, t5_elapsed;
604 #ifdef BLAS_MKL
605 mkl_set_num_threads(1);
606 #endif
607 #ifdef VECLIB
608 setenv("VECLIB_MAXIMUM_THREADS", "1", true);
609 #endif
610 t0_cpu = clock();
611 get_realtime(&t0_elapsed);
612
613 dslv->SymbolicFact(nrow, (int *)ptrows, (int *)indcols,
614 isSym,
615 upper_flag,
616 isWhole,
617 decomposer, numlevels, minNodes);
618
619 t1_cpu = clock();
620 get_realtime(&t1_elapsed);
621
622 t2_cpu = clock();
623 get_realtime(&t2_elapsed);
624 _stat = 1;
625 usleep(5000);
626 dslv->NumericFact(0, (complex<quadruple> *)qccoefs, scaling,
627 eps_pivot, kernel_detection_all);
628 _stat = (-1);
629 t3_cpu = clock();
630 get_realtime(&t3_elapsed);
631 usleep(5000);
632 fprintf(stderr, "%s %d : NumericFact() done\n", __FILE__, __LINE__);
633
634 int n0;
635 n0 = dslv->kern_dimension();
636 fprintf(fp, "## kernel dimension = %d\n", n0);
637
638 complex<quadruple> *x = new complex<quadruple>[nrow];
639 complex<quadruple> *y = new complex<quadruple>[nrow];
640 complex<quadruple> *z = new complex<quadruple>[nrow];
641 for (int i = 0; i < nrow; i++) {
642 y[i] = complex<quadruple>((quadruple)(i % 11), quadruple(0));
643 }
644 dslv->SpMV(y, x);
645 dslv->SpMV(x, y);
646 for (int i = 0; i < nrow; i++) {
647 z[i] = y[i];
648 }
649
650 t4_cpu = clock();
651 get_realtime(&t4_elapsed);
652 _stat = 1;
653 usleep(5000);
654 dslv->SolveSingle(y, false, false, true); // with projection : true
655 _stat = (-1);
656 usleep(5000);
657 fprintf(stderr, "%s %d : SolveSingle() done\n", __FILE__, __LINE__);
658 t5_cpu = clock();
659 get_realtime(&t5_elapsed);
660 quadruple norm0, norm1;
661 norm0 = quadruple(0);
662 norm1 = quadruple(0);
663 for (int i = 0; i < nrow; i++) {
664 norm0 += x[i].real() * x[i].real() + x[i].imag() * x[i].imag();
665 complex<quadruple> ztmp = y[i] - x[i];
666 norm1 += ztmp.real() * ztmp.real() + ztmp.imag() * ztmp.imag();
667 }
668 fprintf(fp, "%s %d : ## error = %18.7e\n",
669 __FILE__, __LINE__,
670 sqrt(quad2double(norm1 / norm0)));
671
672 dslv->SpMV(y, x);
673
674 norm0 = 0.0;
675 norm1 = 0.0;
676 for (int i = 0; i < nrow; i++) {
677 norm0 += z[i].real() * z[i].real() + z[i].imag() * z[i].imag();
678 complex<quadruple> ztmp = z[i] - x[i];
679 norm1 += ztmp.real() * ztmp.real() + ztmp.imag() * ztmp.imag();
680 }
681 fprintf(fp, "%s %d : ## residual = %18.7e\n",
682 __FILE__, __LINE__,
683 sqrt(quad2double(norm1 / norm0)));
684 _stat = 0;
685 pthread_attr_destroy(&th_attr);
686 pthid = pthread_join(thread, &results);
687 if (pthid != 0) {
688 cout << "bad thread join ? " << pthid << endl;
689 exit(0);
690 }
691
692 fprintf(fp, "%s %d : ## symbolic fact : cpu time = %.4e elapsed time = %.4e\n",
693 __FILE__, __LINE__,
694 (double)(t1_cpu - t0_cpu) / (double)CLOCKS_PER_SEC,
695 convert_time(t1_elapsed, t0_elapsed));
696
697 fprintf(fp, "%s %d : ## numeric fact : cpu time = %.4e elapsed time = %.4e\n",
698 __FILE__, __LINE__,
699 (double)(t3_cpu - t2_cpu) / (double)CLOCKS_PER_SEC,
700 convert_time(t3_elapsed, t2_elapsed));
701
702 fprintf(fp, "%s %d : ## solve single RHS : cpu time = %.4e elapsed time = %.4e\n",
703 __FILE__, __LINE__,
704 (double)(t5_cpu - t4_cpu) / (double)CLOCKS_PER_SEC,
705 convert_time(t5_elapsed, t4_elapsed));
706
707 delete dslv;
708 delete [] ptrows;
709 delete [] indcols;
710 delete [] qccoefs;
711 delete [] x;
712 delete [] y;
713 delete [] z;
714 } // if (isComplex)
715 else {
716 DissectionSolver<quadruple> *dslv =
717 new DissectionSolver<quadruple>(num_threads, true, 0, fp);
718
719 int called = 0;
720 clock_t t0_cpu, t1_cpu, t2_cpu, t3_cpu, t4_cpu, t5_cpu;
721 elapsed_t t0_elapsed, t1_elapsed, t2_elapsed, t3_elapsed,
722 t4_elapsed, t5_elapsed;
723 #ifdef BLAS_MKL
724 mkl_set_num_threads(1);
725 #endif
726 #ifdef VECLIB
727 setenv("VECLIB_MAXIMUM_THREADS", "1", true);
728 #endif
729 t0_cpu = clock();
730 get_realtime(&t0_elapsed);
731
732 dslv->SymbolicFact(nrow, (int *)ptrows, (int *)indcols,
733 isSym,
734 upper_flag,
735 isWhole,
736 decomposer, numlevels, minNodes);
737 // sym, upper
738 // dslv->SaveMMMatrix(0, coefs);
739 // exit(-1);
740 t1_cpu = clock();
741 get_realtime(&t1_elapsed);
742
743 _stat = 1;
744 usleep(5000);
745 t2_cpu = clock();
746 get_realtime(&t2_elapsed);
747 dslv->NumericFact(0, (quadruple *)qcoefs, scaling,
748 eps_pivot, kernel_detection_all);
749 t3_cpu = clock();
750 get_realtime(&t3_elapsed);
751 _stat = (-1);
752 usleep(5000);
753 fprintf(stderr, "%s %d : NumericFact() done\n", __FILE__, __LINE__);
754 int n0;
755 n0 = dslv->kern_dimension();
756 fprintf(fp, "%s %d : ## kernel dimension = %d\n", __FILE__, __LINE__, n0);
757
758 quadruple *x = new quadruple[nrow];
759 quadruple *y = new quadruple[nrow];
760 quadruple *z = new quadruple[nrow];
761 for (int i = 0; i < nrow; i++) {
762 y[i] = (quadruple)(i % 11);
763 }
764 #define NORMAL
765 #ifdef NORMAL
766 #if 0
767 if (!isSym && (n0 > 0)) {
768 dslv->ComputeTransposedKernels(true);
769 }
770 #endif
771 dslv->SpMV(y, x);
772 if (n0 > 0) {
773 dslv->ProjectionImageSingle(x);
774 }
775 dslv->SpMV(x, y);
776 for (int i = 0; i < nrow; i++) {
777 z[i] = y[i];
778 }
779 _stat = 1;
780 usleep(5000);
781 t4_cpu = clock();
782 get_realtime(&t4_elapsed);
783 dslv->SolveSingle(y, false, false, true); // with projection : true
784 _stat = (-1);
785 usleep(5000);
786 fprintf(stderr, "%s %d : SolveSingle() done\n", __FILE__, __LINE__);
787 if (n0 > 0) {
788 dslv->ProjectionImageSingle(y);
789 }
790 t5_cpu = clock();
791 get_realtime(&t5_elapsed);
792 #else
793 dslv->SpMtV(y, x);
794 // dslv->ProjectionImageSingle(x);
795 dslv->SpMtV(x, y);
796 for (int i = 0; i < nrow; i++) {
797 z[i] = y[i];
798 }
799
800 _stat = 1;
801 usleep(5000);
802 t4_cpu = clock();
803 get_realtime(&t4_elapsed);
804 dslv->SolveSingle(y, false, true); // with projection : true
805 // dslv->ProjectionImageSingle(y);
806 t5_cpu = clock();
807 get_realtime(&t5_elapsed);
808 _stat = (-1);
809 usleep(5000);
810 fprintf(stderr, "%s %d : SolveSingle() done\n", __FILE__, __LINE__);
811 #endif
812 quadruple norm0, norm1;
813 norm0 = quadruple(0);
814 norm1 = quadruple(0);
815 for (int i = 0; i < nrow; i++) {
816 norm0 += x[i] * x[i];
817 norm1 += (y[i] - x[i]) * (y[i] - x[i]);
818 }
819 fprintf(fp, "%s %d : ## error = %18.7e\n",
820 __FILE__, __LINE__, sqrt(quad2double(norm1 / norm0)));
821 #ifdef NORMAL
822 dslv->SpMV(y, x);
823 #else
824 dslv->SpMtV(y, x);
825 #endif
826
827 norm0 = quadruple(0);
828 norm1 = quadruple(0);
829 for (int i = 0; i < nrow; i++) {
830 norm0 += z[i] * z[i];
831 norm1 += (z[i] - x[i]) * (z[i] - x[i]);
832 }
833 fprintf(fp, "%s %d : ## residual = %18.7e\n",
834 __FILE__, __LINE__, sqrt(quad2double(norm1 / norm0)));
835 _stat = 0;
836 pthread_attr_destroy(&th_attr);
837 pthid = pthread_join(thread, &results);
838 if (pthid != 0) {
839 cout << "bad thread join ? " << pthid << endl;
840 exit(0);
841 }
842
843 fprintf(fp, "%s %d : ## symbolic fact : cpu time = %.4e elapsed time = %.4e\n",
844 __FILE__, __LINE__,
845 (double)(t1_cpu - t0_cpu) / (double)CLOCKS_PER_SEC,
846 convert_time(t1_elapsed, t0_elapsed));
847
848 fprintf(fp, "%s %d : ## numeric fact : cpu time = %.4e elapsed time = %.4e\n",
849 __FILE__, __LINE__,
850 (double)(t3_cpu - t2_cpu) / (double)CLOCKS_PER_SEC,
851 convert_time(t3_elapsed, t2_elapsed));
852
853 fprintf(fp, "%s %d : ## solve single RHS : cpu time = %.4e elapsed time = %.4e\n",
854 __FILE__, __LINE__,
855 (double)(t5_cpu - t4_cpu) / (double)CLOCKS_PER_SEC,
856 convert_time(t5_elapsed, t4_elapsed));
857
858 #if 0
859 double *scalediag;
860 scalediag = new double[nrow];
861 dslv->GetMatrixScaling(scalediag);
862 for (int i = 0; i < nrow; i++) {
863 for (int k = ptrows[i]; k < ptrows[i + 1]; k++) {
864 if (indcols[k] == i) {
865 fprintf(fp, "%i : %g : %g %g\n", i, scalediag[i],
866 1.0 / (scalediag[i] * scalediag[i]), coefs[k]);
867 break;
868 }
869 }
870 }
871 delete [] scalediag;
872 #endif
873
874 delete dslv;
875 delete [] ptrows;
876 delete [] indcols;
877 delete [] qcoefs;
878 delete [] x;
879 delete [] y;
880 delete [] z;
881 }
882 fclose(fp);
883
884 }
885