1 /*
2 Copyright (c) 2005-2021 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17 #include <string>
18 #include <cstring>
19 #include <cstdio>
20 #include <cmath>
21
22 #include <vector>
23 #include <map>
24 #include <tuple>
25
26 #include "mkl_lapack.h"
27 #include "mkl.h"
28
29 #include "oneapi/tbb/flow_graph.h"
30 #include "oneapi/tbb/tick_count.h"
31 #include "oneapi/tbb/global_control.h"
32
33 // Application command line arguments parsing
34 #include "common/utility/utility.hpp"
35 #include "common/utility/get_default_num_threads.hpp"
36
37 /************************************************************
38 FORWARD DECLARATIONS
39 ************************************************************/
40
41 /**********************************************
42 Read or generate a positive-definite matrix
43 -- reads from file if fname != nullptr
44 -- sets n to matrix size
45 -- allocates and reads values in to A
46 -- otherwise generates a matrix
47 -- uses n to determine size
48 -- allocates and generates values in to A
49 **********************************************/
50 void matrix_init(double *&A, int &n, const char *fname);
51
52 /**********************************************
53 Writes a lower triangular matrix to a file
54 -- first line of file is n
55 -- subsequently 1 row per line
56 **********************************************/
57 void matrix_write(double *A, int n, const char *fname, bool is_triangular = false);
58
59 /************************************************************
60 GLOBAL VARIABLES
61 ************************************************************/
62 bool g_benchmark_run = false;
63 int g_n = -1, g_b = -1, g_num_trials = 1;
64 char *g_input_file_name = nullptr;
65 char *g_output_prefix = nullptr;
66 std::string g_alg_name;
67 int g_num_tbb_threads;
68
69 // Creates tiled array
create_tile_array(double * A,int n,int b)70 static double ***create_tile_array(double *A, int n, int b) {
71 const int p = n / b;
72 double ***tile = (double ***)calloc(sizeof(double **), p);
73
74 for (int j = 0; j < p; ++j) {
75 tile[j] = (double **)calloc(sizeof(double *), p);
76 }
77
78 for (int j = 0; j < p; ++j) {
79 for (int i = 0; i < p; ++i) {
80 double *temp_block = (double *)calloc(sizeof(double), b * b);
81
82 for (int A_j = j * b, T_j = 0; T_j < b; ++A_j, ++T_j) {
83 for (int A_i = i * b, T_i = 0; T_i < b; ++A_i, ++T_i) {
84 temp_block[T_j * b + T_i] = A[A_j * n + A_i];
85 }
86 }
87
88 tile[j][i] = temp_block;
89 }
90 }
91 return tile;
92 }
93
collapse_tile_array(double *** tile,double * A,int n,int b)94 static void collapse_tile_array(double ***tile, double *A, int n, int b) {
95 const int p = n / b;
96
97 for (int j = 0; j < p; ++j) {
98 for (int i = 0; i < p; ++i) {
99 double *temp_block = tile[j][i];
100
101 for (int A_j = j * b, T_j = 0; T_j < b; ++A_j, ++T_j) {
102 for (int A_i = i * b, T_i = 0; T_i < b; ++A_i, ++T_i) {
103 A[A_j * n + A_i] = temp_block[T_j * b + T_i];
104 }
105 }
106
107 free(temp_block);
108 tile[j][i] = nullptr;
109 }
110
111 free(tile[j]);
112 }
113
114 free(tile);
115 }
116
117 /************************************************************
118 Helper base class: algorithm
119 ************************************************************/
120 class algorithm {
121 std::string name;
122 bool is_tiled;
123
check_if_valid(double * A0,double * C,double * A,int n)124 bool check_if_valid(double *A0, double *C, double *A, int n) {
125 char transa = 'n', transb = 't';
126 double alpha = 1;
127 double beta = 0;
128
129 for (int i = 0; i < n; ++i) {
130 for (int j = i + 1; j < n; ++j) {
131 A0[j * n + i] = 0.;
132 }
133 }
134
135 dgemm(&transa, &transb, &n, &n, &n, &alpha, A0, &n, A0, &n, &beta, C, &n);
136
137 for (int j = 0; j < n; ++j) {
138 for (int i = 0; i < n; ++i) {
139 const double epsilon = std::abs(A[j * n + i] * 0.1);
140
141 if (std::abs(C[j * n + i] - A[j * n + i]) > epsilon) {
142 printf("ERROR: %s did not validate at C(%d,%d) = %lf != A(%d,%d) = %lf\n",
143 name.c_str(),
144 i,
145 j,
146 C[j * n + i],
147 i,
148 j,
149 A[j * n + i]);
150 printf("ERROR: %g; %g < %g < %g\n",
151 epsilon,
152 A[j * n + i] - epsilon,
153 C[j * n + i],
154 A[j * n + i] + epsilon);
155 return false;
156 }
157 }
158 }
159 return true;
160 }
161
162 public:
algorithm(const std::string & alg_name,bool t)163 algorithm(const std::string &alg_name, bool t) : name(alg_name), is_tiled(t) {}
164
operator ()(double * A,int n,int b,int trials)165 double operator()(double *A, int n, int b, int trials) {
166 oneapi::tbb::tick_count t0, t1;
167 double elapsed_time = 0.0;
168 double *A0 = (double *)calloc(sizeof(double), n * n);
169 double *C = (double *)calloc(sizeof(double), n * n);
170
171 for (int i = 0; i < trials + 1; ++i) {
172 if (is_tiled) {
173 double ***tile = create_tile_array(A, n, b);
174 t0 = oneapi::tbb::tick_count::now();
175 func(tile, n, b);
176 t1 = oneapi::tbb::tick_count::now();
177
178 collapse_tile_array(tile, A0, n, b);
179 }
180 else {
181 memcpy(A0, A, sizeof(double) * n * n);
182 t0 = oneapi::tbb::tick_count::now();
183 func(A0, n, b);
184 t1 = oneapi::tbb::tick_count::now();
185 }
186
187 if (i)
188 elapsed_time += (t1 - t0).seconds();
189
190 if (!g_benchmark_run && !check_if_valid(A0, C, A, n)) {
191 if (g_output_prefix) {
192 std::string s(g_output_prefix);
193 s += "_" + name + ".txt";
194 matrix_write(A0, g_n, s.c_str(), true);
195 free(A0);
196 free(C);
197 return 0.;
198 }
199 }
200 }
201
202 if (g_output_prefix) {
203 std::string s(g_output_prefix);
204 s += "_" + name + ".txt";
205 matrix_write(A0, g_n, s.c_str(), true);
206 }
207
208 printf("%s %d %d %d %d %lf %lf\n",
209 name.c_str(),
210 g_num_tbb_threads,
211 trials,
212 n,
213 b,
214 elapsed_time,
215 elapsed_time / trials);
216 free(A0);
217 free(C);
218 return elapsed_time;
219 }
220
221 protected:
222 // Main algorithm body function must be defined in any direved class
223 virtual void func(void *ptr, int n, int b) = 0;
224 };
225
226 /***********************************************************/
227
call_dpotf2(double *** tile,int b,int k)228 static void call_dpotf2(double ***tile, int b, int k) {
229 double *A_block = tile[k][k];
230 char uplo = 'l';
231 int info = 0;
232 dpotf2(&uplo, &b, A_block, &b, &info);
233 return;
234 }
235
call_dtrsm(double *** tile,int b,int k,int j)236 static void call_dtrsm(double ***tile, int b, int k, int j) {
237 double *A_block = tile[k][j];
238 double *L_block = tile[k][k];
239 char uplo = 'l', side = 'r', transa = 't', diag = 'n';
240 double alpha = 1;
241 dtrsm(&side, &uplo, &transa, &diag, &b, &b, &alpha, L_block, &b, A_block, &b);
242 return;
243 }
244
call_dsyr2k(double *** tile,int b,int k,int j,int i)245 static void call_dsyr2k(double ***tile, int b, int k, int j, int i) {
246 double *A_block = tile[i][j];
247 char transa = 'n', transb = 't';
248 char uplo = 'l';
249 double alpha = -1;
250 double beta = 1;
251
252 if (i == j) { // Diagonal block
253 double *L_block = tile[k][i];
254 dsyrk(&uplo, &transa, &b, &b, &alpha, L_block, &b, &beta, A_block, &b);
255 }
256 else { // Non-diagonal block
257 double *L2_block = tile[k][i];
258 double *L1_block = tile[k][j];
259 dgemm(&transa, &transb, &b, &b, &b, &alpha, L1_block, &b, L2_block, &b, &beta, A_block, &b);
260 }
261 return;
262 }
263
264 class algorithm_crout : public algorithm {
265 public:
algorithm_crout()266 algorithm_crout() : algorithm("crout_cholesky", true) {}
267
268 protected:
func(void * ptr,int n,int b)269 virtual void func(void *ptr, int n, int b) {
270 double ***tile = (double ***)ptr;
271 const int p = n / b;
272
273 for (int k = 0; k < p; ++k) {
274 call_dpotf2(tile, b, k);
275
276 for (int j = k + 1; j < p; ++j) {
277 call_dtrsm(tile, b, k, j);
278
279 for (int i = k + 1; i <= j; ++i) {
280 call_dsyr2k(tile, b, k, j, i);
281 }
282 }
283 }
284 }
285 };
286
287 class algorithm_dpotrf : public algorithm {
288 public:
algorithm_dpotrf()289 algorithm_dpotrf() : algorithm("dpotrf_cholesky", false) {}
290
291 protected:
func(void * ptr,int n,int)292 virtual void func(void *ptr, int n, int /* b */) {
293 double *A = (double *)ptr;
294 int lda = n;
295 int info = 0;
296 char uplo = 'l';
297 dpotrf(&uplo, &n, A, &lda, &info);
298 }
299 };
300
301 /************************************************************
302 Begin data join graph based version of cholesky
303 ************************************************************/
304
305 typedef union {
306 char a[4];
307 std::size_t tag;
308 } tag_t;
309
310 typedef double *tile_t;
311
312 typedef std::pair<tag_t, tile_t> tagged_tile_t;
313 typedef std::tuple<tagged_tile_t> t1_t;
314 typedef std::tuple<tagged_tile_t, tagged_tile_t> t2_t;
315 typedef std::tuple<tagged_tile_t, tagged_tile_t, tagged_tile_t> t3_t;
316
317 typedef oneapi::tbb::flow::multifunction_node<tagged_tile_t, t1_t> dpotf2_node_t;
318 typedef oneapi::tbb::flow::multifunction_node<t2_t, t2_t> dtrsm_node_t;
319 typedef oneapi::tbb::flow::multifunction_node<t3_t, t3_t> dsyr2k_node_t;
320
321 typedef oneapi::tbb::flow::join_node<t2_t, oneapi::tbb::flow::tag_matching> dtrsm_join_t;
322 typedef oneapi::tbb::flow::join_node<t3_t, oneapi::tbb::flow::tag_matching> dsyr2k_join_t;
323
324 class dpotf2_body {
325 int p;
326 int b;
327
328 public:
dpotf2_body(int p_,int b_)329 dpotf2_body(int p_, int b_) : p(p_), b(b_) {}
330
operator ()(const tagged_tile_t & in,dpotf2_node_t::output_ports_type & ports)331 void operator()(const tagged_tile_t &in, dpotf2_node_t::output_ports_type &ports) {
332 int k = in.first.a[0];
333 tile_t A_block = in.second;
334 tag_t t;
335 t.tag = 0;
336 t.a[0] = k;
337 char uplo = 'l';
338 int info = 0;
339 dpotf2(&uplo, &b, A_block, &b, &info);
340
341 // Send to dtrsms in same column
342 // k == k j == k
343 t.a[2] = k;
344 for (int j = k + 1; j < p; ++j) {
345 t.a[1] = j;
346 std::get<0>(ports).try_put(std::make_pair(t, A_block));
347 }
348 }
349 };
350
351 class dtrsm_body {
352 int p;
353 int b;
354
355 public:
dtrsm_body(int p_,int b_)356 dtrsm_body(int p_, int b_) : p(p_), b(b_) {}
357
operator ()(const t2_t & in,dtrsm_node_t::output_ports_type & ports)358 void operator()(const t2_t &in, dtrsm_node_t::output_ports_type &ports) {
359 tagged_tile_t in0 = std::get<0>(in);
360 tagged_tile_t in1 = std::get<1>(in);
361 int k = in0.first.a[0];
362 int j = in0.first.a[1];
363 tile_t L_block = in0.second;
364 tile_t A_block = in1.second;
365 tag_t t;
366 t.tag = 0;
367 t.a[0] = k;
368 char uplo = 'l', side = 'r', transa = 't', diag = 'n';
369 double alpha = 1;
370 dtrsm(&side, &uplo, &transa, &diag, &b, &b, &alpha, L_block, &b, A_block, &b);
371
372 // Send to rest of my row
373 t.a[1] = j;
374 for (int i = k + 1; i <= j; ++i) {
375 t.a[2] = i;
376 std::get<0>(ports).try_put(std::make_pair(t, A_block));
377 }
378
379 // Send to transposed row
380 t.a[2] = j;
381 for (int i = j; i < p; ++i) {
382 t.a[1] = i;
383 std::get<1>(ports).try_put(std::make_pair(t, A_block));
384 }
385 }
386 };
387
388 class dsyr2k_body {
389 int p;
390 int b;
391
392 public:
dsyr2k_body(int p_,int b_)393 dsyr2k_body(int p_, int b_) : p(p_), b(b_) {}
394
operator ()(const t3_t & in,dsyr2k_node_t::output_ports_type & ports)395 void operator()(const t3_t &in, dsyr2k_node_t::output_ports_type &ports) {
396 tag_t t;
397 t.tag = 0;
398 char transa = 'n', transb = 't';
399 char uplo = 'l';
400 double alpha = -1;
401 double beta = 1;
402
403 tagged_tile_t in0 = std::get<0>(in);
404 tagged_tile_t in1 = std::get<1>(in);
405 tagged_tile_t in2 = std::get<2>(in);
406 int k = in2.first.a[0];
407 int j = in2.first.a[1];
408 int i = in2.first.a[2];
409
410 tile_t A_block = in2.second;
411 if (i == j) { // Diagonal block
412 tile_t L_block = in0.second;
413 dsyrk(&uplo, &transa, &b, &b, &alpha, L_block, &b, &beta, A_block, &b);
414 }
415 else { // Non-diagonal block
416 tile_t L1_block = in0.second;
417 tile_t L2_block = in1.second;
418 dgemm(&transa,
419 &transb,
420 &b,
421 &b,
422 &b,
423 &alpha,
424 L1_block,
425 &b,
426 L2_block,
427 &b,
428 &beta,
429 A_block,
430 &b);
431 }
432
433 // All outputs flow to next step
434 t.a[0] = k + 1;
435 t.a[1] = j;
436 t.a[2] = i;
437 if (k != p - 1 && j == k + 1 && i == k + 1) {
438 std::get<0>(ports).try_put(std::make_pair(t, A_block));
439 }
440
441 if (k < p - 2) {
442 if (i == k + 1 && j > i) {
443 t.a[0] = k + 1;
444 t.a[1] = j;
445 std::get<1>(ports).try_put(std::make_pair(t, A_block));
446 }
447
448 if (j != k + 1 && i != k + 1) {
449 t.a[0] = k + 1;
450 t.a[1] = j;
451 t.a[2] = i;
452 std::get<2>(ports).try_put(std::make_pair(t, A_block));
453 }
454 }
455 }
456 };
457
458 struct tagged_tile_to_size_t {
operator ()tagged_tile_to_size_t459 std::size_t operator()(const tagged_tile_t &t) {
460 return t.first.tag;
461 }
462 };
463
464 class algorithm_join : public algorithm {
465 public:
algorithm_join()466 algorithm_join() : algorithm("data_join_cholesky", true) {}
467
468 protected:
func(void * ptr,int n,int b)469 virtual void func(void *ptr, int n, int b) {
470 using oneapi::tbb::flow::unlimited;
471 using oneapi::tbb::flow::output_port;
472 using oneapi::tbb::flow::input_port;
473
474 double ***tile = (double ***)ptr;
475 const int p = n / b;
476 oneapi::tbb::flow::graph g;
477
478 dpotf2_node_t dpotf2_node(g, unlimited, dpotf2_body(p, b));
479 dtrsm_node_t dtrsm_node(g, unlimited, dtrsm_body(p, b));
480 dsyr2k_node_t dsyr2k_node(g, unlimited, dsyr2k_body(p, b));
481 dtrsm_join_t dtrsm_join(g, tagged_tile_to_size_t(), tagged_tile_to_size_t());
482 dsyr2k_join_t dsyr2k_join(
483 g, tagged_tile_to_size_t(), tagged_tile_to_size_t(), tagged_tile_to_size_t());
484
485 make_edge(output_port<0>(dsyr2k_node), dpotf2_node);
486
487 make_edge(output_port<0>(dpotf2_node), input_port<0>(dtrsm_join));
488 make_edge(output_port<1>(dsyr2k_node), input_port<1>(dtrsm_join));
489 make_edge(dtrsm_join, dtrsm_node);
490
491 make_edge(output_port<0>(dtrsm_node), input_port<0>(dsyr2k_join));
492 make_edge(output_port<1>(dtrsm_node), input_port<1>(dsyr2k_join));
493 make_edge(output_port<2>(dsyr2k_node), input_port<2>(dsyr2k_join));
494 make_edge(dsyr2k_join, dsyr2k_node);
495
496 // Now we need to send out the tiles to their first nodes
497 tag_t t;
498 t.tag = 0;
499 t.a[0] = 0;
500 t.a[1] = 0;
501 t.a[2] = 0;
502
503 // Send to feedback input of first dpotf2
504 // k == 0, j == 0, i == 0
505 dpotf2_node.try_put(std::make_pair(t, tile[0][0]));
506
507 // Send to feedback input (port 1) of each dtrsm
508 // k == 0, j == 1..p-1
509 for (int j = 1; j < p; ++j) {
510 t.a[1] = j;
511 input_port<1>(dtrsm_join).try_put(std::make_pair(t, tile[0][j]));
512 }
513
514 // Send to feedback input (port 2) of each dsyr2k
515 // k == 0
516 for (int i = 1; i < p; ++i) {
517 t.a[2] = i;
518
519 for (int j = i; j < p; ++j) {
520 t.a[1] = j;
521 input_port<2>(dsyr2k_join).try_put(std::make_pair(t, tile[i][j]));
522 }
523 }
524
525 g.wait_for_all();
526 }
527 };
528
529 /************************************************************
530 End data join graph based version of cholesky
531 ************************************************************/
532
533 /************************************************************
534 Begin dependence graph based version of cholesky
535 ************************************************************/
536
537 typedef oneapi::tbb::flow::continue_node<oneapi::tbb::flow::continue_msg> continue_type;
538 typedef continue_type *continue_ptr_type;
539
540 class algorithm_depend : public algorithm {
541 public:
algorithm_depend()542 algorithm_depend() : algorithm("depend_cholesky", true) {}
543
544 protected:
func(void * ptr,int n,int b)545 virtual void func(void *ptr, int n, int b) {
546 double ***tile = (double ***)ptr;
547
548 const int p = n / b;
549 continue_ptr_type *c = new continue_ptr_type[p];
550 continue_ptr_type **t = new continue_ptr_type *[p];
551 continue_ptr_type ***u = new continue_ptr_type **[p];
552
553 oneapi::tbb::flow::graph g;
554 for (int k = p - 1; k >= 0; --k) {
555 c[k] = new continue_type(g, [=](const oneapi::tbb::flow::continue_msg &) {
556 call_dpotf2(tile, b, k);
557 });
558 t[k] = new continue_ptr_type[p];
559 u[k] = new continue_ptr_type *[p];
560
561 for (int j = k + 1; j < p; ++j) {
562 t[k][j] = new continue_type(g, [=](const oneapi::tbb::flow::continue_msg &) {
563 call_dtrsm(tile, b, k, j);
564 });
565 make_edge(*c[k], *t[k][j]);
566 u[k][j] = new continue_ptr_type[p];
567
568 for (int i = k + 1; i <= j; ++i) {
569 u[k][j][i] = new continue_type(g, [=](const oneapi::tbb::flow::continue_msg &) {
570 call_dsyr2k(tile, b, k, j, i);
571 });
572
573 if (k < p - 2 && k + 1 != j && k + 1 != i) {
574 make_edge(*u[k][j][i], *u[k + 1][j][i]);
575 }
576
577 make_edge(*t[k][j], *u[k][j][i]);
578
579 if (i != j) {
580 make_edge(*t[k][i], *u[k][j][i]);
581 }
582
583 if (k < p - 2 && j > i && i == k + 1) {
584 make_edge(*u[k][j][i], *t[i][j]);
585 }
586 }
587 }
588
589 if (k != p - 1) {
590 make_edge(*u[k][k + 1][k + 1], *c[k + 1]);
591 }
592 }
593
594 c[0]->try_put(oneapi::tbb::flow::continue_msg());
595 g.wait_for_all();
596
597 for (int k = p - 1; k >= 0; --k) {
598 for (int j = k + 1; j < p; ++j) {
599 for (int i = k + 1; i <= j; ++i) {
600 delete u[k][j][i];
601 }
602 delete t[k][j];
603 delete[] u[k][j];
604 }
605 delete c[k];
606 delete[] t[k];
607 delete[] u[k];
608 }
609 delete[] c;
610 delete[] t;
611 delete[] u;
612 }
613 }; // class algorithm_depend
614
615 /************************************************************
616 End dependence graph based version of cholesky
617 ************************************************************/
618
process_args(int argc,char * argv[])619 bool process_args(int argc, char *argv[]) {
620 utility::parse_cli_arguments(
621 argc,
622 argv,
623 utility::cli_argument_pack()
624 //"-h" option for displaying help is present implicitly
625 .positional_arg(g_n, "size", "the row/column size of NxN matrix (size <= 46000)")
626 .positional_arg(
627 g_b, "blocksize", "the block size; size must be a multiple of the blocksize")
628 .positional_arg(g_num_trials, "num_trials", "the number of times to run each algorithm")
629 .positional_arg(
630 g_output_prefix,
631 "output_prefix",
632 "if provided the prefix will be preappended to output files:\n"
633 " output_prefix_posdef.txt\n"
634 " output_prefix_X.txt; where X is the algorithm used\n"
635 " if output_prefix is not provided, no output will be written")
636 .positional_arg(g_alg_name,
637 "algorithm",
638 "name of the used algorithm - can be dpotrf, crout, depend or join")
639 .positional_arg(g_num_tbb_threads, "num_tbb_threads", "number of started TBB threads")
640
641 .arg(g_input_file_name,
642 "input_file",
643 "if provided it will be read to get the input matrix")
644 .arg(g_benchmark_run, "-x", "skips all validation"));
645
646 if (g_n > 46000) {
647 printf("ERROR: invalid 'size' value (must be less or equal 46000): %d\n", g_n);
648 return false;
649 }
650
651 if (g_n % g_b != 0) {
652 printf("ERROR: size %d must be a multiple of the blocksize %d\n", g_n, g_b);
653 return false;
654 }
655
656 if (g_n / g_b > 256) {
657 // Because tile index size is 1 byte only in tag_t type
658 printf("ERROR: size / blocksize must be less or equal 256, but %d / %d = %d\n",
659 g_n,
660 g_b,
661 g_n / g_b);
662 return false;
663 }
664
665 if (g_b == -1 || (g_n == -1 && g_input_file_name == nullptr)) {
666 return false;
667 }
668
669 return true;
670 }
671
main(int argc,char * argv[])672 int main(int argc, char *argv[]) {
673 g_num_tbb_threads = utility::get_default_num_threads();
674 typedef std::map<std::string, algorithm *> algmap_t;
675 algmap_t algmap;
676
677 // Init algorithms
678 algmap.insert(std::pair<std::string, algorithm *>("dpotrf", new algorithm_dpotrf));
679 algmap.insert(std::pair<std::string, algorithm *>("crout", new algorithm_crout));
680 algmap.insert(std::pair<std::string, algorithm *>("depend", new algorithm_depend));
681 algmap.insert(std::pair<std::string, algorithm *>("join", new algorithm_join));
682
683 if (!process_args(argc, argv)) {
684 printf("ERROR: Invalid arguments. Run: %s -h\n", argv[0]);
685 return -1;
686 }
687
688 oneapi::tbb::global_control c(oneapi::tbb::global_control::max_allowed_parallelism,
689 g_num_tbb_threads);
690 double *A = nullptr;
691
692 // Read input matrix
693 matrix_init(A, g_n, g_input_file_name);
694
695 // Write input matrix if output_prefix is set and we didn't read from a file
696 if (!g_input_file_name && g_output_prefix) {
697 std::string s(g_output_prefix);
698 s += "_posdef.txt";
699 matrix_write(A, g_n, s.c_str());
700 }
701
702 if (g_alg_name.empty()) {
703 for (algmap_t::iterator i = algmap.begin(); i != algmap.end(); ++i) {
704 algorithm *const alg = i->second;
705 (*alg)(A, g_n, g_b, g_num_trials);
706 }
707 }
708 else {
709 algmap_t::iterator alg_iter = algmap.find(g_alg_name);
710
711 if (alg_iter != algmap.end()) {
712 algorithm *const alg = alg_iter->second;
713 (*alg)(A, g_n, g_b, g_num_trials);
714 }
715 else {
716 printf("ERROR: Invalid algorithm name: %s\n", g_alg_name.c_str());
717 return -1;
718 }
719 }
720
721 free(A);
722 return 0;
723 }
724