1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2 
3    Redistribution and use in source and binary forms, with or without
4    modification, are permitted provided that the following conditions are met:
5    1. Redistributions of source code must retain the above copyright
6       notice, this list of conditions and the following disclaimer.
7    2. Redistributions in binary form must reproduce the above copyright
8       notice, this list of conditions and the following disclaimer in the
9       documentation and/or other materials provided with the distribution.
10    3. Neither the name of the project nor the names of its contributors
11       may be used to endorse or promote products derived from this software
12       without specific prior written permission.
13 
14    THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18    PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19    OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24    POSSIBILITY OF SUCH DAMAGE.
25 */
26 
27 #ifdef HAVE_CONFIG_H
28         #include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31         #include "lis_config_win.h"
32 #endif
33 #endif
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <math.h>
38 #ifdef _OPENMP
39 	#include <omp.h>
40 #endif
41 #ifdef USE_MPI
42 	#include <mpi.h>
43 #endif
44 #include "lislib.h"
45 
46 char *lis_storagename2[]   = {"CSR", "CSC", "MSR", "DIA", "ELL", "JAD", "BSR", "BSC", "VBR", "COO", "DNS"};
47 
48 #undef __FUNC__
49 #define __FUNC__ "main"
main(int argc,char * argv[])50 LIS_INT main(int argc, char* argv[])
51 {
52   LIS_Comm comm;
53   LIS_MATRIX A,A0;
54   LIS_VECTOR b,x;
55   int nprocs,my_rank;
56   LIS_INT nthreads, maxthreads;
57   LIS_INT nnz;
58   LIS_INT i,n,np;
59   LIS_INT block;
60   LIS_INT is,ie;
61   LIS_INT err,iter,matrix_type;
62   double time,time2,nnzs,nnzap,nnzt;
63   LIS_REAL val;
64   double commtime,comptime,flops;
65   FILE *file;
66 
67   LIS_DEBUG_FUNC_IN;
68 
69   lis_initialize(&argc, &argv);
70 
71   comm = LIS_COMM_WORLD;
72 
73 #ifdef USE_MPI
74   MPI_Comm_size(comm,&nprocs);
75   MPI_Comm_rank(comm,&my_rank);
76 #else
77   nprocs  = 1;
78   my_rank = 0;
79 #endif
80 
81   if( argc < 4 )
82     {
83       lis_printf(comm,"Usage: %s matrix_filename matrix_type iter [block] \n", argv[0]);
84       CHKERR(1);
85     }
86 
87   file = fopen(argv[1], "r");
88   if( file==NULL ) CHKERR(1);
89 
90   matrix_type  = atoi(argv[2]);
91   iter = atoi(argv[3]);
92   if (argv[4] == NULL) {
93     block = 2;
94   }
95   else {
96     block = atoi(argv[4]);
97   }
98 
99   if( matrix_type<1 || matrix_type>11 )
100     {
101       lis_printf(comm,"matrix_type=%D <1 or matrix_type=%D >11\n",matrix_type,matrix_type);
102       CHKERR(1);
103     }
104   if( iter<=0 )
105     {
106       lis_printf(comm,"iter=%D <= 0\n",iter);
107       CHKERR(1);
108     }
109 
110   lis_printf(comm,"\n");
111   lis_printf(comm,"number of processes = %d\n",nprocs);
112 
113 #ifdef _OPENMP
114       nthreads = omp_get_num_procs();
115       maxthreads = omp_get_max_threads();
116       lis_printf(comm,"max number of threads = %d\n", nthreads);
117       lis_printf(comm,"number of threads = %d\n", maxthreads);
118 #else
119       nthreads = 1;
120       maxthreads = 1;
121 #endif
122 
123   /* create matrix and vectors */
124   lis_matrix_create(comm,&A0);
125   err = lis_input(A0,NULL,NULL,argv[1]);
126   CHKERR(err);
127 
128   n   = A0->n;
129   nnz = A0->nnz;
130   np  = A0->np-n;
131 #ifdef USE_MPI
132   MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
133   nnzap = (double)i / (double)nprocs;
134   nnzt  = ((double)nnz -nnzap)*((double)nnz -nnzap);
135   nnz   = i;
136   MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm);
137   nnzs  = (nnzs / (double)nprocs)/nnzap;
138   MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
139   np = i;
140 #endif
141 
142   lis_printf(comm,"block size of BSR and BSC = %D x %D\n",block,block);
143   lis_printf(comm,"number of iterations = %D\n\n",iter);
144 
145   err = lis_vector_duplicate(A0,&x);
146   if( err ) CHKERR(err);
147   err = lis_vector_duplicate(A0,&b);
148   if( err ) CHKERR(err);
149 
150   lis_matrix_get_range(A0,&is,&ie);
151   for(i=0;i<n;i++)
152     {
153       err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x);
154     }
155 
156   lis_matrix_duplicate(A0,&A);
157   lis_matrix_set_type(A,matrix_type);
158 
159   //the matrix_type check, should be done before converting the A matrix
160   //or block formats will fail
161   if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_BSC )
162     {
163 	// following line should be added to correctly establish the block size
164 	  lis_matrix_set_blocksize(A,block,block,NULL,NULL);
165       A->bnr = block;
166       A->bnc = block;
167     }
168 
169   err = lis_matrix_convert(A0,A);
170   if( err ) CHKERR(err);
171 
172   comptime = 0.0;
173   commtime = 0.0;
174 
175 
176   for(i=0;i<iter;i++)
177     {
178 #ifdef USE_MPI
179       MPI_Barrier(A->comm);
180       time = lis_wtime();
181       lis_send_recv(A->commtable,x->value);
182       commtime += lis_wtime() - time;
183 #endif
184       time2 = lis_wtime();
185       lis_matvec(A,x,b);
186       comptime += lis_wtime() - time2;
187     }
188   lis_vector_nrm2(b,&val);
189 
190   flops = 2.0*nnz*iter*1.0e-6 / comptime;
191   lis_printf(comm,"matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",(int)matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,(double)val);
192 
193   lis_matrix_destroy(A);
194   lis_matrix_destroy(A0);
195   lis_vector_destroy(b);
196   lis_vector_destroy(x);
197 
198   lis_finalize();
199 
200   LIS_DEBUG_FUNC_OUT;
201 
202   return 0;
203 }
204 
205 
206