1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2 
3    Redistribution and use in source and binary forms, with or without
4    modification, are permitted provided that the following conditions are met:
5    1. Redistributions of source code must retain the above copyright
6       notice, this list of conditions and the following disclaimer.
7    2. Redistributions in binary form must reproduce the above copyright
8       notice, this list of conditions and the following disclaimer in the
9       documentation and/or other materials provided with the distribution.
10    3. Neither the name of the project nor the names of its contributors
11       may be used to endorse or promote products derived from this software
12       without specific prior written permission.
13 
14    THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18    PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19    OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24    POSSIBILITY OF SUCH DAMAGE.
25 */
26 
27 #ifdef HAVE_CONFIG_H
28         #include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31         #include "lis_config_win.h"
32 #endif
33 #endif
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <math.h>
38 #ifdef _OPENMP
39 	#include <omp.h>
40 #endif
41 #ifdef USE_MPI
42 	#include <mpi.h>
43 #endif
44 #include "lislib.h"
45 
46 char *lis_storagename2[]   = {"CSR", "CSC", "MSR", "DIA", "ELL", "JAD", "BSR", "BSC", "VBR", "COO", "DNS"};
47 
48 #undef __FUNC__
49 #define __FUNC__ "main"
main(int argc,char * argv[])50 LIS_INT main(int argc, char* argv[])
51 {
52   LIS_Comm comm;
53   LIS_MATRIX A,A0;
54   LIS_VECTOR b,x;
55   LIS_SCALAR *value;
56   int nprocs,my_rank;
57   LIS_INT nthreads,maxthreads;
58   LIS_INT gn,nnz,np;
59   LIS_INT i,j,k,si,sj,sk,ii,jj,ctr;
60   LIS_INT l,m,n,nn;
61   LIS_INT is,ie;
62   LIS_INT err,iter,matrix_type,storage,ss,se;
63   LIS_INT *ptr,*index;
64   double time,time2,nnzs,nnzap,nnzt;
65   LIS_REAL val;
66   double commtime,comptime,flops;
67 
68   LIS_DEBUG_FUNC_IN;
69 
70   lis_initialize(&argc, &argv);
71 
72   comm = LIS_COMM_WORLD;
73 
74 #ifdef USE_MPI
75   MPI_Comm_size(comm,&nprocs);
76   MPI_Comm_rank(comm,&my_rank);
77 #else
78   nprocs  = 1;
79   my_rank = 0;
80 #endif
81 
82   if( argc < 5 )
83     {
84       lis_printf(comm,"Usage: %s l m n iter [matrix_type]\n", argv[0]);
85       CHKERR(1);
86     }
87 
88   l  = atoi(argv[1]);
89   m  = atoi(argv[2]);
90   n  = atoi(argv[3]);
91   iter = atoi(argv[4]);
92   if (argv[5] == NULL) {
93     storage = 0;
94   }
95   else {
96     storage = atoi(argv[5]);
97   }
98 
99   if( iter<=0 )
100     {
101       lis_printf(comm,"iter=%D <= 0\n",iter);
102       CHKERR(1);
103     }
104   if( l<=0 || m<=0 || n<=0 )
105     {
106       lis_printf(comm,"l=%D <=0, m=%D <=0 or n=%D <=0\n",l,m,n);
107       CHKERR(1);
108     }
109   if( storage<0 || storage>11 )
110     {
111       lis_printf(comm,"matrix_type=%D < 0 or matrix_type=%D > 11\n",storage,storage);
112       CHKERR(1);
113     }
114 
115   lis_printf(comm,"\n");
116   lis_printf(comm,"number of processes = %d\n",nprocs);
117 
118 #ifdef _OPENMP
119   nthreads = omp_get_num_procs();
120   maxthreads = omp_get_max_threads();
121   lis_printf(comm,"max number of threads = %d\n", nthreads);
122   lis_printf(comm,"number of threads = %d\n", maxthreads);
123 #else
124   nthreads = 1;
125   maxthreads = 1;
126 #endif
127 
128   /* create matrix and vectors */
129   nn = l*m*n;
130   err = lis_matrix_create(LIS_COMM_WORLD,&A0);
131   err = lis_matrix_set_size(A0,0,nn);
132   CHKERR(err);
133 
134   ptr   = (LIS_INT *)malloc((A0->n+1)*sizeof(LIS_INT));
135   if( ptr==NULL ) CHKERR(1);
136   index = (LIS_INT *)malloc(27*A0->n*sizeof(LIS_INT));
137   if( index==NULL ) CHKERR(1);
138   value = (LIS_SCALAR *)malloc(27*A0->n*sizeof(LIS_SCALAR));
139   if( value==NULL ) CHKERR(1);
140 
141   lis_matrix_get_range(A0,&is,&ie);
142   ctr = 0;
143   for(ii=is;ii<ie;ii++)
144     {
145       i = ii/(m*n);
146       j = (ii - i*m*n)/n;
147       k = ii - i*m*n - j*n;
148       for(si=-1;si<=1;si++) {
149 	if( i+si>-1 && i+si<l ) {
150 	  for(sj=-1;sj<=1;sj++) {
151 	    if( j+sj>-1 && j+sj<m ) {
152 	      for(sk=-1;sk<=1;sk++) {
153 		if( k+sk>-1 && k+sk<n ) {
154 		  jj = ii + si*m*n + sj*n + sk;
155 		  index[ctr] = jj;
156 		  if( jj==ii ) { value[ctr++] = 26.0;}
157 		  else { value[ctr++] = -1.0;}
158 		}
159 	      }
160 	    }
161 	  }
162 	}
163       }
164       ptr[ii-is+1] = ctr;
165     }
166   ptr[0] = 0;
167   err = lis_matrix_set_csr(ptr[ie-is],ptr,index,value,A0);
168   CHKERR(err);
169   err = lis_matrix_assemble(A0);
170   CHKERR(err);
171 
172   n   = A0->n;
173   gn  = A0->gn;
174   nnz = A0->nnz;
175   np  = A0->np-n;
176 
177 #ifdef USE_MPI
178   MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
179   nnzap = (double)i / (double)nprocs;
180   nnzt  = ((double)nnz -nnzap)*((double)nnz -nnzap);
181   nnz   = i;
182   MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm);
183   nnzs  = (nnzs / (double)nprocs)/nnzap;
184   MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
185   np = i;
186 #endif
187 
188   lis_printf(comm,"matrix size = %D x %D (%D nonzero entries)\n",gn,gn,nnz);
189   lis_printf(comm,"number of iterations = %D\n\n",iter);
190 
191   err = lis_vector_duplicate(A0,&x);
192   if( err ) CHKERR(err);
193   err = lis_vector_duplicate(A0,&b);
194   if( err ) CHKERR(err);
195 
196   lis_matrix_get_range(A0,&is,&ie);
197   for(i=0;i<n;i++)
198     {
199       err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x);
200     }
201   for(i=0;i<n;i++)
202     {
203       lis_sort_id(A0->ptr[i],A0->ptr[i+1]-1,A0->index,A0->value);
204     }
205 
206   /*
207      MPI version of VBR is not implemented.
208      DNS is also excluded to reduce memory usage.
209   */
210 
211   if (storage==0)
212     {
213       ss = 1;
214       se = 11;
215     }
216   else
217     {
218       ss = storage;
219       se = storage+1;
220     }
221 
222   for (matrix_type=ss;matrix_type<se;matrix_type++)
223     {
224       if ( nprocs>1 && matrix_type==9 ) continue;
225       lis_matrix_duplicate(A0,&A);
226       lis_matrix_set_type(A,matrix_type);
227       err = lis_matrix_convert(A0,A);
228       if( err ) CHKERR(err);
229 
230       comptime = 0.0;
231       commtime = 0.0;
232 
233       for(i=0;i<iter;i++)
234 	{
235 #ifdef USE_MPI
236 	  MPI_Barrier(A->comm);
237 	  time = lis_wtime();
238 	  lis_send_recv(A->commtable,x->value);
239 	  commtime += lis_wtime() - time;
240 #endif
241 	  time2 = lis_wtime();
242 	  lis_matvec(A,x,b);
243 	  comptime += lis_wtime() - time2;
244 	}
245       lis_vector_nrm2(b,&val);
246 
247       flops = 2.0*nnz*iter*1.0e-6 / comptime;
248 #ifdef USE_MPI
249       lis_printf(comm,"matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",(int)matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,(double)val);
250 #else
251       printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",(int)matrix_type,lis_storagename2[matrix_type-1],comptime,flops,(double)val);
252 #endif
253       lis_matrix_destroy(A);
254     }
255 
256   lis_matrix_destroy(A0);
257   lis_vector_destroy(b);
258   lis_vector_destroy(x);
259 
260   lis_finalize();
261 
262   LIS_DEBUG_FUNC_OUT;
263 
264   return 0;
265 }
266 
267 
268