1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2 
3    Redistribution and use in source and binary forms, with or without
4    modification, are permitted provided that the following conditions are met:
5    1. Redistributions of source code must retain the above copyright
6       notice, this list of conditions and the following disclaimer.
7    2. Redistributions in binary form must reproduce the above copyright
8       notice, this list of conditions and the following disclaimer in the
9       documentation and/or other materials provided with the distribution.
10    3. Neither the name of the project nor the names of its contributors
11       may be used to endorse or promote products derived from this software
12       without specific prior written permission.
13 
14    THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18    PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19    OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24    POSSIBILITY OF SUCH DAMAGE.
25 */
26 
27 #ifdef HAVE_CONFIG_H
28         #include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31         #include "lis_config_win.h"
32 #endif
33 #endif
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <math.h>
38 #ifdef _OPENMP
39 	#include <omp.h>
40 #endif
41 #ifdef USE_MPI
42 	#include <mpi.h>
43 #endif
44 #include "lislib.h"
45 
46 char *lis_storagename2[]   = {"CSR", "CSC", "MSR", "DIA", "ELL", "JAD", "BSR", "BSC", "VBR", "COO", "DNS"};
47 
48 #undef __FUNC__
49 #define __FUNC__ "main"
main(int argc,char * argv[])50 LIS_INT main(int argc, char* argv[])
51 {
52   LIS_Comm comm;
53   LIS_MATRIX A,A0;
54   LIS_VECTOR b,x;
55   LIS_SCALAR *value;
56   int nprocs,my_rank;
57   LIS_INT nthreads,maxthreads;
58   LIS_INT gn,nnz;
59   LIS_INT i,ii,j,jj,ctr,np;
60   LIS_INT m,n,nn;
61   LIS_INT is,ie;
62   LIS_INT err,iter,matrix_type,storage,ss,se;
63   LIS_INT *ptr,*index;
64   double time,time2,nnzs,nnzap,nnzt;
65   LIS_REAL val;
66   double commtime,comptime,flops;
67 
68   LIS_DEBUG_FUNC_IN;
69 
70   lis_initialize(&argc, &argv);
71 
72   comm = LIS_COMM_WORLD;
73 
74 #ifdef USE_MPI
75   MPI_Comm_size(comm,&nprocs);
76   MPI_Comm_rank(comm,&my_rank);
77 #else
78   nprocs  = 1;
79   my_rank = 0;
80 #endif
81 
82   if( argc < 4 )
83     {
84       lis_printf(comm,"Usage: %s m n iter [matrix_type]\n", argv[0]);
85       CHKERR(1);
86     }
87 
88   m  = atoi(argv[1]);
89   n  = atoi(argv[2]);
90   iter = atoi(argv[3]);
91   if (argv[4] == NULL) {
92     storage = 0;
93   }
94   else {
95     storage = atoi(argv[4]);
96   }
97 
98   if( iter<=0 )
99     {
100       lis_printf(comm,"iter=%D <= 0\n",iter);
101       CHKERR(1);
102     }
103   if( n<=0 || m<=0 )
104     {
105       lis_printf(comm,"m=%D <=0 or n=%D <=0\n",m,n);
106       CHKERR(1);
107     }
108   if( storage<0 || storage>11 )
109     {
110       lis_printf(comm,"matrix_type=%D < 0 or matrix_type=%D > 11\n",storage,storage);
111       CHKERR(1);
112     }
113 
114   lis_printf(comm,"\n");
115   lis_printf(comm,"number of processes = %d\n",nprocs);
116 
117 #ifdef _OPENMP
118   nthreads = omp_get_num_procs();
119   maxthreads = omp_get_max_threads();
120   lis_printf(comm,"max number of threads = %d\n", nthreads);
121   lis_printf(comm,"number of threads = %d\n", maxthreads);
122 #else
123   nthreads = 1;
124   maxthreads = 1;
125 #endif
126 
127   /* create matrix and vectors */
128   nn = m*n;
129   err = lis_matrix_create(comm,&A0);
130   err = lis_matrix_set_size(A0,0,nn);
131   CHKERR(err);
132 
133   ptr   = (LIS_INT *)malloc((A0->n+1)*sizeof(LIS_INT));
134   if( ptr==NULL ) CHKERR(1);
135   index = (LIS_INT *)malloc(5*A0->n*sizeof(LIS_INT));
136   if( index==NULL ) CHKERR(1);
137   value = (LIS_SCALAR *)malloc(5*A0->n*sizeof(LIS_SCALAR));
138   if( value==NULL ) CHKERR(1);
139 
140   lis_matrix_get_range(A0,&is,&ie);
141   ctr = 0;
142   for(ii=is;ii<ie;ii++)
143     {
144       i = ii/n;
145       j = ii - i*n;
146       if( i>0 )   { jj = ii - n; index[ctr] = jj; value[ctr++] = -1.0;}
147       if( i<m-1 ) { jj = ii + n; index[ctr] = jj; value[ctr++] = -1.0;}
148       if( j>0 )   { jj = ii - 1; index[ctr] = jj; value[ctr++] = -1.0;}
149       if( j<n-1 ) { jj = ii + 1; index[ctr] = jj; value[ctr++] = -1.0;}
150       index[ctr] = ii; value[ctr++] = 4.0;
151       ptr[ii-is+1] = ctr;
152     }
153   ptr[0] = 0;
154   err = lis_matrix_set_csr(ptr[ie-is],ptr,index,value,A0);
155   CHKERR(err);
156   err = lis_matrix_assemble(A0);
157   CHKERR(err);
158 
159   n   = A0->n;
160   gn  = A0->gn;
161   nnz = A0->nnz;
162   np  = A0->np-n;
163 
164 #ifdef USE_MPI
165   MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
166   nnzap = (double)i / (double)nprocs;
167   nnzt  = ((double)nnz -nnzap)*((double)nnz -nnzap);
168   nnz   = i;
169   MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm);
170   nnzs  = (nnzs / (double)nprocs)/nnzap;
171   MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
172   np = i;
173 #endif
174 
175   lis_printf(comm,"matrix size = %D x %D (%D nonzero entries)\n",gn,gn,nnz);
176   lis_printf(comm,"number of iterations = %D\n\n",iter);
177 
178   err = lis_vector_duplicate(A0,&x);
179   if( err ) CHKERR(err);
180   err = lis_vector_duplicate(A0,&b);
181   if( err ) CHKERR(err);
182 
183   lis_matrix_get_range(A0,&is,&ie);
184   for(i=0;i<n;i++)
185     {
186       err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x);
187     }
188   for(i=0;i<n;i++)
189     {
190       lis_sort_id(A0->ptr[i],A0->ptr[i+1]-1,A0->index,A0->value);
191     }
192 
193   /*
194      MPI version of VBR is not implemented.
195      DNS is also excluded to reduce memory usage.
196   */
197 
198   if (storage==0)
199     {
200       ss = 1;
201       se = 11;
202     }
203   else
204     {
205       ss = storage;
206       se = storage+1;
207     }
208 
209   for (matrix_type=ss;matrix_type<se;matrix_type++)
210     {
211       if ( nprocs>1 && matrix_type==9 ) continue;
212       lis_matrix_duplicate(A0,&A);
213       lis_matrix_set_type(A,matrix_type);
214       err = lis_matrix_convert(A0,A);
215       if( err ) CHKERR(err);
216 
217       comptime = 0.0;
218       commtime = 0.0;
219 
220       for(i=0;i<iter;i++)
221 	{
222 #ifdef USE_MPI
223 	  MPI_Barrier(A->comm);
224 	  time = lis_wtime();
225 	  lis_send_recv(A->commtable,x->value);
226 	  commtime += lis_wtime() - time;
227 #endif
228 	  time2 = lis_wtime();
229 	  lis_matvec(A,x,b);
230 	  comptime += lis_wtime() - time2;
231 	}
232       lis_vector_nrm2(b,&val);
233 
234       flops = 2.0*nnz*iter*1.0e-6 / comptime;
235 #ifdef USE_MPI
236       lis_printf(comm,"matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",(int)matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,(double)val);
237 #else
238       printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",(int)matrix_type,lis_storagename2[matrix_type-1],comptime,flops,(double)val);
239 #endif
240       lis_matrix_destroy(A);
241     }
242 
243   lis_matrix_destroy(A0);
244   lis_vector_destroy(b);
245   lis_vector_destroy(x);
246 
247   lis_finalize();
248 
249   LIS_DEBUG_FUNC_OUT;
250 
251   return 0;
252 }
253 
254 
255