1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions are met:
5 1. Redistributions of source code must retain the above copyright
6 notice, this list of conditions and the following disclaimer.
7 2. Redistributions in binary form must reproduce the above copyright
8 notice, this list of conditions and the following disclaimer in the
9 documentation and/or other materials provided with the distribution.
10 3. Neither the name of the project nor the names of its contributors
11 may be used to endorse or promote products derived from this software
12 without specific prior written permission.
13
14 THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18 PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19 OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31 #include "lis_config_win.h"
32 #endif
33 #endif
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <math.h>
38 #ifdef _OPENMP
39 #include <omp.h>
40 #endif
41 #ifdef USE_MPI
42 #include <mpi.h>
43 #endif
44 #include "lislib.h"
45
46 char *lis_storagename2[] = {"CSR", "CSC", "MSR", "DIA", "ELL", "JAD", "BSR", "BSC", "VBR", "COO", "DNS"};
47
48 #undef __FUNC__
49 #define __FUNC__ "main"
main(int argc,char * argv[])50 LIS_INT main(int argc, char* argv[])
51 {
52 LIS_Comm comm;
53 LIS_MATRIX A,A0;
54 LIS_VECTOR b,x;
55 LIS_SCALAR *value;
56 int nprocs,my_rank;
57 LIS_INT nthreads,maxthreads;
58 LIS_INT gn,nnz;
59 LIS_INT i,ii,j,jj,ctr,np;
60 LIS_INT m,n,nn;
61 LIS_INT is,ie;
62 LIS_INT err,iter,matrix_type,storage,ss,se;
63 LIS_INT *ptr,*index;
64 double time,time2,nnzs,nnzap,nnzt;
65 LIS_REAL val;
66 double commtime,comptime,flops;
67
68 LIS_DEBUG_FUNC_IN;
69
70 lis_initialize(&argc, &argv);
71
72 comm = LIS_COMM_WORLD;
73
74 #ifdef USE_MPI
75 MPI_Comm_size(comm,&nprocs);
76 MPI_Comm_rank(comm,&my_rank);
77 #else
78 nprocs = 1;
79 my_rank = 0;
80 #endif
81
82 if( argc < 4 )
83 {
84 lis_printf(comm,"Usage: %s m n iter [matrix_type]\n", argv[0]);
85 CHKERR(1);
86 }
87
88 m = atoi(argv[1]);
89 n = atoi(argv[2]);
90 iter = atoi(argv[3]);
91 if (argv[4] == NULL) {
92 storage = 0;
93 }
94 else {
95 storage = atoi(argv[4]);
96 }
97
98 if( iter<=0 )
99 {
100 lis_printf(comm,"iter=%D <= 0\n",iter);
101 CHKERR(1);
102 }
103 if( n<=0 || m<=0 )
104 {
105 lis_printf(comm,"m=%D <=0 or n=%D <=0\n",m,n);
106 CHKERR(1);
107 }
108 if( storage<0 || storage>11 )
109 {
110 lis_printf(comm,"matrix_type=%D < 0 or matrix_type=%D > 11\n",storage,storage);
111 CHKERR(1);
112 }
113
114 lis_printf(comm,"\n");
115 lis_printf(comm,"number of processes = %d\n",nprocs);
116
117 #ifdef _OPENMP
118 nthreads = omp_get_num_procs();
119 maxthreads = omp_get_max_threads();
120 lis_printf(comm,"max number of threads = %d\n", nthreads);
121 lis_printf(comm,"number of threads = %d\n", maxthreads);
122 #else
123 nthreads = 1;
124 maxthreads = 1;
125 #endif
126
127 /* create matrix and vectors */
128 nn = m*n;
129 err = lis_matrix_create(comm,&A0);
130 err = lis_matrix_set_size(A0,0,nn);
131 CHKERR(err);
132
133 ptr = (LIS_INT *)malloc((A0->n+1)*sizeof(LIS_INT));
134 if( ptr==NULL ) CHKERR(1);
135 index = (LIS_INT *)malloc(5*A0->n*sizeof(LIS_INT));
136 if( index==NULL ) CHKERR(1);
137 value = (LIS_SCALAR *)malloc(5*A0->n*sizeof(LIS_SCALAR));
138 if( value==NULL ) CHKERR(1);
139
140 lis_matrix_get_range(A0,&is,&ie);
141 ctr = 0;
142 for(ii=is;ii<ie;ii++)
143 {
144 i = ii/n;
145 j = ii - i*n;
146 if( i>0 ) { jj = ii - n; index[ctr] = jj; value[ctr++] = -1.0;}
147 if( i<m-1 ) { jj = ii + n; index[ctr] = jj; value[ctr++] = -1.0;}
148 if( j>0 ) { jj = ii - 1; index[ctr] = jj; value[ctr++] = -1.0;}
149 if( j<n-1 ) { jj = ii + 1; index[ctr] = jj; value[ctr++] = -1.0;}
150 index[ctr] = ii; value[ctr++] = 4.0;
151 ptr[ii-is+1] = ctr;
152 }
153 ptr[0] = 0;
154 err = lis_matrix_set_csr(ptr[ie-is],ptr,index,value,A0);
155 CHKERR(err);
156 err = lis_matrix_assemble(A0);
157 CHKERR(err);
158
159 n = A0->n;
160 gn = A0->gn;
161 nnz = A0->nnz;
162 np = A0->np-n;
163
164 #ifdef USE_MPI
165 MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
166 nnzap = (double)i / (double)nprocs;
167 nnzt = ((double)nnz -nnzap)*((double)nnz -nnzap);
168 nnz = i;
169 MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm);
170 nnzs = (nnzs / (double)nprocs)/nnzap;
171 MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
172 np = i;
173 #endif
174
175 lis_printf(comm,"matrix size = %D x %D (%D nonzero entries)\n",gn,gn,nnz);
176 lis_printf(comm,"number of iterations = %D\n\n",iter);
177
178 err = lis_vector_duplicate(A0,&x);
179 if( err ) CHKERR(err);
180 err = lis_vector_duplicate(A0,&b);
181 if( err ) CHKERR(err);
182
183 lis_matrix_get_range(A0,&is,&ie);
184 for(i=0;i<n;i++)
185 {
186 err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x);
187 }
188 for(i=0;i<n;i++)
189 {
190 lis_sort_id(A0->ptr[i],A0->ptr[i+1]-1,A0->index,A0->value);
191 }
192
193 /*
194 MPI version of VBR is not implemented.
195 DNS is also excluded to reduce memory usage.
196 */
197
198 if (storage==0)
199 {
200 ss = 1;
201 se = 11;
202 }
203 else
204 {
205 ss = storage;
206 se = storage+1;
207 }
208
209 for (matrix_type=ss;matrix_type<se;matrix_type++)
210 {
211 if ( nprocs>1 && matrix_type==9 ) continue;
212 lis_matrix_duplicate(A0,&A);
213 lis_matrix_set_type(A,matrix_type);
214 err = lis_matrix_convert(A0,A);
215 if( err ) CHKERR(err);
216
217 comptime = 0.0;
218 commtime = 0.0;
219
220 for(i=0;i<iter;i++)
221 {
222 #ifdef USE_MPI
223 MPI_Barrier(A->comm);
224 time = lis_wtime();
225 lis_send_recv(A->commtable,x->value);
226 commtime += lis_wtime() - time;
227 #endif
228 time2 = lis_wtime();
229 lis_matvec(A,x,b);
230 comptime += lis_wtime() - time2;
231 }
232 lis_vector_nrm2(b,&val);
233
234 flops = 2.0*nnz*iter*1.0e-6 / comptime;
235 #ifdef USE_MPI
236 lis_printf(comm,"matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",(int)matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,(double)val);
237 #else
238 printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",(int)matrix_type,lis_storagename2[matrix_type-1],comptime,flops,(double)val);
239 #endif
240 lis_matrix_destroy(A);
241 }
242
243 lis_matrix_destroy(A0);
244 lis_vector_destroy(b);
245 lis_vector_destroy(x);
246
247 lis_finalize();
248
249 LIS_DEBUG_FUNC_OUT;
250
251 return 0;
252 }
253
254
255