1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions are met:
5 1. Redistributions of source code must retain the above copyright
6 notice, this list of conditions and the following disclaimer.
7 2. Redistributions in binary form must reproduce the above copyright
8 notice, this list of conditions and the following disclaimer in the
9 documentation and/or other materials provided with the distribution.
10 3. Neither the name of the project nor the names of its contributors
11 may be used to endorse or promote products derived from this software
12 without specific prior written permission.
13
14 THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18 PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19 OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31 #include "lis_config_win.h"
32 #endif
33 #endif
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <math.h>
38 #ifdef _OPENMP
39 #include <omp.h>
40 #endif
41 #ifdef USE_MPI
42 #include <mpi.h>
43 #endif
44 #include "lislib.h"
45
46 char *lis_storagename2[] = {"CSR", "CSC", "MSR", "DIA", "ELL", "JAD", "BSR", "BSC", "VBR", "COO", "DNS"};
47
48 #undef __FUNC__
49 #define __FUNC__ "main"
main(int argc,char * argv[])50 LIS_INT main(int argc, char* argv[])
51 {
52 LIS_Comm comm;
53 LIS_MATRIX A,A0;
54 LIS_VECTOR b,x;
55 LIS_SCALAR *value;
56 int nprocs,my_rank;
57 LIS_INT nthreads,maxthreads;
58 LIS_INT gn,nnz,np;
59 LIS_INT i,j,k,si,sj,sk,ii,jj,ctr;
60 LIS_INT l,m,n,nn;
61 LIS_INT is,ie;
62 LIS_INT err,iter,matrix_type,storage,ss,se;
63 LIS_INT *ptr,*index;
64 double time,time2,nnzs,nnzap,nnzt;
65 LIS_REAL val;
66 double commtime,comptime,flops;
67
68 LIS_DEBUG_FUNC_IN;
69
70 lis_initialize(&argc, &argv);
71
72 comm = LIS_COMM_WORLD;
73
74 #ifdef USE_MPI
75 MPI_Comm_size(comm,&nprocs);
76 MPI_Comm_rank(comm,&my_rank);
77 #else
78 nprocs = 1;
79 my_rank = 0;
80 #endif
81
82 if( argc < 5 )
83 {
84 lis_printf(comm,"Usage: %s l m n iter [matrix_type]\n", argv[0]);
85 CHKERR(1);
86 }
87
88 l = atoi(argv[1]);
89 m = atoi(argv[2]);
90 n = atoi(argv[3]);
91 iter = atoi(argv[4]);
92 if (argv[5] == NULL) {
93 storage = 0;
94 }
95 else {
96 storage = atoi(argv[5]);
97 }
98
99 if( iter<=0 )
100 {
101 lis_printf(comm,"iter=%D <= 0\n",iter);
102 CHKERR(1);
103 }
104 if( l<=0 || m<=0 || n<=0 )
105 {
106 lis_printf(comm,"l=%D <=0, m=%D <=0 or n=%D <=0\n",l,m,n);
107 CHKERR(1);
108 }
109 if( storage<0 || storage>11 )
110 {
111 lis_printf(comm,"matrix_type=%D < 0 or matrix_type=%D > 11\n",storage,storage);
112 CHKERR(1);
113 }
114
115 lis_printf(comm,"\n");
116 lis_printf(comm,"number of processes = %d\n",nprocs);
117
118 #ifdef _OPENMP
119 nthreads = omp_get_num_procs();
120 maxthreads = omp_get_max_threads();
121 lis_printf(comm,"max number of threads = %d\n", nthreads);
122 lis_printf(comm,"number of threads = %d\n", maxthreads);
123 #else
124 nthreads = 1;
125 maxthreads = 1;
126 #endif
127
128 /* create matrix and vectors */
129 nn = l*m*n;
130 err = lis_matrix_create(LIS_COMM_WORLD,&A0);
131 err = lis_matrix_set_size(A0,0,nn);
132 CHKERR(err);
133
134 ptr = (LIS_INT *)malloc((A0->n+1)*sizeof(LIS_INT));
135 if( ptr==NULL ) CHKERR(1);
136 index = (LIS_INT *)malloc(27*A0->n*sizeof(LIS_INT));
137 if( index==NULL ) CHKERR(1);
138 value = (LIS_SCALAR *)malloc(27*A0->n*sizeof(LIS_SCALAR));
139 if( value==NULL ) CHKERR(1);
140
141 lis_matrix_get_range(A0,&is,&ie);
142 ctr = 0;
143 for(ii=is;ii<ie;ii++)
144 {
145 i = ii/(m*n);
146 j = (ii - i*m*n)/n;
147 k = ii - i*m*n - j*n;
148 for(si=-1;si<=1;si++) {
149 if( i+si>-1 && i+si<l ) {
150 for(sj=-1;sj<=1;sj++) {
151 if( j+sj>-1 && j+sj<m ) {
152 for(sk=-1;sk<=1;sk++) {
153 if( k+sk>-1 && k+sk<n ) {
154 jj = ii + si*m*n + sj*n + sk;
155 index[ctr] = jj;
156 if( jj==ii ) { value[ctr++] = 26.0;}
157 else { value[ctr++] = -1.0;}
158 }
159 }
160 }
161 }
162 }
163 }
164 ptr[ii-is+1] = ctr;
165 }
166 ptr[0] = 0;
167 err = lis_matrix_set_csr(ptr[ie-is],ptr,index,value,A0);
168 CHKERR(err);
169 err = lis_matrix_assemble(A0);
170 CHKERR(err);
171
172 n = A0->n;
173 gn = A0->gn;
174 nnz = A0->nnz;
175 np = A0->np-n;
176
177 #ifdef USE_MPI
178 MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
179 nnzap = (double)i / (double)nprocs;
180 nnzt = ((double)nnz -nnzap)*((double)nnz -nnzap);
181 nnz = i;
182 MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm);
183 nnzs = (nnzs / (double)nprocs)/nnzap;
184 MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
185 np = i;
186 #endif
187
188 lis_printf(comm,"matrix size = %D x %D (%D nonzero entries)\n",gn,gn,nnz);
189 lis_printf(comm,"number of iterations = %D\n\n",iter);
190
191 err = lis_vector_duplicate(A0,&x);
192 if( err ) CHKERR(err);
193 err = lis_vector_duplicate(A0,&b);
194 if( err ) CHKERR(err);
195
196 lis_matrix_get_range(A0,&is,&ie);
197 for(i=0;i<n;i++)
198 {
199 err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x);
200 }
201 for(i=0;i<n;i++)
202 {
203 lis_sort_id(A0->ptr[i],A0->ptr[i+1]-1,A0->index,A0->value);
204 }
205
206 /*
207 MPI version of VBR is not implemented.
208 DNS is also excluded to reduce memory usage.
209 */
210
211 if (storage==0)
212 {
213 ss = 1;
214 se = 11;
215 }
216 else
217 {
218 ss = storage;
219 se = storage+1;
220 }
221
222 for (matrix_type=ss;matrix_type<se;matrix_type++)
223 {
224 if ( nprocs>1 && matrix_type==9 ) continue;
225 lis_matrix_duplicate(A0,&A);
226 lis_matrix_set_type(A,matrix_type);
227 err = lis_matrix_convert(A0,A);
228 if( err ) CHKERR(err);
229
230 comptime = 0.0;
231 commtime = 0.0;
232
233 for(i=0;i<iter;i++)
234 {
235 #ifdef USE_MPI
236 MPI_Barrier(A->comm);
237 time = lis_wtime();
238 lis_send_recv(A->commtable,x->value);
239 commtime += lis_wtime() - time;
240 #endif
241 time2 = lis_wtime();
242 lis_matvec(A,x,b);
243 comptime += lis_wtime() - time2;
244 }
245 lis_vector_nrm2(b,&val);
246
247 flops = 2.0*nnz*iter*1.0e-6 / comptime;
248 #ifdef USE_MPI
249 lis_printf(comm,"matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",(int)matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,(double)val);
250 #else
251 printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",(int)matrix_type,lis_storagename2[matrix_type-1],comptime,flops,(double)val);
252 #endif
253 lis_matrix_destroy(A);
254 }
255
256 lis_matrix_destroy(A0);
257 lis_vector_destroy(b);
258 lis_vector_destroy(x);
259
260 lis_finalize();
261
262 LIS_DEBUG_FUNC_OUT;
263
264 return 0;
265 }
266
267
268