1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2 
3    Redistribution and use in source and binary forms, with or without
4    modification, are permitted provided that the following conditions are met:
5    1. Redistributions of source code must retain the above copyright
6       notice, this list of conditions and the following disclaimer.
7    2. Redistributions in binary form must reproduce the above copyright
8       notice, this list of conditions and the following disclaimer in the
9       documentation and/or other materials provided with the distribution.
10    3. Neither the name of the project nor the names of its contributors
11       may be used to endorse or promote products derived from this software
12       without specific prior written permission.
13 
14    THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18    PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19    OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24    POSSIBILITY OF SUCH DAMAGE.
25 */
26 
27 #ifdef HAVE_CONFIG_H
28 	#include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31 	#include "lis_config_win.h"
32 #endif
33 #endif
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #ifdef HAVE_MALLOC_H
39         #include <malloc.h>
40 #endif
41 #include <math.h>
42 #ifdef _OPENMP
43 	#include <omp.h>
44 #endif
45 #ifdef USE_MPI
46 	#include <mpi.h>
47 #endif
48 #include "lislib.h"
49 
lis_matvec_msr(LIS_MATRIX A,LIS_SCALAR x[],LIS_SCALAR y[])50 void lis_matvec_msr(LIS_MATRIX A, LIS_SCALAR x[], LIS_SCALAR y[])
51 {
52 	LIS_INT i,j,js,je,jj;
53 	LIS_INT n;
54 	LIS_SCALAR t;
55 
56 	n      = A->n;
57 	if( A->is_splited )
58 	{
59 		n    = A->n;
60 		#ifdef _OPENMP
61 		#pragma omp parallel for private(i,j,js,je,t,jj)
62 		#endif
63 		for(i=0; i<n; i++)
64 		{
65 			t = A->D->value[i] * x[i];
66 			js = A->L->index[i];
67 			je = A->L->index[i+1];
68 			for(j=js;j<je;j++)
69 			{
70 				jj = A->L->index[j];
71 				t += A->L->value[j] * x[jj];
72 			}
73 			js = A->U->index[i];
74 			je = A->U->index[i+1];
75 			for(j=js;j<je;j++)
76 			{
77 				jj = A->U->index[j];
78 				t += A->U->value[j] * x[jj];
79 			}
80 			y[i] = t;
81 		}
82 	}
83 	else
84 	{
85 		#ifdef _OPENMP
86 		#pragma omp parallel for private(i,j,js,je,t,jj)
87 		#endif
88 		for(i=0; i<n; i++)
89 		{
90 			t = A->value[i] * x[i];
91 			js = A->index[i];
92 			je = A->index[i+1];
93 			for(j=js;j<je;j++)
94 			{
95 				jj = A->index[j];
96 				t += A->value[j] * x[jj];
97 			}
98 			y[i] = t;
99 		}
100 	}
101 }
102 
lis_matvech_msr(LIS_MATRIX A,LIS_SCALAR x[],LIS_SCALAR y[])103 void lis_matvech_msr(LIS_MATRIX A, LIS_SCALAR x[], LIS_SCALAR y[])
104 {
105 	LIS_INT i,j,js,je,jj;
106 	LIS_INT n,np;
107 	LIS_SCALAR t;
108 	#ifdef _OPENMP
109 		LIS_INT k,nprocs;
110 		LIS_SCALAR *w;
111 	#endif
112 
113 	n    = A->n;
114 	np   = A->np;
115 
116 	if( A->is_splited )
117 	{
118 		for(i=0; i<n; i++)
119 		{
120 			y[i] = conj(A->D->value[i]) * x[i];
121 		}
122 		for(i=0; i<n; i++)
123 		{
124 			t = x[i];
125 			js = A->L->index[i];
126 			je = A->L->index[i+1];
127 			for(j=js;j<je;j++)
128 			{
129 				jj = A->L->index[j];
130 				y[jj] += conj(A->L->value[j]) * t;
131 			}
132 			js = A->U->index[i];
133 			je = A->U->index[i+1];
134 			for(j=js;j<je;j++)
135 			{
136 				jj = A->U->index[j];
137 				y[jj] += conj(A->U->value[j]) * t;
138 			}
139 		}
140 	}
141 	else
142 	{
143 		#ifdef _OPENMP
144 			nprocs = omp_get_max_threads();
145 			w = (LIS_SCALAR *)lis_malloc( nprocs*np*sizeof(LIS_SCALAR),"lis_matvech_msr::w" );
146 			#pragma omp parallel private(i,j,js,je,t,jj,k)
147 			{
148 				k = omp_get_thread_num();
149 				#pragma omp for
150 				for(j=0;j<nprocs;j++)
151 				{
152 					memset( &w[j*np], 0, np*sizeof(LIS_SCALAR) );
153 				}
154 				#pragma omp for
155 				for(i=0; i<n; i++)
156 				{
157 					js = A->index[i];
158 					je = A->index[i+1];
159 					t = x[i];
160 					for(j=js;j<je;j++)
161 					{
162 						jj  = k*np+A->index[j];
163 						w[jj] += conj(A->value[j]) * t;
164 					}
165 					w[k*np+i] += conj(A->value[i]) * x[i];
166 				}
167 				#pragma omp for
168 				for(i=0;i<np;i++)
169 				{
170 					t = 0.0;
171 					for(j=0;j<nprocs;j++)
172 					{
173 						t += w[j*np+i];
174 					}
175 					y[i] = t;
176 				}
177 			}
178 			lis_free(w);
179 		#else
180 			for(i=0; i<n; i++)
181 			{
182 				y[i] = conj(A->value[i]) * x[i];
183 			}
184 			for(i=0; i<n; i++)
185 			{
186 				t = x[i];
187 				js = A->index[i];
188 				je = A->index[i+1];
189 				for(j=js;j<je;j++)
190 				{
191 					jj = A->index[j];
192 					y[jj] += conj(A->value[j]) * t;
193 				}
194 			}
195 		#endif
196 	}
197 }
198