1 /*
2 $Id$
3 *======================================================================
4 *
5 * DISCLAIMER
6 *
7 * This material was prepared as an account of work sponsored by an
8 * agency of the United States Government. Neither the United States
9 * Government nor the United States Department of Energy, nor Battelle,
10 * nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
11 * ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
12 * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
13 * SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
14 * INFRINGE PRIVATELY OWNED RIGHTS.
15 *
16 * ACKNOWLEDGMENT
17 *
18 * This software and its documentation were produced with Government
19 * support under Contract Number DE-AC06-76RLO-1830 awarded by the United
20 * States Department of Energy. The Government retains a paid-up
21 * non-exclusive, irrevocable worldwide license to reproduce, prepare
22 * derivative works, perform publicly and display publicly by or for the
23 * Government, including the right to distribute to other Government
24 * contractors.
25 *
26 *======================================================================
27 *
28 * -- PEIGS routine (version 2.1) --
29 * Pacific Northwest Laboratory
30 * July 28, 1995
31 *
32 *======================================================================
33 */
34 /*
35 PeIGS internal routine forwardLL
36
37 Let L, Z be two non-singular lower triangular matrices. Let Y be a
38 symmetric matrix. This subroutine solves the lower-triangular part of
39 Y for the matrix problem
40
41 L Y = Z
42
43 The resulting lower triangular part of matrix Y overwrites Z.
44
45 Data structure:
46
47 The matrix L and Z are assumed to be column wrapped.
48
49 !!!! The user should be warned that this routine will hang if
50 insufficient message buffering is not available. PeIGS uses this
51 routine for the one-processor case. !!!!!
52
53 */
54
55 #include <stdio.h>
56 #include <math.h>
57
58 #include "globalp.c.h"
59
60
forwardLL_(n,mapL,mapv_L,colL,mapZ,mapvecZ,colZ,scratch,nprocs,proclist,iscratch)61 void forwardLL_ ( n, mapL, mapv_L, colL, mapZ, mapvecZ, colZ, scratch, nprocs, proclist, iscratch)
62 Integer *n, *mapL, *mapv_L, *mapZ, *mapvecZ, *nprocs, *proclist, *iscratch;
63 DoublePrecision **colL, **colZ, *scratch;
64 {
65 /*
66
67 n = dimension of the matrix
68 mapL = index array holding the proces
69 mapv_L = index array holding the i-th column this processor owns
70 colL = DoublePrecision pointer to the array location of the i-th column
71
72 ditto for mapvecU, rowU, mapZ
73 scratch = (DoublePrecision ) of size 2n, scratch buffer for message passing
74 iscratch = integer scratch space of size p
75
76 remark: one can improve the x-fer by taking advantage size of the vector by segmenting the
77 column vectors
78
79 this is a communication intensive version since I can't figure out how
80 to code a more elegant version ( 4/1/93 gif)
81
82
83 */
84
85 static Integer ONE = 1;
86 Integer me, isize, indx, indx1, itype;
87 Integer iL, iZ, nvecsL, nvecsZ;
88 Integer ii, indx2;
89 DoublePrecision t;
90
91 extern Integer mxmynd_ ();
92 extern void combine_vector();
93
94 /* blas calls */
95
96 extern void dcopy_ ();
97
98 /*
99 mxsubs calls
100 */
101
102 extern Integer mxwrit_ (), mxread_ (), indxL();
103 extern Integer count_list();
104 extern void daxpy_();
105
106 me = mxmynd_ ();
107
108
109 nvecsZ = count_list(me, mapZ, n);
110 nvecsL = count_list(me, mapL, n);
111
112 if (nvecsZ == *n && nvecsL == *n && mapZ[0] == mapL[0])
113 {
114 *nprocs = 1;
115 }
116 else
117 {
118 *nprocs = 2;
119 }
120
121 /*
122 fprintf(stderr, " nvecsZ = %d nvecsL = %d \n", nvecsZ, nvecsL);
123 */
124
125 for ( iZ = 0; iZ < *n; iZ++ ) {
126 for ( iL = 0; iL < *n; iL++ ) {
127 /*
128 fprintf(stderr, " me = %d iZ = %d iL = %d \n", me, iZ, iL );
129 first case
130 */
131 if ( iZ == iL ) {
132 if ( mapZ[iZ] != me ) {
133 for ( ii = 0; ii < iZ+1; ii++ ) {
134 if ( mapL[ii] == me ) {
135 itype = *n + ii;
136 indx = indxL ( ii, nvecsL, mapv_L );
137 isize = (*n - iZ)*sizeof(DoublePrecision);
138 isize = mxwrit_ ( colL[indx] + iZ - ii , &isize, &mapZ[iZ] , &itype);
139 }
140 }
141 }
142
143
144 if ( mapZ[iZ] == me ) {
145 indx2 = indxL ( iZ, nvecsZ, mapvecZ );
146 if ( *nprocs > 1 ) {
147 for ( ii = 0; ii < iZ; ii++ ) {
148 if ( mapL[ii] == me ) {
149 /*
150 i own both the L vector and the Z vector
151 */
152 indx = indxL ( ii, nvecsL, mapv_L );
153 isize = *n - iZ;
154 dcopy_ ( &isize, colL[indx] + iZ - ii , &ONE, scratch, &ONE);
155 }
156 else {
157 isize = (*n - iZ) * sizeof(DoublePrecision);
158 itype = *n + ii;
159 isize = mxread_ ( scratch, &isize, &mapL[ii], &itype);
160 }
161
162
163 if ( mapZ[ii] == me ) {
164 indx1 = indxL ( ii, nvecsZ, mapvecZ );
165 t = - colZ[indx1][iZ-ii];
166 }
167 else {
168 itype = iL;
169 isize = sizeof(DoublePrecision);
170 isize = mxread_ ( &t , &isize, &mapZ[ii] , &itype);
171 }
172 /*
173 scratch contains the L vectors ; L[iL : n-1, iL]
174 */
175 isize = *n - iZ;
176 daxpy_ ( &isize, &t, scratch, &ONE, colZ[indx2], &ONE);
177 }
178
179 /*
180 the ii = iZ case
181 */
182 if ( mapL[iL] == me ) {
183 /*
184 i own both the L vector and the Z vector
185 */
186 indx = indxL ( iL, nvecsL, mapv_L );
187 isize = *n - iL;
188 dcopy_ ( &isize, colL[indx], &ONE, scratch, &ONE);
189 }
190 else {
191 isize = (*n - iZ) * sizeof(DoublePrecision);
192 itype = *n + iZ;
193 isize = mxread_ ( scratch, &isize, &mapL[iL], &itype);
194 }
195
196 indx = indxL ( iZ, nvecsZ, mapvecZ );
197 *colZ[indx] = *colZ[indx]/ *scratch;
198 t = - *colZ[indx];
199 isize = *n - iZ - 1;
200 daxpy_ ( &isize, &t, scratch + 1, &ONE, colZ[indx2] + 1, &ONE);
201 }
202
203 if ( *nprocs == 1 ) {
204 indx = indxL ( iZ, nvecsZ, mapvecZ );
205 isize = *n - iZ;
206 for ( ii = 0; ii < iZ; ii++ ) {
207 indx1 = indxL ( ii, nvecsL, &mapv_L[0] );
208 dcopy_ ( &isize, colL[indx1] + iZ - ii, &ONE, scratch, &ONE);
209 t = - colZ[ii][iZ-ii];
210 daxpy_ ( &isize, &t, scratch, &ONE, colZ[indx], &ONE);
211 }
212
213 /*
214 the case iL = iZ
215 */
216
217 indx1 = indxL ( iL, nvecsL, mapv_L );
218 dcopy_ ( &isize, colL[indx1], &ONE, scratch, &ONE);
219 indx = indxL ( iZ, nvecsZ, mapvecZ );
220 *colZ[indx] = *colZ[indx]/ *scratch;
221 t = - *colZ[indx];
222 isize = *n - iZ - 1;
223 daxpy_ ( &isize, &t, scratch + 1, &ONE, colZ[indx] + 1, &ONE);
224 }
225 }
226 }
227
228 if ( iL > iZ ) {
229 if ( mapZ[iZ] == me ) {
230 if ( mapL[iL] == me ) {
231 isize = *n - iL;
232 indx = indxL ( iL, nvecsL, mapv_L );
233 dcopy_ ( &isize, colL[indx], &ONE, scratch, &ONE);
234 }
235 else {
236 isize = *n - iL;
237 isize *= sizeof(DoublePrecision);
238 itype = *n + iL;
239 isize = mxread_ ( scratch, &isize, &mapL[iL], &itype);
240 }
241
242 /*
243 scratch contains the L[iL][iL:n-1] vector
244 */
245
246 indx = indxL ( iZ, nvecsZ, mapvecZ ); /* storage location of this column */
247 itype = iL - iZ;
248 colZ[indx][itype] = colZ[indx][itype] / *scratch;
249 t = - colZ[indx][itype];
250
251 if ( mapZ[iL] != me ){
252 itype = iL;
253 isize = sizeof(DoublePrecision);
254 isize = mxwrit_ ( &t, &isize, &mapZ[iL], &itype);
255 }
256
257 isize = *n - iL - 1;
258 daxpy_ ( &isize, &t, scratch + 1, &ONE, colZ[indx] + iL - iZ + 1, &ONE );
259 }
260 else {
261 if ( mapL[iL] == me ) {
262 isize = *n - iL;
263 isize = isize * sizeof( DoublePrecision );
264 indx = indxL ( iL, nvecsL, mapv_L );
265 itype = *n + iL;
266 isize = mxwrit_ ( colL[indx] , &isize, &mapZ[iZ] , &itype);
267 }
268 }
269 }
270 }
271 }
272 return;
273 }
274
275