1 /*
2  $Id$
3  *======================================================================
4  *
5  * DISCLAIMER
6  *
7  * This material was prepared as an account of work sponsored by an
8  * agency of the United States Government.  Neither the United States
9  * Government nor the United States Department of Energy, nor Battelle,
10  * nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
11  * ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
12  * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
13  * SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
14  * INFRINGE PRIVATELY OWNED RIGHTS.
15  *
16  * ACKNOWLEDGMENT
17  *
18  * This software and its documentation were produced with Government
19  * support under Contract Number DE-AC06-76RLO-1830 awarded by the United
20  * States Department of Energy.  The Government retains a paid-up
21  * non-exclusive, irrevocable worldwide license to reproduce, prepare
22  * derivative works, perform publicly and display publicly by or for the
23  * Government, including the right to distribute to other Government
24  * contractors.
25  *
26  *======================================================================
27  *
28  *  -- PEIGS  routine (version 2.1) --
29  *     Pacific Northwest Laboratory
30  *     July 28, 1995
31  *
32  *======================================================================
33  */
34 /*
35   PeIGS internal routine forwardLL
36 
37   Let L, Z be two non-singular lower triangular matrices. Let Y be a
38   symmetric matrix.  This subroutine solves the lower-triangular part of
39   Y for the matrix problem
40 
41   L Y = Z
42 
43   The resulting lower triangular part of matrix Y overwrites Z.
44 
45   Data structure:
46 
47   The matrix L and Z are assumed to be column wrapped.
48 
49   !!!! The user should be warned that this routine will hang if
50   insufficient message buffering is not available.  PeIGS uses this
51   routine for the one-processor case. !!!!!
52 
53   */
54 
55 #include <stdio.h>
56 #include <math.h>
57 
58 #include "globalp.c.h"
59 
60 
forwardLL_(n,mapL,mapv_L,colL,mapZ,mapvecZ,colZ,scratch,nprocs,proclist,iscratch)61 void forwardLL_ ( n, mapL, mapv_L, colL, mapZ, mapvecZ, colZ, scratch, nprocs, proclist, iscratch)
62      Integer *n, *mapL, *mapv_L, *mapZ, *mapvecZ, *nprocs, *proclist, *iscratch;
63      DoublePrecision **colL, **colZ, *scratch;
64 {
65   /*
66 
67     n            = dimension of the matrix
68     mapL         = index array holding the proces
69     mapv_L      = index array holding the i-th column this processor owns
70     colL         = DoublePrecision pointer to the array location of the i-th column
71 
72     ditto for mapvecU, rowU, mapZ
73     scratch      = (DoublePrecision ) of size 2n, scratch buffer for message passing
74     iscratch     = integer scratch space of size p
75 
76     remark: one can improve the x-fer by taking advantage size of the vector by segmenting the
77             column vectors
78 
79 	    this is a communication intensive version since I can't figure out how
80 	    to code a more elegant version ( 4/1/93 gif)
81 
82 
83 */
84 
85   static Integer ONE = 1;
86   Integer me, isize, indx, indx1, itype;
87   Integer iL, iZ, nvecsL, nvecsZ;
88   Integer ii, indx2;
89   DoublePrecision t;
90 
91   extern Integer mxmynd_ ();
92   extern void combine_vector();
93 
94   /* blas calls */
95 
96   extern void dcopy_ ();
97 
98   /*
99     mxsubs calls
100     */
101 
102   extern Integer mxwrit_ (),  mxread_ (), indxL();
103   extern Integer count_list();
104   extern void daxpy_();
105 
106   me = mxmynd_ ();
107 
108 
109   nvecsZ = count_list(me, mapZ, n);
110   nvecsL = count_list(me, mapL, n);
111 
112   if (nvecsZ == *n  && nvecsL == *n && mapZ[0] == mapL[0])
113   {
114     *nprocs = 1;
115   }
116   else
117   {
118     *nprocs = 2;
119   }
120 
121 /*
122   fprintf(stderr, " nvecsZ = %d nvecsL = %d \n", nvecsZ, nvecsL);
123 */
124 
125   for ( iZ = 0; iZ < *n; iZ++ ) {
126     for ( iL = 0; iL < *n; iL++ ) {
127       /*
128       fprintf(stderr, " me = %d  iZ = %d  iL = %d \n", me, iZ, iL );
129 	first case
130 	*/
131       if ( iZ == iL ) {
132 	if ( mapZ[iZ] != me ) {
133 	  for ( ii = 0; ii < iZ+1; ii++ ) {
134 	    if ( mapL[ii] == me ) {
135 	      itype = *n + ii;
136 	      indx = indxL ( ii, nvecsL, mapv_L );
137 	      isize = (*n - iZ)*sizeof(DoublePrecision);
138 	      isize = mxwrit_ ( colL[indx] + iZ - ii , &isize, &mapZ[iZ] , &itype);
139 	    }
140 	  }
141 	}
142 
143 
144 	if ( mapZ[iZ] == me ) {
145 	  indx2 = indxL ( iZ, nvecsZ, mapvecZ );
146 	  if ( *nprocs  > 1 ) {
147 	    for ( ii = 0; ii < iZ; ii++ ) {
148 	      if ( mapL[ii] == me ) {
149 		/*
150 		  i own both the L vector and the Z vector
151 		  */
152 		indx = indxL ( ii, nvecsL, mapv_L );
153 		isize = *n - iZ;
154 		dcopy_ ( &isize, colL[indx] + iZ - ii , &ONE, scratch, &ONE);
155 	      }
156 	      else {
157 		isize = (*n - iZ) * sizeof(DoublePrecision);
158 		itype = *n + ii;
159 		isize = mxread_ ( scratch, &isize, &mapL[ii], &itype);
160 	      }
161 
162 
163 	      if ( mapZ[ii] == me ) {
164 		indx1 = indxL ( ii, nvecsZ, mapvecZ );
165 		t = - colZ[indx1][iZ-ii];
166 	      }
167 	      else {
168 		itype = iL;
169 		isize = sizeof(DoublePrecision);
170 		isize = mxread_ ( &t , &isize, &mapZ[ii] , &itype);
171 	      }
172 	      /*
173 		scratch contains the L vectors ; L[iL : n-1, iL]
174 		*/
175 	      isize = *n - iZ;
176 	      daxpy_ ( &isize, &t, scratch, &ONE, colZ[indx2], &ONE);
177 	    }
178 
179 	    /*
180 	      the ii = iZ case
181 	      */
182 	    if ( mapL[iL] == me ) {
183 	      /*
184 		i own both the L vector and the Z vector
185 		*/
186 	      indx = indxL ( iL, nvecsL, mapv_L );
187 	      isize = *n - iL;
188 	      dcopy_ ( &isize, colL[indx], &ONE, scratch, &ONE);
189 	    }
190 	    else {
191 	      isize = (*n - iZ) * sizeof(DoublePrecision);
192 	      itype = *n + iZ;
193 	      isize = mxread_ ( scratch, &isize, &mapL[iL], &itype);
194 	    }
195 
196 	    indx = indxL ( iZ, nvecsZ, mapvecZ );
197 	    *colZ[indx] = *colZ[indx]/ *scratch;
198 	    t = - *colZ[indx];
199 	    isize = *n - iZ - 1;
200 	    daxpy_ ( &isize, &t, scratch + 1, &ONE, colZ[indx2] + 1, &ONE);
201 	  }
202 
203 	  if ( *nprocs == 1 ) {
204 	    indx = indxL ( iZ, nvecsZ, mapvecZ );
205 	    isize = *n - iZ;
206 	    for ( ii = 0; ii < iZ; ii++ ) {
207 	      indx1 = indxL ( ii, nvecsL, &mapv_L[0] );
208 	      dcopy_ ( &isize, colL[indx1] + iZ - ii, &ONE, scratch, &ONE);
209 	      t = - colZ[ii][iZ-ii];
210 	      daxpy_ ( &isize, &t, scratch, &ONE, colZ[indx], &ONE);
211 	    }
212 
213 	    /*
214 	      the case iL = iZ
215 	      */
216 
217 	    indx1 = indxL ( iL, nvecsL, mapv_L );
218 	    dcopy_ ( &isize, colL[indx1], &ONE, scratch, &ONE);
219 	    indx = indxL ( iZ, nvecsZ, mapvecZ );
220 	    *colZ[indx] = *colZ[indx]/ *scratch;
221 	    t = - *colZ[indx];
222 	    isize = *n - iZ - 1;
223 	    daxpy_ ( &isize, &t, scratch + 1, &ONE, colZ[indx] + 1, &ONE);
224 	  }
225 	}
226       }
227 
228       if ( iL > iZ ) {
229 	if ( mapZ[iZ] == me ) {
230 	  if ( mapL[iL] == me ) {
231 	    isize = *n - iL;
232 	    indx = indxL ( iL, nvecsL, mapv_L );
233 	    dcopy_ ( &isize, colL[indx], &ONE, scratch, &ONE);
234 	  }
235 	  else {
236 	    isize = *n - iL;
237 	    isize *= sizeof(DoublePrecision);
238 	    itype = *n + iL;
239 	    isize = mxread_ ( scratch, &isize, &mapL[iL], &itype);
240 	  }
241 
242 	  /*
243 	    scratch contains the L[iL][iL:n-1] vector
244 	    */
245 
246 	  indx = indxL ( iZ, nvecsZ, mapvecZ ); /* storage location of this column */
247 	  itype = iL - iZ;
248 	  colZ[indx][itype] = colZ[indx][itype] / *scratch;
249 	  t = - colZ[indx][itype];
250 
251 	  if ( mapZ[iL] != me ){
252 	    itype = iL;
253 	    isize = sizeof(DoublePrecision);
254 	    isize = mxwrit_ ( &t, &isize, &mapZ[iL], &itype);
255 	  }
256 
257 	  isize = *n - iL - 1;
258 	  daxpy_ ( &isize, &t, scratch + 1, &ONE, colZ[indx] + iL - iZ + 1, &ONE );
259 	}
260 	else {
261 	  if ( mapL[iL] == me ) {
262 	    isize = *n - iL;
263 	    isize  = isize * sizeof( DoublePrecision );
264 	    indx = indxL ( iL, nvecsL, mapv_L );
265 	    itype = *n + iL;
266 	    isize = mxwrit_ ( colL[indx] , &isize, &mapZ[iZ] , &itype);
267 	  }
268 	}
269       }
270     }
271   }
272   return;
273 }
274 
275