1 #if HAVE_CONFIG_H
2 # include "config.h"
3 #endif
4
5 /* $Id: DP.c,v 1.14 2003-02-18 00:24:32 manoj Exp $ */
6 #include "global.h"
7 #include "globalp.h"
8 #include "macommon.h"
9 #include "typesf2c.h"
10 #include "ga-papi.h"
11 #include "ga-wapi.h"
12
13 /*\ check if I own the patch
14 \*/
own_patch(g_a,ilo,ihi,jlo,jhi)15 static logical own_patch(g_a, ilo, ihi, jlo, jhi)
16 Integer g_a, ilo, ihi, jlo, jhi;
17 {
18 Integer ilop, ihip, jlop, jhip, me=pnga_nodeid();
19 Integer lo[2],hi[2];
20
21 pnga_distribution(g_a, me, lo, hi);
22 ilop = lo[0];
23 jlop = lo[1];
24 ihip = hi[0];
25 jhip = hi[1];
26 if(ihip != ihi || ilop != ilo || jhip != jhi || jlop != jlo) return(FALSE);
27 else return(TRUE);
28 }
29
patch_intersect(ilo,ihi,jlo,jhi,ilop,ihip,jlop,jhip)30 static logical patch_intersect(ilo, ihi, jlo, jhi, ilop, ihip, jlop, jhip)
31 Integer ilo, ihi, jlo, jhi;
32 Integer *ilop, *ihip, *jlop, *jhip;
33 {
34 /* check consistency of patch coordinates */
35 if( ihi < ilo || jhi < jlo) return FALSE; /* inconsistent */
36 if( *ihip < *ilop || *jhip < *jlop) return FALSE; /* inconsistent */
37
38 /* find the intersection and update (ilop: ihip, jlop: jhip) */
39 if( ihi < *ilop || *ihip < ilo) return FALSE; /* don't intersect */
40 if( jhi < *jlop || *jhip < jlo) return FALSE; /* don't intersect */
41 *ilop = GA_MAX(ilo,*ilop);
42 *ihip = GA_MIN(ihi,*ihip);
43 *jlop = GA_MAX(jlo,*jlop);
44 *jhip = GA_MIN(jhi,*jhip);
45
46 return TRUE;
47 }
48
49
50 /*\ COPY A PATCH
51 *
52 * . identical shapes
53 * . copy by column order - Fortran convention
54 \*/
55 #if HAVE_SYS_WEAK_ALIAS_PRAGMA
56 # pragma weak wnga_copy_patch_dp = pnga_copy_patch_dp
57 #endif
pnga_copy_patch_dp(t_a,g_a,ailo,aihi,ajlo,ajhi,g_b,bilo,bihi,bjlo,bjhi)58 void pnga_copy_patch_dp(t_a, g_a, ailo, aihi, ajlo, ajhi,
59 g_b, bilo, bihi, bjlo, bjhi)
60 Integer g_a, ailo, aihi, ajlo, ajhi;
61 Integer g_b, bilo, bihi, bjlo, bjhi;
62 char *t_a;
63 {
64 Integer atype, btype, adim1, adim2, bdim1, bdim2;
65 Integer ilos, ihis, jlos, jhis;
66 Integer ilod, ihid, jlod, jhid, corr, nelem;
67 Integer me= pnga_nodeid(), ld, i,j;
68 Integer lo[2], hi[2];
69 Integer ldT;
70 char transp;
71 DoublePrecision *dbl_ptrA=NULL, *dbl_ptrB=NULL;
72 Integer ndim, dims[2];
73
74 pnga_check_handle(g_a, "pnga_copy_patch_dp");
75 pnga_check_handle(g_b, "pnga_copy_patch_dp");
76
77 /* if(g_a == g_b) pnga_error("pnga_copy_patch_dp: arrays have to different ", 0L); */
78
79 pnga_inquire(g_a, &atype, &ndim, dims);
80 adim1 = dims[0];
81 adim2 = dims[1];
82 pnga_inquire(g_b, &btype, &ndim, dims);
83 bdim1 = dims[0];
84 bdim2 = dims[1];
85
86 if(atype != btype || (atype != C_DBL ))
87 pnga_error("pnga_copy_patch_dp: wrong types ", 0L);
88
89 /* check if patch indices and dims match */
90 if (ailo <= 0 || aihi > adim1 || ajlo <= 0 || ajhi > adim2)
91 pnga_error(" pnga_copy_patch_dp: g_a indices out of range ", 0L);
92 if (bilo <= 0 || bihi > bdim1 || bjlo <= 0 || bjhi > bdim2)
93 pnga_error(" pnga_copy_patch_dp: g_b indices out of range ", 0L);
94
95 /* check if numbers of elements in two patches match each other */
96 if (((bihi - bilo + 1) != (aihi - ailo + 1)) ||
97 ( (bjhi - bjlo + 1) != (ajhi - ajlo + 1)) )
98 pnga_error(" pnga_copy_patch_dp: shapes two of patches do not match ", 0L);
99
100 /* is transpose operation required ? */
101 transp = (*t_a == 'n' || *t_a =='N')? 'n' : 't';
102
103 /* now find out cordinates of a patch of g_a that I own */
104 pnga_distribution(g_a, me, lo, hi);
105 ilos = lo[0];
106 jlos = lo[1];
107 ihis = hi[0];
108 jhis = hi[1];
109
110 if(patch_intersect(ailo, aihi, ajlo, ajhi, &ilos, &ihis, &jlos, &jhis)){
111 pnga_access_ptr(g_a, lo, hi, &dbl_ptrA, &ld);
112
113 nelem = (ihis-ilos+1)*(jhis-jlos+1);
114
115 if ( transp == 'n' ) {
116 corr = bilo - ailo;
117 ilod = ilos + corr;
118 ihid = ihis + corr;
119 corr = bjlo - ajlo;
120 jlod = jlos + corr;
121 jhid = jhis + corr;
122 } else {
123 /* If this is a transpose copy, we need local scratch space */
124 dbl_ptrB = (DoublePrecision*) ga_malloc(nelem,MT_F_DBL,"copypatch_dp");
125
126 /* Copy from the source into this local array, transposed */
127 ldT = jhis-jlos+1;
128
129 for(j=0; j< jhis-jlos+1; j++)
130 for(i=0; i< ihis-ilos+1; i++)
131 *(dbl_ptrB + i*ldT + j) = *(dbl_ptrA + j*ld + i);
132
133 /* Now we can reset index to point to the transposed stuff */
134 pnga_release(g_a, lo, hi);
135 dbl_ptrA = dbl_ptrB;
136 ld = ldT;
137
138 /* And finally, figure out what the destination indices are */
139 corr = bilo - ajlo;
140 ilod = jlos + corr;
141 ihid = jhis + corr;
142 corr = bjlo - ailo;
143 jlod = ilos + corr;
144 jhid = ihis + corr;
145 }
146
147 /* Put it where it belongs */
148 lo[0] = ilod;
149 lo[1] = jlod;
150 hi[0] = ihid;
151 hi[1] = jhid;
152 pnga_put(g_b, lo, hi, dbl_ptrA, &ld);
153
154 /* Get rid of local memory if we used it */
155 if( transp == 't') ga_free(dbl_ptrB);
156 }
157 }
158
159 #if HAVE_SYS_WEAK_ALIAS_PRAGMA
160 # pragma weak wnga_ddot_patch_dp = pnga_ddot_patch_dp
161 #endif
pnga_ddot_patch_dp(g_a,t_a,ailo,aihi,ajlo,ajhi,g_b,t_b,bilo,bihi,bjlo,bjhi)162 DoublePrecision pnga_ddot_patch_dp(g_a, t_a, ailo, aihi, ajlo, ajhi,
163 g_b, t_b, bilo, bihi, bjlo, bjhi)
164 Integer g_a, ailo, aihi, ajlo, ajhi; /* patch of g_a */
165 Integer g_b, bilo, bihi, bjlo, bjhi; /* patch of g_b */
166 char *t_a, *t_b; /* transpose operators */
167 {
168 Integer atype, btype, adim1, adim2, bdim1, bdim2;
169 Integer iloA, ihiA, jloA, jhiA, ldA;
170 Integer iloB, ihiB, jloB, jhiB, ldB;
171 Integer alo[2], ahi[2];
172 Integer blo[2], bhi[2];
173 Integer g_A = g_a;
174 Integer me= pnga_nodeid(), i, j, temp_created=0;
175 Integer corr, nelem;
176 char transp, transp_a, transp_b;
177 DoublePrecision sum = 0.;
178 DoublePrecision *dbl_ptrA;
179 DoublePrecision *dbl_ptrB;
180 Integer ndim, dims[2];
181
182 pnga_check_handle(g_a, "pnga_ddot_patch_dp");
183 pnga_check_handle(g_b, "pnga_ddot_patch_dp");
184
185 pnga_inquire(g_a, &atype, &ndim, dims);
186 adim1 = dims[0];
187 adim2 = dims[1];
188 pnga_inquire(g_b, &btype, &ndim, dims);
189 bdim1 = dims[0];
190 bdim2 = dims[1];
191
192 if(atype != btype || (atype != C_DBL ))
193 pnga_error("pnga_ddot_patch_dp: wrong types ", 0L);
194
195 /* check if patch indices and g_a dims match */
196 if (ailo <= 0 || aihi > adim1 || ajlo <= 0 || ajhi > adim2)
197 pnga_error(" pnga_ddot_patch_dp: g_a indices out of range ", 0L);
198
199 /* check if patch indices and g_b dims match */
200 if (bilo <= 0 || bihi > bdim1 || bjlo <= 0 || bjhi > bdim2)
201 pnga_error(" pnga_ddot_patch_dp: g_b indices out of range ", 0L);
202
203
204 /* is transpose operation required ? */
205 /* -- only if for one array transpose operation requested*/
206 transp_a = (*t_a == 'n' || *t_a =='N')? 'n' : 't';
207 transp_b = (*t_b == 'n' || *t_b =='N')? 'n' : 't';
208 transp = (transp_a == transp_b)? 'n' : 't';
209 if(transp == 't')
210 pnga_error(" pnga_ddot_patch_dp: transpose operators don't match: ", me);
211
212
213 /* find out coordinates of patches of g_A and g_B that I own */
214 pnga_distribution(g_A, me, alo, ahi);
215 iloA = alo[0];
216 jloA = alo[1];
217 ihiA = ahi[0];
218 jhiA = ahi[1];
219
220 if (patch_intersect(ailo, aihi, ajlo, ajhi, &iloA, &ihiA, &jloA, &jhiA)){
221
222 pnga_access_ptr(g_A, alo, ahi, &dbl_ptrA, &ldA);
223 nelem = (ihiA-iloA+1)*(jhiA-jloA+1);
224
225 corr = bilo - ailo;
226 iloB = iloA + corr;
227 ihiB = ihiA + corr;
228 corr = bjlo - ajlo;
229 jloB = jloA + corr;
230 jhiB = jhiA + corr;
231 blo[0] = iloB; blo[1] = jloB;
232 bhi[0] = ihiB; bhi[1] = jhiB;
233
234 if(own_patch(g_b, iloB, ihiB, jloB, jhiB)){
235 /* all the data is local */
236 pnga_access_ptr(g_b, blo, bhi, &dbl_ptrB, &ldB);
237 }else{
238 /* data is remote -- get it to temp storage*/
239 temp_created =1;
240 dbl_ptrB = (DoublePrecision*)ga_malloc(nelem, MT_F_DBL, "ddot_dp_b");
241
242 ldB = ihiB-iloB+1;
243 pnga_get(g_b, blo, bhi, dbl_ptrB, &ldB);
244 }
245
246 sum = 0.;
247 for(j=0; j< jhiA-jloA+1; j++)
248 for(i=0; i< ihiA-iloA+1; i++)
249 sum += *(dbl_ptrA + j*ldA + i) *
250 *(dbl_ptrB + j*ldB + i);
251 pnga_release(g_A, alo, ahi);
252
253 if(temp_created) ga_free(dbl_ptrB);
254 else pnga_release(g_b, blo, bhi);
255 }
256 return sum;
257 }
258
259