1 #if HAVE_CONFIG_H
2 #   include "config.h"
3 #endif
4 
5 /* $Id: DP.c,v 1.14 2003-02-18 00:24:32 manoj Exp $ */
6 #include "global.h"
7 #include "globalp.h"
8 #include "macommon.h"
9 #include "typesf2c.h"
10 #include "ga-papi.h"
11 #include "ga-wapi.h"
12 
13 /*\ check if I own the patch
14 \*/
own_patch(g_a,ilo,ihi,jlo,jhi)15 static logical own_patch(g_a, ilo, ihi, jlo, jhi)
16      Integer g_a, ilo, ihi, jlo, jhi;
17 {
18    Integer ilop, ihip, jlop, jhip, me=pnga_nodeid();
19    Integer lo[2],hi[2];
20 
21    pnga_distribution(g_a, me, lo, hi);
22    ilop = lo[0];
23    jlop = lo[1];
24    ihip = hi[0];
25    jhip = hi[1];
26    if(ihip != ihi || ilop != ilo || jhip != jhi || jlop != jlo) return(FALSE);
27    else return(TRUE);
28 }
29 
patch_intersect(ilo,ihi,jlo,jhi,ilop,ihip,jlop,jhip)30 static logical patch_intersect(ilo, ihi, jlo, jhi, ilop, ihip, jlop, jhip)
31      Integer ilo, ihi, jlo, jhi;
32      Integer *ilop, *ihip, *jlop, *jhip;
33 {
34      /* check consistency of patch coordinates */
35      if( ihi < ilo || jhi < jlo)     return FALSE; /* inconsistent */
36      if( *ihip < *ilop || *jhip < *jlop) return FALSE; /* inconsistent */
37 
38      /* find the intersection and update (ilop: ihip, jlop: jhip) */
39      if( ihi < *ilop || *ihip < ilo) return FALSE; /* don't intersect */
40      if( jhi < *jlop || *jhip < jlo) return FALSE; /* don't intersect */
41      *ilop = GA_MAX(ilo,*ilop);
42      *ihip = GA_MIN(ihi,*ihip);
43      *jlop = GA_MAX(jlo,*jlop);
44      *jhip = GA_MIN(jhi,*jhip);
45 
46      return TRUE;
47 }
48 
49 
50 /*\ COPY A PATCH
51  *
52  *  . identical shapes
53  *  . copy by column order - Fortran convention
54 \*/
55 #if HAVE_SYS_WEAK_ALIAS_PRAGMA
56 #   pragma weak wnga_copy_patch_dp = pnga_copy_patch_dp
57 #endif
pnga_copy_patch_dp(t_a,g_a,ailo,aihi,ajlo,ajhi,g_b,bilo,bihi,bjlo,bjhi)58 void pnga_copy_patch_dp(t_a, g_a, ailo, aihi, ajlo, ajhi,
59                    g_b, bilo, bihi, bjlo, bjhi)
60      Integer g_a, ailo, aihi, ajlo, ajhi;
61      Integer g_b, bilo, bihi, bjlo, bjhi;
62      char *t_a;
63 {
64 Integer atype, btype, adim1, adim2, bdim1, bdim2;
65 Integer ilos, ihis, jlos, jhis;
66 Integer ilod, ihid, jlod, jhid, corr, nelem;
67 Integer me= pnga_nodeid(), ld, i,j;
68 Integer lo[2], hi[2];
69 Integer ldT;
70 char transp;
71 DoublePrecision *dbl_ptrA=NULL, *dbl_ptrB=NULL;
72 Integer ndim, dims[2];
73 
74    pnga_check_handle(g_a, "pnga_copy_patch_dp");
75    pnga_check_handle(g_b, "pnga_copy_patch_dp");
76 
77    /* if(g_a == g_b) pnga_error("pnga_copy_patch_dp: arrays have to different ", 0L); */
78 
79    pnga_inquire(g_a, &atype, &ndim, dims);
80    adim1 = dims[0];
81    adim2 = dims[1];
82    pnga_inquire(g_b, &btype, &ndim, dims);
83    bdim1 = dims[0];
84    bdim2 = dims[1];
85 
86    if(atype != btype || (atype != C_DBL ))
87       pnga_error("pnga_copy_patch_dp: wrong types ", 0L);
88 
89    /* check if patch indices and dims match */
90    if (ailo <= 0 || aihi > adim1 || ajlo <= 0 || ajhi > adim2)
91        pnga_error(" pnga_copy_patch_dp: g_a indices out of range ", 0L);
92    if (bilo <= 0 || bihi > bdim1 || bjlo <= 0 || bjhi > bdim2)
93        pnga_error(" pnga_copy_patch_dp: g_b indices out of range ", 0L);
94 
95    /* check if numbers of elements in two patches match each other */
96    if (((bihi - bilo + 1)  != (aihi - ailo + 1)) ||
97       ( (bjhi - bjlo + 1)  != (ajhi - ajlo + 1)) )
98        pnga_error(" pnga_copy_patch_dp: shapes two of patches do not match ", 0L);
99 
100     /* is transpose operation required ? */
101    transp = (*t_a == 'n' || *t_a =='N')? 'n' : 't';
102 
103    /* now find out cordinates of a patch of g_a that I own */
104    pnga_distribution(g_a, me, lo, hi);
105    ilos = lo[0];
106    jlos = lo[1];
107    ihis = hi[0];
108    jhis = hi[1];
109 
110    if(patch_intersect(ailo, aihi, ajlo, ajhi, &ilos, &ihis, &jlos, &jhis)){
111       pnga_access_ptr(g_a, lo, hi, &dbl_ptrA, &ld);
112 
113       nelem = (ihis-ilos+1)*(jhis-jlos+1);
114 
115       if ( transp == 'n' ) {
116 	  corr  = bilo - ailo;
117 	  ilod  = ilos + corr;
118 	  ihid  = ihis + corr;
119 	  corr  = bjlo - ajlo;
120 	  jlod  = jlos + corr;
121 	  jhid  = jhis + corr;
122       } else {
123 	/* If this is a transpose copy, we need local scratch space */
124 	dbl_ptrB = (DoublePrecision*) ga_malloc(nelem,MT_F_DBL,"copypatch_dp");
125 
126 	  /* Copy from the source into this local array, transposed */
127 	  ldT = jhis-jlos+1;
128 
129 	  for(j=0; j< jhis-jlos+1; j++)
130 	      for(i=0; i< ihis-ilos+1; i++)
131 		  *(dbl_ptrB + i*ldT + j) = *(dbl_ptrA + j*ld + i);
132 
133 	  /* Now we can reset index to point to the transposed stuff */
134       pnga_release(g_a, lo, hi);
135 	  dbl_ptrA = dbl_ptrB;
136 	  ld = ldT;
137 
138 	  /* And finally, figure out what the destination indices are */
139 	  corr  = bilo - ajlo;
140 	  ilod  = jlos + corr;
141 	  ihid  = jhis + corr;
142 	  corr  = bjlo - ailo;
143 	  jlod  = ilos + corr;
144 	  jhid  = ihis + corr;
145       }
146 
147       /* Put it where it belongs */
148       lo[0] = ilod;
149       lo[1] = jlod;
150       hi[0] = ihid;
151       hi[1] = jhid;
152       pnga_put(g_b, lo, hi, dbl_ptrA, &ld);
153 
154       /* Get rid of local memory if we used it */
155       if( transp == 't') ga_free(dbl_ptrB);
156   }
157 }
158 
159 #if HAVE_SYS_WEAK_ALIAS_PRAGMA
160 #   pragma weak wnga_ddot_patch_dp = pnga_ddot_patch_dp
161 #endif
pnga_ddot_patch_dp(g_a,t_a,ailo,aihi,ajlo,ajhi,g_b,t_b,bilo,bihi,bjlo,bjhi)162 DoublePrecision pnga_ddot_patch_dp(g_a, t_a, ailo, aihi, ajlo, ajhi,
163                                   g_b, t_b, bilo, bihi, bjlo, bjhi)
164      Integer g_a, ailo, aihi, ajlo, ajhi;    /* patch of g_a */
165      Integer g_b, bilo, bihi, bjlo, bjhi;    /* patch of g_b */
166      char    *t_a, *t_b;                          /* transpose operators */
167 {
168 Integer atype, btype, adim1, adim2, bdim1, bdim2;
169 Integer iloA, ihiA, jloA, jhiA, ldA;
170 Integer iloB, ihiB, jloB, jhiB, ldB;
171 Integer alo[2], ahi[2];
172 Integer blo[2], bhi[2];
173 Integer g_A = g_a;
174 Integer me= pnga_nodeid(), i, j, temp_created=0;
175 Integer corr, nelem;
176 char    transp, transp_a, transp_b;
177 DoublePrecision  sum = 0.;
178 DoublePrecision *dbl_ptrA;
179 DoublePrecision *dbl_ptrB;
180 Integer ndim, dims[2];
181 
182    pnga_check_handle(g_a, "pnga_ddot_patch_dp");
183    pnga_check_handle(g_b, "pnga_ddot_patch_dp");
184 
185    pnga_inquire(g_a, &atype, &ndim, dims);
186    adim1 = dims[0];
187    adim2 = dims[1];
188    pnga_inquire(g_b, &btype, &ndim, dims);
189    bdim1 = dims[0];
190    bdim2 = dims[1];
191 
192    if(atype != btype || (atype != C_DBL ))
193       pnga_error("pnga_ddot_patch_dp: wrong types ", 0L);
194 
195   /* check if patch indices and g_a dims match */
196    if (ailo <= 0 || aihi > adim1 || ajlo <= 0 || ajhi > adim2)
197       pnga_error(" pnga_ddot_patch_dp: g_a indices out of range ", 0L);
198 
199    /* check if patch indices and g_b dims match */
200    if (bilo <= 0 || bihi > bdim1 || bjlo <= 0 || bjhi > bdim2)
201        pnga_error(" pnga_ddot_patch_dp: g_b indices out of range ", 0L);
202 
203 
204    /* is transpose operation required ? */
205    /* -- only if for one array transpose operation requested*/
206    transp_a = (*t_a == 'n' || *t_a =='N')? 'n' : 't';
207    transp_b = (*t_b == 'n' || *t_b =='N')? 'n' : 't';
208    transp   = (transp_a == transp_b)? 'n' : 't';
209    if(transp == 't')
210           pnga_error(" pnga_ddot_patch_dp: transpose operators don't match: ", me);
211 
212 
213    /* find out coordinates of patches of g_A and g_B that I own */
214    pnga_distribution(g_A, me, alo, ahi);
215    iloA = alo[0];
216    jloA = alo[1];
217    ihiA = ahi[0];
218    jhiA = ahi[1];
219 
220    if (patch_intersect(ailo, aihi, ajlo, ajhi, &iloA, &ihiA, &jloA, &jhiA)){
221 
222        pnga_access_ptr(g_A, alo, ahi, &dbl_ptrA, &ldA);
223        nelem = (ihiA-iloA+1)*(jhiA-jloA+1);
224 
225        corr  = bilo - ailo;
226        iloB  = iloA + corr;
227        ihiB  = ihiA + corr;
228        corr  = bjlo - ajlo;
229        jloB  = jloA + corr;
230        jhiB  = jhiA + corr;
231        blo[0] = iloB; blo[1] = jloB;
232        bhi[0] = ihiB; bhi[1] = jhiB;
233 
234       if(own_patch(g_b, iloB, ihiB, jloB, jhiB)){
235          /* all the data is local */
236          pnga_access_ptr(g_b, blo, bhi, &dbl_ptrB, &ldB);
237       }else{
238          /* data is remote -- get it to temp storage*/
239          temp_created =1;
240 	 dbl_ptrB = (DoublePrecision*)ga_malloc(nelem, MT_F_DBL, "ddot_dp_b");
241 
242          ldB   = ihiB-iloB+1;
243          pnga_get(g_b, blo, bhi, dbl_ptrB, &ldB);
244       }
245 
246       sum = 0.;
247       for(j=0; j< jhiA-jloA+1; j++)
248           for(i=0; i< ihiA-iloA+1; i++)
249              sum += *(dbl_ptrA + j*ldA + i) *
250                     *(dbl_ptrB + j*ldB + i);
251       pnga_release(g_A, alo, ahi);
252 
253       if(temp_created) ga_free(dbl_ptrB);
254       else pnga_release(g_b, blo, bhi);
255    }
256    return sum;
257 }
258 
259