1 #if HAVE_CONFIG_H
2 # include "config.h"
3 #endif
4
5 /* $Id: perf_aggr.c,v 1.7 2004-07-21 00:23:06 manoj Exp $ */
6 #if HAVE_STDIO_H
7 # include <stdio.h>
8 #endif
9 #if HAVE_STDLIB_H
10 # include <stdlib.h>
11 #endif
12 #if HAVE_ASSERT_H
13 # include <assert.h>
14 #endif
15 #if HAVE_UNISTD_H
16 # include <unistd.h>
17 #elif HAVE_WINDOWS_H
18 # include <windows.h>
19 # define sleep(x) Sleep(1000*(x))
20 #endif
21
22 #include "armci.h"
23 #include "message.h"
24
/*
 * Array extents DIM1..DIM7, with platform-specific values for DIM3..DIM5.
 * NOTE(review): none of the DIMn/EDIMn names is referenced by the code
 * visible in this file -- confirm before relying on or removing them.
 */
#define DIM1 5
#define DIM2 3
#ifdef __sun
/* Solaris has shared memory shortages in the default system configuration */
# define DIM3 6
# define DIM4 5
# define DIM5 4
#elif defined(__alpha__)
# define DIM3 8
# define DIM4 5
# define DIM5 6
#else
# define DIM3 8
# define DIM4 9
# define DIM5 7
#endif
#define DIM6 3
#define DIM7 2


/* EDIMn is the matching DIMn enlarged by OFF elements. */
#define OFF 1
#define EDIM1 (DIM1+OFF)
#define EDIM2 (DIM2+OFF)
#define EDIM3 (DIM3+OFF)
#define EDIM4 (DIM4+OFF)
#define EDIM5 (DIM5+OFF)
#define EDIM6 (DIM6+OFF)
#define EDIM7 (DIM7+OFF)

/*
 * MAXDIMS is the upper bound create_array() asserts on its ndim argument.
 * DIMS, MAX_DIM_VAL and LOOP are not referenced by the visible code.
 */
#define DIMS 4
#define MAXDIMS 7
#define MAX_DIM_VAL 50
#define LOOP 200

/*
 * MAXPROC sizes every per-process array in this file (work[], the pointer
 * and handle arrays in test_aggregate(), ctid[] in pvm_init()) and is the
 * limit main() compares nproc against.
 * BASE and TIMES are not referenced by the visible code.
 */
#define BASE 100.
#define MAXPROC 128
#define TIMES 100

/* ELEMS is not referenced by the visible code; it is larger when CRAY is defined. */
#ifdef CRAY
# define ELEMS 800
#else
# define ELEMS 200
#endif
68
69
/***************************** macros ************************/
/*
 * COPY copies `bytes` bytes from src to dst; note the argument order is
 * (src, dst), the reverse of memcpy's (dst, src).
 * NOTE(review): <string.h> is not among the headers this file includes
 * directly -- confirm memcpy is declared before using COPY.
 */
#define COPY(src, dst, bytes) memcpy((dst),(src),(bytes))
/*
 * Function-like macros: the selected argument is evaluated twice, so do
 * not pass expressions with side effects (e.g. i++).
 */
#define ARMCI_MAX(a,b) (((a) >= (b)) ? (a) : (b))
#define ARMCI_MIN(a,b) (((a) <= (b)) ? (a) : (b))
#define ARMCI_ABS(a) (((a) <0) ? -(a) : (a))

/***************************** global data *******************/
/* me and nproc are assigned in main() from armci_msg_me() and armci_msg_nproc(). */
int me, nproc;
void *work[MAXPROC]; /* work array for propagating addresses */
79
80
81
82 #ifdef PVM
pvm_init(int argc,char * argv[])83 void pvm_init(int argc, char *argv[])
84 {
85 int mytid, mygid, ctid[MAXPROC];
86 int np, i;
87
88 mytid = pvm_mytid();
89 if ((argc != 2) && (argc != 1)) {
90 goto usage;
91 }
92 if (argc == 1) {
93 np = 1;
94 }
95 if (argc == 2)
96 if ((np = atoi(argv[1])) < 1) {
97 goto usage;
98 }
99 if (np > MAXPROC) {
100 goto usage;
101 }
102
103 mygid = pvm_joingroup(MPGROUP);
104
105 if (np > 1)
106 if (mygid == 0) {
107 i = pvm_spawn(argv[0], argv + 1, 0, "", np - 1, ctid);
108 }
109
110 while (pvm_gsize(MPGROUP) < np) {
111 sleep(1);
112 }
113
114 /* sync */
115 pvm_barrier(MPGROUP, np);
116
117 printf("PVM initialization done!\n");
118
119 return;
120
121 usage:
122 fprintf(stderr, "usage: %s <nproc>\n", argv[0]);
123 pvm_exit();
124 exit(-1);
125 }
126 #endif
127
128 /*void create_array(void *a[], int elem_size, int ndim, int dims[])*/
create_array(double * a[],int ndim,int dims[])129 void create_array(double *a[], int ndim, int dims[])
130 {
131 int bytes = sizeof(double), i, rc;
132
133 assert(ndim <= MAXDIMS);
134 for (i = 0; i < ndim; i++) {
135 bytes *= dims[i];
136 }
137
138 rc = ARMCI_Malloc((void **)a, bytes);
139 assert(rc == 0);
140
141 assert(a[me]);
142
143 }
144
145 /*void destroy_array(void *ptr[])*/
destroy_array(double * ptr[])146 void destroy_array(double *ptr[])
147 {
148 int check;
149
150 ARMCI_Barrier();
151 check = !ARMCI_Free(ptr[me]);
152 assert(check);
153 }
154
155 #define MAXELEMS 1000
156 #define MAX_REQUESTS MAXELEMS
157
test_aggregate(int dryrun)158 void test_aggregate(int dryrun)
159 {
160
161 int i, j, rc, bytes, elems[2] = {MAXPROC, MAXELEMS};
162 double *ddst_put[MAXPROC];
163 double *ddst_get[MAXPROC];
164 double *dsrc[MAXPROC];
165 armci_hdl_t aggr_hdl_put[MAXPROC];
166 armci_hdl_t aggr_hdl_get[MAXPROC];
167 armci_hdl_t hdl_put[MAXELEMS];
168 armci_hdl_t hdl_get[MAXELEMS];
169 armci_giov_t darr;
170 void *src_ptr[MAX_REQUESTS], *dst_ptr[MAX_REQUESTS];
171 int start = 0, end = 0;
172 double start_time;
173
174 create_array(ddst_put, 2, elems);
175 create_array(ddst_get, 2, elems);
176 create_array(dsrc, 1, &elems[1]);
177
178 for (i = 0; i < elems[1]; i++) {
179 dsrc[me][i] = i * 1.001 * (me + 1);
180 }
181 for (i = 0; i < elems[0]*elems[1]; i++) {
182 ddst_put[me][i] = 0.0;
183 ddst_get[me][i] = 0.0;
184 }
185
186 ARMCI_Barrier();
187
188 /* only proc 0 does the work */
189 if (me == 0) {
190 if (!dryrun) {
191 printf("Transferring %d doubles (Not an array of %d doubles)\n", MAXELEMS, MAXELEMS);
192 }
193
194 /* initializing non-blocking handles */
195 for (i = 0; i < elems[1]; i++) {
196 ARMCI_INIT_HANDLE(&hdl_put[i]);
197 }
198 for (i = 0; i < elems[1]; i++) {
199 ARMCI_INIT_HANDLE(&hdl_get[i]);
200 }
201
202 /* aggregate handles */
203 for (i = 0; i < nproc; i++) {
204 ARMCI_INIT_HANDLE(&aggr_hdl_put[i]);
205 }
206 for (i = 0; i < nproc; i++) {
207 ARMCI_INIT_HANDLE(&aggr_hdl_get[i]);
208 }
209 for (i = 0; i < nproc; i++) {
210 ARMCI_SET_AGGREGATE_HANDLE(&aggr_hdl_put[i]);
211 }
212 for (i = 0; i < nproc; i++) {
213 ARMCI_SET_AGGREGATE_HANDLE(&aggr_hdl_get[i]);
214 }
215
216 bytes = sizeof(double);
217
218 /* **************** PUT **************** */
219 /* register put */
220 start_time = armci_timer();
221 start = 0;
222 end = elems[1];
223 for (i = 1; i < nproc; i++) {
224 for (j = start; j < end; j++) {
225 ARMCI_NbPutValueDouble(dsrc[me][j], &ddst_put[i][me*elems[1] + j], i,
226 &hdl_put[j]);
227 }
228 for (j = start; j < end; j++) {
229 ARMCI_Wait(&hdl_put[j]);
230 }
231 }
232 if (!dryrun) {
233 printf("%d: Value Put time = %.2es\n", me, armci_timer() - start_time);
234 }
235
236 /* vector put */
237 start_time = armci_timer();
238 for (i = 1; i < nproc; i++) {
239 for (j = start; j < end; j++) {
240 src_ptr[j] = (void *)&dsrc[me][j];
241 dst_ptr[j] = (void *)&ddst_put[i][me*elems[1] + j];
242 }
243 darr.src_ptr_array = src_ptr;
244 darr.dst_ptr_array = dst_ptr;
245 darr.bytes = sizeof(double);
246 darr.ptr_array_len = elems[1];
247 if ((rc = ARMCI_NbPutV(&darr, 1, i, &hdl_put[i]))) {
248 ARMCI_Error("armci_nbputv failed\n", rc);
249 }
250 }
251 for (i = 1; i < nproc; i++) {
252 ARMCI_Wait(&hdl_put[i]);
253 }
254 if (!dryrun) {
255 printf("%d: Vector Put time = %.2es\n", me, armci_timer() - start_time);
256 }
257
258 /* regular put */
259 start_time = armci_timer();
260 for (i = 1; i < nproc; i++) {
261 for (j = start; j < end; j++) {
262 if ((rc = ARMCI_NbPut(&dsrc[me][j], &ddst_put[i][me*elems[1] + j], bytes,
263 i, &hdl_put[j]))) {
264 ARMCI_Error("armci_nbput failed\n", rc);
265 }
266 }
267 for (j = start; j < end; j++) {
268 ARMCI_Wait(&hdl_put[j]);
269 }
270 }
271 if (!dryrun) {
272 printf("%d: Regular Put time = %.2es\n", me, armci_timer() - start_time);
273 }
274
275 /* aggregate put */
276 start_time = armci_timer();
277 for (i = 1; i < nproc; i++) {
278 for (j = start; j < end; j++) {
279 if ((rc = ARMCI_NbPut(&dsrc[me][j], &ddst_put[i][me*elems[1] + j], bytes,
280 i, &aggr_hdl_put[i]))) {
281 ARMCI_Error("armci_nbput failed\n", rc);
282 }
283 }
284 }
285 for (i = 1; i < nproc; i++) {
286 ARMCI_Wait(&aggr_hdl_put[i]);
287 }
288 if (!dryrun) {
289 printf("%d: Aggregate Put time = %.2es\n\n", me, armci_timer() - start_time);
290 }
291
292
293 /* **************** GET **************** */
294
295 /* vector get */
296 start_time = armci_timer();
297 for (i = 1; i < nproc; i++) {
298 for (j = start; j < end; j++) {
299 src_ptr[j] = (void *)&dsrc[i][j];
300 dst_ptr[j] = (void *)&ddst_get[me][i*elems[1] + j];
301 }
302 darr.src_ptr_array = src_ptr;
303 darr.dst_ptr_array = dst_ptr;
304 darr.bytes = sizeof(double);
305 darr.ptr_array_len = elems[1];
306 if ((rc = ARMCI_NbGetV(&darr, 1, i, &hdl_get[i]))) {
307 ARMCI_Error("armci_nbgetv failed\n", rc);
308 }
309 ARMCI_Wait(&hdl_get[i]);
310 }
311 if (!dryrun) {
312 printf("%d: Vector Get time = %.2es\n", me, armci_timer() - start_time);
313 }
314
315 /* regular get */
316 start_time = armci_timer();
317 for (i = 1; i < nproc; i++) {
318 for (j = start; j < end; j++) {
319 if ((rc = ARMCI_NbGet(&dsrc[i][j], &ddst_get[me][i*elems[1] + j], bytes,
320 i, &hdl_get[j]))) {
321 ARMCI_Error("armci_nbget failed\n", rc);
322 }
323 }
324 for (j = start; j < end; j++) {
325 ARMCI_Wait(&hdl_get[j]);
326 }
327 }
328 if (!dryrun) {
329 printf("%d: Regular Get time = %.2es\n", me, armci_timer() - start_time);
330 }
331
332 /* aggregate get */
333 start_time = armci_timer();
334 for (i = 1; i < nproc; i++) {
335 for (j = start; j < end; j++) {
336 ARMCI_NbGet(&dsrc[i][j], &ddst_get[me][i*elems[1] + j], bytes,
337 i, &aggr_hdl_get[i]);
338 }
339 }
340 for (i = 1; i < nproc; i++) {
341 ARMCI_Wait(&aggr_hdl_get[i]);
342 }
343 if (!dryrun) {
344 printf("%d: Aggregate Get time = %.2es\n", me, armci_timer() - start_time);
345 }
346 }
347
348 ARMCI_Barrier();
349 ARMCI_AllFence();
350 ARMCI_Barrier();
351
352 /* Verify */
353 if (!(me == 0))
354 for (j = 0; j < elems[1]; j++) {
355 if (ARMCI_ABS(ddst_put[me][j] - j * 1.001) > 0.1) {
356 ARMCI_Error("aggregate put failed...1", 0);
357 }
358 }
359 ARMCI_Barrier();
360 if (!dryrun)if (me == 0) {
361 printf("\n aggregate put ..O.K.\n");
362 }
363 fflush(stdout);
364
365 if (me == 0) {
366 for (i = 1; i < nproc; i++) {
367 for (j = 0; j < elems[1]; j++) {
368 if (ARMCI_ABS(ddst_get[me][i*elems[1] + j] - j * 1.001 *(i + 1)) > 0.1) {
369 ARMCI_Error("aggregate get failed...1", 0);
370 }
371 }
372 }
373 }
374 ARMCI_Barrier();
375 if (!dryrun)if (me == 0) {
376 printf(" aggregate get ..O.K.\n");
377 }
378 fflush(stdout);
379
380
381 ARMCI_AllFence();
382 ARMCI_Barrier();
383
384 if (!dryrun)if (me == 0) {
385 printf("O.K.\n");
386 fflush(stdout);
387 }
388 destroy_array(ddst_put);
389 destroy_array(ddst_get);
390 destroy_array(dsrc);
391 }
392
393
main(int argc,char * argv[])394 int main(int argc, char *argv[])
395 {
396 armci_msg_init(&argc, &argv);
397 ARMCI_Init_args(&argc, &argv);
398 nproc = armci_msg_nproc();
399 me = armci_msg_me();
400
401 /* printf("nproc = %d, me = %d\n", nproc, me);*/
402
403 if (nproc > MAXPROC && me == 0) {
404 ARMCI_Error("Test works for up to %d processors\n", MAXPROC);
405 }
406
407 if (me == 0) {
408 printf("ARMCI test program (%d processes)\n", nproc);
409 fflush(stdout);
410 sleep(1);
411 }
412
413 if (me == 0) {
414 printf("\nAggregate put/get requests\n\n");
415 fflush(stdout);
416 }
417 test_aggregate(1); /* cold start */
418 test_aggregate(0); /* warm start */
419
420 ARMCI_AllFence();
421 ARMCI_Barrier();
422 if (me == 0) {
423 printf("\nSuccess!!\n");
424 fflush(stdout);
425 }
426 sleep(2);
427
428 ARMCI_Barrier();
429 ARMCI_Finalize();
430 armci_msg_finalize();
431 return(0);
432 }
433