1 #if HAVE_CONFIG_H
2 # include "config.h"
3 #endif
4
5 /* $Id: perf_nb.c,v 1.3 2004-03-29 19:14:51 vinod Exp $ */
6 #if HAVE_STDIO_H
7 # include <stdio.h>
8 #endif
9 #if HAVE_STDLIB_H
10 # include <stdlib.h>
11 #endif
12 #if HAVE_ASSERT_H
13 # include <assert.h>
14 #endif
15 #if HAVE_MATH_H
16 # include <math.h>
17 #endif
18 #if HAVE_UNISTD_H
19 # include <unistd.h>
20 #elif HAVE_WINDOWS_H
21 # include <windows.h>
22 # define sleep(x) Sleep(1000*(x))
23 #endif
24
25 #include "armci.h"
26 #include "message.h"
27
/*
 * Array extents DIM1..DIM7. DIM3..DIM5 are chosen per platform.
 * None of the DIMn values are referenced by the code visible in this file.
 */
#define DIM1 5
#define DIM2 3
#ifdef __sun
/* Solaris has shared memory shortages in the default system configuration */
# define DIM3 6
# define DIM4 5
# define DIM5 4
#elif defined(__alpha__)
# define DIM3 8
# define DIM4 5
# define DIM5 6
#else
# define DIM3 8
# define DIM4 9
# define DIM5 7
#endif
#define DIM6 3
#define DIM7 2


/* EDIMn is DIMn enlarged by OFF; not referenced by the code visible here. */
#define OFF 1
#define EDIM1 (DIM1+OFF)
#define EDIM2 (DIM2+OFF)
#define EDIM3 (DIM3+OFF)
#define EDIM4 (DIM4+OFF)
#define EDIM5 (DIM5+OFF)
#define EDIM6 (DIM6+OFF)
#define EDIM7 (DIM7+OFF)

/* MAXDIMS is the upper bound asserted on ndim in create_array(). */
#define DIMS 4
#define MAXDIMS 7
#define MAX_DIM_VAL 50
#define LOOP 200

/*
 * MAXPROC sizes the per-process pointer tables (work[], ddst[], and dsrc[] in
 * test_perf_nb) and is the largest process count main() accepts.
 */
#define BASE 100.
#define MAXPROC 8
#define TIMES 100

#ifdef CRAY
# define ELEMS 800
#else
# define ELEMS 200
#endif

/*
 * MAXELEMS is the largest section length, in doubles, that test_perf_nb()
 * transfers; it also fixes the size of the buffers allocated there.
 * NOTE(review): the value in the trailing comment is presumably an
 * alternative setting -- confirm before changing.
 */
#define MAXELEMS 131072 /* 262144 */
#define MAX_REQUESTS MAXELEMS
/* Operation selectors accepted by verify_results(). */
#define PUT 11
#define GET 22
#define ACC 33

int VERIFY = 1; /* verifies results */
int DEBUG = 0; /* if debug=1, dump extra messages */

/***************************** macros ************************/
/*
 * NOTE(review): COPY expands to memcpy, which is declared in <string.h>; no
 * include of that header is visible in this file. COPY, ARMCI_MAX and
 * ARMCI_MIN are not used by the code visible here.
 * All of these macros evaluate their arguments more than once, so arguments
 * must be free of side effects.
 */
#define COPY(src, dst, bytes) memcpy((dst),(src),(bytes))
#define ARMCI_MAX(a,b) (((a) >= (b)) ? (a) : (b))
#define ARMCI_MIN(a,b) (((a) <= (b)) ? (a) : (b))
#define ARMCI_ABS(a) (((a) <0) ? -(a) : (a))

/***************************** global data *******************/
/* Rank of this process and total process count; both are set in main(). */
int me, nproc;
void *work[MAXPROC]; /* work array for propagating addresses */
/* Destination buffers, indexed by process rank; allocated in test_perf_nb(). */
double *ddst[MAXPROC];
91
#ifdef PVM
/*
 * Bring up the PVM process group used by the test.
 *
 * argv[1], when present, is the requested number of processes (1..MAXPROC);
 * with no argument a single process is used. The group member with id 0
 * spawns the remaining np-1 copies of this program, then every member waits
 * until the group has np members and synchronizes on a group barrier.
 *
 * On a bad command line the usage text is printed, PVM is left and the
 * process exits with status -1. The same exit status is used when enrolling
 * in PVM or spawning the other tasks fails.
 *
 * NOTE(review): MPGROUP and the pvm_* prototypes are not declared in the part
 * of the file visible here -- presumably they come from headers pulled in
 * when PVM is defined; confirm.
 */
void pvm_init(int argc, char *argv[])
{
    int mytid, mygid, ctid[MAXPROC];
    int np = 1;
    int started, k;

    mytid = pvm_mytid();
    if (mytid < 0) {
        fprintf(stderr, "pvm_init: pvm_mytid failed (%d)\n", mytid);
        exit(-1);
    }

    if ((argc != 1) && (argc != 2)) {
        goto usage;
    }
    if (argc == 2) {
        np = atoi(argv[1]);
        if (np < 1) {
            goto usage;
        }
    }
    if (np > MAXPROC) {
        goto usage;
    }

    mygid = pvm_joingroup(MPGROUP);

    if ((np > 1) && (mygid == 0)) {
        started = pvm_spawn(argv[0], argv + 1, 0, "", np - 1, ctid);
        if (started != np - 1) {
            /*
             * Without this check a failed or partial spawn would leave the
             * wait loop below spinning forever, because the group can never
             * reach np members.
             */
            fprintf(stderr, "pvm_init: spawned %d of %d tasks\n",
                    started, np - 1);
            /* Tasks that did start would otherwise wait for the group forever. */
            for (k = 0; k < started; k++) {
                pvm_kill(ctid[k]);
            }
            pvm_exit();
            exit(-1);
        }
    }

    /* Wait until every spawned task has joined the group. */
    while (pvm_gsize(MPGROUP) < np) {
        sleep(1);
    }

    /* sync */
    pvm_barrier(MPGROUP, np);

    printf("PVM initialization done!\n");

    return;

usage:
    fprintf(stderr, "usage: %s <nproc>\n", argv[0]);
    pvm_exit();
    exit(-1);
}
#endif
137
138 /*void create_array(void *a[], int elem_size, int ndim, int dims[])*/
create_array(double * a[],int ndim,int dims[])139 void create_array(double *a[], int ndim, int dims[])
140 {
141 int bytes = sizeof(double), i, rc;
142
143 assert(ndim <= MAXDIMS);
144 for (i = 0; i < ndim; i++) {
145 bytes *= dims[i];
146 }
147
148 rc = ARMCI_Malloc((void **)a, bytes);
149 assert(rc == 0);
150
151 assert(a[me]);
152
153 }
154
155 /*void destroy_array(void *ptr[])*/
destroy_array(double * ptr[])156 void destroy_array(double *ptr[])
157 {
158 int check;
159
160 ARMCI_Barrier();
161 check = !ARMCI_Free(ptr[me]);
162 assert(check);
163 }
164
verify_results(int op,int * elems)165 void verify_results(int op, int *elems)
166 {
167 int i, j;
168
169 switch (op) {
170
171 case PUT:
172 if (!(me == 0))
173 for (j = 0; j < elems[1]; j++) {
174 if (ARMCI_ABS(ddst[me][j] - j * 1.001) > 0.1) {
175 ARMCI_Error("put failed...Invalid Value Obtained..1", 0);
176 }
177 }
178 ARMCI_Barrier();
179 if (DEBUG) if (me == 0) {
180 printf(" verifying put ..O.K.\n");
181 }
182 break;
183
184 case GET:
185 if (me == 0) {
186 for (i = 1; i < nproc; i++) {
187 for (j = 0; j < elems[1]; j++) {
188 if (ARMCI_ABS(ddst[me][i*elems[1] + j] - j * 1.001 *(i + 1)) > 0.1) {
189 ARMCI_Error("get failed...Invalid Value Obtained..1", 0);
190 }
191 }
192 }
193 }
194 ARMCI_Barrier();
195 if (DEBUG) if (me == 0) {
196 printf(" verifying get ..O.K.\n\n");
197 }
198 break;
199
200 case ACC:
201 if (me == 0)
202 for (j = 0; j < elems[1]; j++) {
203 /*printf("ddst[%d][%d] = %lf\n", me, j, ddst[me][j]);
204 fflush(stdout); */
205 if (ARMCI_ABS(ddst[me][j] - (double)nproc) > 0.1) {
206 ARMCI_Error("accumulate failed...Invalid Value Obtained..1", 0);
207 }
208 }
209 ARMCI_Barrier();
210 if (DEBUG)if (me == 0) {
211 printf(" verifying accumulate ..O.K.\n");
212 }
213 break;
214
215 default:
216 ARMCI_Error("Invalid Operation", 0);
217 }
218 fflush(stdout);
219 }
220
221
test_perf_nb(int dry_run)222 void test_perf_nb(int dry_run)
223 {
224
225 int i, j, loop, rc, bytes, elems[2] = {MAXPROC, MAXELEMS};
226 int stride, k = 0, ntimes;
227 double stime, t1, t2, t3, t4, t5, t6, t7, t8, t9;
228 double *dsrc[MAXPROC], scale = 1.0;
229 armci_hdl_t hdl_get, hdl_put, hdl_acc;
230
231 create_array(ddst, 2, elems);
232 create_array(dsrc, 1, &elems[1]);
233
234 if (!dry_run)if (me == 0) {
235 printf("\n\t\t\tRemote 1-D Array Section\n");
236 printf("section get nbget wait put nbput ");
237 printf(" wait acc nbacc wait\n");
238 printf("------- -------- -------- -------- -------- --------");
239 printf(" -------- -------- -------- --------\n");
240 fflush(stdout);
241 }
242
243 for (loop = 1; loop <= MAXELEMS; loop *= 2, k++) {
244
245 elems[1] = loop;
246 ntimes = (int)sqrt((double)(MAXELEMS / elems[1]));
247 if (ntimes < 1) {
248 ntimes = 1;
249 }
250
251 /* -------------------------- SETUP --------------------------- */
252 /*initializing non-blocking handles,time,src & dst buffers*/
253 ARMCI_INIT_HANDLE(&hdl_put);
254 ARMCI_INIT_HANDLE(&hdl_get);
255 ARMCI_INIT_HANDLE(&hdl_acc);
256 t1 = t2 = t3 = t4 = t5 = t6 = t7 = t8 = t9 = 0.0;
257 for (i = 0; i < elems[1]; i++) {
258 dsrc[me][i] = i * 1.001 * (me + 1);
259 }
260 for (i = 0; i < elems[0]*elems[1]; i++) {
261 ddst[me][i] = 0.0;
262 }
263 ARMCI_Barrier();
264
265 /* bytes transfered */
266 bytes = sizeof(double) * elems[1];
267 ARMCI_Barrier();
268
269 /* -------------------------- PUT/GET -------------------------- */
270 if (me == 0) {
271 for (i = 1; i < nproc; i++) {
272 stime = armci_timer();
273 for (j = 0; j < ntimes; j++)
274 if ((rc = ARMCI_Put(&dsrc[me][0], &ddst[i][me*elems[1]], bytes, i))) {
275 ARMCI_Error("armci_nbput failed\n", rc);
276 }
277 t1 += armci_timer() - stime;
278 }
279 }
280 ARMCI_Barrier();
281 ARMCI_AllFence();
282 ARMCI_Barrier();
283 if (VERIFY) {
284 verify_results(PUT, elems);
285 }
286 for (i = 0; i < elems[0]*elems[1]; i++) {
287 ddst[me][i] = 0.0;
288 }
289 ARMCI_Barrier();
290
291 if (me == 0) {
292 for (i = 1; i < nproc; i++) {
293 stime = armci_timer();
294 for (j = 0; j < ntimes; j++)
295 if ((rc = ARMCI_Get(&dsrc[i][0], &ddst[me][i*elems[1]], bytes, i))) {
296 printf("%d: armci_get. rc=%d\n", me, rc);
297 fflush(stdout);
298 ARMCI_Error("armci_nbget failed\n", rc);
299 }
300 t4 += armci_timer() - stime;
301 }
302 }
303 ARMCI_Barrier();
304 ARMCI_AllFence();
305 ARMCI_Barrier();
306 if (VERIFY) {
307 verify_results(GET, elems);
308 }
309 for (i = 0; i < elems[0]*elems[1]; i++) {
310 ddst[me][i] = 0.0;
311 }
312 ARMCI_Barrier();
313
314 /* ------------------------ nb PUT/GET ------------------------- */
315 if (me == 0) {
316 for (i = 1; i < nproc; i++) {
317 for (j = 0; j < ntimes; j++) {
318 stime = armci_timer();
319 if ((rc = ARMCI_NbPut(&dsrc[me][0], &ddst[i][me*elems[1]], bytes,
320 i, &hdl_put))) {
321 ARMCI_Error("armci_nbput failed\n", rc);
322 }
323 t2 += armci_timer() - stime;
324 stime = armci_timer();
325 ARMCI_Wait(&hdl_put);
326 t3 += armci_timer() - stime;
327 }
328 }
329 }
330 ARMCI_Barrier();
331 ARMCI_AllFence();
332 ARMCI_Barrier();
333 if (VERIFY) {
334 verify_results(PUT, elems);
335 }
336 for (i = 0; i < elems[0]*elems[1]; i++) {
337 ddst[me][i] = 0.0;
338 }
339 ARMCI_Barrier();
340
341 if (me == 0) {
342 for (i = 1; i < nproc; i++) {
343 for (j = 0; j < ntimes; j++) {
344 stime = armci_timer();
345 if ((rc = ARMCI_NbGet(&dsrc[i][0], &ddst[me][i*elems[1]], bytes,
346 i, &hdl_get))) {
347 ARMCI_Error("armci_nbget failed\n", rc);
348 }
349 t5 += armci_timer() - stime;
350 stime = armci_timer();
351 ARMCI_Wait(&hdl_get);
352 t6 += armci_timer() - stime;
353 }
354 }
355 }
356 ARMCI_Barrier();
357 ARMCI_AllFence();
358 ARMCI_Barrier();
359 if (VERIFY) {
360 verify_results(GET, elems);
361 }
362 for (i = 0; i < elems[0]*elems[1]; i++) {
363 ddst[me][i] = 0.0;
364 }
365 ARMCI_Barrier();
366
367
368 /* ------------------------ Accumulate ------------------------- */
369 for (i = 0; i < elems[1]; i++) {
370 dsrc[me][i] = 1.0;
371 }
372 ARMCI_Barrier();
373 stride = elems[1] * sizeof(double);
374 scale = 1.0;
375 for (j = 0; j < ntimes; j++) {
376 stime = armci_timer();
377 if ((rc = ARMCI_AccS(ARMCI_ACC_DBL, &scale, &dsrc[me][0], &stride,
378 &ddst[0][0], &stride, &bytes, 0, 0))) {
379 ARMCI_Error("armci_acc failed\n", rc);
380 }
381 t7 += armci_timer() - stime;
382
383 ARMCI_Barrier();
384 ARMCI_AllFence();
385 ARMCI_Barrier();
386 if (VERIFY) {
387 verify_results(ACC, elems);
388 }
389 for (i = 0; i < elems[0]*elems[1]; i++) {
390 ddst[me][i] = 0.0;
391 }
392 ARMCI_Barrier();
393 }
394
395 #if PORTALS
396 /* See the note below why this part is disabled */
397 /* ---------------------- nb-Accumulate ------------------------ */
398 for (i = 0; i < elems[1]; i++) {
399 dsrc[me][i] = 1.0;
400 }
401 ARMCI_Barrier();
402 stride = elems[1] * sizeof(double);
403 scale = 1.0;
404 for (j = 0; j < ntimes; j++) {
405 stime = armci_timer();
406 if ((rc = ARMCI_NbAccS(ARMCI_ACC_DBL, &scale, &dsrc[me][0], &stride,
407 &ddst[0][0], &stride, &bytes, 0, 0, &hdl_acc))) {
408 ARMCI_Error("armci_nbacc failed\n", rc);
409 }
410 t8 += armci_timer() - stime;
411 stime = armci_timer();
412 ARMCI_Wait(&hdl_acc);
413 t9 += armci_timer() - stime;
414
415 ARMCI_Barrier();
416 ARMCI_AllFence();
417 ARMCI_Barrier();
418 if (VERIFY) {
419 verify_results(ACC, elems);
420 }
421 for (i = 0; i < elems[0]*elems[1]; i++) {
422 ddst[me][i] = 0.0;
423 }
424 ARMCI_Barrier();
425 }
426 #endif
427
428 /* print timings */
429 if (!dry_run) if (me == 0) printf("%d\t %.2e %.2e %.2e %.2e %.2e %.2e %.2e %.2e %.2e\n",
430 bytes, t4 / ntimes, t5 / ntimes, t6 / ntimes, t1 / ntimes,
431 t2 / ntimes, t3 / ntimes, t7 / ntimes, t8 / ntimes, t9 / ntimes);
432 }
433
434 ARMCI_AllFence();
435 ARMCI_Barrier();
436
437 if (!dry_run)if (me == 0) {
438 printf("O.K.\n");
439 fflush(stdout);
440 }
441 destroy_array(ddst);
442 destroy_array(dsrc);
443 }
444
445
main(int argc,char * argv[])446 int main(int argc, char *argv[])
447 {
448 armci_msg_init(&argc, &argv);
449 ARMCI_Init_args(&argc, &argv);
450 nproc = armci_msg_nproc();
451 me = armci_msg_me();
452
453 if (nproc < 2 || nproc > MAXPROC) {
454 if (me == 0)
455 fprintf(stderr,
456 "USAGE: 2 <= processes < %d - got %d\n", MAXPROC, nproc);
457 ARMCI_Barrier();
458 armci_msg_finalize();
459 exit(0);
460 }
461
462 if (me == 0) {
463 printf("ARMCI test program (%d processes)\n", nproc);
464 fflush(stdout);
465 sleep(1);
466 }
467
468 if (me == 0) {
469 printf("\n put/get/acc requests (Time in secs)\n\n");
470 fflush(stdout);
471 }
472
473 test_perf_nb(1);
474 test_perf_nb(0);
475
476 ARMCI_AllFence();
477 ARMCI_Barrier();
478 if (me == 0) {
479 printf("\nSuccess!!\n");
480 fflush(stdout);
481 }
482 sleep(2);
483
484 ARMCI_Barrier();
485 ARMCI_Finalize();
486 armci_msg_finalize();
487 return(0);
488 }
489
/*
   NOTE: ARMCI_NbAcc fails in opus for buffer sizes greater than 400Kb.
   This is why the non-blocking accumulate section of test_perf_nb() is
   compiled only when PORTALS is set.
 */
493