1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <assert.h>
9 #include <string.h>
10 #include <pthread.h>
11 #include "yaksa_config.h"
12 #include "yaksa.h"
13 #include "dtpools.h"
14 #include "pack-common.h"
15 
/* Maximum buffer size (512 MiB) passed to DTP_obj_create() whenever a
 * source or destination object is created in runtest(). */
uintptr_t maxbufsize = 512 * 1024 * 1024;
17 
/* Order in which the computed segments are packed and unpacked; selected
 * with the -ordering command-line option.
 *   UNSET   - initial value of pack_order; rejected by the argument check
 *             in main(), which makes -ordering mandatory
 *   NORMAL  - segments are processed in the order they were generated
 *   REVERSE - the segment list is reversed before processing
 *   RANDOM  - the segment list is shuffled with 1000 random pairwise swaps */
enum {
    PACK_ORDER__UNSET,
    PACK_ORDER__NORMAL,
    PACK_ORDER__REVERSE,
    PACK_ORDER__RANDOM,
};

/* How the byte ranges requested for consecutive segments relate to each
 * other; selected with the -overlap command-line option.
 *   NONE      - equal-length segments laid out back to back
 *   REGULAR   - starts advance by one equal length, but each segment
 *               requests two equal lengths when that still fits in the
 *               packed stream, so neighbors overlap
 *   IRREGULAR - segment lengths and start advances are drawn from rand() */
enum {
    OVERLAP__NONE,
    OVERLAP__REGULAR,
    OVERLAP__IRREGULAR,
};
30 
/* Maximum number of characters copied from the -datatype argument into
 * typestr.  typestr is declared one byte larger and zero-initialized, so
 * the bounded strncpy() in main() always leaves it NUL-terminated. */
#define MAX_DTP_BASESTRLEN (1024)

/* Set to 1 by the -verbose option; gates all dprintf() output. */
static int verbose = 0;

/* printf() wrapper that prints only when verbose is non-zero.
 * NOTE: within this file the macro hides the POSIX dprintf(int fd, ...)
 * function declared by <stdio.h>. */
#define dprintf(...)                            \
    do {                                        \
        if (verbose)                            \
            printf(__VA_ARGS__);                \
    } while (0)
40 
/*
 * Exchange segment x with segment y.
 *
 * A segment is described by the pair (starts[i], lengths[i]); both arrays
 * are updated together so the pair stays intact.  Calling this with
 * x == y leaves both arrays unchanged.
 */
static void swap_segments(uintptr_t * starts, uintptr_t * lengths, int x, int y)
{
    /* remember segment x before it is overwritten */
    const uintptr_t saved_start = starts[x];
    const uintptr_t saved_length = lengths[x];

    starts[x] = starts[y];
    lengths[x] = lengths[y];

    starts[y] = saved_start;
    lengths[y] = saved_length;
}
51 
/* Set from -device-start-id and -device-stride.  Neither is read anywhere
 * else in this file; presumably they are consumed by the pack-common
 * helpers -- TODO confirm. */
int device_id = 0;
int device_stride = 0;

/* Base datatype string from -datatype; passed to DTP_pool_create(). */
char typestr[MAX_DTP_BASESTRLEN + 1] = { 0 };

/* Test parameters filled in by main() from the command line.  The negative
 * (or UNSET) initial values mark options that have not been supplied yet
 * and are rejected by the argument check in main(). */
/* -seed: thread i creates its pool with seed + i */
int seed = -1;
/* -count: passed to DTP_pool_create(), DTP_obj_buf_init() and
 * DTP_obj_buf_check() */
int basecount = -1;
/* -iters: number of iterations each thread runs */
int iters = -1;
/* -segments: upper bound on the number of segments per iteration */
int max_segments = -1;
/* -ordering: one of the PACK_ORDER__* values */
int pack_order = PACK_ORDER__UNSET;
/* -overlap: one of the OVERLAP__* values */
int overlap = -1;
/* memory types used for the source, destination and temporary (packed)
 * buffers; set by -sbuf-memtype, -dbuf-memtype and -tbuf-memtype */
mem_type_e sbuf_memtype = MEM_TYPE__UNREGISTERED_HOST;
mem_type_e dbuf_memtype = MEM_TYPE__UNREGISTERED_HOST;
mem_type_e tbuf_memtype = MEM_TYPE__UNREGISTERED_HOST;
/* array of pools allocated in main(), one per thread; runtest() indexes it
 * with its thread id */
DTP_pool_s *dtp;
67 
68 void *runtest(void *arg);
runtest(void * arg)69 void *runtest(void *arg)
70 {
71     DTP_obj_s sobj, dobj;
72     int rc;
73     uintptr_t tid = (uintptr_t) arg;
74 
75     uintptr_t *segment_starts = (uintptr_t *) malloc(max_segments * sizeof(uintptr_t));
76     uintptr_t *segment_lengths = (uintptr_t *) malloc(max_segments * sizeof(uintptr_t));
77 
78     for (int i = 0; i < iters; i++) {
79         dprintf("==== iter %d ====\n", i);
80 
81         /* create the source object */
82         rc = DTP_obj_create(dtp[tid], &sobj, maxbufsize);
83         assert(rc == DTP_SUCCESS);
84 
85         char *sbuf_h = NULL, *sbuf_d = NULL;
86         pack_alloc_mem(sobj.DTP_bufsize, sbuf_memtype, (void **) &sbuf_h, (void **) &sbuf_d);
87         assert(sbuf_h);
88         assert(sbuf_d);
89 
90         if (verbose) {
91             char *desc;
92             rc = DTP_obj_get_description(sobj, &desc);
93             assert(rc == DTP_SUCCESS);
94             dprintf("==> sbuf_h %p, sbuf_d %p, sobj (count: %zu):\n%s\n", sbuf_h, sbuf_d,
95                     sobj.DTP_type_count, desc);
96             free(desc);
97         }
98 
99         rc = DTP_obj_buf_init(sobj, sbuf_h, 0, 1, basecount);
100         assert(rc == DTP_SUCCESS);
101 
102         uintptr_t ssize;
103         rc = yaksa_type_get_size(sobj.DTP_datatype, &ssize);
104         assert(rc == YAKSA_SUCCESS);
105 
106 
107         /* create the destination object */
108         rc = DTP_obj_create(dtp[tid], &dobj, maxbufsize);
109         assert(rc == DTP_SUCCESS);
110 
111         char *dbuf_h, *dbuf_d;
112         pack_alloc_mem(dobj.DTP_bufsize, dbuf_memtype, (void **) &dbuf_h, (void **) &dbuf_d);
113         assert(dbuf_h);
114         assert(dbuf_d);
115 
116         if (verbose) {
117             char *desc;
118             rc = DTP_obj_get_description(dobj, &desc);
119             assert(rc == DTP_SUCCESS);
120             dprintf("==> dbuf_h %p, dbuf_d %p, dobj (count: %zu):\n%s\n", dbuf_h, dbuf_d,
121                     dobj.DTP_type_count, desc);
122             free(desc);
123         }
124 
125         rc = DTP_obj_buf_init(dobj, dbuf_h, -1, -1, basecount);
126         assert(rc == DTP_SUCCESS);
127 
128         uintptr_t dsize;
129         rc = yaksa_type_get_size(dobj.DTP_datatype, &dsize);
130         assert(rc == YAKSA_SUCCESS);
131 
132 
133         /* the source and destination objects should have the same
134          * signature */
135         assert(ssize * sobj.DTP_type_count == dsize * dobj.DTP_type_count);
136 
137 
138         /* pack from the source object to a temporary buffer and
139          * unpack into the destination object */
140 
141         /* figure out the lengths and offsets of each segment */
142         uintptr_t type_size;
143         rc = yaksa_type_get_size(dtp[tid].DTP_base_type, &type_size);
144         assert(rc == YAKSA_SUCCESS);
145 
146         int segments = max_segments;
147         while (((ssize * sobj.DTP_type_count) / type_size) % segments)
148             segments--;
149 
150         uintptr_t offset = 0;
151         for (int j = 0; j < segments; j++) {
152             segment_starts[j] = offset;
153 
154             uintptr_t eqlength = ssize * sobj.DTP_type_count / segments;
155 
156             /* make sure eqlength is a multiple of type_size */
157             eqlength = (eqlength / type_size) * type_size;
158 
159             if (overlap == OVERLAP__NONE) {
160                 segment_lengths[j] = eqlength;
161                 offset += eqlength;
162             } else if (overlap == OVERLAP__REGULAR) {
163                 if (offset + 2 * eqlength <= ssize * sobj.DTP_type_count)
164                     segment_lengths[j] = 2 * eqlength;
165                 else
166                     segment_lengths[j] = eqlength;
167                 offset += eqlength;
168             } else {
169                 if (j == segments - 1) {
170                     if (ssize * sobj.DTP_type_count > offset)
171                         segment_lengths[j] = ssize * sobj.DTP_type_count - offset;
172                     else
173                         segment_lengths[j] = 0;
174                     segment_lengths[j] += rand() % eqlength;
175                 } else {
176                     segment_lengths[j] = rand() % (ssize * sobj.DTP_type_count - offset + eqlength);
177                 }
178 
179                 offset += ((rand() % (segment_lengths[j] + 1)) / type_size) * type_size;
180                 if (offset > ssize * sobj.DTP_type_count)
181                     offset = ssize * sobj.DTP_type_count;
182             }
183         }
184 
185         /* update the order in which we access the segments */
186         if (pack_order == PACK_ORDER__NORMAL) {
187             /* nothing to do */
188         } else if (pack_order == PACK_ORDER__REVERSE) {
189             for (int j = 0; j < segments / 2; j++) {
190                 swap_segments(segment_starts, segment_lengths, j, segments - j - 1);
191             }
192         } else if (pack_order == PACK_ORDER__RANDOM) {
193             for (int j = 0; j < 1000; j++) {
194                 int x = rand() % segments;
195                 int y = rand() % segments;
196                 swap_segments(segment_starts, segment_lengths, x, y);
197             }
198         }
199 
200         /* the actual pack/unpack loop */
201         pack_copy_content(sbuf_h, sbuf_d, sobj.DTP_bufsize, sbuf_memtype);
202         pack_copy_content(dbuf_h, dbuf_d, dobj.DTP_bufsize, dbuf_memtype);
203 
204         void *tbuf_h, *tbuf_d;
205         uintptr_t tbufsize = ssize * sobj.DTP_type_count;
206         pack_alloc_mem(tbufsize, tbuf_memtype, &tbuf_h, &tbuf_d);
207 
208         yaksa_info_t pack_info, unpack_info;
209         pack_get_ptr_attr(sbuf_d + sobj.DTP_buf_offset, tbuf_d, &pack_info);
210         pack_get_ptr_attr(tbuf_d, dbuf_d + dobj.DTP_buf_offset, &unpack_info);
211 
212         for (int j = 0; j < segments; j++) {
213             uintptr_t actual_pack_bytes;
214             yaksa_request_t request;
215 
216             rc = yaksa_ipack(sbuf_d + sobj.DTP_buf_offset, sobj.DTP_type_count, sobj.DTP_datatype,
217                              segment_starts[j], tbuf_d, segment_lengths[j], &actual_pack_bytes,
218                              pack_info, &request);
219             assert(rc == YAKSA_SUCCESS);
220             assert(actual_pack_bytes <= segment_lengths[j]);
221 
222             if (j == segments - 1) {
223                 DTP_obj_free(sobj);
224             }
225 
226             rc = yaksa_request_wait(request);
227             assert(rc == YAKSA_SUCCESS);
228 
229             uintptr_t actual_unpack_bytes;
230             rc = yaksa_iunpack(tbuf_d, actual_pack_bytes, dbuf_d + dobj.DTP_buf_offset,
231                                dobj.DTP_type_count, dobj.DTP_datatype, segment_starts[j],
232                                &actual_unpack_bytes, unpack_info, &request);
233             assert(rc == YAKSA_SUCCESS);
234             assert(actual_pack_bytes == actual_unpack_bytes);
235 
236             rc = yaksa_request_wait(request);
237             assert(rc == YAKSA_SUCCESS);
238         }
239 
240         if (pack_info) {
241             rc = yaksa_info_free(pack_info);
242             assert(rc == YAKSA_SUCCESS);
243         }
244 
245         if (unpack_info) {
246             rc = yaksa_info_free(unpack_info);
247             assert(rc == YAKSA_SUCCESS);
248         }
249 
250         pack_copy_content(dbuf_d, dbuf_h, dobj.DTP_bufsize, dbuf_memtype);
251         rc = DTP_obj_buf_check(dobj, dbuf_h, 0, 1, basecount);
252         assert(rc == DTP_SUCCESS);
253 
254 
255         /* free allocated buffers and objects */
256         pack_free_mem(sbuf_memtype, sbuf_h, sbuf_d);
257         pack_free_mem(dbuf_memtype, dbuf_h, dbuf_d);
258         pack_free_mem(tbuf_memtype, tbuf_h, tbuf_d);
259 
260         DTP_obj_free(dobj);
261     }
262 
263     free(segment_lengths);
264     free(segment_starts);
265 
266     return NULL;
267 }
268 
main(int argc,char ** argv)269 int main(int argc, char **argv)
270 {
271     int num_threads = 1;
272 
273     while (--argc && ++argv) {
274         if (!strcmp(*argv, "-datatype")) {
275             --argc;
276             ++argv;
277             strncpy(typestr, *argv, MAX_DTP_BASESTRLEN);
278         } else if (!strcmp(*argv, "-count")) {
279             --argc;
280             ++argv;
281             basecount = atoi(*argv);
282         } else if (!strcmp(*argv, "-seed")) {
283             --argc;
284             ++argv;
285             seed = atoi(*argv);
286         } else if (!strcmp(*argv, "-iters")) {
287             --argc;
288             ++argv;
289             iters = atoi(*argv);
290         } else if (!strcmp(*argv, "-segments")) {
291             --argc;
292             ++argv;
293             max_segments = atoi(*argv);
294         } else if (!strcmp(*argv, "-ordering")) {
295             --argc;
296             ++argv;
297             if (!strcmp(*argv, "normal"))
298                 pack_order = PACK_ORDER__NORMAL;
299             else if (!strcmp(*argv, "reverse"))
300                 pack_order = PACK_ORDER__REVERSE;
301             else if (!strcmp(*argv, "random"))
302                 pack_order = PACK_ORDER__RANDOM;
303             else {
304                 fprintf(stderr, "unknown packing order %s\n", *argv);
305                 exit(1);
306             }
307         } else if (!strcmp(*argv, "-overlap")) {
308             --argc;
309             ++argv;
310             if (!strcmp(*argv, "none"))
311                 overlap = OVERLAP__NONE;
312             else if (!strcmp(*argv, "regular"))
313                 overlap = OVERLAP__REGULAR;
314             else if (!strcmp(*argv, "irregular"))
315                 overlap = OVERLAP__IRREGULAR;
316             else {
317                 fprintf(stderr, "unknown overlap type %s\n", *argv);
318                 exit(1);
319             }
320         } else if (!strcmp(*argv, "-sbuf-memtype")) {
321             --argc;
322             ++argv;
323             if (!strcmp(*argv, "unreg-host"))
324                 sbuf_memtype = MEM_TYPE__UNREGISTERED_HOST;
325             else if (!strcmp(*argv, "reg-host"))
326                 sbuf_memtype = MEM_TYPE__REGISTERED_HOST;
327             else if (!strcmp(*argv, "managed"))
328                 sbuf_memtype = MEM_TYPE__MANAGED;
329             else if (!strcmp(*argv, "device"))
330                 sbuf_memtype = MEM_TYPE__DEVICE;
331             else {
332                 fprintf(stderr, "unknown buffer type %s\n", *argv);
333                 exit(1);
334             }
335         } else if (!strcmp(*argv, "-dbuf-memtype")) {
336             --argc;
337             ++argv;
338             if (!strcmp(*argv, "unreg-host"))
339                 dbuf_memtype = MEM_TYPE__UNREGISTERED_HOST;
340             else if (!strcmp(*argv, "reg-host"))
341                 dbuf_memtype = MEM_TYPE__REGISTERED_HOST;
342             else if (!strcmp(*argv, "managed"))
343                 dbuf_memtype = MEM_TYPE__MANAGED;
344             else if (!strcmp(*argv, "device"))
345                 dbuf_memtype = MEM_TYPE__DEVICE;
346             else {
347                 fprintf(stderr, "unknown buffer type %s\n", *argv);
348                 exit(1);
349             }
350         } else if (!strcmp(*argv, "-tbuf-memtype")) {
351             --argc;
352             ++argv;
353             if (!strcmp(*argv, "unreg-host"))
354                 tbuf_memtype = MEM_TYPE__UNREGISTERED_HOST;
355             else if (!strcmp(*argv, "reg-host"))
356                 tbuf_memtype = MEM_TYPE__REGISTERED_HOST;
357             else if (!strcmp(*argv, "managed"))
358                 tbuf_memtype = MEM_TYPE__MANAGED;
359             else if (!strcmp(*argv, "device"))
360                 tbuf_memtype = MEM_TYPE__DEVICE;
361             else {
362                 fprintf(stderr, "unknown buffer type %s\n", *argv);
363                 exit(1);
364             }
365         } else if (!strcmp(*argv, "-device-start-id")) {
366             --argc;
367             ++argv;
368             device_id = atoi(*argv);
369         } else if (!strcmp(*argv, "-device-stride")) {
370             --argc;
371             ++argv;
372             device_stride = atoi(*argv);
373         } else if (!strcmp(*argv, "-verbose")) {
374             verbose = 1;
375         } else if (!strcmp(*argv, "-num-threads")) {
376             --argc;
377             ++argv;
378             num_threads = atoi(*argv);
379         } else {
380             fprintf(stderr, "unknown argument %s\n", *argv);
381             exit(1);
382         }
383     }
384     if (strlen(typestr) == 0 || basecount <= 0 || seed < 0 || iters <= 0 || max_segments < 0 ||
385         pack_order == PACK_ORDER__UNSET || overlap < 0 || num_threads <= 0) {
386         fprintf(stderr, "Usage: ./pack {options}\n");
387         fprintf(stderr, "   -datatype    base datatype to use, e.g., int\n");
388         fprintf(stderr, "   -count       number of base datatypes in the signature\n");
389         fprintf(stderr, "   -seed        random seed (changes the datatypes generated)\n");
390         fprintf(stderr, "   -iters       number of iterations\n");
391         fprintf(stderr, "   -segments    number of segments to chop the packing into\n");
392         fprintf(stderr, "   -ordering  packing order of segments (normal, reverse, random)\n");
393         fprintf(stderr, "   -overlap     should packing overlap (none, regular, irregular)\n");
394         fprintf(stderr, "   -sbuf-memtype memory type (unreg-host, reg-host, device)\n");
395         fprintf(stderr, "   -dbuf-memtype memory type (unreg-host, reg-host, device)\n");
396         fprintf(stderr, "   -tbuf-memtype memory type (unreg-host, reg-host, device)\n");
397         fprintf(stderr, "   -device-start-id  ID of the device for the first allocation\n");
398         fprintf(stderr, "   -device-stride    difference between consecutive device allocations\n");
399         fprintf(stderr, "   -verbose     verbose output\n");
400         fprintf(stderr, "   -num-threads number of threads to spawn\n");
401         exit(1);
402     }
403 
404     yaksa_init(NULL);
405     pack_init_devices();
406 
407     dtp = (DTP_pool_s *) malloc(num_threads * sizeof(DTP_pool_s));
408     for (uintptr_t i = 0; i < num_threads; i++) {
409         int rc = DTP_pool_create(typestr, basecount, seed + i, &dtp[i]);
410         assert(rc == DTP_SUCCESS);
411     }
412 
413     pthread_t *threads = (pthread_t *) malloc(num_threads * sizeof(pthread_t));
414 
415     for (uintptr_t i = 0; i < num_threads; i++)
416         pthread_create(&threads[i], NULL, runtest, (void *) i);
417 
418     for (uintptr_t i = 0; i < num_threads; i++)
419         pthread_join(threads[i], NULL);
420 
421     free(threads);
422 
423     for (uintptr_t i = 0; i < num_threads; i++) {
424         int rc = DTP_pool_free(dtp[i]);
425         assert(rc == DTP_SUCCESS);
426     }
427     free(dtp);
428 
429     pack_finalize_devices();
430     yaksa_finalize();
431 
432     return 0;
433 }
434