1 /*
2 * Copyright (C) by Argonne National Laboratory
3 * See COPYRIGHT in top-level directory
4 */
5
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <assert.h>
9 #include <string.h>
10 #include <pthread.h>
11 #include "yaksa_config.h"
12 #include "yaksa.h"
13 #include "dtpools.h"
14 #include "pack-common.h"
15
/* Upper bound handed to DTP_obj_create() as its buffer-size argument
 * (512 MiB). */
uintptr_t maxbufsize = 512 * 1024 * 1024;

/* Order in which the computed segments are packed/unpacked; selected
 * with the -ordering option.  UNSET is the initial value and is
 * rejected by the argument validation in main(). */
enum {
    PACK_ORDER__UNSET,
    PACK_ORDER__NORMAL,
    PACK_ORDER__REVERSE,
    PACK_ORDER__RANDOM,
};

/* How the segments handed to pack/unpack relate to each other;
 * selected with the -overlap option:
 *   NONE      - equal-length, back-to-back segments
 *   REGULAR   - each segment requests up to twice the equal length
 *               while advancing by one equal length
 *   IRREGULAR - randomly chosen lengths and offsets */
enum {
    OVERLAP__NONE,
    OVERLAP__REGULAR,
    OVERLAP__IRREGULAR,
};

/* Maximum number of characters kept from the -datatype argument (the
 * typestr buffer has one extra byte for the terminator). */
#define MAX_DTP_BASESTRLEN (1024)

/* Non-zero once -verbose has been seen on the command line. */
static int verbose = 0;

/* printf() that only produces output when verbose is non-zero.
 * NOTE(review): this macro shares its name with the POSIX dprintf()
 * function; within this file the macro takes precedence. */
#define dprintf(...)                            \
    do {                                        \
        if (verbose)                            \
            printf(__VA_ARGS__);                \
    } while (0)
40
/*
 * Exchange segment x with segment y.  A segment is described by the
 * pair (starts[i], lengths[i]), so both parallel arrays are updated.
 * Calling this with x == y leaves the arrays unchanged.
 */
static void swap_segments(uintptr_t * starts, uintptr_t * lengths, int x, int y)
{
    uintptr_t *start_a = &starts[x];
    uintptr_t *start_b = &starts[y];
    uintptr_t *length_a = &lengths[x];
    uintptr_t *length_b = &lengths[y];

    /* exchange the start offsets first ... */
    const uintptr_t saved_start = *start_a;
    *start_a = *start_b;
    *start_b = saved_start;

    /* ... then the matching lengths */
    const uintptr_t saved_length = *length_a;
    *length_a = *length_b;
    *length_b = saved_length;
}
51
/* Values of the -device-start-id and -device-stride options.  They are
 * not read anywhere in this file.
 * NOTE(review): presumably consumed by the pack-common helpers --
 * confirm against pack-common.h. */
int device_id = 0;
int device_stride = 0;

/* Base datatype name from -datatype; an empty string means "not set". */
char typestr[MAX_DTP_BASESTRLEN + 1] = { 0 };

/* Command-line parameters.  The initial -1 / UNSET values mark options
 * that were not supplied; main() prints the usage text and exits if any
 * of them is still unset after parsing. */
int seed = -1;
int basecount = -1;
int iters = -1;
int max_segments = -1;
int pack_order = PACK_ORDER__UNSET;
int overlap = -1;

/* Memory kinds for the source, destination and temporary (packed)
 * buffers; set with -sbuf-memtype, -dbuf-memtype and -tbuf-memtype. */
mem_type_e sbuf_memtype = MEM_TYPE__UNREGISTERED_HOST;
mem_type_e dbuf_memtype = MEM_TYPE__UNREGISTERED_HOST;
mem_type_e tbuf_memtype = MEM_TYPE__UNREGISTERED_HOST;

/* Array of datatype pools allocated in main(), one per thread; thread
 * "tid" uses dtp[tid]. */
DTP_pool_s *dtp;
67
68 void *runtest(void *arg);
runtest(void * arg)69 void *runtest(void *arg)
70 {
71 DTP_obj_s sobj, dobj;
72 int rc;
73 uintptr_t tid = (uintptr_t) arg;
74
75 uintptr_t *segment_starts = (uintptr_t *) malloc(max_segments * sizeof(uintptr_t));
76 uintptr_t *segment_lengths = (uintptr_t *) malloc(max_segments * sizeof(uintptr_t));
77
78 for (int i = 0; i < iters; i++) {
79 dprintf("==== iter %d ====\n", i);
80
81 /* create the source object */
82 rc = DTP_obj_create(dtp[tid], &sobj, maxbufsize);
83 assert(rc == DTP_SUCCESS);
84
85 char *sbuf_h = NULL, *sbuf_d = NULL;
86 pack_alloc_mem(sobj.DTP_bufsize, sbuf_memtype, (void **) &sbuf_h, (void **) &sbuf_d);
87 assert(sbuf_h);
88 assert(sbuf_d);
89
90 if (verbose) {
91 char *desc;
92 rc = DTP_obj_get_description(sobj, &desc);
93 assert(rc == DTP_SUCCESS);
94 dprintf("==> sbuf_h %p, sbuf_d %p, sobj (count: %zu):\n%s\n", sbuf_h, sbuf_d,
95 sobj.DTP_type_count, desc);
96 free(desc);
97 }
98
99 rc = DTP_obj_buf_init(sobj, sbuf_h, 0, 1, basecount);
100 assert(rc == DTP_SUCCESS);
101
102 uintptr_t ssize;
103 rc = yaksa_type_get_size(sobj.DTP_datatype, &ssize);
104 assert(rc == YAKSA_SUCCESS);
105
106
107 /* create the destination object */
108 rc = DTP_obj_create(dtp[tid], &dobj, maxbufsize);
109 assert(rc == DTP_SUCCESS);
110
111 char *dbuf_h, *dbuf_d;
112 pack_alloc_mem(dobj.DTP_bufsize, dbuf_memtype, (void **) &dbuf_h, (void **) &dbuf_d);
113 assert(dbuf_h);
114 assert(dbuf_d);
115
116 if (verbose) {
117 char *desc;
118 rc = DTP_obj_get_description(dobj, &desc);
119 assert(rc == DTP_SUCCESS);
120 dprintf("==> dbuf_h %p, dbuf_d %p, dobj (count: %zu):\n%s\n", dbuf_h, dbuf_d,
121 dobj.DTP_type_count, desc);
122 free(desc);
123 }
124
125 rc = DTP_obj_buf_init(dobj, dbuf_h, -1, -1, basecount);
126 assert(rc == DTP_SUCCESS);
127
128 uintptr_t dsize;
129 rc = yaksa_type_get_size(dobj.DTP_datatype, &dsize);
130 assert(rc == YAKSA_SUCCESS);
131
132
133 /* the source and destination objects should have the same
134 * signature */
135 assert(ssize * sobj.DTP_type_count == dsize * dobj.DTP_type_count);
136
137
138 /* pack from the source object to a temporary buffer and
139 * unpack into the destination object */
140
141 /* figure out the lengths and offsets of each segment */
142 uintptr_t type_size;
143 rc = yaksa_type_get_size(dtp[tid].DTP_base_type, &type_size);
144 assert(rc == YAKSA_SUCCESS);
145
146 int segments = max_segments;
147 while (((ssize * sobj.DTP_type_count) / type_size) % segments)
148 segments--;
149
150 uintptr_t offset = 0;
151 for (int j = 0; j < segments; j++) {
152 segment_starts[j] = offset;
153
154 uintptr_t eqlength = ssize * sobj.DTP_type_count / segments;
155
156 /* make sure eqlength is a multiple of type_size */
157 eqlength = (eqlength / type_size) * type_size;
158
159 if (overlap == OVERLAP__NONE) {
160 segment_lengths[j] = eqlength;
161 offset += eqlength;
162 } else if (overlap == OVERLAP__REGULAR) {
163 if (offset + 2 * eqlength <= ssize * sobj.DTP_type_count)
164 segment_lengths[j] = 2 * eqlength;
165 else
166 segment_lengths[j] = eqlength;
167 offset += eqlength;
168 } else {
169 if (j == segments - 1) {
170 if (ssize * sobj.DTP_type_count > offset)
171 segment_lengths[j] = ssize * sobj.DTP_type_count - offset;
172 else
173 segment_lengths[j] = 0;
174 segment_lengths[j] += rand() % eqlength;
175 } else {
176 segment_lengths[j] = rand() % (ssize * sobj.DTP_type_count - offset + eqlength);
177 }
178
179 offset += ((rand() % (segment_lengths[j] + 1)) / type_size) * type_size;
180 if (offset > ssize * sobj.DTP_type_count)
181 offset = ssize * sobj.DTP_type_count;
182 }
183 }
184
185 /* update the order in which we access the segments */
186 if (pack_order == PACK_ORDER__NORMAL) {
187 /* nothing to do */
188 } else if (pack_order == PACK_ORDER__REVERSE) {
189 for (int j = 0; j < segments / 2; j++) {
190 swap_segments(segment_starts, segment_lengths, j, segments - j - 1);
191 }
192 } else if (pack_order == PACK_ORDER__RANDOM) {
193 for (int j = 0; j < 1000; j++) {
194 int x = rand() % segments;
195 int y = rand() % segments;
196 swap_segments(segment_starts, segment_lengths, x, y);
197 }
198 }
199
200 /* the actual pack/unpack loop */
201 pack_copy_content(sbuf_h, sbuf_d, sobj.DTP_bufsize, sbuf_memtype);
202 pack_copy_content(dbuf_h, dbuf_d, dobj.DTP_bufsize, dbuf_memtype);
203
204 void *tbuf_h, *tbuf_d;
205 uintptr_t tbufsize = ssize * sobj.DTP_type_count;
206 pack_alloc_mem(tbufsize, tbuf_memtype, &tbuf_h, &tbuf_d);
207
208 yaksa_info_t pack_info, unpack_info;
209 pack_get_ptr_attr(sbuf_d + sobj.DTP_buf_offset, tbuf_d, &pack_info);
210 pack_get_ptr_attr(tbuf_d, dbuf_d + dobj.DTP_buf_offset, &unpack_info);
211
212 for (int j = 0; j < segments; j++) {
213 uintptr_t actual_pack_bytes;
214 yaksa_request_t request;
215
216 rc = yaksa_ipack(sbuf_d + sobj.DTP_buf_offset, sobj.DTP_type_count, sobj.DTP_datatype,
217 segment_starts[j], tbuf_d, segment_lengths[j], &actual_pack_bytes,
218 pack_info, &request);
219 assert(rc == YAKSA_SUCCESS);
220 assert(actual_pack_bytes <= segment_lengths[j]);
221
222 if (j == segments - 1) {
223 DTP_obj_free(sobj);
224 }
225
226 rc = yaksa_request_wait(request);
227 assert(rc == YAKSA_SUCCESS);
228
229 uintptr_t actual_unpack_bytes;
230 rc = yaksa_iunpack(tbuf_d, actual_pack_bytes, dbuf_d + dobj.DTP_buf_offset,
231 dobj.DTP_type_count, dobj.DTP_datatype, segment_starts[j],
232 &actual_unpack_bytes, unpack_info, &request);
233 assert(rc == YAKSA_SUCCESS);
234 assert(actual_pack_bytes == actual_unpack_bytes);
235
236 rc = yaksa_request_wait(request);
237 assert(rc == YAKSA_SUCCESS);
238 }
239
240 if (pack_info) {
241 rc = yaksa_info_free(pack_info);
242 assert(rc == YAKSA_SUCCESS);
243 }
244
245 if (unpack_info) {
246 rc = yaksa_info_free(unpack_info);
247 assert(rc == YAKSA_SUCCESS);
248 }
249
250 pack_copy_content(dbuf_d, dbuf_h, dobj.DTP_bufsize, dbuf_memtype);
251 rc = DTP_obj_buf_check(dobj, dbuf_h, 0, 1, basecount);
252 assert(rc == DTP_SUCCESS);
253
254
255 /* free allocated buffers and objects */
256 pack_free_mem(sbuf_memtype, sbuf_h, sbuf_d);
257 pack_free_mem(dbuf_memtype, dbuf_h, dbuf_d);
258 pack_free_mem(tbuf_memtype, tbuf_h, tbuf_d);
259
260 DTP_obj_free(dobj);
261 }
262
263 free(segment_lengths);
264 free(segment_starts);
265
266 return NULL;
267 }
268
main(int argc,char ** argv)269 int main(int argc, char **argv)
270 {
271 int num_threads = 1;
272
273 while (--argc && ++argv) {
274 if (!strcmp(*argv, "-datatype")) {
275 --argc;
276 ++argv;
277 strncpy(typestr, *argv, MAX_DTP_BASESTRLEN);
278 } else if (!strcmp(*argv, "-count")) {
279 --argc;
280 ++argv;
281 basecount = atoi(*argv);
282 } else if (!strcmp(*argv, "-seed")) {
283 --argc;
284 ++argv;
285 seed = atoi(*argv);
286 } else if (!strcmp(*argv, "-iters")) {
287 --argc;
288 ++argv;
289 iters = atoi(*argv);
290 } else if (!strcmp(*argv, "-segments")) {
291 --argc;
292 ++argv;
293 max_segments = atoi(*argv);
294 } else if (!strcmp(*argv, "-ordering")) {
295 --argc;
296 ++argv;
297 if (!strcmp(*argv, "normal"))
298 pack_order = PACK_ORDER__NORMAL;
299 else if (!strcmp(*argv, "reverse"))
300 pack_order = PACK_ORDER__REVERSE;
301 else if (!strcmp(*argv, "random"))
302 pack_order = PACK_ORDER__RANDOM;
303 else {
304 fprintf(stderr, "unknown packing order %s\n", *argv);
305 exit(1);
306 }
307 } else if (!strcmp(*argv, "-overlap")) {
308 --argc;
309 ++argv;
310 if (!strcmp(*argv, "none"))
311 overlap = OVERLAP__NONE;
312 else if (!strcmp(*argv, "regular"))
313 overlap = OVERLAP__REGULAR;
314 else if (!strcmp(*argv, "irregular"))
315 overlap = OVERLAP__IRREGULAR;
316 else {
317 fprintf(stderr, "unknown overlap type %s\n", *argv);
318 exit(1);
319 }
320 } else if (!strcmp(*argv, "-sbuf-memtype")) {
321 --argc;
322 ++argv;
323 if (!strcmp(*argv, "unreg-host"))
324 sbuf_memtype = MEM_TYPE__UNREGISTERED_HOST;
325 else if (!strcmp(*argv, "reg-host"))
326 sbuf_memtype = MEM_TYPE__REGISTERED_HOST;
327 else if (!strcmp(*argv, "managed"))
328 sbuf_memtype = MEM_TYPE__MANAGED;
329 else if (!strcmp(*argv, "device"))
330 sbuf_memtype = MEM_TYPE__DEVICE;
331 else {
332 fprintf(stderr, "unknown buffer type %s\n", *argv);
333 exit(1);
334 }
335 } else if (!strcmp(*argv, "-dbuf-memtype")) {
336 --argc;
337 ++argv;
338 if (!strcmp(*argv, "unreg-host"))
339 dbuf_memtype = MEM_TYPE__UNREGISTERED_HOST;
340 else if (!strcmp(*argv, "reg-host"))
341 dbuf_memtype = MEM_TYPE__REGISTERED_HOST;
342 else if (!strcmp(*argv, "managed"))
343 dbuf_memtype = MEM_TYPE__MANAGED;
344 else if (!strcmp(*argv, "device"))
345 dbuf_memtype = MEM_TYPE__DEVICE;
346 else {
347 fprintf(stderr, "unknown buffer type %s\n", *argv);
348 exit(1);
349 }
350 } else if (!strcmp(*argv, "-tbuf-memtype")) {
351 --argc;
352 ++argv;
353 if (!strcmp(*argv, "unreg-host"))
354 tbuf_memtype = MEM_TYPE__UNREGISTERED_HOST;
355 else if (!strcmp(*argv, "reg-host"))
356 tbuf_memtype = MEM_TYPE__REGISTERED_HOST;
357 else if (!strcmp(*argv, "managed"))
358 tbuf_memtype = MEM_TYPE__MANAGED;
359 else if (!strcmp(*argv, "device"))
360 tbuf_memtype = MEM_TYPE__DEVICE;
361 else {
362 fprintf(stderr, "unknown buffer type %s\n", *argv);
363 exit(1);
364 }
365 } else if (!strcmp(*argv, "-device-start-id")) {
366 --argc;
367 ++argv;
368 device_id = atoi(*argv);
369 } else if (!strcmp(*argv, "-device-stride")) {
370 --argc;
371 ++argv;
372 device_stride = atoi(*argv);
373 } else if (!strcmp(*argv, "-verbose")) {
374 verbose = 1;
375 } else if (!strcmp(*argv, "-num-threads")) {
376 --argc;
377 ++argv;
378 num_threads = atoi(*argv);
379 } else {
380 fprintf(stderr, "unknown argument %s\n", *argv);
381 exit(1);
382 }
383 }
384 if (strlen(typestr) == 0 || basecount <= 0 || seed < 0 || iters <= 0 || max_segments < 0 ||
385 pack_order == PACK_ORDER__UNSET || overlap < 0 || num_threads <= 0) {
386 fprintf(stderr, "Usage: ./pack {options}\n");
387 fprintf(stderr, " -datatype base datatype to use, e.g., int\n");
388 fprintf(stderr, " -count number of base datatypes in the signature\n");
389 fprintf(stderr, " -seed random seed (changes the datatypes generated)\n");
390 fprintf(stderr, " -iters number of iterations\n");
391 fprintf(stderr, " -segments number of segments to chop the packing into\n");
392 fprintf(stderr, " -ordering packing order of segments (normal, reverse, random)\n");
393 fprintf(stderr, " -overlap should packing overlap (none, regular, irregular)\n");
394 fprintf(stderr, " -sbuf-memtype memory type (unreg-host, reg-host, device)\n");
395 fprintf(stderr, " -dbuf-memtype memory type (unreg-host, reg-host, device)\n");
396 fprintf(stderr, " -tbuf-memtype memory type (unreg-host, reg-host, device)\n");
397 fprintf(stderr, " -device-start-id ID of the device for the first allocation\n");
398 fprintf(stderr, " -device-stride difference between consecutive device allocations\n");
399 fprintf(stderr, " -verbose verbose output\n");
400 fprintf(stderr, " -num-threads number of threads to spawn\n");
401 exit(1);
402 }
403
404 yaksa_init(NULL);
405 pack_init_devices();
406
407 dtp = (DTP_pool_s *) malloc(num_threads * sizeof(DTP_pool_s));
408 for (uintptr_t i = 0; i < num_threads; i++) {
409 int rc = DTP_pool_create(typestr, basecount, seed + i, &dtp[i]);
410 assert(rc == DTP_SUCCESS);
411 }
412
413 pthread_t *threads = (pthread_t *) malloc(num_threads * sizeof(pthread_t));
414
415 for (uintptr_t i = 0; i < num_threads; i++)
416 pthread_create(&threads[i], NULL, runtest, (void *) i);
417
418 for (uintptr_t i = 0; i < num_threads; i++)
419 pthread_join(threads[i], NULL);
420
421 free(threads);
422
423 for (uintptr_t i = 0; i < num_threads; i++) {
424 int rc = DTP_pool_free(dtp[i]);
425 assert(rc == DTP_SUCCESS);
426 }
427 free(dtp);
428
429 pack_finalize_devices();
430 yaksa_finalize();
431
432 return 0;
433 }
434