1 /* This is part of the netCDF package. Copyright 2005-2018 University
2 Corporation for Atmospheric Research/Unidata See COPYRIGHT file for
3 conditions of use.
4
5 Runs benchmarks on different chunking sizes.
6
7 Russ Rew, Ed Hartnett, Dennis Heimbigner
8 */
9
10 #include <config.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13
14 #ifdef HAVE_UNISTD_H
15 #include <unistd.h> /* for sysconf */
16 #endif
17 #ifdef HAVE_SYS_TYPES_H
18 #include <sys/types.h>
19 #endif
20 #ifdef HAVE_SYS_TIMES_H
21 #include <sys/times.h>
22 #endif
23 #ifdef HAVE_SYS_TIME_H
24 #include <sys/time.h>
25 #endif
26 #include <assert.h>
27 #ifdef HAVE_SYS_RESOURCE_H
28 #include <sys/resource.h>
29 #endif
30 #include "nc_tests.h" /* The ERR macro is here... */
31 #include "netcdf.h"
32
33 #define FILENAME "tst_chunks3.nc"
34
35 /*
36 * The following timing macros can be used by including the necessary
37 * declarations with
38 *
39 * TIMING_DECLS(seconds)
40 *
41 * and surrounding sections of code to be timed with the "statements"
42 *
43 * TIMING_START
44 * [code to be timed goes here]
45 * TIMING_END(seconds)
46 *
47 * The macros assume the user has stored a description of what is
48 * being timed in a 100-char string time_mess, and has included
49 * <sys/times.h> and <sys/resource.h>. The timing message printed by
50 * TIMING_END is not terminated by a new-line, to permit appending
51 * additional text to that line, so user must at least printf("\n")
52 * after that.
53 */
54
/* Declares every variable that TIMING_START and TIMING_END operate on.
 * "seconds" is the name given to the float that TIMING_END fills in. */
#define TIMING_DECLS(seconds)                                               \
    char time_mess[100]; /* caller-supplied label, printed by TIMING_END */ \
    struct rusage ru;    /* most recent getrusage() snapshot */             \
    long inb, oub;       /* ru_inblock / ru_oublock readings */             \
    long emic;           /* elapsed time in microseconds */                 \
    long TMrepeats;      /* repetitions needed to exceed 0.1 second */      \
    long TMreps;         /* counts repetitions of timed code */             \
    float seconds;
63
/* Opens a timed region; it must be closed by TIMING_END.  The region is
 * wrapped in a do/while that re-runs it TMrepeats times per attempt.
 * Before each attempt the process's user+system time and block I/O
 * counters are sampled with getrusage(). */
#define TIMING_START                                                    \
    TMrepeats = 1;                                                      \
    do {                                                                \
        if (getrusage(RUSAGE_SELF, &ru) != 0)                           \
            printf("getrusage failed, returned %d\n", errno);           \
        /* starting point: user + system time, in microseconds */       \
        emic = (1000000 * (ru.ru_utime.tv_sec + ru.ru_stime.tv_sec)     \
                + ru.ru_utime.tv_usec + ru.ru_stime.tv_usec);           \
        inb = ru.ru_inblock;                                            \
        oub = ru.ru_oublock;                                            \
        for (TMreps = 0; TMreps < TMrepeats; TMreps++) {
74
/* Closes the region opened by TIMING_START.  After each attempt the
 * elapsed user+system time is computed; the repetition count is doubled
 * and the attempt repeated until one attempt takes at least 0.1 second.
 * "seconds" then receives the time per repetition, and the label in
 * time_mess is printed with it (no trailing newline). */
#define TIMING_END(seconds)                                             \
        }                                                               \
        if (getrusage(RUSAGE_SELF, &ru) != 0)                           \
            printf("getrusage failed, returned %d\n", errno);           \
        emic = (1000000 * (ru.ru_utime.tv_sec + ru.ru_stime.tv_sec)     \
                + ru.ru_utime.tv_usec + ru.ru_stime.tv_usec) - emic;    \
        inb = ru.ru_inblock - inb;                                      \
        oub = ru.ru_oublock - oub;                                      \
        TMrepeats *= 2;                                                 \
    } while (emic < 100000.0);                                          \
    seconds = emic / (1000000.0 * TMreps);                              \
    printf("%-45.45s %7.2g sec", time_mess, seconds);
88
/* Report an unexpected netCDF status: print the source file, the line
 * number and the nc_strerror() text for "n" on stderr, then return "n"
 * from the enclosing function. */
#define ERR1(n) do {                                                    \
        /* Flush stdout first so it stays in step with stderr. */       \
        fflush(stdout);                                                 \
        fprintf(stderr,                                                 \
                "Sorry! Unexpected result, %s, line: %d - %s\n",        \
                __FILE__, __LINE__, nc_strerror(n));                    \
        return n;                                                       \
    } while (0)
97
/* Value passed as the "deflate" argument of nc_def_var_deflate() for the
 * compressed variable in main().  NOTE(review): per the netCDF API
 * documentation a non-zero value turns deflation on -- confirm. */
#define NC_COMPRESSED 1
99
100 void
parse_args(int argc,char * argv[],int * deflate_levelp,int * shufflep,size_t * dims,size_t * chunks,size_t * cache_sizep,size_t * cache_nelemsp,float * cache_prep)101 parse_args(int argc, char *argv[], /* from command-line invocation */
102 int *deflate_levelp, /* returned: 0 uncompressed,
103 1-9 compression level */
104 int *shufflep, /* returned: 1 if shuffle, otherwise 0 */
105 size_t *dims, /* returned: dimension sizes */
106 size_t *chunks, /* returned: chunk sizes */
107 size_t *cache_sizep, /* returned: cache size (bytes) */
108 size_t *cache_nelemsp, /* returned: cache capacity (chunks) */
109 float *cache_prep) /* returned: cache preemption policy (0-1) */
110 {
111
112 if(argc > 1) {
113 *deflate_levelp = atol(argv[1]);
114 if (*deflate_levelp < 0) {
115 *deflate_levelp = -*deflate_levelp;
116 *shufflep = NC_SHUFFLE;
117 }
118 }
119 if(argc > 2)
120 dims[0] = atol(argv[2]);
121 if(argc > 3)
122 chunks[0] = atol(argv[3]);
123 else
124 chunks[0] = (dims[0]+7)/8;
125 if(argc > 4)
126 dims[1] = atol(argv[4]);
127 else
128 dims[1] = dims[0];
129 if(argc > 5)
130 chunks[1] = atol(argv[5]);
131 else
132 chunks[1] = chunks[0];
133 if(argc > 6)
134 dims[2] = atol(argv[6]);
135 else
136 dims[2] = dims[1];
137 if(argc > 7)
138 chunks[2] = atol(argv[7]);
139 else
140 chunks[2] = chunks[1];
141 if(argc > 8)
142 *cache_sizep = atol(argv[8]);
143 if(argc > 9)
144 *cache_nelemsp = atol(argv[9]);
145 if(argc > 10)
146 *cache_prep = atof(argv[10]);
147 if(argc > 11) {
148 printf("Usage: %s [def_level] [dim1] [chunk1] [dim2] [chunk2] [dim3] [chunk3] [cache_size] [cache_nelems] [cache_pre]\n",
149 argv[0]);
150 exit(1);
151 }
152 return;
153 }
154
/*
 * Allocate "bytes" bytes with malloc() and return the block.
 *
 * If the allocation fails, prints "malloc failed" on stdout and exits
 * with status 2, so callers never see a null pointer.  A zero-byte
 * request is rounded up to one byte: malloc(0) is allowed to return
 * NULL, and that must not be mistaken for an out-of-memory condition.
 * The caller owns the returned block and releases it with free().
 */
void *
emalloc(size_t bytes)
{
    void *memory = malloc(bytes != 0 ? bytes : 1);

    if (memory == NULL) {
        printf("malloc failed\n");
        exit(2);
    }
    return memory;
}
165
166
167 /* compare contiguous, chunked, and compressed performance */
168 int
main(int argc,char * argv[])169 main(int argc, char *argv[]) {
170
171 int stat; /* return status */
172 int ncid; /* netCDF id */
173 int i, j, k;
174 int dim1id, dim2id, dim3id;
175 int varid_g; /* varid for contiguous */
176 int varid_k; /* varid for chunked */
177 int varid_x; /* varid for compressed */
178
179 float *varxy, *varxz, *varyz; /* 2D memory slabs used for I/O */
180 int mm;
181 size_t dims[] = {256, 256, 256}; /* default dim lengths */
182 size_t chunks[] = {32, 32, 32}; /* default chunk sizes */
183 size_t start[3], count[3];
184 float contig_time, chunked_time, compressed_time, ratio;
185 int deflate_level = 1; /* default compression level, 9 is
186 * better and slower. If negative,
187 * turn on shuffle filter also. */
188 int shuffle = NC_NOSHUFFLE;
189 size_t cache_size_def;
190 size_t cache_hash_def;
191 float cache_pre_def;
192 size_t cache_size = 0; /* use library default */
193 size_t cache_hash = 0; /* use library default */
194 float cache_pre = -1.0f; /* use library default */
195
196 /* rank (number of dimensions) for each variable */
197 # define RANK_var1 3
198
199 /* variable shapes */
200 int var_dims[RANK_var1];
201
202 TIMING_DECLS(TMsec) ;
203
204 /* From args, get parameters for timing, including variable and
205 chunk sizes. Negative deflate level means also use shuffle
206 filter. */
207 parse_args(argc, argv, &deflate_level, &shuffle, dims,
208 chunks, &cache_size, &cache_hash, &cache_pre);
209
210 /* get cache defaults, then set cache parameters that are not default */
211 if((stat = nc_get_chunk_cache(&cache_size_def, &cache_hash_def,
212 &cache_pre_def)))
213 ERR1(stat);
214 if(cache_size == 0)
215 cache_size = cache_size_def;
216 if(cache_hash == 0)
217 cache_hash = cache_hash_def;
218 if(cache_pre == -1.0f)
219 cache_pre = cache_pre_def;
220 if((stat = nc_set_chunk_cache(cache_size, cache_hash, cache_pre)))
221 ERR1(stat);
222 printf("cache: %3.2f MBytes %ld objs %3.2f preempt, ",
223 cache_size/1.e6, cache_hash, cache_pre);
224
225 if(deflate_level == 0) {
226 printf("uncompressed ");
227 } else {
228 printf("compression level %d", deflate_level);
229 }
230 if(shuffle == 1) {
231 printf(", shuffled");
232 }
233 printf("\n\n");
234
235 /* initialize 2D slabs for writing along each axis with phony data */
236 varyz = (float *) emalloc(sizeof(float) * 1 * dims[1] * dims[2]);
237 varxz = (float *) emalloc(sizeof(float) * dims[0] * 1 * dims[2]);
238 varxy = (float *) emalloc(sizeof(float) * dims[0] * dims[1] * 1);
239 mm = 0;
240 for(j = 0; j < dims[1]; j++) {
241 for(k = 0; k < dims[2]; k++) {
242 varyz[mm++] = k + dims[2]*j;
243 }
244 }
245 mm = 0;
246 for(i = 0; i < dims[0]; i++) {
247 for(k = 0; k < dims[2]; k++) {
248 varxz[mm++] = k + dims[2]*i;
249 }
250 }
251 mm = 0;
252 for(i = 0; i < dims[0]; i++) {
253 for(j = 0; j < dims[1]; j++) {
254 varxy[mm++] = j + dims[1]*i;
255 }
256 }
257
258 if((stat = nc_create(FILENAME, NC_NETCDF4 | NC_CLASSIC_MODEL, &ncid)))
259 ERR1(stat);
260
261 /* define dimensions */
262 if((stat = nc_def_dim(ncid, "dim1", dims[0], &dim1id)))
263 ERR1(stat);
264 if((stat = nc_def_dim(ncid, "dim2", dims[1], &dim2id)))
265 ERR1(stat);
266 if((stat = nc_def_dim(ncid, "dim3", dims[2], &dim3id)))
267 ERR1(stat);
268
269 /* define variables */
270 var_dims[0] = dim1id;
271 var_dims[1] = dim2id;
272 var_dims[2] = dim3id;
273 if((stat = nc_def_var(ncid, "var_contiguous", NC_FLOAT, RANK_var1,
274 var_dims, &varid_g)))
275 ERR1(stat);
276 if((stat = nc_def_var(ncid, "var_chunked", NC_FLOAT, RANK_var1,
277 var_dims, &varid_k)))
278 ERR1(stat);
279 if((stat = nc_def_var(ncid, "var_compressed", NC_FLOAT, RANK_var1,
280 var_dims, &varid_x)))
281 ERR1(stat);
282
283 if((stat = nc_def_var_chunking(ncid, varid_g, NC_CONTIGUOUS, 0)))
284 ERR1(stat);
285
286 if((stat = nc_def_var_chunking(ncid, varid_k, NC_CHUNKED, chunks)))
287 ERR1(stat);
288
289 if((stat = nc_def_var_chunking(ncid, varid_x, NC_CHUNKED, chunks)))
290 ERR1(stat);
291
292 if (deflate_level != 0) {
293 if((stat = nc_def_var_deflate(ncid, varid_x, shuffle,
294 NC_COMPRESSED, deflate_level)))
295 ERR1(stat);
296 }
297
298 /* leave define mode */
299 if((stat = nc_enddef (ncid)))
300 ERR1(stat);
301
302 /* write each variable one yz slab at a time */
303 start[0] = 0;
304 start[1] = 0;
305 start[2] = 0;
306 count[0] = 1;
307 count[1] = dims[1];
308 count[2] = dims[2];
309
310 sprintf(time_mess," contiguous write %3d %3ld %3ld",
311 1, dims[1], dims[2]);
312 TIMING_START ;
313 for(i = 0; i < dims[0]; i++) {
314 start[0] = i;
315 if((stat = nc_put_vara(ncid, varid_g, start, count, &varyz[0])))
316 ERR1(stat);
317 }
318 TIMING_END(TMsec) ;
319 printf("\n");
320 contig_time = TMsec;
321
322 sprintf(time_mess," chunked write %3d %3ld %3ld %3ld %3ld %3ld",
323 1, dims[1], dims[2], chunks[0], chunks[1], chunks[2]);
324 TIMING_START ;
325 for(i = 0; i < dims[0]; i++) {
326 start[0] = i;
327 if((stat = nc_put_vara(ncid, varid_k, start, count, &varyz[0])))
328 ERR1(stat);
329 }
330 TIMING_END(TMsec) ;
331 chunked_time = TMsec;
332 ratio = contig_time/chunked_time;
333 if(ratio >= 1.0)
334 printf(" %5.2g x faster\n", ratio);
335 else
336 printf(" %5.2g x slower\n", 1.0/ratio);
337
338 sprintf(time_mess," compressed write %3d %3ld %3ld %3ld %3ld %3ld",
339 1, dims[1], dims[2], chunks[0], chunks[1], chunks[2]);
340 TIMING_START ;
341 for(i = 0; i < dims[0]; i++) {
342 start[0] = i;
343 if((stat = nc_put_vara(ncid, varid_x, start, count, &varyz[0])))
344 ERR1(stat);
345 }
346 TIMING_END(TMsec) ;
347 compressed_time = TMsec;
348 ratio = contig_time/compressed_time;
349 if(ratio >= 1.0)
350 printf(" %5.2g x faster\n", ratio);
351 else
352 printf(" %5.2g x slower\n", 1.0/ratio);
353 printf("\n");
354
355 /* write each variable one xz slab at a time */
356 start[0] = 0;
357 start[1] = 0;
358 start[2] = 0;
359 count[0] = dims[0];
360 count[1] = 1;
361 count[2] = dims[2];
362
363 sprintf(time_mess," contiguous write %3ld %3d %3ld",
364 dims[0], 1, dims[2]);
365 TIMING_START ;
366 for(i = 0; i < dims[1]; i++) {
367 start[1] = i;
368 if((stat = nc_put_vara(ncid, varid_g, start, count, &varxz[0])))
369 ERR1(stat);
370 }
371 TIMING_END(TMsec) ;
372 printf("\n");
373 contig_time = TMsec;
374
375 sprintf(time_mess," chunked write %3ld %3d %3ld %3ld %3ld %3ld",
376 dims[0], 1, dims[2], chunks[0], chunks[1], chunks[2]);
377 TIMING_START ;
378 for(i = 0; i < dims[1]; i++) {
379 start[1] = i;
380 if((stat = nc_put_vara(ncid, varid_k, start, count, &varxz[0])))
381 ERR1(stat);
382 }
383 TIMING_END(TMsec) ;
384 chunked_time = TMsec;
385 ratio = contig_time/chunked_time;
386 if(ratio >= 1.0)
387 printf(" %5.2g x faster\n", ratio);
388 else
389 printf(" %5.2g x slower\n", 1.0/ratio);
390
391 sprintf(time_mess," compressed write %3ld %3d %3ld %3ld %3ld %3ld",
392 dims[0], 1, dims[2], chunks[0], chunks[1], chunks[2]);
393 TIMING_START ;
394 for(i = 0; i < dims[1]; i++) {
395 start[1] = i;
396 if((stat = nc_put_vara(ncid, varid_x, start, count, &varxz[0])))
397 ERR1(stat);
398 }
399 TIMING_END(TMsec) ;
400 compressed_time = TMsec;
401 ratio = contig_time/compressed_time;
402 if(ratio >= 1.0)
403 printf(" %5.2g x faster\n", ratio);
404 else
405 printf(" %5.2g x slower\n", 1.0/ratio);
406 printf("\n");
407
408 /* write each variable one xy slab at a time */
409 start[0] = 0;
410 start[1] = 0;
411 start[2] = 0;
412 count[0] = dims[0];
413 count[1] = dims[1];
414 count[2] = 1;
415
416 sprintf(time_mess," contiguous write %3ld %3ld %3d",
417 dims[0], dims[1], 1);
418 TIMING_START ;
419 for(i = 0; i < dims[2]; i++) {
420 start[2] = i;
421 if((stat = nc_put_vara(ncid, varid_g, start, count, &varxy[0])))
422 ERR1(stat);
423 }
424 TIMING_END(TMsec) ;
425 printf("\n");
426 contig_time = TMsec;
427
428 sprintf(time_mess," chunked write %3ld %3ld %3d %3ld %3ld %3ld",
429 dims[0], dims[1], 1, chunks[0], chunks[1], chunks[2]);
430 TIMING_START ;
431 for(i = 0; i < dims[2]; i++) {
432 start[2] = i;
433 if((stat = nc_put_vara(ncid, varid_k, start, count, &varxy[0])))
434 ERR1(stat);
435 }
436 TIMING_END(TMsec) ;
437 chunked_time = TMsec;
438 ratio = contig_time/chunked_time;
439 if(ratio >= 1.0)
440 printf(" %5.2g x faster\n", ratio);
441 else
442 printf(" %5.2g x slower\n", 1.0/ratio);
443
444 sprintf(time_mess," compressed write %3ld %3ld %3d %3ld %3ld %3ld",
445 dims[0], dims[1], 1, chunks[0], chunks[1], chunks[2]);
446 TIMING_START ;
447 for(i = 0; i < dims[2]; i++) {
448 start[2] = i;
449 if((stat = nc_put_vara(ncid, varid_x, start, count, &varxy[0])))
450 ERR1(stat);
451 }
452 TIMING_END(TMsec) ;
453 compressed_time = TMsec;
454 ratio = contig_time/compressed_time;
455 if(ratio >= 1.0)
456 printf(" %5.2g x faster\n", ratio);
457 else
458 printf(" %5.2g x slower\n", 1.0/ratio);
459 printf("\n");
460
461 /* read each variable one yz slab at a time */
462 start[0] = 0;
463 start[1] = 0;
464 start[2] = 0;
465 count[0] = 1;
466 count[1] = dims[1];
467 count[2] = dims[2];
468
469 sprintf(time_mess," contiguous read %3d %3ld %3ld",
470 1, dims[1], dims[2]);
471 TIMING_START ;
472 for(i = 0; i < dims[0]; i++) {
473 start[0] = i;
474 if((stat = nc_get_vara(ncid, varid_g, start, count, &varyz[0])))
475 ERR1(stat);
476 }
477 TIMING_END(TMsec) ;
478 printf("\n");
479 contig_time = TMsec;
480
481 sprintf(time_mess," chunked read %3d %3ld %3ld %3ld %3ld %3ld",
482 1, dims[1], dims[2] , chunks[0], chunks[1], chunks[2]);
483 TIMING_START ;
484 for(i = 0; i < dims[0]; i++) {
485 start[0] = i;
486 if((stat = nc_get_vara(ncid, varid_k, start, count, &varyz[0])))
487 ERR1(stat);
488 }
489 TIMING_END(TMsec) ;
490 chunked_time = TMsec;
491 ratio = contig_time/chunked_time;
492 if(ratio >= 1.0)
493 printf(" %5.2g x faster\n", ratio);
494 else
495 printf(" %5.2g x slower\n", 1.0/ratio);
496
497 sprintf(time_mess," compressed read %3d %3ld %3ld %3ld %3ld %3ld",
498 1, dims[1], dims[2] , chunks[0], chunks[1], chunks[2]);
499 TIMING_START ;
500 for(i = 0; i < dims[0]; i++) {
501 start[0] = i;
502 if((stat = nc_get_vara(ncid, varid_x, start, count, &varyz[0])))
503 ERR1(stat);
504 }
505 TIMING_END(TMsec) ;
506 compressed_time = TMsec;
507 ratio = contig_time/compressed_time;
508 if(ratio >= 1.0)
509 printf(" %5.2g x faster\n", ratio);
510 else
511 printf(" %5.2g x slower\n", 1.0/ratio);
512 printf("\n");
513
514 /* read each variable one xz slab at a time */
515 start[0] = 0;
516 start[1] = 0;
517 start[2] = 0;
518 count[0] = dims[0];
519 count[1] = 1;
520 count[2] = dims[2];
521
522 sprintf(time_mess," contiguous read %3ld %3d %3ld",
523 dims[0], 1, dims[2]);
524 TIMING_START ;
525 for(i = 0; i < dims[1]; i++) {
526 start[1] = i;
527 if((stat = nc_get_vara(ncid, varid_g, start, count, &varxz[0])))
528 ERR1(stat);
529 }
530 TIMING_END(TMsec) ;
531 printf("\n");
532 contig_time = TMsec;
533
534 sprintf(time_mess," chunked read %3ld %3d %3ld %3ld %3ld %3ld",
535 dims[0], 1, dims[2], chunks[0], chunks[1], chunks[2]);
536 TIMING_START ;
537 for(i = 0; i < dims[1]; i++) {
538 start[1] = i;
539 if((stat = nc_get_vara(ncid, varid_k, start, count, &varxz[0])))
540 ERR1(stat);
541 }
542 TIMING_END(TMsec) ;
543 chunked_time = TMsec;
544 ratio = contig_time/chunked_time;
545 if(ratio >= 1.0)
546 printf(" %5.2g x faster\n", ratio);
547 else
548 printf(" %5.2g x slower\n", 1.0/ratio);
549
550 sprintf(time_mess," compressed read %3ld %3d %3ld %3ld %3ld %3ld",
551 dims[0], 1, dims[2], chunks[0], chunks[1], chunks[2]);
552 TIMING_START ;
553 for(i = 0; i < dims[1]; i++) {
554 start[1] = i;
555 if((stat = nc_get_vara(ncid, varid_x, start, count, &varxz[0])))
556 ERR1(stat);
557 }
558 TIMING_END(TMsec) ;
559 compressed_time = TMsec;
560 ratio = contig_time/compressed_time;
561 if(ratio >= 1.0)
562 printf(" %5.2g x faster\n", ratio);
563 else
564 printf(" %5.2g x slower\n", 1.0/ratio);
565 printf("\n");
566
567 /* read variable one xy slab at a time */
568 start[0] = 0;
569 start[1] = 0;
570 start[2] = 0;
571 count[0] = dims[0];
572 count[1] = dims[1];
573 count[2] = 1;
574
575 sprintf(time_mess," contiguous read %3ld %3ld %3d",
576 dims[0], dims[1], 1);
577 TIMING_START ;
578 for(i = 0; i < dims[2]; i++) {
579 start[2] = i;
580 if((stat = nc_get_vara(ncid, varid_g, start, count, &varxy[0])))
581 ERR1(stat);
582 }
583 TIMING_END(TMsec) ;
584 printf("\n");
585 contig_time = TMsec;
586
587 sprintf(time_mess," chunked read %3ld %3ld %3d %3ld %3ld %3ld",
588 dims[0], dims[1], 1, chunks[0], chunks[1], chunks[2]);
589 TIMING_START ;
590 for(i = 0; i < dims[2]; i++) {
591 start[2] = i;
592 if((stat = nc_get_vara(ncid, varid_k, start, count, &varxy[0])))
593 ERR1(stat);
594 }
595 TIMING_END(TMsec) ;
596 chunked_time = TMsec;
597 ratio = contig_time/chunked_time;
598 if(ratio >= 1.0)
599 printf(" %5.2g x faster\n", ratio);
600 else
601 printf(" %5.2g x slower\n", 1.0/ratio);
602
603 sprintf(time_mess," compressed read %3ld %3ld %3d %3ld %3ld %3ld",
604 dims[0], dims[1], 1, chunks[0], chunks[1], chunks[2]);
605 TIMING_START ;
606 for(i = 0; i < dims[2]; i++) {
607 start[2] = i;
608 if((stat = nc_get_vara(ncid, varid_x, start, count, &varxy[0])))
609 ERR1(stat);
610 }
611 TIMING_END(TMsec) ;
612 compressed_time = TMsec;
613 ratio = contig_time/compressed_time;
614 if(ratio >= 1.0)
615 printf(" %5.2g x faster\n", ratio);
616 else
617 printf(" %5.2g x slower\n", 1.0/ratio);
618
619 if((stat = nc_close(ncid)))
620 ERR1(stat);
621
622 return 0;
623 }
624