1 /*********************************************************************
2 Small benchmark for testing basic capabilities of Blosc.
3
4 You can select different degrees of 'randomness' in input buffer, as
5 well as external datafiles (uncomment the lines after "For data
6 coming from a file" comment).
7
8 For usage instructions of this benchmark, please see:
9
10 http://blosc.org/synthetic-benchmarks.html
11
12 I'm collecting speeds for different machines, so the output of your
13 benchmarks and your processor specifications are welcome!
14
15 Author: Francesc Alted <francesc@blosc.org>
16
  Note: Compiling this with VS2008 does not work well with cmake.  Here
  is a way to compile the benchmark (with added support for LZ4):
19
20 > cl /DHAVE_LZ4 /arch:SSE2 /Ox /Febench.exe /Iblosc /Iinternal-complibs\lz4-1.7.5 bench\bench.c blosc\blosc.c blosc\blosclz.c blosc\shuffle.c blosc\shuffle-sse2.c blosc\shuffle-generic.c blosc\bitshuffle-generic.c blosc\bitshuffle-sse2.c internal-complibs\lz4-1.7.5\*.c
21
22 See LICENSES/BLOSC.txt for details about copyright and rights to use.
23 **********************************************************************/
24
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#if defined(_WIN32)
  /* For QueryPerformanceCounter(), etc. */
  #include <windows.h>
#elif defined(__MACH__) && defined(__APPLE__)
  #include <mach/clock.h>
  #include <mach/mach.h>
  #include <time.h>
  #include <sys/time.h>
#elif defined(__unix__) || defined(__HAIKU__)
  #include <unistd.h>
  #if defined(__GLIBC__)
    #include <time.h>
  #else
    #include <sys/time.h>
  #endif
#else
  #error Unable to detect platform.
#endif
49
50
51 #include "../blosc/blosc.h"
52
/* Size units, expressed in bytes. */
#define KB 1024
#define MB (1024 * KB)
#define GB (1024 * MB)

/* Maximum number of chunks; do_bench() sizes its array of destination
   buffers with this value. */
#define NCHUNKS (32 * 1024)
#define MAX_THREADS 16


/* Benchmark-wide state shared by main() and do_bench(). */
int nchunks = NCHUNKS;     /* number of chunks used by the current run */
int niter = 3;             /* default number of iterations */
double totalsize = 0.;     /* total compressed/decompressed size */
64
65 /* System-specific high-precision timing functions. */
66 #if defined(_WIN32)
67
68 /* The type of timestamp used on this system. */
69 #define blosc_timestamp_t LARGE_INTEGER
70
71 /* Set a timestamp value to the current time. */
blosc_set_timestamp(blosc_timestamp_t * timestamp)72 void blosc_set_timestamp(blosc_timestamp_t* timestamp) {
73 /* Ignore the return value, assume the call always succeeds. */
74 QueryPerformanceCounter(timestamp);
75 }
76
77 /* Given two timestamp values, return the difference in microseconds. */
blosc_elapsed_usecs(blosc_timestamp_t start_time,blosc_timestamp_t end_time)78 double blosc_elapsed_usecs(blosc_timestamp_t start_time, blosc_timestamp_t end_time) {
79 LARGE_INTEGER CounterFreq;
80 QueryPerformanceFrequency(&CounterFreq);
81
82 return (double)(end_time.QuadPart - start_time.QuadPart) / ((double)CounterFreq.QuadPart / 1e6);
83 }
84
85 #else
86
87 /* The type of timestamp used on this system. */
88 #define blosc_timestamp_t struct timespec
89
90 /* Set a timestamp value to the current time. */
blosc_set_timestamp(blosc_timestamp_t * timestamp)91 void blosc_set_timestamp(blosc_timestamp_t* timestamp) {
92 #if defined(__MACH__) && defined(__APPLE__) // OS X does not have clock_gettime, use clock_get_time
93 clock_serv_t cclock;
94 mach_timespec_t mts;
95 host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
96 clock_get_time(cclock, &mts);
97 mach_port_deallocate(mach_task_self(), cclock);
98 timestamp->tv_sec = mts.tv_sec;
99 timestamp->tv_nsec = mts.tv_nsec;
100 #else
101 clock_gettime(CLOCK_MONOTONIC, timestamp);
102 #endif
103 }
104
105 /* Given two timestamp values, return the difference in microseconds. */
blosc_elapsed_usecs(blosc_timestamp_t start_time,blosc_timestamp_t end_time)106 double blosc_elapsed_usecs(blosc_timestamp_t start_time, blosc_timestamp_t end_time) {
107 return (1e6 * (end_time.tv_sec - start_time.tv_sec))
108 + (1e-3 * (end_time.tv_nsec - start_time.tv_nsec));
109 }
110
111 #endif
112
113 /* Given two timeval stamps, return the difference in seconds */
getseconds(blosc_timestamp_t last,blosc_timestamp_t current)114 double getseconds(blosc_timestamp_t last, blosc_timestamp_t current) {
115 return 1e-6 * blosc_elapsed_usecs(last, current);
116 }
117
118 /* Given two timeval stamps, return the time per chunk in usec */
get_usec_chunk(blosc_timestamp_t last,blosc_timestamp_t current,int niter,size_t nchunks)119 double get_usec_chunk(blosc_timestamp_t last, blosc_timestamp_t current, int niter, size_t nchunks) {
120 double elapsed_usecs = (double)blosc_elapsed_usecs(last, current);
121 return elapsed_usecs / (double)(niter * nchunks);
122 }
123
/* Define posix_memalign for Windows */
#if defined(_WIN32)
  #include <malloc.h>

/* Minimal posix_memalign() replacement built on _aligned_malloc().
 *
 * memptr    - receives the address of the allocated block (NULL on failure).
 * alignment - requested alignment, forwarded to _aligned_malloc().
 * size      - requested size in bytes.
 *
 * Returns 0 on success and ENOMEM when _aligned_malloc() returns NULL, so
 * that callers checking the return code actually notice a failed allocation.
 */
int posix_memalign(void **memptr, size_t alignment, size_t size)
{
  *memptr = _aligned_malloc(size, alignment);
  return (*memptr == NULL) ? ENOMEM : 0;
}

  /* Buffers allocated with _aligned_malloc need to be freed with _aligned_free. */
  #define aligned_free(memptr) _aligned_free(memptr)
#else
  /* Outside Windows, aligned memory can be freed in the usual way. */
  #define aligned_free(memptr) free(memptr)
#endif  /* defined(_WIN32) */
140
/* Return the synthetic data value for position i.
 *
 * The bits of i are mixed by XOR-ing i with copies of itself shifted left by
 * 26, 18, 11 and 3 bits.  When rshift is below 32 only the lowest rshift bits
 * of the result are kept; rshift <= 0 therefore yields 0.
 *
 * The arithmetic is done on unsigned int: left-shifting a signed int into or
 * past the sign bit, and `1 << 31`, are undefined behaviour, whereas the
 * unsigned operations wrap and produce the same bit patterns.
 */
int get_value(int i, int rshift) {
  const unsigned int bits = (unsigned int)i;
  unsigned int v = (bits << 26) ^ (bits << 18) ^ (bits << 11) ^ (bits << 3) ^ bits;

  if (rshift <= 0) {
    /* No significant bits requested (a negative shift count would be
       undefined behaviour). */
    return 0;
  }
  if (rshift < 32) {
    v &= (1u << rshift) - 1u;
  }
  return (int)v;
}
150
151
/* Fill the buffer at src with the synthetic benchmark pattern.
 *
 * src    - buffer to fill; it is written as an array of int.
 * size   - buffer length in bytes; size/sizeof(int) ints are written, so
 *          any trailing bytes are left untouched.
 * rshift - number of significant bits, forwarded to get_value().
 */
void init_buffer(void *src, int size, int rshift) {
  int *words = (int *)src;
  unsigned int idx = 0;

  /* To have reproducible results (only relevant for the rand()-based
     pattern listed below). */
  srand(1);

  /* Initialize the original buffer.  Alternative patterns that can be
   * swapped in for the assignment inside the loop:
   *   words[idx] = 0;
   *   words[idx] = 0x01010101;
   *   words[idx] = 0x01020304;
   *   words[idx] = idx * 1/.3;
   *   words[idx] = idx;
   *   words[idx] = rand() >> (32-rshift);
   */
  while (idx < size / sizeof(int)) {
    words[idx] = get_value(idx, rshift);
    ++idx;
  }
}
171
172
do_bench(char * compressor,char * shuffle,int nthreads,int size,int elsize,int rshift,FILE * ofile)173 void do_bench(char *compressor, char *shuffle, int nthreads, int size, int elsize,
174 int rshift, FILE * ofile) {
175 void *src, *srccpy;
176 void *dest[NCHUNKS], *dest2;
177 int nbytes = 0, cbytes = 0;
178 int i, j, retcode;
179 unsigned char *orig, *round;
180 blosc_timestamp_t last, current;
181 double tmemcpy, tshuf, tunshuf;
182 int clevel, doshuffle;
183
184 if (strcmp(shuffle, "shuffle") == 0) {
185 doshuffle = BLOSC_SHUFFLE;
186 }
187 else if (strcmp(shuffle, "bitshuffle") == 0) {
188 doshuffle = BLOSC_BITSHUFFLE;
189 }
190 else if (strcmp(shuffle, "noshuffle") == 0) {
191 doshuffle = BLOSC_NOSHUFFLE;
192 }
193 else abort();
194
195 blosc_set_nthreads(nthreads);
196 if(blosc_set_compressor(compressor) < 0){
197 printf("Compiled w/o support for compressor: '%s', so sorry.\n",
198 compressor);
199 exit(1);
200 }
201
202 /* Initialize buffers */
203 srccpy = malloc(size);
204 retcode = posix_memalign( (void **)(&src), 32, size);
205 if (retcode) abort();
206 retcode = posix_memalign( (void **)(&dest2), 32, size);
207 if (retcode) abort();
208
209 /* zero src to initialize byte on it, and not only multiples of 4 */
210 memset(src, 0, size);
211 init_buffer(src, size, rshift);
212 memcpy(srccpy, src, size);
213 for (j = 0; j < nchunks; j++) {
214 retcode = posix_memalign( (void **)(&dest[j]), 32, size+BLOSC_MAX_OVERHEAD);
215 if (retcode) abort();
216 }
217
218 fprintf(ofile, "--> %d, %d, %d, %d, %s, %s\n", nthreads, size, elsize,
219 rshift, compressor, shuffle);
220 fprintf(ofile, "********************** Run info ******************************\n");
221 fprintf(ofile, "Blosc version: %s (%s)\n", BLOSC_VERSION_STRING, BLOSC_VERSION_DATE);
222 fprintf(ofile, "Using synthetic data with %d significant bits (out of 32)\n", rshift);
223 fprintf(ofile, "Dataset size: %d bytes\tType size: %d bytes\n", size, elsize);
224 fprintf(ofile, "Working set: %.1f MB\t\t", (size * nchunks) / (float)MB);
225 fprintf(ofile, "Number of threads: %d\n", nthreads);
226 fprintf(ofile, "********************** Running benchmarks *********************\n");
227
228 blosc_set_timestamp(&last);
229 for (i = 0; i < niter; i++) {
230 for (j = 0; j < nchunks; j++) {
231 memcpy(dest[j], src, size);
232 }
233 }
234 blosc_set_timestamp(¤t);
235 tmemcpy = get_usec_chunk(last, current, niter, nchunks);
236 fprintf(ofile, "memcpy(write):\t\t %6.1f us, %.1f MB/s\n",
237 tmemcpy, (size * 1e6) / (tmemcpy*MB));
238
239 blosc_set_timestamp(&last);
240 for (i = 0; i < niter; i++) {
241 for (j = 0; j < nchunks; j++) {
242 memcpy(dest2, dest[j], size);
243 }
244 }
245 blosc_set_timestamp(¤t);
246 tmemcpy = get_usec_chunk(last, current, niter, nchunks);
247 fprintf(ofile, "memcpy(read):\t\t %6.1f us, %.1f MB/s\n",
248 tmemcpy, (size * 1e6) / (tmemcpy*MB));
249
250 for (clevel=0; clevel<10; clevel++) {
251
252 fprintf(ofile, "Compression level: %d\n", clevel);
253
254 blosc_set_timestamp(&last);
255 for (i = 0; i < niter; i++) {
256 for (j = 0; j < nchunks; j++) {
257 cbytes = blosc_compress(clevel, doshuffle, elsize, size, src,
258 dest[j], size+BLOSC_MAX_OVERHEAD);
259 }
260 }
261 blosc_set_timestamp(¤t);
262 tshuf = get_usec_chunk(last, current, niter, nchunks);
263 fprintf(ofile, "comp(write):\t %6.1f us, %.1f MB/s\t ",
264 tshuf, (size * 1e6) / (tshuf*MB));
265 fprintf(ofile, "Final bytes: %d ", cbytes);
266 if (cbytes > 0) {
267 fprintf(ofile, "Ratio: %3.2f", size/(float)cbytes);
268 }
269 fprintf(ofile, "\n");
270
271 /* Compressor was unable to compress. Copy the buffer manually. */
272 if (cbytes == 0) {
273 for (j = 0; j < nchunks; j++) {
274 memcpy(dest[j], src, size);
275 }
276 }
277
278 blosc_set_timestamp(&last);
279 for (i = 0; i < niter; i++) {
280 for (j = 0; j < nchunks; j++) {
281 if (cbytes == 0) {
282 memcpy(dest2, dest[j], size);
283 nbytes = size;
284 }
285 else {
286 nbytes = blosc_decompress(dest[j], dest2, size);
287 }
288 }
289 }
290 blosc_set_timestamp(¤t);
291 tunshuf = get_usec_chunk(last, current, niter, nchunks);
292 fprintf(ofile, "decomp(read):\t %6.1f us, %.1f MB/s\t ",
293 tunshuf, (nbytes * 1e6) / (tunshuf*MB));
294 if (nbytes < 0) {
295 fprintf(ofile, "FAILED. Error code: %d\n", nbytes);
296 }
297 /* fprintf(ofile, "Orig bytes: %d\tFinal bytes: %d\n", cbytes, nbytes); */
298
299 /* Check if data has had a good roundtrip.
300 Byte-by-byte comparison is slow, so use 'memcmp' to check whether the
301 roundtripped data is correct. If not, fall back to the slow path to
302 print diagnostic messages. */
303 orig = (unsigned char *)srccpy;
304 round = (unsigned char *)dest2;
305 if (memcmp(orig, round, size) != 0)
306 {
307 for(i = 0; i<size; ++i){
308 if (orig[i] != round[i]) {
309 fprintf(ofile, "\nError: Original data and round-trip do not match in pos %d\n",
310 (int)i);
311 fprintf(ofile, "Orig--> %x, round-trip--> %x\n", orig[i], round[i]);
312 break;
313 }
314 }
315 }
316 else { i = size; }
317
318 if (i == size) fprintf(ofile, "OK\n");
319
320 } /* End clevel loop */
321
322
323 /* To compute the totalsize, we should take into account the 10
324 compression levels */
325 totalsize += (size * nchunks * niter * 10.);
326
327 aligned_free(src); free(srccpy); aligned_free(dest2);
328 for (i = 0; i < nchunks; i++) {
329 aligned_free(dest[i]);
330 }
331
332 }
333
334
335 /* Compute a sensible value for nchunks */
get_nchunks(int size_,int ws)336 int get_nchunks(int size_, int ws) {
337 int nchunks;
338
339 nchunks = ws / size_;
340 if (nchunks > NCHUNKS) nchunks = NCHUNKS;
341 if (nchunks < 1) nchunks = 1;
342 return nchunks;
343 }
344
print_compress_info(void)345 void print_compress_info(void)
346 {
347 char *name = NULL, *version = NULL;
348 int ret;
349
350 printf("Blosc version: %s (%s)\n", BLOSC_VERSION_STRING, BLOSC_VERSION_DATE);
351
352 printf("List of supported compressors in this build: %s\n",
353 blosc_list_compressors());
354
355 printf("Supported compression libraries:\n");
356 ret = blosc_get_complib_info("blosclz", &name, &version);
357 if (ret >= 0) printf(" %s: %s\n", name, version);
358 ret = blosc_get_complib_info("lz4", &name, &version);
359 if (ret >= 0) printf(" %s: %s\n", name, version);
360 ret = blosc_get_complib_info("snappy", &name, &version);
361 if (ret >= 0) printf(" %s: %s\n", name, version);
362 ret = blosc_get_complib_info("zlib", &name, &version);
363 if (ret >= 0) printf(" %s: %s\n", name, version);
364 ret = blosc_get_complib_info("zstd", &name, &version);
365 if (ret >= 0) printf(" %s: %s\n", name, version);
366
367 }
368
369
main(int argc,char * argv[])370 int main(int argc, char *argv[]) {
371 char compressor[32];
372 char shuffle[32] = "shuffle";
373 char bsuite[32];
374 int single = 1;
375 int suite = 0;
376 int hard_suite = 0;
377 int extreme_suite = 0;
378 int debug_suite = 0;
379 int nthreads = 4; /* The number of threads */
380 int size = 4 * MB; /* Buffer size */
381 int elsize = 8; /* Datatype size */
382 int rshift = 19; /* Significant bits */
383 int workingset = 256 * MB; /* The maximum allocated memory */
384 int nthreads_, size_, elsize_, rshift_, i;
385 FILE * output_file = stdout;
386 blosc_timestamp_t last, current;
387 float totaltime;
388 char usage[256];
389
390 print_compress_info();
391
392 strncpy(usage, "Usage: bench [blosclz | lz4 | lz4hc | snappy | zlib | zstd] "
393 "[noshuffle | shuffle | bitshuffle] "
394 "[single | suite | hardsuite | extremesuite | debugsuite] "
395 "[nthreads] [bufsize(bytes)] [typesize] [sbits]", 255);
396
397 if (argc < 2) {
398 printf("%s\n", usage);
399 exit(1);
400 }
401
402 strcpy(compressor, argv[1]);
403
404 if (strcmp(compressor, "blosclz") != 0 &&
405 strcmp(compressor, "lz4") != 0 &&
406 strcmp(compressor, "lz4hc") != 0 &&
407 strcmp(compressor, "snappy") != 0 &&
408 strcmp(compressor, "zlib") != 0 &&
409 strcmp(compressor, "zstd") != 0) {
410 printf("No such compressor: '%s'\n", compressor);
411 printf("%s\n", usage);
412 exit(2);
413 }
414
415 if (argc >= 3) {
416 strcpy(shuffle, argv[2]);
417 if (strcmp(shuffle, "shuffle") != 0 &&
418 strcmp(shuffle, "bitshuffle") != 0 &&
419 strcmp(shuffle, "noshuffle") != 0) {
420 printf("No such shuffler: '%s'\n", shuffle);
421 printf("%s\n", usage);
422 exit(2);
423 }
424 }
425
426 if (argc < 4)
427 strcpy(bsuite, "single");
428 else
429 strcpy(bsuite, argv[3]);
430
431 if (strcmp(bsuite, "single") == 0) {
432 single = 1;
433 }
434 else if (strcmp(bsuite, "test") == 0) {
435 single = 1;
436 workingset = 128*MB;
437 }
438 else if (strcmp(bsuite, "suite") == 0) {
439 suite = 1;
440 }
441 else if (strcmp(bsuite, "hardsuite") == 0) {
442 hard_suite = 1;
443 workingset = 64*MB;
444 /* Values here are ending points for loops */
445 nthreads = 2;
446 size = 8*MB;
447 elsize = 32;
448 rshift = 32;
449 }
450 else if (strcmp(bsuite, "extremesuite") == 0) {
451 extreme_suite = 1;
452 workingset = 32*MB;
453 niter = 1;
454 /* Values here are ending points for loops */
455 nthreads = 4;
456 size = 16*MB;
457 elsize = 32;
458 rshift = 32;
459 }
460 else if (strcmp(bsuite, "debugsuite") == 0) {
461 debug_suite = 1;
462 workingset = 32*MB;
463 niter = 1;
464 /* Warning: values here are starting points for loops. This is
465 useful for debugging. */
466 nthreads = 1;
467 size = 16*KB;
468 elsize = 1;
469 rshift = 0;
470 }
471 else {
472 printf("%s\n", usage);
473 exit(1);
474 }
475
476 printf("Using compressor: %s\n", compressor);
477 printf("Using shuffle type: %s\n", shuffle);
478 printf("Running suite: %s\n", bsuite);
479
480 if (argc >= 5) {
481 nthreads = atoi(argv[4]);
482 }
483 if (argc >= 6) {
484 size = atoi(argv[5]);
485 }
486 if (argc >= 7) {
487 elsize = atoi(argv[6]);
488 }
489 if (argc >= 8) {
490 rshift = atoi(argv[7]);
491 }
492
493 if ((argc >= 9) || !(single || suite || hard_suite || extreme_suite)) {
494 printf("%s\n", usage);
495 exit(1);
496 }
497
498 nchunks = get_nchunks(size, workingset);
499 blosc_set_timestamp(&last);
500
501 blosc_init();
502
503 if (suite) {
504 for (nthreads_=1; nthreads_ <= nthreads; nthreads_++) {
505 do_bench(compressor, shuffle, nthreads_, size, elsize, rshift, output_file);
506 }
507 }
508 else if (hard_suite) {
509 /* Let's start the rshift loop by 4 so that 19 is visited. This
510 is to allow a direct comparison with the plain suite, that runs
511 precisely at 19 significant bits. */
512 for (rshift_ = 4; rshift_ <= rshift; rshift_ += 5) {
513 for (elsize_ = 1; elsize_ <= elsize; elsize_ *= 2) {
514 /* The next loop is for getting sizes that are not power of 2 */
515 for (i = -elsize_; i <= elsize_; i += elsize_) {
516 for (size_ = 32*KB; size_ <= size; size_ *= 2) {
517 nchunks = get_nchunks(size_+i, workingset);
518 niter = 1;
519 for (nthreads_ = 1; nthreads_ <= nthreads; nthreads_++) {
520 do_bench(compressor, shuffle, nthreads_, size_+i, elsize_, rshift_, output_file);
521 blosc_set_timestamp(¤t);
522 totaltime = (float)getseconds(last, current);
523 printf("Elapsed time:\t %6.1f s. Processed data: %.1f GB\n",
524 totaltime, totalsize / GB);
525 }
526 }
527 }
528 }
529 }
530 }
531 else if (extreme_suite) {
532 for (rshift_ = 0; rshift_ <= rshift; rshift_++) {
533 for (elsize_ = 1; elsize_ <= elsize; elsize_++) {
534 /* The next loop is for getting sizes that are not power of 2 */
535 for (i = -elsize_*2; i <= elsize_*2; i += elsize_) {
536 for (size_ = 32*KB; size_ <= size; size_ *= 2) {
537 nchunks = get_nchunks(size_+i, workingset);
538 for (nthreads_ = 1; nthreads_ <= nthreads; nthreads_++) {
539 do_bench(compressor, shuffle, nthreads_, size_+i, elsize_, rshift_, output_file);
540 blosc_set_timestamp(¤t);
541 totaltime = (float)getseconds(last, current);
542 printf("Elapsed time:\t %6.1f s. Processed data: %.1f GB\n",
543 totaltime, totalsize / GB);
544 }
545 }
546 }
547 }
548 }
549 }
550 else if (debug_suite) {
551 for (rshift_ = rshift; rshift_ <= 32; rshift_++) {
552 for (elsize_ = elsize; elsize_ <= 32; elsize_++) {
553 /* The next loop is for getting sizes that are not power of 2 */
554 for (i = -elsize_*2; i <= elsize_*2; i += elsize_) {
555 for (size_ = size; size_ <= 16*MB; size_ *= 2) {
556 nchunks = get_nchunks(size_+i, workingset);
557 for (nthreads_ = nthreads; nthreads_ <= 6; nthreads_++) {
558 do_bench(compressor, shuffle, nthreads_, size_+i, elsize_, rshift_, output_file);
559 blosc_set_timestamp(¤t);
560 totaltime = (float)getseconds(last, current);
561 printf("Elapsed time:\t %6.1f s. Processed data: %.1f GB\n",
562 totaltime, totalsize / GB);
563 }
564 }
565 }
566 }
567 }
568 }
569 /* Single mode */
570 else {
571 do_bench(compressor, shuffle, nthreads, size, elsize, rshift, output_file);
572 }
573
574 /* Print out some statistics */
575 blosc_set_timestamp(¤t);
576 totaltime = (float)getseconds(last, current);
577 printf("\nRound-trip compr/decompr on %.1f GB\n", totalsize / GB);
578 printf("Elapsed time:\t %6.1f s, %.1f MB/s\n",
579 totaltime, totalsize*2*1.1/(MB*totaltime));
580
581 /* Free blosc resources */
582 blosc_free_resources();
583 blosc_destroy();
584 return 0;
585 }
586