1 /*********************************************************************
2 Copyright (C) 2021 The Blosc Developers <blosc@blosc.org>
3 https://blosc.org
4 License: BSD 3-Clause (see LICENSE.txt)
5
6 Small benchmark for testing basic capabilities of Blosc.
7
8 You can select different degrees of 'randomness' in input buffer, as
9 well as external datafiles (uncomment the lines after "For data
10 coming from a file" comment).
11
12 For usage instructions of this benchmark, please see:
13
14 https://www.blosc.org/pages/synthetic-benchmarks/
15
16 I'm collecting speeds for different machines, so the output of your
17 benchmarks and your processor specifications are welcome!
18
  Note: Compiling this with VS2008 does not work well with cmake. Here
  is a way to compile the benchmark by hand (with added support for LZ4):
21
22 > cl /arch:SSE2 /Ox /Febench.exe /Iblosc /Iinternal-complibs\lz4-1.7.0 bench\bench.c blosc\blosc.c blosc\blosclz.c blosc\shuffle.c blosc\shuffle-sse2.c blosc\shuffle-generic.c blosc\bitshuffle-generic.c blosc\bitshuffle-sse2.c internal-complibs\lz4-1.7.0\*.c
23
24 See LICENSE.txt for details about copyright and rights to use.
25 **********************************************************************/
26
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include "blosc2.h"
32
/* Binary size units.  KB carries an unsigned suffix, so MB and GB (and any
   expression mixing them with int) are evaluated as unsigned int. */
#define KB 1024u
#define MB (1024*KB)
#define GB (1024*MB)

/* Maximum number of chunks; also the length of the per-chunk destination
   array in do_bench(). */
#define NCHUNKS (32*1024)


/* Benchmark-wide settings, adjusted by main() before each do_bench() call. */
int nchunks = NCHUNKS;  /* number of chunks processed per pass */

/* Default number of iterations */
int niter = 1;          /* passes of the plain memcpy loops */
int niter_c = 1;        /* passes of the compression loop */
int niter_d = 1;        /* passes of the decompression loop */

double totalsize = 0.;  /* total compressed/decompressed size */
46
/* Define posix_memalign for Windows, where the C runtime provides
   _aligned_malloc()/_aligned_free() instead. */
#if defined(_WIN32)
  #include <malloc.h>

/* Allocate `size` bytes aligned to `alignment` and store the result in
   `*memptr`.

   Returns 0 on success.  Returns EINVAL when `alignment` is not a power of
   two that is at least sizeof(void*), and ENOMEM when the allocation itself
   fails; in both failure cases `*memptr` is set to NULL.  Memory obtained
   here must be released with aligned_free(). */
int posix_memalign(void **memptr, size_t alignment, size_t size)
{
  /* Reject bad alignments up front instead of handing them to the runtime. */
  if (alignment < sizeof(void *) || (alignment & (alignment - 1)) != 0) {
    *memptr = NULL;
    return EINVAL;
  }

  *memptr = _aligned_malloc(size, alignment);
  return (*memptr != NULL) ? 0 : ENOMEM;
}

/* Buffers allocated with _aligned_malloc need to be freed with _aligned_free. */
  #define aligned_free(memptr) _aligned_free(memptr)
#else
/* Outside Windows the system posix_memalign() is used, and its memory can be
   freed in the usual way. */
  #define aligned_free(memptr) free(memptr)
#endif  /* defined(_WIN32) */
63
64 /* Given two timeval stamps, return the time per chunk in usec */
get_usec_chunk(blosc_timestamp_t last,blosc_timestamp_t current,int niter_,int nchunks_)65 double get_usec_chunk(blosc_timestamp_t last, blosc_timestamp_t current,
66 int niter_, int nchunks_) {
67 double elapsed_usecs = 1e-3 * blosc_elapsed_nsecs(last, current);
68 return elapsed_usecs / (double)(niter_ * nchunks_);
69 }
70
71
/* Produce the synthetic sample for position `i`, keeping only its `rshift`
   least significant bits.

   The value mixes `i` with left-shifted copies of itself (by 26, 18, 11 and
   3 bits).  With `rshift` >= 32 all 32 bits are kept; with `rshift` <= 0 the
   result is 0.

   The arithmetic is done on uint32_t: shifting a signed int into or past the
   sign bit, and evaluating `1 << 31`, are undefined behavior.  Converting the
   final value back to int yields the same bit pattern on the usual
   two's-complement targets. */
int get_value(int i, int rshift) {
  const uint32_t x = (uint32_t)i;
  uint32_t v = (x << 26) ^ (x << 18) ^ (x << 11) ^ (x << 3) ^ x;

  if (rshift <= 0) {
    return 0;
  }
  if (rshift < 32) {
    v &= (UINT32_C(1) << rshift) - 1u;
  }
  return (int)v;
}
81
82
/* Fill the buffer `src` (of `size` bytes) with synthetic data, one int at a
   time: element k receives get_value(k, rshift).  Trailing bytes that do not
   make up a whole int are left untouched.

   Other fill patterns (a constant, a ramp, rand() based values) can be
   substituted in the loop body when experimenting. */
void init_buffer(void* src, size_t size, int rshift) {
  int* words = (int*)src;
  const size_t nwords = size / sizeof(int);
  unsigned int k;

  /* Fixed seed, so that a rand() based pattern swapped in below would still
     give reproducible results */
  srand(1);

  for (k = 0; k < nwords; ++k) {
    words[k] = get_value((int)k, rshift);
  }
}
102
103
do_bench(char * compressor,char * shuffle,int nthreads,int size_,int elsize,int rshift,FILE * ofile)104 void do_bench(char* compressor, char* shuffle, int nthreads, int size_, int elsize,
105 int rshift, FILE* ofile) {
106 size_t size = (size_t)size_;
107 void* src, *srccpy;
108 void* dest[NCHUNKS], *dest2;
109 int nbytes = 0, cbytes = 0;
110 int i, j, retcode;
111 unsigned char* orig, * round;
112 blosc_timestamp_t last, current;
113 double tmemcpy, tshuf, tunshuf;
114 int clevel, doshuffle = BLOSC_NOFILTER;
115
116 if (strcmp(shuffle, "shuffle") == 0) {
117 doshuffle = BLOSC_SHUFFLE;
118 }
119 else if (strcmp(shuffle, "bitshuffle") == 0) {
120 doshuffle = BLOSC_BITSHUFFLE;
121 }
122 else if (strcmp(shuffle, "noshuffle") == 0) {
123 doshuffle = BLOSC_NOSHUFFLE;
124 }
125
126 blosc_set_nthreads((int16_t)nthreads);
127 if (blosc_set_compressor(compressor) < 0) {
128 printf("Compiled w/o support for compressor: '%s', so sorry.\n",
129 compressor);
130 exit(1);
131 }
132
133 /* Initialize buffers */
134 srccpy = malloc(size);
135 retcode = posix_memalign(&src, 32, size);
136 if (retcode != 0) {
137 printf("Error in allocating memory!");
138 }
139 retcode = posix_memalign(&dest2, 32, size);
140 if (retcode != 0) {
141 printf("Error in allocating memory!");
142 }
143
144 /* zero src to initialize all bytes on it, and not only multiples of 4 */
145 memset(src, 0, size);
146 init_buffer(src, size, rshift);
147 memcpy(srccpy, src, size);
148 for (j = 0; j < nchunks; j++) {
149 retcode = posix_memalign(&dest[j], 32, size + BLOSC_MAX_OVERHEAD);
150 if (retcode != 0) {
151 printf("Error in allocating memory!");
152 }
153 }
154 memset(dest2, 0, size); // just to avoid some GCC compiler warnings
155
156 fprintf(ofile, "--> %d, %d, %d, %d, %s, %s\n", nthreads, (int)size, elsize, rshift, compressor, shuffle);
157 fprintf(ofile, "********************** Run info ******************************\n");
158 fprintf(ofile, "Blosc version: %s (%s)\n", BLOSC_VERSION_STRING, BLOSC_VERSION_DATE);
159 fprintf(ofile, "Using synthetic data with %d significant bits (out of 32)\n", rshift);
160 fprintf(ofile, "Dataset size: %d bytes\tType size: %d bytes\n", (int)size, elsize);
161 fprintf(ofile, "Working set: %.1f MB\t\t", (size * nchunks) / (float)MB);
162 fprintf(ofile, "Number of threads: %d\n", nthreads);
163 fprintf(ofile, "********************** Running benchmarks *********************\n");
164
165 blosc_set_timestamp(&last);
166 for (i = 0; i < niter; i++) {
167 for (j = 0; j < nchunks; j++) {
168 memcpy(dest[j], src, size);
169 }
170 }
171 blosc_set_timestamp(¤t);
172 tmemcpy = get_usec_chunk(last, current, niter, nchunks);
173 fprintf(ofile, "memcpy(write):\t\t %6.1f us, %.1f MB/s\n",
174 tmemcpy, (size * 1e6) / (tmemcpy * MB));
175
176 blosc_set_timestamp(&last);
177 for (i = 0; i < niter; i++) {
178 for (j = 0; j < nchunks; j++) {
179 memcpy(dest2, dest[j], size);
180 }
181 }
182 blosc_set_timestamp(¤t);
183 tmemcpy = get_usec_chunk(last, current, niter, nchunks);
184 fprintf(ofile, "memcpy(read):\t\t %6.1f us, %.1f MB/s\n",
185 tmemcpy, (size * 1e6) / (tmemcpy * MB));
186
187 for (clevel = 0; clevel < 10; clevel++) {
188
189 fprintf(ofile, "Compression level: %d\n", clevel);
190
191 blosc_set_timestamp(&last);
192 for (i = 0; i < niter_c; i++) {
193 for (j = 0; j < nchunks; j++) {
194 cbytes = blosc_compress(clevel, doshuffle, (size_t)elsize, size, src,
195 dest[j], size + BLOSC_MAX_OVERHEAD);
196 }
197 }
198 blosc_set_timestamp(¤t);
199 tshuf = get_usec_chunk(last, current, niter_c, nchunks);
200 fprintf(ofile, "comp(write):\t %6.1f us, %.1f MB/s\t ",
201 tshuf, (size * 1e6) / (tshuf * MB));
202 fprintf(ofile, "Final bytes: %d ", cbytes);
203 if (cbytes > 0) {
204 fprintf(ofile, "Ratio: %3.2f", size / (float)cbytes);
205 }
206 fprintf(ofile, "\n");
207
208 /* Compressor was unable to compress. Copy the buffer manually. */
209 if (cbytes == 0) {
210 for (j = 0; j < nchunks; j++) {
211 memcpy(dest[j], src, size);
212 }
213 }
214
215 blosc_set_timestamp(&last);
216 for (i = 0; i < niter_d; i++) {
217 for (j = 0; j < nchunks; j++) {
218 if (cbytes == 0) {
219 memcpy(dest2, dest[j], size);
220 nbytes = (int)size;
221 }
222 else {
223 nbytes = blosc_decompress(dest[j], dest2, size);
224 }
225 }
226 }
227 blosc_set_timestamp(¤t);
228 tunshuf = get_usec_chunk(last, current, niter_d, nchunks);
229 fprintf(ofile, "decomp(read):\t %6.1f us, %.1f MB/s\t ",
230 tunshuf, (nbytes * 1e6) / (tunshuf * MB));
231 if (nbytes < 0) {
232 fprintf(ofile, "FAILED. Error code: %d\n", nbytes);
233 }
234 /* fprintf(ofile, "Orig bytes: %d\tFinal bytes: %d\n", cbytes, nbytes); */
235
236 /* Check if data has had a good roundtrip.
237 Byte-by-byte comparison is slow, so use 'memcmp' to check whether the
238 roundtripped data is correct. If not, fall back to the slow path to
239 print diagnostic messages. */
240 orig = (unsigned char*)srccpy;
241 round = (unsigned char*)dest2;
242 if (memcmp(orig, round, size) != 0) {
243 for (i = 0; i < (int)size; ++i) {
244 if (orig[i] != round[i]) {
245 fprintf(ofile, "\nError: Original data and round-trip do not match in pos %d\n", i);
246 fprintf(ofile, "Orig--> %x, round-trip--> %x\n", orig[i], round[i]);
247 break;
248 }
249 }
250 }
251 else {
252 i = (int)size;
253 }
254
255 if (i == (int)size) fprintf(ofile, "OK\n");
256
257 } /* End clevel loop */
258
259
260 /* To compute the totalsize, we should take into account the 10
261 compression levels */
262 totalsize += (size * nchunks * niter * 10.);
263
264 aligned_free(src);
265 free(srccpy);
266 aligned_free(dest2);
267 for (i = 0; i < nchunks; i++) {
268 aligned_free(dest[i]);
269 }
270 }
271
272
273 /* Compute a sensible value for nchunks */
get_nchunks(int size_,int ws)274 int get_nchunks(int size_, int ws) {
275 int nchunks_;
276
277 nchunks_ = ws / size_;
278 if (nchunks_ > NCHUNKS) nchunks_ = NCHUNKS;
279 if (nchunks_ < 1) nchunks_ = 1;
280 return nchunks_;
281 }
282
print_compress_info(void)283 void print_compress_info(void) {
284 char* name = NULL, * version = NULL;
285 int ret;
286
287 printf("Blosc version: %s (%s)\n", BLOSC_VERSION_STRING, BLOSC_VERSION_DATE);
288
289 printf("List of supported compressors in this build: %s\n",
290 blosc_list_compressors());
291
292 printf("Supported compression libraries:\n");
293 ret = blosc_get_complib_info("blosclz", &name, &version);
294 if (ret >= 0) printf(" %s: %s\n", name, version);
295 free(name); free(version);
296 ret = blosc_get_complib_info("lz4", &name, &version);
297 if (ret >= 0) printf(" %s: %s\n", name, version);
298 free(name); free(version);
299 ret = blosc_get_complib_info("zlib", &name, &version);
300 if (ret >= 0) printf(" %s: %s\n", name, version);
301 free(name); free(version);
302 ret = blosc_get_complib_info("zstd", &name, &version);
303 if (ret >= 0) printf(" %s: %s\n", name, version);
304 free(name); free(version);
305 }
306
307
main(int argc,char * argv[])308 int main(int argc, char* argv[]) {
309 char compressor[32];
310 char shuffle[32] = "shuffle";
311 char bsuite[32];
312 int single = 1;
313 int suite = 0;
314 int hard_suite = 0;
315 int extreme_suite = 0;
316 int debug_suite = 0;
317 int nthreads = 8; /* The number of threads */
318 int size = 8 * MB; /* Buffer size */
319 int elsize = 4; /* Datatype size */
320 int rshift = 19; /* Significant bits */
321 unsigned int workingset = 256 * MB; /* The maximum allocated memory */
322 int nthreads_, size_, elsize_, rshift_, i;
323 FILE* output_file = stdout;
324 blosc_timestamp_t last, current;
325 double totaltime;
326 char usage[256];
327
328 print_compress_info();
329
330 strncpy(usage, "Usage: bench [blosclz | lz4 | lz4hc | zlib | zstd] "
331 "[noshuffle | shuffle | bitshuffle] "
332 "[single | suite | hardsuite | extremesuite | debugsuite] "
333 "[nthreads] [bufsize(bytes)] [typesize] [sbits]", 255);
334
335 if (argc < 1) {
336 printf("%s\n", usage);
337 exit(1);
338 }
339
340 if (argc >= 2) {
341 strcpy(compressor, argv[1]);
342 }
343 else {
344 strcpy(compressor, "blosclz");
345 }
346
347 if (strcmp(compressor, "blosclz") != 0 &&
348 strcmp(compressor, "lz4") != 0 &&
349 strcmp(compressor, "lz4hc") != 0 &&
350 strcmp(compressor, "zlib") != 0 &&
351 strcmp(compressor, "zstd") != 0) {
352 printf("No such compressor: '%s'\n", compressor);
353 printf("%s\n", usage);
354 exit(2);
355 }
356
357 if (argc >= 3) {
358 strcpy(shuffle, argv[2]);
359 if (strcmp(shuffle, "shuffle") != 0 &&
360 strcmp(shuffle, "bitshuffle") != 0 &&
361 strcmp(shuffle, "noshuffle") != 0) {
362 printf("No such shuffler: '%s'\n", shuffle);
363 printf("%s\n", usage);
364 exit(2);
365 }
366 }
367
368 if (argc < 4)
369 strcpy(bsuite, "single");
370 else
371 strcpy(bsuite, argv[3]);
372
373 if (strcmp(bsuite, "single") == 0) {
374 single = 1;
375 }
376 else if (strcmp(bsuite, "test") == 0) {
377 single = 1;
378 workingset /= 2;
379 }
380 else if (strcmp(bsuite, "suite") == 0) {
381 suite = 1;
382 }
383 else if (strcmp(bsuite, "hardsuite") == 0) {
384 hard_suite = 1;
385 workingset /= 4;
386 /* Values here are ending points for loops */
387 nthreads = 2;
388 size = 8 * MB;
389 elsize = 32;
390 rshift = 32;
391 }
392 else if (strcmp(bsuite, "extremesuite") == 0) {
393 extreme_suite = 1;
394 workingset /= 8;
395 niter = 1;
396 /* Values here are ending points for loops */
397 nthreads = 4;
398 size = 16 * MB;
399 elsize = 32;
400 rshift = 32;
401 }
402 else if (strcmp(bsuite, "debugsuite") == 0) {
403 debug_suite = 1;
404 workingset /= 8;
405 niter = 1;
406 /* Warning: values here are starting points for loops. This is
407 useful for debugging. */
408 nthreads = 1;
409 size = 16 * KB;
410 elsize = 1;
411 rshift = 0;
412 }
413 else {
414 printf("%s\n", usage);
415 exit(1);
416 }
417
418 printf("Using compressor: %s\n", compressor);
419 printf("Using shuffle type: %s\n", shuffle);
420 printf("Running suite: %s\n", bsuite);
421
422 if (argc >= 5) {
423 nthreads = (int)strtol(argv[4], NULL, 10);
424 }
425 if (argc >= 6) {
426 size = (int)strtol(argv[5], NULL, 10);
427 }
428 if (argc >= 7) {
429 elsize = (int)strtol(argv[6], NULL, 10);
430 }
431 if (argc >= 8) {
432 rshift = (int)strtol(argv[7], NULL, 10);
433 }
434
435 if ((argc >= 9) || !(single || suite || hard_suite || extreme_suite)) {
436 printf("%s\n", usage);
437 exit(1);
438 }
439
440 nchunks = get_nchunks(size, workingset);
441 blosc_set_timestamp(&last);
442
443 blosc_init();
444
445 if (suite) {
446 for (nthreads_ = 1; nthreads_ <= nthreads; nthreads_++) {
447 do_bench(compressor, shuffle, nthreads_, size, elsize, rshift, output_file);
448 }
449 }
450 else if (hard_suite) {
451 /* Let's start the rshift loop by 4 so that 19 is visited. This
452 is to allow a direct comparison with the plain suite, that runs
453 precisely at 19 significant bits. */
454 for (rshift_ = 4; rshift_ <= rshift; rshift_ += 5) {
455 for (elsize_ = 1; elsize_ <= elsize; elsize_ *= 2) {
456 /* The next loop is for getting sizes that are not power of 2 */
457 for (i = -elsize_; i <= elsize_; i += elsize_) {
458 for (size_ = 32 * KB; size_ <= size; size_ *= 2) {
459 nchunks = get_nchunks(size_ + i, workingset);
460 niter = 1;
461 for (nthreads_ = 1; nthreads_ <= nthreads; nthreads_++) {
462 do_bench(compressor, shuffle, nthreads_, size_ + i, elsize_, rshift_, output_file);
463 blosc_set_timestamp(¤t);
464 totaltime = blosc_elapsed_secs(last, current);
465 printf("Elapsed time:\t %6.1f s. Processed data: %.1f GB\n",
466 totaltime, totalsize / GB);
467 }
468 }
469 }
470 }
471 }
472 }
473 else if (extreme_suite) {
474 for (rshift_ = 0; rshift_ <= rshift; rshift_++) {
475 for (elsize_ = 1; elsize_ <= elsize; elsize_++) {
476 /* The next loop is for getting sizes that are not power of 2 */
477 for (i = -elsize_ * 2; i <= elsize_ * 2; i += elsize_) {
478 for (size_ = 32 * KB; size_ <= size; size_ *= 2) {
479 nchunks = get_nchunks(size_ + i, workingset);
480 for (nthreads_ = 1; nthreads_ <= nthreads; nthreads_++) {
481 do_bench(compressor, shuffle, nthreads_, size_ + i, elsize_, rshift_, output_file);
482 blosc_set_timestamp(¤t);
483 totaltime = blosc_elapsed_secs(last, current);
484 printf("Elapsed time:\t %6.1f s. Processed data: %.1f GB\n",
485 totaltime, totalsize / GB);
486 }
487 }
488 }
489 }
490 }
491 }
492 else if (debug_suite) {
493 for (rshift_ = rshift; rshift_ <= 32; rshift_++) {
494 for (elsize_ = elsize; elsize_ <= 32; elsize_++) {
495 /* The next loop is for getting sizes that are not power of 2 */
496 for (i = -elsize_ * 2; i <= elsize_ * 2; i += elsize_) {
497 for (size_ = size; size_ <= 16 * MB; size_ *= 2) {
498 nchunks = get_nchunks(size_ + i, workingset);
499 for (nthreads_ = nthreads; nthreads_ <= 6; nthreads_++) {
500 do_bench(compressor, shuffle, nthreads_, size_ + i, elsize_, rshift_, output_file);
501 blosc_set_timestamp(¤t);
502 totaltime = blosc_elapsed_secs(last, current);
503 printf("Elapsed time:\t %6.1f s. Processed data: %.1f GB\n",
504 totaltime, totalsize / GB);
505 }
506 }
507 }
508 }
509 }
510 }
511 /* Single mode */
512 else {
513 do_bench(compressor, shuffle, nthreads, size, elsize, rshift, output_file);
514 }
515
516 /* Print out some statistics */
517 blosc_set_timestamp(¤t);
518 totaltime = (float)blosc_elapsed_secs(last, current);
519 printf("\nRound-trip compr/decompr on %.1f GB\n", totalsize / GB);
520 printf("Elapsed time:\t %6.1f s, %.1f MB/s\n",
521 totaltime, totalsize * 2 * 1.1 / (MB * totaltime));
522
523 /* Free blosc resources */
524 blosc_free_resources();
525 blosc_destroy();
526 return 0;
527 }
528