1 /*********************************************************************
2   Small benchmark for testing basic capabilities of Blosc.
3 
4   You can select different degrees of 'randomness' in input buffer, as
5   well as external datafiles (uncomment the lines after "For data
6   coming from a file" comment).
7 
8   For usage instructions of this benchmark, please see:
9 
10     http://blosc.org/synthetic-benchmarks.html
11 
12   I'm collecting speeds for different machines, so the output of your
13   benchmarks and your processor specifications are welcome!
14 
15   Author: Francesc Alted <francesc@blosc.org>
16 
17   Note: Compiling this with VS2008 does not work well with cmake.  Here
18   it is a way to compile the benchmark (with added support for LZ4):
19 
20   > cl /DHAVE_LZ4 /arch:SSE2 /Ox /Febench.exe /Iblosc /Iinternal-complibs\lz4-1.7.5 bench\bench.c blosc\blosc.c blosc\blosclz.c blosc\shuffle.c blosc\shuffle-sse2.c blosc\shuffle-generic.c blosc\bitshuffle-generic.c blosc\bitshuffle-sse2.c internal-complibs\lz4-1.7.5\*.c
21 
22   See LICENSES/BLOSC.txt for details about copyright and rights to use.
23 **********************************************************************/
24 
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
31 #if defined(_WIN32)
32   /* For QueryPerformanceCounter(), etc. */
33   #include <windows.h>
34 #elif defined(__MACH__) && defined(__APPLE__)
35   #include <mach/clock.h>
36   #include <mach/mach.h>
37   #include <time.h>
38   #include <sys/time.h>
39 #elif defined(__unix__) || defined(__HAIKU__)
40   #include <unistd.h>
41   #if defined(__GLIBC__)
42     #include <time.h>
43   #else
44     #include <sys/time.h>
45   #endif
46 #else
47   #error Unable to detect platform.
48 #endif
49 
50 
51 #include "../blosc/blosc.h"
52 
53 #define KB  1024
54 #define MB  (1024*KB)
55 #define GB  (1024*MB)
56 
57 #define NCHUNKS (32*1024)       /* maximum number of chunks */
58 #define MAX_THREADS 16
59 
60 
61 int nchunks = NCHUNKS;
62 int niter = 3;                  /* default number of iterations */
63 double totalsize = 0.;          /* total compressed/decompressed size */
64 
65 /* System-specific high-precision timing functions. */
66 #if defined(_WIN32)
67 
68 /* The type of timestamp used on this system. */
69 #define blosc_timestamp_t LARGE_INTEGER
70 
71 /* Set a timestamp value to the current time. */
/*
 * Store the current reading of the Windows performance counter.
 *
 * timestamp: destination that receives the counter value.
 */
void blosc_set_timestamp(blosc_timestamp_t* timestamp) {
  /* The BOOL result is discarded on purpose: the call is assumed to succeed. */
  (void)QueryPerformanceCounter(timestamp);
}
76 
77 /* Given two timestamp values, return the difference in microseconds. */
/*
 * Convert the distance between two performance-counter readings to
 * microseconds.
 *
 * start_time: reading taken at the beginning of the interval.
 * end_time:   reading taken at the end of the interval.
 *
 * Returns the elapsed time in microseconds as a double.
 */
double blosc_elapsed_usecs(blosc_timestamp_t start_time, blosc_timestamp_t end_time) {
  LARGE_INTEGER counter_frequency;
  double elapsed_ticks;
  double ticks_per_usec;

  /* The counter frequency is queried on every call rather than cached. */
  QueryPerformanceFrequency(&counter_frequency);

  elapsed_ticks = (double)(end_time.QuadPart - start_time.QuadPart);
  ticks_per_usec = (double)counter_frequency.QuadPart / 1e6;

  return elapsed_ticks / ticks_per_usec;
}
84 
85 #else
86 
87 /* The type of timestamp used on this system. */
88 #define blosc_timestamp_t struct timespec
89 
90 /* Set a timestamp value to the current time. */
/*
 * Store the current time in *timestamp.
 *
 * On Apple/Mach builds the time is read from the Mach CALENDAR_CLOCK service;
 * everywhere else it comes from clock_gettime(CLOCK_MONOTONIC).  Return codes
 * of the underlying calls are not inspected.
 *
 * timestamp: destination that receives the seconds/nanoseconds pair.
 */
void blosc_set_timestamp(blosc_timestamp_t* timestamp) {
#if !(defined(__MACH__) && defined(__APPLE__))
  clock_gettime(CLOCK_MONOTONIC, timestamp);
#else
  /* OS X does not have clock_gettime, so query the Mach clock service instead. */
  clock_serv_t clock_service;
  mach_timespec_t now;

  host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &clock_service);
  clock_get_time(clock_service, &now);
  /* Release the port acquired by host_get_clock_service(). */
  mach_port_deallocate(mach_task_self(), clock_service);

  timestamp->tv_sec = now.tv_sec;
  timestamp->tv_nsec = now.tv_nsec;
#endif
}
104 
105 /* Given two timestamp values, return the difference in microseconds. */
/*
 * Compute the time between two timestamps in microseconds.
 *
 * start_time: timestamp taken at the beginning of the interval.
 * end_time:   timestamp taken at the end of the interval.
 *
 * Returns the elapsed time in microseconds as a double.
 */
double blosc_elapsed_usecs(blosc_timestamp_t start_time, blosc_timestamp_t end_time) {
  /* Whole-second difference scaled up to microseconds. */
  double usecs_from_secs = 1e6 * (end_time.tv_sec - start_time.tv_sec);
  /* Nanosecond difference (may be negative) scaled down to microseconds. */
  double usecs_from_nsecs = 1e-3 * (end_time.tv_nsec - start_time.tv_nsec);

  return usecs_from_secs + usecs_from_nsecs;
}
110 
111 #endif
112 
/* Given two timestamps, return the time elapsed between them in seconds */
/*
 * last:    timestamp taken at the start of the interval.
 * current: timestamp taken at the end of the interval.
 *
 * Returns the interval length in seconds, derived from blosc_elapsed_usecs().
 */
double getseconds(blosc_timestamp_t last, blosc_timestamp_t current) {
  double elapsed_usecs = blosc_elapsed_usecs(last, current);

  return 1e-6 * elapsed_usecs;
}
117 
/* Given two timestamps, return the average time per chunk in usec */
/*
 * last:    timestamp taken before the timed loop.
 * current: timestamp taken after the timed loop.
 * niter:   number of iterations the loop performed.
 * nchunks: number of chunks processed in each iteration.
 *
 * Returns the elapsed microseconds divided by niter * nchunks.
 */
double get_usec_chunk(blosc_timestamp_t last, blosc_timestamp_t current, int niter, size_t nchunks) {
  double total_usecs = blosc_elapsed_usecs(last, current);
  /* The int * size_t product is evaluated in size_t. */
  size_t chunks_processed = niter * nchunks;

  return total_usecs / (double)chunks_processed;
}
123 
124 /* Define posix_memalign for Windows */
125 #if defined(_WIN32)
126 #include <malloc.h>
127 
/*
 * Minimal posix_memalign() replacement for Windows, built on
 * _aligned_malloc().
 *
 * memptr:    receives the allocated pointer (NULL when the allocation fails).
 * alignment: requested alignment, forwarded to _aligned_malloc().
 * size:      number of bytes to allocate.
 *
 * Returns 0 on success and ENOMEM when _aligned_malloc() hands back NULL, so
 * callers that test the return code actually notice a failed allocation.
 * Memory obtained here has to be released with _aligned_free().
 */
int posix_memalign(void **memptr, size_t alignment, size_t size)
{
	void *block = _aligned_malloc(size, alignment);

	*memptr = block;
	return (block == NULL) ? ENOMEM : 0;
}
133 
134 /* Buffers allocated with _aligned_malloc need to be freed with _aligned_free. */
135 #define aligned_free(memptr) _aligned_free(memptr)
136 #else
/* On non-Windows platforms, aligned memory can be freed in the usual way. */
138 #define aligned_free(memptr) free(memptr)
#endif  /* defined(_WIN32) */
140 
/*
 * Produce the synthetic sample for index `i`, keeping only the `rshift`
 * least-significant bits.
 *
 * The index is mixed with shifted copies of itself (<<26, <<18, <<11, <<3)
 * using XOR.  All arithmetic is done on unsigned int so that bits shifted out
 * of the word are simply discarded instead of triggering signed-overflow
 * undefined behaviour.  The code assumes a 32-bit int, like the `rshift < 32`
 * test it inherits.
 *
 * i:      sample index.
 * rshift: number of significant bits to keep; values >= 32 keep the whole
 *         word, values <= 0 keep nothing.
 *
 * Returns the masked value.  A result with the top bit set is converted back
 * to int, which yields a negative number on two's-complement targets.
 */
int get_value(int i, int rshift) {
  unsigned int seed = (unsigned int)i;
  unsigned int mixed = (seed << 26) ^ (seed << 18) ^ (seed << 11) ^ (seed << 3) ^ seed;

  /* Zero significant bits means an all-zero value; a negative count would be
     an invalid shift, so it is treated the same way. */
  if (rshift <= 0) {
    return 0;
  }
  if (rshift < 32) {
    mixed &= (1u << rshift) - 1u;
  }
  return (int)mixed;
}
150 
151 
/*
 * Fill a buffer with the synthetic benchmark pattern.
 *
 * src:    buffer to fill; it is written as an array of int.
 * size:   buffer size in bytes.  Only the size/sizeof(int) whole ints are
 *         written, any trailing bytes are left untouched.
 * rshift: number of significant bits, forwarded to get_value().
 */
void init_buffer(void *src, int size, int rshift) {
  int *words = (int *)src;
  size_t nwords = size / sizeof(int);
  unsigned int k;

  /* Fixed seed for reproducible results.  Only the optional rand()-based
     pattern listed below would consume it. */
  srand(1);

  /* Other fill patterns that can be swapped in by hand for words[k]:
   *   0
   *   0x01010101
   *   0x01020304
   *   k * 1/.3
   *   k
   *   rand() >> (32-rshift)
   */
  for (k = 0; k < nwords; ++k) {
    words[k] = get_value(k, rshift);
  }
}
171 
172 
do_bench(char * compressor,char * shuffle,int nthreads,int size,int elsize,int rshift,FILE * ofile)173 void do_bench(char *compressor, char *shuffle, int nthreads, int size, int elsize,
174               int rshift, FILE * ofile) {
175   void *src, *srccpy;
176   void *dest[NCHUNKS], *dest2;
177   int nbytes = 0, cbytes = 0;
178   int i, j, retcode;
179   unsigned char *orig, *round;
180   blosc_timestamp_t last, current;
181   double tmemcpy, tshuf, tunshuf;
182   int clevel, doshuffle;
183 
184   if (strcmp(shuffle, "shuffle") == 0) {
185       doshuffle = BLOSC_SHUFFLE;
186     }
187   else if (strcmp(shuffle, "bitshuffle") == 0) {
188       doshuffle = BLOSC_BITSHUFFLE;
189     }
190   else if (strcmp(shuffle, "noshuffle") == 0) {
191       doshuffle = BLOSC_NOSHUFFLE;
192     }
193   else abort();
194 
195   blosc_set_nthreads(nthreads);
196   if(blosc_set_compressor(compressor) < 0){
197     printf("Compiled w/o support for compressor: '%s', so sorry.\n",
198            compressor);
199     exit(1);
200   }
201 
202   /* Initialize buffers */
203   srccpy = malloc(size);
204   retcode = posix_memalign( (void **)(&src), 32, size);
205   if (retcode) abort();
206   retcode = posix_memalign( (void **)(&dest2), 32, size);
207   if (retcode) abort();
208 
209   /* zero src to initialize byte on it, and not only multiples of 4 */
210   memset(src, 0, size);
211   init_buffer(src, size, rshift);
212   memcpy(srccpy, src, size);
213   for (j = 0; j < nchunks; j++) {
214      retcode = posix_memalign( (void **)(&dest[j]), 32, size+BLOSC_MAX_OVERHEAD);
215      if (retcode) abort();
216   }
217 
218   fprintf(ofile, "--> %d, %d, %d, %d, %s, %s\n", nthreads, size, elsize,
219           rshift, compressor, shuffle);
220   fprintf(ofile, "********************** Run info ******************************\n");
221   fprintf(ofile, "Blosc version: %s (%s)\n", BLOSC_VERSION_STRING, BLOSC_VERSION_DATE);
222   fprintf(ofile, "Using synthetic data with %d significant bits (out of 32)\n", rshift);
223   fprintf(ofile, "Dataset size: %d bytes\tType size: %d bytes\n", size, elsize);
224   fprintf(ofile, "Working set: %.1f MB\t\t", (size * nchunks) / (float)MB);
225   fprintf(ofile, "Number of threads: %d\n", nthreads);
226   fprintf(ofile, "********************** Running benchmarks *********************\n");
227 
228   blosc_set_timestamp(&last);
229   for (i = 0; i < niter; i++) {
230     for (j = 0; j < nchunks; j++) {
231       memcpy(dest[j], src, size);
232     }
233   }
234   blosc_set_timestamp(&current);
235   tmemcpy = get_usec_chunk(last, current, niter, nchunks);
236   fprintf(ofile, "memcpy(write):\t\t %6.1f us, %.1f MB/s\n",
237          tmemcpy, (size * 1e6) / (tmemcpy*MB));
238 
239   blosc_set_timestamp(&last);
240   for (i = 0; i < niter; i++) {
241     for (j = 0; j < nchunks; j++) {
242       memcpy(dest2, dest[j], size);
243     }
244   }
245   blosc_set_timestamp(&current);
246   tmemcpy = get_usec_chunk(last, current, niter, nchunks);
247   fprintf(ofile, "memcpy(read):\t\t %6.1f us, %.1f MB/s\n",
248          tmemcpy, (size * 1e6) / (tmemcpy*MB));
249 
250   for (clevel=0; clevel<10; clevel++) {
251 
252     fprintf(ofile, "Compression level: %d\n", clevel);
253 
254     blosc_set_timestamp(&last);
255     for (i = 0; i < niter; i++) {
256       for (j = 0; j < nchunks; j++) {
257         cbytes = blosc_compress(clevel, doshuffle, elsize, size, src,
258                                 dest[j], size+BLOSC_MAX_OVERHEAD);
259       }
260     }
261     blosc_set_timestamp(&current);
262     tshuf = get_usec_chunk(last, current, niter, nchunks);
263     fprintf(ofile, "comp(write):\t %6.1f us, %.1f MB/s\t  ",
264            tshuf, (size * 1e6) / (tshuf*MB));
265     fprintf(ofile, "Final bytes: %d  ", cbytes);
266     if (cbytes > 0) {
267       fprintf(ofile, "Ratio: %3.2f", size/(float)cbytes);
268     }
269     fprintf(ofile, "\n");
270 
271     /* Compressor was unable to compress.  Copy the buffer manually. */
272     if (cbytes == 0) {
273       for (j = 0; j < nchunks; j++) {
274         memcpy(dest[j], src, size);
275       }
276     }
277 
278     blosc_set_timestamp(&last);
279     for (i = 0; i < niter; i++) {
280       for (j = 0; j < nchunks; j++) {
281         if (cbytes == 0) {
282           memcpy(dest2, dest[j], size);
283           nbytes = size;
284         }
285         else {
286           nbytes = blosc_decompress(dest[j], dest2, size);
287         }
288       }
289     }
290     blosc_set_timestamp(&current);
291     tunshuf = get_usec_chunk(last, current, niter, nchunks);
292     fprintf(ofile, "decomp(read):\t %6.1f us, %.1f MB/s\t  ",
293            tunshuf, (nbytes * 1e6) / (tunshuf*MB));
294     if (nbytes < 0) {
295       fprintf(ofile, "FAILED.  Error code: %d\n", nbytes);
296     }
297     /* fprintf(ofile, "Orig bytes: %d\tFinal bytes: %d\n", cbytes, nbytes); */
298 
299     /* Check if data has had a good roundtrip.
300        Byte-by-byte comparison is slow, so use 'memcmp' to check whether the
301        roundtripped data is correct. If not, fall back to the slow path to
302        print diagnostic messages. */
303     orig = (unsigned char *)srccpy;
304     round = (unsigned char *)dest2;
305     if (memcmp(orig, round, size) != 0)
306     {
307       for(i = 0; i<size; ++i){
308         if (orig[i] != round[i]) {
309           fprintf(ofile, "\nError: Original data and round-trip do not match in pos %d\n",
310                  (int)i);
311           fprintf(ofile, "Orig--> %x, round-trip--> %x\n", orig[i], round[i]);
312           break;
313         }
314       }
315     }
316     else { i = size; }
317 
318     if (i == size) fprintf(ofile, "OK\n");
319 
320   } /* End clevel loop */
321 
322 
323   /* To compute the totalsize, we should take into account the 10
324      compression levels */
325   totalsize += (size * nchunks * niter * 10.);
326 
327   aligned_free(src); free(srccpy); aligned_free(dest2);
328   for (i = 0; i < nchunks; i++) {
329     aligned_free(dest[i]);
330   }
331 
332 }
333 
334 
335 /* Compute a sensible value for nchunks */
get_nchunks(int size_,int ws)336 int get_nchunks(int size_, int ws) {
337   int nchunks;
338 
339   nchunks = ws / size_;
340   if (nchunks > NCHUNKS) nchunks = NCHUNKS;
341   if (nchunks < 1) nchunks = 1;
342   return nchunks;
343 }
344 
print_compress_info(void)345 void print_compress_info(void)
346 {
347   char *name = NULL, *version = NULL;
348   int ret;
349 
350   printf("Blosc version: %s (%s)\n", BLOSC_VERSION_STRING, BLOSC_VERSION_DATE);
351 
352   printf("List of supported compressors in this build: %s\n",
353          blosc_list_compressors());
354 
355   printf("Supported compression libraries:\n");
356   ret = blosc_get_complib_info("blosclz", &name, &version);
357   if (ret >= 0) printf("  %s: %s\n", name, version);
358   ret = blosc_get_complib_info("lz4", &name, &version);
359   if (ret >= 0) printf("  %s: %s\n", name, version);
360   ret = blosc_get_complib_info("snappy", &name, &version);
361   if (ret >= 0) printf("  %s: %s\n", name, version);
362   ret = blosc_get_complib_info("zlib", &name, &version);
363   if (ret >= 0) printf("  %s: %s\n", name, version);
364   ret = blosc_get_complib_info("zstd", &name, &version);
365   if (ret >= 0) printf("  %s: %s\n", name, version);
366 
367 }
368 
369 
main(int argc,char * argv[])370 int main(int argc, char *argv[]) {
371   char compressor[32];
372   char shuffle[32] = "shuffle";
373   char bsuite[32];
374   int single = 1;
375   int suite = 0;
376   int hard_suite = 0;
377   int extreme_suite = 0;
378   int debug_suite = 0;
379   int nthreads = 4;                     /* The number of threads */
380   int size = 4 * MB;                    /* Buffer size */
381   int elsize = 8;                       /* Datatype size */
382   int rshift = 19;                      /* Significant bits */
383   int workingset = 256 * MB;            /* The maximum allocated memory */
384   int nthreads_, size_, elsize_, rshift_, i;
385   FILE * output_file = stdout;
386   blosc_timestamp_t last, current;
387   float totaltime;
388   char usage[256];
389 
390   print_compress_info();
391 
392   strncpy(usage, "Usage: bench [blosclz | lz4 | lz4hc | snappy | zlib | zstd] "
393           "[noshuffle | shuffle | bitshuffle] "
394           "[single | suite | hardsuite | extremesuite | debugsuite] "
395           "[nthreads] [bufsize(bytes)] [typesize] [sbits]", 255);
396 
397   if (argc < 2) {
398     printf("%s\n", usage);
399     exit(1);
400   }
401 
402   strcpy(compressor, argv[1]);
403 
404   if (strcmp(compressor, "blosclz") != 0 &&
405       strcmp(compressor, "lz4") != 0 &&
406       strcmp(compressor, "lz4hc") != 0 &&
407       strcmp(compressor, "snappy") != 0 &&
408       strcmp(compressor, "zlib") != 0 &&
409       strcmp(compressor, "zstd") != 0) {
410     printf("No such compressor: '%s'\n", compressor);
411     printf("%s\n", usage);
412     exit(2);
413   }
414 
415   if (argc >= 3) {
416       strcpy(shuffle, argv[2]);
417       if (strcmp(shuffle, "shuffle") != 0 &&
418           strcmp(shuffle, "bitshuffle") != 0 &&
419           strcmp(shuffle, "noshuffle") != 0) {
420 	printf("No such shuffler: '%s'\n", shuffle);
421 	printf("%s\n", usage);
422 	exit(2);
423      }
424   }
425 
426   if (argc < 4)
427     strcpy(bsuite, "single");
428   else
429     strcpy(bsuite, argv[3]);
430 
431   if (strcmp(bsuite, "single") == 0) {
432     single = 1;
433   }
434   else if (strcmp(bsuite, "test") == 0) {
435     single = 1;
436     workingset = 128*MB;
437   }
438   else if (strcmp(bsuite, "suite") == 0) {
439     suite = 1;
440   }
441   else if (strcmp(bsuite, "hardsuite") == 0) {
442     hard_suite = 1;
443     workingset = 64*MB;
444     /* Values here are ending points for loops */
445     nthreads = 2;
446     size = 8*MB;
447     elsize = 32;
448     rshift = 32;
449   }
450   else if (strcmp(bsuite, "extremesuite") == 0) {
451     extreme_suite = 1;
452     workingset = 32*MB;
453     niter = 1;
454     /* Values here are ending points for loops */
455     nthreads = 4;
456     size = 16*MB;
457     elsize = 32;
458     rshift = 32;
459   }
460   else if (strcmp(bsuite, "debugsuite") == 0) {
461     debug_suite = 1;
462     workingset = 32*MB;
463     niter = 1;
464     /* Warning: values here are starting points for loops.  This is
465        useful for debugging. */
466     nthreads = 1;
467     size = 16*KB;
468     elsize = 1;
469     rshift = 0;
470   }
471   else {
472     printf("%s\n", usage);
473     exit(1);
474   }
475 
476   printf("Using compressor: %s\n", compressor);
477   printf("Using shuffle type: %s\n", shuffle);
478   printf("Running suite: %s\n", bsuite);
479 
480   if (argc >= 5) {
481     nthreads = atoi(argv[4]);
482   }
483   if (argc >= 6) {
484     size = atoi(argv[5]);
485   }
486   if (argc >= 7) {
487     elsize = atoi(argv[6]);
488   }
489   if (argc >= 8) {
490     rshift = atoi(argv[7]);
491   }
492 
493   if ((argc >= 9) || !(single || suite || hard_suite || extreme_suite)) {
494     printf("%s\n", usage);
495     exit(1);
496   }
497 
498   nchunks = get_nchunks(size, workingset);
499   blosc_set_timestamp(&last);
500 
501   blosc_init();
502 
503   if (suite) {
504     for (nthreads_=1; nthreads_ <= nthreads; nthreads_++) {
505       do_bench(compressor, shuffle, nthreads_, size, elsize, rshift, output_file);
506     }
507   }
508   else if (hard_suite) {
509     /* Let's start the rshift loop by 4 so that 19 is visited.  This
510        is to allow a direct comparison with the plain suite, that runs
511        precisely at 19 significant bits. */
512     for (rshift_ = 4; rshift_ <= rshift; rshift_ += 5) {
513       for (elsize_ = 1; elsize_ <= elsize; elsize_ *= 2) {
514         /* The next loop is for getting sizes that are not power of 2 */
515         for (i = -elsize_; i <= elsize_; i += elsize_) {
516           for (size_ = 32*KB; size_ <= size; size_ *= 2) {
517             nchunks = get_nchunks(size_+i, workingset);
518     	    niter = 1;
519             for (nthreads_ = 1; nthreads_ <= nthreads; nthreads_++) {
520               do_bench(compressor, shuffle, nthreads_, size_+i, elsize_, rshift_, output_file);
521               blosc_set_timestamp(&current);
522               totaltime = (float)getseconds(last, current);
523               printf("Elapsed time:\t %6.1f s.  Processed data: %.1f GB\n",
524                      totaltime, totalsize / GB);
525             }
526           }
527         }
528       }
529     }
530   }
531   else if (extreme_suite) {
532     for (rshift_ = 0; rshift_ <= rshift; rshift_++) {
533       for (elsize_ = 1; elsize_ <= elsize; elsize_++) {
534         /* The next loop is for getting sizes that are not power of 2 */
535         for (i = -elsize_*2; i <= elsize_*2; i += elsize_) {
536           for (size_ = 32*KB; size_ <= size; size_ *= 2) {
537             nchunks = get_nchunks(size_+i, workingset);
538             for (nthreads_ = 1; nthreads_ <= nthreads; nthreads_++) {
539               do_bench(compressor, shuffle, nthreads_, size_+i, elsize_, rshift_, output_file);
540               blosc_set_timestamp(&current);
541               totaltime = (float)getseconds(last, current);
542               printf("Elapsed time:\t %6.1f s.  Processed data: %.1f GB\n",
543                      totaltime, totalsize / GB);
544             }
545           }
546         }
547       }
548     }
549   }
550   else if (debug_suite) {
551     for (rshift_ = rshift; rshift_ <= 32; rshift_++) {
552       for (elsize_ = elsize; elsize_ <= 32; elsize_++) {
553         /* The next loop is for getting sizes that are not power of 2 */
554         for (i = -elsize_*2; i <= elsize_*2; i += elsize_) {
555           for (size_ = size; size_ <= 16*MB; size_ *= 2) {
556             nchunks = get_nchunks(size_+i, workingset);
557             for (nthreads_ = nthreads; nthreads_ <= 6; nthreads_++) {
558               do_bench(compressor, shuffle, nthreads_, size_+i, elsize_, rshift_, output_file);
559               blosc_set_timestamp(&current);
560               totaltime = (float)getseconds(last, current);
561               printf("Elapsed time:\t %6.1f s.  Processed data: %.1f GB\n",
562                      totaltime, totalsize / GB);
563             }
564           }
565         }
566       }
567     }
568   }
569   /* Single mode */
570   else {
571     do_bench(compressor, shuffle, nthreads, size, elsize, rshift, output_file);
572   }
573 
574   /* Print out some statistics */
575   blosc_set_timestamp(&current);
576   totaltime = (float)getseconds(last, current);
577   printf("\nRound-trip compr/decompr on %.1f GB\n", totalsize / GB);
578   printf("Elapsed time:\t %6.1f s, %.1f MB/s\n",
579          totaltime, totalsize*2*1.1/(MB*totaltime));
580 
581   /* Free blosc resources */
582   blosc_free_resources();
583   blosc_destroy();
584   return 0;
585 }
586