1// -*- mode: c++; mode: visual-line; mode: flyspell; fill-column: 100000 -*-
2/***************************************************************************
3 *  doc/common.dox
4 *
5 *  Part of the STXXL. See http://stxxl.sourceforge.net
6 *
7 *  Copyright (C) 2013 Timo Bingmann <tb@panthema.net>
8 *
9 *  Distributed under the Boost Software License, Version 1.0.
10 *  (See accompanying file LICENSE_1_0.txt or copy at
11 *  http://www.boost.org/LICENSE_1_0.txt)
12 **************************************************************************/
13
14namespace stxxl {
15
16////////////////////////////////////////////////////////////////////////////////
17/** \page common Common Utilities and Helpers
18
19\author Timo Bingmann (2013)
20
21A lots of basic utility classes and helper functions have accumulated in STXXL. Try are usually fundamental enough to be also used in an application program.  Before implementing a common software utility, please check the list below; it might already exist in STXXL:
22
23- \subpage common_io_counter "I/O statistics and performance counter"
24- \subpage common_random "random number generators"
25- \subpage common_timer "timestamp and timer function"
26- \subpage common_simple_vector "a non-growing, non-initializing simple_vector"
27- \subpage common_counting_ptr "reference counted (shared) objects via counting_ptr"
28- \subpage common_cmdline "command line parser"
29- \subpage common_binary_buffer "serialization of variable data structures into blobs"
30- \subpage common_thread_sync "synchronization primitives for multi-threading"
31- \subpage common_logging "logging macros"
32- \subpage common_assert "macros for checking assertions"
33- \subpage common_throw "macros for throwing exceptions"
34- \subpage common_types "signed and unsigned integer types"
35- \subpage common_log2 "calculating log_2(x)"
36- \subpage common_misc_macros "miscellaneous macros"
37- \subpage common_misc_funcs "miscellaneous functions"
38
39*/
40
41////////////////////////////////////////////////////////////////////////////////
42
43/** \page common_random Random Number Generators
44
45See also file common/rand.h
46
47Measuring the time consumption of program sections are often of great interest. The Stxxl comes with several build-in pseudo random number generators as shown below:
48
49\code
50
51stxxl::random_number32 rand32;  // integer values in [0, 2^32)
52stxxl::random_number64 rand64;  // integer values in [0, 2^64)
53stxxl::random_uniform_slow urand_slow;  // uniform values in [0.0, 1.0)
54stxxl::random_uniform_fast urand_fast;  // uniform values in [0.0, 1.0)
55stxxl::random_number<> n_rand;  // integer values in [0,N)
56
57unsigned int random32 = rand32();
58stxxl::uint64 random64 = rand64();
59double urandom_slow = urand_slow();
60double urandom_fast = urand_fast();
61unsigned int n_random = n_rand(123456789);
62
63STXXL_MSG("random 32 bit number: " << random32);
64STXXL_MSG("random 64 bit number: " << random64);
65STXXL_MSG("random number between [0.0, 1.0) (slow): " << urandom_slow);
66STXXL_MSG("random number between [0.0, 1.0) (fast): " << urandom_fast);
67STXXL_MSG("random number between [0,123456789): " << n_random);
68
69\endcode
70
71*/
72
73////////////////////////////////////////////////////////////////////////////////
74
75/** \page common_timer Timestamp and Timer Classes
76
77See also file common/timer.h
78
79Measuring the time certain parts of an algorithm or the entire algorithm consume will often be of great interest. The STXXL provides build-in time measurement class stxxl::timer which can be used as follows:
80
81\code
82#include <stxxl/timer>  // make timer class available
83
84stxxl::timer Timer;  // create Timer object
85
86Timer.start();
87
88// code section which shall be measured
89
90Timer.stop();
91
92// get results:
93STXXL_MSG(",easured time: " << (Timer.seconds()) << " (seconds), " << (Timer.mseconds()) << " (milliseconds), " << (Timer.useconds()) << " (microseconds))
94
95Timer.reset();  // reset clock to zero which allows to run start() again
96
97\endcode
98
99As an alternative, one can also work on the timestamp itself:
100
101\code
102double start = stxxl::timestamp();
103
104// code section to be measured
105
106double stop = stxxl::timestamp();
107
108STXXL_MSG("measured time: " << (stop - start) << " seconds.");
109\endcode
110
111*/
112
113////////////////////////////////////////////////////////////////////////////////
114
115/** \page common_simple_vector A Non-growing, Non-initializing Simpler Vector
116
117For applications where a std::vector is overkill, or one wishes to allocate an uninitialied POD array, the \ref simple_vector is a good method.
118
119*/
120
121////////////////////////////////////////////////////////////////////////////////
122
123/** \page common_counting_ptr Reference Counted (Shared) Objects
124
125Some objects in STXXL are reference counted. This is usually done for large objects, which should not be copied deeply in assignments. Instead, a reference counting pointer is used, which holds a handle while the pointer exists and deletes the object once all pointers go out of scope. Examples are matrices and \ref stream::sorted_runs.
126
127The method used is called \ref counting_ptr or intrusive reference counting.  This is similar, but not identical to boost or TR1's \c shared_ptr.
128
129The \ref counting_ptr is accompanied by \ref counted_object, which contains the actual reference counter (a single integer). A reference counted object must derive from \ref counted_object :
130
131\code
132struct something : public stxxl::counted_object
133{
134};
135\endcode
136
137Code that now wishes to use pointers referencing this object, will typedef an \ref counting_ptr, which is used to increment and decrement the included reference counter automatically.
138
139\code
140typedef stxxl::counting_ptr<something> something_ptr
141{
142    // create new instance of something
143    something_ptr p1 = new something;
144    {
145        // create a new reference to the same instance (no deep copy!)
146        something_ptr p2 = p1;
147        // this block end will decrement the reference count, but not delete the object
148    }
149    // this block end will delete the object
150}
151\endcode
152
153The \ref counting_ptr can generally be used like a usual pointer or \c shared_ptr (see the docs for more).
154
155There is also \ref const_counting_ptr to return const objects (note that a const \ref counting_ptr will only render the pointer const, not the enclosed object).
156
157*/
158
159////////////////////////////////////////////////////////////////////////////////
160
161/** \page common_cmdline Command Line Parser
162
163STXXL now contains a rather sophisticated command line parser for C++, \ref cmdline_parser, which enables rapid creation of complex command line constructions. Maybe most importantly for application with external memory: the parser will recognize byte sizes with SI/IEC suffixes like '2 GiB' and transform it appropriately.
164
165\snippet examples/common/cmdline.cpp example
166
167When running the program above without arguments, it will print:
168\verbatim
169$ ./cmdline
170Missing required argument for parameter 'filename'
171
172Usage: ./cmdline [options] <filename>
173
174This may some day be a useful program, which solves many serious problems of
175the real world and achives global peace.
176
177Author: Timo Bingmann <tb@panthema.net>
178
179Parameters:
180  filename  A filename to process
181Options:
182  -r, --rounds N  Run N rounds of the experiment.
183  -s, --size      Number of bytes to process.
184\endverbatim
185
186Nice output, notice the line wrapping of the description and formatting of parameters and arguments. These too are wrapped if the description is too long.
187
188We now try to give the program some arguments:
189\verbatim
190$ ./cmdline -s 2GiB -r 42 /dev/null
191Option -s, --size set to 2147483648.
192Option -r, --rounds N set to 42.
193Parameter filename set to "/dev/null".
194Command line parsed okay.
195Parameters:
196  filename        (string)            "/dev/null"
197Options:
198  -r, --rounds N  (unsigned integer)  42
199  -s, --size      (bytes)             2147483648
200\endverbatim
201The output shows pretty much what happens. The command line parser is by default in a verbose mode outputting all arguments and values parsed. The debug summary shows to have values the corresponding variables were set.
202
203One feature worth naming is that the parser also supports lists of strings, i.e. \c std::vector<std::string> via \ref cmdline_parser::add_param_stringlist() and similar.
204
205\example examples/common/cmdline.cpp
206This example is documented in \ref common_cmdline tutorial.
207
208*/
209
210////////////////////////////////////////////////////////////////////////////////
211
212/** \page common_binary_buffer Serializing Variable Data Structures with binary_buffer
213
214Some applications of STXXL will require variable data structures. Currently there is not much support for this in STXXL.
215
216For serializing information into in-memory data blocks, the STXXL provides the helper classes \ref binary_buffer and \ref binary_reader. These provide functions \ref binary_buffer::put<>() to append arbitrary integral data types and \ref binary_reader::get<>() to read these again. Serialization and deserialization of variable data structures are then composed of identical sequences of put()/get().
217
218Additionally, the classes already provide methods to serialize variable length strings (together with their lengths), and thereby also sub-block serialization. These functions are called \ref binary_buffer::put_string() and \ref binary_reader::get_string().
219
220Furthermore, to squeeze small integers into fewer bytes, they classes also contain "varint" encoding, where each byte contains 7 data bits and one continuation bit. These functions are called \ref binary_buffer::put_varint() and \ref binary_reader::get_varint().
221
222The following example fills a binary_buffer with some data elements:
223\snippet tests/common/test_binary_buffer.cpp serialize
224
225And the following binary_reader example deserializes the data elements and check's their content.
226\snippet tests/common/test_binary_buffer.cpp deserialize
227
228*/
229
230////////////////////////////////////////////////////////////////////////////////
231
232/** \page common_thread_sync Synchronization Primitives for Multi-Threading
233
234To support multi-threading, some parts of STXXL use synchronization primitives to ensure correct results. The primitives are based either on pthreads or on Boost classes.
235
236\section mutex Mutexes
237
238For simple mutual exclusion contexts, stxxl::mutex objects should be used together with scoped locks:
239
240\code
241class Something
242{
243    stxxl::mutex m_mtx;
244    void critical_function()
245    {
246        stxxl::scoped_mutex_lock lock(m_mtx);
247        // do something requiring locking
248    }
249};
250\endcode
251
252\section semaphore Semaphores
253
254Additionally stxxl::semaphore is available if counting is required.
255
256\section further Further Primitives: State and OnOff-Switch
257
258stxxl::state is a synchronized state switching mechanism?
259
260stxxl::onoff_switch is a two state semaphore thing?
261
262*/
263
264////////////////////////////////////////////////////////////////////////////////
265
266/** \page common_logging Logging Macros STXXL_MSG
267
268All STXXL components should output log or trace messages using the following macros. There are two basic methods for logging using ostream syntax:
269
270\code
271// for plain messages
272STXXL_MSG("text " << var)
273// for error messages
274STXXL_ERRMSG("error message " << reason)
275\endcode
276
277For debugging and tracing the following macros can be used for increasing
278levels of verbosity:
279
280\code
281// level 0 (for current debugging)
282STXXL_VERBOSE0("text " << var)
283// level 1,2 and 3 for more verbose debugging level
284STXXL_VERBOSE1("text " << var)
285STXXL_VERBOSE2("text " << var)
286STXXL_VERBOSE3("text " << var)
287\endcode
288
289A method used by some submodule authors to create their own levels of verbosity is to make their own debugging macros:
290
291\code
292#define STXXL_VERBOSE_VECTOR(msg) STXXL_VERBOSE1("vector[" << static_cast<const void *>(this) << "]::" << msg)
293\endcode
294
295*/
296
297////////////////////////////////////////////////////////////////////////////////
298
299/** \page common_assert Macros for Checking Assertions
300
301There are quite a bunch of macros for testing assertions. You must be careful to pick the right one depending on when and what you want to assert on.
302
303# Compile-time Assertions: STXXL_STATIC_ASSERT
304
305To check specific conditions at compile time use \ref STXXL_STATIC_ASSERT.
306
307\code
308struct item { int a,b,c,d; }
309STXXL_STATIC_ASSERT(sizeof(item) == 4 * sizeof(int));
310\endcode
311
312# Assertions in Unit Tests: STXXL_CHECK
313
314Assertions in unit tests must use the following macros to ensure that the condition is also checked in release builds (where a plain \c "assert()" is void). These \c CHECK function should NOT be used to test return values, since we try to throw exceptions instead of aborting the program.
315
316\code
317// test a condition
318STXXL_CHECK( 2+2 == 4 );
319// test a condition and output a more verbose reason on failure
320STXXL_CHECK2( 2+2 == 4, "We cannot count!");
321\endcode
322
323Sometimes one also wants to check that a specific expression \b throws an exception. This checking can be done automatically using a <tt>try { } catch {}</tt> by using \ref STXXL_CHECK_THROW.
324
325# Plain Assertions: assert
326
327For the usual assertions, that should be removed in production code for performance, we use the standard \c "assert()" function.
328
329However, there is also \ref STXXL_ASSERT(), which can be used as a replacement for \c assert(), when compiler warnings about unused variables or typedefs occur. The issue is that assert() completely removes the code, whereas \ref STXXL_ASSERT() keeps the code encloses it inside \c if(0).
330
331*/
332
333////////////////////////////////////////////////////////////////////////////////
334
335/** \page common_throw Macros for Throwing Exception
336
337The STXXL provides several pre-defined exception macros to detect run-time errors. The basic ones are:
338
339- STXXL_THROW(exception_type, error_message)
340- STXXL_THROW2(exception_type, location, error_message)
341- STXXL_THROW_ERRNO(exception_type, error_message)
342- STXXL_THROW_INVALID_ARGUMENT(error_message)
343- STXXL_THROW_UNREACHABLE(error_message)
344
345In addition, we also defined \b conditional throw macros, which check the outcome of a function call:
346
347- STXXL_THROW_IF(expr, exception_type, error_message)
348- STXXL_THROW_NE_0(expr, exception_type, error_message)
349- STXXL_THROW_EQ_0(expr, exception_type, error_message)
350- STXXL_THROW_LT_0(expr, exception_type, error_message)
351
352For checking system calls which set errno, the following macros are used to also provide strerror information for the user:
353
354- STXXL_THROW_ERRNO_IF(expr, exception_type, error_message)
355- STXXL_THROW_ERRNO_NE_0(expr, exception_type, error_message)
356- STXXL_THROW_ERRNO_EQ_0(expr, exception_type, error_message)
357- STXXL_THROW_ERRNO_LT_0(expr, exception_type, error_message)
358
359For checking pthread system calls, a special macro is needed, because these do not set errno. Instead they return the errno value:
360
361- STXXL_CHECK_PTHREAD_CALL(pthread call)
362
363And for WINAPI calls there is a special macro to call GetLastError and format it in a nice way:
364
365- STXXL_THROW_WIN_LASTERROR(exception_type, error_message)
366
367*/
368
369////////////////////////////////////////////////////////////////////////////////
370
371/** \page common_log2 Calculating log2(x) for Integers and at Compile-Time
372
373STXXL provides three methods to calculate log2(x), which is often needed for binary trees, etc.
374
375The first is during \b compile-time using template meta-programming magic:
376
377\code
378#include <stxxl/bits/common/tmeta.h>
379
380std::cout << stxxl::LOG2<10000>::floor << std::endl;
381std::cout << stxxl::LOG2<10000>::ceil << std::endl;
382\endcode
383
384The second is for calculating log2(x) for \b integer arguments using simple bit shift arithmetic:
385
386\code
387#include <stxxl/bits/common/utils.h>
388
389std::cout << stxxl::ilog2_floor(10000) << std::endl;
390std::cout << stxxl::ilog2_ceil(10000) << std::endl;
391\endcode
392
393The third and probably least useful is to use conversion to \b double and \c math.h's facilities:
394
395\code
396#include <stxxl/bits/common/utils.h>
397
398std::cout << stxxl::log2_floor(10000) << std::endl;
399std::cout << stxxl::log2_ceil(10000) << std::endl;
400\endcode
401
402*/
403
404////////////////////////////////////////////////////////////////////////////////
405
406/** \page common_types Signed and Unsigned Integer Types
407
408STXXL provides two very important types: \ref stxxl::int_type and \ref stxxl::unsigned_type. These should be used for general counting and indexing types, as they are defined to be the size of a register on the machines: on 32-bit machines the two types are 4 bytes size, while on a 64-bit machine the two types are 8 bytes in size!
409
410The previous types are for general purpose counting. For real 64-bit integers, also on 32-bit machines, STXXL also provides a stxx::uint64 type (independent of other headers).
411
412See the file common/types.h
413
414\section common_types_uint uint40 and uint48 Unsigned Integer Types
415
416When storing file offsets in external memory, one often does not require full 64-bit indexes. Mostly, 40-bit or 48-bit are sufficient, if only < 1 TiB or < 16 TiB of data are processed. If one stores theses integers in five or six bytes, the total I/O volume can be reduced significantly.
417
418Since this issue occurs commonly in EM algorithms, STXXL provides two types: stxxl::uint40 and stxxl::uint48.
419
420See the file common/uint_types.h
421
422*/
423
424////////////////////////////////////////////////////////////////////////////////
425
426/** \page common_misc_macros Miscellaneous Macros
427
428\section namespaces STXXL_BEGIN_NAMESPACE
429
430A long, long time ago, not all compilers supported C++ namespaces, thus STXXL uses the macros \ref STXXL_BEGIN_NAMESPACE and \ref STXXL_END_NAMESPACE, which open and close the \c "namespace stxxl".
431
432\section unused STXXL_UNUSED
433
434\ref STXXL_UNUSED is not actually a macro. It is a remedy against "unused variable" warnings, for whatever reason. Usage:
435
436\code
437void function(int x)
438{
439    STXXL_UNUSED(x);
440}
441\endcode
442
443\section likely LIKELY and UNLIKEY
444
445Some compilers have facilities to specify whether a condition is likely or unlikely to be true. This may have consequences on how to layout the assembler code better.
446
447\code
448if (LIKELY(x > 1)) { ... }
449if (UNLIKELY(x > 8)) { ... }
450\endcode
451
452\section deprecated Deprecated Functions
453
454Some compilers can warn the user about deprecated function by tagging them in the source. In STXXL we use the macro \ref STXXL_DEPRECATED(...) to enclose deprecated functions.
455
456*/
457
458////////////////////////////////////////////////////////////////////////////////
459
460/** \page common_misc_funcs Miscellaneous Functions
461
462\section parse_filesize Parsing Filesizes with K, M, G suffixes
463
464Since with STXXL one often has to parse large file or disk sizes, there is a function called \ref parse_SI_IEC_size(), which accepts strings like "1 GiB" or "20 TB" as input.
465
466See the \ref install_config documentation page on the format of accepted file size strings.
467
468*/
469
470////////////////////////////////////////////////////////////////////////////////
471
472/** \page common_io_counter I/O Performance Counter
473
474The STXXL library provides various I/O performance counters (stxxl::stats class) which can be used to get an extensive set of I/O statistics. They can be accessed as follows:
475
476\code
477  // generate stats instance
478  stxxl::stats * Stats = stxxl::stats::get_instance();
479
480  // start measurement here
481  stxxl::stats_data stats_begin(*Stats);
482
483  // some computation ...
484
485  // substract current stats from stats at the beginning of the measurement
486  std::cout << (stxxl::stats_data(*Stats) - stats_begin);
487\endcode
488
489The Stats ostream holds various measured I/O data:
490
491\verbatim
492STXXL I/O statistics
493 total number of reads                      : 2
494 average block size (read)                  : 2097152 (2.000 MiB)
495 number of bytes read from disks            : 4194304 (4.000 MiB)
496 time spent in serving all read requests    : 0.062768 s @ 63.7268 MiB/s
497 time spent in reading (parallel read time) : 0.062768 s @ 63.7268 MiB/s
498 total number of writes                     : 2
499 average block size (write)                 : 2097152 (2.000 MiB)
500 number of bytes written to disks           : 4194304 (4.000 MiB)
501 time spent in serving all write requests   : 0.0495751 s @ 80.6857 MiB/s
502 time spent in writing (parallel write time): 0.0495751 s @ 80.6857 MiB/s
503 time spent in I/O (parallel I/O time)      : 0.112343 s @ 71.2104 MiB/s
504 I/O wait time                              : 0.104572 s
505 I/O wait4read time                         : 0.054934 s
506 I/O wait4write time                        : 0.049638 s
507 Time since the last reset                  : 0.605008 s
508\endverbatim
509
510We can access individual I/O data in contrast to the whole content of Stats ostream by:
511
512\code
513std::cout << Stats->get_written_volume() << std::endl;  // print number of bytes written to the disks
514\endcode
515
516\b Hint: There's another statistical parameter which may be in developer's interest: the maximum number of bytes (the peak) allocated in external memory during program run. This parameter can be accessed by:
517
518\code
519stxxl::block_manager * bm = stxxl::block_manager::get_instance();
520// lots of external work here...
521std::cout << "max: " << bm->get_maximum_allocation() << std::endl;  // max. number of bytes allocated until now
522\endcode
523
524See \ref stxxl::stats and \ref stxxl::stats_data class reference for all provided individual functions.
525
526*/
527
528////////////////////////////////////////////////////////////////////////////////
529
530} // namespace stxxl
531