1// -*- mode: c++; mode: visual-line; mode: flyspell; fill-column: 100000 -*- 2/*************************************************************************** 3 * doc/common.dox 4 * 5 * Part of the STXXL. See http://stxxl.sourceforge.net 6 * 7 * Copyright (C) 2013 Timo Bingmann <tb@panthema.net> 8 * 9 * Distributed under the Boost Software License, Version 1.0. 10 * (See accompanying file LICENSE_1_0.txt or copy at 11 * http://www.boost.org/LICENSE_1_0.txt) 12 **************************************************************************/ 13 14namespace stxxl { 15 16//////////////////////////////////////////////////////////////////////////////// 17/** \page common Common Utilities and Helpers 18 19\author Timo Bingmann (2013) 20 21A lots of basic utility classes and helper functions have accumulated in STXXL. Try are usually fundamental enough to be also used in an application program. Before implementing a common software utility, please check the list below; it might already exist in STXXL: 22 23- \subpage common_io_counter "I/O statistics and performance counter" 24- \subpage common_random "random number generators" 25- \subpage common_timer "timestamp and timer function" 26- \subpage common_simple_vector "a non-growing, non-initializing simple_vector" 27- \subpage common_counting_ptr "reference counted (shared) objects via counting_ptr" 28- \subpage common_cmdline "command line parser" 29- \subpage common_binary_buffer "serialization of variable data structures into blobs" 30- \subpage common_thread_sync "synchronization primitives for multi-threading" 31- \subpage common_logging "logging macros" 32- \subpage common_assert "macros for checking assertions" 33- \subpage common_throw "macros for throwing exceptions" 34- \subpage common_types "signed and unsigned integer types" 35- \subpage common_log2 "calculating log_2(x)" 36- \subpage common_misc_macros "miscellaneous macros" 37- \subpage common_misc_funcs "miscellaneous functions" 38 39*/ 40 41//////////////////////////////////////////////////////////////////////////////// 42 43/** \page common_random Random Number Generators 44 45See also file common/rand.h 46 47Measuring the time consumption of program sections are often of great interest. The Stxxl comes with several build-in pseudo random number generators as shown below: 48 49\code 50 51stxxl::random_number32 rand32; // integer values in [0, 2^32) 52stxxl::random_number64 rand64; // integer values in [0, 2^64) 53stxxl::random_uniform_slow urand_slow; // uniform values in [0.0, 1.0) 54stxxl::random_uniform_fast urand_fast; // uniform values in [0.0, 1.0) 55stxxl::random_number<> n_rand; // integer values in [0,N) 56 57unsigned int random32 = rand32(); 58stxxl::uint64 random64 = rand64(); 59double urandom_slow = urand_slow(); 60double urandom_fast = urand_fast(); 61unsigned int n_random = n_rand(123456789); 62 63STXXL_MSG("random 32 bit number: " << random32); 64STXXL_MSG("random 64 bit number: " << random64); 65STXXL_MSG("random number between [0.0, 1.0) (slow): " << urandom_slow); 66STXXL_MSG("random number between [0.0, 1.0) (fast): " << urandom_fast); 67STXXL_MSG("random number between [0,123456789): " << n_random); 68 69\endcode 70 71*/ 72 73//////////////////////////////////////////////////////////////////////////////// 74 75/** \page common_timer Timestamp and Timer Classes 76 77See also file common/timer.h 78 79Measuring the time certain parts of an algorithm or the entire algorithm consume will often be of great interest. The STXXL provides build-in time measurement class stxxl::timer which can be used as follows: 80 81\code 82#include <stxxl/timer> // make timer class available 83 84stxxl::timer Timer; // create Timer object 85 86Timer.start(); 87 88// code section which shall be measured 89 90Timer.stop(); 91 92// get results: 93STXXL_MSG(",easured time: " << (Timer.seconds()) << " (seconds), " << (Timer.mseconds()) << " (milliseconds), " << (Timer.useconds()) << " (microseconds)) 94 95Timer.reset(); // reset clock to zero which allows to run start() again 96 97\endcode 98 99As an alternative, one can also work on the timestamp itself: 100 101\code 102double start = stxxl::timestamp(); 103 104// code section to be measured 105 106double stop = stxxl::timestamp(); 107 108STXXL_MSG("measured time: " << (stop - start) << " seconds."); 109\endcode 110 111*/ 112 113//////////////////////////////////////////////////////////////////////////////// 114 115/** \page common_simple_vector A Non-growing, Non-initializing Simpler Vector 116 117For applications where a std::vector is overkill, or one wishes to allocate an uninitialied POD array, the \ref simple_vector is a good method. 118 119*/ 120 121//////////////////////////////////////////////////////////////////////////////// 122 123/** \page common_counting_ptr Reference Counted (Shared) Objects 124 125Some objects in STXXL are reference counted. This is usually done for large objects, which should not be copied deeply in assignments. Instead, a reference counting pointer is used, which holds a handle while the pointer exists and deletes the object once all pointers go out of scope. Examples are matrices and \ref stream::sorted_runs. 126 127The method used is called \ref counting_ptr or intrusive reference counting. This is similar, but not identical to boost or TR1's \c shared_ptr. 128 129The \ref counting_ptr is accompanied by \ref counted_object, which contains the actual reference counter (a single integer). A reference counted object must derive from \ref counted_object : 130 131\code 132struct something : public stxxl::counted_object 133{ 134}; 135\endcode 136 137Code that now wishes to use pointers referencing this object, will typedef an \ref counting_ptr, which is used to increment and decrement the included reference counter automatically. 138 139\code 140typedef stxxl::counting_ptr<something> something_ptr 141{ 142 // create new instance of something 143 something_ptr p1 = new something; 144 { 145 // create a new reference to the same instance (no deep copy!) 146 something_ptr p2 = p1; 147 // this block end will decrement the reference count, but not delete the object 148 } 149 // this block end will delete the object 150} 151\endcode 152 153The \ref counting_ptr can generally be used like a usual pointer or \c shared_ptr (see the docs for more). 154 155There is also \ref const_counting_ptr to return const objects (note that a const \ref counting_ptr will only render the pointer const, not the enclosed object). 156 157*/ 158 159//////////////////////////////////////////////////////////////////////////////// 160 161/** \page common_cmdline Command Line Parser 162 163STXXL now contains a rather sophisticated command line parser for C++, \ref cmdline_parser, which enables rapid creation of complex command line constructions. Maybe most importantly for application with external memory: the parser will recognize byte sizes with SI/IEC suffixes like '2 GiB' and transform it appropriately. 164 165\snippet examples/common/cmdline.cpp example 166 167When running the program above without arguments, it will print: 168\verbatim 169$ ./cmdline 170Missing required argument for parameter 'filename' 171 172Usage: ./cmdline [options] <filename> 173 174This may some day be a useful program, which solves many serious problems of 175the real world and achives global peace. 176 177Author: Timo Bingmann <tb@panthema.net> 178 179Parameters: 180 filename A filename to process 181Options: 182 -r, --rounds N Run N rounds of the experiment. 183 -s, --size Number of bytes to process. 184\endverbatim 185 186Nice output, notice the line wrapping of the description and formatting of parameters and arguments. These too are wrapped if the description is too long. 187 188We now try to give the program some arguments: 189\verbatim 190$ ./cmdline -s 2GiB -r 42 /dev/null 191Option -s, --size set to 2147483648. 192Option -r, --rounds N set to 42. 193Parameter filename set to "/dev/null". 194Command line parsed okay. 195Parameters: 196 filename (string) "/dev/null" 197Options: 198 -r, --rounds N (unsigned integer) 42 199 -s, --size (bytes) 2147483648 200\endverbatim 201The output shows pretty much what happens. The command line parser is by default in a verbose mode outputting all arguments and values parsed. The debug summary shows to have values the corresponding variables were set. 202 203One feature worth naming is that the parser also supports lists of strings, i.e. \c std::vector<std::string> via \ref cmdline_parser::add_param_stringlist() and similar. 204 205\example examples/common/cmdline.cpp 206This example is documented in \ref common_cmdline tutorial. 207 208*/ 209 210//////////////////////////////////////////////////////////////////////////////// 211 212/** \page common_binary_buffer Serializing Variable Data Structures with binary_buffer 213 214Some applications of STXXL will require variable data structures. Currently there is not much support for this in STXXL. 215 216For serializing information into in-memory data blocks, the STXXL provides the helper classes \ref binary_buffer and \ref binary_reader. These provide functions \ref binary_buffer::put<>() to append arbitrary integral data types and \ref binary_reader::get<>() to read these again. Serialization and deserialization of variable data structures are then composed of identical sequences of put()/get(). 217 218Additionally, the classes already provide methods to serialize variable length strings (together with their lengths), and thereby also sub-block serialization. These functions are called \ref binary_buffer::put_string() and \ref binary_reader::get_string(). 219 220Furthermore, to squeeze small integers into fewer bytes, they classes also contain "varint" encoding, where each byte contains 7 data bits and one continuation bit. These functions are called \ref binary_buffer::put_varint() and \ref binary_reader::get_varint(). 221 222The following example fills a binary_buffer with some data elements: 223\snippet tests/common/test_binary_buffer.cpp serialize 224 225And the following binary_reader example deserializes the data elements and check's their content. 226\snippet tests/common/test_binary_buffer.cpp deserialize 227 228*/ 229 230//////////////////////////////////////////////////////////////////////////////// 231 232/** \page common_thread_sync Synchronization Primitives for Multi-Threading 233 234To support multi-threading, some parts of STXXL use synchronization primitives to ensure correct results. The primitives are based either on pthreads or on Boost classes. 235 236\section mutex Mutexes 237 238For simple mutual exclusion contexts, stxxl::mutex objects should be used together with scoped locks: 239 240\code 241class Something 242{ 243 stxxl::mutex m_mtx; 244 void critical_function() 245 { 246 stxxl::scoped_mutex_lock lock(m_mtx); 247 // do something requiring locking 248 } 249}; 250\endcode 251 252\section semaphore Semaphores 253 254Additionally stxxl::semaphore is available if counting is required. 255 256\section further Further Primitives: State and OnOff-Switch 257 258stxxl::state is a synchronized state switching mechanism? 259 260stxxl::onoff_switch is a two state semaphore thing? 261 262*/ 263 264//////////////////////////////////////////////////////////////////////////////// 265 266/** \page common_logging Logging Macros STXXL_MSG 267 268All STXXL components should output log or trace messages using the following macros. There are two basic methods for logging using ostream syntax: 269 270\code 271// for plain messages 272STXXL_MSG("text " << var) 273// for error messages 274STXXL_ERRMSG("error message " << reason) 275\endcode 276 277For debugging and tracing the following macros can be used for increasing 278levels of verbosity: 279 280\code 281// level 0 (for current debugging) 282STXXL_VERBOSE0("text " << var) 283// level 1,2 and 3 for more verbose debugging level 284STXXL_VERBOSE1("text " << var) 285STXXL_VERBOSE2("text " << var) 286STXXL_VERBOSE3("text " << var) 287\endcode 288 289A method used by some submodule authors to create their own levels of verbosity is to make their own debugging macros: 290 291\code 292#define STXXL_VERBOSE_VECTOR(msg) STXXL_VERBOSE1("vector[" << static_cast<const void *>(this) << "]::" << msg) 293\endcode 294 295*/ 296 297//////////////////////////////////////////////////////////////////////////////// 298 299/** \page common_assert Macros for Checking Assertions 300 301There are quite a bunch of macros for testing assertions. You must be careful to pick the right one depending on when and what you want to assert on. 302 303# Compile-time Assertions: STXXL_STATIC_ASSERT 304 305To check specific conditions at compile time use \ref STXXL_STATIC_ASSERT. 306 307\code 308struct item { int a,b,c,d; } 309STXXL_STATIC_ASSERT(sizeof(item) == 4 * sizeof(int)); 310\endcode 311 312# Assertions in Unit Tests: STXXL_CHECK 313 314Assertions in unit tests must use the following macros to ensure that the condition is also checked in release builds (where a plain \c "assert()" is void). These \c CHECK function should NOT be used to test return values, since we try to throw exceptions instead of aborting the program. 315 316\code 317// test a condition 318STXXL_CHECK( 2+2 == 4 ); 319// test a condition and output a more verbose reason on failure 320STXXL_CHECK2( 2+2 == 4, "We cannot count!"); 321\endcode 322 323Sometimes one also wants to check that a specific expression \b throws an exception. This checking can be done automatically using a <tt>try { } catch {}</tt> by using \ref STXXL_CHECK_THROW. 324 325# Plain Assertions: assert 326 327For the usual assertions, that should be removed in production code for performance, we use the standard \c "assert()" function. 328 329However, there is also \ref STXXL_ASSERT(), which can be used as a replacement for \c assert(), when compiler warnings about unused variables or typedefs occur. The issue is that assert() completely removes the code, whereas \ref STXXL_ASSERT() keeps the code encloses it inside \c if(0). 330 331*/ 332 333//////////////////////////////////////////////////////////////////////////////// 334 335/** \page common_throw Macros for Throwing Exception 336 337The STXXL provides several pre-defined exception macros to detect run-time errors. The basic ones are: 338 339- STXXL_THROW(exception_type, error_message) 340- STXXL_THROW2(exception_type, location, error_message) 341- STXXL_THROW_ERRNO(exception_type, error_message) 342- STXXL_THROW_INVALID_ARGUMENT(error_message) 343- STXXL_THROW_UNREACHABLE(error_message) 344 345In addition, we also defined \b conditional throw macros, which check the outcome of a function call: 346 347- STXXL_THROW_IF(expr, exception_type, error_message) 348- STXXL_THROW_NE_0(expr, exception_type, error_message) 349- STXXL_THROW_EQ_0(expr, exception_type, error_message) 350- STXXL_THROW_LT_0(expr, exception_type, error_message) 351 352For checking system calls which set errno, the following macros are used to also provide strerror information for the user: 353 354- STXXL_THROW_ERRNO_IF(expr, exception_type, error_message) 355- STXXL_THROW_ERRNO_NE_0(expr, exception_type, error_message) 356- STXXL_THROW_ERRNO_EQ_0(expr, exception_type, error_message) 357- STXXL_THROW_ERRNO_LT_0(expr, exception_type, error_message) 358 359For checking pthread system calls, a special macro is needed, because these do not set errno. Instead they return the errno value: 360 361- STXXL_CHECK_PTHREAD_CALL(pthread call) 362 363And for WINAPI calls there is a special macro to call GetLastError and format it in a nice way: 364 365- STXXL_THROW_WIN_LASTERROR(exception_type, error_message) 366 367*/ 368 369//////////////////////////////////////////////////////////////////////////////// 370 371/** \page common_log2 Calculating log2(x) for Integers and at Compile-Time 372 373STXXL provides three methods to calculate log2(x), which is often needed for binary trees, etc. 374 375The first is during \b compile-time using template meta-programming magic: 376 377\code 378#include <stxxl/bits/common/tmeta.h> 379 380std::cout << stxxl::LOG2<10000>::floor << std::endl; 381std::cout << stxxl::LOG2<10000>::ceil << std::endl; 382\endcode 383 384The second is for calculating log2(x) for \b integer arguments using simple bit shift arithmetic: 385 386\code 387#include <stxxl/bits/common/utils.h> 388 389std::cout << stxxl::ilog2_floor(10000) << std::endl; 390std::cout << stxxl::ilog2_ceil(10000) << std::endl; 391\endcode 392 393The third and probably least useful is to use conversion to \b double and \c math.h's facilities: 394 395\code 396#include <stxxl/bits/common/utils.h> 397 398std::cout << stxxl::log2_floor(10000) << std::endl; 399std::cout << stxxl::log2_ceil(10000) << std::endl; 400\endcode 401 402*/ 403 404//////////////////////////////////////////////////////////////////////////////// 405 406/** \page common_types Signed and Unsigned Integer Types 407 408STXXL provides two very important types: \ref stxxl::int_type and \ref stxxl::unsigned_type. These should be used for general counting and indexing types, as they are defined to be the size of a register on the machines: on 32-bit machines the two types are 4 bytes size, while on a 64-bit machine the two types are 8 bytes in size! 409 410The previous types are for general purpose counting. For real 64-bit integers, also on 32-bit machines, STXXL also provides a stxx::uint64 type (independent of other headers). 411 412See the file common/types.h 413 414\section common_types_uint uint40 and uint48 Unsigned Integer Types 415 416When storing file offsets in external memory, one often does not require full 64-bit indexes. Mostly, 40-bit or 48-bit are sufficient, if only < 1 TiB or < 16 TiB of data are processed. If one stores theses integers in five or six bytes, the total I/O volume can be reduced significantly. 417 418Since this issue occurs commonly in EM algorithms, STXXL provides two types: stxxl::uint40 and stxxl::uint48. 419 420See the file common/uint_types.h 421 422*/ 423 424//////////////////////////////////////////////////////////////////////////////// 425 426/** \page common_misc_macros Miscellaneous Macros 427 428\section namespaces STXXL_BEGIN_NAMESPACE 429 430A long, long time ago, not all compilers supported C++ namespaces, thus STXXL uses the macros \ref STXXL_BEGIN_NAMESPACE and \ref STXXL_END_NAMESPACE, which open and close the \c "namespace stxxl". 431 432\section unused STXXL_UNUSED 433 434\ref STXXL_UNUSED is not actually a macro. It is a remedy against "unused variable" warnings, for whatever reason. Usage: 435 436\code 437void function(int x) 438{ 439 STXXL_UNUSED(x); 440} 441\endcode 442 443\section likely LIKELY and UNLIKEY 444 445Some compilers have facilities to specify whether a condition is likely or unlikely to be true. This may have consequences on how to layout the assembler code better. 446 447\code 448if (LIKELY(x > 1)) { ... } 449if (UNLIKELY(x > 8)) { ... } 450\endcode 451 452\section deprecated Deprecated Functions 453 454Some compilers can warn the user about deprecated function by tagging them in the source. In STXXL we use the macro \ref STXXL_DEPRECATED(...) to enclose deprecated functions. 455 456*/ 457 458//////////////////////////////////////////////////////////////////////////////// 459 460/** \page common_misc_funcs Miscellaneous Functions 461 462\section parse_filesize Parsing Filesizes with K, M, G suffixes 463 464Since with STXXL one often has to parse large file or disk sizes, there is a function called \ref parse_SI_IEC_size(), which accepts strings like "1 GiB" or "20 TB" as input. 465 466See the \ref install_config documentation page on the format of accepted file size strings. 467 468*/ 469 470//////////////////////////////////////////////////////////////////////////////// 471 472/** \page common_io_counter I/O Performance Counter 473 474The STXXL library provides various I/O performance counters (stxxl::stats class) which can be used to get an extensive set of I/O statistics. They can be accessed as follows: 475 476\code 477 // generate stats instance 478 stxxl::stats * Stats = stxxl::stats::get_instance(); 479 480 // start measurement here 481 stxxl::stats_data stats_begin(*Stats); 482 483 // some computation ... 484 485 // substract current stats from stats at the beginning of the measurement 486 std::cout << (stxxl::stats_data(*Stats) - stats_begin); 487\endcode 488 489The Stats ostream holds various measured I/O data: 490 491\verbatim 492STXXL I/O statistics 493 total number of reads : 2 494 average block size (read) : 2097152 (2.000 MiB) 495 number of bytes read from disks : 4194304 (4.000 MiB) 496 time spent in serving all read requests : 0.062768 s @ 63.7268 MiB/s 497 time spent in reading (parallel read time) : 0.062768 s @ 63.7268 MiB/s 498 total number of writes : 2 499 average block size (write) : 2097152 (2.000 MiB) 500 number of bytes written to disks : 4194304 (4.000 MiB) 501 time spent in serving all write requests : 0.0495751 s @ 80.6857 MiB/s 502 time spent in writing (parallel write time): 0.0495751 s @ 80.6857 MiB/s 503 time spent in I/O (parallel I/O time) : 0.112343 s @ 71.2104 MiB/s 504 I/O wait time : 0.104572 s 505 I/O wait4read time : 0.054934 s 506 I/O wait4write time : 0.049638 s 507 Time since the last reset : 0.605008 s 508\endverbatim 509 510We can access individual I/O data in contrast to the whole content of Stats ostream by: 511 512\code 513std::cout << Stats->get_written_volume() << std::endl; // print number of bytes written to the disks 514\endcode 515 516\b Hint: There's another statistical parameter which may be in developer's interest: the maximum number of bytes (the peak) allocated in external memory during program run. This parameter can be accessed by: 517 518\code 519stxxl::block_manager * bm = stxxl::block_manager::get_instance(); 520// lots of external work here... 521std::cout << "max: " << bm->get_maximum_allocation() << std::endl; // max. number of bytes allocated until now 522\endcode 523 524See \ref stxxl::stats and \ref stxxl::stats_data class reference for all provided individual functions. 525 526*/ 527 528//////////////////////////////////////////////////////////////////////////////// 529 530} // namespace stxxl 531