1 /* 2 ** Copyright (C) 2001-2020 by Carnegie Mellon University. 3 ** 4 ** @OPENSOURCE_LICENSE_START@ 5 ** See license information in ../../LICENSE.txt 6 ** @OPENSOURCE_LICENSE_END@ 7 */ 8 9 /* 10 ** rwsort reads SiLK Flow Records from the standard input or from 11 ** named files and sorts them on one or more user-specified fields. 12 ** 13 ** See rwsort.c for implementation details. 14 ** 15 */ 16 #ifndef _RWSORT_H 17 #define _RWSORT_H 18 #ifdef __cplusplus 19 extern "C" { 20 #endif 21 22 #include <silk/silk.h> 23 24 RCSIDENTVAR(rcsID_RWSORT_H, "$SiLK: rwsort.h ef14e54179be 2020-04-14 21:57:45Z mthomas $"); 25 26 #include <silk/rwascii.h> 27 #include <silk/rwrec.h> 28 #include <silk/skplugin.h> 29 #include <silk/skipaddr.h> 30 #include <silk/skstream.h> 31 #include <silk/sktempfile.h> 32 #include <silk/utils.h> 33 34 /* use TRACEMSG_LEVEL as our tracing variable */ 35 #define TRACEMSG(msg) TRACEMSG_TO_TRACEMSGLVL(1, msg) 36 #include <silk/sktracemsg.h> 37 38 39 /* LOCAL DEFINES AND TYPEDEFS */ 40 41 /* 42 * The default buffer size to use, unless the user selects a 43 * different value with the --sort-buffer-size switch. 44 * 45 * Support of a buffer of almost 2GB. 46 */ 47 #define DEFAULT_SORT_BUFFER_SIZE "1920m" 48 49 /* 50 * We do not allocate the buffer at once, but use realloc() to grow 51 * the buffer linearly to the maximum size. The following is the 52 * number of steps to take to reach the maximum size. The number 53 * of realloc() calls will be once less than this value. 54 * 55 * If the initial allocation fails, the number of chunks is 56 * incremented---making the size of the initial malloc() 57 * smaller---and allocation is attempted again. 58 */ 59 #define NUM_CHUNKS 6 60 61 /* 62 * Do not allocate more than this number of bytes at a time. 63 * 64 * If dividing the buffer size by NUM_CHUNKS gives a chunk size 65 * larger than this; determine the number of chunks by dividing the 66 * buffer size by this value. 67 * 68 * Use a value of 1g 69 */ 70 #define MAX_CHUNK_SIZE ((size_t)(0x40000000)) 71 72 /* 73 * If we cannot allocate a buffer that will hold at least this many 74 * records, give up. 75 */ 76 #define MIN_IN_CORE_RECORDS 1000 77 78 /* 79 * Maximum number of files to attempt to merge-sort at once. 80 */ 81 #define MAX_MERGE_FILES 1024 82 83 /* 84 * Maximum number of fields that can come from plugins. Allow four 85 * per plug-in. 86 */ 87 #define MAX_PLUGIN_KEY_FIELDS 32 88 89 /* 90 * Maximum bytes allotted to a "node", which is the complete rwRec 91 * and the bytes required by all keys that can come from plug-ins. 92 * Allow 8 bytes per field, plus enough space for an rwRec. 93 */ 94 #define MAX_NODE_SIZE ((size_t)(8 * MAX_PLUGIN_KEY_FIELDS + SK_MAX_RECORD_SIZE)) 95 96 /* 97 * The maximum buffer size is the maximum size we can allocate. 98 */ 99 #define MAXIMUM_SORT_BUFFER_SIZE ((size_t)(SIZE_MAX)) 100 101 /* 102 * The minium buffer size. 103 */ 104 #define MINIMUM_SORT_BUFFER_SIZE ((size_t)(MAX_NODE_SIZE * MIN_IN_CORE_RECORDS)) 105 106 107 /* for key fields that come from plug-ins, this struct will hold 108 * information about a single field */ 109 typedef struct key_field_st { 110 /* The plugin field handle */ 111 skplugin_field_t *kf_field_handle; 112 /* the byte-offset for this field */ 113 size_t kf_offset; 114 /* the byte-width of this field */ 115 size_t kf_width; 116 } key_field_t; 117 118 119 120 /* VARIABLES */ 121 122 /* number of fields to sort over; skStringMapParse() sets this */ 123 extern uint32_t num_fields; 124 125 /* IDs of the fields to sort over; skStringMapParse() sets it; values 126 * are from the rwrec_printable_fields_t enum and from values that 127 * come from plug-ins. */ 128 extern uint32_t *sort_fields; 129 130 /* the size of a "node". Because the output from rwsort are SiLK 131 * records, the node size includes the complete rwRec, plus any binary 132 * fields that we get from plug-ins to use as the key. This node_size 133 * value may increase when we parse the --fields switch. */ 134 extern size_t node_size; 135 136 /* the columns that make up the key that come from plug-ins */ 137 extern key_field_t key_fields[MAX_PLUGIN_KEY_FIELDS]; 138 139 /* the number of these key_fields */ 140 extern size_t key_num_fields; 141 142 /* output stream */ 143 extern skstream_t *out_stream; 144 145 /* temp file context */ 146 extern sk_tempfilectx_t *tmpctx; 147 148 /* whether the user wants to reverse the sort order */ 149 extern int reverse; 150 151 /* whether to treat the input files as already sorted */ 152 extern int presorted_input; 153 154 /* maximum amount of RAM to attempt to allocate */ 155 extern size_t sort_buffer_size; 156 157 /* FUNCTIONS */ 158 159 void 160 appExit( 161 int status) 162 NORETURN; 163 void 164 appSetup( 165 int argc, 166 char **argv); 167 int 168 appNextInput( 169 skstream_t **stream); 170 171 #ifdef __cplusplus 172 } 173 #endif 174 #endif /* _RWSORT_H */ 175 176 /* 177 ** Local Variables: 178 ** mode:c 179 ** indent-tabs-mode:nil 180 ** c-basic-offset:4 181 ** End: 182 */ 183