1 /*
2 ** Copyright (C) 2001-2020 by Carnegie Mellon University.
3 **
4 ** @OPENSOURCE_LICENSE_START@
5 ** See license information in ../../LICENSE.txt
6 ** @OPENSOURCE_LICENSE_END@
7 */
8 
9 /*
10 **  rwsort reads SiLK Flow Records from the standard input or from
11 **  named files and sorts them on one or more user-specified fields.
12 **
13 **  See rwsort.c for implementation details.
14 **
15 */
16 #ifndef _RWSORT_H
17 #define _RWSORT_H
18 #ifdef __cplusplus
19 extern "C" {
20 #endif
21 
22 #include <silk/silk.h>
23 
24 RCSIDENTVAR(rcsID_RWSORT_H, "$SiLK: rwsort.h ef14e54179be 2020-04-14 21:57:45Z mthomas $");
25 
26 #include <silk/rwascii.h>
27 #include <silk/rwrec.h>
28 #include <silk/skplugin.h>
29 #include <silk/skipaddr.h>
30 #include <silk/skstream.h>
31 #include <silk/sktempfile.h>
32 #include <silk/utils.h>
33 
34 /* use TRACEMSG_LEVEL as our tracing variable; TRACEMSG() passes level 1 */
35 #define TRACEMSG(msg) TRACEMSG_TO_TRACEMSGLVL(1, msg)
36 #include <silk/sktracemsg.h>
37 
38 
39 /* LOCAL DEFINES AND TYPEDEFS */
40 
41 /*
42  *    The default sort buffer size, used unless the user selects a
43  *    different value with the --sort-buffer-size switch.
44  *
45  *    The value is a string; "1920m" gives a buffer of almost 2GB.
46  */
47 #define DEFAULT_SORT_BUFFER_SIZE    "1920m"
48 
49 /*
50  *    We do not allocate the buffer all at once, but use realloc() to
51  *    grow the buffer linearly to the maximum size.  The following is
52  *    the number of steps to take to reach the maximum size.  The
53  *    number of realloc() calls will be one less than this value.
54  *
55  *    If the initial allocation fails, the number of chunks is
56  *    incremented---making the size of the initial malloc()
57  *    smaller---and allocation is attempted again.
58  */
59 #define NUM_CHUNKS  6
60 
61 /*
62  *    Do not allocate more than this number of bytes at a time.
63  *
64  *    If dividing the buffer size by NUM_CHUNKS gives a chunk size
65  *    larger than this, determine the number of chunks by dividing the
66  *    buffer size by this value instead.
67  *
68  *    The value is 1 GiB (0x40000000 == 2^30 bytes).
69  */
70 #define MAX_CHUNK_SIZE      ((size_t)(0x40000000))
71 
72 /*
73  *    The fewest records the in-core buffer must be able to hold.  If
74  *    we cannot allocate a buffer for at least this many, give up.
75  */
76 #define MIN_IN_CORE_RECORDS     1000
77 
78 /*
79  *    The maximum number of files to attempt to merge-sort at one time.
80  */
81 #define MAX_MERGE_FILES         1024
82 
83 /*
84  *    Maximum number of key fields that can come from plug-ins.  Allow
85  *    four per plug-in.  This is the length of the key_fields[] array.
86  */
87 #define MAX_PLUGIN_KEY_FIELDS   32
88 
89 /*
90  *    Maximum bytes allotted to a "node": a complete rwRec
91  *    (SK_MAX_RECORD_SIZE bytes) plus the bytes required by all keys
92  *    from plug-ins (8 bytes for each of MAX_PLUGIN_KEY_FIELDS fields).
93  */
94 #define MAX_NODE_SIZE ((size_t)(8 * MAX_PLUGIN_KEY_FIELDS + SK_MAX_RECORD_SIZE))
95 
96 /*
97  *    The maximum sort buffer size is the most we can allocate: SIZE_MAX.
98  */
99 #define MAXIMUM_SORT_BUFFER_SIZE    ((size_t)(SIZE_MAX))
100 
101 /*
102  *    The minimum sort buffer size: MIN_IN_CORE_RECORDS maximum-size nodes.
103  */
104 #define MINIMUM_SORT_BUFFER_SIZE ((size_t)(MAX_NODE_SIZE * MIN_IN_CORE_RECORDS))
105 
106 
107 /* For key fields that come from plug-ins, this struct holds the
108  * information about a single field; see the key_fields[] array. */
109 typedef struct key_field_st {
110     /* the plug-in (skplugin) handle for this field */
111     skplugin_field_t *kf_field_handle;
112     /* the byte-offset for this field's value (NOTE(review): presumably within the node -- confirm) */
113     size_t            kf_offset;
114     /* the byte-width of this field's value */
115     size_t            kf_width;
116 } key_field_t;
117 
118 
119 
120 /* VARIABLES */
121 
122 /* the number of fields to sort over; set by skStringMapParse() */
123 extern uint32_t num_fields;
124 
125 /* IDs of the fields to sort over; set by skStringMapParse().  The
126  * values are from the rwrec_printable_fields_t enum and from IDs
127  * that come from plug-ins. */
128 extern uint32_t *sort_fields;
129 
130 /* the size of a "node".  Because the output of rwsort is SiLK
131  * records, a node holds the complete rwRec plus any binary fields
132  * that we get from plug-ins to use as the key.  This node_size
133  * value may increase when we parse the --fields switch. */
134 extern size_t node_size;
135 
136 /* the key columns that come from plug-ins; at most MAX_PLUGIN_KEY_FIELDS */
137 extern key_field_t key_fields[MAX_PLUGIN_KEY_FIELDS];
138 
139 /* the number of plug-in key fields held in the key_fields[] array */
140 extern size_t key_num_fields;
141 
142 /* the output stream */
143 extern skstream_t *out_stream;
144 
145 /* the temporary file context */
146 extern sk_tempfilectx_t *tmpctx;
147 
148 /* whether the user wants the sort order reversed */
149 extern int reverse;
150 
151 /* whether to treat each of the input files as already sorted */
152 extern int presorted_input;
153 
154 /* the maximum amount of RAM to attempt to allocate for the sort buffer */
155 extern size_t sort_buffer_size;
156 
157 /* FUNCTIONS */
158 
/* Does not return to the caller (declared NORETURN).
 * NOTE(review): presumably terminates the program using 'status' as
 * the exit code -- confirm against the definition. */
159 void
160 appExit(
161     int                 status)
162     NORETURN;
/* Takes the program's argument count and argument vector; returns
 * nothing.  NOTE(review): presumably performs application start-up,
 * including command-line parsing -- confirm against the definition. */
163 void
164 appSetup(
165     int                 argc,
166     char              **argv);
/* Takes the address of an skstream_t pointer and returns an int.
 * NOTE(review): presumably stores the next input stream in '*stream';
 * the meaning of the return value is not visible in this header --
 * confirm against the definition. */
167 int
168 appNextInput(
169     skstream_t        **stream);
170 
171 #ifdef __cplusplus
172 }
173 #endif
174 #endif /* _RWSORT_H */
175 
176 /*
177 ** Local Variables:
178 ** mode:c
179 ** indent-tabs-mode:nil
180 ** c-basic-offset:4
181 ** End:
182 */
183