1 /*
2  *  Copyright (c) 2009-2020, Peter Haag
3  *  Copyright (c) 2008, SWITCH - Teleinformatikdienste fuer Lehre und Forschung
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions are met:
8  *
9  *   * Redistributions of source code must retain the above copyright notice,
10  *     this list of conditions and the following disclaimer.
11  *   * Redistributions in binary form must reproduce the above copyright notice,
12  *     this list of conditions and the following disclaimer in the documentation
13  *     and/or other materials provided with the distribution.
14  *   * Neither the name of the author nor the names of its contributors may be
15  *     used to endorse or promote products derived from this software without
16  *     specific prior written permission.
17  *
18  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  *  POSSIBILITY OF SUCH DAMAGE.
29  *
30  */
31 
32 #ifndef _NFFILE_H
33 #define _NFFILE_H 1
34 
35 #include "config.h"
36 
37 #include <stddef.h>
38 #include <sys/types.h>
39 #ifdef HAVE_STDINT_H
40 #include <stdint.h>
41 #endif
42 
/* size of the ident[] array in file_header_t */
#define IDENTLEN 128
/* NOTE(review): presumably the ident used when none is supplied - confirm against callers */
#define IDENTNONE "none"

/* status codes; NOTE(review): presumably returned by the block read functions - confirm */
#define NF_EOF     0
#define NF_ERROR   (-1)
#define NF_CORRUPT (-2)

/* file name constant; its users are not visible in this header */
#define NF_DUMPFILE "nfcapd.current"

/* compression identifiers, as produced by the FILE_COMPRESSION() macro */
#define NOT_COMPRESSED 0
#define LZO_COMPRESSED 1
#define BZ2_COMPRESSED 2
#define LZ4_COMPRESSED 3
56 
/*
 * Maximum fill level of the output buffer before its data is written to
 * the file. Flows are cached in this buffer before going to disk; the value
 * is a tradeoff between buffer size and the time needed to flush it, so that
 * the collector is not delayed by long I/O.
 */
#define WRITE_BUFFSIZE (1024 * 1024)

/*
 * Size used when allocating memory for the output buffer. Data other than
 * flow records, such as histograms, may be larger than WRITE_BUFFSIZE and
 * potentially have more time to be flushed to disk.
 */
#define BUFFSIZE (WRITE_BUFFSIZE * 5)
70 
/* If the output buffer reaches this limit, it gets flushed. This means
 * that 0.5MB of input data may produce at most 1MB of data in the output buffer;
 * otherwise a buffer overflow may occur and data does not get processed correctly.
 * However, every Process_vx function checks buffer boundaries.
 */
76 
77 /*
78  * nfdump binary file layout 1
79  * ===========================
 * Each data file starts with a file header, which identifies the file as an nfdump data file.
 * The magic 16bit integer at the beginning of each file must read 0xA50C. This also guarantees
 * that endian-dependent files are read correctly.
83  *
84  * Principal layout, recognized as LAYOUT_VERSION_1:
85  *
86  *   +-----------+-------------+-------------+-------------+-----+-------------+
87  *   |Fileheader | stat record | datablock 1 | datablock 2 | ... | datablock n |
88  *   +-----------+-------------+-------------+-------------+-----+-------------+
89  */
90 
91 
92 typedef struct file_header_s {
93 	uint16_t	magic;				// magic to recognize nfdump file type and endian type
94 #define MAGIC 0xA50C
95 
96 	uint16_t	version;			// version of binary file layout, incl. magic
97 #define LAYOUT_VERSION_1	1
98 
99 	uint32_t	flags;
100 #define NUM_FLAGS		4
101 #define FLAG_NOT_COMPRESSED	0x0		// records are not compressed
102 #define FLAG_LZO_COMPRESSED	0x1		// records are LZO compressed
103 #define FLAG_ANONYMIZED 	0x2		// flow data are anonimized
104 #define FLAG_UNUSED			0x4		// unused
105 #define FLAG_BZ2_COMPRESSED 0x8		// records are BZ2 compressed
106 #define FLAG_LZ4_COMPRESSED 0x10	// records are LZ4 compressed
107 #define COMPRESSION_MASK	0x19	// all compression bits
108 // shortcuts
109 
110 #define FILE_IS_NOT_COMPRESSED(n) (((n)->file_header->flags & COMPRESSION_MASK) == 0)
111 #define FILE_IS_LZO_COMPRESSED(n) ((n)->file_header->flags & FLAG_LZO_COMPRESSED)
112 #define FILE_IS_BZ2_COMPRESSED(n) ((n)->file_header->flags & FLAG_BZ2_COMPRESSED)
113 #define FILE_IS_LZ4_COMPRESSED(n) ((n)->file_header->flags & FLAG_LZ4_COMPRESSED)
114 #define FILE_COMPRESSION(n) (FILE_IS_LZO_COMPRESSED(n) ? LZO_COMPRESSED : (FILE_IS_BZ2_COMPRESSED(n) ? BZ2_COMPRESSED : (FILE_IS_LZ4_COMPRESSED(n) ? LZ4_COMPRESSED : NOT_COMPRESSED)))
115 
116 #define BLOCK_IS_COMPRESSED(n) ((n)->flags == 2 )
117 #define IP_ANONYMIZED(n) ((n)->file_header->flags & FLAG_ANONYMIZED)
118 
119 
120 	uint32_t	NumBlocks;			// number of data blocks in file
121 	char		ident[IDENTLEN];	// string identifier for this file
122 } file_header_t;
123 
/*
 * In file layout format 1 an implicit stat record follows the file
 * header. It holds the statistics information about all netflow
 * records in this file.
 */
typedef struct stat_record_s {
	/* overall stat */
	uint64_t	numflows;			// flows
	uint64_t	numbytes;			// bytes
	uint64_t	numpackets;			// packets

	/* flow stat, split into tcp/udp/icmp/other */
	uint64_t	numflows_tcp;
	uint64_t	numflows_udp;
	uint64_t	numflows_icmp;
	uint64_t	numflows_other;

	/* bytes stat, same split */
	uint64_t	numbytes_tcp;
	uint64_t	numbytes_udp;
	uint64_t	numbytes_icmp;
	uint64_t	numbytes_other;

	/* packet stat, same split */
	uint64_t	numpackets_tcp;
	uint64_t	numpackets_udp;
	uint64_t	numpackets_icmp;
	uint64_t	numpackets_other;

	/* time window; NOTE(review): presumably seconds plus a separate msec part - confirm */
	uint32_t	first_seen;
	uint32_t	last_seen;
	uint16_t	msec_first;
	uint16_t	msec_last;

	/* other */
	uint32_t	sequence_failure;
} stat_record_t;
158 
159 
/* data block type ids */
#define DATA_BLOCK_TYPE_1		1	// legacy nfdump 1.5.x data block type
#define DATA_BLOCK_TYPE_2		2	// nfdump 1.6.x data block type

/*
 * Block type 2:
 * =============
 * Each data block starts with a common data block header, which specifies
 * the size, the type and the number of records in this data block.
 */
typedef struct data_block_header_s {
	uint32_t	NumRecords;		// number of data records in data block
	uint32_t	size;			// size of this block in bytes without this header
	uint16_t	id;				// Block ID == DATA_BLOCK_TYPE_2
	/*
	 * flags values:
	 *   0 - compatibility
	 *   1 - block uncompressed
	 *   2 - block compressed
	 */
	uint16_t	flags;
} data_block_header_t;
182 
183 /*
184  * Generic file handle for reading/writing files
185  * if a file is read only writeto and block_header are NULL
186  */
187 typedef struct nffile_s {
188 	file_header_t		*file_header;	// file header
189 #define NUM_BUFFS 2
190 	void				*buff_pool[NUM_BUFFS];	// buffer space for read/write/compression
191 	size_t				buff_size;
192 	data_block_header_t	*block_header;	// buffer ptr
193 	void				*buff_ptr;		// pointer into buffer for read/write blocks/records
194 	stat_record_t 		*stat_record;	// flow stat record
195 	int					fd;				// file descriptor
196 } nffile_t;
197 
198 /*
199  * The block type 2 contains a common record and multiple extension records. This allows a more flexible data
200  * storage of netflow v9 records and 3rd party extension to nfdump.
201  *
202  * A block type 2 may contain different record types, as described below.
203  *
204  * Record description:
205  * -------------------
206  * A record always starts with a 16bit record id followed by a 16bit record size. This record size is the full size of this
207  * record incl. record type and size fields and all record extensions.
208  *
 * Known record types:
210  * Type 0: reserved
211  * Type 1: Common netflow record incl. all record extensions
212  * Type 2: Extension map
213  * Type 3: xstat - port histogram record
214  * Type 4: xstat - bpp histogram record
215  */
216 
/*
 * Record type ids - values of the 16bit type field at the start of a record.
 */
#define CommonRecordV0Type	1
#define ExtensionMapType	2
#define PortHistogramType	3
#define BppHistogramType	4

// Legacy records
#define LegacyRecordType1	5
#define LegacyRecordType2	6

// exporter/sampler types
#define ExporterInfoRecordType	7
#define ExporterStatRecordType	8
// The misspelled name is kept unchanged because existing code may refer to it.
#define SamplerInfoRecordype	9
// Correctly spelled alias for the same id; prefer this name in new code.
#define SamplerInfoRecordType	SamplerInfoRecordype
230 
/*
 * Common start of every record: a 16bit record type followed by a 16bit
 * record size.
 */
typedef struct record_header_s {
	uint16_t	type;	// record type id
	uint16_t	size;	// record size
} record_header_t;
236 
237 
238 /*
239  * for the detailed description of the record definition see nfx.h
240 */
241 
242 
243 void SumStatRecords(stat_record_t *s1, stat_record_t *s2);
244 
245 nffile_t *OpenFile(char *filename, nffile_t *nffile);
246 
247 nffile_t *OpenNewFile(char *filename, nffile_t *nffile, int compress, int anonymized, char *ident);
248 
249 nffile_t *AppendFile(char *filename);
250 
251 int ChangeIdent(char *filename, char *Ident);
252 
253 void PrintStat(stat_record_t *s);
254 
255 void QueryFile(char *filename);
256 
257 stat_record_t *GetStatRecord(char *filename, stat_record_t *stat_record);
258 
259 nffile_t *DisposeFile(nffile_t *nffile);
260 
261 void CloseFile(nffile_t *nffile);
262 
263 int CloseUpdateFile(nffile_t *nffile, char *ident);
264 
265 int ReadBlock(nffile_t *nffile);
266 
267 int WriteBlock(nffile_t *nffile);
268 
269 int RenameAppend(char *from, char *to);
270 
271 void ModifyCompressFile(char * rfile, char *Rfile, int compress);
272 
273 
274 #endif //_NFFILE_H
275 
276