1 /* 2 ** Copyright (C) 2004-2020 by Carnegie Mellon University. 3 ** 4 ** @OPENSOURCE_LICENSE_START@ 5 ** See license information in ../../LICENSE.txt 6 ** @OPENSOURCE_LICENSE_END@ 7 */ 8 #ifndef _SKSTREAM_PRIV_H 9 #define _SKSTREAM_PRIV_H 10 #ifdef __cplusplus 11 extern "C" { 12 #endif 13 14 #include <silk/silk.h> 15 16 RCSIDENTVAR(rcsID_SKSTREAM_PRIV_H, "$SiLK: skstream_priv.h ef14e54179be 2020-04-14 21:57:45Z mthomas $"); 17 18 /* 19 ** skstream_priv.h 20 ** 21 ** For sharing of functions to read/write SiLK Flow records 22 ** 23 ** THESE FUNCTIONS ARE FOR INTERNAL USE BY skStream*(). 24 ** 25 */ 26 27 #include <silk/skstream.h> 28 #include <silk/rwrec.h> 29 #include <silk/sksite.h> 30 #include <silk/utils.h> 31 #include "skiobuf.h" 32 33 34 /* macros to swap the bytes in place */ 35 #define SWAP_DATA64(d) *((uint64_t*)(d)) = BSWAP64(*(uint64_t*)(d)) 36 #define SWAP_DATA32(d) *((uint32_t*)(d)) = BSWAP32(*(uint32_t*)(d)) 37 #define SWAP_DATA16(d) *((uint16_t*)(d)) = BSWAP16(*(uint16_t*)(d)) 38 #if 0 39 /* macros to swap the bytes in place */ 40 #define _SWAP_HELP(_byte, ar, a, b) \ 41 { \ 42 (_byte) = (ar)[(a)]; \ 43 (ar)[(a)] = (ar)[(b)]; \ 44 (ar)[(b)] = (_byte); \ 45 } 46 #define SWAP_DATA16(ar) \ 47 { \ 48 uint8_t _byte; \ 49 _SWAP_HELP(_byte, (ar), 0, 1); \ 50 } 51 #define SWAP_DATA32(ar) \ 52 { \ 53 uint8_t _byte; \ 54 _SWAP_HELP(_byte, (ar), 0, 3); \ 55 _SWAP_HELP(_byte, (ar), 1, 2); \ 56 } 57 #define SWAP_DATA64(ar) \ 58 { \ 59 uint8_t _byte; \ 60 _SWAP_HELP(_byte, (ar), 0, 7); \ 61 _SWAP_HELP(_byte, (ar), 1, 6); \ 62 _SWAP_HELP(_byte, (ar), 2, 5); \ 63 _SWAP_HELP(_byte, (ar), 3, 4); \ 64 } 65 #endif /* 0 */ 66 67 68 /* 69 * We store the packet count in a 20 bit value. When the packet 70 * count is larger than that, we divide the value by the 71 * PKTS_DIVISOR and store the result. That gives an absolute max 72 * of 67,100,864 packets. 73 */ 74 #define MAX_PKTS 1048576 /* 2^20 */ 75 #define PKTS_DIVISOR 64 76 #define DBL_MAX_PKTS 67108864 /* 2^26 */ 77 78 #define BPP_BITS 6 79 #define BPP_PRECN 64 /* 2^BPP_BITS */ 80 #define BPP_PRECN_DIV_2 32 /* 2^BPP_BITS/2 */ 81 82 83 /* 84 * We pack flows by their start time into hourly files. The file's 85 * hour is stored in the header; each record's start time is offset 86 * from that and stored in 12 bits. 87 */ 88 #define MAX_START_TIME 4096 /* 2^12 */ 89 90 /* 91 * The elapsed time is the offset from the record's start time. We 92 * assume the router flushes flows at least once an hour, though in 93 * practice CISCO flushes every 30 mintues. The elapsed time is 94 * stored in 11 or 12 bits, depending on file format. 95 */ 96 #define MAX_ELAPSED_TIME 4096 /* 2^12 */ 97 #define MAX_ELAPSED_TIME_OLD 2048 /* 2^11 */ 98 99 100 /* 101 * Masks for bit field manipulation: these masks will pass the 102 * specified number of bits strarting from the least significant 103 * bit. For example, masking a value with MASKARRAY_01 gives the 104 * least significant bit; MASKARRAY_09 gives the rightmost 9 bits, 105 * etc. 106 */ 107 #define MASKARRAY_01 1U 108 #define MASKARRAY_02 3U 109 #define MASKARRAY_03 7U 110 #define MASKARRAY_04 15U 111 #define MASKARRAY_05 31U 112 #define MASKARRAY_06 63U 113 #define MASKARRAY_07 127U 114 #define MASKARRAY_08 255U 115 116 #define MASKARRAY_09 511U 117 #define MASKARRAY_10 1023U 118 #define MASKARRAY_11 2047U 119 #define MASKARRAY_12 4095U 120 #define MASKARRAY_13 8191U 121 #define MASKARRAY_14 16383U 122 #define MASKARRAY_15 32767U 123 #define MASKARRAY_16 65535U 124 125 #define MASKARRAY_17 131071U 126 #define MASKARRAY_18 262143U 127 #define MASKARRAY_19 524287U 128 #define MASKARRAY_20 1048575U 129 #define MASKARRAY_21 2097151U 130 #define MASKARRAY_22 4194303U 131 #define MASKARRAY_23 8388607U 132 #define MASKARRAY_24 16777215U 133 134 #define MASKARRAY_25 33554431U 135 #define MASKARRAY_26 67108863U 136 #define MASKARRAY_27 134217727U 137 #define MASKARRAY_28 268435455U 138 #define MASKARRAY_29 536870911U 139 #define MASKARRAY_30 1073741823U 140 #define MASKARRAY_31 2147483647U 141 142 143 /* Web classification utilities */ 144 145 /* SK_WEBPORT_CHECK(p) is defined in rwrec.h */ 146 147 /* 148 * encoding = SK_WEBPORT_ENCODE(p) 149 * 150 * Encode the port 'p' into a value suitable for storing in the 151 * wPort field of an FT_RWWWW record. 152 */ 153 #define SK_WEBPORT_ENCODE(p) \ 154 (((p) == 80) \ 155 ? 0 \ 156 : (((p) == 443) \ 157 ? 1u \ 158 : (((p) == 8080) \ 159 ? 2u \ 160 : 3u))) 161 162 163 /* 164 * decoding = SK_WEBPORT_EXPAND(p) 165 * 166 * Decode the port 'p' from the value stored in the wPort field in 167 * an FT_RWWWW record. 168 */ 169 #define SK_WEBPORT_EXPAND(p) \ 170 (((p) == 0) \ 171 ? 80 \ 172 : (((p) == 1) \ 173 ? 443 \ 174 : (((p) == 2) \ 175 ? 8080 \ 176 : 0))) 177 178 179 /* 180 * Unless the SK_NOTFIX_TCPSTATE_EXPANDED cpp macro is defined, fix 181 * records that were written prior to SiLK-3.6.0 on read. 182 * 183 * These broken records have the SK_TCPSTATE_EXPANDED bit set on 184 * either non-TCP records or on records where the initial-tcpflags 185 * and session-tcpflags values are both 0. 186 */ 187 #ifdef SK_NOTFIX_TCPSTATE_EXPANDED 188 # define RWREC_MAYBE_CLEAR_TCPSTATE_EXPANDED(r) 189 #else 190 # define RWREC_MAYBE_CLEAR_TCPSTATE_EXPANDED(r) \ 191 if (rwRecGetTcpState(r) & SK_TCPSTATE_EXPANDED \ 192 && (IPPROTO_TCP != rwRecGetProto(r) \ 193 || (0 == rwRecGetInitFlags(r) && 0 == rwRecGetRestFlags(r)))) \ 194 { \ 195 rwRecSetTcpState(r, (rwRecGetTcpState(r) & ~SK_TCPSTATE_EXPANDED)); \ 196 rwRecSetInitFlags(r, 0); \ 197 rwRecSetRestFlags(r, 0); \ 198 } 199 #endif /* SK_NO_TCP_STATE_FIX */ 200 201 /* Formerly public macros only used by flowcapio.c and rwfilterio.c */ 202 203 /* 204 * Return only the milliseconds portion of an rwRec's start time. 205 */ 206 #define rwRecGetStartMSec(r) ((uint16_t)(rwRecGetStartTime(r) % 1000)) 207 208 /* 209 * Return only the milliseconds portion of an rwRec's elapsed 210 * field. 211 */ 212 #define rwRecGetElapsedMSec(r) ((uint16_t)(rwRecGetElapsed(r) % 1000)) 213 214 215 216 217 struct skstream_st { 218 /* A FILE pointer to the file */ 219 FILE *fp; 220 #if SK_ENABLE_ZLIB 221 /* When the entire file has been compressed, we use gzread/gzwrite 222 * to process the file, this is interface to those functions */ 223 gzFile gz; 224 #endif 225 226 /* A handle to our own I/O buffering code */ 227 sk_iobuf_t *iobuf; 228 229 /* The full path to the file */ 230 char *pathname; 231 232 /* For a SiLK file, this holds the file's header */ 233 sk_file_header_t *silk_hdr; 234 235 /* Number of records read or written. For appending, this is the 236 * number records added to the file. */ 237 uint64_t rec_count; 238 239 /* Start time as recorded in file's header, or 0. For easy access */ 240 sktime_t hdr_starttime; 241 242 /* Pointer to a function to convert an array of bytes into a record */ 243 int (*rwUnpackFn)(skstream_t*, rwRec*, uint8_t*); 244 /* Pointer to a function to convert a record into an array of bytes */ 245 int (*rwPackFn)(skstream_t*, const rwRec*, uint8_t*); 246 /* The stream to copy the input to---for support of the --all-dest 247 * and --copy-input switches */ 248 skstream_t *copyInputFD; 249 250 /* An object to hold the parameter that caused the last error */ 251 union { 252 uint32_t num; 253 const rwRec *rec; 254 } errobj; 255 256 /* Offset where the skIOBuf was created */ 257 off_t pre_iobuf_pos; 258 259 /* Return value from most recent function skStream* call. See 260 * also err_info. Should we combine these into a single value? */ 261 ssize_t last_rv; 262 263 /* Holds the most recent error code. See also last_rv. Should we 264 * combine these into a single value? */ 265 int err_info; 266 267 /* The errno from the last system call that failed */ 268 int errnum; 269 270 /* The open file descriptor, or -1 if closed */ 271 int fd; 272 273 /* The fixed length of records of this type */ 274 uint16_t recLen; 275 276 /* The sensor ID stored in the file's header, or 277 * SK_INVALID_SENSOR. For easy access. */ 278 sk_sensor_id_t hdr_sensor; 279 280 /* The flowtype ID stored in the file's header, or 281 * SK_INVALID_FLOWTYPE. For easy access. */ 282 sk_flowtype_id_t hdr_flowtype; 283 284 /* Whether stream is read, write, append. */ 285 skstream_mode_t io_mode; 286 287 /* ipv6 policy */ 288 sk_ipv6policy_t v6policy; 289 290 /* When sending textual output to a pager, the name of the pager 291 * to use */ 292 char *pager; 293 294 /* When reading textual input, the text that denotes the start of 295 * a comment. */ 296 char *comment_start; 297 298 /* The type of data to read/write: text, silk, silk-flow, etc */ 299 skcontent_t content_type; 300 301 /* Set to 1 if the stream is seekable (i.e., a "real" file) */ 302 unsigned is_seekable :1; 303 304 /* Set to 1 if the stream is a binary stream with a SiLK header */ 305 unsigned is_silk :1; 306 307 /* Set to 1 if the stream contains SiLK flow data */ 308 unsigned is_silk_flow :1; 309 310 /* Set to 1 if the pager is being used for textual output. */ 311 unsigned is_pager_active :1; 312 313 /* Set to 1 if the stream contains binary data (silk or non-silk) */ 314 unsigned is_binary :1; 315 316 /* Set to 1 if the stream is connected to a terminal (tty) */ 317 unsigned is_terminal :1; 318 319 /* Set to 1 if data has been read-from/written-to the stream */ 320 unsigned is_dirty :1; 321 322 /* Set to 1 if the stream has been closed */ 323 unsigned is_closed :1; 324 325 /* Set to 1 if the stream is coming from an MPI node */ 326 unsigned is_mpi :1; 327 328 /* Set to 1 if the stream is connected to a standard I/O stream */ 329 unsigned is_stdio :1; 330 331 /* Set to 1 if the stream is not using the IOBuf */ 332 unsigned is_unbuffered :1; 333 334 /* Set to 1 if the stream has reached the end-of-file. */ 335 unsigned is_eof :1; 336 337 /* Set to 1 if an error has occurred in an skStream* function that 338 * was called by an skIOBuf* function as part of a callback */ 339 unsigned is_iobuf_error :1; 340 341 /* Set to 1 if the silk flow data in this stream supports IPv6 */ 342 unsigned supports_ipv6 :1; 343 344 /* Set to 1 if the silk header has been read from the stream */ 345 unsigned have_hdr :1; 346 347 /* Set to 1 if the data in the stream is in non-native byte order */ 348 unsigned swapFlag :1; 349 }; 350 /* skstream_t */ 351 352 353 354 /* ***** Functions exported from each rw<format>io.c file ***** */ 355 356 357 /* 358 * status = <format>ioPrepare(stream); 359 * 360 * DO NOT CALL DIRECTLY. FOR INTERNAL LIBRW USE 361 * 362 * Sets the record version to the default if it is unspecified, 363 * checks that the record format supports the requested record 364 * version, sets the record length, and sets the pack and unpack 365 * functions for this record format and version. 366 * 367 * Returns SKSTREAM_OK on success; otherwise returns an error code 368 * on failure: bad version. 369 */ 370 int 371 augmentedioPrepare( 372 skstream_t *stream); 373 int 374 augroutingioPrepare( 375 skstream_t *stream); 376 int 377 augsnmpoutioPrepare( 378 skstream_t *stream); 379 int 380 augwebioPrepare( 381 skstream_t *stream); 382 int 383 filterioPrepare( 384 skstream_t *stream); 385 int 386 flowcapioPrepare( 387 skstream_t *stream); 388 int 389 genericioPrepare( 390 skstream_t *stream); 391 int 392 ipv6ioPrepare( 393 skstream_t *stream); 394 int 395 ipv6routingioPrepare( 396 skstream_t *stream); 397 int 398 notroutedioPrepare( 399 skstream_t *stream); 400 int 401 routedioPrepare( 402 skstream_t *stream); 403 int 404 splitioPrepare( 405 skstream_t *stream); 406 int 407 wwwioPrepare( 408 skstream_t *stream); 409 410 411 /* 412 * length = <format>ioGetRecLen(version); 413 * 414 * Return the on-disk length in bytes of records of the specified 415 * type and vresion; or return 0 if the specified version is not 416 * defined for the given type. 417 */ 418 uint16_t 419 augmentedioGetRecLen( 420 sk_file_version_t); 421 uint16_t 422 augroutingioGetRecLen( 423 sk_file_version_t); 424 uint16_t 425 augsnmpoutioGetRecLen( 426 sk_file_version_t); 427 uint16_t 428 augwebioGetRecLen( 429 sk_file_version_t); 430 uint16_t 431 filterioGetRecLen( 432 sk_file_version_t); 433 uint16_t 434 flowcapioGetRecLen( 435 sk_file_version_t); 436 uint16_t 437 genericioGetRecLen( 438 sk_file_version_t); 439 uint16_t 440 ipv6ioGetRecLen( 441 sk_file_version_t); 442 uint16_t 443 ipv6routingioGetRecLen( 444 sk_file_version_t); 445 uint16_t 446 notroutedioGetRecLen( 447 sk_file_version_t); 448 uint16_t 449 routedioGetRecLen( 450 sk_file_version_t); 451 uint16_t 452 splitioGetRecLen( 453 sk_file_version_t); 454 uint16_t 455 wwwioGetRecLen( 456 sk_file_version_t); 457 458 459 460 /* ***** rwpack.c ***** */ 461 462 #ifdef RWPACK_BYTES_PACKETS 463 /* 464 * Uses fields from the rwRec pointed to by 'rwrec' to compute the 465 * bytes-per-packet ('bpp'), packets ('pkts'), and 466 * packets-multiplier ('pflag') field required by the packed file 467 * formats FILTER, SPLIT, WWW, ROUTED, and NOTROUTED. 468 * 469 * The parameters 'bpp', 'pkts', and 'pflag' will be the values to 470 * store in the packed file format; i.e., they will be the values 471 * that rwpackUnpackBytesPackets() can read; they will be in native 472 * byte order. 473 * 474 * Specifically, 'pkts' is either the packet count or the packet 475 * count divided by the PKTS_DIVISOR when 'pflag' is non-zero. 476 * 'bpp' is the bytes-per-packet ratio given by a 14 bit value and 477 * a 6 bit fractional part. 478 * 479 * This function returns SKSTREAM_OK on success, or the following 480 * to indicate an error: SKSTREAM_ERR_PKTS_ZERO-the 'pkts' field on 481 * rwrec is 0; SKSTREAM_ERR_PKTS_OVRFLO-the 'pkts' value is too 482 * large to store in the packed file format. 483 */ 484 static int 485 rwpackPackBytesPackets( 486 uint32_t *bpp_out, 487 uint32_t *pkts_out, 488 uint32_t *pflag_out, 489 const rwGenericRec_V5 *rwrec); 490 491 492 /* 493 * Does the reverse of rwpackPackBytesPackets(): Fills in the 494 * 'bytes', 'packets', and 'bpp' fields of the rwRec pointed to by 495 * 'rwrec'. All values are expected to be in native byte order. 496 * 497 * This function does no error checking. 498 */ 499 static void 500 rwpackUnpackBytesPackets( 501 rwGenericRec_V5 *rwrec, 502 uint32_t bpp, 503 uint32_t pkts, 504 uint32_t pflag); 505 #endif /* RWPACK_BYTES_PACKETS */ 506 507 508 #ifdef RWPACK_PROTO_FLAGS 509 /* 510 * Uses fields from the rwRec pointed to by 'rwrec' to compute the 511 * values pointed to by these variables: 512 * 513 * is_tcp_out - 1 if the flow is TCP (proto==6); 0 otherwise 514 * 515 * prot_flags_out - protocol when is_tcp==0; bitwise OR of TCP 516 * flags on ALL packages when is_tcp==1 and tcp_state!=0; TCP 517 * flags on FIRST packet when is_tcp==1 and tcp_state!=0 518 * 519 * tcp_state_out - value of tcp_state field on the rwrec 520 * 521 * rest_flags_out - the flags reported by the flow collector 522 * when is_tcp==0 (even though there are no flags to report); 523 * empty when is_tcp==1 and tcp_state==0; bitwise OR of TCP 524 * flags on all but the first packet when is_tcp==1 and 525 * tcp_state!=0. 526 * 527 * The output variables prot_flags, tcp_state, and rest_flags will 528 * be the values to store in the packed file format; is_tcp can be 529 * stored in a single bit. The values can be read by the 530 * rwpackUnpackProtoFlags() function. 531 * 532 * This function should never fail, and thus has no return value. 533 */ 534 static void 535 rwpackPackProtoFlags( 536 uint8_t *is_tcp_out, 537 uint8_t *prot_flags_out, 538 uint8_t *tcp_state_out, 539 uint8_t *rest_flags_out, 540 const rwGenericRec_V5 *rwrec); 541 542 543 /* 544 * Does the reverse of rwpackPackProtoFlags(): Fills in the 'proto', 545 * 'flags', 'init_flags', 'rest_flags', and 'tcp_state' fields on 546 * the rwRec pointed to by 'rwrec'. All values are expected to be 547 * in native byte order. 548 * 549 * This function does no error checking. 550 */ 551 static void 552 rwpackUnpackProtoFlags( 553 rwGenericRec_V5 *rwrec, 554 uint8_t is_tcp, 555 uint8_t prot_flags, 556 uint8_t tcp_state, 557 uint8_t rest_flags); 558 #endif /* RWPACK_PROTO_FLAGS */ 559 560 561 #ifdef RWPACK_SBB_PEF 562 /* 563 * Uses fields from the rwRec pointed to by 'rwrec' to compute the 564 * 'sbb' and 'pef' fields used when packing SPLIT, WWW, ROUTED, and 565 * NOTROUTED V1 and V2 files. 'file_start_time' is the time value 566 * stored in the header--record times are offset from that time. 567 * 568 * Uses the sTime, elapsed, pkts, bytes in the rwrec to compute 569 * these values. Any millisec values for sTime and/or elapsed on 570 * the rwRec are ignored. 571 * 572 * sbb and pef are returned in native byte order. 573 * 574 * Returns 0 on success or non-zero on these failures: rwrec's sTime 575 * is earlier than the 'file_start_time' or is too large; elapsed 576 * time is too large; packets field is zero or too large. 577 */ 578 static int 579 rwpackPackSbbPef( 580 uint32_t *sbb_out, 581 uint32_t *pef_out, 582 const rwGenericRec_V5 *rwrec, 583 sktime_t file_start_time); 584 585 586 /* 587 * Does the reverse of rwpackPackSbbPef(): Fills in the 'sTime', 588 * 'elapsed', 'bytes', 'pkts', and 'bpp' fields on the rwRec 589 * pointed to by 'rwrec'. All values are expected to be in native 590 * byte order. 591 * 592 * This function does no error checking. 593 */ 594 static void 595 rwpackUnpackSbbPef( 596 rwGenericRec_V5 *rwrec, 597 sktime_t file_start_time, 598 const uint32_t *sbb, 599 const uint32_t *pef); 600 #endif /* RWPACK_SBB_PEF */ 601 602 603 #ifdef RWPACK_TIME_BYTES_PKTS_FLAGS 604 /* 605 * Computes the 'pkts_stime', 'bbe', and 'msec_flags' fields used 606 * when packing into various formats. 607 * 608 * Uses the sTime, elapsed, pkts, and bytes fields in the rwRec 609 * pointed to by rwrec to compute these values. 'file_start_time' 610 * is the hour stored in the file's header---record times are 611 * offset from it. 612 * 613 * sbb and pef are returned in native byte order. 614 * 615 * Returns 0 on success or non-zero on these failures: rwrec's sTime 616 * is earlier than stream's sTime or is too large; elapsed time 617 * is too large; packets field is too large. 618 */ 619 static int 620 rwpackPackTimeBytesPktsFlags( 621 uint32_t *pkts_stime_out, 622 uint32_t *bbe_out, 623 uint32_t *msec_flags_out, 624 const rwGenericRec_V5 *rwrec, 625 sktime_t file_start_time); 626 627 628 /* 629 * Does the reverse of rwpackPackSbbPef(): Fills in the 'sTime', 630 * 'elapsed', 'sTime_msec, 'elapsed_msec', 'bytes', 'pkts', and 631 * 'bpp' fields on the rwRec pointed to by 'rwrec'. All values are 632 * expected to be in native byte order. 633 * 634 * This function does no error checking. 635 */ 636 static void 637 rwpackUnpackTimeBytesPktsFlags( 638 rwGenericRec_V5 *rwrec, 639 sktime_t file_start_time, 640 const uint32_t *pkts_stime, 641 const uint32_t *bbe, 642 const uint32_t *msec_flags); 643 #endif /* RWPACK_TIME_BYTES_PKTS_FLAGS */ 644 645 646 #ifdef RWPACK_FLAGS_TIMES_VOLUMES 647 static int 648 rwpackPackFlagsTimesVolumes( 649 uint8_t *ar, 650 const rwGenericRec_V5 *rwrec, 651 sktime_t file_start_time, 652 size_t len); 653 654 655 static void 656 rwpackUnpackFlagsTimesVolumes( 657 rwGenericRec_V5 *rwrec, 658 const uint8_t *ar, 659 sktime_t file_start_time, 660 size_t len, 661 int is_tcp); 662 #endif /* RWPACK_FLAGS_TIMES_VOLUMES */ 663 664 665 #ifdef RWPACK_TIMES_FLAGS_PROTO 666 static int 667 rwpackPackTimesFlagsProto( 668 const rwGenericRec_V5 *rwrec, 669 uint8_t *ar, 670 sktime_t file_start_time); 671 672 673 static void 674 rwpackUnpackTimesFlagsProto( 675 rwGenericRec_V5 *rwrec, 676 const uint8_t *ar, 677 sktime_t file_start_time); 678 #endif /* RWPACK_TIMES_FLAGS_PROTO */ 679 680 681 #ifdef __cplusplus 682 } 683 #endif 684 #endif /* _SKSTREAM_PRIV_H */ 685 686 /* 687 ** Local Variables: 688 ** mode:c 689 ** indent-tabs-mode:nil 690 ** c-basic-offset:4 691 ** End: 692 */ 693