1#! @PERL@
2#######################################################################
3# Copyright (C) 2006-2020 by Carnegie Mellon University.
4#
5# @OPENSOURCE_LICENSE_START@
6# See license information in ../../LICENSE.txt
7# @OPENSOURCE_LICENSE_END@
8#
9#######################################################################
10# $SiLK: rwscanquery.in ef14e54179be 2020-04-14 21:57:45Z mthomas $
11#######################################################################
12# rwscanquery
13#
14# Query and generate reports from a network scan database.
15#######################################################################
16
17use strict;
18use warnings;
19
20use Getopt::Long qw(:config gnu_compat permute no_getopt_compat no_bundling);
21use Data::Dumper;
22use FindBin;
23use Pod::Usage;
24use File::Temp;
25
26### Config
27
28my $conf_db_driver;
29my $conf_db_user;
30my $conf_db_pass;
31my $conf_db_instance;
32my $conf_rw_in_class;
33my $conf_rw_in_type;
34my $conf_rw_out_class;
35my $conf_rw_out_type;
36
37### Prototypes for subs defined later
38
39sub db_connect_oracle();
40sub db_connect_postgresql();
41sub db_connect_mysql();
42sub db_connect_sqlite();
43sub parse_options();
44sub load_rcfile();
45sub val_date(\$);
46sub val_ip(\$$);
47sub val_set(\$$);
48sub do_query($);
49sub write_standard_results($);
50sub write_export_results($);
51sub write_volume_results($);
52sub write_scan_set($);
53sub write_scan_flows($);
54sub write_resp_flows($);
55sub tool_version_exit();
56
57### Argument processing
58
59my $opt_start_hour;
60my $opt_end_hour;
61my $opt_saddress;
62my $opt_sipset;
63my $opt_daddress;
64my $opt_dipset;
65my $opt_report = "standard";
66my $opt_volume_summary;
67my $opt_show_header = 0;
68my $opt_columnar    = 0;
69
70my $opt_database;
71
72my $opt_verbose;
73
74my $outfile    = "";    # for Perl filehandles
75my $outfile_rw = "";    # for rw tools
76
77### SiLK commands used
78
79my $rwfilter   = $ENV{RWFILTER}   || '@rwfilter@';
80my $rwset      = $ENV{RWSET}      || '@rwset@';
81my $rwsetbuild = $ENV{RWSETBUILD} || '@rwsetbuild@';
82my $rwsetcat   = $ENV{RWSETCAT}   || '@rwsetcat@';
83
84my $appname = $0;
85$appname =~ s/.*\///;
86
87# The queries below are optimized based on the fact that scans in the database
88# have a maximum duration of about an hour.  There are some cases where a scan
89# for a particular hour can begin in the previous hour, though, so the date
90# arithmetic looks back (and ahead) of the time period in question a little
91# bit.  Also, note that as of this writing, only the Oracle queries have been
92# heavily performance tested, though all have been tested for proper selection.
93
94my %queries = (
95    "oracle" => {
96        "standard" => q{
97            SELECT s.id, s.sip, s.stime, s.etime, s.proto,
98              s.flows, s.packets, s.bytes
99            FROM scans s
100            WHERE s.stime < to_date('$end_hour') + 1/24
101            AND s.etime >= to_date('$start_hour')
102            AND s.stime >= to_date('$start_hour') - 1/24
103            AND s.etime < to_date('$end_hour') + 2/24
104            $saddress_part
105            $sipset_part
106        },
107        "volume" => q{
108            SELECT TO_CHAR(s.stime, 'YYYY/MM/DD') AS scan_date,
109            SUM(s.flows) AS flows,
110            SUM(s.packets) AS pkts,
111            SUM(s.bytes) AS bytes
112            FROM scans s
113            WHERE s.stime < to_date('$end_hour') + 1
114            AND s.etime >= to_date('$start_hour')
115            AND s.stime >= to_date('$start_hour') - 1
116            AND s.etime < to_date('$end_hour') + 1
117            $saddress_part
118            $sipset_part
119            GROUP BY TO_CHAR(s.stime, 'YYYY/MM/DD')
120            ORDER BY TO_CHAR(s.stime, 'YYYY/MM/DD')
121        },
122        "scanip" => q{
123            SELECT DISTINCT s.sip
124            FROM scans s
125            WHERE s.stime < to_date('$end_hour') + 1/24
126            AND s.etime >= to_date('$start_hour')
127            AND s.stime >= to_date('$start_hour') - 1/24
128            AND s.etime < to_date('$end_hour') + 2/24
129            $saddress_part
130            $sipset_part
131        },
132        "export" => q{
133            SELECT s.id, s.sip, s.proto, s.stime, s.etime,
134              s.flows, s.packets, s.bytes, s.scan_model, s.scan_prob
135            FROM scans s
136            WHERE s.stime < to_date('$end_hour') + 1/24
137            AND s.etime >= to_date('$start_hour')
138            AND s.stime >= to_date('$start_hour') - 1/24
139            AND s.etime < to_date('$end_hour') + 2/24
140            $saddress_part
141            $sipset_part
142        }
143    },
144    "postgresql" => {
145        "standard" => q{
146            SELECT s.id, s.sip, s.stime, s.etime, s.proto,
147              s.flows, s.packets, s.bytes
148            FROM scans s
149            WHERE s.stime < to_timestamp('$end_hour', 'YYYY/MM/DD:HH24')
150                + INTERVAL '1 HOUR'
151            AND s.etime >= to_timestamp('$start_hour', 'YYYY/MM/DD:HH24')
152            AND s.stime >= to_timestamp('$start_hour', 'YYYY/MM/DD:HH24')
153                - INTERVAL '1 HOUR'
154            AND s.etime < to_timestamp('$end_hour', 'YYYY/MM/DD:HH24')
155                + INTERVAL '2 HOUR'
156            $saddress_part
157            $sipset_part
158        },
159        "volume" => q{
160            SELECT to_char(s.stime, 'YYYY/MM/DD') AS scan_date,
161            SUM(s.flows) AS flows,
162            SUM(s.packets) AS pkts,
163            SUM(s.bytes) AS bytes
164            FROM scans s
165            WHERE s.stime < to_timestamp('$end_hour', 'YYYY/MM/DD:HH24')
166                + INTERVAL '1 DAY'
167            AND s.etime >= to_timestamp('$start_hour', 'YYYY/MM/DD:HH24')
168            AND s.stime >= to_timestamp('$start_hour', 'YYYY/MM/DD:HH24')
169                - INTERVAL '1 DAY'
170            AND s.etime < to_timestamp('$end_hour', 'YYYY/MM/DD:HH24')
171                + INTERVAL '1 DAY'
172            $saddress_part
173            $sipset_part
174            GROUP BY to_char(s.stime, 'YYYY/MM/DD')
175            ORDER BY to_char(s.stime, 'YYYY/MM/DD')
176        },
177        "scanip" => q{
178            SELECT DISTINCT s.sip
179            FROM scans s
180            WHERE s.stime < to_timestamp('$end_hour', 'YYYY/MM/DD:HH24')
181                + INTERVAL '1 HOUR'
182            AND s.etime >= to_timestamp('$start_hour', 'YYYY/MM/DD:HH24')
183            AND s.stime >= to_timestamp('$start_hour', 'YYYY/MM/DD:HH24')
184                - INTERVAL '1 HOUR'
185            AND s.etime < to_timestamp('$end_hour', 'YYYY/MM/DD:HH24')
186                + INTERVAL '2 HOUR'
187            $saddress_part
188            $sipset_part
189        },
190        "export" => q{
191            SELECT s.id, s.sip, s.proto, s.stime, s.etime,
192              s.flows, s.packets, s.bytes, s.scan_model, s.scan_prob
193            FROM scans s
194            WHERE s.stime < to_timestamp('$end_hour', 'YYYY/MM/DD:HH24')
195                + INTERVAL '1 HOUR'
196            AND s.etime >= to_timestamp('$start_hour', 'YYYY/MM/DD:HH24')
197            AND s.stime >= to_timestamp('$start_hour', 'YYYY/MM/DD:HH24')
198                - INTERVAL '1 HOUR'
199            AND s.etime < to_timestamp('$end_hour', 'YYYY/MM/DD:HH24')
200                + INTERVAL '2 HOUR'
201            $saddress_part
202            $sipset_part
203        }
204    },
205    "mysql" => {
206        "standard" => q{
207            SELECT s.id, s.sip, s.stime, s.etime, s.proto,
208              s.flows, s.packets, s.bytes
209            FROM scans s
210            WHERE s.stime < STR_TO_DATE('$end_hour', '%Y/%m/%d:%H')
211                + INTERVAL 1 HOUR
212            AND s.etime >= STR_TO_DATE('$start_hour', '%Y/%m/%d:%H')
213            AND s.stime >= STR_TO_DATE('$start_hour', '%Y/%m/%d:%H')
214                - INTERVAL 1 HOUR
215            AND s.etime < STR_TO_DATE('$end_hour', '%Y/%m/%d:%H')
216                + INTERVAL 2 HOUR
217            $saddress_part
218            $sipset_part
219        },
220        "volume" => q{
221            SELECT DATE_FORMAT(s.stime, '%Y/%m/%d:%H') AS scan_date,
222            SUM(s.flows) AS flows,
223            SUM(s.packets) AS pkts,
224            SUM(s.bytes) AS bytes
225            FROM scans s
226            WHERE s.stime < STR_TO_DATE('$end_hour', '%Y/%m/%d:%H')
227                + INTERVAL 1 DAY
228            AND s.etime >= STR_TO_DATE('$start_hour', '%Y/%m/%d:%H')
229            AND s.stime >= STR_TO_DATE('$start_hour', '%Y/%m/%d:%H')
230                - INTERVAL 1 DAY
231            AND s.etime < STR_TO_DATE('$end_hour', '%Y/%m/%d:%H')
232                + INTERVAL 1 DAY
233            $saddress_part
234            $sipset_part
235            GROUP BY DATE_FORMAT(s.stime, '%Y/%m/%d')
236            ORDER BY DATE_FORMAT(s.stime, '%Y/%m/%d')
237        },
238        "scanip" => q{
239            SELECT DISTINCT s.sip
240            FROM scans s
241            WHERE s.stime < STR_TO_DATE('$end_hour', '%Y/%m/%d:%H')
242                + INTERVAL 1 HOUR
243            AND s.etime >= STR_TO_DATE('$start_hour', '%Y/%m/%d:%H')
244            AND s.stime >= STR_TO_DATE('$start_hour', '%Y/%m/%d:%H')
245                - INTERVAL 1 HOUR
246            AND s.etime < STR_TO_DATE('$end_hour', '%Y/%m/%d:%H')
247                + INTERVAL 2 HOUR
248            $saddress_part
249            $sipset_part
250        },
251        "export" => q{
252            SELECT s.id, s.sip, s.proto, s.stime, s.etime,
253              s.flows, s.packets, s.bytes, s.scan_model, s.scan_prob
254            FROM scans s
255            WHERE s.stime < STR_TO_DATE('$end_hour', '%Y/%m/%d:%H')
256                + INTERVAL 1 HOUR
257            AND s.etime >= STR_TO_DATE('$start_hour', '%Y/%m/%d:%H')
258            AND s.stime >= STR_TO_DATE('$start_hour', '%Y/%m/%d:%H')
259                - INTERVAL 1 HOUR
260            AND s.etime < STR_TO_DATE('$end_hour', '%Y/%m/%d:%H')
261                + INTERVAL 2 HOUR
262            $saddress_part
263            $sipset_part
264        }
265    },
266    "sqlite" => {
267        "standard" => q{
268            SELECT s.id, s.sip, s.stime, s.etime, s.proto,
269              s.flows, s.packets, s.bytes
270            FROM scans s
271            WHERE s.stime < datetime('$end_hour_iso', '+1 hour')
272            AND s.etime >= datetime('$start_hour_iso')
273            AND s.stime >= datetime('$start_hour_iso', '-1 hour')
274            AND s.etime < datetime('$end_hour_iso', '+2 hours')
275            $saddress_part
276            $sipset_part
277        },
278        "volume" => q{
279            SELECT strftime('%Y/%m/%d', s.stime) AS scan_date,
280            SUM(s.flows) AS flows,
281            SUM(s.packets) AS pkts,
282            SUM(s.bytes) AS bytes
283            FROM scans s
284            WHERE s.stime < datetime('$end_hour_iso', '+1 day')
285            AND s.etime >= datetime('$start_hour_iso')
286            AND s.stime >= datetime('$start_hour_iso', '-1 day')
287            AND s.etime < datetime('$end_hour_iso', '+1 day')
288            $saddress_part
289            $sipset_part
290            GROUP BY strftime('%Y/%m/%d', s.stime)
291            ORDER BY strftime('%Y/%m/%d', s.stime)
292        },
293        "scanip" => q{
294            SELECT DISTINCT s.sip
295            FROM scans s
296            WHERE s.stime < datetime('$end_hour_iso', '+1 hour')
297            AND s.etime >= datetime('$start_hour_iso')
298            AND s.stime >= datetime('$start_hour_iso', '-1 hour')
299            AND s.etime < datetime('$end_hour_iso', '+2 hours')
300            $saddress_part
301            $sipset_part
302        },
303        "export" => q{
304            SELECT s.id, s.sip, s.proto, s.stime, s.etime,
305              s.flows, s.packets, s.bytes, s.scan_model, s.scan_prob
306            FROM scans s
307            WHERE s.stime < datetime('$end_hour_iso', '+1 hour')
308            AND s.etime >= datetime('$start_hour_iso')
309            AND s.stime >= datetime('$start_hour_iso', '-1 hour')
310            AND s.etime < datetime('$end_hour_iso', '+2 hours')
311            $saddress_part
312            $sipset_part
313        }
314    }
315);
316
317parse_options();
318load_rcfile();
319
320if ( !defined $opt_database ) {
321    if ( defined $conf_db_instance ) {
322        $opt_database = $conf_db_instance;
323    }
324    else {
325        $opt_database = "SCAN";
326    }
327}
328
329require DBI;
330
331my $dbh;
332
333if ( !defined $conf_db_driver ) {
334    die "$appname: No database driver specified\n";
335}
336elsif ( $conf_db_driver =~ /oracle/i ) {
337    $dbh = db_connect_oracle();
338}
339elsif ( $conf_db_driver =~ /postgresql/i ) {
340    $dbh = db_connect_postgresql();
341}
342elsif ( $conf_db_driver =~ /mysql/i ) {
343    $dbh = db_connect_mysql();
344}
345elsif ( $conf_db_driver =~ /sqlite/i ) {
346    $dbh = db_connect_sqlite();
347}
348else {
349    die "$appname: Unknown db_driver: $conf_db_driver\n";
350}
351
352my $sth;
353
354# using 'foreach' as a switch()
355foreach ($opt_report) {
356    if (/standard/i) {
357        open( OUTF, ">$outfile" )
358            or die "$appname: Cannot open output file '$outfile': $!\n";
359        $sth = do_query("standard");
360        write_standard_results($sth);
361        close(OUTF);
362    }
363    elsif (/volume/i) {
364        open( OUTF, ">$outfile" )
365            or die "$appname: Cannot open output file '$outfile': $!\n";
366        $sth = do_query("volume");
367        write_volume_results($sth);
368        close(OUTF);
369    }
370    elsif (/scanset/i) {
371        $sth = do_query("scanip");
372        write_scan_set($sth);
373    }
374    elsif (/scanflows/i) {
375        $sth = do_query("scanip");
376        write_scan_flows($sth);
377    }
378    elsif (/respflows/i) {
379        $sth = do_query("scanip");
380        write_resp_flows($sth);
381    }
382    elsif (/export$/i) {
383        open( OUTF, ">$outfile" )
384            or die "$appname: Cannot open output file '$outfile': $!\n";
385        $sth = do_query("export");
386        write_export_results($sth);
387        close(OUTF);
388    }
389    else {
390        die "$appname: Invalid report: '$_'\n";
391    }
392}
393
394$sth->finish;
395$dbh->disconnect;
396
397exit 0;
398
399# Helper functions
400
401
402### Parse the options
403
404sub parse_options()
405{
406    my $opt_help;
407    my $opt_man;
408    my $opt_version;
409    my $opt_outfile;
410
411    # process options.  see "man Getopt::Long"
412    GetOptions(
413        'start-date=s',  \$opt_start_hour,
414        'end-date=s',    \$opt_end_hour,
415        'saddress=s',    \$opt_saddress,
416        'sipset=s',      \$opt_sipset,
417        'daddress=s',    \$opt_daddress,
418        'dipset=s',      \$opt_dipset,
419        'report=s',      \$opt_report,
420        'show-header!',  \$opt_show_header,
421        'columnar!',     \$opt_columnar,
422        'output-path=s', \$opt_outfile,
423        'database=s',    \$opt_database,
424
425        'verbose|v!',    \$opt_verbose,
426
427        'help',    \$opt_help,
428        'man',     \$opt_man,
429        'version', \$opt_version,
430
431        ) or pod2usage( -exitval => -1 );
432
433    pod2usage( -exitval => 0 ) if $opt_help;
434    pod2usage( -exitval => 0, -verbose => 2 ) if $opt_man;
435    tool_version_exit() if $opt_version;
436
437    # set output files
438    if (!$opt_outfile) {
439        $outfile    = "&STDOUT";
440        $outfile_rw = "stdout";
441    }
442    elsif ($opt_outfile
443           =~ qr/(^[\w\+_\040\#\(\)\{\}\[\]\/\-\^,\.:;&%@\\~]+\$?$)/)
444    {
445        $outfile = "$1";
446        $outfile_rw = "$opt_outfile";
447    }
448    else {
449        die "$appname: Invalid characters in output-path filename\n";
450    }
451    print STDERR "writing results to $outfile\n" if $opt_verbose;
452
453
454    # set and verify times
455    if ( !defined($opt_start_hour) ) {
456        # when no start-date, use all of today
457        if ( defined $opt_end_hour ) {
458            die "$appname: Cannot specify end-date without start-date\n";
459        }
460
461        my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
462        $opt_start_hour = sprintf("%04d/%02d/%02d",
463                                  $year + 1900, $month + 1, $day);
464        if ( $opt_report =~ /volume/i ) {
465            # volume report is day-based, no need for hour
466            $opt_end_hour = $opt_start_hour;
467        }
468        else {
469            $opt_end_hour = $opt_start_hour . ":23";
470        }
471    }
472    elsif ( !defined($opt_end_hour) ) {
473        # when only start-date, use either that hour or the entire day
474        if ( $opt_report =~ /volume/i ) {
475            # volume report is day-based, no need for hour
476            $opt_end_hour = $opt_start_hour;
477        }
478        elsif ( $opt_start_hour =~ /:\d+/ ) {
479            $opt_end_hour = $opt_start_hour;
480        }
481        else {
482            $opt_end_hour = $opt_start_hour . ":23";
483        }
484    }
485    elsif ( $opt_start_hour !~ /:\d+/ ) {
486        # ignore hour on end-date when no hour on start-date.  Have
487        # end-date go to end-of-day unless volume report, which is
488        # already day-based
489        $opt_end_hour =~ s/^([^:]+):.*/$1/;
490        if ( $opt_report !~ /volume/i ) {
491            $opt_end_hour .= ":23";
492        }
493    }
494    elsif ( $opt_end_hour !~ /:\d+/ && $opt_report !~ /volume/i ) {
495        # set ending hour to the starting hour
496        $opt_start_hour =~ m/(:\d.*)/;
497        $opt_end_hour .= $1;
498    }
499
500    my $start = val_date($opt_start_hour);
501    my $end = val_date($opt_end_hour);
502    if ($start > $end) {
503        die "$appname: Invalid dates: end-date is earlier that start-date\n";
504    }
505
506    # verify source IPs
507    val_ip($opt_saddress, 's.sip');
508    val_set($opt_sipset, 's.sip');
509}
510
511sub val_date(\$)
512{
513    my $in = shift;
514    return unless defined $$in;
515
516    my $time = 0;
517    if (
518        $$in =~ m{ ^
519    (\d{1,4})/(\d{1,2})/(\d{1,2}):?(\d{1,2})?
520    $ }x
521        )
522    {
523        # Has a full date, may or may not have further date parts
524        my ( $year, $mon, $day, $hour ) = ( $1, $2, $3, $4 );
525        $hour = 0 unless defined $hour;
526        if ( $hour > 23 ) {
527            die "$appname: Invalid date '$$in': Hour is too large\n";
528        }
529        if ( $year < 2000 || $year > 2050 ) {
530            die "$appname: Invalid date '$$in': Year is out of range\n";
531        }
532        if ( $day == 0 ) {
533            die "$appname: Invalid date '$$in': Day is too small\n";
534        }
535        if ( $mon == 4 || $mon == 6 || $mon == 9 || $mon == 11 ) {
536            if ( $day > 30 ) {
537                die "$appname: Invalid date '$$in': Day is too large\n";
538            }
539        }
540        elsif ( $mon == 2 ) {
541            if (( $year % 400 == 0 )
542                || ( $year % 4 == 0 && $year % 100 != 0 ))
543            {
544                if ( $day > 29 ) {
545                    die "$appname: Invalid date '$$in': Day is too large\n";
546                }
547            }
548            elsif ( $day > 28 ) {
549                die "$appname: Invalid date '$$in': Day is too large\n";
550            }
551        }
552        elsif ( $mon < 1 || $mon > 12 ) {
553            die "$appname: Invalid date '$$in': Month is out of range\n";
554        }
555        elsif ( $day > 31 ) {
556            die "$appname: Invalid date '$$in': Day is too large\n";
557        }
558
559        $time = (($year - 1900) * 1000000 + $mon * 10000 + $day * 100 + $hour);
560        print STDERR "$$in -> " if $opt_verbose;
561        $$in = sprintf( "%04d/%02d/%02d:%02d", $year, $mon, $day, $hour );
562        print STDERR "$$in\n" if $opt_verbose;
563    }
564    else {
565        die "$appname: Invalid date: $$in\n";
566    }
567
568    return $time;
569}
570
571### Validate an IP argument, and parse into ranges
572
573sub val_ip(\$$)
574{
575    my $in  = shift;
576    my $var = shift;
577    return unless defined $$in;
578    my @result = ();
579
580    # create temporary IPset from the input
581    my $set_file = File::Temp::tmpnam();
582
583    open( IPSET_OUT,
584          "|$rwsetbuild --ip-ranges --record-version=2 - '$set_file'" )
585        or die "$appname: rwsetbuild failed: $!";
586    for (split /,/, $$in) {
587        print IPSET_OUT "$_\n"
588            or die "$appname: rwsetbuild failed: $!";
589    }
590    close(IPSET_OUT)
591        or die "$appname: rwsetbuild failed: $!";
592
593    $$in = $set_file;
594
595    val_set($$in, $var);
596
597    unlink $set_file;
598}
599
600### Validate an ipset argument, and parse the set into ranges
601
602sub val_set(\$$)
603{
604    my $in  = shift;
605    my $var = shift;
606
607    # verify input
608    return unless defined $$in;
609    if ( !-e $$in ) {
610        die "$appname: Invalid (non-existent) IPset file: $$in\n";
611    }
612    my $count = `$rwsetcat --count-ips '$$in' 2>&1`;
613    unless ($count =~ /^\d+$/) {
614        die "$appname: rwsetcat failed: $count\n";
615    }
616    if ($count == 0) {
617        $$in = undef;
618        return;
619    }
620
621    # process contents of the IPset
622    my @result = ();
623    open( IPSET_IN,
624          "$rwsetcat --ip-format=decimal --ip-ranges --delimited=, '$$in'|")
625        or die "$appname: rwsetcat failed: $!\n";
626    while (<IPSET_IN>) {
627        chomp;
628        my (undef, $first, $last) = split /,/;
629        if ( $first == $last ) {
630            push @result, "($var = $first)";
631        }
632        else {
633            push @result, "($var >= $first AND $var <= $last)";
634        }
635    }
636    close( IPSET_IN );
637    $$in = join " or\n             ", @result;
638}
639
640sub load_rcfile()
641{
642    my $HOME   = ( getpwuid($<) )[7];
643    my $rcfile = "$HOME/.rwscanrc";
644
645    if (defined($ENV{'RWSCANRC'})) {
646        $rcfile = $ENV{'RWSCANRC'};
647    }
648
649    # First, look for .rwscanrc in the current user's home directory
650    if ( !-f $rcfile ) {
651
652        # If no .rwscanrc exists in the user's ~, we look in the
653        # "share/silk" directory parallel to the directory where the
654        # script runs, under the assumption that the script is in a
655        # "bin" subdirectory.
656
657        my $script_root = $FindBin::Bin;
658        $script_root =~ s@/bin$@@;
659        $rcfile = "$script_root/share/silk/.rwscanrc";
660        if ( !-f $rcfile ) {
661            $rcfile = "$script_root/.rwscanrc";
662            if ( !-f $rcfile ) {
663                warn ("$appname: Could not find .rwscanrc file,",
664                      " defaults will be used\n");
665                return -1;
666            }
667        }
668    }
669    my %rcopts;
670    open( RCFILE, $rcfile )
671        or die "$appname: Cannot open '$rcfile': $!\n";
672    while (<RCFILE>) {
673        next if (/^\s*#/);
674        if (/^(\S+)\s*=\s*(.*)\s*$/) {
675            $rcopts{$1} = $2;
676        }
677    }
678    close(RCFILE);
679
680    if ( defined $rcopts{'db_driver'} ) {
681        $conf_db_driver = $rcopts{'db_driver'};
682        if ( $conf_db_driver =~ /oracle/i ) {
683            $conf_db_driver = "oracle";
684            die if !eval { require DBD::Oracle; };
685        }
686        elsif ( $conf_db_driver =~ /postgresql/i ) {
687            $conf_db_driver = "postgresql";
688            die if !eval { require DBD::Pg; };
689        }
690        elsif ( $conf_db_driver =~ /mysql/i ) {
691            $conf_db_driver = "mysql";
692            die if !eval { require DBD::mysql; };
693        }
694        elsif ( $conf_db_driver =~ /sqlite/i ) {
695            $conf_db_driver = "sqlite";
696            die if !eval { require DBD::SQLite; };
697        }
698        else {
699            die "$appname: Unsupported db_driver: $conf_db_driver\n";
700        }
701    }
702    else {
703        warn ("$appname: Warning: db_driver not specified in rcfile,",
704              " defaulting to Oracle\n");
705        $conf_db_driver = "oracle";
706    }
707
708    if ( !defined $rcopts{'oracle_userid'} ) {
709        $conf_db_user = $rcopts{'db_userid'};
710        $conf_db_pass = $rcopts{'db_password'};
711    }
712    else {
713        warn ("$appname: Warning: using legacy option 'oracle_userid'. ",
714              "Please use 'db_userid' instead.\n");
715        $conf_db_user = $rcopts{'oracle_userid'};
716        $conf_db_pass = $rcopts{'oracle_password'};
717    }
718
719    if ( defined $rcopts{'db_instance'} ) {
720        $conf_db_instance = $rcopts{'db_instance'};
721    }
722
723    if ( defined $rcopts{'rw_in_class'} ) {
724        $conf_rw_in_class = $rcopts{'rw_in_class'};
725    }
726
727    if ( defined $rcopts{'rw_in_type'} ) {
728        $conf_rw_in_type = $rcopts{'rw_in_type'};
729    }
730
731    if ( defined $rcopts{'rw_out_class'} ) {
732        $conf_rw_out_class = $rcopts{'rw_out_class'};
733    }
734
735    if ( defined $rcopts{'rw_out_type'} ) {
736        $conf_rw_out_type = $rcopts{'rw_out_type'};
737    }
738}
739
740sub db_connect_oracle()
741{
742    my $dbh = DBI->connect( "dbi:Oracle:$opt_database",
743                            $conf_db_user, $conf_db_pass )
744        or die "$appname: Oracle connect failed: $!";
745    $dbh->do("alter session set NLS_DATE_FORMAT = 'YYYY/MM/DD:HH24:MI:SS'");
746    return $dbh;
747}
748
749sub db_connect_postgresql()
750{
751    my $dbh = DBI->connect( "dbi:Pg:dbname=$opt_database",
752                            $conf_db_user, $conf_db_pass )
753        or die "$appname: Postgress connect failed: $!";
754
755    #  $dbh->do("alter session set NLS_DATE_FORMAT = 'YYYY/MM/DD:HH24:MI:SS'");
756    return $dbh;
757}
758
759sub db_connect_mysql()
760{
761    my $dbh = DBI->connect( "dbi:mysql:$opt_database",
762                            $conf_db_user, $conf_db_pass )
763        or die "$appname: MySQL connect failed: $!";
764    return $dbh;
765}
766
767sub db_connect_sqlite()
768{
769    my $dbh = DBI->connect( "dbi:SQLite:$opt_database",
770                            $conf_db_user, $conf_db_pass )
771      or die "$appname: SQlite connect failed: $!";
772    return $dbh;
773}
774
775sub do_query($)
776{
777    my ($type) = @_;
778    my $saddress_part = "";
779    if ( defined $opt_saddress ) {
780        $saddress_part = qq{
781      AND ($opt_saddress)
782    };
783    }
784
785    my $sipset_part = "";
786    if ( defined $opt_sipset ) {
787        $sipset_part = qq{
788      AND ($opt_sipset)
789    };
790    }
791
792    my $query = $queries{$conf_db_driver}{$type};
793
794    $query =~ s/\$saddress_part/$saddress_part/g;
795    $query =~ s/\$sipset_part/$sipset_part/g;
796
797    $opt_start_hour =~ m!(\d+)/(\d+)/(\d+):(\d+)!;
798    my $start_hour_iso = "$1-$2-$3 $4:00:00";
799
800    $opt_end_hour =~ m!(\d+)/(\d+)/(\d+):(\d+)!;
801    my $end_hour_iso = "$1-$2-$3 $4:00:00";
802
803    $query =~ s/\$start_hour_iso/$start_hour_iso/g;
804    $query =~ s/\$end_hour_iso/$end_hour_iso/g;
805
806    $query =~ s/\$start_hour/$opt_start_hour/g;
807    $query =~ s/\$end_hour/$opt_end_hour/g;
808
809    print STDERR "$type query:\n$query\n" if $opt_verbose;
810
811    my $sth = $dbh->prepare( $query, { ora_check_sql => 0 } )
812        or die "$appname: Failed to prepare database query\n";
813
814    my $rv = $sth->execute;
815    return $sth;
816}
817
818
819# Take the results from a query and create an IPset
820sub query_to_ipset($$)
821{
822    my ($sth, $set_file) = @_;
823
824    my $s_srcaddr;
825    $sth->bind_columns( \$s_srcaddr );
826
827    open( IPSET_OUT, "|$rwsetbuild stdin '$set_file'" )
828        or die "$appname: rwsetbuild failed: $!";
829    while ( $sth->fetch ) {
830        my $o_srcaddr = join('.', unpack('C4', pack('N', $s_srcaddr)));
831        print IPSET_OUT $o_srcaddr, "\n"
832            or die "$appname: rwsetbuild failed: $!";
833    }
834    close(IPSET_OUT)
835        or die "$appname: rwsetbuild failed: $!";
836}
837
838
839# Generate output for --report=standard: Write one textual line for
840# each scanning IP
841sub write_standard_results($)
842{
843    my ($sth) = @_;
844    my (
845        $s_id,  $s_stime, $s_proto, $s_etime,
846        $s_sip, $s_flows, $s_pkts,  $s_bytes
847    );
848
849    $sth->bind_columns(
850        \(
851            $s_id,    $s_sip,   $s_stime, $s_etime,
852            $s_proto, $s_flows, $s_pkts,  $s_bytes
853        )
854    );
855
856    if ($opt_columnar) {
857        if ($opt_show_header) {
858            printf OUTF "%10s %-19s %-19s %-10s %-15s %10s %10s %14s\n",
859              "scan-id", "start-time", "end-time", "protocol", "source-address",
860              "flows", "packets", "bytes",;
861        }
862        open( SAVE_STDERR, ">&STDERR" );
863        open( STDERR,      ">/dev/null" );
864        eval {
865            while ( $sth->fetch ) {
866                my $o_sip = join( '.', unpack( 'C4', pack( 'N', $s_sip ) ) );
867                printf OUTF "%10d %-19s %-19s %-10d %-15s %10d %10d %14s\n",
868                  $s_id, $s_stime, $s_etime, $s_proto, $o_sip, $s_flows,
869                  $s_pkts, $s_bytes;
870            }
871        };
872        open( STDERR, ">&SAVE_STDERR" );
873    }
874    else {
875        if ($opt_show_header) {
876            print OUTF
877              "scan-id|stime|etime|proto|srcaddr|flows|packets|bytes\n";
878        }
879
880        # This song and dance is to make it so that ^C doesn't cause
881        # an ugly error message.
882        use vars qw( *SAVE_STDERR );    # to avoid warning
883        open( SAVE_STDERR, ">&STDERR" );
884        open( STDERR,      ">/dev/null" );
885
886        eval {
887            while ( $sth->fetch ) {
888                my $o_sip = join( '.', unpack( 'C4', pack( 'N', $s_sip ) ) );
889                print OUTF "$s_id|$s_stime|$s_etime|$s_proto|$o_sip|"
890                  . "$s_flows|$s_pkts|$s_bytes\n";
891            }
892        };
893        open( STDERR, ">&SAVE_STDERR" );
894    }
895
896}
897
898
899# Generate output for --report=export: Write textual columns retrieved
900# from the database in a form consisent with rwscan.
901sub write_export_results($)
902{
903    my ($sth) = @_;
904    my (
905        $s_id,         $s_stime, $s_proto, $s_etime,
906        $s_sip,        $s_flows, $s_pkts,  $s_bytes,
907        $s_scan_model, $s_scan_prob
908    );
909
910    $sth->bind_columns(
911        \(
912            $s_id,         $s_sip,   $s_proto, $s_stime,
913            $s_etime,      $s_flows, $s_pkts,  $s_bytes,
914            $s_scan_model, $s_scan_prob
915        )
916    );
917
918    if ($opt_show_header) {
919        print OUTF "id|sip|proto|stime|etime|"
920          . "flows|packets|bytes|scan_model|scan_prob\n";
921    }
922
923    use vars qw( *SAVE_STDERR );    # to avoid warning
924    open( SAVE_STDERR, ">&STDERR" );
925    open( STDERR,      ">/dev/null" );
926
927    eval {
928        while ( $sth->fetch ) {
929            print OUTF "$s_id|$s_sip|$s_proto|$s_stime|$s_etime|"
930              . "$s_flows|$s_pkts|$s_bytes|$s_scan_model|$s_scan_prob\n";
931        }
932    };
933    open( STDERR, ">&SAVE_STDERR" );
934}
935
936
937# Generate output for --report=volume: Columnar text giving the daily
938# scan volume
939sub write_volume_results($)
940{
941    my ($sth) = @_;
942
943    my @titles = qw(date flows packets bytes);
944    my ( $s_stime, $s_flows, $s_pkts, $s_bytes );
945    $sth->bind_columns( \( $s_stime, $s_flows, $s_pkts, $s_bytes ) );
946
947    my $format;
948
949    if ($opt_columnar) {
950        if ($opt_show_header) {
951            printf OUTF "%-19s %10s %10s %14s\n", @titles;
952        }
953        $format = "%-19s %10u %10u %14u\n";
954    }
955    else {
956        if ($opt_show_header) {
957            print OUTF join("|", @titles), "\n";
958        }
959        $format = "%s|%u|%u|%u\n";
960    }
961
962    # This song and dance is to make it so that ^C doesn't cause
963    # an ugly error message.
964    use vars qw( *SAVE_STDERR );    # to avoid warning
965    open( SAVE_STDERR, ">&STDERR" );
966    open( STDERR,      ">/dev/null" );
967    eval {
968        while ( $sth->fetch ) {
969            printf OUTF $format, $s_stime, $s_flows, $s_pkts, $s_bytes;
970        }
971    };
972    open( STDERR, ">&SAVE_STDERR" );
973}
974
975
976# Generate output for --report=scanset: create an IPset containing
977# scanning IPs.
978sub write_scan_set($)
979{
980    my ($sth) = @_;
981
982    if ( -t $outfile ) {
983        die("$appname: Will not write IPset to a terminal. ",
984            "Please specify an output file on the command line.\n");
985    }
986
987    if ( $opt_daddress || $opt_dipset ) {
988        my $set_filename = File::Temp::tmpnam();
989        query_to_ipset($sth, $set_filename);
990
991        my @args;
992        push( @args,
993              '--start-date', $opt_start_hour, '--end-date', $opt_end_hour,
994              '--sipset',     $set_filename,   '--pass',     'stdout' );
995
996        if ( defined($conf_rw_in_class) ) {
997            push( @args, '--class', $conf_rw_in_class );
998        }
999        if ( defined($conf_rw_in_type) ) {
1000            push( @args, '--type', $conf_rw_in_type );
1001        }
1002        if ( defined($opt_daddress) ) {
1003            push( @args, '--daddress', $opt_daddress );
1004        }
1005        if ( defined($opt_dipset) ) {
1006            push( @args, '--dipset', $opt_dipset );
1007        }
1008
1009        system( "$rwfilter @args | $rwset --sip-file '$outfile_rw'" );
1010        unlink $set_filename;
1011    }
1012    else {
1013        query_to_ipset($sth, $outfile_rw);
1014    }
1015}
1016
1017
1018# Generate output for --report=scanflows: flows coming from the
1019# scanning IPs during the time window
1020sub write_scan_flows($)
1021{
1022    my ($sth) = @_;
1023
1024    if ( -t $outfile ) {
1025        die("$appname: Will not write flow records to a terminal. ",
1026            "Please specify an output file on the command line.\n");
1027    }
1028
1029    my $set_filename = File::Temp::tmpnam();
1030    query_to_ipset($sth, $set_filename);
1031
1032    my @args;
1033    push( @args,
1034          '--start-date', $opt_start_hour, '--end-date', $opt_end_hour,
1035          '--sipset',     $set_filename,   '--pass',     $outfile_rw );
1036
1037    if ( defined($conf_rw_in_class) ) {
1038        push( @args, '--class', $conf_rw_in_class );
1039    }
1040    if ( defined($conf_rw_in_type) ) {
1041        push( @args, '--type', $conf_rw_in_type );
1042    }
1043    if ( defined($opt_daddress) ) {
1044        push( @args, '--daddress', $opt_daddress );
1045    }
1046    if ( defined($opt_dipset) ) {
1047        push( @args, '--dipset', $opt_dipset );
1048    }
1049
1050    print STDERR Dumper(@args) if $opt_verbose;
1051
1052    system( $rwfilter, @args );
1053
1054    unlink $set_filename;
1055}
1056
1057
1058# Generate output for --report=respflows: flows that are responses to
1059# the scanning IPs during the time window
1060sub write_resp_flows($)
1061{
1062    my ($sth) = @_;
1063
1064    if ( -t $outfile ) {
1065        die("$appname: Will not write flow records to a terminal. ",
1066            "Please specify an output file on the command line.\n");
1067    }
1068
1069    my $set_filename = File::Temp::tmpnam();
1070    query_to_ipset($sth, $set_filename);
1071
1072    my @args;
1073    push( @args,
1074          '--start-date', $opt_start_hour, '--end-date', $opt_end_hour,
1075          '--dipset',     $set_filename,   '--pass',     $outfile_rw );
1076
1077    # Restrict filter to outgoing flows only.
1078    if ( defined($conf_rw_out_class) ) {
1079        push( @args, '--class', $conf_rw_out_class );
1080    }
1081    if ( defined($conf_rw_out_type) ) {
1082        push( @args, '--type', $conf_rw_out_type );
1083    }
1084
1085    # These two look funny at first glance.  However, opt_daddress or
1086    # opt_dipset specify the destinations of scans, and we want the
1087    # response flows sent back to the scanners.
1088    if ( defined($opt_daddress) ) {
1089        push( @args, '--saddress', $opt_daddress );
1090    }
1091    if ( defined($opt_dipset) ) {
1092        push( @args, '--sipset', $opt_dipset );
1093    }
1094
1095    system( $rwfilter, @args );
1096
1097    unlink $set_filename;
1098}
1099
1100
1101# Generate output for --version: Print version and exit.
1102sub tool_version_exit()
1103{
1104    my $pkg = '@PACKAGE_STRING@' || 'SiLK';
1105    my $bugs = '@PACKAGE_BUGREPORT@' || 'UNKNOWN';
1106
1107    print <<EOF;
1108$appname: Part of $pkg
1109Copyright (C) 2001-2020 by Carnegie Mellon University
1110GNU General Public License (GPL) Rights pursuant to Version 2, June 1991.
1111Government Purpose License Rights (GPLR) pursuant to DFARS 252.227.7013.
1112Send bug reports, feature requests, and comments to $bugs.
1113EOF
1114    exit;
1115}
1116
1117__END__
1118
1119=head1 NAME
1120
1121B<rwscanquery> - Query the network scan database
1122
1123=head1 SYNOPSIS
1124
1125 rwscanquery [options]
1126
1127Report Options:
1128
1129  --report=REPORT_TYPE       Select query and output options.  Values
1130                             for REPORT_TYPE are standard, volume,
1131                             scanset, scanflows, respflows, and export
1132
1133  --start-date=YYYY/MM/DD:HH Report on scans active after this date.
1134  --end-date=YYYY/MM/DD:HH   Defaults to start-date.
1135
1136  --saddress=ADDR_SPEC       Show scans originating from matching hosts.
1137  --sipset=IPSET_FILE        Show scans originating from hosts in set.
1138
1139  --daddress=IP_WILDCARD     Show only scans targeting matching hosts.
1140  --dipset=IPSET_FILE        Show only scans targeting hosts in set.
1141
1142  --show-header              Display column titles at start of output.
1143  --columnar                 Display more human-readable columnar view.
1144  --output-path=PATH         Write results to the specified file.
1145
1146Configuration Options:
1147
1148  --database=DBNAME          Query an alternate scan database
1149
1150Help Options:
1151
1152  --help                     Display this brief help message.
1153  --man                      Display the full documentation.
1154  --version                  Display the version information.
1155
1156=head1 DESCRIPTION
1157
1158B<rwscanquery> queries the network scan database---that is, the
1159database that contains scans found by B<rwscan(1)>.  The type of
1160output B<rwscanquery> creates is controlled by the B<--report> switch
1161as described in the L</Report Options> section below.  B<rwscanquery>
1162writes its output to the location specified by the B<--output-path>
1163switch or to the standard output when that switch is not provided.
1164
1165B<rwscanquery> runs a query of the scan database and then, depending
1166on the report type, either displays the result set as text or creates
1167a binary SiLK from the result set.  The database rows that are part of
1168the result set may be limited by using the B<--start-date>,
1169B<--end-date>, B<--saddress>, and B<--sipset> switches.  The result
1170set is always limited to a time window, and the current day is used
1171when no B<--start-date> is given.
1172
1173The following three report types produce textual output.  The default
1174output displays the values separated by a vertical bar (C<|>) with no
1175spacing.  The B<--columnar> switch causes the output to appear in
1176columns with a space-delimiter between the columns.  The output
1177includes no title line unless the B<--show-header> switch is
1178specified.
1179
1180=over 4
1181
1182=item *
1183
1184The C<standard> report contains most of the columns in the database
1185for the rows in the result set.  (The columns containing the scan
1186model and scan probability are not included.)
1187
1188=item *
1189
1190The C<volume> report groups the rows in the result set by day and
1191shows sums the flows, packets, and bytes columns for each day.
1192
1193=item *
1194
1195The C<export> report contains all the columns in the database for the
1196rows in the result set, and the rows are displayed in a format
1197compatible with B<rwscan>.
1198
1199=back
1200
1201The following three report types create a binary SiLK file as their
1202result.  These report types invoke other SiLK tools (namely
1203B<rwfilter(1)>, B<rwset(1)>, B<rwsetbuild(1)>, and B<rwsetcat(1)>) and
1204the report types assume B<rwfilter> has access to a SiLK data
1205repository.
1206
1207The first step in all three of these report types is for
1208B<rwscanquery> to get the distinct IP addresses for the rows in the
1209result set and pass them into B<rwsetbuild> to create a temporary
1210IPset file containing the scanning IPs.
1211
1212=over 4
1213
1214=item *
1215
1216A C<scanflows> report produces a file of SiLK Flow records whose
1217source IP is a scanning IPs.  B<rwscanquery> uses the temporary IPset
1218as an argument to B<rwfilter> to find flow records in your data
1219repository that originated from the scanning IPs within the time
1220window.  You may choose to limit the report to particular IPs targeted
1221by the scanning IPs by specifying the B<--daddress> or B<--dipset>
1222switches.  The output from B<rwfilter> is the output of the report.
1223The B<rwfilter> invocation uses the configuration values
1224C<rw_in_class> and C<rw_in_type> if they are specified in the
1225configuration file (c.f. L</CONFIGURATION>).
1226
1227=item *
1228
1229A C<respflows> report produces a file of SiLK Flow records whose
1230destination IP is a scanning IP.  These flow records may represent
1231responses to a scan.  To create this report, B<rwscanquery> performs
1232steps similar to those for the C<scanflows> report except the
1233direction of the B<rwfilter> command is reversed to find flow records
1234going to the scanning IPs.  You may choose to limit the report to
1235particular IPs that responded to the scan by specifying the
1236B<--daddress> or B<--dipset> switches.  The output from B<rwfilter> is
1237the output of the report.  The B<rwfilter> invocation uses the
1238configuration values C<rw_out_class> and C<rw_out_type> if they are
1239specified in the configuration file (c.f. L</CONFIGURATION>).
1240
1241=item *
1242
1243A C<scanset> report produces a binary IPset file.
1244
1245=over 4
1246
1247=item *
1248
1249If neither the B<--daddress> nor B<--dipset> switches are specified,
1250the output of the this report is the temporary IPset file containing
1251the scanning IPs; that is, all the scanning IPs in the time window.
1252
1253=item *
1254
1255Otherwise, B<rwscanquery> performs the same steps it does as when
1256creating C<scanflows> report.  Next, instead of returning the output
1257from B<rwfilter>, B<rwscanquery> passes the flow records into B<rwset>
1258to create an IPset file containing the scanning IPs that targeted
1259particular IP addresses.
1260
1261=back
1262
1263=back
1264
1265=head1 OPTIONS
1266
1267Option names may be abbreviated if the abbreviation is unique or is an
1268exact match for an option.  A parameter to an option may be specified
1269as B<--arg>=I<param> or B<--arg> I<param>, though the first form is
1270required for options that take optional parameters.
1271
1272=head2 Report Options
1273
1274=over 4
1275
1276=item B<--report>=I<TYPE>
1277
1278Specify the query and the type of output to create.  When this switch
1279is not specified, the default is a C<standard> report.  The supported
1280values for I<TYPE> are:
1281
1282=over 4
1283
1284=item standard
1285
1286Write one textual line of output for each scan record in the scan
1287database.  By default, the output has no titles and it is not in
1288columnar form.  Specify the B<--show-header> and/or B<--columnar>
1289switches to make the output more human readable.
1290
1291=item volume
1292
1293Write a daily scan activity volume summary report for each day within
1294the time period.  By default, the output has no titles and it is not
1295in columnar form.  Specify the B<--show-header> and/or B<--columnar>
1296switches to make the output more human readable.
1297
1298=item scanset
1299
1300Write an IPset file containing the IP addresses which were the sources
1301of scan activity during the selected time period.  The output of this
1302report type is binary, so you must redirect or pipe the output to a
1303location or specify the B<--output-path> switch.
1304
1305=item scanflows
1306
1307Write a SiLK Flow file containing all flows originating from scanning
1308IP addresses within the specified time period.  This flow data
1309includes flows originating from any host that would be listed as a scan
1310source by your query, from any time within the time period specified
1311by I<--start-date> and I<--end-date>.  Note that this may include
1312flows that were not identified by the scan analysis as being part of a
1313scan.  The output of this report type is binary, so you must redirect
1314or pipe the output to a location or specify the B<--output-path>
1315switch.
1316
1317=item respflows
1318
1319Write a SiLK Flow file containing all flows sent to scanning IP
1320addresses within the specified time period---that is, possible
1321responses to the scanners.  The output of this report type is binary,
1322so you must redirect or pipe the output to a location or specify the
1323B<--output-path> switch.
1324
1325=item export
1326
1327Write textual output consistent with the output format of the
1328B<rwscan(1)> tool.  Specify the B<--show-header> switch to include a
1329title line.
1330
1331=back
1332
1333=item B<--start-date>=I<YYYY/MM/DD:HH>
1334
1335Display scans which were active after this hour.  When this argument
1336contains a date with no hour and no B<--end-date> switch is
1337specified, scans for that entire day are returned.  If this switch
1338is not specified at all, scans for the current day (based on the local
1339time on the host machine) are returned.
1340
1341=item B<--end-date>=I<YYYY/MM/DD:HH>
1342
1343Display scans which were active before the end of this hour.  If no
1344end-date is given, defaults to the same as start-date.  It is an error
1345to provide an end-date without a start-date.
1346
1347=item B<--saddress>=I<ADDR_SPEC>
1348
1349Display scans originating from hosts described in I<ADDR_SPEC>, where
1350I<ADDR_SPEC> is a list of addresses, address ranges, and CIDR
1351blocks.  Only scans originating from hosts in the list are
1352displayed.
1353
1354=item B<--sipset>=I<IPSET_FILE>
1355
1356Display scans originating from hosts in I<IPSET_FILE>, where
1357I<IPSET_FILE> is a standard SiLK IPset file as created by B<rwset(1)>
1358or B<rwsetbuild(1)>.  Note that a very complex IPset may take a long
1359time to process, or even fail to return any results.
1360
1361=item B<--daddress>=I<IP_WILDCARD>
1362
1363Display scans targeting hosts described in I<IP_WILDCARD>, where
1364I<IP_WILDCARD> is a single IP address, a single CIDR block, or an IP
1365Wildcard expression accepted by B<rwfilter(1)>.  To match on multiple
1366IPs or networks, use the B<--dipset> switch.  This switch is ignored
1367for B<--report> types other than C<scanset>, C<scanflows>, and
1368C<respflows>.
1369
1370=item B<--dipset>=I<IPSET_FILE>
1371
1372Display scans targeting hosts in I<IPSET_FILE>, where I<IPSET_FILE> is
1373a standard SiLK IPset file.  This switch
1374is ignored for B<--report> types other than C<scanset>, C<scanflows>,
1375and C<respflows>.
1376
1377=item B<--show-header>
1378
1379Display a header line giving a short name (or title) for each field
1380when printing textual output with the C<standard>, C<volume>, or
1381C<export> report types.  By default, no header is displayed.
1382
1383=item B<--columnar>
1384
1385Display output in more human-readable columnar format when printing
1386textual output with the C<standard> or C<volume> report types.  When
1387this switch is not given, the output is presented as data fields
1388delimited by the B<|> character.
1389
1390=item B<--output-path>=I<PATH>
1391
1392Write results to I<PATH> instead of to the standard output.
1393
1394=back
1395
1396=head2 Configuration Options
1397
1398=over 4
1399
1400=item B<--database>=I<DBNAME>
1401
1402Select a database instance other than the default.  The default is
1403specified by the C<db_instance> value in the configuration file as
1404described in L</CONFIGURATION> below.
1405
1406=back
1407
1408=head2 Other Options
1409
1410=over 4
1411
1412=item B<--help>
1413
1414Display a brief usage message and exit.
1415
1416=item B<--man>
1417
1418Display full documentation for B<rwscanquery> and exit.
1419
1420=item B<--version>
1421
1422Print the version number and exit the application.
1423
1424=back
1425
1426=head1 CONFIGURATION
1427
1428B<rwscanquery> reads configuration information from a file named
1429F<.rwscanrc>.  If the RWSCANRC environment variable is set, it is used
1430as the location of the F<.rwscanrc> file.  When RWSCANRC is not set,
1431B<rwscanquery> attempts to find a file name F<.rwscanrc> in the
1432directories specified in the L</FILES> section below.
1433
1434The format of the F<.rwscanrc> file is I<name>=I<value> pairs, one per
1435line.  The configuration parameters currently read from F<.rwscanrc>
1436are:
1437
1438=over 2
1439
1440=item db_driver
1441
1442The type of database to connect to.  B<rwscanquery> supports C<oracle>,
1443C<postgresql>, C<mysql>, and C<sqlite>.
1444
1445=item db_userid
1446
1447The userid to use when connecting to the scan database.
1448
1449=item db_password
1450
1451The password to use when connecting to the scan database.
1452
1453=item db_instance
1454
1455The name of the database instance to connect to if none is provided with the
1456B<--database> command line switch.  If neither this configuration option nor
1457the B<--database> command line switch are specified, the hard-coded default
1458database instance "SCAN" is used.
1459
1460=item rw_in_class
1461
1462The class for incoming flow data.  The C<rw_in_class> and
1463C<rw_in_type> values are used to query scan flows when the
1464C<scanflows> report type is requested or when the B<--daddress> or
1465B<--dipset> switches are used for the C<scanset> report type.  If not
1466specified, B<rwfilter>'s default is used.
1467
1468=item rw_in_type
1469
1470The type(s) for incoming flow data.  See C<rw_in_class> for details.
1471
1472=item rw_out_class
1473
1474The class for outgoing flow data.  The C<rw_out_class> and
1475C<rw_out_type> values are used to query scan flows when the
1476C<respflows> report type is requested.  If not specified,
1477B<rwfilter>'s default is used.
1478
1479=item rw_out_type
1480
1481The type(s) for outgoing flow data.  See C<rw_out_class> for details.
1482(Note that B<rwfilter> often defaults to querying incoming flows, so
1483this parameter ought to be specified.)
1484
1485=back
1486
1487=head1 EXAMPLES
1488
1489In the following examples, the dollar sign (C<$>) represents the shell
1490prompt.  The text after the dollar sign represents the command line.
1491Lines have been wrapped for improved readability, and the back slash
1492(C<\>) is used to indicate a wrapped line.
1493
1494Display information on all scans occurring during the 12:00 hour
1495(12:00:00 to 12:59:59) of 2009/02/12.
1496
1497 $ rwscanquery --show-header --start-date=2009/02/12:12
1498 scan-id|stime|etime|proto|srcaddr|flows|packets|bytes
1499 499|2009-02-12 12:01:56|2009-02-12 12:08:39|6|10.199.151.231|256|256|10240
1500 365|2009-02-12 12:08:40|2009-02-12 12:14:54|6|10.146.88.117|256|256|10240
1501 57|2009-02-12 12:28:51|2009-02-12 12:34:55|6|10.29.23.160|256|256|10240
1502 627|2009-02-12 11:52:07|2009-02-12 12:41:16|17|10.253.24.230|1023|1023|30175
1503 366|2009-02-12 12:41:50|2009-02-12 12:48:14|6|10.146.89.46|256|256|10240
1504 182|2009-02-12 12:54:39|2009-02-12 13:01:20|6|10.79.26.176|256|256|10240
1505 4|2009-02-12 12:41:19|2009-02-12 13:33:57|17|10.2.47.87|1023|1023|30205
1506
1507Create the IPset file F<scan.set> containing the scanners discovered
1508during that hour.
1509
1510 $ rwscanquery --report=scanset --start-date=2009/02/12:12 \
1511        --output-path=scan.set
1512 $ rwsetcat scan.set
1513 10.2.47.87
1514 10.29.23.160
1515 10.79.26.176
1516 10.146.88.117
1517 10.146.89.46
1518 10.199.151.231
1519 10.253.24.230
1520
1521Repeat the first query but limit the output to scanners coming from
1522the CIDR block 10.199.0.0/16.
1523
1524 $ rwscanquery --show-header --start-date=2009/02/12:12 \
1525        --saddr=10.199.0.0/16
1526 scan-id|stime|etime|proto|srcaddr|flows|packets|bytes
1527 499|2009-02-12 12:01:56|2009-02-12 12:08:39|6|10.199.151.231|256|256|10240
1528
1529Expand the query for that CIDR block to include the preceding and
1530following hours (11:00:00 to 13:59:59).
1531
1532 $ rwscanquery --start-date=2009/02/12:11 --end-date=2009/02/12:13 \
1533        --saddr=10.199.0.0/16
1534 499|2009-02-12 12:01:56|2009-02-12 12:08:39|6|10.199.151.231|256|256|10240
1535 497|2009-02-12 13:33:57|2009-02-12 14:24:35|17|10.199.98.5|1023|1023|30079
1536
1537Create the IPset file F<scanning-cidr.set> that contains the CIDR
1538block 10.199.0.0/16, and then search for scans coming from that IP on
1539Feb 13, 2009.
1540
1541 $ cat scanning-cidr.txt
1542 10.199.0.0/16
1543 $ rwsetbuild scanning-cidr.txt scanning-cidr.set
1544 $
1545 $ rwscanquery --start-date=2009/02/13 --sipset=scanning-cidr.set
1546 500|2009-02-13 22:42:25|2009-02-13 22:48:45|6|10.199.207.32|256|256|10240
1547
1548Print the volume of data attributed to scans over a three day period.
1549
1550 $ rwscanquery --report=volume --show-header  \
1551        --start-date=2009/02/12 --end-date=2009/02/14
1552 date|flows|packets|bytes
1553 2009/02/12|137452|137499|17149008
1554 2009/02/13|74727|76167|2798040
1555 2009/02/14|76160|76160|2750531
1556
1557The following limits the volume report to the IPs in the file
1558F<scanning-cidr.set> and displays the results in columns.
1559
1560 $ rwscanquery --report=volume --show-header --columnar  \
1561        --start-date=2009/02/12 --end-date=2009/02/14    \
1562        --sipset=scanning-cidr.set
1563 date                     flows    packets          bytes
1564 2009/02/12                1279       1279          40319
1565 2009/02/13                 256        256          10240
1566 2009/02/14                 256        256          10240
1567
1568Get the SiLK Flow records coming from the scanners during the 12:00
1569hour on 2009/02/12 and store in the file F<scanning-flows.rw>.
1570
1571 $ rwscanquery --report=scanflows --start-date=2009/02/12:12  \
1572        --output=scanning-flows.rw
1573
1574Use B<rwuniq(1)> to summarize the file F<scanning-flows.rw>.
1575
1576 $ rwuniq --fields=sip --values=flows,packets,bytes  \
1577        --sort-output scanning-flows.rw
1578             sIP|   Records|        Packets|               Bytes|
1579      10.2.47.87|       373|            373|               11032|
1580    10.29.23.160|       256|            256|               10240|
1581    10.79.26.176|       203|            203|                8120|
1582   10.146.88.117|       256|            256|               10240|
1583    10.146.89.46|       256|            256|               10240|
1584  10.199.151.231|       256|            256|               10240|
1585   10.253.24.230|       846|            846|               24921|
1586
1587Run a respflows report to verify that there were no responses to the
1588scan.
1589
1590 $ rwscanquery --report=respflows --start-date=2009/02/12:12  \
1591        --output=scanning-response.rw
1592 $
1593 $ rwuniq --fields=sip --values=flows,packets,bytes  \
1594        --sort-output scanning-response.rw
1595             sIP|   Records|        Packets|               Bytes|
1596
1597Create the IPset F<subnet-scan.set> for scanners that targeted the
1598192.168.186.0/24 CIDR block during the 12:00 hour on 2009/02/12.
1599
1600 $ rwscanquery --report=scanset --start-date=2009/02/12:12        \
1601        --daddress=192.168.186.0/24 --output-path=subnet-scan.set
1602
1603Store the corresponding flow records for those scans in the file
1604F<subset-scan.rw>.
1605
1606 $ rwscanquery --report=scanflows --start-date=2009/02/12:12        \
1607        --daddress=192.168.186.0/24 --output-path=subnet-scan.rw
1608
1609Determine how many IPs in that subnet were targeted.
1610
1611 $ rwuniq --fields=sip --values=flows,distinct:dip subnet-scan.rw
1612             sIP|   Records|dIP-Distin|
1613    10.146.89.46|       256|       256|
1614
1615Display the title line for an export report.
1616
1617 $ rwscanquery --report=export --start-date=2009/02/12:12  \
1618        --show-header | head -1
1619 id|sip|proto|stime|etime|flows|packets|bytes|scan_model|scan_prob
1620
1621=head1 ENVIRONMENT
1622
1623=over 4
1624
1625=item RWSCANRC
1626
1627This environment variable allows the user to specify the location of
1628the F<.rwscanrc> configuration file.  The value may be a complete path
1629or a file relative to the user's current directory.  See
1630the L</FILES> section for standard locations of this file.
1631
1632=item SILK_CLOBBER
1633
1634The SiLK tools normally refuse to overwrite existing files.  Setting
1635SILK_CLOBBER to a non-empty value removes this restriction for the
1636report types of C<scanset>, C<scanflows>, and C<respflows>.
1637
1638=item SILK_CONFIG_FILE
1639
1640This environment variable is used as the location for the site
1641configuration file, F<silk.conf>, for report types that use
1642B<rwfilter>.  When this environment variable is not set, B<rwfilter>
1643searches for the site configuration file in the locations specified in
1644the L</FILES> section.
1645
1646=item SILK_DATA_ROOTDIR
1647
1648This environment variable specifies the root directory of data
1649repository for report types that use B<rwfilter>.  This value
1650overrides the compiled-in value.  In addition, B<rwfilter> may use
1651this value when searching for the SiLK site configuration files.  See
1652the L</FILES> section for details.
1653
1654=item SILK_RWFILTER_THREADS
1655
1656The number of threads B<rwfilter> uses when reading files from the
1657data store.
1658
1659=item SILK_PATH
1660
1661This environment variable gives the root of the install tree.  When
1662searching for the site configuration file, B<rwfilter> may use this
1663environment variable.  See the L</FILES> section for details.
1664
1665=item PATH
1666
1667This is the standard UNIX path (c.f., B<environ(7)>).  Depending on
1668the report type, B<rwscanquery> may invoke B<rwfilter(1)>,
1669B<rwset(1)>, B<rwsetbuild(1)>, or B<rwsetcat(1)> as part of its
1670processing.
1671
1672=item RWFILTER
1673
1674Complete path to B<rwfilter>.  If not set, B<rwscanquery> attempts to
1675find B<rwfilter> on your PATH.
1676
1677=item RWSET
1678
1679Complete path to B<rwset>.  If not set, B<rwscanquery> attempts to
1680find B<rwset> on your PATH.
1681
1682=item RWSETBUILD
1683
1684Complete path to B<rwsetbuild>.  If not set, B<rwscanquery> attempts
1685to find B<rwsetbuild> on your PATH.
1686
1687=item RWSETCAT
1688
1689Complete path to B<rwsetcat>.  If not set, B<rwscanquery> attempts to
1690find B<rwsetcat> on your PATH.
1691
1692=back
1693
1694=head1 FILES
1695
1696=over 4
1697
1698=item F<${RWSCANRC}>
1699
1700=item F<${HOME}/.rwscanrc>
1701
1702=item F<@prefix@/share/silk/.rwscanrc>
1703
1704Possible locations for the B<rwscanquery> configuration file,
1705F<.rwscanrc>.  In addition, B<rwscanquery> checks the parent directory
1706of the directory containing the B<rwscanquery> script.
1707
1708=item F<${SILK_CONFIG_FILE}>
1709
1710=item F<${SILK_DATA_ROOTDIR}/silk.conf>
1711
1712=item F<@SILK_DATA_ROOTDIR@/silk.conf>
1713
1714=item F<${SILK_PATH}/share/silk/silk.conf>
1715
1716=item F<${SILK_PATH}/share/silk.conf>
1717
1718=item F<@prefix@/share/silk/silk.conf>
1719
1720=item F<@prefix@/share/silk.conf>
1721
1722Possible locations for the SiLK site configuration file---for report
1723types that use B<rwfilter>.
1724
1725=back
1726
1727=head1 SEE ALSO
1728
1729B<rwscan(1)>, B<rwfilter(1)>, B<rwset(1)>, B<rwsetbuild(1)>,
1730B<rwsetcat(1)>, B<rwuniq(1)>, B<silk(7)>, B<environ(7)>
1731
1732=cut
1733
1734# Local Variables:
1735# mode:perl
1736# indent-tabs-mode:nil
1737# End:
1738