1#!/usr/local/bin/perl
2
3use strict;
4use locale;
5use Search::OpenFTS 0.40;
6use DBI();
7use Getopt::Std;
8use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
9
# Command-line switches end up in %opt: -h, -b, -l and -p take a value,
# while -d, -v, -q and -e are plain flags.  (-l is accepted by the parser
# but is not referenced in the code shown in this file.)
my %opt;
getopts( 'h:b:l:p:dvqe', \%opt );

# Both the -p option and at least one search word are required; when either
# is missing, print the usage summary and stop.
if ( !@ARGV || !$opt{p} ) {
    print <<"END_USAGE";
Usage
$0 -p DATABASE [-b count] [ -v ] [ -q | -d -h #headlines] [ -e ] WORD1 [ WORD2 [...] ]
\t-b count\tbenchmark
\t-v\t\tverbose
\t-q\t\tquiet
\t-d\t\tshow matched files
\t-h #results\tshow #headlines, 5 by default
\t-e\t\texplain
Example: search.pl -p openfts -h5 small magellanic cloud
END_USAGE
    exit;
}
26
# State shared by the rest of the script: the DBI handle, the OpenFTS
# object and the SQL text generated for the query.
my ( $dbi, $fts, $sql );

# Passed by reference to the query parser further down, which reports the
# rejected stop words and the ignored lexemes through them.
my ( @stopwords, @ignored );

# -p has the form DATABASE[:PREFIX]; when no colon is given $PREFIX is left
# undefined.
my ( $dbname, $PREFIX ) = split( ':', $opt{p} );

# Connect to PostgreSQL and report the driver's error text on failure
# instead of dying without any explanation.
$dbi = DBI->connect( 'DBI:Pg:dbname=' . $dbname )
  or die "$0: cannot connect to database '$dbname': $DBI::errstr\n";

# The relfunc string is single-quoted on purpose: $TSVECTOR and $QUERY are
# passed through literally (presumably placeholders that OpenFTS fills in
# itself -- confirm against the Search::OpenFTS documentation).
$fts = Search::OpenFTS->new(
    $dbi,
    prefix  => $PREFIX,
    relfunc => q[ rank( '{0.1, 0.2, 0.4, 1.0}', $TSVECTOR, $QUERY, 1 )],
  )
  or die "$0: cannot create Search::OpenFTS object for database '$dbname'\n";
40
# All remaining command-line words form one space-separated query string.
my $query = join( " ", @ARGV );

# Ask OpenFTS for the SQL text of this query; the rejected and ignored
# words are reported back through the two array references.
$sql = $fts->_sql( $query, rejected => \@stopwords, ignored => \@ignored );

if (@stopwords) {
    print "Stopwords:", join( ',', @stopwords ), "\n";
}

# Entries whose word is a single blank are not worth reporting.
@ignored = grep { $_->{word} ne ' ' } @ignored;
if (@ignored) {
    print "Ignored:", ( map { "('$_->{word}',$_->{type})" } @ignored );
    print "\n";
}

# -v: show the generated SQL, or explain why there is none.
if ( $opt{v} ) {
    my $report = $sql
      ? "$sql"
      : 'Search returns zero results (search terms ignored by parser or dictionaries !)';
    print $report, "\n";
}
58
# -e: show the query plan for the generated SQL.  On servers that return
# the EXPLAIN output as a one-column result set (assumed here -- confirm
# for the PostgreSQL version in use) the rows must be fetched and printed;
# do() would silently throw them away.  If the statement fails,
# selectcol_arrayref returns undef and nothing is printed here.
if ( $opt{e} && length $sql ) {
    my $plan = $dbi->selectcol_arrayref("explain $sql");
    print "$_\n" for @{ $plan || [] };
}
62
# Report the matches.  With -d and/or -h every matching row is printed with
# its id and weight (plus path and headline for -h).  Without those options,
# and unless -q was given, only the list returned by search() is shown.
if ( $opt{d} or $opt{h} ) {

    # The joined query string is passed here; per the original author's
    # note, stop words are not checked when get_sql() is given \@ARGV.
    my ( $out, $condition, $order ) = $fts->get_sql($query);
    my $sql = "
select
        $fts->{TXTID},
        $fts->{TABLE}.path,
        $fts->{TABLE}.page_content,
        $out
from
        $fts->{TABLE}
where
        $condition
order by $order";
    if ( $opt{h} ) {

        # -h accepts LIMIT or OFFSET-LIMIT.  Exactly one optional "-N" part
        # is allowed; anything else falls back to the first five rows.
        # A second number of 0 is treated as if it had not been given.
        if ( $opt{h} =~ /^(\d+)(?:-(\d+))?$/ ) {
            my ( $first, $second ) = ( $1, $2 );
            my $offset = $second ? $first  : 0;
            my $limit  = $second ? $second : $first;
            $sql .= " limit $limit offset $offset";
        } else {
            $sql .= " limit 5 offset 0";
        }
    }
    $sql .= ";";

    # An empty condition means there is nothing to search for.
    if ( $condition ne "" ) {
        my $sth = $dbi->prepare($sql)
          or die "$0: cannot prepare search query: " . $dbi->errstr . "\n";
        $sth->execute() || die "Query list: $condition\n";

        # NOTE(review): the result columns are read as 'tid' and 'pos';
        # presumably those names come from $fts->{TXTID} and $out -- confirm.
        while ( defined( my $r = $sth->fetchrow_hashref ) ) {
            print "------TID: $r->{tid}\tWEIGHT:", sprintf( "%.3f", $r->{pos} );
            if ( $opt{h} ) {
                print "\tPATH:$r->{path}\n";
                my $headline = $fts->get_headline(
                    query                   => $query,
                    src                     => \$r->{page_content},
                    maxread                 => 4096,
                    maxlen                  => 100,
                    otag                    => '',
                    ctag                    => '',
                    replace_ignore_headline => ' '
                );

                # alternative tags: otag => '<b>', ctag => '</b>'

                # Collapse every run of whitespace into a single blank.
                $headline =~ s/\s+/ /g;
                print $headline;
            }
            print "\n";
        }
        $sth->finish;
    }
} elsif ( !$opt{q} ) {

    # Named $found rather than $a so the special sort variable is not masked.
    my $found = $fts->search($query);
    print "Found documents:", scalar @$found, "\n", join( ";", @$found ), "\n"
      if ref $found;
}
119
# Benchmarking: run the search -b times and report the total and the
# average wall-clock time.
if ( $opt{b} ) {

    # Work with an integer run count so that the loop and the average use
    # the same number, and a non-numeric value cannot cause a division by
    # zero.
    my $runs = int $opt{b};
    if ( $runs < 1 ) {
        warn
"$0: -b expects a positive run count, got '$opt{b}'; benchmark skipped\n";
    } else {
        print "Speed gun in use :)...\n" if !$opt{q};
        my $t0    = [gettimeofday];
        my $count = 0;
        foreach ( 1 .. $runs ) {

            # NOTE(review): the words are passed as \@ARGV here, whereas the
            # result display passes the joined query string -- confirm that
            # both forms are meant to be equivalent.
            my $docs = $fts->search( \@ARGV );
            $count = ref($docs) ? scalar @$docs : 0;
        }
        my $elapsed = tv_interval( $t0, [gettimeofday] );
        printf
"Found documents:%d, total time (%d runs): %.02f sec; average time: %.03f sec\n",
          $count, $runs, $elapsed, $elapsed / $runs;
    }
}
$dbi->disconnect;
135