#!/usr/local/bin/perl
#
# Command-line search client for an OpenFTS index stored in PostgreSQL.
#
# Usage:
#   search.pl -p DATABASE[:PREFIX] [-b count] [-v] [-q | -d -h N] [-e] WORD ...
#
# Options:
#   -p DATABASE[:PREFIX]  database name, optionally followed by ':' and the
#                         prefix handed to Search::OpenFTS->new (required)
#   -b count              run the search 'count' times and report timings
#   -v                    print the SQL generated for the query
#   -q                    suppress the plain result listing and benchmark banner
#   -d                    list matched documents
#   -h N | OFFSET-LIMIT   list matched documents with headlines
#   -e                    print the PostgreSQL query plan for the generated SQL
#
# All remaining arguments are joined with single spaces to form the query.

use strict;
use warnings;
use locale;
use Search::OpenFTS 0.40;
use DBI();
use Getopt::Std;
use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );

my %opt;
getopts( 'h:b:l:p:dvqe', \%opt );

# At least one search word and the -p option are mandatory.
if ( !@ARGV || !$opt{p} ) {
    print "Usage\n",
      "$0 -p DATABASE [-b count] [ -v ] [ -q | -d -h #headlines] [ -e ] WORD1 [ WORD2 [...] ]\n",
      "\t-b count\tbenchmark\n",
      "\t-v\t\tverbose\n",
      "\t-q\t\tquiet\n",
      "\t-d\t\tshow matched files\n",
      "\t-h #results\tshow #headlines, 5 by default\n",
      "\t-e\t\texplain\n",
      "Example: search.pl -p openfts -h5 small magellanic cloud\n";
    exit;
}

# Reject a malformed run count now: it is later used as a loop bound and as
# a divisor, where a non-numeric value would die with a division by zero
# only after all the other work has been done.
if ( defined $opt{b} && $opt{b} !~ /\A\d+\z/ ) {
    die "$0: -b expects a non-negative integer run count, got '$opt{b}'\n";
}

my ( @stopwords, @ignored );

# -p has the form DATABASE[:PREFIX]; $PREFIX stays undef when no ':' is given.
my ( $dbname, $PREFIX ) = split( ':', $opt{p} );

my $dbi = DBI->connect( 'DBI:Pg:dbname=' . $dbname )
  or die "$0: cannot connect to database '$dbname': "
  . ( defined $DBI::errstr ? $DBI::errstr : 'unknown error' ) . "\n";

my $fts = Search::OpenFTS->new(
    $dbi,
    prefix  => $PREFIX,
    relfunc => q[ rank( '{0.1, 0.2, 0.4, 1.0}', $TSVECTOR, $QUERY, 1 )],
) or die "$0: cannot initialise Search::OpenFTS for '$opt{p}'\n";

my $query = join( " ", @ARGV );

# _sql fills @stopwords and @ignored through the references passed to it.
my $sql = $fts->_sql( $query, rejected => \@stopwords, ignored => \@ignored );

print "Stopwords:", join( ',', @stopwords ), "\n" if @stopwords;

# Entries whose word is a single blank are not worth reporting.
@ignored = grep { $_->{word} ne ' ' } @ignored;
if (@ignored) {
    print "Ignored:", ( map { "('$_->{word}',$_->{type})" } @ignored ), "\n";
}

if ( $opt{v} ) {
    my $shown =
      $sql
      ? $sql
      : 'Search returns zero results (search terms ignored by parser or dictionaries !)';
    print $shown, "\n";
}

if ( $opt{e} && defined $sql && length $sql ) {

    # EXPLAIN returns its plan as a result set; do() would execute the
    # statement and throw the plan away, so fetch and print it instead.
    my $plan = $dbi->selectcol_arrayref("explain $sql");
    print "$_\n" for @{ $plan || [] };
}

if ( $opt{d} or $opt{h} ) {

    # The query is passed as a string on purpose: according to the original
    # author, stopwords are not checked when get_sql is given \@ARGV.
    my ( $out, $condition, $order ) = $fts->get_sql($query);

    # An empty condition means there is nothing to search for.
    if ( defined $condition && $condition ne "" ) {
        my $list_sql = join "\n",
          "",
          "select",
          "  $fts->{TXTID},",
          "  $fts->{TABLE}.path,",
          "  $fts->{TABLE}.page_content,",
          "  $out",
          "from",
          "  $fts->{TABLE}",
          "where",
          "  $condition",
          "order by $order";

        if ( $opt{h} ) {

            # -h accepts either LIMIT or OFFSET-LIMIT; anything else falls
            # back to the first five rows.
            my ( $offset, $limit ) = ( 0, 5 );
            if ( $opt{h} =~ /\A(\d+)(?:-(\d+))?\z/ ) {
                my ( $first, $second ) = ( $1, $2 );
                ( $offset, $limit ) =
                  defined $second ? ( $first, $second ) : ( 0, $first );
            }
            $list_sql .= " limit $limit offset $offset";
        }
        $list_sql .= ";";

        my $sth = $dbi->prepare($list_sql)
          or die "$0: cannot prepare result query\n";
        $sth->execute() or die "Query list: $condition\n";

        while ( my $row = $sth->fetchrow_hashref ) {
            printf "------TID: %s\tWEIGHT:%.3f", $row->{tid}, $row->{pos};
            if ( $opt{h} ) {
                print "\tPATH:$row->{path}\n";

                # otag/ctag are the ANSI "bold on" / "reset" sequences; the
                # leading ESC is spelled \e so it cannot be lost in transit.
                # HTML alternative: otag => '<b>', ctag => '</b>'
                my $headline = $fts->get_headline(
                    query                   => $query,
                    src                     => \$row->{page_content},
                    maxread                 => 4096,
                    maxlen                  => 100,
                    otag                    => "\e[1m",
                    ctag                    => "\e[0m",
                    replace_ignore_headline => ' '
                );
                $headline = '' unless defined $headline;

                # Collapse every run of whitespace to a single blank.
                $headline =~ s/\s+/ /g;
                print $headline;
            }
            print "\n";
        }
        $sth->finish;
    }
}
elsif ( !$opt{q} ) {
    my $docs = $fts->search($query);
    if ( ref $docs ) {
        print "Found documents:", scalar @$docs, "\n",
          join( ";", @$docs ), "\n";
    }
}

# Benchmarking
if ( defined $opt{b} && $opt{b} > 0 ) {
    print "Speed gun in use :)...\n" if !$opt{q};

    my $started = [gettimeofday];
    my $found   = 0;

    # NOTE(review): the benchmark passes \@ARGV rather than the joined query
    # string used above -- confirm this difference is intended.
    for my $run ( 1 .. $opt{b} ) {
        my $hits = $fts->search( \@ARGV );

        # Only the size of the last run's result is reported.
        $found = ref $hits ? scalar @$hits : 0;
    }

    my $elapsed = tv_interval( $started, [gettimeofday] );
    printf
"Found documents:%d, total time (%d runs): %.02f sec; average time: %.03f sec\n",
      $found, $opt{b}, $elapsed, $elapsed / $opt{b};
}

$dbi->disconnect;