#!/usr/local/bin/perl
#
# Command-line search client built on Search::OpenFTS and DBD::Pg.
#
# Usage:
#   search.pl -p DATABASE[:PREFIX] [-b count] [-v] [-q | -d -h N[-M]] [-e] WORD...
#
# Options (see the usage text printed below):
#   -p  database name, optionally followed by ':' and an OpenFTS prefix
#   -b  run the search <count> times and report timings
#   -v  print the SQL produced for the query
#   -q  do not print the plain list of found documents
#   -d  list matched rows (TID and weight)
#   -h  like -d, but also print path and a headline; the value is either
#       "LIMIT" or "OFFSET-LIMIT" (anything else falls back to limit 5)
#   -e  run EXPLAIN on the generated SQL and print the plan
#   -l  accepted by the option parser but not used anywhere in this script

use strict;
use warnings;
use locale;
use Search::OpenFTS 0.40;
use DBI();
use Getopt::Std;
use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );

my %opt;
getopts( 'h:b:l:p:dvqe', \%opt );

# At least one search word and the database name are mandatory.
if ( !@ARGV || !$opt{p} ) {
    print "Usage\n";
    print "$0 -p DATABASE [-b count] [ -v ] [ -q | -d -h #headlines] [ -e ] WORD1 [ WORD2 [...] ]\n";
    print "\t-b count\tbenchmark\n";
    print "\t-v\t\tverbose\n";
    print "\t-q\t\tquiet\n";
    print "\t-d\t\tshow matched files\n";
    print "\t-h #results\tshow #headlines, 5 by default\n";
    print "\t-e\t\texplain\n";
    print "Example: search.pl -p openfts -h5 small magellanic cloud\n";
    exit;
}

# -p is "dbname" or "dbname:prefix"; $PREFIX stays undef when no ':' is given.
my ( $dbname, $PREFIX ) = split( ':', $opt{p} );

my $dbi = DBI->connect( 'DBI:Pg:dbname=' . $dbname )
    or die "Cannot connect to database '$dbname': "
    . ( defined $DBI::errstr ? $DBI::errstr : 'unknown error' ) . "\n";

my $fts = Search::OpenFTS->new(
    $dbi,
    prefix  => $PREFIX,
    relfunc => q[ rank( '{0.1, 0.2, 0.4, 1.0}', $TSVECTOR, $QUERY, 1 )],
) or die "Cannot create Search::OpenFTS object\n";

my $query = join( " ", @ARGV );

# _sql() fills these two arrays through the references passed to it.
my @stopwords;
my @ignored;
my $sql = $fts->_sql( $query, rejected => \@stopwords, ignored => \@ignored );

print "Stopwords:", join( ',', @stopwords ), "\n" if @stopwords;

# Entries whose word is a single blank are not worth reporting.
@ignored = grep { $_->{word} ne ' ' } @ignored;
if (@ignored) {
    print "Ignored:", map { "('$_->{word}',$_->{type})" } @ignored;
    print "\n";
}

if ( $opt{v} ) {
    my $shown = $sql
        ? $sql
        : 'Search returns zero results (search terms ignored by parser or dictionaries !)';
    print $shown, "\n";
}

if ( $opt{e} && defined $sql && length $sql ) {

    # EXPLAIN returns its plan as result rows, so it has to be fetched and
    # printed; a plain do() would throw the rows away.
    my $plan = $dbi->selectcol_arrayref("explain $sql");
    if ($plan) {
        print "$_\n" for @$plan;
    }
}

if ( $opt{d} or $opt{h} ) {

    # no check for stopwords if $query passes to get_sql as \@ARGV !!!
    my ( $out, $condition, $order ) = $fts->get_sql($query);

    my $list_sql =
          " select $fts->{TXTID}, $fts->{TABLE}.path, $fts->{TABLE}.page_content, $out"
        . " from $fts->{TABLE} where $condition order by $order";

    if ( $opt{h} ) {

        # "N" means limit N; "O-N" means offset O, limit N.  Only digits can
        # reach the SQL text because of the anchored pattern.
        if ( $opt{h} =~ /\A(\d+)(?:-(\d+))?\z/ ) {
            my ( $first, $second ) = ( $1, $2 );
            my $offset = $second ? $first  : 0;
            my $limit  = $second ? $second : $first;
            $list_sql .= " limit $limit offset $offset";
        }
        else {
            $list_sql .= " limit 5 offset 0";
        }
    }
    $list_sql .= ";";

    if ( $condition ne "" ) {
        my $sth = $dbi->prepare($list_sql)
            or die "Cannot prepare query: " . $dbi->errstr . "\n";
        $sth->execute() or die "Query list: $condition\n";

        while ( my $r = $sth->fetchrow_hashref ) {
            print "------TID: $r->{tid}\tWEIGHT:", sprintf( "%.3f", $r->{pos} );
            if ( $opt{h} ) {
                print "\tPATH:$r->{path}\n";
                my $headline = $fts->get_headline(
                    query                   => $query,
                    src                     => \$r->{page_content},
                    maxread                 => 4096,
                    maxlen                  => 100,
                    otag                    => '',
                    ctag                    => '',
                    replace_ignore_headline => ' '
                );

                # Print nothing (rather than warn) if no headline came back,
                # and collapse every whitespace run to a single blank.
                $headline = '' unless defined $headline;
                $headline =~ s/\s+/ /g;
                print "$headline";
            }
            print "\n";
        }
        $sth->finish;
    }
}
elsif ( !$opt{q} ) {
    my $found = $fts->search($query);
    print "Found documents:", scalar @$found, "\n", join( ";", @$found ), "\n"
        if ref $found;
}

# Benchmarking
if ( $opt{b} ) {

    # A non-numeric or all-zero count would run zero iterations and then
    # divide by zero when computing the average.
    die "-b expects a positive integer, got '$opt{b}'\n"
        unless $opt{b} =~ /\A\d+\z/ && $opt{b} > 0;

    print "Speed gun in use :)...\n" if !$opt{q};

    my $t0    = [gettimeofday];
    my $count = 0;
    foreach ( 1 .. $opt{b} ) {
        my $hits = $fts->search( \@ARGV );

        # search() may hand back a non-reference; count that as zero hits
        # instead of dying on the dereference.
        $count = ref $hits ? scalar @$hits : 0;
    }
    my $elapsed = tv_interval( $t0, [gettimeofday] );

    print sprintf(
        "Found documents:%d, total time (%d runs): %.02f sec; average time: %.03f sec\n",
        $count, $opt{b}, $elapsed, $elapsed / $opt{b}
    );
}

$dbi->disconnect;