# See bottom of file for license and copyright information
package Foswiki::Store::SearchAlgorithms::Forking;

=begin TML

---+ package Foswiki::Store::SearchAlgorithms::Forking
Implements Foswiki::Store::Interfaces::SearchAlgorithm

Forking implementation of flat file store search. Uses grep.

=cut

use strict;
use warnings;
use Assert;

use Foswiki::Store::Interfaces::QueryAlgorithm ();
our @ISA = ('Foswiki::Store::Interfaces::QueryAlgorithm');

use Foswiki::Store::Interfaces::SearchAlgorithm ();
use Foswiki::Search::Node                       ();
use Foswiki::Search::InfoCache                  ();
use Foswiki::Search::ResultSet                  ();
use Foswiki                                     ();
use Foswiki::Func                               ();
use Foswiki::Meta                               ();
use Foswiki::MetaCache                          ();
use Foswiki::Query::Node                        ();
use Foswiki::Query::HoistREs                    ();
use Foswiki::ListIterator                       ();
use Foswiki::Iterator::FilterIterator           ();
use Foswiki::Iterator::ProcessIterator          ();

use constant MONITOR => 0;

BEGIN {
    if ( $Foswiki::cfg{UseLocale} ) {
        require locale;
        import locale();
    }
}

=begin TML

---++ ClassMethod new( $class,  ) -> $cereal

Construct a new search algorithm object. Passes 'SEARCH', followed by any
arguments given by the caller, to the superclass constructor and returns
the resulting object.

=cut

sub new {
    my $self = shift()->SUPER::new( 'SEARCH', @_ );
    return $self;
}

# Search the .txt files of the topics in $inputTopicSet (in web $web) for
# $searchString, by running the configured grep command.
#
# This is the 'old' interface to search algorithms (prior to Sven's massive
# search refactoring, when it was simply called 'search').
#
# Parameters:
#    * $searchString - the token to search for
#    * $web - name of the web; used to build the data directory path
#    * $inputTopicSet - iterator over the topics to search
#      (reset(), hasNext() and next() are called on it)
#    * $session - not used by this function
#    * $options - hash ref; the keys read here are type, wordboundaries,
#      casesensitive and files_without_match
#
# Returns a reference to a hash keyed by topic name. Each value is an array
# reference holding, for every grep output line naming that topic, the text
# that followed "Topic.txt:" (or '' when there was none).
sub _search {
    my ( $searchString, $web, $inputTopicSet, $session, $options ) = @_;

    # SMELL: I18N: 'grep' must use locales if needed,
    # for case-insensitive searching.
    my $program = '';

    # Regex and word-boundary searches need egrep; everything else is a
    # fixed-string search.
    if ( $options->{type}
        && ( $options->{type} eq 'regex' || $options->{wordboundaries} ) )
    {
        $program = $Foswiki::cfg{Store}{EgrepCmd};
    }
    else {
        $program = $Foswiki::cfg{Store}{FgrepCmd};
    }

    # Resolve the %CS{a|b}% and %DET{a|b}% placeholders in the command
    # template: each keeps exactly one of its two alternatives.
    if ( $options->{casesensitive} ) {
        $program =~ s/%CS\{(.*?)\|.*?\}%/$1/g;
    }
    else {
        $program =~ s/%CS\{.*?\|(.*?)\}%/$1/g;
    }
    if ( $options->{files_without_match} ) {
        $program =~ s/%DET\{.*?\|(.*?)\}%/$1/g;
    }
    else {
        $program =~ s/%DET\{(.*?)\|.*?\}%/$1/g;
    }
    if ( $options->{wordboundaries} ) {

        # Item5529: Can't use quotemeta because $searchString may
        # be UTF8 encoded
        # TODO when testing UTF-8 code, try quotemeta. It should
        # work with a decent perl
        $searchString =~ s#([][|/\\\$\^*()+\{\};@?.\{\}])#\\$1#g;
        $searchString = '\b' . $searchString . '\b';
    }

    if ( $Foswiki::cfg{DetailedOS} eq 'MSWin32' ) {

        # Try to escape the ^ and "" for native windows grep and apache
        $searchString =~ s/\[\^/[^^/g;

        # Fix escaping and quoting for Windows
        $searchString =~ s#\\#\\\\#g;
        $searchString =~ s#"#\\"#g;
        $searchString = q(") . $searchString . q(");
    }

    my $matches = '';

    #SMELL, TODO, replace with Store call.
    my $sDir = $Foswiki::cfg{DataDir} . '/' . $web . '/';

    # Process topics in sets, fix for Codev.ArgumentListIsTooLongForSearch
    my $maxTopicsInSet = 512;    # max number of topics for a grep call
         # SMELL: the number is actually dependent on the length of the path
         # to each file
         # SMELL: the following while loop should probably be made by
         # sysCommand, as this is a leaky abstraction.
         # heck, on pre WinXP it's only 2048, post XP it's 8191 -
         # http://support.microsoft.com/kb/830473
    if ( $Foswiki::cfg{DetailedOS} eq 'MSWin32' ) {

        # Tune the number based on the length of "$sDir/WebSearchAdvanced.txt"
        # 30 is a guess - wotamess
        $maxTopicsInSet = int(
            ( 8191 - ( length($program) + length($searchString) + 30 ) ) /
              ( length("$sDir/LongWebSearchAdvanced.txt") + 10 ) );

        # A batch must hold at least one file, however long the paths are.
        $maxTopicsInSet = 1 if $maxTopicsInSet < 1;

        #print STDERR "++++++++++++ $maxTopicsInSet \n";
    }

    my @set;
    $inputTopicSet->reset();
    while ( $inputTopicSet->hasNext() ) {
        my $webtopic = $inputTopicSet->next();
        my ( undef, $tn ) =
          Foswiki::Func::normalizeWebTopicName( $web, $webtopic );
        push( @set, "$sDir/$tn.txt" );

        # Run grep once the batch is full or the input is exhausted.
        # Compare the element count (not the last index, $#set) so that a
        # batch never holds more than $maxTopicsInSet files.
        if (
            ( scalar(@set) >= $maxTopicsInSet ) #replace with character count..
            || !( $inputTopicSet->hasNext() )
          )
        {
            my ( $m, $exit ) = Foswiki::Sandbox->sysCommand(
                $program,
                TOKEN => $searchString,
                FILES => \@set
            );
            @set = ();

            # man grep: "Normally, exit status is 0 if selected lines are
            # found and 1 otherwise. But the exit status is 2 if an error
            # occurred, unless the -q or --quiet or --silent option is used
            # and a selected line is found."
            if ( $exit > 1 ) {

                # TODO: need to work out a way to alert the admin there is
                # a problem, without filling up the log files with repeated
                # SEARCH's

                # NOTE: we ignore the error, because grep returns an error
                # if it comes across a broken file link or a file it does
                # not have permission to open, so throwing here gives wrong
                # search results.
                # throw Error::Simple(
                #    "$program Grep for '$searchString' returned error")
            }
            $matches .= Foswiki::Store::decode($m);
        }
    }
    my %seen;

    # Pull "<topic>.txt[:<text>]" out of every output line, collecting the
    # text per topic name.
    # Note use of / and \ as dir separators, to support Winblows
    $matches =~
      s/([^\/\\]*?)\.txt(:(.*))?$/push( @{$seen{$1}}, ($3||'') ); ''/gem;

    # Implicit untaint OK; data from grep

    return \%seen;
}

# Run the tokens of $query against a single web, ANDing the per-token
# results together, and return the surviving topics.
#
# Parameters:
#    * $this - the search algorithm object
#    * $query - query object; must not be empty. Its tokens() are searched
#      one after another. A token with a leading '!' is an AND NOT token.
#    * $web - name of the web to search
#    * $inputTopicSet - iterator over the topics to consider; when undef,
#      the topic list iterator for the whole web is used
#    * $session - passed to Foswiki::Meta->new and on to _search
#    * $options - hash ref; the keys read here are scope, type and
#      casesensitive. scope is forced to 'text' unless it is 'topic' or
#      'all'.
#
# Returns the topic set left after the last token: a
# Foswiki::Search::InfoCache when the query has at least one token.
#
#ok, for initial validation, naively call the code with a web.
sub _webQuery {
    my ( $this, $query, $web, $inputTopicSet, $session, $options ) = @_;
    ASSERT( !$query->isEmpty() ) if DEBUG;

    #print STDERR "ForkingSEARCH(".join(', ', @{ $query->tokens() }).")\n";
    # default scope is 'text'
    $options->{'scope'} = 'text'
      unless ( defined( $options->{'scope'} )
        && $options->{'scope'} =~ m/^(topic|all)$/ );

    my $topicSet = $inputTopicSet;
    if ( !defined($topicSet) ) {

        #then we start with the whole web
        #TODO: i'm sure that is a flawed assumption
        my $webObject = Foswiki::Meta->new( $session, $web );
        $topicSet =
          Foswiki::Search::InfoCache::getTopicListIterator( $webObject,
            $options );
    }
    ASSERT( UNIVERSAL::isa( $topicSet, 'Foswiki::Iterator' ) ) if DEBUG;

    # 'type' is optional; treat a missing value as "not a regex" instead of
    # comparing undef (which warns under 'use warnings').
    my $searchType = defined $options->{'type'} ? $options->{'type'} : '';

    #print STDERR "######## Forking search ($web) tokens "
    # .scalar(@{$query->tokens()})." : ".join(',', @{$query->tokens()})."\n";
    # AND search - search once for each token, ANDing result together
    foreach my $token ( @{ $query->tokens() } ) {

        my $tokenCopy = $token;

        # flag for AND NOT search; the leading '!' is stripped from the token
        my $invertSearch = 0;
        $invertSearch = ( $tokenCopy =~ s/^\!// );

        # scope can be 'text' (default, set above), 'topic' or 'all'
        # scope='topic', e.g. Perl search on topic name:
        my %topicMatches;
        unless ( $options->{'scope'} eq 'text' ) {
            my $qtoken = $tokenCopy;

# FIXME I18N
# http://foswiki.org/Tasks/Item1646 this causes us to use/leak huge amounts of memory if called too often
            $qtoken = quotemeta($qtoken) if ( $searchType ne 'regex' );

            $topicSet->reset();
            while ( $topicSet->hasNext() ) {
                my $webtopic = $topicSet->next();
                my ( undef, $topic ) =
                  Foswiki::Func::normalizeWebTopicName( $web, $webtopic );

                if ( $options->{'casesensitive'} ) {

                    # fix for Codev.SearchWithNoPipe
                    $topicMatches{$topic} = 1 if ( $topic =~ m/$qtoken/ );
                }
                else {
                    $topicMatches{$topic} = 1 if ( $topic =~ m/$qtoken/i );
                }
            }
        }

        # scope='text', e.g. grep search on topic text:
        unless ( $options->{'scope'} eq 'topic' ) {
            my $textMatches =
              _search( $tokenCopy, $web, $topicSet, $session, $options );

            #bring the text matches into the topicMatch hash
            if ($textMatches) {
                @topicMatches{ keys %$textMatches } = values %$textMatches;
            }
        }

        my @scopeTextList = ();
        if ($invertSearch) {

            # AND NOT: keep only the topics that did not match this token
            $topicSet->reset();
            while ( $topicSet->hasNext() ) {
                my $webtopic = $topicSet->next();
                my ( undef, $topic ) =
                  Foswiki::Func::normalizeWebTopicName( $web, $webtopic );

                push( @scopeTextList, $topic )
                  unless $topicMatches{$topic};
            }
        }
        else {

            #TODO: the sad thing about this is we lose info
            @scopeTextList = keys(%topicMatches);
        }

        # reduced topic list for next token
        # NOTE(review): this uses the global $Foswiki::Plugins::SESSION
        # rather than the $session parameter - confirm that is intended.
        $topicSet =
          Foswiki::Search::InfoCache->new( $Foswiki::Plugins::SESSION, $web,
            \@scopeTextList );
    }

    return $topicSet;
}

1;
__END__
Foswiki - The Free and Open Source Wiki, http://foswiki.org/

Copyright (C) 2008-2011 Foswiki Contributors. Foswiki Contributors
are listed in the AUTHORS file in the root of this distribution.
NOTE: Please extend that file, not this notice.

Additional copyrights apply to some or all of the code in this
file as follows:

Copyright (C) 2002 John Talintyre, john.talintyre@btinternet.com
Copyright (C) 2002-2007 Peter Thoeny, peter@thoeny.org
and TWiki Contributors. All Rights Reserved. TWiki Contributors
are listed in the AUTHORS file in the root of this distribution.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version. For
more details read LICENSE in the root of this distribution.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

As per the GPL, removal of this notice is prohibited.