1# See bottom of file for license and copyright information
2package Foswiki::Store::SearchAlgorithms::Forking;
3
4=begin TML
5
6---+ package Foswiki::Store::SearchAlgorithms::Forking
7Implements Foswiki::Store::Interfaces::SearchAlgorithm
8
9Forking implementation of flat file store search. Uses grep.
10
11=cut
12
13use strict;
14use warnings;
15use Assert;
16
17use Foswiki::Store::Interfaces::QueryAlgorithm ();
18our @ISA = ('Foswiki::Store::Interfaces::QueryAlgorithm');
19
20use Foswiki::Store::Interfaces::SearchAlgorithm ();
21use Foswiki::Search::Node                       ();
22use Foswiki::Search::InfoCache                  ();
23use Foswiki::Search::ResultSet                  ();
24use Foswiki();
25use Foswiki::Func();
26use Foswiki::Meta            ();
27use Foswiki::MetaCache       ();
28use Foswiki::Query::Node     ();
29use Foswiki::Query::HoistREs ();
30use Foswiki::ListIterator();
31use Foswiki::Iterator::FilterIterator();
32use Foswiki::Iterator::ProcessIterator();
33
34use constant MONITOR => 0;
35
# Switch on the locale pragma at compile time, but only when the site
# configuration asks for it ({UseLocale}).
BEGIN {
    if ( $Foswiki::cfg{UseLocale} ) {
        require locale;
        locale->import();
    }
}
42
=begin TML

---++ ClassMethod new( $class, ... ) -> $this

Construct the algorithm object by delegating to the superclass
constructor, passing ='SEARCH'= ahead of any caller-supplied arguments.

=cut

sub new {
    my $class = shift;

    # Scalar assignment keeps the superclass constructor in scalar context.
    my $this = $class->SUPER::new( 'SEARCH', @_ );
    return $this;
}
53
# Search the .txt files of $web for $searchString by forking grep.
# This is the 'old' interface to search algorithms (it was simply called
# 'search' prior to Sven's massive search refactoring).
#
#    * =$searchString= - the token to look for. It is handed to
#      {Store}{EgrepCmd} when =type= is 'regex' or =wordboundaries= is set
#      (in the latter case it is escaped and wrapped in \b...\b first),
#      and to {Store}{FgrepCmd} otherwise.
#    * =$web= - web name; files are addressed as {DataDir}/$web/Topic.txt
#    * =$inputTopicSet= - iterator (reset/hasNext/next) over candidate topics
#    * =$session= - not used by this function
#    * =$options= - hash ref; keys read here are =type=, =wordboundaries=,
#      =casesensitive= and =files_without_match=
#
# Returns a reference to a hash keyed on topic name. Each value is an array
# ref holding whatever grep printed after "Topic.txt:" for that topic
# ('' when grep printed the file name only).
sub _search {
    my ( $searchString, $web, $inputTopicSet, $session, $options ) = @_;

    my $isWindows = ( $Foswiki::cfg{DetailedOS} || '' ) eq 'MSWin32';

    # SMELL: I18N: 'grep' must use locales if needed,
    # for case-insensitive searching.
    #
    # A word-boundary search always builds a regular expression below, so
    # it needs egrep even when no 'type' option was supplied.
    my $type = defined $options->{type} ? $options->{type} : '';
    my $program =
      ( $type eq 'regex' || $options->{wordboundaries} )
      ? $Foswiki::cfg{Store}{EgrepCmd}
      : $Foswiki::cfg{Store}{FgrepCmd};

    # Expand the %CS{case-sensitive|case-insensitive}% and
    # %DET{with-matches|files-without-match}% switches in the command
    # template.
    if ( $options->{casesensitive} ) {
        $program =~ s/%CS\{(.*?)\|.*?\}%/$1/g;
    }
    else {
        $program =~ s/%CS\{.*?\|(.*?)\}%/$1/g;
    }
    if ( $options->{files_without_match} ) {
        $program =~ s/%DET\{.*?\|(.*?)\}%/$1/g;
    }
    else {
        $program =~ s/%DET\{(.*?)\|.*?\}%/$1/g;
    }

    if ( $options->{wordboundaries} ) {

        # Item5529: Can't use quotemeta because $searchString may
        # be UTF8 encoded
        # TODO when testing UTF-8 code, try quotemeta. It should
        # work with a decent perl
        $searchString =~ s#([][|/\\\$\^*()+\{\};@?.\{\}])#\\$1#g;
        $searchString = '\b' . $searchString . '\b';
    }

    if ($isWindows) {

        # try to escape the ^ and "" for native windows grep and apache
        $searchString =~ s/\[\^/[^^/g;

        # Fix escaping and quoting for Windows
        $searchString =~ s#\\#\\\\#g;
        $searchString =~ s#"#\\"#g;
        $searchString = q(") . $searchString . q(");
    }

    # SMELL, TODO, replace with Store call.
    my $sDir = $Foswiki::cfg{DataDir} . '/' . $web . '/';

    # Process topics in sets, fix for Codev.ArgumentListIsTooLongForSearch.
    # SMELL: the limit really depends on the length of the path to each
    # file, and the batching should probably be done by sysCommand, as this
    # is a leaky abstraction.
    my $maxTopicsInSet = 512;    # max number of topics for a grep call
    if ($isWindows) {

        # The command line is limited to 8191 characters post XP (2048
        # before) - http://support.microsoft.com/kb/830473
        # Tune the number based on the length of
        # "$sDir/LongWebSearchAdvanced.txt"; 30 is a guess - wotamess
        $maxTopicsInSet = int(
            ( 8191 - ( length($program) + length($searchString) + 30 ) ) /
              ( length("$sDir/LongWebSearchAdvanced.txt") + 10 ) );

        # A very long search string must not drive the batch size to zero
        # or below; always hand grep at least one file per call.
        $maxTopicsInSet = 1 if $maxTopicsInSet < 1;
    }

    my $matches = '';
    my @batch;
    $inputTopicSet->reset();
    while ( $inputTopicSet->hasNext() ) {
        my $webtopic = $inputTopicSet->next();
        my ( $topicWeb, $topic ) =
          Foswiki::Func::normalizeWebTopicName( $web, $webtopic );
        push( @batch, "$sDir/$topic.txt" );

        # Keep collecting until the batch is full or the input runs out.
        # The element count (not the last index) is compared, so a batch
        # never exceeds $maxTopicsInSet files.
        next if @batch < $maxTopicsInSet && $inputTopicSet->hasNext();

        my ( $output, $exit ) = Foswiki::Sandbox->sysCommand(
            $program,
            TOKEN => $searchString,
            FILES => \@batch
        );
        @batch = ();

        # man grep: "Normally, exit status is 0 if selected lines are found
        # and 1 otherwise. But the exit status is 2 if an error occurred,
        # unless the -q or --quiet or --silent option is used and a selected
        # line is found."
        #
        # NOTE: $exit is deliberately ignored, because grep returns an
        # error if it comes across a broken file link or a file it does
        # not have permission to open, so throwing here gives wrong
        # search results.
        # TODO: need to work out a way to alert the admin there is
        # a problem, without filling up the log files with repeated
        # SEARCH's
        $matches .= Foswiki::Store::decode($output) if defined $output;
    }

    # Fold grep's "path/Topic.txt:text" lines into topic => [ text, ... ].
    # Note use of / and \ as dir separators, to support Winblows
    my %seen;
    $matches =~
      s/([^\/\\]*?)\.txt(:(.*))?$/push( @{$seen{$1}}, ($3||'') ); ''/gem;

    # Implicit untaint OK; data from grep

    return \%seen;
}
181
# Evaluate a SEARCH query against a single web.
# (For initial validation this naively calls the search code with a web.)
#
#    * =$this= - the algorithm object (not used in the body)
#    * =$query= - query object; must not be empty, and its =tokens()= are
#      ANDed together, a leading '!' on a token meaning AND NOT
#    * =$web= - name of the web to search
#    * =$inputTopicSet= - iterator over candidate topics, or undef to start
#      from the topic list of the whole web
#    * =$session= - session used to build the web's Foswiki::Meta object
#    * =$options= - hash ref; =scope= is forced to 'text' here unless it is
#      'topic' or 'all'; =type= and =casesensitive= are read here, and the
#      hash is passed on to _search
#
# Returns the topic set left after the last token: a
# Foswiki::Search::InfoCache holding the surviving topic names, or the
# starting topic set untouched when the query yields no tokens.
sub _webQuery {
    my ( $this, $query, $web, $inputTopicSet, $session, $options ) = @_;
    ASSERT( !$query->isEmpty() ) if DEBUG;

    #print STDERR "ForkingSEARCH(".join(', ', @{ $query->tokens() }).")\n";
    # default scope is 'text'
    $options->{'scope'} = 'text'
      unless ( defined( $options->{'scope'} )
        && $options->{'scope'} =~ m/^(topic|all)$/ );

    # A missing 'type' simply means "not a regex"; test for definedness so
    # that no uninitialized-value warning is raised.
    my $isRegex =
      defined( $options->{'type'} ) && $options->{'type'} eq 'regex';

    my $topicSet = $inputTopicSet;
    if ( !defined($topicSet) ) {

        #then we start with the whole web
        #TODO: i'm sure that is a flawed assumption
        my $webObject = Foswiki::Meta->new( $session, $web );
        $topicSet =
          Foswiki::Search::InfoCache::getTopicListIterator( $webObject,
            $options );
    }
    ASSERT( UNIVERSAL::isa( $topicSet, 'Foswiki::Iterator' ) ) if DEBUG;

    #print STDERR "######## Forking search ($web) tokens "
    # .scalar(@{$query->tokens()})." : ".join(',', @{$query->tokens()})."\n";
    # AND search - search once for each token, ANDing result together
    foreach my $token ( @{ $query->tokens() } ) {

        my $tokenCopy = $token;

        # flag for AND NOT search; the leading '!' is stripped from the copy
        my $invertSearch = ( $tokenCopy =~ s/^\!// );

        # scope can be 'text' (default), 'topic' or 'all'
        # scope='topic' or 'all', e.g. Perl search on topic name:
        my %topicMatches;
        unless ( $options->{'scope'} eq 'text' ) {
            my $qtoken = $tokenCopy;

# FIXME I18N
# http://foswiki.org/Tasks/Item1646 this causes us to use/leak huge amounts of memory if called too often
            $qtoken = quotemeta($qtoken) unless $isRegex;

            $topicSet->reset();
            while ( $topicSet->hasNext() ) {
                my $webtopic = $topicSet->next();
                my ( $itrWeb, $topic ) =
                  Foswiki::Func::normalizeWebTopicName( $web, $webtopic );

                # fix for Codev.SearchWithNoPipe
                my $nameMatches =
                  $options->{'casesensitive'}
                  ? ( $topic =~ m/$qtoken/ )
                  : ( $topic =~ m/$qtoken/i );
                $topicMatches{$topic} = 1 if $nameMatches;
            }
        }

        # scope='text' or 'all', e.g. grep search on topic text:
        unless ( $options->{'scope'} eq 'topic' ) {
            my $textMatches =
              _search( $tokenCopy, $web, $topicSet, $session, $options );

            #bring the text matches into the topicMatch hash
            if ($textMatches) {
                @topicMatches{ keys %$textMatches } = values %$textMatches;
            }
        }

        my @scopeTextList = ();
        if ($invertSearch) {

            # AND NOT: keep the topics of the current set that did not match
            $topicSet->reset();
            while ( $topicSet->hasNext() ) {
                my $webtopic = $topicSet->next();
                my ( $Iweb, $topic ) =
                  Foswiki::Func::normalizeWebTopicName( $web, $webtopic );

                push( @scopeTextList, $topic )
                  unless $topicMatches{$topic};
            }
        }
        else {

            #TODO: the sad thing about this is we lose info
            @scopeTextList = keys(%topicMatches);
        }

        # reduced topic list for next token
        $topicSet =
          Foswiki::Search::InfoCache->new( $Foswiki::Plugins::SESSION, $web,
            \@scopeTextList );
    }

    return $topicSet;
}
286
2871;
288__END__
289Foswiki - The Free and Open Source Wiki, http://foswiki.org/
290
291Copyright (C) 2008-2011 Foswiki Contributors. Foswiki Contributors
292are listed in the AUTHORS file in the root of this distribution.
293NOTE: Please extend that file, not this notice.
294
295Additional copyrights apply to some or all of the code in this
296file as follows:
297
298Copyright (C) 2002 John Talintyre, john.talintyre@btinternet.com
299Copyright (C) 2002-2007 Peter Thoeny, peter@thoeny.org
300and TWiki Contributors. All Rights Reserved. TWiki Contributors
301are listed in the AUTHORS file in the root of this distribution.
302
303This program is free software; you can redistribute it and/or
304modify it under the terms of the GNU General Public License
305as published by the Free Software Foundation; either version 2
306of the License, or (at your option) any later version. For
307more details read LICENSE in the root of this distribution.
308
309This program is distributed in the hope that it will be useful,
310but WITHOUT ANY WARRANTY; without even the implied warranty of
311MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
312
313As per the GPL, removal of this notice is prohibited.
314