1package Search::Xapian;
2
3use 5.006;
4use strict;
5use warnings;
6
7our $VERSION = '1.2.25.4';
8
9use Exporter 'import';
10
11use Search::Xapian::Database;
12use Search::Xapian::Document;
13use Search::Xapian::ESet;
14use Search::Xapian::ESetIterator;
15use Search::Xapian::Error;
16use Search::Xapian::MSet;
17use Search::Xapian::MSetIterator;
18use Search::Xapian::MultiValueSorter;
19use Search::Xapian::PositionIterator;
20use Search::Xapian::PostingIterator;
21use Search::Xapian::Query;
22use Search::Xapian::QueryParser;
23use Search::Xapian::RSet;
24use Search::Xapian::Stem;
25use Search::Xapian::TermGenerator;
26use Search::Xapian::TermIterator;
27use Search::Xapian::ValueIterator;
28use Search::Xapian::WritableDatabase;
29
30use Search::Xapian::BM25Weight;
31use Search::Xapian::BoolWeight;
32use Search::Xapian::TradWeight;
33
34use Search::Xapian::ValueCountMatchSpy;
35
36use Search::Xapian::SimpleStopper;
37use Search::Xapian::PerlStopper;
38
39require DynaLoader;
40
41our @ISA = qw(DynaLoader);
42
# DynaLoader hook: the value returned here is passed as the flags for loading
# the shared object.  0x01 requests RTLD_GLOBAL for dlopen(), so that any other
# C++ module linked against libxapian.so resolves the weak symbols (e.g. the
# exception classes) to the *same* definitions as we do.
sub dl_load_flags {
    return 0x01;
}
47
# Export tags, so that callers can write e.g.
#     use Search::Xapian ':all';
# Each tag names a group of related constants.
our %EXPORT_TAGS = (
    ops => [qw(
        OP_AND
        OP_OR
        OP_AND_NOT
        OP_XOR
        OP_AND_MAYBE
        OP_FILTER
        OP_NEAR
        OP_PHRASE
        OP_VALUE_RANGE
        OP_SCALE_WEIGHT
        OP_ELITE_SET
        OP_VALUE_GE
        OP_VALUE_LE
    )],
    db => [qw(
        DB_OPEN
        DB_CREATE
        DB_CREATE_OR_OPEN
        DB_CREATE_OR_OVERWRITE
    )],
    enq_order => [qw(
        ENQ_DESCENDING
        ENQ_ASCENDING
        ENQ_DONT_CARE
    )],
    qpflags => [qw(
        FLAG_BOOLEAN
        FLAG_PHRASE
        FLAG_LOVEHATE
        FLAG_BOOLEAN_ANY_CASE
        FLAG_WILDCARD
        FLAG_PURE_NOT
        FLAG_PARTIAL
        FLAG_SPELLING_CORRECTION
        FLAG_SYNONYM
        FLAG_AUTO_SYNONYMS
        FLAG_AUTO_MULTIWORD_SYNONYMS
        FLAG_DEFAULT
    )],
    qpstem => [qw(
        STEM_NONE
        STEM_SOME
        STEM_ALL
    )],
);

# ':standard' is ops + db + qpflags + qpstem, concatenated in that order.
$EXPORT_TAGS{standard} = [
    map { @{ $EXPORT_TAGS{$_} } } qw(ops db qpflags qpstem)
];

# ':all' is ':standard' plus the enq_order constants and BAD_VALUENO.
$EXPORT_TAGS{all} = [
    @{ $EXPORT_TAGS{standard} },
    @{ $EXPORT_TAGS{enq_order} },
    'BAD_VALUENO',
];

# Every name reachable through a tag may be imported on request ...
our @EXPORT_OK = @{ $EXPORT_TAGS{all} };

# ... but nothing is exported unless the caller asks for it.
our @EXPORT = ();
110
# Load the compiled XS half of the module.  bootstrap() is not defined in this
# file; it is reached through @ISA (DynaLoader).  $VERSION is passed along so
# the loader can check it against the compiled object.
Search::Xapian->bootstrap($VERSION);
112
113# Preloaded methods go here.
114
# Reverse lookup tables: each array maps a constant's numeric value back to
# the constant's name, e.g. $OP_NAMES[OP_AND] is the string 'OP_AND'.  The
# names come from the corresponding export tag list.
#
# The constant subs are not defined in this file (presumably the XS code
# provides them), so this has to run after the bootstrap call.
#
# Each constant is resolved with can() rather than a string eval.  A string
# eval clobbers $@ at load time and, for a name with no matching sub, fails
# quietly and yields undef, so the name overwrites element 0 of its table.
# Here such a name is reported and skipped instead.
our ( @OP_NAMES, @DB_NAMES, @FLAG_NAMES, @STEM_NAMES );

for my $table (
    [ \@OP_NAMES,   'ops'     ],
    [ \@DB_NAMES,   'db'      ],
    [ \@FLAG_NAMES, 'qpflags' ],
    [ \@STEM_NAMES, 'qpstem'  ],
) {
    my ( $names_by_value, $tag ) = @{$table};

    for my $name ( @{ $EXPORT_TAGS{$tag} } ) {
        my $constant = __PACKAGE__->can($name);
        if ( !$constant ) {
            warn __PACKAGE__ . ": constant $name is not defined\n";
            next;
        }

        # If two constants share a value, the one listed later wins.
        $names_by_value->[ $constant->() ] = $name;
    }
}
134
1351;
136
137__END__
138
139
140=head1 NAME
141
142Search::Xapian - Perl XS frontend to the Xapian C++ search library.
143
144=head1 SYNOPSIS
145
146  use Search::Xapian;
147
148  my $db = Search::Xapian::Database->new( '[DATABASE DIR]' );
149  my $enq = $db->enquire( '[QUERY TERM]' );
150
151  printf "Running query '%s'\n", $enq->get_query()->get_description();
152
153  my @matches = $enq->matches(0, 10);
154
155  print scalar(@matches) . " results found\n";
156
157  foreach my $match ( @matches ) {
158    my $doc = $match->get_document();
159    printf "ID %d %d%% [ %s ]\n", $match->get_docid(), $match->get_percent(), $doc->get_data();
160  }
161
162=head1 DESCRIPTION
163
164This module wraps most methods of most Xapian classes. The missing classes
165and methods should be added in the future. It also provides a simplified,
166more 'perlish' interface to some common operations, as demonstrated above.
167
168There are some gaps in the POD documentation for wrapped classes, but you
169can read the Xapian C++ API documentation at
170L<https://xapian.org/docs/apidoc/html/annotated.html> for details of
171these.  Alternatively, take a look at the code in the examples and tests.
172
173If you want to use Search::Xapian and the threads module together, make
174sure you're using Search::Xapian >= 1.0.4.0 and Perl >= 5.8.7.  As of 1.0.4.0,
175Search::Xapian uses CLONE_SKIP to make sure that the perl wrapper objects
176aren't copied to new threads - without this the underlying C++ objects can get
177destroyed more than once.
178
If you encounter problems, or have any comments, suggestions, patches, etc.,
please email the Xapian-discuss mailing list (details of which can be found at
L<https://xapian.org/lists>).
182
183=head2 EXPORT
184
185None by default.
186
187=head1 :db
188
189=over 4
190
191=item DB_OPEN
192
193Open a database, fail if database doesn't exist.
194
195=item DB_CREATE
196
197Create a new database, fail if database exists.
198
199=item DB_CREATE_OR_OPEN
200
201Open an existing database, without destroying data, or create a new
202database if one doesn't already exist.
203
204=item DB_CREATE_OR_OVERWRITE
205
206Overwrite database if it exists.
207
208=back
209
210=head1 :ops
211
212=over 4
213
214=item OP_AND
215
216Match if both subqueries are satisfied.
217
218=item OP_OR
219
220Match if either subquery is satisfied.
221
222=item OP_AND_NOT
223
224Match if left but not right subquery is satisfied.
225
226=item OP_XOR
227
Match if either the left or the right subquery is satisfied, but not both.
229
230=item OP_AND_MAYBE
231
232Match if left is satisfied, but use weights from both.
233
234=item OP_FILTER
235
236Like OP_AND, but only weight using the left query.
237
238=item OP_NEAR
239
Match if the words are near each other. The window should be specified as
a parameter to C<Search::Xapian::Query::Query>, but it defaults to the
number of terms in the list.
243
244=item OP_PHRASE
245
Match as a phrase (all words in order).
247
248=item OP_ELITE_SET
249
250Select an elite set from the subqueries, and perform a query with these combined as an OR query.
251
252=item OP_VALUE_RANGE
253
254Filter by a range test on a document value.
255
256=back
257
258=head1 :qpflags
259
260=over 4
261
262=item FLAG_DEFAULT
263
264This gives the QueryParser default flag settings, allowing you to easily add
265flags to the default ones.
266
267=item FLAG_BOOLEAN
268
269Support AND, OR, etc and bracketed subexpressions.
270
271=item FLAG_LOVEHATE
272
273Support + and -.
274
275=item FLAG_PHRASE
276
277Support quoted phrases.
278
279=item FLAG_BOOLEAN_ANY_CASE
280
281Support AND, OR, etc even if they aren't in ALLCAPS.
282
283=item FLAG_WILDCARD
284
285Support right truncation (e.g. Xap*).
286
287=item FLAG_PURE_NOT
288
289Allow queries such as 'NOT apples'.
290
291These require the use of a list of all documents in the database
292which is potentially expensive, so this feature isn't enabled by
293default.
294
295=item FLAG_PARTIAL
296
297Enable partial matching.
298
299Partial matching causes the parser to treat the query as a
300"partially entered" search.  This will automatically treat the
301final word as a wildcarded match, unless it is followed by
302whitespace, to produce more stable results from interactive
303searches.
304
305=item FLAG_SPELLING_CORRECTION
306
307=item FLAG_SYNONYM
308
309=item FLAG_AUTO_SYNONYMS
310
311=item FLAG_AUTO_MULTIWORD_SYNONYMS
312
313=back
314
315=head1 :qpstem
316
317=over 4
318
319=item STEM_ALL
320
321Stem all terms.
322
323=item STEM_NONE
324
325Don't stem any terms.
326
327=item STEM_SOME
328
329Stem some terms, in a manner compatible with Omega (capitalised words and those
330in phrases aren't stemmed).
331
332=back
333
334=head1 :enq_order
335
336=over 4
337
338=item ENQ_ASCENDING
339
340docids sort in ascending order (default)
341
342=item ENQ_DESCENDING
343
344docids sort in descending order
345
346=item ENQ_DONT_CARE
347
348docids sort in whatever order is most efficient for the backend
349
350=back
351
352=head1 :standard
353
354Standard is db + ops + qpflags + qpstem
355
356=head1 Version functions
357
358=over 4
359
360=item major_version
361
362Returns the major version of the Xapian C++ library being used.  E.g. for
363Xapian 1.0.9 this would return 1.
364
365=item minor_version
366
367Returns the minor version of the Xapian C++ library being used.  E.g. for
368Xapian 1.0.9 this would return 0.
369
370=item revision
371
372Returns the revision of the Xapian C++ library being used.  E.g. for
373Xapian 1.0.9 this would return 9.  In a stable release series, Xapian libraries
374with the same minor and major versions are usually ABI compatible, so this
375often won't match the third component of $Search::Xapian::VERSION (which is the
376version of the Search::Xapian XS wrappers).
377
378=back
379
380=head1 Numeric encoding functions
381
382=over 4
383
384=item sortable_serialise NUMBER
385
386Convert a floating point number to a string, preserving sort order.
387
388This method converts a floating point number to a string, suitable for
389using as a value for numeric range restriction, or for use as a sort
390key.
391
392The conversion is platform independent.
393
394The conversion attempts to ensure that, for any pair of values supplied
395to the conversion algorithm, the result of comparing the original
396values (with a numeric comparison operator) will be the same as the
397result of comparing the resulting values (with a string comparison
398operator).  On platforms which represent doubles with the precisions
399specified by IEEE_754, this will be the case: if the representation of
400doubles is more precise, it is possible that two very close doubles
401will be mapped to the same string, so will compare equal.
402
403Note also that both zero and -zero will be converted to the same
404representation: since these compare equal, this satisfies the
405comparison constraint, but it's worth knowing this if you wish to use
406the encoding in some situation where this distinction matters.
407
408Handling of NaN isn't (currently) guaranteed to be sensible.
409
410=item sortable_unserialise SERIALISED_NUMBER
411
412Convert a string encoded using sortable_serialise back to a floating
413point number.
414
415This expects the input to be a string produced by sortable_serialise().
416If the input is not such a string, the value returned is undefined (but
417no error will be thrown).
418
419The result of the conversion will be exactly the value which was
420supplied to sortable_serialise() when making the string on platforms
421which represent doubles with the precisions specified by IEEE_754, but
422may be a different (nearby) value on other platforms.
423
424=back
425
426=head1 TODO
427
428=over 4
429
430=item Error Handling
431
432Error handling for all methods liable to generate them.
433
434=item Documentation
435
436Add POD documentation for all classes, where possible just adapted from Xapian
437docs.
438
439=item Unwrapped classes
440
441The following Xapian classes are not yet wrapped:
ErrorHandler, standard ExpandDecider subclasses
(user-defined ones work),
user-defined weight classes.
445
446=item Unwrapped methods
447
448The following methods are not yet wrapped:
449Enquire::get_eset(...) with more than two arguments,
450Query ctor optional "parameter" parameter,
451Remote::open(...),
452static Stem::get_available_languages().
453
454We wrap MSet::swap() and MSet::operator[](), but not ESet::swap(),
455ESet::operator[]().  Is swap actually useful?  Should we instead tie MSet
456and ESet to allow them to just be used as lists?
457
458=back
459
460=head1 CREDITS
461
462Thanks to Tye McQueen E<lt>tye@metronet.comE<gt> for explaining the
463finer points of how best to write XS frontends to C++ libraries, James
464Aylett E<lt>james@tartarus.orgE<gt> for clarifying the less obvious
465aspects of the Xapian API, Tim Brody for patches wrapping ::QueryParser and
466::Stopper and especially Olly Betts E<lt>olly@survex.comE<gt> for contributing
467advice, bugfixes, and wrapper code for the more obscure classes.
468
469=head1 AUTHOR
470
471Alex Bowley E<lt>kilinrax@cpan.orgE<gt>
472
473Please report any bugs/suggestions to E<lt>xapian-discuss@lists.xapian.orgE<gt>
474or use the Xapian bug tracker L<https://xapian.org/bugs>.  Please do
475NOT use the CPAN bug tracker or mail any of the authors individually.
476
477=head1 LICENSE
478
479This program is free software; you can redistribute it and/or modify
480it under the same terms as Perl itself.
481
482=head1 SEE ALSO
483
484L<Search::Xapian::BM25Weight>,
485L<Search::Xapian::BoolWeight>,
486L<Search::Xapian::Database>,
487L<Search::Xapian::Document>,
488L<Search::Xapian::Enquire>,
489L<Search::Xapian::MatchSpy>,
490L<Search::Xapian::MultiValueSorter>,
491L<Search::Xapian::PositionIterator>,
492L<Search::Xapian::PostingIterator>,
493L<Search::Xapian::QueryParser>,
494L<Search::Xapian::Stem>,
495L<Search::Xapian::TermGenerator>,
496L<Search::Xapian::TermIterator>,
497L<Search::Xapian::TradWeight>,
498L<Search::Xapian::ValueIterator>,
499L<Search::Xapian::Weight>,
500L<Search::Xapian::WritableDatabase>,
501and
502L<https://xapian.org/>.
503
504=cut
505