package Search::Xapian;

use 5.006;
use strict;
use warnings;

our $VERSION = '1.2.25.4';

use Exporter 'import';

# Perl-side wrappers for the individual Xapian classes.
use Search::Xapian::Database;
use Search::Xapian::Document;
use Search::Xapian::ESet;
use Search::Xapian::ESetIterator;
use Search::Xapian::Error;
use Search::Xapian::MSet;
use Search::Xapian::MSetIterator;
use Search::Xapian::MultiValueSorter;
use Search::Xapian::PositionIterator;
use Search::Xapian::PostingIterator;
use Search::Xapian::Query;
use Search::Xapian::QueryParser;
use Search::Xapian::RSet;
use Search::Xapian::Stem;
use Search::Xapian::TermGenerator;
use Search::Xapian::TermIterator;
use Search::Xapian::ValueIterator;
use Search::Xapian::WritableDatabase;

# Weighting schemes.
use Search::Xapian::BM25Weight;
use Search::Xapian::BoolWeight;
use Search::Xapian::TradWeight;

# Match spies.
use Search::Xapian::ValueCountMatchSpy;

# Stoppers.
use Search::Xapian::SimpleStopper;
use Search::Xapian::PerlStopper;

require DynaLoader;

our @ISA = ('DynaLoader');

# Flags DynaLoader passes to dlopen() when loading this module's shared
# object.  0x01 requests RTLD_GLOBAL, which is needed so that other C++
# modules linking against libxapian.so see the *same* value for all the
# weak symbols (e.g. the exception classes).
sub dl_load_flags {
    return 0x01;
}

# This allows declaration	use Search::Xapian ':all';
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
# will save memory.
# Export tags.  Each tag names a group of constants that a caller can
# request in one go, e.g.  use Search::Xapian ':ops';
our %EXPORT_TAGS = (
    'ops' => [ qw(
        OP_AND
        OP_OR
        OP_AND_NOT
        OP_XOR
        OP_AND_MAYBE
        OP_FILTER
        OP_NEAR
        OP_PHRASE
        OP_VALUE_RANGE
        OP_SCALE_WEIGHT
        OP_ELITE_SET
        OP_VALUE_GE
        OP_VALUE_LE
    ) ],
    'db' => [ qw(
        DB_OPEN
        DB_CREATE
        DB_CREATE_OR_OPEN
        DB_CREATE_OR_OVERWRITE
    ) ],
    'enq_order' => [ qw(
        ENQ_DESCENDING
        ENQ_ASCENDING
        ENQ_DONT_CARE
    ) ],
    'qpflags' => [ qw(
        FLAG_BOOLEAN
        FLAG_PHRASE
        FLAG_LOVEHATE
        FLAG_BOOLEAN_ANY_CASE
        FLAG_WILDCARD
        FLAG_PURE_NOT
        FLAG_PARTIAL
        FLAG_SPELLING_CORRECTION
        FLAG_SYNONYM
        FLAG_AUTO_SYNONYMS
        FLAG_AUTO_MULTIWORD_SYNONYMS
        FLAG_DEFAULT
    ) ],
    'qpstem' => [ qw(
        STEM_NONE
        STEM_SOME
        STEM_ALL
    ) ]
);

# ':standard' is ops + db + qpflags + qpstem; ':all' additionally includes
# the enq_order constants and BAD_VALUENO.
$EXPORT_TAGS{standard} = [ @{ $EXPORT_TAGS{'ops'} },
                           @{ $EXPORT_TAGS{'db'} },
                           @{ $EXPORT_TAGS{'qpflags'} },
                           @{ $EXPORT_TAGS{'qpstem'} } ];
$EXPORT_TAGS{all} = [ @{ $EXPORT_TAGS{'standard'} }, @{ $EXPORT_TAGS{'enq_order'} }, 'BAD_VALUENO' ];


# Names which can be exported.
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

# Don't export any names by default.
our @EXPORT = qw( );

bootstrap Search::Xapian $VERSION;

# Preloaded methods go here.

# Reverse lookup tables: each array is indexed by a constant's numeric value
# and holds that constant's name (e.g. $OP_NAMES[OP_AND] is 'OP_AND').  If two
# names in one tag share a value, the one listed later in the tag wins.
our ( @OP_NAMES, @DB_NAMES, @FLAG_NAMES, @STEM_NAMES );
{
    my @name_tables = (
        [ 'ops'     => \@OP_NAMES ],
        [ 'db'      => \@DB_NAMES ],
        [ 'qpflags' => \@FLAG_NAMES ],
        [ 'qpstem'  => \@STEM_NAMES ],
    );

    for my $name_table (@name_tables) {
        my ( $tag, $names ) = @{$name_table};

        for my $name ( @{ $EXPORT_TAGS{$tag} } ) {
            # Look the constant sub up directly rather than string-eval'ing
            # its name.  A failed eval yields undef, which as an array index
            # means 0 and would silently overwrite the entry for whichever
            # constant really has the value 0.
            my $constant = __PACKAGE__->can($name);
            if ( !$constant ) {
                warn "Search::Xapian: constant $name is not defined; skipping";
                next;
            }
            $names->[ $constant->() ] = $name;
        }
    }
}

1;

__END__


=head1 NAME

Search::Xapian - Perl XS frontend to the Xapian C++ search library.

=head1 SYNOPSIS

  use Search::Xapian;

  my $db = Search::Xapian::Database->new( '[DATABASE DIR]' );
  my $enq = $db->enquire( '[QUERY TERM]' );

  printf "Running query '%s'\n", $enq->get_query()->get_description();

  my @matches = $enq->matches(0, 10);

  print scalar(@matches) . " results found\n";

  foreach my $match ( @matches ) {
    my $doc = $match->get_document();
    printf "ID %d %d%% [ %s ]\n", $match->get_docid(), $match->get_percent(), $doc->get_data();
  }

=head1 DESCRIPTION

This module wraps most methods of most Xapian classes. The missing classes
and methods should be added in the future. It also provides a simplified,
more 'perlish' interface to some common operations, as demonstrated above.

There are some gaps in the POD documentation for wrapped classes, but you
can read the Xapian C++ API documentation at
L<https://xapian.org/docs/apidoc/html/annotated.html> for details of
these. Alternatively, take a look at the code in the examples and tests.

If you want to use Search::Xapian and the threads module together, make
sure you're using Search::Xapian >= 1.0.4.0 and Perl >= 5.8.7. As of 1.0.4.0,
Search::Xapian uses CLONE_SKIP to make sure that the perl wrapper objects
aren't copied to new threads - without this the underlying C++ objects can get
destroyed more than once.

If you encounter problems, or have any comments, suggestions, patches, etc.,
please email the Xapian-discuss mailing list (details of which can be found at
L<https://xapian.org/lists>).

=head2 EXPORT

None by default.

=head1 :db

=over 4

=item DB_OPEN

Open a database, fail if database doesn't exist.

=item DB_CREATE

Create a new database, fail if database exists.

=item DB_CREATE_OR_OPEN

Open an existing database, without destroying data, or create a new
database if one doesn't already exist.

=item DB_CREATE_OR_OVERWRITE

Overwrite database if it exists.

=back

=head1 :ops

=over 4

=item OP_AND

Match if both subqueries are satisfied.

=item OP_OR

Match if either subquery is satisfied.

=item OP_AND_NOT

Match if left but not right subquery is satisfied.

=item OP_XOR

Match if left or right, but not both, subqueries are satisfied.

=item OP_AND_MAYBE

Match if left is satisfied, but use weights from both.

=item OP_FILTER

Like OP_AND, but only weight using the left query.

=item OP_NEAR

Match if the words are near each other. The window should be specified as
a parameter to C<Search::Xapian::Query::Query>, but it defaults to the
number of terms in the list.

=item OP_PHRASE

Match as a phrase (all words in order).

=item OP_ELITE_SET

Select an elite set from the subqueries, and perform a query with these combined as an OR query.

=item OP_VALUE_RANGE

Filter by a range test on a document value.

=back

=head1 :qpflags

=over 4

=item FLAG_DEFAULT

This gives the QueryParser default flag settings, allowing you to easily add
flags to the default ones.

=item FLAG_BOOLEAN

Support AND, OR, etc and bracketed subexpressions.

=item FLAG_LOVEHATE

Support + and -.

=item FLAG_PHRASE

Support quoted phrases.

=item FLAG_BOOLEAN_ANY_CASE

Support AND, OR, etc even if they aren't in ALLCAPS.

=item FLAG_WILDCARD

Support right truncation (e.g. Xap*).

=item FLAG_PURE_NOT

Allow queries such as 'NOT apples'.

These require the use of a list of all documents in the database
which is potentially expensive, so this feature isn't enabled by
default.

=item FLAG_PARTIAL

Enable partial matching.
298 299Partial matching causes the parser to treat the query as a 300"partially entered" search. This will automatically treat the 301final word as a wildcarded match, unless it is followed by 302whitespace, to produce more stable results from interactive 303searches. 304 305=item FLAG_SPELLING_CORRECTION 306 307=item FLAG_SYNONYM 308 309=item FLAG_AUTO_SYNONYMS 310 311=item FLAG_AUTO_MULTIWORD_SYNONYMS 312 313=back 314 315=head1 :qpstem 316 317=over 4 318 319=item STEM_ALL 320 321Stem all terms. 322 323=item STEM_NONE 324 325Don't stem any terms. 326 327=item STEM_SOME 328 329Stem some terms, in a manner compatible with Omega (capitalised words and those 330in phrases aren't stemmed). 331 332=back 333 334=head1 :enq_order 335 336=over 4 337 338=item ENQ_ASCENDING 339 340docids sort in ascending order (default) 341 342=item ENQ_DESCENDING 343 344docids sort in descending order 345 346=item ENQ_DONT_CARE 347 348docids sort in whatever order is most efficient for the backend 349 350=back 351 352=head1 :standard 353 354Standard is db + ops + qpflags + qpstem 355 356=head1 Version functions 357 358=over 4 359 360=item major_version 361 362Returns the major version of the Xapian C++ library being used. E.g. for 363Xapian 1.0.9 this would return 1. 364 365=item minor_version 366 367Returns the minor version of the Xapian C++ library being used. E.g. for 368Xapian 1.0.9 this would return 0. 369 370=item revision 371 372Returns the revision of the Xapian C++ library being used. E.g. for 373Xapian 1.0.9 this would return 9. In a stable release series, Xapian libraries 374with the same minor and major versions are usually ABI compatible, so this 375often won't match the third component of $Search::Xapian::VERSION (which is the 376version of the Search::Xapian XS wrappers). 377 378=back 379 380=head1 Numeric encoding functions 381 382=over 4 383 384=item sortable_serialise NUMBER 385 386Convert a floating point number to a string, preserving sort order. 

This method converts a floating point number to a string, suitable for
using as a value for numeric range restriction, or for use as a sort
key.

The conversion is platform independent.

The conversion attempts to ensure that, for any pair of values supplied
to the conversion algorithm, the result of comparing the original
values (with a numeric comparison operator) will be the same as the
result of comparing the resulting values (with a string comparison
operator). On platforms which represent doubles with the precisions
specified by IEEE 754, this will be the case: if the representation of
doubles is more precise, it is possible that two very close doubles
will be mapped to the same string, so will compare equal.

Note also that both zero and -zero will be converted to the same
representation: since these compare equal, this satisfies the
comparison constraint, but it's worth knowing this if you wish to use
the encoding in some situation where this distinction matters.

Handling of NaN isn't (currently) guaranteed to be sensible.

=item sortable_unserialise SERIALISED_NUMBER

Convert a string encoded using sortable_serialise back to a floating
point number.

This expects the input to be a string produced by sortable_serialise().
If the input is not such a string, the value returned is undefined (but
no error will be thrown).

The result of the conversion will be exactly the value which was
supplied to sortable_serialise() when making the string on platforms
which represent doubles with the precisions specified by IEEE 754, but
may be a different (nearby) value on other platforms.

=back

=head1 TODO

=over 4

=item Error Handling

Error handling for all methods liable to generate them.

=item Documentation

Add POD documentation for all classes, where possible just adapted from Xapian
docs.

=item Unwrapped classes

The following Xapian classes are not yet wrapped:
ErrorHandler, standard ExpandDecider subclasses
(user-defined ones work),
user-defined weight classes.

=item Unwrapped methods

The following methods are not yet wrapped:
Enquire::get_eset(...) with more than two arguments,
Query ctor optional "parameter" parameter,
Remote::open(...),
static Stem::get_available_languages().

We wrap MSet::swap() and MSet::operator[](), but not ESet::swap(),
ESet::operator[](). Is swap actually useful? Should we instead tie MSet
and ESet to allow them to just be used as lists?

=back

=head1 CREDITS

Thanks to Tye McQueen E<lt>tye@metronet.comE<gt> for explaining the
finer points of how best to write XS frontends to C++ libraries, James
Aylett E<lt>james@tartarus.orgE<gt> for clarifying the less obvious
aspects of the Xapian API, Tim Brody for patches wrapping ::QueryParser and
::Stopper, and especially Olly Betts E<lt>olly@survex.comE<gt> for contributing
advice, bugfixes, and wrapper code for the more obscure classes.

=head1 AUTHOR

Alex Bowley E<lt>kilinrax@cpan.orgE<gt>

Please report any bugs/suggestions to E<lt>xapian-discuss@lists.xapian.orgE<gt>
or use the Xapian bug tracker L<https://xapian.org/bugs>. Please do
NOT use the CPAN bug tracker or mail any of the authors individually.

=head1 LICENSE

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
481 482=head1 SEE ALSO 483 484L<Search::Xapian::BM25Weight>, 485L<Search::Xapian::BoolWeight>, 486L<Search::Xapian::Database>, 487L<Search::Xapian::Document>, 488L<Search::Xapian::Enquire>, 489L<Search::Xapian::MatchSpy>, 490L<Search::Xapian::MultiValueSorter>, 491L<Search::Xapian::PositionIterator>, 492L<Search::Xapian::PostingIterator>, 493L<Search::Xapian::QueryParser>, 494L<Search::Xapian::Stem>, 495L<Search::Xapian::TermGenerator>, 496L<Search::Xapian::TermIterator>, 497L<Search::Xapian::TradWeight>, 498L<Search::Xapian::ValueIterator>, 499L<Search::Xapian::Weight>, 500L<Search::Xapian::WritableDatabase>, 501and 502L<https://xapian.org/>. 503 504=cut 505