1 /*
2 * Copyright 2005-2021 Fabrice Colin
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 */
18
19 #include <fcntl.h>
20 #include <ctype.h>
21 #include <unistd.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <stdarg.h>
25 #include <strings.h>
26 #include <time.h>
27 #include <ctype.h>
28 #include <vector>
29 #include <iostream>
30 #include <fstream>
31 #include <algorithm>
32 #include <utility>
33 #include <cstring>
34 #include <xapian.h>
35
36 #include "Languages.h"
37 #include "StringManip.h"
38 #include "TimeConverter.h"
39 #include "Url.h"
40 #include "FieldMapperInterface.h"
41 #include "LanguageDetector.h"
42 #include "XapianDatabaseFactory.h"
43 #include "XapianIndex.h"
44
45 #define MAGIC_TERM "X-MetaSE-Doc"
46
47 using std::clog;
48 using std::clog;
49 using std::endl;
50 using std::ios;
51 using std::ifstream;
52 using std::ofstream;
53 using std::string;
54 using std::vector;
55 using std::set;
56 using std::map;
57 using std::min;
58 using std::max;
59 using std::pair;
60
61 extern FieldMapperInterface *g_pMapper;
62
63 class TokensIndexer : public Dijon::CJKVTokenizer::TokensHandler
64 {
65 public:
TokensIndexer(Xapian::Stem * pStemmer,Xapian::Document & doc,const Xapian::WritableDatabase & db,const string & prefix,unsigned int nGramSize,bool & doSpelling,Xapian::termcount & termPos)66 TokensIndexer(Xapian::Stem *pStemmer, Xapian::Document &doc,
67 const Xapian::WritableDatabase &db,
68 const string &prefix, unsigned int nGramSize,
69 bool &doSpelling, Xapian::termcount &termPos) :
70 Dijon::CJKVTokenizer::TokensHandler(),
71 m_pStemmer(pStemmer),
72 m_doc(doc),
73 m_db(db),
74 m_prefix(prefix),
75 m_nGramSize(nGramSize),
76 m_nGramCount(0),
77 m_doSpelling(doSpelling),
78 m_termPos(termPos),
79 m_hasCJKV(false)
80 {
81 }
82
~TokensIndexer()83 virtual ~TokensIndexer()
84 {
85 if (m_hasCJKV == true)
86 {
87 // This will help identify CJKV documents
88 m_doc.add_term("XTOK:CJKV");
89 }
90 }
91
handle_token(const string & tok,bool is_cjkv)92 virtual bool handle_token(const string &tok, bool is_cjkv)
93 {
94 bool addSpelling = false;
95
96 if (tok.empty() == true)
97 {
98 return false;
99 }
100
101 // Lower case the term and trim spaces
102 string term(StringManip::toLowerCase(tok));
103 StringManip::trimSpaces(term);
104
105 if (term.empty() == true)
106 {
107 return true;
108 }
109
110 // Does it end with a dot ?
111 if (term[term.length() - 1] == '.')
112 {
113 bool foundNonDot = false;
114
115 string::size_type pos = term.length() - 1;
116 while (pos >= 0)
117 {
118 if (term[pos] != '.')
119 {
120 foundNonDot = true;
121
122 // Any dot before that ?
123 if ((pos == 0) ||
124 (term.find_last_of(".", pos - 1) == string::npos))
125 {
126 // No, all dots are at the end, trim them
127 term.erase(pos + 1);
128 }
129 // Else, it's probably an acronym
130 break;
131 }
132
133 if (pos == 0)
134 {
135 break;
136 }
137 --pos;
138 }
139
140 if (foundNonDot == false)
141 {
142 // It's all dots !
143 return true;
144 }
145 }
146 m_doc.add_posting(m_prefix + XapianDatabase::limitTermLength(term), m_termPos);
147
148 // Is this CJKV ?
149 if (is_cjkv == false)
150 {
151 #ifndef _DIACRITICS_SENSITIVE
152 bool hasDiacritics = false;
153
154 // Remove accents and other diacritics
155 string unaccentedTerm(Dijon::CJKVTokenizer::strip_marks(term));
156 if (unaccentedTerm != term)
157 {
158 m_doc.add_posting(m_prefix + XapianDatabase::limitTermLength(unaccentedTerm), m_termPos);
159 hasDiacritics = true;
160 }
161 #endif
162
163 // Don't stem if the term starts with a digit
164 if ((m_pStemmer != NULL) &&
165 (isdigit((int)term[0]) == 0))
166 {
167 string stemmedTerm((*m_pStemmer)(term));
168
169 m_doc.add_term("Z" + XapianDatabase::limitTermLength(stemmedTerm));
170 #ifndef _DIACRITICS_SENSITIVE
171 if (hasDiacritics == true)
172 {
173 stemmedTerm = (*m_pStemmer)(unaccentedTerm);
174
175 m_doc.add_term("Z" + XapianDatabase::limitTermLength(stemmedTerm));
176 }
177 #endif
178 }
179
180 // Does it include dots ?
181 string::size_type dotPos = term.find('.');
182 if (dotPos != string::npos)
183 {
184 string::size_type startPos = 0;
185 bool addRemainder = true;
186
187 while (dotPos != string::npos)
188 {
189 string component(term.substr(startPos, dotPos - startPos));
190
191 if (component.empty() == false)
192 {
193 m_doc.add_posting(m_prefix + XapianDatabase::limitTermLength(component), m_termPos);
194 ++m_termPos;
195 }
196
197 // Next
198 if (dotPos == term.length() - 1)
199 {
200 addRemainder = false;
201 break;
202 }
203 startPos = dotPos + 1;
204 dotPos = term.find('.', startPos);
205 }
206
207 if (addRemainder == true)
208 {
209 string lastComponent(term.substr(startPos));
210
211 m_doc.add_posting(m_prefix + XapianDatabase::limitTermLength(lastComponent), m_termPos);
212 }
213 }
214
215 addSpelling = m_doSpelling;
216 ++m_termPos;
217 m_nGramCount = 0;
218 }
219 else
220 {
221 if (m_nGramCount % m_nGramSize == 0)
222 {
223 ++m_termPos;
224 }
225 else if ((m_nGramCount + 1) % m_nGramSize == 0)
226 {
227 addSpelling = m_doSpelling;
228 }
229 ++m_nGramCount;
230 m_hasCJKV = true;
231 }
232
233 if (addSpelling == true)
234 {
235 try
236 {
237 m_db.add_spelling(XapianDatabase::limitTermLength(term));
238 }
239 catch (const Xapian::UnimplementedError &error)
240 {
241 clog << "Couldn't index with spelling correction: " << error.get_type() << ": " << error.get_msg() << endl;
242
243 m_doSpelling = false;
244 }
245 }
246
247 return true;
248 }
249
250 protected:
251 Xapian::Stem *m_pStemmer;
252 Xapian::Document &m_doc;
253 const Xapian::WritableDatabase &m_db;
254 string m_prefix;
255 unsigned int m_nGramSize;
256 unsigned int m_nGramCount;
257 bool &m_doSpelling;
258 Xapian::termcount &m_termPos;
259 bool m_hasCJKV;
260
261 };
262
XapianIndex(const string & indexName)263 XapianIndex::XapianIndex(const string &indexName) :
264 IndexInterface(),
265 m_databaseName(indexName),
266 m_goodIndex(false),
267 m_doSpelling(true)
268 {
269 // Open in read-only mode
270 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName);
271 if ((pDatabase != NULL) &&
272 (pDatabase->isOpen() == true))
273 {
274 m_goodIndex = true;
275 m_doSpelling = pDatabase->withSpelling();
276 }
277 }
278
/// Copy constructor: duplicates the database name, status flags and stemming language.
XapianIndex::XapianIndex(const XapianIndex &other) :
	IndexInterface(other),
	m_databaseName(other.m_databaseName),
	m_goodIndex(other.m_goodIndex),
	m_doSpelling(other.m_doSpelling),
	m_stemLanguage(other.m_stemLanguage)
{
	// Everything is copied in the initializer list
}
287
~XapianIndex()288 XapianIndex::~XapianIndex()
289 {
290 }
291
/// Assignment operator: copies the base part, then the same members as the copy constructor.
XapianIndex &XapianIndex::operator=(const XapianIndex &other)
{
	if (this == &other)
	{
		// Self-assignment, nothing to copy
		return *this;
	}

	IndexInterface::operator=(other);
	m_databaseName = other.m_databaseName;
	m_goodIndex = other.m_goodIndex;
	m_doSpelling = other.m_doSpelling;
	m_stemLanguage = other.m_stemLanguage;

	return *this;
}
305
listDocumentsWithTerm(const string & term,set<unsigned int> & docIds,unsigned int maxDocsCount,unsigned int startDoc) const306 bool XapianIndex::listDocumentsWithTerm(const string &term, set<unsigned int> &docIds,
307 unsigned int maxDocsCount, unsigned int startDoc) const
308 {
309 unsigned int docCount = 0;
310
311 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName);
312 if (pDatabase == NULL)
313 {
314 clog << "Couldn't get index " << m_databaseName << endl;
315 return 0;
316 }
317
318 docIds.clear();
319 try
320 {
321 Xapian::Database *pIndex = pDatabase->readLock();
322 if (pIndex != NULL)
323 {
324 #ifdef DEBUG
325 clog << "XapianIndex::listDocumentsWithTerm: term " << term << endl;
326 #endif
327 // Get a list of documents that have the term
328 for (Xapian::PostingIterator postingIter = pIndex->postlist_begin(term);
329 (postingIter != pIndex->postlist_end(term)) &&
330 ((maxDocsCount == 0) || (docIds.size() < maxDocsCount));
331 ++postingIter)
332 {
333 Xapian::docid docId = *postingIter;
334
335 // We cannot use postingIter->skip_to() because startDoc isn't an ID
336 if (docCount >= startDoc)
337 {
338 docIds.insert(docId);
339 }
340 ++docCount;
341 }
342 }
343 }
344 catch (const Xapian::Error &error)
345 {
346 clog << "Couldn't get document list: " << error.get_type() << ": " << error.get_msg() << endl;
347 }
348 catch (...)
349 {
350 clog << "Couldn't get document list, unknown exception occurred" << endl;
351 }
352 pDatabase->unlock();
353
354 return docIds.size();
355 }
356
addPostingsToDocument(const Xapian::Utf8Iterator & itor,Xapian::Document & doc,const Xapian::WritableDatabase & db,const string & prefix,bool noStemming,bool & doSpelling,Xapian::termcount & termPos) const357 void XapianIndex::addPostingsToDocument(const Xapian::Utf8Iterator &itor, Xapian::Document &doc,
358 const Xapian::WritableDatabase &db, const string &prefix, bool noStemming, bool &doSpelling,
359 Xapian::termcount &termPos) const
360 {
361 Xapian::Stem *pStemmer = NULL;
362 bool isCJKV = false;
363
364 // Do we know what language to use for stemming ?
365 if ((noStemming == false) &&
366 (m_stemLanguage.empty() == false) &&
367 (m_stemLanguage != "unknown"))
368 {
369 try
370 {
371 pStemmer = new Xapian::Stem(StringManip::toLowerCase(m_stemLanguage));
372 }
373 catch (const Xapian::Error &error)
374 {
375 clog << "Couldn't create stemmer: " << error.get_type() << ": " << error.get_msg() << endl;
376 }
377 }
378
379 const char *pRawData = itor.raw();
380 if (pRawData != NULL)
381 {
382 Dijon::CJKVTokenizer tokenizer;
383 string text(pRawData);
384
385 #ifdef _DIACRITICS_SENSITIVE
386 if (tokenizer.has_cjkv(text) == true)
387 {
388 #endif
389 // Use overload
390 addPostingsToDocument(tokenizer, pStemmer, text, doc, db,
391 prefix, doSpelling, termPos);
392 isCJKV = true;
393 #ifdef _DIACRITICS_SENSITIVE
394 }
395 #endif
396 }
397
398 #ifdef _DIACRITICS_SENSITIVE
399 if (isCJKV == false)
400 {
401 Xapian::TermGenerator generator;
402
403 // Set the stemmer
404 if (pStemmer != NULL)
405 {
406 generator.set_stemmer(*pStemmer);
407 }
408
409 generator.set_termpos(termPos);
410 try
411 {
412 // Older Xapian backends don't support spelling correction
413 if (doSpelling == true)
414 {
415 // The database is required for the spelling dictionary
416 generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING);
417 generator.set_database(db);
418 }
419 generator.set_document(doc);
420 generator.index_text(itor, 1, prefix);
421 }
422 catch (const Xapian::UnimplementedError &error)
423 {
424 clog << "Couldn't index with spelling correction: " << error.get_type() << ": " << error.get_msg() << endl;
425
426 if (doSpelling == true)
427 {
428 doSpelling = false;
429
430 // Try again without spelling correction
431 // Let the caller catch the exception
432 generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, Xapian::TermGenerator::FLAG_SPELLING);
433 generator.set_document(doc);
434 generator.index_text(itor, 1, prefix);
435 }
436 }
437 termPos = generator.get_termpos();
438 }
439 #endif
440
441 if (pStemmer != NULL)
442 {
443 delete pStemmer;
444 }
445 }
446
addPostingsToDocument(Dijon::CJKVTokenizer & tokenizer,Xapian::Stem * pStemmer,const string & text,Xapian::Document & doc,const Xapian::WritableDatabase & db,const string & prefix,bool & doSpelling,Xapian::termcount & termPos) const447 void XapianIndex::addPostingsToDocument(Dijon::CJKVTokenizer &tokenizer, Xapian::Stem *pStemmer,
448 const string &text, Xapian::Document &doc, const Xapian::WritableDatabase &db,
449 const string &prefix, bool &doSpelling, Xapian::termcount &termPos) const
450 {
451 TokensIndexer handler(pStemmer, doc, db, prefix, tokenizer.get_ngram_size(),
452 doSpelling, termPos);
453
454 // Get the terms
455 tokenizer.tokenize(text, handler, true);
456 #ifdef DEBUG
457 clog << "XapianIndex::addPostingsToDocument: terms to position " << termPos << endl;
458 #endif
459 }
460
addLabelsToDocument(Xapian::Document & doc,const set<string> & labels,bool skipInternals)461 void XapianIndex::addLabelsToDocument(Xapian::Document &doc, const set<string> &labels,
462 bool skipInternals)
463 {
464 if (labels.empty() == true)
465 {
466 return;
467 }
468
469 for (set<string>::const_iterator labelIter = labels.begin(); labelIter != labels.end();
470 ++labelIter)
471 {
472 string labelName(*labelIter);
473
474 // Prevent from setting internal labels ?
475 if ((labelName.empty() == true) ||
476 ((skipInternals == true) && (labelName.substr(0, 2) == "X-")))
477 {
478 continue;
479 }
480
481 #ifdef DEBUG
482 clog << "XapianIndex::addLabelsToDocument: label \"" << labelName << "\"" << endl;
483 #endif
484 doc.add_term(string("XLABEL:") + XapianDatabase::limitTermLength(Url::escapeUrl(labelName)));
485 }
486 }
487
removePostingsFromDocument(const Xapian::Utf8Iterator & itor,Xapian::Document & doc,const Xapian::WritableDatabase & db,const string & prefix,bool noStemming,bool & doSpelling) const488 void XapianIndex::removePostingsFromDocument(const Xapian::Utf8Iterator &itor, Xapian::Document &doc,
489 const Xapian::WritableDatabase &db, const string &prefix,
490 bool noStemming, bool &doSpelling) const
491 {
492 Xapian::Document termsDoc;
493 Xapian::termcount termPos = 0;
494 bool addDoSpelling = false;
495
496 // Get the terms, without populating the spelling database
497 addPostingsToDocument(itor, termsDoc, db, prefix, noStemming, addDoSpelling, termPos);
498
499 // Get the terms and remove the first posting for each
500 for (Xapian::TermIterator termListIter = termsDoc.termlist_begin();
501 termListIter != termsDoc.termlist_end(); ++termListIter)
502 {
503 Xapian::termcount postingsCount = termListIter.positionlist_count();
504 Xapian::termcount postingNum = 0;
505 bool removeTerm = false;
506
507 #ifdef DEBUG
508 clog << "XapianIndex::removePostingsFromDocument: term " << *termListIter
509 << " has " << postingsCount << " postings" << endl;
510 #endif
511 // If a prefix is defined, or there are no postings, we can afford removing the term
512 if ((prefix.empty() == false) ||
513 (postingsCount == 0))
514 {
515 removeTerm = true;
516 }
517 else
518 {
519 // Check whether this term is in the original document and how many postings it has
520 Xapian::TermIterator termIter = doc.termlist_begin();
521 if (termIter != doc.termlist_end())
522 {
523 termIter.skip_to(*termListIter);
524 if (termIter != doc.termlist_end())
525 {
526 if (*termIter != *termListIter)
527 {
528 // This term doesn't exist in the document !
529 #ifdef DEBUG
530 clog << "XapianIndex::removePostingsFromDocument: no such term" << endl;
531 #endif
532 continue;
533 }
534
535 if (termIter.positionlist_count() <= postingsCount)
536 {
537 // All postings are to be removed, so we can remove the term
538 #ifdef DEBUG
539 clog << "XapianIndex::removePostingsFromDocument: no extra posting" << endl;
540 #endif
541 removeTerm = true;
542 }
543 }
544 }
545 }
546
547 if (removeTerm == true)
548 {
549 try
550 {
551 doc.remove_term(*termListIter);
552 }
553 catch (const Xapian::Error &error)
554 {
555 #ifdef DEBUG
556 clog << "XapianIndex::removePostingsFromDocument: " << error.get_msg() << endl;
557 #endif
558 }
559
560 try
561 {
562 // Decrease this term's frequency in the spelling dictionary
563 if (doSpelling == true)
564 {
565 db.remove_spelling(*termListIter);
566 }
567 }
568 catch (const Xapian::UnimplementedError &error)
569 {
570 clog << "Couldn't remove spelling correction: " << error.get_type() << ": " << error.get_msg() << endl;
571 doSpelling = false;
572 }
573 catch (const Xapian::Error &error)
574 {
575 #ifdef DEBUG
576 clog << "XapianIndex::removePostingsFromDocument: " << error.get_msg() << endl;
577 #endif
578 }
579 continue;
580 }
581
582 // Otherwise, remove the first N postings
583 // FIXME: if all the postings are in the range associated with the metadata
584 // as opposed to the actual data, the term can be removed altogether
585 for (Xapian::PositionIterator firstPosIter = termListIter.positionlist_begin();
586 firstPosIter != termListIter.positionlist_end(); ++firstPosIter)
587 {
588 if (postingNum >= postingsCount)
589 {
590 break;
591 }
592 ++postingNum;
593
594 try
595 {
596 doc.remove_posting(*termListIter, *firstPosIter);
597 }
598 catch (const Xapian::Error &error)
599 {
600 // This posting may have been removed already
601 #ifdef DEBUG
602 clog << "XapianIndex::removePostingsFromDocument: " << error.get_msg() << endl;
603 #endif
604 }
605 }
606 }
607 }
608
addCommonTerms(const DocumentInfo & docInfo,Xapian::Document & doc,const Xapian::WritableDatabase & db,Xapian::termcount & termPos)609 void XapianIndex::addCommonTerms(const DocumentInfo &docInfo, Xapian::Document &doc,
610 const Xapian::WritableDatabase &db, Xapian::termcount &termPos)
611 {
612 string title(docInfo.getTitle());
613 string location(docInfo.getLocation());
614 string type(docInfo.getType(false));
615 Url urlObj(location);
616
617 // Add a magic term :-)
618 doc.add_term(MAGIC_TERM);
619
620 // Index the title with prefix S
621 if (title.empty() == false)
622 {
623 addPostingsToDocument(Xapian::Utf8Iterator(title), doc, db, "S",
624 false, m_doSpelling, termPos);
625 }
626
627 string hostName, tree, fileName;
628
629 if (g_pMapper != NULL)
630 {
631 hostName = g_pMapper->getHost(docInfo);
632 tree = g_pMapper->getDirectory(docInfo);
633 fileName = g_pMapper->getFile(docInfo);
634 }
635 else
636 {
637 hostName = StringManip::toLowerCase(urlObj.getHost());
638 tree = urlObj.getLocation();
639 fileName = urlObj.getFile();
640 }
641 #ifdef DEBUG
642 clog << "XapianIndex::addCommonTerms: called for " << docInfo.getLocation()
643 << " (" << docInfo.getInternalPath() << ")" << endl;
644 #endif
645
646 // Index the full URL with prefix U
647 doc.add_term(string("U") + XapianDatabase::limitTermLength(Url::escapeUrl(docInfo.getLocation(true)), true));
648 // And for containers, the base file with XFILE:
649 if ((urlObj.isLocal() == true) &&
650 (docInfo.getInternalPath().empty() == false))
651 {
652 string protocol(urlObj.getProtocol());
653
654 doc.add_term(string("XFILE:") + XapianDatabase::limitTermLength(Url::escapeUrl(location), true));
655 if ((urlObj.isLocal() == true) &&
656 (protocol != "file"))
657 {
658 string fileUrl(location);
659
660 // Add another term with file as protocol
661 fileUrl.replace(0, protocol.length(), "file");
662 doc.add_term(string("XFILE:") + XapianDatabase::limitTermLength(Url::escapeUrl(fileUrl), true));
663 }
664 }
665 // ...the host name and included domains with prefix H
666 if (hostName.empty() == false)
667 {
668 doc.add_term(string("H") + XapianDatabase::limitTermLength(hostName, true));
669 string::size_type dotPos = hostName.find('.');
670 while (dotPos != string::npos)
671 {
672 doc.add_term(string("H") + XapianDatabase::limitTermLength(hostName.substr(dotPos + 1), true));
673
674 // Next
675 dotPos = hostName.find('.', dotPos + 1);
676 }
677 }
678 // ...the location (as is) and all directories with prefix XDIR:
679 if (tree.empty() == false)
680 {
681 if ((urlObj.isLocal() == true) &&
682 (docInfo.getIsDirectory() == true))
683 {
684 doc.add_term(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(docInfo.getLocation().substr(7)), true));
685 #ifdef DEBUG
686 clog << "XapianIndex::addCommonTerms: full XDIR" << docInfo.getLocation().substr(7) << endl;
687 #endif
688 }
689 doc.add_term(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(tree), true));
690 #ifdef DEBUG
691 clog << "XapianIndex::addCommonTerms: first XDIR" << tree << endl;
692 #endif
693 if (tree[0] == '/')
694 {
695 doc.add_term("XDIR:/");
696 #ifdef DEBUG
697 clog << "XapianIndex::addCommonTerms: top-level XDIR" << endl;
698 #endif
699 }
700 string::size_type slashPos = tree.find('/', 1);
701 while (slashPos != string::npos)
702 {
703 doc.add_term(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(tree.substr(0, slashPos)), true));
704 #ifdef DEBUG
705 clog << "XapianIndex::addCommonTerms: component XDIR" << tree.substr(0, slashPos) << endl;
706 #endif
707
708 // Next
709 slashPos = tree.find('/', slashPos + 1);
710 }
711
712 // ...and all components as XPATH:
713 bool doSpellingOnPaths = false;
714 addPostingsToDocument(Xapian::Utf8Iterator(tree), doc, db, "XPATH:",
715 true, doSpellingOnPaths, termPos);
716 }
717 else
718 {
719 doc.add_term("XDIR:/");
720 #ifdef DEBUG
721 clog << "XapianIndex::addCommonTerms: single top-level XDIR" << endl;
722 #endif
723 }
724 // ...and the file name with prefix P
725 if (fileName.empty() == false)
726 {
727 string extension;
728
729 doc.add_term(string("P") + XapianDatabase::limitTermLength(Url::escapeUrl(fileName), true));
730 if (fileName.find(' ') != string::npos)
731 {
732 bool doSpellingOnPaths = false;
733
734 // Add more XPATH: terms if there's a space in the file name
735 addPostingsToDocument(Xapian::Utf8Iterator(fileName), doc, db, "XPATH:",
736 true, doSpellingOnPaths, termPos);
737 }
738
739 // Does it have an extension ?
740 string::size_type extPos = fileName.rfind('.');
741 if ((extPos != string::npos) &&
742 (extPos + 1 < fileName.length()))
743 {
744 extension = StringManip::toLowerCase(fileName.substr(extPos + 1));
745 }
746 doc.add_term(string("E") + XapianDatabase::limitTermLength(extension));
747 }
748 // Add the language code with prefix L
749 doc.add_term(string("L") + Languages::toCode(m_stemLanguage));
750 // ...and the MIME type with prefix T
751 doc.add_term(string("T") + type);
752 string::size_type slashPos = type.find('/');
753 if (slashPos != string::npos)
754 {
755 doc.add_term(string("XCLASS:") + type.substr(0, slashPos));
756 }
757 // Others
758 if (g_pMapper != NULL)
759 {
760 vector<pair<string, string> > prefixedTerms;
761
762 g_pMapper->getTerms(docInfo, prefixedTerms);
763
764 for (vector<pair<string, string> >::const_iterator termIter = prefixedTerms.begin();
765 termIter != prefixedTerms.end(); ++termIter)
766 {
767 doc.add_term(termIter->second + XapianDatabase::limitTermLength(termIter->first));
768 }
769 }
770 }
771
removeCommonTerms(Xapian::Document & doc,const Xapian::WritableDatabase & db)772 void XapianIndex::removeCommonTerms(Xapian::Document &doc, const Xapian::WritableDatabase &db)
773 {
774 DocumentInfo docInfo;
775 set<string> commonTerms;
776 string record(doc.get_data());
777
778 // First, remove the magic term
779 commonTerms.insert(MAGIC_TERM);
780
781 if (record.empty() == true)
782 {
783 // Nothing else we can do
784 return;
785 }
786
787 XapianDatabase::recordToProps(record, &docInfo);
788 // XapianDatabase expects the language in English, which is okay here
789 string language(docInfo.getLanguage());
790 Url urlObj(docInfo.getLocation());
791
792 // FIXME: remove terms extracted from the title if they don't have more than one posting
793 string title(docInfo.getTitle());
794 if (title.empty() == false)
795 {
796 removePostingsFromDocument(Xapian::Utf8Iterator(title), doc, db, "S",
797 false, m_doSpelling);
798 }
799
800 // Location
801 string location(docInfo.getLocation());
802 commonTerms.insert(string("U") + XapianDatabase::limitTermLength(Url::escapeUrl(docInfo.getLocation(true)), true));
803 // Containers' base file
804 if ((urlObj.isLocal() == true) &&
805 (docInfo.getInternalPath().empty() == false))
806 {
807 string protocol(urlObj.getProtocol());
808
809 commonTerms.insert(string("XFILE:") + XapianDatabase::limitTermLength(Url::escapeUrl(location), true));
810
811 if ((urlObj.isLocal() == true) &&
812 (protocol != "file"))
813 {
814 string fileUrl(location);
815
816 // Add another term with file as protocol
817 fileUrl.replace(0, protocol.length(), "file");
818 commonTerms.insert(string("XFILE:") + XapianDatabase::limitTermLength(Url::escapeUrl(fileUrl), true));
819 }
820 }
821 // Host name
822 string hostName(StringManip::toLowerCase(urlObj.getHost()));
823 if (hostName.empty() == false)
824 {
825 commonTerms.insert(string("H") + XapianDatabase::limitTermLength(hostName, true));
826 string::size_type dotPos = hostName.find('.');
827 while (dotPos != string::npos)
828 {
829 commonTerms.insert(string("H") + XapianDatabase::limitTermLength(hostName.substr(dotPos + 1), true));
830
831 // Next
832 dotPos = hostName.find('.', dotPos + 1);
833 }
834 }
835 // ...location
836 string tree(urlObj.getLocation());
837 if (tree.empty() == false)
838 {
839 if ((urlObj.isLocal() == true) &&
840 (docInfo.getIsDirectory() == true))
841 {
842 commonTerms.insert(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(docInfo.getLocation().substr(7)), true));
843 }
844 commonTerms.insert(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(tree), true));
845 if (tree[0] == '/')
846 {
847 commonTerms.insert("XDIR:/");
848 }
849 string::size_type slashPos = tree.find('/', 1);
850 while (slashPos != string::npos)
851 {
852 commonTerms.insert(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(tree.substr(0, slashPos)), true));
853
854 // Next
855 slashPos = tree.find('/', slashPos + 1);
856 }
857
858 // ...paths
859 bool doSpellingOnPaths = false;
860 removePostingsFromDocument(Xapian::Utf8Iterator(tree), doc, db, "XPATH:",
861 true, doSpellingOnPaths);
862 }
863 else
864 {
865 commonTerms.insert("XDIR:/");
866 }
867 // ...and file name
868 string fileName(urlObj.getFile());
869 if (fileName.empty() == false)
870 {
871 string extension;
872
873 commonTerms.insert(string("P") + XapianDatabase::limitTermLength(Url::escapeUrl(fileName), true));
874 if (fileName.find(' ') != string::npos)
875 {
876 bool doSpellingOnPaths = false;
877
878 removePostingsFromDocument(Xapian::Utf8Iterator(fileName), doc, db, "XPATH:",
879 true, doSpellingOnPaths);
880 }
881
882 // Does it have an extension ?
883 string::size_type extPos = fileName.rfind('.');
884 if ((extPos != string::npos) &&
885 (extPos + 1 < fileName.length()))
886 {
887 extension = StringManip::toLowerCase(fileName.substr(extPos + 1));
888 }
889 commonTerms.insert(string("E") + XapianDatabase::limitTermLength(extension));
890 }
891 // Language code
892 commonTerms.insert(string("L") + Languages::toCode(language));
893 // MIME type
894 string type(docInfo.getType(false));
895 commonTerms.insert(string("T") + type);
896 string::size_type slashPos = type.find('/');
897 if (slashPos != string::npos)
898 {
899 commonTerms.insert(string("XCLASS:") + type.substr(0, slashPos));
900 }
901 // Others
902 if (g_pMapper != NULL)
903 {
904 vector<pair<string, string> > prefixedTerms;
905
906 g_pMapper->getTerms(docInfo, prefixedTerms);
907
908 for (vector<pair<string, string> >::const_iterator termIter = prefixedTerms.begin();
909 termIter != prefixedTerms.end(); ++termIter)
910 {
911 commonTerms.insert(termIter->second + XapianDatabase::limitTermLength(termIter->first));
912 }
913 }
914
915 for (set<string>::const_iterator termIter = commonTerms.begin(); termIter != commonTerms.end(); ++termIter)
916 {
917 try
918 {
919 doc.remove_term(*termIter);
920 }
921 catch (const Xapian::Error &error)
922 {
923 #ifdef DEBUG
924 clog << "XapianIndex::removeCommonTerms: " << error.get_msg() << endl;
925 #endif
926 }
927 }
928 }
929
/// Determines which language to use for stemming the given data.
///
/// The suggested language is tried first, if there is one. If Xapian has no
/// stemmer for it, or if none was suggested, the language is guessed from at
/// most the first 2048 bytes of data and the first candidate Xapian can stem
/// is picked.
///
/// @param suggestedLanguage language to try first, may be empty
/// @param pData the document's data
/// @param dataLength number of bytes available at pData
/// @return the language name, or an empty string if no candidate is suitable
string XapianIndex::scanDocument(const string &suggestedLanguage,
	const char *pData, off_t dataLength)
{
	vector<string> candidates;
	string language;
	bool scannedDocument = false;
	// Never tell the detector to look beyond the end of the data
	off_t scanLength = min(dataLength, (off_t)2048);

	if (scanLength < 0)
	{
		scanLength = 0;
	}

	if (suggestedLanguage.empty() == false)
	{
		// See first if this is suitable
		candidates.push_back(suggestedLanguage);
	}
	else
	{
		// Try to determine the document's language right away
		LanguageDetector::getInstance().guessLanguage(pData, scanLength, candidates);

		scannedDocument = true;
	}

	// See which of these languages is suitable for stemming
	vector<string>::iterator langIter = candidates.begin();
	while (langIter != candidates.end())
	{
		if (*langIter == "unknown")
		{
			++langIter;
			continue;
		}

		try
		{
			// This throws if the language isn't supported
			Xapian::Stem stemmer(StringManip::toLowerCase(*langIter));
		}
		catch (const Xapian::Error &error)
		{
			clog << "Invalid language: " << error.get_type() << ": " << error.get_msg() << endl;

			if (scannedDocument == false)
			{
				// The suggested language is not suitable
				candidates.clear();
				LanguageDetector::getInstance().guessLanguage(pData, scanLength, candidates);

				// Start over with the detected candidates
				langIter = candidates.begin();
				scannedDocument = true;
			}
			else
			{
				++langIter;
			}
			continue;
		}

		language = *langIter;
		break;
	}
#ifdef DEBUG
	clog << "XapianIndex::scanDocument: language " << language << endl;
#endif

	return language;
}
993
setDocumentData(const DocumentInfo & docInfo,Xapian::Document & doc,const string & language) const994 void XapianIndex::setDocumentData(const DocumentInfo &docInfo, Xapian::Document &doc,
995 const string &language) const
996 {
997 time_t timeT = TimeConverter::fromTimestamp(docInfo.getTimestamp());
998 struct tm *tm = localtime(&timeT);
999 string yyyymmdd(TimeConverter::toYYYYMMDDString(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday));
1000 string hhmmss(TimeConverter::toHHMMSSString(tm->tm_hour, tm->tm_min, tm->tm_sec));
1001
1002 // Date
1003 doc.add_value(0, yyyymmdd);
1004 // FIXME: checksum in value 1
1005 // Size
1006 doc.add_value(2, Xapian::sortable_serialise((double )docInfo.getSize()));
1007 // Time
1008 doc.add_value(3, hhmmss);
1009 // Date and time, for results sorting
1010 doc.add_value(4, yyyymmdd + hhmmss);
1011 // Number of seconds to January 1st, 10000
1012 doc.add_value(5, Xapian::sortable_serialise((double )253402300800 - timeT));
1013 // Any custom value ?
1014 if (g_pMapper != NULL)
1015 {
1016 map<unsigned int, string> values;
1017
1018 g_pMapper->getValues(docInfo, values);
1019 for (map<unsigned int, string>::const_iterator valIter = values.begin();
1020 valIter != values.end(); ++valIter)
1021 {
1022 doc.add_value(valIter->first, valIter->second);
1023 }
1024 }
1025
1026 DocumentInfo docCopy(docInfo);
1027 // XapianDatabase expects the language in English, which is okay here
1028 docCopy.setLanguage(language);
1029 doc.set_data(XapianDatabase::propsToRecord(&docCopy));
1030 }
1031
deleteDocuments(const string & term)1032 bool XapianIndex::deleteDocuments(const string &term)
1033 {
1034 bool unindexed = false;
1035
1036 if (term.empty() == true)
1037 {
1038 return false;
1039 }
1040
1041 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
1042 if (pDatabase == NULL)
1043 {
1044 clog << "Couldn't get index " << m_databaseName << endl;
1045 return false;
1046 }
1047
1048 try
1049 {
1050 Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
1051 if (pIndex != NULL)
1052 {
1053 #ifdef DEBUG
1054 clog << "XapianIndex::deleteDocuments: term is " << term << endl;
1055 #endif
1056
1057 // Delete documents from the index
1058 pIndex->delete_document(term);
1059
1060 unindexed = true;
1061 }
1062 }
1063 catch (const Xapian::Error &error)
1064 {
1065 clog << "Couldn't unindex documents: " << error.get_type() << ": " << error.get_msg() << endl;
1066 }
1067 catch (...)
1068 {
1069 clog << "Couldn't unindex documents, unknown exception occurred" << endl;
1070 }
1071 pDatabase->unlock();
1072
1073 return unindexed;
1074 }
1075
1076 //
1077 // Implementation of IndexInterface
1078 //
1079
1080 /// Returns false if the index couldn't be opened.
isGood(void) const1081 bool XapianIndex::isGood(void) const
1082 {
1083 return m_goodIndex;
1084 }
1085
1086 /// Gets metadata.
getMetadata(const string & name) const1087 string XapianIndex::getMetadata(const string &name) const
1088 {
1089 string metadataValue;
1090
1091 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName);
1092 if (pDatabase == NULL)
1093 {
1094 clog << "Couldn't get index " << m_databaseName << endl;
1095 return "";
1096 }
1097
1098 try
1099 {
1100 Xapian::Database *pIndex = pDatabase->readLock();
1101 if (pIndex != NULL)
1102 {
1103 // If this index type doesn't support metadata, no exception will be thrown
1104 // We will just get an empty string
1105 metadataValue = pIndex->get_metadata(name);
1106 }
1107 }
1108 catch (const Xapian::Error &error)
1109 {
1110 clog << "Couldn't get metadata: " << error.get_type() << ": " << error.get_msg() << endl;
1111 }
1112 catch (...)
1113 {
1114 clog << "Couldn't get metadata, unknown exception occurred" << endl;
1115 }
1116 pDatabase->unlock();
1117
1118 return metadataValue;
1119 }
1120
1121 /// Sets metadata.
setMetadata(const string & name,const string & value) const1122 bool XapianIndex::setMetadata(const string &name, const string &value) const
1123 {
1124 bool setMetadata = false;
1125
1126 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
1127 if (pDatabase == NULL)
1128 {
1129 clog << "Couldn't get index " << m_databaseName << endl;
1130 return false;
1131 }
1132
1133 try
1134 {
1135 Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
1136 if (pIndex != NULL)
1137 {
1138 pIndex->set_metadata(name, value);
1139 setMetadata = true;
1140 }
1141 }
1142 catch (const Xapian::UnimplementedError &error)
1143 {
1144 clog << "Couldn't set metadata: " << error.get_type() << ": " << error.get_msg() << endl;
1145 }
1146 catch (const Xapian::Error &error)
1147 {
1148 clog << "Couldn't set metadata: " << error.get_type() << ": " << error.get_msg() << endl;
1149 }
1150 catch (...)
1151 {
1152 clog << "Couldn't set metadata, unknown exception occurred" << endl;
1153 }
1154 pDatabase->unlock();
1155
1156 return setMetadata;
1157 }
1158
1159 /// Gets the index location.
getLocation(void) const1160 string XapianIndex::getLocation(void) const
1161 {
1162 return m_databaseName;
1163 }
1164
1165 /// Returns a document's properties.
getDocumentInfo(unsigned int docId,DocumentInfo & docInfo) const1166 bool XapianIndex::getDocumentInfo(unsigned int docId, DocumentInfo &docInfo) const
1167 {
1168 bool foundDocument = false;
1169
1170 if (docId == 0)
1171 {
1172 return false;
1173 }
1174
1175 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName);
1176 if (pDatabase == NULL)
1177 {
1178 clog << "Couldn't get index " << m_databaseName << endl;
1179 return false;
1180 }
1181
1182 try
1183 {
1184 Xapian::Database *pIndex = pDatabase->readLock();
1185 if (pIndex != NULL)
1186 {
1187 Xapian::Document doc = pIndex->get_document(docId);
1188 string record(doc.get_data());
1189
1190 // Get the current document data
1191 if (record.empty() == false)
1192 {
1193 XapianDatabase::recordToProps(record, &docInfo);
1194 // XapianDatabase stored the language in English
1195 docInfo.setLanguage(Languages::toLocale(docInfo.getLanguage()));
1196 foundDocument = true;
1197 }
1198 }
1199 }
1200 catch (const Xapian::Error &error)
1201 {
1202 clog << "Couldn't get document properties: " << error.get_type() << ": " << error.get_msg() << endl;
1203 }
1204 catch (...)
1205 {
1206 clog << "Couldn't get document properties, unknown exception occurred" << endl;
1207 }
1208 pDatabase->unlock();
1209
1210 return foundDocument;
1211 }
1212
1213 /// Returns a document's terms count.
getDocumentTermsCount(unsigned int docId) const1214 unsigned int XapianIndex::getDocumentTermsCount(unsigned int docId) const
1215 {
1216 unsigned int termsCount = 0;
1217
1218 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName);
1219 if (pDatabase == NULL)
1220 {
1221 clog << "Couldn't get index " << m_databaseName << endl;
1222 return 0;
1223 }
1224
1225 try
1226 {
1227 Xapian::Database *pIndex = pDatabase->readLock();
1228 if (pIndex != NULL)
1229 {
1230 Xapian::Document doc = pIndex->get_document(docId);
1231
1232 termsCount = doc.termlist_count();
1233 #ifdef DEBUG
1234 clog << "XapianIndex::getDocumentTermsCount: " << termsCount << " terms in document " << docId << endl;
1235 #endif
1236 }
1237 }
1238 catch (const Xapian::Error &error)
1239 {
1240 clog << "Couldn't get document terms count: " << error.get_type() << ": " << error.get_msg() << endl;
1241 }
1242 catch (...)
1243 {
1244 clog << "Couldn't get document terms count, unknown exception occurred" << endl;
1245 }
1246 pDatabase->unlock();
1247
1248 return termsCount;
1249 }
1250
1251 /// Returns a document's terms.
getDocumentTerms(unsigned int docId,map<unsigned int,string> & wordsBuffer) const1252 bool XapianIndex::getDocumentTerms(unsigned int docId, map<unsigned int, string> &wordsBuffer) const
1253 {
1254 vector<string> noPosTerms;
1255 bool gotTerms = false;
1256
1257 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName);
1258 if (pDatabase == NULL)
1259 {
1260 clog << "Couldn't get index " << m_databaseName << endl;
1261 return false;
1262 }
1263
1264 try
1265 {
1266 Xapian::Database *pIndex = pDatabase->readLock();
1267 if (pIndex != NULL)
1268 {
1269 unsigned int lastPos = 0;
1270
1271 // Go through the position list of each term
1272 for (Xapian::TermIterator termIter = pIndex->termlist_begin(docId);
1273 termIter != pIndex->termlist_end(docId); ++termIter)
1274 {
1275 string termName(*termIter);
1276 char firstChar = termName[0];
1277 bool hasPositions = false;
1278
1279 // Is it prefixed ?
1280 if (isupper((int)firstChar) != 0)
1281 {
1282 // Skip X-prefixed terms
1283 if (firstChar == 'X')
1284 {
1285 #ifdef DEBUG
1286 clog << "XapianIndex::getDocumentTerms: skipping " << termName << endl;
1287 #endif
1288 continue;
1289 }
1290
1291 // Keep other prefixed terms (S, U, H, P, L, T...)
1292 termName.erase(0, 1);
1293 }
1294
1295 for (Xapian::PositionIterator positionIter = pIndex->positionlist_begin(docId, *termIter);
1296 positionIter != pIndex->positionlist_end(docId, *termIter); ++positionIter)
1297 {
1298 wordsBuffer[*positionIter] = termName;
1299 if (*positionIter > lastPos)
1300 {
1301 lastPos = *positionIter;
1302 }
1303 hasPositions = true;
1304 }
1305
1306 if (hasPositions == false)
1307 {
1308 noPosTerms.push_back(termName);
1309 }
1310
1311 gotTerms = true;
1312 }
1313
1314 // Append terms without positional docInformation as if they were at the end of the document
1315 for (vector<string>::const_iterator noPosIter = noPosTerms.begin();
1316 noPosIter != noPosTerms.end(); ++noPosIter)
1317 {
1318 wordsBuffer[lastPos] = *noPosIter;
1319 ++lastPos;
1320 }
1321 }
1322 }
1323 catch (const Xapian::Error &error)
1324 {
1325 clog << "Couldn't get document terms: " << error.get_type() << ": " << error.get_msg() << endl;
1326 }
1327 catch (...)
1328 {
1329 clog << "Couldn't get document terms, unknown exception occurred" << endl;
1330 }
1331 pDatabase->unlock();
1332
1333 return gotTerms;
1334 }
1335
1336 /// Sets the list of known labels.
setLabels(const set<string> & labels,bool resetLabels)1337 bool XapianIndex::setLabels(const set<string> &labels, bool resetLabels)
1338 {
1339 string labelsString;
1340
1341 // Whether labels are reset or not doesn't make any difference
1342 for (set<string>::const_iterator labelIter = labels.begin();
1343 labelIter != labels.end(); ++labelIter)
1344 {
1345 // Prevent from setting internal labels
1346 if (labelIter->substr(0, 2) == "X-")
1347 {
1348 continue;
1349 }
1350
1351 labelsString += "[";
1352 labelsString += Url::escapeUrl(*labelIter);
1353 labelsString += "]";
1354 }
1355
1356 return setMetadata("labels", labelsString);
1357 }
1358
1359 /// Gets the list of known labels.
getLabels(set<string> & labels) const1360 bool XapianIndex::getLabels(set<string> &labels) const
1361 {
1362 string labelsString(getMetadata("labels"));
1363
1364 if (labelsString.empty() == true)
1365 {
1366 return false;
1367 }
1368
1369 string::size_type endPos = 0;
1370 string label(StringManip::extractField(labelsString, "[", "]", endPos));
1371
1372 while (label.empty() == false)
1373 {
1374 labels.insert(Url::unescapeUrl(label));
1375
1376 if (endPos == string::npos)
1377 {
1378 break;
1379 }
1380 label = StringManip::extractField(labelsString, "[", "]", endPos);
1381 }
1382
1383 return true;
1384 }
1385
1386 /// Adds a label.
addLabel(const string & name)1387 bool XapianIndex::addLabel(const string &name)
1388 {
1389 set<string> labels;
1390
1391 if (getLabels(labels) == true)
1392 {
1393 labels.insert(name);
1394
1395 if (setLabels(labels, true) == true)
1396 {
1397 return true;
1398 }
1399 }
1400
1401 return false;
1402 }
1403
1404 /// Deletes all references to a label.
deleteLabel(const string & name)1405 bool XapianIndex::deleteLabel(const string &name)
1406 {
1407 bool deletedLabel = false;
1408
1409 // Prevent from deleting internal labels
1410 if (name.substr(0, 2) == "X-")
1411 {
1412 return false;
1413 }
1414
1415 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
1416 if (pDatabase == NULL)
1417 {
1418 clog << "Couldn't get index " << m_databaseName << endl;
1419 return false;
1420 }
1421
1422 try
1423 {
1424 Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
1425 if (pIndex != NULL)
1426 {
1427 string term("XLABEL:");
1428
1429 // Get documents that have this label
1430 term += XapianDatabase::limitTermLength(Url::escapeUrl(name));
1431 for (Xapian::PostingIterator postingIter = pIndex->postlist_begin(term);
1432 postingIter != pIndex->postlist_end(term); ++postingIter)
1433 {
1434 Xapian::docid docId = *postingIter;
1435
1436 // Get the document
1437 Xapian::Document doc = pIndex->get_document(docId);
1438 // Remove the term
1439 doc.remove_term(term);
1440 // ...and update the document
1441 pIndex->replace_document(docId, doc);
1442 }
1443 deletedLabel = true;
1444 }
1445 }
1446 catch (const Xapian::Error &error)
1447 {
1448 clog << "Couldn't delete label: " << error.get_type() << ": " << error.get_msg() << endl;
1449 }
1450 catch (...)
1451 {
1452 clog << "Couldn't delete label, unknown exception occurred" << endl;
1453 }
1454 pDatabase->unlock();
1455
1456 return deletedLabel;
1457 }
1458
1459 /// Determines whether a document has a label.
hasLabel(unsigned int docId,const string & name) const1460 bool XapianIndex::hasLabel(unsigned int docId, const string &name) const
1461 {
1462 bool foundLabel = false;
1463
1464 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName);
1465 if (pDatabase == NULL)
1466 {
1467 clog << "Couldn't get index " << m_databaseName << endl;
1468 return false;
1469 }
1470
1471 try
1472 {
1473 Xapian::Database *pIndex = pDatabase->readLock();
1474 if (pIndex != NULL)
1475 {
1476 string term("XLABEL:");
1477
1478 // Get documents that have this label
1479 // FIXME: would it be faster to get the document's terms ?
1480 term += XapianDatabase::limitTermLength(Url::escapeUrl(name));
1481 Xapian::PostingIterator postingIter = pIndex->postlist_begin(term);
1482 if (postingIter != pIndex->postlist_end(term))
1483 {
1484 // Is this document in the list ?
1485 postingIter.skip_to(docId);
1486 if ((postingIter != pIndex->postlist_end(term)) &&
1487 (docId == (*postingIter)))
1488 {
1489 foundLabel = true;
1490 }
1491 }
1492 }
1493 }
1494 catch (const Xapian::Error &error)
1495 {
1496 clog << "Couldn't check document labels: " << error.get_type() << ": " << error.get_msg() << endl;
1497 }
1498 catch (...)
1499 {
1500 clog << "Couldn't check document labels, unknown exception occurred" << endl;
1501 }
1502 pDatabase->unlock();
1503
1504 return foundLabel;
1505 }
1506
1507 /// Returns a document's labels.
getDocumentLabels(unsigned int docId,set<string> & labels) const1508 bool XapianIndex::getDocumentLabels(unsigned int docId, set<string> &labels) const
1509 {
1510 bool gotLabels = false;
1511
1512 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName);
1513 if (pDatabase == NULL)
1514 {
1515 clog << "Couldn't get index " << m_databaseName << endl;
1516 return false;
1517 }
1518
1519 labels.clear();
1520 try
1521 {
1522 Xapian::Database *pIndex = pDatabase->readLock();
1523 if (pIndex != NULL)
1524 {
1525 Xapian::TermIterator termIter = pIndex->termlist_begin(docId);
1526 if (termIter != pIndex->termlist_end(docId))
1527 {
1528 for (termIter.skip_to("XLABEL:");
1529 termIter != pIndex->termlist_end(docId); ++termIter)
1530 {
1531 if ((*termIter).length() < 7)
1532 {
1533 break;
1534 }
1535
1536 // Is this a label ?
1537 if (strncasecmp((*termIter).c_str(), "XLABEL:", min(7, (int)(*termIter).length())) == 0)
1538 {
1539 labels.insert(Url::unescapeUrl((*termIter).substr(7)));
1540 }
1541 }
1542 gotLabels = true;
1543 }
1544 }
1545 }
1546 catch (const Xapian::Error &error)
1547 {
1548 clog << "Couldn't get document's labels: " << error.get_type() << ": " << error.get_msg() << endl;
1549 }
1550 catch (...)
1551 {
1552 clog << "Couldn't get document's labels, unknown exception occurred" << endl;
1553 }
1554 pDatabase->unlock();
1555
1556 return gotLabels;
1557 }
1558
1559 /// Sets a document's labels.
setDocumentLabels(unsigned int docId,const set<string> & labels,bool resetLabels)1560 bool XapianIndex::setDocumentLabels(unsigned int docId, const set<string> &labels,
1561 bool resetLabels)
1562 {
1563 set<unsigned int> docIds;
1564
1565 docIds.insert(docId);
1566 return setDocumentsLabels(docIds, labels, resetLabels);
1567 }
1568
1569 /// Sets documents' labels.
setDocumentsLabels(const set<unsigned int> & docIds,const set<string> & labels,bool resetLabels)1570 bool XapianIndex::setDocumentsLabels(const set<unsigned int> &docIds,
1571 const set<string> &labels, bool resetLabels)
1572 {
1573 bool updatedLabels = false;
1574
1575 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
1576 if (pDatabase == NULL)
1577 {
1578 clog << "Couldn't get index " << m_databaseName << endl;
1579 return false;
1580 }
1581
1582 for (set<unsigned int>::const_iterator docIter = docIds.begin();
1583 docIter != docIds.end(); ++docIter)
1584 {
1585 try
1586 {
1587 Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
1588 if (pIndex == NULL)
1589 {
1590 break;
1591 }
1592
1593 unsigned int docId = (*docIter);
1594 Xapian::Document doc = pIndex->get_document(docId);
1595
1596 // Reset existing labels ?
1597 if (resetLabels == true)
1598 {
1599 Xapian::TermIterator termIter = pIndex->termlist_begin(docId);
1600 if (termIter != pIndex->termlist_end(docId))
1601 {
1602 for (termIter.skip_to("XLABEL:");
1603 termIter != pIndex->termlist_end(docId); ++termIter)
1604 {
1605 string term(*termIter);
1606
1607 // Is this a non-internal label ?
1608 if ((strncasecmp(term.c_str(), "XLABEL:", min(7, (int)term.length())) == 0) &&
1609 (strncasecmp(term.c_str(), "XLABEL:X-", min(9, (int)term.length())) != 0))
1610 {
1611 doc.remove_term(term);
1612 }
1613 }
1614 }
1615 }
1616
1617 // Set new labels
1618 addLabelsToDocument(doc, labels, true);
1619
1620 pIndex->replace_document(docId, doc);
1621 updatedLabels = true;
1622 }
1623 catch (const Xapian::Error &error)
1624 {
1625 clog << "Couldn't update document's labels: " << error.get_type() << ": " << error.get_msg() << endl;
1626 }
1627 catch (...)
1628 {
1629 clog << "Couldn't update document's labels, unknown exception occurred" << endl;
1630 }
1631
1632 pDatabase->unlock();
1633 }
1634
1635 return updatedLabels;
1636 }
1637
1638 /// Checks whether the given URL is in the index.
hasDocument(const string & url) const1639 unsigned int XapianIndex::hasDocument(const string &url) const
1640 {
1641 unsigned int docId = 0;
1642
1643 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName);
1644 if (pDatabase == NULL)
1645 {
1646 clog << "Couldn't get index " << m_databaseName << endl;
1647 return 0;
1648 }
1649
1650 try
1651 {
1652 Xapian::Database *pIndex = pDatabase->readLock();
1653 if (pIndex != NULL)
1654 {
1655 string term = string("U") + XapianDatabase::limitTermLength(Url::escapeUrl(Url::canonicalizeUrl(url)), true);
1656
1657 // Get documents that have this term
1658 Xapian::PostingIterator postingIter = pIndex->postlist_begin(term);
1659 if (postingIter != pIndex->postlist_end(term))
1660 {
1661 // This URL was indexed
1662 docId = *postingIter;
1663 #ifdef DEBUG
1664 clog << "XapianIndex::hasDocument: " << term << " in document "
1665 << docId << " " << postingIter.get_wdf() << " time(s)" << endl;
1666 #endif
1667 }
1668 // FIXME: what if the term exists in more than one document ?
1669 }
1670 }
1671 catch (const Xapian::Error &error)
1672 {
1673 clog << "Couldn't look for document: " << error.get_type() << ": " << error.get_msg() << endl;
1674 }
1675 catch (...)
1676 {
1677 clog << "Couldn't look for document, unknown exception occurred" << endl;
1678 }
1679 pDatabase->unlock();
1680
1681 return docId;
1682 }
1683
1684 /// Gets terms with the same root.
getCloseTerms(const string & term,set<string> & suggestions)1685 unsigned int XapianIndex::getCloseTerms(const string &term, set<string> &suggestions)
1686 {
1687 Dijon::CJKVTokenizer tokenizer;
1688
1689 // Only offer suggestions for non CJKV terms
1690 if (tokenizer.has_cjkv(term) == true)
1691 {
1692 return 0;
1693 }
1694
1695 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName);
1696 if (pDatabase == NULL)
1697 {
1698 clog << "Couldn't get index " << m_databaseName << endl;
1699 return 0;
1700 }
1701
1702 suggestions.clear();
1703 try
1704 {
1705 Xapian::Database *pIndex = pDatabase->readLock();
1706 if (pIndex != NULL)
1707 {
1708 Xapian::TermIterator termIter = pIndex->allterms_begin();
1709
1710 if (termIter != pIndex->allterms_end())
1711 {
1712 string baseTerm(StringManip::toLowerCase(term));
1713 unsigned int count = 0;
1714
1715 // Get the next 10 terms
1716 for (termIter.skip_to(baseTerm);
1717 (termIter != pIndex->allterms_end()) && (count < 10); ++termIter)
1718 {
1719 string suggestedTerm(*termIter);
1720
1721 // Does this term have the same root ?
1722 if (suggestedTerm.find(baseTerm) != 0)
1723 {
1724 break;
1725 }
1726
1727 suggestions.insert(suggestedTerm);
1728 ++count;
1729 }
1730 }
1731 }
1732 }
1733 catch (const Xapian::Error &error)
1734 {
1735 clog << "Couldn't get terms: " << error.get_type() << ": " << error.get_msg() << endl;
1736 }
1737 catch (...)
1738 {
1739 clog << "Couldn't get terms, unknown exception occurred" << endl;
1740 }
1741 pDatabase->unlock();
1742
1743 return suggestions.size();
1744 }
1745
1746 /// Returns the ID of the last document.
getLastDocumentID(void) const1747 unsigned int XapianIndex::getLastDocumentID(void) const
1748 {
1749 unsigned int docId = 0;
1750
1751 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName);
1752 if (pDatabase == NULL)
1753 {
1754 clog << "Couldn't get index " << m_databaseName << endl;
1755 return 0;
1756 }
1757
1758 try
1759 {
1760 Xapian::Database *pIndex = pDatabase->readLock();
1761 if (pIndex != NULL)
1762 {
1763 docId = pIndex->get_lastdocid();
1764 }
1765 }
1766 catch (const Xapian::Error &error)
1767 {
1768 clog << "Couldn't get last document ID: " << error.get_type() << ": " << error.get_msg() << endl;
1769 }
1770 catch (...)
1771 {
1772 clog << "Couldn't get last document ID, unknown exception occurred" << endl;
1773 }
1774 pDatabase->unlock();
1775
1776 return docId;
1777 }
1778
1779 /// Returns the number of documents.
getDocumentsCount(const string & labelName) const1780 unsigned int XapianIndex::getDocumentsCount(const string &labelName) const
1781 {
1782 unsigned int docCount = 0;
1783
1784 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName);
1785 if (pDatabase == NULL)
1786 {
1787 clog << "Couldn't get index " << m_databaseName << endl;
1788 return 0;
1789 }
1790
1791 try
1792 {
1793 Xapian::Database *pIndex = pDatabase->readLock();
1794 if (pIndex != NULL)
1795 {
1796 if (labelName.empty() == true)
1797 {
1798 docCount = pIndex->get_doccount();
1799 }
1800 else
1801 {
1802 string term("XLABEL:");
1803
1804 // Each label appears only one per document so the collection frequency
1805 // is the number of documents that have this label
1806 term += XapianDatabase::limitTermLength(Url::escapeUrl(labelName));
1807 docCount = pIndex->get_collection_freq(term);
1808 }
1809 }
1810 }
1811 catch (const Xapian::Error &error)
1812 {
1813 clog << "Couldn't count documents: " << error.get_type() << ": " << error.get_msg() << endl;
1814 }
1815 catch (...)
1816 {
1817 clog << "Couldn't count documents, unknown exception occurred" << endl;
1818 }
1819 pDatabase->unlock();
1820
1821 return docCount;
1822 }
1823
1824 /// Lists document IDs.
listDocuments(set<unsigned int> & docIds,unsigned int maxDocsCount,unsigned int startDoc) const1825 unsigned int XapianIndex::listDocuments(set<unsigned int> &docIds,
1826 unsigned int maxDocsCount, unsigned int startDoc) const
1827 {
1828 // All documents have the magic term
1829 if (listDocumentsWithTerm("", docIds, maxDocsCount, startDoc) == true)
1830 {
1831 return docIds.size();
1832 }
1833
1834 return 0;
1835 }
1836
1837 /// Lists documents.
listDocuments(const string & name,set<unsigned int> & docIds,NameType type,unsigned int maxDocsCount,unsigned int startDoc) const1838 bool XapianIndex::listDocuments(const string &name, set<unsigned int> &docIds,
1839 NameType type, unsigned int maxDocsCount, unsigned int startDoc) const
1840 {
1841 string term;
1842
1843 if (type == BY_LABEL)
1844 {
1845 term = string("XLABEL:") + XapianDatabase::limitTermLength(Url::escapeUrl(name));
1846 }
1847 else if (type == BY_DIRECTORY)
1848 {
1849 term = string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(name), true);
1850 }
1851 else if (type == BY_FILE)
1852 {
1853 term = string("U") + XapianDatabase::limitTermLength(Url::escapeUrl(name), true);
1854 }
1855 else if (type == BY_CONTAINER_FILE)
1856 {
1857 term = string("XFILE:") + XapianDatabase::limitTermLength(Url::escapeUrl(name), true);
1858 }
1859
1860 return listDocumentsWithTerm(term, docIds, maxDocsCount, startDoc);
1861 }
1862
1863 /// Indexes the given data.
indexDocument(const Document & document,const std::set<std::string> & labels,unsigned int & docId)1864 bool XapianIndex::indexDocument(const Document &document, const std::set<std::string> &labels,
1865 unsigned int &docId)
1866 {
1867 bool indexed = false;
1868
1869 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
1870 if (pDatabase == NULL)
1871 {
1872 clog << "Couldn't get index " << m_databaseName << endl;
1873 return false;
1874 }
1875
1876 // Cache the document's properties
1877 DocumentInfo docInfo(document);
1878 docInfo.setLocation(Url::canonicalizeUrl(document.getLocation()));
1879
1880 off_t dataLength = 0;
1881 const char *pData = document.getData(dataLength);
1882
1883 // Don't scan the document if a language is specified
1884 m_stemLanguage = Languages::toEnglish(docInfo.getLanguage());
1885 if ((pData != NULL) &&
1886 (dataLength > 0))
1887 {
1888 m_stemLanguage = scanDocument(m_stemLanguage, pData, dataLength);
1889 docInfo.setLanguage(Languages::toLocale(m_stemLanguage));
1890 }
1891
1892 try
1893 {
1894 Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
1895 if (pIndex != NULL)
1896 {
1897 Xapian::Document doc;
1898 Xapian::termcount termPos = 0;
1899
1900 // Populate the Xapian document
1901 addCommonTerms(docInfo, doc, *pIndex, termPos);
1902 if ((pData != NULL) &&
1903 (dataLength > 0))
1904 {
1905 Xapian::Utf8Iterator itor(pData, dataLength);
1906 addPostingsToDocument(itor, doc, *pIndex, "",
1907 false, m_doSpelling, termPos);
1908 }
1909 #ifdef DEBUG
1910 clog << "XapianIndex::indexDocument: " << labels.size() << " labels for URL " << docInfo.getLocation(true) << endl;
1911 #endif
1912
1913 // Add labels
1914 addLabelsToDocument(doc, labels, false);
1915
1916 // Set data
1917 setDocumentData(docInfo, doc, m_stemLanguage);
1918
1919 // Add this document to the Xapian index
1920 docId = pIndex->add_document(doc);
1921 indexed = true;
1922 }
1923 }
1924 catch (const Xapian::Error &error)
1925 {
1926 clog << "Couldn't index document: " << error.get_type() << ": " << error.get_msg() << endl;
1927 }
1928 catch (...)
1929 {
1930 clog << "Couldn't index document, unknown exception occurred" << endl;
1931 }
1932 pDatabase->unlock();
1933
1934 return indexed;
1935 }
1936
1937 /// Updates the given document; true if success.
updateDocument(unsigned int docId,const Document & document)1938 bool XapianIndex::updateDocument(unsigned int docId, const Document &document)
1939 {
1940 bool updated = false;
1941
1942 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
1943 if (pDatabase == NULL)
1944 {
1945 clog << "Couldn't get index " << m_databaseName << endl;
1946 return false;
1947 }
1948
1949 // Cache the document's properties
1950 DocumentInfo docInfo(document);
1951 set<string> labels(document.getLabels());
1952 docInfo.setLocation(Url::canonicalizeUrl(document.getLocation()));
1953
1954 off_t dataLength = 0;
1955 const char *pData = document.getData(dataLength);
1956
1957 // Don't scan the document if a language is specified
1958 m_stemLanguage = Languages::toEnglish(docInfo.getLanguage());
1959 if ((pData != NULL) &&
1960 (dataLength > 0))
1961 {
1962 m_stemLanguage = scanDocument(m_stemLanguage, pData, dataLength);
1963 docInfo.setLanguage(Languages::toLocale(m_stemLanguage));
1964 }
1965
1966 Xapian::WritableDatabase *pIndex = NULL;
1967
1968 try
1969 {
1970 pIndex = pDatabase->writeLock();
1971 if (pIndex != NULL)
1972 {
1973 Xapian::Document doc;
1974 Xapian::termcount termPos = 0;
1975
1976 // Populate the Xapian document
1977 addCommonTerms(docInfo, doc, *pIndex, termPos);
1978 if ((pData != NULL) &&
1979 (dataLength > 0))
1980 {
1981 Xapian::Utf8Iterator itor(pData, dataLength);
1982 addPostingsToDocument(itor, doc, *pIndex, "",
1983 false, m_doSpelling, termPos);
1984 }
1985
1986 // Add labels
1987 addLabelsToDocument(doc, labels, false);
1988
1989 // Set data
1990 setDocumentData(docInfo, doc, m_stemLanguage);
1991
1992 // Update the document in the database
1993 pIndex->replace_document(docId, doc);
1994 updated = true;
1995 }
1996 }
1997 catch (const Xapian::Error &error)
1998 {
1999 clog << "Couldn't update document: " << error.get_type() << ": " << error.get_msg() << endl;
2000 }
2001 catch (...)
2002 {
2003 clog << "Couldn't update document, unknown exception occurred" << endl;
2004 }
2005 if (pIndex != NULL)
2006 {
2007 pDatabase->unlock();
2008 }
2009
2010 return updated;
2011 }
2012
2013 /// Updates a document's properties.
updateDocumentInfo(unsigned int docId,const DocumentInfo & docInfo)2014 bool XapianIndex::updateDocumentInfo(unsigned int docId, const DocumentInfo &docInfo)
2015 {
2016 bool updated = false;
2017
2018 if (docId == 0)
2019 {
2020 return false;
2021 }
2022
2023 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
2024 if (pDatabase == NULL)
2025 {
2026 clog << "Couldn't get index " << m_databaseName << endl;
2027 return false;
2028 }
2029
2030 try
2031 {
2032 Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
2033 if (pIndex != NULL)
2034 {
2035 Xapian::Document doc = pIndex->get_document(docId);
2036 Xapian::termcount termPos = 0;
2037
2038 // Update the document data with the current language
2039 m_stemLanguage = Languages::toEnglish(docInfo.getLanguage());
2040 removeCommonTerms(doc, *pIndex);
2041 addCommonTerms(docInfo, doc, *pIndex, termPos);
2042 setDocumentData(docInfo, doc, m_stemLanguage);
2043
2044 pIndex->replace_document(docId, doc);
2045 updated = true;
2046 }
2047 }
2048 catch (const Xapian::Error &error)
2049 {
2050 clog << "Couldn't update document properties: " << error.get_type() << ": " << error.get_msg() << endl;
2051 }
2052 catch (...)
2053 {
2054 clog << "Couldn't update document properties, unknown exception occurred" << endl;
2055 }
2056 pDatabase->unlock();
2057
2058 return updated;
2059 }
2060
2061 /// Unindexes the given document; true if success.
unindexDocument(unsigned int docId)2062 bool XapianIndex::unindexDocument(unsigned int docId)
2063 {
2064 bool unindexed = false;
2065
2066 if (docId == 0)
2067 {
2068 return false;
2069 }
2070
2071 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
2072 if (pDatabase == NULL)
2073 {
2074 clog << "Couldn't get index " << m_databaseName << endl;
2075 return false;
2076 }
2077
2078 try
2079 {
2080 Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
2081 if (pIndex != NULL)
2082 {
2083 // Delete the document from the index
2084 pIndex->delete_document(docId);
2085 unindexed = true;
2086 }
2087 }
2088 catch (const Xapian::Error &error)
2089 {
2090 clog << "Couldn't unindex document: " << error.get_type() << ": " << error.get_msg() << endl;
2091 }
2092 catch (...)
2093 {
2094 clog << "Couldn't unindex document, unknown exception occurred" << endl;
2095 }
2096 pDatabase->unlock();
2097
2098 return unindexed;
2099 }
2100
2101 /// Unindexes the given document.
unindexDocument(const string & location)2102 bool XapianIndex::unindexDocument(const string &location)
2103 {
2104 string term(string("U") + XapianDatabase::limitTermLength(Url::escapeUrl(Url::canonicalizeUrl(location)), true));
2105
2106 return deleteDocuments(term);
2107 }
2108
2109 /// Unindexes documents.
unindexDocuments(const string & name,NameType type)2110 bool XapianIndex::unindexDocuments(const string &name, NameType type)
2111 {
2112 string term;
2113
2114 if (type == BY_LABEL)
2115 {
2116 term = string("XLABEL:") + XapianDatabase::limitTermLength(Url::escapeUrl(name));
2117 }
2118 else if (type == BY_DIRECTORY)
2119 {
2120 term = string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(name), true);
2121 }
2122 else if (type == BY_FILE)
2123 {
2124 term = string("U") + XapianDatabase::limitTermLength(Url::escapeUrl(name), true);
2125 }
2126 else if (type == BY_CONTAINER_FILE)
2127 {
2128 term = string("XFILE:") + XapianDatabase::limitTermLength(Url::escapeUrl(name), true);
2129 }
2130
2131 return deleteDocuments(term);
2132 }
2133
2134 /// Unindexes all documents.
unindexAllDocuments(void)2135 bool XapianIndex::unindexAllDocuments(void)
2136 {
2137 // All documents have the magic term
2138 return deleteDocuments(MAGIC_TERM);
2139 }
2140
2141 /// Flushes recent changes to the disk.
flush(void)2142 bool XapianIndex::flush(void)
2143 {
2144 bool flushed = false;
2145
2146 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
2147 if (pDatabase == NULL)
2148 {
2149 clog << "Couldn't get index " << m_databaseName << endl;
2150 return false;
2151 }
2152
2153 try
2154 {
2155 Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
2156 if (pIndex != NULL)
2157 {
2158 pIndex->commit();
2159 flushed = true;
2160 }
2161 }
2162 catch (const Xapian::Error &error)
2163 {
2164 clog << "Couldn't flush database: " << error.get_type() << ": " << error.get_msg() << endl;
2165 }
2166 catch (...)
2167 {
2168 clog << "Couldn't flush database, unknown exception occurred" << endl;
2169 }
2170 pDatabase->unlock();
2171
2172 return flushed;
2173 }
2174
2175 /// Reopens the index.
reopen(void) const2176 bool XapianIndex::reopen(void) const
2177 {
2178 // Reopen
2179 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName);
2180 if (pDatabase == NULL)
2181 {
2182 clog << "Couldn't get index " << m_databaseName << endl;
2183 return false;
2184 }
2185 pDatabase->reopen();
2186
2187 return true;
2188 }
2189
2190 /// Resets the index.
reset(void)2191 bool XapianIndex::reset(void)
2192 {
2193 // Overwrite and reopen
2194 XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false, true);
2195 if (pDatabase == NULL)
2196 {
2197 clog << "Couldn't get index " << m_databaseName << endl;
2198 return false;
2199 }
2200
2201 return true;
2202 }
2203
2204