1 /******************************************************************************
2 *
3 * swmodule.cpp - code for base class 'SWModule'. SWModule is the basis
4 * for all types of modules (e.g. texts, commentaries,
5 * maps, lexicons, etc.)
6 *
7 * $Id: swmodule.cpp 3515 2017-11-01 11:38:09Z scribe $
8 *
9 * Copyright 1999-2013 CrossWire Bible Society (http://www.crosswire.org)
10 * CrossWire Bible Society
11 * P. O. Box 2528
12 * Tempe, AZ 85280-2528
13 *
14 * This program is free software; you can redistribute it and/or modify it
15 * under the terms of the GNU General Public License as published by the
16 * Free Software Foundation version 2.
17 *
18 * This program is distributed in the hope that it will be useful, but
19 * WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * General Public License for more details.
22 *
23 */
24
25
26 #include <vector>
27
28 #include <swlog.h>
29 #include <sysdata.h>
30 #include <swmodule.h>
31 #include <utilstr.h>
32 #include <swfilter.h>
33 #include <versekey.h> // KLUDGE for Search
34 #include <treekeyidx.h> // KLUDGE for Search
35 #include <swoptfilter.h>
36 #include <filemgr.h>
37 #include <stringmgr.h>
38 #ifndef _MSC_VER
39 #include <iostream>
40 #endif
41
42 #if defined(USECXX11REGEX)
43 #include <regex>
44 #ifndef REG_ICASE
45 #define REG_ICASE std::regex::icase
46 #endif
47 #elif defined(USEICUREGEX)
48 #include <unicode/regex.h>
49 #ifndef REG_ICASE
50 #define REG_ICASE UREGEX_CASE_INSENSITIVE
51 #endif
52 #else
53 #include <regex.h> // GNU
54 #endif
55
56 #if defined USEXAPIAN
57 #include <xapian.h>
58 #elif defined USELUCENE
59 #include <CLucene.h>
60
61 //Lucence includes
62 //#include "CLucene.h"
63 //#include "CLucene/util/Reader.h"
64 //#include "CLucene/util/Misc.h"
65 //#include "CLucene/util/dirent.h"
66
67 using namespace lucene::index;
68 using namespace lucene::analysis;
69 using namespace lucene::util;
70 using namespace lucene::store;
71 using namespace lucene::document;
72 using namespace lucene::queryParser;
73 using namespace lucene::search;
74 #endif
75
76 using std::vector;
77
78 SWORD_NAMESPACE_START
79
80 SWModule::StdOutDisplay SWModule::rawdisp;
81
82 typedef std::list<SWBuf> StringList;
83
84 /******************************************************************************
85 * SWModule Constructor - Initializes data for instance of SWModule
86 *
87 * ENT: imodname - Internal name for module
88 * imoddesc - Name to display to user for module
89 * idisp - Display object to use for displaying
90 * imodtype - Type of Module (All modules will be displayed with
91 * others of same type under their modtype heading
92 * unicode - if this module is unicode
93 */
94
SWModule(const char * imodname,const char * imoddesc,SWDisplay * idisp,const char * imodtype,SWTextEncoding encoding,SWTextDirection direction,SWTextMarkup markup,const char * imodlang)95 SWModule::SWModule(const char *imodname, const char *imoddesc, SWDisplay *idisp, const char *imodtype, SWTextEncoding encoding, SWTextDirection direction, SWTextMarkup markup, const char *imodlang) {
96 key = createKey();
97 entryBuf = "";
98 config = &ownConfig;
99 modname = 0;
100 error = 0;
101 moddesc = 0;
102 modtype = 0;
103 modlang = 0;
104 this->encoding = encoding;
105 this->direction = direction;
106 this->markup = markup;
107 entrySize= -1;
108 disp = (idisp) ? idisp : &rawdisp;
109 stdstr(&modname, imodname);
110 stdstr(&moddesc, imoddesc);
111 stdstr(&modtype, imodtype);
112 stdstr(&modlang, imodlang);
113 stripFilters = new FilterList();
114 rawFilters = new FilterList();
115 renderFilters = new FilterList();
116 optionFilters = new OptionFilterList();
117 encodingFilters = new FilterList();
118 skipConsecutiveLinks = true;
119 procEntAttr = true;
120 }
121
122
123 /******************************************************************************
124 * SWModule Destructor - Cleans up instance of SWModule
125 */
126
~SWModule()127 SWModule::~SWModule()
128 {
129 if (modname)
130 delete [] modname;
131 if (moddesc)
132 delete [] moddesc;
133 if (modtype)
134 delete [] modtype;
135 if (modlang)
136 delete [] modlang;
137
138 if (key) {
139 if (!key->isPersist())
140 delete key;
141 }
142
143 stripFilters->clear();
144 rawFilters->clear();
145 renderFilters->clear();
146 optionFilters->clear();
147 encodingFilters->clear();
148 entryAttributes.clear();
149
150 delete stripFilters;
151 delete rawFilters;
152 delete renderFilters;
153 delete optionFilters;
154 delete encodingFilters;
155 }
156
157
158 /******************************************************************************
159 * SWModule::createKey - Allocates a key of specific type for module
160 *
161 * RET: pointer to allocated key
162 */
163
createKey() const164 SWKey *SWModule::createKey() const
165 {
166 return new SWKey();
167 }
168
169
170 /******************************************************************************
171 * SWModule::popError - Gets and clears error status
172 *
173 * RET: error status
174 */
175
popError()176 char SWModule::popError()
177 {
178 char retval = error;
179
180 error = 0;
181 if (!retval) retval = key->popError();
182 return retval;
183 }
184
185
186 /******************************************************************************
187 * SWModule::Name - Sets/gets module name
188 *
189 * ENT: imodname - value which to set modname
190 * [0] - only get
191 *
192 * RET: pointer to modname
193 */
194
getName() const195 const char *SWModule::getName() const {
196 return modname;
197 }
198
199
200 /******************************************************************************
201 * SWModule::Description - Sets/gets module description
202 *
203 * ENT: imoddesc - value which to set moddesc
204 * [0] - only get
205 *
206 * RET: pointer to moddesc
207 */
208
getDescription() const209 const char *SWModule::getDescription() const {
210 return moddesc;
211 }
212
213
214 /******************************************************************************
215 * SWModule::Type - Sets/gets module type
216 *
217 * ENT: imodtype - value which to set modtype
218 * [0] - only get
219 *
220 * RET: pointer to modtype
221 */
222
getType() const223 const char *SWModule::getType() const {
224 return modtype;
225 }
226
227 /******************************************************************************
228 * SWModule::getDirection - Sets/gets module direction
229 *
230 * ENT: newdir - value which to set direction
231 * [-1] - only get
232 *
233 * RET: char direction
234 */
getDirection() const235 char SWModule::getDirection() const {
236 return direction;
237 }
238
239
240 /******************************************************************************
241 * SWModule::Disp - Sets/gets display driver
242 *
243 * ENT: idisp - value which to set disp
244 * [0] - only get
245 *
246 * RET: pointer to disp
247 */
248
getDisplay() const249 SWDisplay *SWModule::getDisplay() const {
250 return disp;
251 }
252
setDisplay(SWDisplay * idisp)253 void SWModule::setDisplay(SWDisplay *idisp) {
254 disp = idisp;
255 }
256
257 /******************************************************************************
258 * * SWModule::Display - Calls this modules display object and passes itself
259 * *
260 * * RET: error status
261 * */
262
display()263 char SWModule::display() {
264 disp->display(*this);
265 return 0;
266 }
267
268 /******************************************************************************
269 * SWModule::getKey - Gets the key from this module that points to the position
270 * record
271 *
272 * RET: key object
273 */
274
getKey() const275 SWKey *SWModule::getKey() const {
276 return key;
277 }
278
279
280 /******************************************************************************
281 * SWModule::setKey - Sets a key to this module for position to a particular
282 * record
283 *
284 * ENT: ikey - key with which to set this module
285 *
286 * RET: error status
287 */
288
setKey(const SWKey * ikey)289 char SWModule::setKey(const SWKey *ikey) {
290 SWKey *oldKey = 0;
291
292 if (key) {
293 if (!key->isPersist()) // if we have our own copy
294 oldKey = key;
295 }
296
297 if (!ikey->isPersist()) { // if we are to keep our own copy
298 key = createKey();
299 *key = *ikey;
300 }
301 else key = (SWKey *)ikey; // if we are to just point to an external key
302
303 if (oldKey)
304 delete oldKey;
305
306 return error = key->getError();
307 }
308
309
310 /******************************************************************************
311 * SWModule::setPosition(SW_POSITION) - Positions this modules to an entry
312 *
313 * ENT: p - position (e.g. TOP, BOTTOM)
314 *
315 * RET: *this
316 */
317
setPosition(SW_POSITION p)318 void SWModule::setPosition(SW_POSITION p) {
319 *key = p;
320 char saveError = key->popError();
321
322 switch (p) {
323 case POS_TOP:
324 this->increment();
325 this->decrement();
326 break;
327
328 case POS_BOTTOM:
329 this->decrement();
330 this->increment();
331 break;
332 }
333
334 error = saveError;
335 }
336
337
338 /******************************************************************************
339 * SWModule::increment - Increments module key a number of entries
340 *
341 * ENT: increment - Number of entries to jump forward
342 *
343 * RET: *this
344 */
345
increment(int steps)346 void SWModule::increment(int steps) {
347 (*key) += steps;
348 error = key->popError();
349 }
350
351
352 /******************************************************************************
353 * SWModule::decrement - Decrements module key a number of entries
354 *
355 * ENT: decrement - Number of entries to jump backward
356 *
357 * RET: *this
358 */
359
decrement(int steps)360 void SWModule::decrement(int steps) {
361 (*key) -= steps;
362 error = key->popError();
363 }
364
365
366 /******************************************************************************
367 * SWModule::Search - Searches a module for a string
368 *
369 * ENT: istr - string for which to search
370 * searchType - type of search to perform
371 * >=0 - regex; (for backward compat, if > 0 then used as additional REGEX FLAGS)
372 * -1 - phrase
373 * -2 - multiword
374 * -3 - entryAttrib (eg. Word//Lemma./G1234/) (Lemma with dot means check components (Lemma.[1-9]) also)
375 * -4 - clucene
376 * -5 - multilemma window; flags = window size
377 * flags - options flags for search
378 * justCheckIfSupported - if set, don't search, only tell if this
379 * function supports requested search.
380 *
381 * RET: ListKey set to verses that contain istr
382 */
383
search(const char * istr,int searchType,int flags,SWKey * scope,bool * justCheckIfSupported,void (* percent)(char,void *),void * percentUserData)384 ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) {
385
386 listKey.clear();
387 SWBuf term = istr;
388 bool includeComponents = false; // for entryAttrib e.g., /Lemma.1/
389
390 SWBuf target = getConfigEntry("AbsoluteDataPath");
391 if (!target.endsWith("/") && !target.endsWith("\\")) {
392 target.append('/');
393 }
394 #if defined USEXAPIAN
395 target.append("xapian");
396 #elif defined USELUCENE
397 target.append("lucene");
398 #endif
399 if (justCheckIfSupported) {
400 *justCheckIfSupported = (searchType >= -3);
401 #if defined USEXAPIAN
402 if ((searchType == -4) && (FileMgr::existsDir(target))) {
403 *justCheckIfSupported = true;
404 }
405 #elif defined USELUCENE
406 if ((searchType == -4) && (IndexReader::indexExists(target.c_str()))) {
407 *justCheckIfSupported = true;
408 }
409 #endif
410 return listKey;
411 }
412
413 SWKey *saveKey = 0;
414 SWKey *searchKey = 0;
415 SWKey *resultKey = createKey();
416 SWKey *lastKey = createKey();
417 SWBuf lastBuf = "";
418
419 #ifdef USECXX11REGEX
420 std::locale oldLocale;
421 std::locale::global(std::locale("en_US.UTF-8"));
422
423 std::regex preg;
424 #elif defined(USEICUREGEX)
425 RegexMatcher *matcher = 0;
426 #else
427 regex_t preg;
428 #endif
429
430 vector<SWBuf> words;
431 vector<SWBuf> window;
432 const char *sres;
433 terminateSearch = false;
434 char perc = 1;
435 bool savePEA = isProcessEntryAttributes();
436
437 // determine if we might be doing special strip searches. useful for knowing if we can use shortcuts
438 bool specialStrips = (getConfigEntry("LocalStripFilter")
439 || (getConfig().has("GlobalOptionFilter", "UTF8GreekAccents"))
440 || (getConfig().has("GlobalOptionFilter", "UTF8HebrewPoints"))
441 || (getConfig().has("GlobalOptionFilter", "UTF8ArabicPoints"))
442 || (strchr(istr, '<')));
443
444 setProcessEntryAttributes(searchType == -3);
445
446
447 if (!key->isPersist()) {
448 saveKey = createKey();
449 *saveKey = *key;
450 }
451 else saveKey = key;
452
453 searchKey = (scope)?scope->clone():(key->isPersist())?key->clone():0;
454 if (searchKey) {
455 searchKey->setPersist(true);
456 setKey(*searchKey);
457 }
458
459 (*percent)(perc, percentUserData);
460
461 *this = BOTTOM;
462 long highIndex = key->getIndex();
463 if (!highIndex)
464 highIndex = 1; // avoid division by zero errors.
465 *this = TOP;
466 if (searchType >= 0) {
467 #ifdef USECXX11REGEX
468 preg = std::regex((SWBuf(".*")+istr+".*").c_str(), std::regex_constants::extended | searchType | flags);
469 #elif defined(USEICUREGEX)
470 UErrorCode status = U_ZERO_ERROR;
471 matcher = new RegexMatcher(istr, searchType | flags, status);
472 if (U_FAILURE(status)) {
473 SWLog::getSystemLog()->logError("Error compiling Regex: %d", status);
474 return listKey;
475 }
476
477 #else
478 flags |=searchType|REG_NOSUB|REG_EXTENDED;
479 int err = regcomp(&preg, istr, flags);
480 if (err) {
481 SWLog::getSystemLog()->logError("Error compiling Regex: %d", err);
482 return listKey;
483 }
484 #endif
485 }
486
487 (*percent)(++perc, percentUserData);
488
489
490 #if defined USEXAPIAN || defined USELUCENE
491 (*percent)(10, percentUserData);
492 if (searchType == -4) { // indexed search
493 #if defined USEXAPIAN
494 SWTRY {
495 Xapian::Database database(target.c_str());
496 Xapian::QueryParser queryParser;
497 queryParser.set_default_op(Xapian::Query::OP_AND);
498 SWTRY {
499 queryParser.set_stemmer(Xapian::Stem(getLanguage()));
500 } SWCATCH(...) {}
501 queryParser.set_stemming_strategy(queryParser.STEM_SOME);
502 queryParser.add_prefix("content", "C");
503 queryParser.add_prefix("lemma", "L");
504 queryParser.add_prefix("morph", "M");
505 queryParser.add_prefix("prox", "P");
506 queryParser.add_prefix("proxlem", "PL");
507 queryParser.add_prefix("proxmorph", "PM");
508
509 #elif defined USELUCENE
510
511 lucene::index::IndexReader *ir = 0;
512 lucene::search::IndexSearcher *is = 0;
513 Query *q = 0;
514 Hits *h = 0;
515 SWTRY {
516 ir = IndexReader::open(target);
517 is = new IndexSearcher(ir);
518 const TCHAR *stopWords[] = { 0 };
519 standard::StandardAnalyzer analyzer(stopWords);
520 #endif
521
522 // parse the query
523 #if defined USEXAPIAN
524 Xapian::Query q = queryParser.parse_query(istr);
525 Xapian::Enquire enquire = Xapian::Enquire(database);
526 #elif defined USELUCENE
527 q = QueryParser::parse((wchar_t *)utf8ToWChar(istr).getRawData(), _T("content"), &analyzer);
528 #endif
529 (*percent)(20, percentUserData);
530
531 // perform the search
532 #if defined USEXAPIAN
533 enquire.set_query(q);
534 Xapian::MSet h = enquire.get_mset(0, 99999);
535 #elif defined USELUCENE
536 h = is->search(q);
537 #endif
538 (*percent)(80, percentUserData);
539
540 // iterate thru each good module position that meets the search
541 bool checkBounds = getKey()->isBoundSet();
542 #if defined USEXAPIAN
543 Xapian::MSetIterator i;
544 for (i = h.begin(); i != h.end(); ++i) {
545 // cout << "Document ID " << *i << "\t";
546 __u64 score = i.get_percent();
547 Xapian::Document doc = i.get_document();
548 *resultKey = doc.get_data().c_str();
549 #elif defined USELUCENE
550 for (unsigned long i = 0; i < (unsigned long)h->length(); i++) {
551 Document &doc = h->doc(i);
552 // set a temporary verse key to this module position
553 *resultKey = wcharToUTF8(doc.get(_T("key"))); //TODO Does a key always accept utf8?
554 __u64 score = (__u64)((__u32)(h->score(i)*100));
555 #endif
556
557 // check to see if it sets ok (within our bounds) and if not, skip
558 if (checkBounds) {
559 *getKey() = *resultKey;
560 if (*getKey() != *resultKey) {
561 continue;
562 }
563 }
564 listKey << *resultKey;
565 listKey.getElement()->userData = score;
566 }
567 (*percent)(98, percentUserData);
568 }
569 SWCATCH (...) {
570 #if defined USEXAPIAN
571 #elif defined USELUCENE
572 q = 0;
573 #endif
574 // invalid clucene query
575 }
576 #if defined USEXAPIAN
577 #elif defined USELUCENE
578 delete h;
579 delete q;
580
581 delete is;
582 if (ir) {
583 ir->close();
584 }
585 #endif
586 }
587 #endif
588
589 // some pre-loop processing
590 switch (searchType) {
591
592 // phrase
593 case -1:
594 // let's see if we're told to ignore case. If so, then we'll touppstr our term
595 if ((flags & REG_ICASE) == REG_ICASE) toupperstr(term);
596 break;
597
598 // multi-word
599 case -2:
600 case -5:
601 // let's break the term down into our words vector
602 while (1) {
603 const char *word = term.stripPrefix(' ');
604 if (!word) {
605 words.push_back(term);
606 break;
607 }
608 words.push_back(word);
609 }
610 if ((flags & REG_ICASE) == REG_ICASE) {
611 for (unsigned int i = 0; i < words.size(); i++) {
612 toupperstr(words[i]);
613 }
614 }
615 break;
616
617 // entry attributes
618 case -3:
619 // let's break the attribute segs down. We'll reuse our words vector for each segment
620 while (1) {
621 const char *word = term.stripPrefix('/');
622 if (!word) {
623 words.push_back(term);
624 break;
625 }
626 words.push_back(word);
627 }
628 if ((words.size()>2) && words[2].endsWith(".")) {
629 includeComponents = true;
630 words[2]--;
631 }
632 break;
633 }
634
635
636 // our main loop to iterate the module and find the stuff
637 perc = 5;
638 (*percent)(perc, percentUserData);
639
640
641 while ((searchType != -4) && !popError() && !terminateSearch) {
642 long mindex = key->getIndex();
643 float per = (float)mindex / highIndex;
644 per *= 93;
645 per += 5;
646 char newperc = (char)per;
647 if (newperc > perc) {
648 perc = newperc;
649 (*percent)(perc, percentUserData);
650 }
651 else if (newperc < perc) {
652 #ifndef _MSC_VER
653 std::cerr << "Serious error: new percentage complete is less than previous value\n";
654 std::cerr << "index: " << (key->getIndex()) << "\n";
655 std::cerr << "highIndex: " << highIndex << "\n";
656 std::cerr << "newperc ==" << (int)newperc << "%" << "is smaller than\n";
657 std::cerr << "perc == " << (int )perc << "% \n";
658 #endif
659 }
660 if (searchType >= 0) {
661 SWBuf textBuf = stripText();
662 #ifdef USECXX11REGEX
663 if (std::regex_match(std::string(textBuf.c_str()), preg)) {
664 #elif defined(USEICUREGEX)
665 UnicodeString stringToTest = textBuf.c_str();
666 matcher->reset(stringToTest);
667
668 if (matcher->find()) {
669 #else
670 if (!regexec(&preg, textBuf, 0, 0, 0)) {
671 #endif
672 *resultKey = *getKey();
673 resultKey->clearBound();
674 listKey << *resultKey;
675 lastBuf = "";
676 }
677 #ifdef USECXX11REGEX
678 else if (std::regex_match(std::string((lastBuf + ' ' + textBuf).c_str()), preg)) {
679 #elif defined(USEICUREGEX)
680 else {
681 stringToTest = (lastBuf + ' ' + textBuf).c_str();
682 matcher->reset(stringToTest);
683
684 if (matcher->find()) {
685 #else
686 else if (!regexec(&preg, lastBuf + ' ' + textBuf, 0, 0, 0)) {
687 #endif
688 lastKey->clearBound();
689 listKey << *lastKey;
690 lastBuf = textBuf;
691 }
692 else {
693 lastBuf = textBuf;
694 }
695 #if defined(USEICUREGEX)
696 }
697 #endif
698 }
699
700 // phrase
701 else {
702 SWBuf textBuf;
703 switch (searchType) {
704
705 // phrase
706 case -1:
707 textBuf = stripText();
708 if ((flags & REG_ICASE) == REG_ICASE) toupperstr(textBuf);
709 sres = strstr(textBuf.c_str(), term.c_str());
710 if (sres) { //it's also in the stripText(), so we have a valid search result item now
711 *resultKey = *getKey();
712 resultKey->clearBound();
713 listKey << *resultKey;
714 }
715 break;
716
717 // multiword
718 case -2: { // enclose our allocations
719 int loopCount = 0;
720 unsigned int foundWords = 0;
721 do {
722 textBuf = ((loopCount == 0)&&(!specialStrips)) ? getRawEntry() : stripText();
723 foundWords = 0;
724
725 for (unsigned int i = 0; i < words.size(); i++) {
726 if ((flags & REG_ICASE) == REG_ICASE) toupperstr(textBuf);
727 sres = strstr(textBuf.c_str(), words[i].c_str());
728 if (!sres) {
729 break; //for loop
730 }
731 foundWords++;
732 }
733
734 loopCount++;
735 } while ( (loopCount < 2) && (foundWords == words.size()));
736
737 if ((loopCount == 2) && (foundWords == words.size())) { //we found the right words in both raw and stripped text, which means it's a valid result item
738 *resultKey = *getKey();
739 resultKey->clearBound();
740 listKey << *resultKey;
741 }
742 }
743 break;
744
745 // entry attributes
746 case -3: {
747 renderText(); // force parse
748 AttributeTypeList &entryAttribs = getEntryAttributes();
749 AttributeTypeList::iterator i1Start, i1End;
750 AttributeList::iterator i2Start, i2End;
751 AttributeValue::iterator i3Start, i3End;
752
753 if ((words.size()) && (words[0].length())) {
754 // cout << "Word: " << words[0] << endl;
755 for (i1Start = entryAttribs.begin(); i1Start != entryAttribs.end(); ++i1Start) {
756 // cout << "stuff: " << i1Start->first.c_str() << endl;
757 }
758 i1Start = entryAttribs.find(words[0]);
759 i1End = i1Start;
760 if (i1End != entryAttribs.end()) {
761 i1End++;
762 }
763 }
764 else {
765 i1Start = entryAttribs.begin();
766 i1End = entryAttribs.end();
767 }
768 for (;i1Start != i1End; i1Start++) {
769 if ((words.size()>1) && (words[1].length())) {
770 i2Start = i1Start->second.find(words[1]);
771 i2End = i2Start;
772 if (i2End != i1Start->second.end())
773 i2End++;
774 }
775 else {
776 i2Start = i1Start->second.begin();
777 i2End = i1Start->second.end();
778 }
779 for (;i2Start != i2End; i2Start++) {
780 if ((words.size()>2) && (words[2].length()) && (!includeComponents)) {
781 i3Start = i2Start->second.find(words[2]);
782 i3End = i3Start;
783 if (i3End != i2Start->second.end())
784 i3End++;
785 }
786 else {
787 i3Start = i2Start->second.begin();
788 i3End = i2Start->second.end();
789 }
790 for (;i3Start != i3End; i3Start++) {
791 if ((words.size()>3) && (words[3].length())) {
792 if (includeComponents) {
793 SWBuf key = i3Start->first.c_str();
794 key = key.stripPrefix('.', true);
795 // we're iterating all 3 level keys, so be sure we match our
796 // prefix (e.g., Lemma, Lemma.1, Lemma.2, etc.)
797 if (key != words[2]) continue;
798 }
799 if (flags & SEARCHFLAG_MATCHWHOLEENTRY) {
800 bool found = !(((flags & REG_ICASE) == REG_ICASE) ? sword::stricmp(i3Start->second.c_str(), words[3]) : strcmp(i3Start->second.c_str(), words[3]));
801 sres = (found) ? i3Start->second.c_str() : 0;
802 }
803 else {
804 sres = ((flags & REG_ICASE) == REG_ICASE) ? stristr(i3Start->second.c_str(), words[3]) : strstr(i3Start->second.c_str(), words[3]);
805 }
806 if (sres) {
807 *resultKey = *getKey();
808 resultKey->clearBound();
809 listKey << *resultKey;
810 break;
811 }
812 }
813 }
814 if (i3Start != i3End)
815 break;
816 }
817 if (i2Start != i2End)
818 break;
819 }
820 break;
821 }
822 // NOT DONE
823 case -5:
824 AttributeList &words = getEntryAttributes()["Word"];
825 SWBuf kjvWord = "";
826 SWBuf bibWord = "";
827 for (AttributeList::iterator it = words.begin(); it != words.end(); it++) {
828 int parts = atoi(it->second["PartCount"]);
829 SWBuf lemma = "";
830 SWBuf morph = "";
831 for (int i = 1; i <= parts; i++) {
832 SWBuf key = "";
833 key = (parts == 1) ? "Lemma" : SWBuf().setFormatted("Lemma.%d", i).c_str();
834 AttributeValue::iterator li = it->second.find(key);
835 if (li != it->second.end()) {
836 if (i > 1) lemma += " ";
837 key = (parts == 1) ? "LemmaClass" : SWBuf().setFormatted("LemmaClass.%d", i).c_str();
838 AttributeValue::iterator lci = it->second.find(key);
839 if (lci != it->second.end()) {
840 lemma += lci->second + ":";
841 }
842 lemma += li->second;
843 }
844 key = (parts == 1) ? "Morph" : SWBuf().setFormatted("Morph.%d", i).c_str();
845 li = it->second.find(key);
846 // silly. sometimes morph counts don't equal lemma counts
847 if (i == 1 && parts != 1 && li == it->second.end()) {
848 li = it->second.find("Morph");
849 }
850 if (li != it->second.end()) {
851 if (i > 1) morph += " ";
852 key = (parts == 1) ? "MorphClass" : SWBuf().setFormatted("MorphClass.%d", i).c_str();
853 AttributeValue::iterator lci = it->second.find(key);
854 // silly. sometimes morph counts don't equal lemma counts
855 if (i == 1 && parts != 1 && lci == it->second.end()) {
856 lci = it->second.find("MorphClass");
857 }
858 if (lci != it->second.end()) {
859 morph += lci->second + ":";
860 }
861 morph += li->second;
862 }
863 // TODO: add src tags and maybe other attributes
864 }
865 while (window.size() < (unsigned)flags) {
866
867 }
868 }
869 break;
870 } // end switch
871 }
872 *lastKey = *getKey();
873 (*this)++;
874 }
875
876
877 // cleaup work
878 if (searchType >= 0) {
879 #ifdef USECXX11REGEX
880 std::locale::global(oldLocale);
881 #elif defined(USEICUREGEX)
882 delete matcher;
883 #else
884 regfree(&preg);
885 #endif
886 }
887
888 setKey(*saveKey);
889
890 if (!saveKey->isPersist())
891 delete saveKey;
892
893 if (searchKey)
894 delete searchKey;
895 delete resultKey;
896 delete lastKey;
897
898 listKey = TOP;
899 setProcessEntryAttributes(savePEA);
900
901
902 (*percent)(100, percentUserData);
903
904
905 return listKey;
906 }
907
908
909 /******************************************************************************
910 * SWModule::stripText() - calls all stripfilters on current text
911 *
912 * ENT: buf - buf to massage instead of this modules current text
913 * len - max len of buf
914 *
915 * RET: this module's text at current key location massaged by Strip filters
916 */
917
918 const char *SWModule::stripText(const char *buf, int len) {
919 static SWBuf local;
920 local = renderText(buf, len, false);
921 return local.c_str();
922 }
923
924
925 /** SWModule::getRenderHeader() - Produces any header data which might be
926 * useful which associated with the processing done with this filter.
927 * A typical example is a suggested CSS style block for classed
928 * containers.
929 */
930 const char *SWModule::getRenderHeader() const {
931 FilterList::const_iterator first = getRenderFilters().begin();
932 if (first != getRenderFilters().end()) {
933 return (*first)->getHeader();
934 }
935 return "";
936 }
937
938
939 /******************************************************************************
940 * SWModule::renderText - calls all renderfilters on current module
941 * position
942 *
943 * RET: this module's text at current key location massaged by renderText filters
944 */
945 SWBuf SWModule::renderText() {
946 return renderText((const char *)0);
947 }
948
949 /******************************************************************************
950 * SWModule::renderText - calls all renderfilters on provided text
951 * or current module position provided text null
952 *
953 * ENT: buf - buffer to render
954 *
955 * RET: this module's text at current key location massaged by renderText filters
956 *
957 * NOTES: This method is only truly const if called with a provided text; using
958 * module's current position may produce a new entry attributes map which
959 * logically violates the const semantic, which is why the above method
960 * which takes no params is not const, i.e., don't call this method with
961 * null as text param, but instead use non-const method above. The public
962 * interface for this method expects a value for the text param. We use it
963 * internally sometimes calling with null to save duplication of code.
964 */
965
966 SWBuf SWModule::renderText(const char *buf, int len, bool render) const {
967 bool savePEA = isProcessEntryAttributes();
968 if (!buf) {
969 entryAttributes.clear();
970 }
971 else {
972 setProcessEntryAttributes(false);
973 }
974
975 SWBuf local;
976 if (buf)
977 local = buf;
978
979 SWBuf &tmpbuf = (buf) ? local : getRawEntryBuf();
980 SWKey *key = 0;
981 static const char *null = "";
982
983 if (tmpbuf) {
984 unsigned long size = (len < 0) ? ((getEntrySize()<0) ? strlen(tmpbuf) : getEntrySize()) : len;
985 if (size > 0) {
986 key = this->getKey();
987
988 optionFilter(tmpbuf, key);
989
990 if (render) {
991 renderFilter(tmpbuf, key);
992 encodingFilter(tmpbuf, key);
993 }
994 else stripFilter(tmpbuf, key);
995 }
996 }
997 else {
998 tmpbuf = null;
999 }
1000
1001 setProcessEntryAttributes(savePEA);
1002
1003 return tmpbuf;
1004 }
1005
1006
1007 /******************************************************************************
1008 * SWModule::renderText - calls all renderfilters on current text
1009 *
1010 * ENT: tmpKey - key to use to grab text
1011 *
1012 * RET: this module's text at current key location massaged by RenderFilers
1013 */
1014
1015 SWBuf SWModule::renderText(const SWKey *tmpKey) {
1016 SWKey *saveKey;
1017 const char *retVal;
1018
1019 if (!key->isPersist()) {
1020 saveKey = createKey();
1021 *saveKey = *key;
1022 }
1023 else saveKey = key;
1024
1025 setKey(*tmpKey);
1026
1027 retVal = renderText();
1028
1029 setKey(*saveKey);
1030
1031 if (!saveKey->isPersist())
1032 delete saveKey;
1033
1034 return retVal;
1035 }
1036
1037
1038 /******************************************************************************
1039 * SWModule::stripText - calls all StripTextFilters on current text
1040 *
1041 * ENT: tmpKey - key to use to grab text
1042 *
1043 * RET: this module's text at specified key location massaged by Strip filters
1044 */
1045
1046 const char *SWModule::stripText(const SWKey *tmpKey) {
1047 SWKey *saveKey;
1048 const char *retVal;
1049
1050 if (!key->isPersist()) {
1051 saveKey = createKey();
1052 *saveKey = *key;
1053 }
1054 else saveKey = key;
1055
1056 setKey(*tmpKey);
1057
1058 retVal = stripText();
1059
1060 setKey(*saveKey);
1061
1062 if (!saveKey->isPersist())
1063 delete saveKey;
1064
1065 return retVal;
1066 }
1067
1068 /******************************************************************************
1069 * SWModule::getBibliography -Returns bibliographic data for a module in the
1070 * requested format
1071 *
1072 * ENT: bibFormat format of the bibliographic data
1073 *
1074 * RET: bibliographic data in the requested format as a string (BibTeX by default)
1075 */
1076
1077 SWBuf SWModule::getBibliography(unsigned char bibFormat) const {
1078 SWBuf s;
1079 switch (bibFormat) {
1080 case BIB_BIBTEX:
1081 s.append("@Book {").append(modname).append(", Title = \"").append(moddesc).append("\", Publisher = \"CrossWire Bible Society\"}");
1082 break;
1083 }
1084 return s;
1085 }
1086
1087 const char *SWModule::getConfigEntry(const char *key) const {
1088 ConfigEntMap::iterator it = config->find(key);
1089 return (it != config->end()) ? it->second.c_str() : 0;
1090 }
1091
1092
1093 void SWModule::setConfig(ConfigEntMap *config) {
1094 this->config = config;
1095 }
1096
1097
1098 bool SWModule::hasSearchFramework() {
1099 #ifdef USELUCENE
1100 return true;
1101 #else
1102 return SWSearchable::hasSearchFramework();
1103 #endif
1104 }
1105
1106 void SWModule::deleteSearchFramework() {
1107 #ifdef USELUCENE
1108 SWBuf target = getConfigEntry("AbsoluteDataPath");
1109 if (!target.endsWith("/") && !target.endsWith("\\")) {
1110 target.append('/');
1111 }
1112 target.append("lucene");
1113
1114 FileMgr::removeDir(target.c_str());
1115 #else
1116 SWSearchable::deleteSearchFramework();
1117 #endif
1118 }
1119
1120
1121 signed char SWModule::createSearchFramework(void (*percent)(char, void *), void *percentUserData) {
1122
1123 #if defined USELUCENE || defined USEXAPIAN
1124 SWBuf target = getConfigEntry("AbsoluteDataPath");
1125 if (!target.endsWith("/") && !target.endsWith("\\")) {
1126 target.append('/');
1127 }
1128 #if defined USEXAPIAN
1129 target.append("xapian");
1130 #elif defined USELUCENE
1131 const int MAX_CONV_SIZE = 1024 * 1024;
1132 target.append("lucene");
1133 #endif
1134 int status = FileMgr::createParent(target+"/dummy");
1135 if (status) return -1;
1136
1137 SWKey *saveKey = 0;
1138 SWKey *searchKey = 0;
1139 SWKey textkey;
1140 SWBuf c;
1141
1142
1143 // turn all filters to default values
1144 StringList filterSettings;
1145 for (OptionFilterList::iterator filter = optionFilters->begin(); filter != optionFilters->end(); filter++) {
1146 filterSettings.push_back((*filter)->getOptionValue());
1147 (*filter)->setOptionValue(*((*filter)->getOptionValues().begin()));
1148
1149 if ( (!strcmp("Greek Accents", (*filter)->getOptionName())) ||
1150 (!strcmp("Hebrew Vowel Points", (*filter)->getOptionName())) ||
1151 (!strcmp("Arabic Vowel Points", (*filter)->getOptionName()))
1152 ) {
1153 (*filter)->setOptionValue("Off");
1154 }
1155 }
1156
1157
1158 // be sure we give CLucene enough file handles
1159 FileMgr::getSystemFileMgr()->flush();
1160
1161 // save key information so as not to disrupt original
1162 // module position
1163 if (!key->isPersist()) {
1164 saveKey = createKey();
1165 *saveKey = *key;
1166 }
1167 else saveKey = key;
1168
1169 searchKey = (key->isPersist())?key->clone():0;
1170 if (searchKey) {
1171 searchKey->setPersist(1);
1172 setKey(*searchKey);
1173 }
1174
1175 bool includeKeyInSearch = getConfig().has("SearchOption", "IncludeKeyInSearch");
1176
1177 // lets create or open our search index
1178 #if defined USEXAPIAN
1179 Xapian::WritableDatabase database(target.c_str(), Xapian::DB_CREATE_OR_OPEN);
1180 Xapian::TermGenerator termGenerator;
1181 SWTRY {
1182 termGenerator.set_stemmer(Xapian::Stem(getLanguage()));
1183 } SWCATCH(...) {}
1184
1185 #elif defined USELUCENE
1186 RAMDirectory *ramDir = 0;
1187 IndexWriter *coreWriter = 0;
1188 IndexWriter *fsWriter = 0;
1189 Directory *d = 0;
1190
1191 const TCHAR *stopWords[] = { 0 };
1192 standard::StandardAnalyzer *an = new standard::StandardAnalyzer(stopWords);
1193
1194 ramDir = new RAMDirectory();
1195 coreWriter = new IndexWriter(ramDir, an, true);
1196 coreWriter->setMaxFieldLength(MAX_CONV_SIZE);
1197 #endif
1198
1199
1200
1201
1202 char perc = 1;
1203 VerseKey *vkcheck = 0;
1204 vkcheck = SWDYNAMIC_CAST(VerseKey, key);
1205 VerseKey *chapMax = 0;
1206 if (vkcheck) chapMax = (VerseKey *)vkcheck->clone();
1207
1208 TreeKeyIdx *tkcheck = 0;
1209 tkcheck = SWDYNAMIC_CAST(TreeKeyIdx, key);
1210
1211
1212 *this = BOTTOM;
1213 long highIndex = key->getIndex();
1214 if (!highIndex)
1215 highIndex = 1; // avoid division by zero errors.
1216
1217 bool savePEA = isProcessEntryAttributes();
1218 setProcessEntryAttributes(true);
1219
1220 // prox chapter blocks
1221 // position module at the beginning
1222 *this = TOP;
1223
1224 SWBuf proxBuf;
1225 SWBuf proxLem;
1226 SWBuf proxMorph;
1227 SWBuf strong;
1228 SWBuf morph;
1229
1230 char err = popError();
1231 while (!err) {
1232 long mindex = key->getIndex();
1233
1234 proxBuf = "";
1235 proxLem = "";
1236 proxMorph = "";
1237
1238 // computer percent complete so we can report to our progress callback
1239 float per = (float)mindex / highIndex;
1240 // between 5%-98%
1241 per *= 93; per += 5;
1242 char newperc = (char)per;
1243 if (newperc > perc) {
1244 perc = newperc;
1245 (*percent)(perc, percentUserData);
1246 }
1247
1248 // get "content" field
1249 const char *content = stripText();
1250
1251 bool good = false;
1252
1253 // start out entry
1254 #if defined USEXAPIAN
1255 Xapian::Document doc;
1256 termGenerator.set_document(doc);
1257 #elif defined USELUCENE
1258 Document *doc = new Document();
1259 #endif
1260 // get "key" field
1261 SWBuf keyText = (vkcheck) ? vkcheck->getOSISRef() : getKeyText();
1262 if (content && *content) {
1263 good = true;
1264
1265
1266 // build "strong" field
1267 AttributeTypeList::iterator words;
1268 AttributeList::iterator word;
1269 AttributeValue::iterator strongVal;
1270 AttributeValue::iterator morphVal;
1271
1272 strong="";
1273 morph="";
1274 words = getEntryAttributes().find("Word");
1275 if (words != getEntryAttributes().end()) {
1276 for (word = words->second.begin();word != words->second.end(); word++) {
1277 int partCount = atoi(word->second["PartCount"]);
1278 if (!partCount) partCount = 1;
1279 for (int i = 0; i < partCount; i++) {
1280 SWBuf tmp = "Lemma";
1281 if (partCount > 1) tmp.appendFormatted(".%d", i+1);
1282 strongVal = word->second.find(tmp);
1283 if (strongVal != word->second.end()) {
1284 // cheeze. skip empty article tags that weren't assigned to any text
1285 if (strongVal->second == "G3588") {
1286 if (word->second.find("Text") == word->second.end())
1287 continue; // no text? let's skip
1288 }
1289 strong.append(strongVal->second);
1290 morph.append(strongVal->second);
1291 morph.append('@');
1292 SWBuf tmp = "Morph";
1293 if (partCount > 1) tmp.appendFormatted(".%d", i+1);
1294 morphVal = word->second.find(tmp);
1295 if (morphVal != word->second.end()) {
1296 morph.append(morphVal->second);
1297 }
1298 strong.append(' ');
1299 morph.append(' ');
1300 }
1301 }
1302 }
1303 }
1304
1305 #if defined USEXAPIAN
1306 doc.set_data(keyText.c_str());
1307 #elif defined USELUCENE
1308 doc->add(*_CLNEW Field(_T("key"), (wchar_t *)utf8ToWChar(keyText).getRawData(), Field::STORE_YES | Field::INDEX_UNTOKENIZED));
1309 #endif
1310
1311 if (includeKeyInSearch) {
1312 c = keyText;
1313 c += " ";
1314 c += content;
1315 content = c.c_str();
1316 }
1317
1318 #if defined USEXAPIAN
1319 termGenerator.index_text(content);
1320 termGenerator.index_text(content, 1, "C");
1321 #elif defined USELUCENE
1322 doc->add(*_CLNEW Field(_T("content"), (wchar_t *)utf8ToWChar(content).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
1323 #endif
1324
1325 if (strong.length() > 0) {
1326 #if defined USEXAPIAN
1327 termGenerator.index_text(strong.c_str(), 1, "L");
1328 termGenerator.index_text(morph.c_str(), 1, "M");
1329 #elif defined USELUCENE
1330 doc->add(*_CLNEW Field(_T("lemma"), (wchar_t *)utf8ToWChar(strong).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
1331 doc->add(*_CLNEW Field(_T("morph"), (wchar_t *)utf8ToWChar(morph).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
1332 #endif
1333 //printf("setting fields (%s).\ncontent: %s\nlemma: %s\n", (const char *)*key, content, strong.c_str());
1334 }
1335
1336 //printf("setting fields (%s).\n", (const char *)*key);
1337 //fflush(stdout);
1338 }
1339 // don't write yet, cuz we have to see if we're the first of a prox block (5:1 or chapter5/verse1
1340
1341 // for VerseKeys use chapter
1342 if (vkcheck) {
1343 *chapMax = *vkcheck;
1344 // we're the first verse in a chapter
1345 if (vkcheck->getVerse() == 1) {
1346 *chapMax = MAXVERSE;
1347 VerseKey saveKey = *vkcheck;
1348 while ((!err) && (*vkcheck <= *chapMax)) {
1349 //printf("building proxBuf from (%s).\nproxBuf.c_str(): %s\n", (const char *)*key, proxBuf.c_str());
1350 //printf("building proxBuf from (%s).\n", (const char *)*key);
1351
1352 content = stripText();
1353 if (content && *content) {
1354 // build "strong" field
1355 strong = "";
1356 morph = "";
1357 AttributeTypeList::iterator words;
1358 AttributeList::iterator word;
1359 AttributeValue::iterator strongVal;
1360 AttributeValue::iterator morphVal;
1361
1362 words = getEntryAttributes().find("Word");
1363 if (words != getEntryAttributes().end()) {
1364 for (word = words->second.begin();word != words->second.end(); word++) {
1365 int partCount = atoi(word->second["PartCount"]);
1366 if (!partCount) partCount = 1;
1367 for (int i = 0; i < partCount; i++) {
1368 SWBuf tmp = "Lemma";
1369 if (partCount > 1) tmp.appendFormatted(".%d", i+1);
1370 strongVal = word->second.find(tmp);
1371 if (strongVal != word->second.end()) {
1372 // cheeze. skip empty article tags that weren't assigned to any text
1373 if (strongVal->second == "G3588") {
1374 if (word->second.find("Text") == word->second.end())
1375 continue; // no text? let's skip
1376 }
1377 strong.append(strongVal->second);
1378 morph.append(strongVal->second);
1379 morph.append('@');
1380 SWBuf tmp = "Morph";
1381 if (partCount > 1) tmp.appendFormatted(".%d", i+1);
1382 morphVal = word->second.find(tmp);
1383 if (morphVal != word->second.end()) {
1384 morph.append(morphVal->second);
1385 }
1386 strong.append(' ');
1387 morph.append(' ');
1388 }
1389 }
1390 }
1391 }
1392 proxBuf += content;
1393 proxBuf.append(' ');
1394 proxLem += strong;
1395 proxMorph += morph;
1396 if (proxLem.length()) {
1397 proxLem.append("\n");
1398 proxMorph.append("\n");
1399 }
1400 }
1401 (*this)++;
1402 err = popError();
1403 }
1404 err = 0;
1405 *vkcheck = saveKey;
1406 }
1407 }
1408
1409 // for TreeKeys use siblings if we have no children
1410 else if (tkcheck) {
1411 if (!tkcheck->hasChildren()) {
1412 if (!tkcheck->previousSibling()) {
1413 do {
1414 //printf("building proxBuf from (%s).\n", (const char *)*key);
1415 //fflush(stdout);
1416
1417 content = stripText();
1418 if (content && *content) {
1419 // build "strong" field
1420 strong = "";
1421 morph = "";
1422 AttributeTypeList::iterator words;
1423 AttributeList::iterator word;
1424 AttributeValue::iterator strongVal;
1425 AttributeValue::iterator morphVal;
1426
1427 words = getEntryAttributes().find("Word");
1428 if (words != getEntryAttributes().end()) {
1429 for (word = words->second.begin();word != words->second.end(); word++) {
1430 int partCount = atoi(word->second["PartCount"]);
1431 if (!partCount) partCount = 1;
1432 for (int i = 0; i < partCount; i++) {
1433 SWBuf tmp = "Lemma";
1434 if (partCount > 1) tmp.appendFormatted(".%d", i+1);
1435 strongVal = word->second.find(tmp);
1436 if (strongVal != word->second.end()) {
1437 // cheeze. skip empty article tags that weren't assigned to any text
1438 if (strongVal->second == "G3588") {
1439 if (word->second.find("Text") == word->second.end())
1440 continue; // no text? let's skip
1441 }
1442 strong.append(strongVal->second);
1443 morph.append(strongVal->second);
1444 morph.append('@');
1445 SWBuf tmp = "Morph";
1446 if (partCount > 1) tmp.appendFormatted(".%d", i+1);
1447 morphVal = word->second.find(tmp);
1448 if (morphVal != word->second.end()) {
1449 morph.append(morphVal->second);
1450 }
1451 strong.append(' ');
1452 morph.append(' ');
1453 }
1454 }
1455 }
1456 }
1457
1458 proxBuf += content;
1459 proxBuf.append(' ');
1460 proxLem += strong;
1461 proxMorph += morph;
1462 if (proxLem.length()) {
1463 proxLem.append("\n");
1464 proxMorph.append("\n");
1465 }
1466 }
1467 } while (tkcheck->nextSibling());
1468 tkcheck->parent();
1469 tkcheck->firstChild();
1470 }
1471 else tkcheck->nextSibling(); // reposition from our previousSibling test
1472 }
1473 }
1474
1475 if (proxBuf.length() > 0) {
1476
1477 #if defined USEXAPIAN
1478 termGenerator.index_text(proxBuf.c_str(), 1, "P");
1479 #elif defined USELUCENE
1480 doc->add(*_CLNEW Field(_T("prox"), (wchar_t *)utf8ToWChar(proxBuf).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
1481 #endif
1482 good = true;
1483 }
1484 if (proxLem.length() > 0) {
1485 #if defined USEXAPIAN
1486 termGenerator.index_text(proxLem.c_str(), 1, "PL");
1487 termGenerator.index_text(proxMorph.c_str(), 1, "PM");
1488 #elif defined USELUCENE
1489 doc->add(*_CLNEW Field(_T("proxlem"), (wchar_t *)utf8ToWChar(proxLem).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED) );
1490 doc->add(*_CLNEW Field(_T("proxmorph"), (wchar_t *)utf8ToWChar(proxMorph).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED) );
1491 #endif
1492 good = true;
1493 }
1494 if (good) {
1495 //printf("writing (%s).\n", (const char *)*key);
1496 //fflush(stdout);
1497 #if defined USEXAPIAN
1498 SWBuf idTerm;
1499 idTerm.setFormatted("Q%ld", key->getIndex());
1500 doc.add_boolean_term(idTerm.c_str());
1501 database.replace_document(idTerm.c_str(), doc);
1502 #elif defined USELUCENE
1503 coreWriter->addDocument(doc);
1504 #endif
1505 }
1506 #if defined USEXAPIAN
1507 #elif defined USELUCENE
1508 delete doc;
1509 #endif
1510
1511 (*this)++;
1512 err = popError();
1513 }
1514
1515 // Optimizing automatically happens with the call to addIndexes
1516 //coreWriter->optimize();
1517 #if defined USEXAPIAN
1518 #elif defined USELUCENE
1519 coreWriter->close();
1520
1521 #ifdef CLUCENE2
1522 d = FSDirectory::getDirectory(target.c_str());
1523 #endif
1524 if (IndexReader::indexExists(target.c_str())) {
1525 #ifndef CLUCENE2
1526 d = FSDirectory::getDirectory(target.c_str(), false);
1527 #endif
1528 if (IndexReader::isLocked(d)) {
1529 IndexReader::unlock(d);
1530 }
1531 fsWriter = new IndexWriter( d, an, false);
1532 }
1533 else {
1534 #ifndef CLUCENE2
1535 d = FSDirectory::getDirectory(target.c_str(), true);
1536 #endif
1537 fsWriter = new IndexWriter(d, an, true);
1538 }
1539
1540 Directory *dirs[] = { ramDir, 0 };
1541 #ifdef CLUCENE2
1542 lucene::util::ConstValueArray< lucene::store::Directory *>dirsa(dirs, 1);
1543 fsWriter->addIndexes(dirsa);
1544 #else
1545 fsWriter->addIndexes(dirs);
1546 #endif
1547 fsWriter->close();
1548
1549 delete ramDir;
1550 delete coreWriter;
1551 delete fsWriter;
1552 delete an;
1553 #endif
1554
1555 // reposition module back to where it was before we were called
1556 setKey(*saveKey);
1557
1558 if (!saveKey->isPersist())
1559 delete saveKey;
1560
1561 if (searchKey)
1562 delete searchKey;
1563
1564 delete chapMax;
1565
1566 setProcessEntryAttributes(savePEA);
1567
1568 // reset option filters back to original values
1569 StringList::iterator origVal = filterSettings.begin();
1570 for (OptionFilterList::iterator filter = optionFilters->begin(); filter != optionFilters->end(); filter++) {
1571 (*filter)->setOptionValue(*origVal++);
1572 }
1573
1574 return 0;
1575 #else
1576 return SWSearchable::createSearchFramework(percent, percentUserData);
1577 #endif
1578 }
1579
1580 /** OptionFilterBuffer a text buffer
1581 * @param filters the FilterList of filters to iterate
1582 * @param buf the buffer to filter
1583 * @param key key location from where this buffer was extracted
1584 */
1585 void SWModule::filterBuffer(OptionFilterList *filters, SWBuf &buf, const SWKey *key) const {
1586 OptionFilterList::iterator it;
1587 for (it = filters->begin(); it != filters->end(); it++) {
1588 (*it)->processText(buf, key, this);
1589 }
1590 }
1591
1592 /** FilterBuffer a text buffer
1593 * @param filters the FilterList of filters to iterate
1594 * @param buf the buffer to filter
1595 * @param key key location from where this buffer was extracted
1596 */
1597 void SWModule::filterBuffer(FilterList *filters, SWBuf &buf, const SWKey *key) const {
1598 FilterList::iterator it;
1599 for (it = filters->begin(); it != filters->end(); it++) {
1600 (*it)->processText(buf, key, this);
1601 }
1602 }
1603
1604 signed char SWModule::createModule(const char*) {
1605 return -1;
1606 }
1607
1608 void SWModule::setEntry(const char*, long) {
1609 }
1610
1611 void SWModule::linkEntry(const SWKey*) {
1612 }
1613
1614
1615 /******************************************************************************
1616 * SWModule::prepText - Prepares the text before returning it to external
1617 * objects
1618 *
1619 * ENT: buf - buffer where text is stored and where to store the prep'd
1620 * text.
1621 */
1622
1623 void SWModule::prepText(SWBuf &buf) {
1624 unsigned int to, from;
1625 char space = 0, cr = 0, realdata = 0, nlcnt = 0;
1626 char *rawBuf = buf.getRawData();
1627 for (to = from = 0; rawBuf[from]; from++) {
1628 switch (rawBuf[from]) {
1629 case 10:
1630 if (!realdata)
1631 continue;
1632 space = (cr) ? 0 : 1;
1633 cr = 0;
1634 nlcnt++;
1635 if (nlcnt > 1) {
1636 // *to++ = nl;
1637 rawBuf[to++] = 10;
1638 // *to++ = nl[1];
1639 // nlcnt = 0;
1640 }
1641 continue;
1642 case 13:
1643 if (!realdata)
1644 continue;
1645 // *to++ = nl[0];
1646 rawBuf[to++] = 10;
1647 space = 0;
1648 cr = 1;
1649 continue;
1650 }
1651 realdata = 1;
1652 nlcnt = 0;
1653 if (space) {
1654 space = 0;
1655 if (rawBuf[from] != ' ') {
1656 rawBuf[to++] = ' ';
1657 from--;
1658 continue;
1659 }
1660 }
1661 rawBuf[to++] = rawBuf[from];
1662 }
1663 buf.setSize(to);
1664
1665 while (to > 1) { // remove trailing excess
1666 to--;
1667 if ((rawBuf[to] == 10) || (rawBuf[to] == ' '))
1668 buf.setSize(to);
1669 else break;
1670 }
1671 }
1672
1673 SWORD_NAMESPACE_END
1674