1 /******************************************************************************
2  *
3  *  swmodule.cpp -	code for base class 'SWModule'. SWModule is the basis
4  *			for all types of modules (e.g. texts, commentaries,
5  *			maps, lexicons, etc.)
6  *
7  * $Id: swmodule.cpp 3515 2017-11-01 11:38:09Z scribe $
8  *
9  * Copyright 1999-2013 CrossWire Bible Society (http://www.crosswire.org)
10  *	CrossWire Bible Society
11  *	P. O. Box 2528
12  *	Tempe, AZ  85280-2528
13  *
14  * This program is free software; you can redistribute it and/or modify it
15  * under the terms of the GNU General Public License as published by the
16  * Free Software Foundation version 2.
17  *
18  * This program is distributed in the hope that it will be useful, but
19  * WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * General Public License for more details.
22  *
23  */
24 
25 
26 #include <vector>
27 
28 #include <swlog.h>
29 #include <sysdata.h>
30 #include <swmodule.h>
31 #include <utilstr.h>
32 #include <swfilter.h>
33 #include <versekey.h>	// KLUDGE for Search
34 #include <treekeyidx.h>	// KLUDGE for Search
35 #include <swoptfilter.h>
36 #include <filemgr.h>
37 #include <stringmgr.h>
38 #ifndef _MSC_VER
39 #include <iostream>
40 #endif
41 
42 #if defined(USECXX11REGEX)
43 #include <regex>
44 #ifndef REG_ICASE
45 #define REG_ICASE std::regex::icase
46 #endif
47 #elif defined(USEICUREGEX)
48 #include <unicode/regex.h>
49 #ifndef REG_ICASE
50 #define REG_ICASE UREGEX_CASE_INSENSITIVE
51 #endif
52 #else
53 #include <regex.h>	// GNU
54 #endif
55 
56 #if defined USEXAPIAN
57 #include <xapian.h>
58 #elif defined USELUCENE
59 #include <CLucene.h>
60 
61 //Lucence includes
62 //#include "CLucene.h"
63 //#include "CLucene/util/Reader.h"
64 //#include "CLucene/util/Misc.h"
65 //#include "CLucene/util/dirent.h"
66 
67 using namespace lucene::index;
68 using namespace lucene::analysis;
69 using namespace lucene::util;
70 using namespace lucene::store;
71 using namespace lucene::document;
72 using namespace lucene::queryParser;
73 using namespace lucene::search;
74 #endif
75 
76 using std::vector;
77 
78 SWORD_NAMESPACE_START
79 
// Storage for the class-wide default display driver.  The SWModule
// constructor points 'disp' at this object whenever the caller passes no
// display of its own.
80 SWModule::StdOutDisplay SWModule::rawdisp;
81 
// Alias for a linked list of SWBuf strings.
82 typedef std::list<SWBuf> StringList;
83 
84 /******************************************************************************
85  * SWModule Constructor - Initializes data for instance of SWModule
86  *
87  * ENT:	imodname - Internal name for module
88  *	imoddesc - Name to display to user for module
89  *	idisp	 - Display object to use for displaying
90  *	imodtype - Type of Module (All modules will be displayed with
91  *			others of same type under their modtype heading
92  *	unicode  - if this module is unicode
93  */
94 
SWModule(const char * imodname,const char * imoddesc,SWDisplay * idisp,const char * imodtype,SWTextEncoding encoding,SWTextDirection direction,SWTextMarkup markup,const char * imodlang)95 SWModule::SWModule(const char *imodname, const char *imoddesc, SWDisplay *idisp, const char *imodtype, SWTextEncoding encoding, SWTextDirection direction, SWTextMarkup markup, const char *imodlang) {
96 	key       = createKey();
97 	entryBuf  = "";
98 	config    = &ownConfig;
99 	modname   = 0;
100 	error     = 0;
101 	moddesc   = 0;
102 	modtype   = 0;
103 	modlang   = 0;
104 	this->encoding = encoding;
105 	this->direction = direction;
106 	this->markup  = markup;
107 	entrySize= -1;
108 	disp     = (idisp) ? idisp : &rawdisp;
109 	stdstr(&modname, imodname);
110 	stdstr(&moddesc, imoddesc);
111 	stdstr(&modtype, imodtype);
112 	stdstr(&modlang, imodlang);
113 	stripFilters = new FilterList();
114 	rawFilters = new FilterList();
115 	renderFilters = new FilterList();
116 	optionFilters = new OptionFilterList();
117 	encodingFilters = new FilterList();
118 	skipConsecutiveLinks = true;
119 	procEntAttr = true;
120 }
121 
122 
123 /******************************************************************************
124  * SWModule Destructor - Cleans up instance of SWModule
125  */
126 
~SWModule()127 SWModule::~SWModule()
128 {
129 	if (modname)
130 		delete [] modname;
131 	if (moddesc)
132 		delete [] moddesc;
133 	if (modtype)
134 		delete [] modtype;
135 	if (modlang)
136 		delete [] modlang;
137 
138 	if (key) {
139 		if (!key->isPersist())
140 			delete key;
141 	}
142 
143 	stripFilters->clear();
144 	rawFilters->clear();
145 	renderFilters->clear();
146 	optionFilters->clear();
147 	encodingFilters->clear();
148 	entryAttributes.clear();
149 
150 	delete stripFilters;
151 	delete rawFilters;
152 	delete renderFilters;
153 	delete optionFilters;
154 	delete encodingFilters;
155 }
156 
157 
158 /******************************************************************************
159  * SWModule::createKey - Allocates a key of specific type for module
160  *
161  * RET:	pointer to allocated key
162  */
163 
createKey() const164 SWKey *SWModule::createKey() const
165 {
166 	return new SWKey();
167 }
168 
169 
170 /******************************************************************************
171  * SWModule::popError - Gets and clears error status
172  *
173  * RET:	error status
174  */
175 
popError()176 char SWModule::popError()
177 {
178 	char retval = error;
179 
180 	error = 0;
181 	if (!retval) retval = key->popError();
182 	return retval;
183 }
184 
185 
186 /******************************************************************************
187  * SWModule::Name - Sets/gets module name
188  *
189  * ENT:	imodname - value which to set modname
190  *		[0] - only get
191  *
192  * RET:	pointer to modname
193  */
194 
getName() const195 const char *SWModule::getName() const {
196 	return modname;
197 }
198 
199 
200 /******************************************************************************
201  * SWModule::Description - Sets/gets module description
202  *
203  * ENT:	imoddesc - value which to set moddesc
204  *		[0] - only get
205  *
206  * RET:	pointer to moddesc
207  */
208 
getDescription() const209 const char *SWModule::getDescription() const {
210 	return moddesc;
211 }
212 
213 
214 /******************************************************************************
215  * SWModule::Type - Sets/gets module type
216  *
217  * ENT:	imodtype - value which to set modtype
218  *		[0] - only get
219  *
220  * RET:	pointer to modtype
221  */
222 
getType() const223 const char *SWModule::getType() const {
224 	return modtype;
225 }
226 
227 /******************************************************************************
228  * SWModule::getDirection - Sets/gets module direction
229  *
230  * ENT:	newdir - value which to set direction
231  *		[-1] - only get
232  *
233  * RET:	char direction
234  */
getDirection() const235 char SWModule::getDirection() const {
236 	return direction;
237 }
238 
239 
240 /******************************************************************************
241  * SWModule::Disp - Sets/gets display driver
242  *
243  * ENT:	idisp - value which to set disp
244  *		[0] - only get
245  *
246  * RET:	pointer to disp
247  */
248 
getDisplay() const249 SWDisplay *SWModule::getDisplay() const {
250 	return disp;
251 }
252 
setDisplay(SWDisplay * idisp)253 void SWModule::setDisplay(SWDisplay *idisp) {
254 	disp = idisp;
255 }
256 
257 /******************************************************************************
258  *  * SWModule::Display - Calls this modules display object and passes itself
259  *   *
260  *    * RET:   error status
261  *     */
262 
display()263 char SWModule::display() {
264 	disp->display(*this);
265 	return 0;
266 }
267 
268 /******************************************************************************
269  * SWModule::getKey - Gets the key from this module that points to the position
270  *			record
271  *
272  * RET:	key object
273  */
274 
getKey() const275 SWKey *SWModule::getKey() const {
276 	return key;
277 }
278 
279 
280 /******************************************************************************
281  * SWModule::setKey - Sets a key to this module for position to a particular
282  *			record
283  *
284  * ENT:	ikey - key with which to set this module
285  *
286  * RET:	error status
287  */
288 
setKey(const SWKey * ikey)289 char SWModule::setKey(const SWKey *ikey) {
290 	SWKey *oldKey = 0;
291 
292 	if (key) {
293 		if (!key->isPersist())	// if we have our own copy
294 			oldKey = key;
295 	}
296 
297 	if (!ikey->isPersist()) {		// if we are to keep our own copy
298 		 key = createKey();
299 		*key = *ikey;
300 	}
301 	else	 key = (SWKey *)ikey;		// if we are to just point to an external key
302 
303 	if (oldKey)
304 		delete oldKey;
305 
306 	return error = key->getError();
307 }
308 
309 
310 /******************************************************************************
311  * SWModule::setPosition(SW_POSITION)	- Positions this modules to an entry
312  *
313  * ENT:	p	- position (e.g. TOP, BOTTOM)
314  *
315  * RET: *this
316  */
317 
setPosition(SW_POSITION p)318 void SWModule::setPosition(SW_POSITION p) {
319 	*key = p;
320 	char saveError = key->popError();
321 
322 	switch (p) {
323 	case POS_TOP:
324 		this->increment();
325 		this->decrement();
326 		break;
327 
328 	case POS_BOTTOM:
329 		this->decrement();
330 		this->increment();
331 		break;
332 	}
333 
334 	error = saveError;
335 }
336 
337 
338 /******************************************************************************
339  * SWModule::increment	- Increments module key a number of entries
340  *
341  * ENT:	increment	- Number of entries to jump forward
342  *
343  * RET: *this
344  */
345 
increment(int steps)346 void SWModule::increment(int steps) {
347 	(*key) += steps;
348 	error = key->popError();
349 }
350 
351 
352 /******************************************************************************
353  * SWModule::decrement	- Decrements module key a number of entries
354  *
355  * ENT:	decrement	- Number of entries to jump backward
356  *
357  * RET: *this
358  */
359 
decrement(int steps)360 void SWModule::decrement(int steps) {
361 	(*key) -= steps;
362 	error = key->popError();
363 }
364 
365 
366 /******************************************************************************
367  * SWModule::Search 	- Searches a module for a string
368  *
369  * ENT:	istr		- string for which to search
370  * 	searchType	- type of search to perform
371  *				>=0 - regex; (for backward compat, if > 0 then used as additional REGEX FLAGS)
372  *				-1  - phrase
373  *				-2  - multiword
374  *				-3  - entryAttrib (eg. Word//Lemma./G1234/)	 (Lemma with dot means check components (Lemma.[1-9]) also)
375  *				-4  - clucene
376  *				-5  - multilemma window; flags = window size
377  * 	flags		- options flags for search
378  *	justCheckIfSupported	- if set, don't search, only tell if this
379  *							function supports requested search.
380  *
381  * RET: ListKey set to verses that contain istr
382  */
383 
search(const char * istr,int searchType,int flags,SWKey * scope,bool * justCheckIfSupported,void (* percent)(char,void *),void * percentUserData)384 ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) {
385 
386 	listKey.clear();
387 	SWBuf term = istr;
388 	bool includeComponents = false;	// for entryAttrib e.g., /Lemma.1/
389 
390 	SWBuf target = getConfigEntry("AbsoluteDataPath");
391 	if (!target.endsWith("/") && !target.endsWith("\\")) {
392 		target.append('/');
393 	}
394 #if defined USEXAPIAN
395 	target.append("xapian");
396 #elif defined USELUCENE
397 	target.append("lucene");
398 #endif
399 	if (justCheckIfSupported) {
400 		*justCheckIfSupported = (searchType >= -3);
401 #if defined USEXAPIAN
402 		if ((searchType == -4) && (FileMgr::existsDir(target))) {
403 			*justCheckIfSupported = true;
404 		}
405 #elif defined USELUCENE
406 		if ((searchType == -4) && (IndexReader::indexExists(target.c_str()))) {
407 			*justCheckIfSupported = true;
408 		}
409 #endif
410 		return listKey;
411 	}
412 
413 	SWKey *saveKey   = 0;
414 	SWKey *searchKey = 0;
415 	SWKey *resultKey = createKey();
416 	SWKey *lastKey   = createKey();
417 	SWBuf lastBuf = "";
418 
419 #ifdef USECXX11REGEX
420 	std::locale oldLocale;
421 	std::locale::global(std::locale("en_US.UTF-8"));
422 
423 	std::regex preg;
424 #elif defined(USEICUREGEX)
425 	RegexMatcher *matcher = 0;
426 #else
427 	regex_t preg;
428 #endif
429 
430 	vector<SWBuf> words;
431 	vector<SWBuf> window;
432 	const char *sres;
433 	terminateSearch = false;
434 	char perc = 1;
435 	bool savePEA = isProcessEntryAttributes();
436 
437 	// determine if we might be doing special strip searches.  useful for knowing if we can use shortcuts
438 	bool specialStrips = (getConfigEntry("LocalStripFilter")
439 			|| (getConfig().has("GlobalOptionFilter", "UTF8GreekAccents"))
440 			|| (getConfig().has("GlobalOptionFilter", "UTF8HebrewPoints"))
441 			|| (getConfig().has("GlobalOptionFilter", "UTF8ArabicPoints"))
442 			|| (strchr(istr, '<')));
443 
444 	setProcessEntryAttributes(searchType == -3);
445 
446 
447 	if (!key->isPersist()) {
448 		saveKey = createKey();
449 		*saveKey = *key;
450 	}
451 	else	saveKey = key;
452 
453 	searchKey = (scope)?scope->clone():(key->isPersist())?key->clone():0;
454 	if (searchKey) {
455 		searchKey->setPersist(true);
456 		setKey(*searchKey);
457 	}
458 
459 	(*percent)(perc, percentUserData);
460 
461 	*this = BOTTOM;
462 	long highIndex = key->getIndex();
463 	if (!highIndex)
464 		highIndex = 1;		// avoid division by zero errors.
465 	*this = TOP;
466 	if (searchType >= 0) {
467 #ifdef USECXX11REGEX
468 		preg = std::regex((SWBuf(".*")+istr+".*").c_str(), std::regex_constants::extended | searchType | flags);
469 #elif defined(USEICUREGEX)
470 		UErrorCode        status    = U_ZERO_ERROR;
471 		matcher = new RegexMatcher(istr, searchType | flags, status);
472 		if (U_FAILURE(status)) {
473 			SWLog::getSystemLog()->logError("Error compiling Regex: %d", status);
474 			return listKey;
475 		}
476 
477 #else
478 		flags |=searchType|REG_NOSUB|REG_EXTENDED;
479 		int err = regcomp(&preg, istr, flags);
480 		if (err) {
481 			SWLog::getSystemLog()->logError("Error compiling Regex: %d", err);
482 			return listKey;
483 		}
484 #endif
485 	}
486 
487 	(*percent)(++perc, percentUserData);
488 
489 
490 #if defined USEXAPIAN || defined USELUCENE
491 	(*percent)(10, percentUserData);
492 	if (searchType == -4) {	// indexed search
493 #if defined USEXAPIAN
494 		SWTRY {
495 			Xapian::Database database(target.c_str());
496 			Xapian::QueryParser queryParser;
497 			queryParser.set_default_op(Xapian::Query::OP_AND);
498 			SWTRY {
499 				queryParser.set_stemmer(Xapian::Stem(getLanguage()));
500 			} SWCATCH(...) {}
501 			queryParser.set_stemming_strategy(queryParser.STEM_SOME);
502 			queryParser.add_prefix("content", "C");
503 			queryParser.add_prefix("lemma", "L");
504 			queryParser.add_prefix("morph", "M");
505 			queryParser.add_prefix("prox", "P");
506 			queryParser.add_prefix("proxlem", "PL");
507 			queryParser.add_prefix("proxmorph", "PM");
508 
509 #elif defined USELUCENE
510 
511 		lucene::index::IndexReader    *ir = 0;
512 		lucene::search::IndexSearcher *is = 0;
513 		Query                         *q  = 0;
514 		Hits                          *h  = 0;
515 		SWTRY {
516 			ir = IndexReader::open(target);
517 			is = new IndexSearcher(ir);
518 			const TCHAR *stopWords[] = { 0 };
519 			standard::StandardAnalyzer analyzer(stopWords);
520 #endif
521 
522 			// parse the query
523 #if defined USEXAPIAN
524 			Xapian::Query q = queryParser.parse_query(istr);
525 			Xapian::Enquire enquire = Xapian::Enquire(database);
526 #elif defined USELUCENE
527 			q = QueryParser::parse((wchar_t *)utf8ToWChar(istr).getRawData(), _T("content"), &analyzer);
528 #endif
529 			(*percent)(20, percentUserData);
530 
531 			// perform the search
532 #if defined USEXAPIAN
533 			enquire.set_query(q);
534 			Xapian::MSet h = enquire.get_mset(0, 99999);
535 #elif defined USELUCENE
536 			h = is->search(q);
537 #endif
538 			(*percent)(80, percentUserData);
539 
540 			// iterate thru each good module position that meets the search
541 			bool checkBounds = getKey()->isBoundSet();
542 #if defined USEXAPIAN
543 			Xapian::MSetIterator i;
544 			for (i = h.begin(); i != h.end(); ++i) {
545 //				cout << "Document ID " << *i << "\t";
546 				__u64 score = i.get_percent();
547 				Xapian::Document doc = i.get_document();
548 				*resultKey = doc.get_data().c_str();
549 #elif defined USELUCENE
550 			for (unsigned long i = 0; i < (unsigned long)h->length(); i++) {
551 				Document &doc = h->doc(i);
552 				// set a temporary verse key to this module position
553 				*resultKey = wcharToUTF8(doc.get(_T("key"))); //TODO Does a key always accept utf8?
554 				__u64 score = (__u64)((__u32)(h->score(i)*100));
555 #endif
556 
557 				// check to see if it sets ok (within our bounds) and if not, skip
558 				if (checkBounds) {
559 					*getKey() = *resultKey;
560 					if (*getKey() != *resultKey) {
561 						continue;
562 					}
563 				}
564 				listKey << *resultKey;
565 				listKey.getElement()->userData = score;
566 			}
567 			(*percent)(98, percentUserData);
568 		}
569 		SWCATCH (...) {
570 #if defined USEXAPIAN
571 #elif defined USELUCENE
572 			q = 0;
573 #endif
574 			// invalid clucene query
575 		}
576 #if defined USEXAPIAN
577 #elif defined USELUCENE
578 		delete h;
579 		delete q;
580 
581 		delete is;
582 		if (ir) {
583 			ir->close();
584 		}
585 #endif
586 	}
587 #endif
588 
589 	// some pre-loop processing
590 	switch (searchType) {
591 
592 	// phrase
593 	case -1:
594 		// let's see if we're told to ignore case.  If so, then we'll touppstr our term
595 		if ((flags & REG_ICASE) == REG_ICASE) toupperstr(term);
596 		break;
597 
598 	// multi-word
599 	case -2:
600 	case -5:
601 		// let's break the term down into our words vector
602 		while (1) {
603 			const char *word = term.stripPrefix(' ');
604 			if (!word) {
605 				words.push_back(term);
606 				break;
607 			}
608 			words.push_back(word);
609 		}
610 		if ((flags & REG_ICASE) == REG_ICASE) {
611 			for (unsigned int i = 0; i < words.size(); i++) {
612 				toupperstr(words[i]);
613 			}
614 		}
615 		break;
616 
617 	// entry attributes
618 	case -3:
619 		// let's break the attribute segs down.  We'll reuse our words vector for each segment
620 		while (1) {
621 			const char *word = term.stripPrefix('/');
622 			if (!word) {
623 				words.push_back(term);
624 				break;
625 			}
626 			words.push_back(word);
627 		}
628 		if ((words.size()>2) && words[2].endsWith(".")) {
629 			includeComponents = true;
630 			words[2]--;
631 		}
632 		break;
633 	}
634 
635 
636 	// our main loop to iterate the module and find the stuff
637 	perc = 5;
638 	(*percent)(perc, percentUserData);
639 
640 
641 	while ((searchType != -4) && !popError() && !terminateSearch) {
642 		long mindex = key->getIndex();
643 		float per = (float)mindex / highIndex;
644 		per *= 93;
645 		per += 5;
646 		char newperc = (char)per;
647 		if (newperc > perc) {
648 			perc = newperc;
649 			(*percent)(perc, percentUserData);
650 		}
651 		else if (newperc < perc) {
652 #ifndef _MSC_VER
653 			std::cerr << "Serious error: new percentage complete is less than previous value\n";
654 			std::cerr << "index: " << (key->getIndex()) << "\n";
655 			std::cerr << "highIndex: " << highIndex << "\n";
656 			std::cerr << "newperc ==" << (int)newperc << "%" << "is smaller than\n";
657 			std::cerr << "perc == "  << (int )perc << "% \n";
658 #endif
659 		}
660 		if (searchType >= 0) {
661 			SWBuf textBuf = stripText();
662 #ifdef USECXX11REGEX
663 			if (std::regex_match(std::string(textBuf.c_str()), preg)) {
664 #elif defined(USEICUREGEX)
665 			UnicodeString stringToTest = textBuf.c_str();
666 			matcher->reset(stringToTest);
667 
668 			if (matcher->find()) {
669 #else
670 			if (!regexec(&preg, textBuf, 0, 0, 0)) {
671 #endif
672 				*resultKey = *getKey();
673 				resultKey->clearBound();
674 				listKey << *resultKey;
675 				lastBuf = "";
676 			}
677 #ifdef USECXX11REGEX
678 			else if (std::regex_match(std::string((lastBuf + ' ' + textBuf).c_str()), preg)) {
679 #elif defined(USEICUREGEX)
680 			else {
681 				stringToTest = (lastBuf + ' ' + textBuf).c_str();
682 				matcher->reset(stringToTest);
683 
684 				if (matcher->find()) {
685 #else
686 			else if (!regexec(&preg, lastBuf + ' ' + textBuf, 0, 0, 0)) {
687 #endif
688 				lastKey->clearBound();
689 				listKey << *lastKey;
690 				lastBuf = textBuf;
691 			}
692 			else {
693 				lastBuf = textBuf;
694 			}
695 #if defined(USEICUREGEX)
696 			}
697 #endif
698 		}
699 
700 		// phrase
701 		else {
702 			SWBuf textBuf;
703 			switch (searchType) {
704 
705 			// phrase
706 			case -1:
707 				textBuf = stripText();
708 				if ((flags & REG_ICASE) == REG_ICASE) toupperstr(textBuf);
709 				sres = strstr(textBuf.c_str(), term.c_str());
710 				if (sres) { //it's also in the stripText(), so we have a valid search result item now
711 					*resultKey = *getKey();
712 					resultKey->clearBound();
713 					listKey << *resultKey;
714 				}
715 				break;
716 
717 			// multiword
718 			case -2: { // enclose our allocations
719 				int loopCount = 0;
720 				unsigned int foundWords = 0;
721 				do {
722 					textBuf = ((loopCount == 0)&&(!specialStrips)) ? getRawEntry() : stripText();
723 					foundWords = 0;
724 
725 					for (unsigned int i = 0; i < words.size(); i++) {
726 						if ((flags & REG_ICASE) == REG_ICASE) toupperstr(textBuf);
727 						sres = strstr(textBuf.c_str(), words[i].c_str());
728 						if (!sres) {
729 							break; //for loop
730 						}
731 						foundWords++;
732 					}
733 
734 					loopCount++;
735 				} while ( (loopCount < 2) && (foundWords == words.size()));
736 
737 				if ((loopCount == 2) && (foundWords == words.size())) { //we found the right words in both raw and stripped text, which means it's a valid result item
738 					*resultKey = *getKey();
739 					resultKey->clearBound();
740 					listKey << *resultKey;
741 				}
742 				}
743 				break;
744 
745 			// entry attributes
746 			case -3: {
747 				renderText();	// force parse
748 				AttributeTypeList &entryAttribs = getEntryAttributes();
749 				AttributeTypeList::iterator i1Start, i1End;
750 				AttributeList::iterator i2Start, i2End;
751 				AttributeValue::iterator i3Start, i3End;
752 
753 				if ((words.size()) && (words[0].length())) {
754 // cout << "Word: " << words[0] << endl;
755 				for (i1Start = entryAttribs.begin(); i1Start != entryAttribs.end(); ++i1Start) {
756 // cout << "stuff: " << i1Start->first.c_str() << endl;
757 				}
758 					i1Start = entryAttribs.find(words[0]);
759 					i1End = i1Start;
760 					if (i1End != entryAttribs.end()) {
761 						i1End++;
762 					}
763 				}
764 				else {
765 					i1Start = entryAttribs.begin();
766 					i1End   = entryAttribs.end();
767 				}
768 				for (;i1Start != i1End; i1Start++) {
769 					if ((words.size()>1) && (words[1].length())) {
770 						i2Start = i1Start->second.find(words[1]);
771 						i2End = i2Start;
772 						if (i2End != i1Start->second.end())
773 							i2End++;
774 					}
775 					else {
776 						i2Start = i1Start->second.begin();
777 						i2End   = i1Start->second.end();
778 					}
779 					for (;i2Start != i2End; i2Start++) {
780 						if ((words.size()>2) && (words[2].length()) && (!includeComponents)) {
781 							i3Start = i2Start->second.find(words[2]);
782 							i3End = i3Start;
783 							if (i3End != i2Start->second.end())
784 								i3End++;
785 						}
786 						else {
787 							i3Start = i2Start->second.begin();
788 							i3End   = i2Start->second.end();
789 						}
790 						for (;i3Start != i3End; i3Start++) {
791 							if ((words.size()>3) && (words[3].length())) {
792 								if (includeComponents) {
793 									SWBuf key = i3Start->first.c_str();
794 									key = key.stripPrefix('.', true);
795 									// we're iterating all 3 level keys, so be sure we match our
796 									// prefix (e.g., Lemma, Lemma.1, Lemma.2, etc.)
797 									if (key != words[2]) continue;
798 								}
799 								if (flags & SEARCHFLAG_MATCHWHOLEENTRY) {
800 									bool found = !(((flags & REG_ICASE) == REG_ICASE) ? sword::stricmp(i3Start->second.c_str(), words[3]) : strcmp(i3Start->second.c_str(), words[3]));
801 									sres = (found) ? i3Start->second.c_str() : 0;
802 								}
803 								else {
804 									sres = ((flags & REG_ICASE) == REG_ICASE) ? stristr(i3Start->second.c_str(), words[3]) : strstr(i3Start->second.c_str(), words[3]);
805 								}
806 								if (sres) {
807 									*resultKey = *getKey();
808 									resultKey->clearBound();
809 									listKey << *resultKey;
810 									break;
811 								}
812 							}
813 						}
814 						if (i3Start != i3End)
815 							break;
816 					}
817 					if (i2Start != i2End)
818 						break;
819 				}
820 				break;
821 			}
822 			// NOT DONE
823 			case -5:
824 				AttributeList &words = getEntryAttributes()["Word"];
825 				SWBuf kjvWord = "";
826 				SWBuf bibWord = "";
827 				for (AttributeList::iterator it = words.begin(); it != words.end(); it++) {
828 					int parts = atoi(it->second["PartCount"]);
829 					SWBuf lemma = "";
830 					SWBuf morph = "";
831 					for (int i = 1; i <= parts; i++) {
832 						SWBuf key = "";
833 						key = (parts == 1) ? "Lemma" : SWBuf().setFormatted("Lemma.%d", i).c_str();
834 						AttributeValue::iterator li = it->second.find(key);
835 						if (li != it->second.end()) {
836 							if (i > 1) lemma += " ";
837 							key = (parts == 1) ? "LemmaClass" : SWBuf().setFormatted("LemmaClass.%d", i).c_str();
838 							AttributeValue::iterator lci = it->second.find(key);
839 							if (lci != it->second.end()) {
840 								lemma += lci->second + ":";
841 							}
842 							lemma += li->second;
843 						}
844 						key = (parts == 1) ? "Morph" : SWBuf().setFormatted("Morph.%d", i).c_str();
845 						li = it->second.find(key);
846 						// silly.  sometimes morph counts don't equal lemma counts
847 						if (i == 1 && parts != 1 && li == it->second.end()) {
848 							li = it->second.find("Morph");
849 						}
850 						if (li != it->second.end()) {
851 							if (i > 1) morph += " ";
852 							key = (parts == 1) ? "MorphClass" : SWBuf().setFormatted("MorphClass.%d", i).c_str();
853 							AttributeValue::iterator lci = it->second.find(key);
854 							// silly.  sometimes morph counts don't equal lemma counts
855 							if (i == 1 && parts != 1 && lci == it->second.end()) {
856 								lci = it->second.find("MorphClass");
857 							}
858 							if (lci != it->second.end()) {
859 								morph += lci->second + ":";
860 							}
861 							morph += li->second;
862 						}
863 						// TODO: add src tags and maybe other attributes
864 					}
865 					while (window.size() < (unsigned)flags) {
866 
867 					}
868 				}
869 				break;
870 			} // end switch
871 		}
872 		*lastKey = *getKey();
873 		(*this)++;
874 	}
875 
876 
877 	// cleaup work
878 	if (searchType >= 0) {
879 #ifdef USECXX11REGEX
880 		std::locale::global(oldLocale);
881 #elif defined(USEICUREGEX)
882 		delete matcher;
883 #else
884 		regfree(&preg);
885 #endif
886 	}
887 
888 	setKey(*saveKey);
889 
890 	if (!saveKey->isPersist())
891 		delete saveKey;
892 
893 	if (searchKey)
894 		delete searchKey;
895 	delete resultKey;
896 	delete lastKey;
897 
898 	listKey = TOP;
899 	setProcessEntryAttributes(savePEA);
900 
901 
902 	(*percent)(100, percentUserData);
903 
904 
905 	return listKey;
906 }
907 
908 
909 /******************************************************************************
910  * SWModule::stripText() 	- calls all stripfilters on current text
911  *
912  * ENT:	buf	- buf to massage instead of this modules current text
913  * 	len	- max len of buf
914  *
915  * RET: this module's text at current key location massaged by Strip filters
916  */
917 
918 const char *SWModule::stripText(const char *buf, int len) {
919 	static SWBuf local;
920 	local = renderText(buf, len, false);
921 	return local.c_str();
922 }
923 
924 
925 /** SWModule::getRenderHeader()	- Produces any header data which might be
926  *	useful which associated with the processing done with this filter.
927  *	A typical example is a suggested CSS style block for classed
928  *	containers.
929  */
930 const char *SWModule::getRenderHeader() const {
931 	FilterList::const_iterator first = getRenderFilters().begin();
932 	if (first != getRenderFilters().end()) {
933 		return (*first)->getHeader();
934 	}
935 	return "";
936 }
937 
938 
939 /******************************************************************************
940  * SWModule::renderText 	- calls all renderfilters on current module
941  *				position
942  *
943  * RET: this module's text at current key location massaged by renderText filters
944  */
945 SWBuf SWModule::renderText() {
946 	return renderText((const char *)0);
947 }
948 
949 /******************************************************************************
950  * SWModule::renderText 	- calls all renderfilters on provided text
951  *				or current module position provided text null
952  *
953  * ENT:	buf	- buffer to render
954  *
955  * RET: this module's text at current key location massaged by renderText filters
956  *
957  * NOTES: This method is only truly const if called with a provided text; using
958  * module's current position may produce a new entry attributes map which
959  * logically violates the const semantic, which is why the above method
960  * which takes no params is not const, i.e., don't call this method with
961  * null as text param, but instead use non-const method above.  The public
962  * interface for this method expects a value for the text param.  We use it
963  * internally sometimes calling with null to save duplication of code.
964  */
965 
966 SWBuf SWModule::renderText(const char *buf, int len, bool render) const {
967 	bool savePEA = isProcessEntryAttributes();
968 	if (!buf) {
969 		entryAttributes.clear();
970 	}
971 	else {
972 		setProcessEntryAttributes(false);
973 	}
974 
975 	SWBuf local;
976 	if (buf)
977 		local = buf;
978 
979 	SWBuf &tmpbuf = (buf) ? local : getRawEntryBuf();
980 	SWKey *key = 0;
981 	static const char *null = "";
982 
983 	if (tmpbuf) {
984 		unsigned long size = (len < 0) ? ((getEntrySize()<0) ? strlen(tmpbuf) : getEntrySize()) : len;
985 		if (size > 0) {
986 			key = this->getKey();
987 
988 			optionFilter(tmpbuf, key);
989 
990 			if (render) {
991 				renderFilter(tmpbuf, key);
992 				encodingFilter(tmpbuf, key);
993 			}
994 			else	stripFilter(tmpbuf, key);
995 		}
996 	}
997 	else {
998 		tmpbuf = null;
999 	}
1000 
1001 	setProcessEntryAttributes(savePEA);
1002 
1003 	return tmpbuf;
1004 }
1005 
1006 
1007 /******************************************************************************
1008  * SWModule::renderText 	- calls all renderfilters on current text
1009  *
1010  * ENT:	tmpKey	- key to use to grab text
1011  *
1012  * RET: this module's text at current key location massaged by RenderFilers
1013  */
1014 
1015 SWBuf SWModule::renderText(const SWKey *tmpKey) {
1016 	SWKey *saveKey;
1017 	const char *retVal;
1018 
1019 	if (!key->isPersist()) {
1020 		saveKey = createKey();
1021 		*saveKey = *key;
1022 	}
1023 	else	saveKey = key;
1024 
1025 	setKey(*tmpKey);
1026 
1027 	retVal = renderText();
1028 
1029 	setKey(*saveKey);
1030 
1031 	if (!saveKey->isPersist())
1032 		delete saveKey;
1033 
1034 	return retVal;
1035 }
1036 
1037 
1038 /******************************************************************************
1039  * SWModule::stripText 	- calls all StripTextFilters on current text
1040  *
1041  * ENT:	tmpKey	- key to use to grab text
1042  *
1043  * RET: this module's text at specified key location massaged by Strip filters
1044  */
1045 
1046 const char *SWModule::stripText(const SWKey *tmpKey) {
1047 	SWKey *saveKey;
1048 	const char *retVal;
1049 
1050 	if (!key->isPersist()) {
1051 		saveKey = createKey();
1052 		*saveKey = *key;
1053 	}
1054 	else	saveKey = key;
1055 
1056 	setKey(*tmpKey);
1057 
1058 	retVal = stripText();
1059 
1060 	setKey(*saveKey);
1061 
1062 	if (!saveKey->isPersist())
1063 		delete saveKey;
1064 
1065 	return retVal;
1066 }
1067 
1068 /******************************************************************************
1069  * SWModule::getBibliography	-Returns bibliographic data for a module in the
1070  *								requested format
1071  *
1072  * ENT: bibFormat format of the bibliographic data
1073  *
1074  * RET: bibliographic data in the requested format as a string (BibTeX by default)
1075  */
1076 
1077 SWBuf SWModule::getBibliography(unsigned char bibFormat) const {
1078 	SWBuf s;
1079 	switch (bibFormat) {
1080 	case BIB_BIBTEX:
1081 		s.append("@Book {").append(modname).append(", Title = \"").append(moddesc).append("\", Publisher = \"CrossWire Bible Society\"}");
1082 		break;
1083 	}
1084 	return s;
1085 }
1086 
1087 const char *SWModule::getConfigEntry(const char *key) const {
1088 	ConfigEntMap::iterator it = config->find(key);
1089 	return (it != config->end()) ? it->second.c_str() : 0;
1090 }
1091 
1092 
1093 void SWModule::setConfig(ConfigEntMap *config) {
1094 	this->config = config;
1095 }
1096 
1097 
1098 bool SWModule::hasSearchFramework() {
1099 #ifdef USELUCENE
1100 	return true;
1101 #else
1102 	return SWSearchable::hasSearchFramework();
1103 #endif
1104 }
1105 
1106 void SWModule::deleteSearchFramework() {
1107 #ifdef USELUCENE
1108 	SWBuf target = getConfigEntry("AbsoluteDataPath");
1109 	if (!target.endsWith("/") && !target.endsWith("\\")) {
1110 		target.append('/');
1111 	}
1112 	target.append("lucene");
1113 
1114 	FileMgr::removeDir(target.c_str());
1115 #else
1116 	SWSearchable::deleteSearchFramework();
1117 #endif
1118 }
1119 
1120 
/******************************************************************************
 * SWModule::createSearchFramework	- builds the module's search index
 *
 * When compiled with USEXAPIAN or USELUCENE, walks every entry of the module
 * from TOP until the key reports an error and indexes, per entry: the key,
 * the stripped content, the lemma and morphology attributes and, for the
 * first entry of a proximity block (first verse of a chapter for VerseKeys,
 * first of a run of childless siblings for TreeKeyIdx keys), the combined
 * text / lemma / morphology of the whole block.  The index lives in
 * "<AbsoluteDataPath>/xapian" or "<AbsoluteDataPath>/lucene".
 * Without either backend the call is forwarded to SWSearchable.
 *
 * ENT:	percent		- progress callback, called with a percentage value
 *				  and percentUserData; it is invoked without a
 *				  null check, so it must not be 0
 *	percentUserData	- opaque pointer handed back to percent
 *
 * RET: 0 on success, -1 when FileMgr::createParent reports a failure; in
 *		builds without a backend, the value returned by SWSearchable
 */
signed char SWModule::createSearchFramework(void (*percent)(char, void *), void *percentUserData) {

#if defined USELUCENE || defined USEXAPIAN
	SWBuf target = getConfigEntry("AbsoluteDataPath");
	if (!target.endsWith("/") && !target.endsWith("\\")) {
		target.append('/');
	}
#if defined USEXAPIAN
	target.append("xapian");
#elif defined USELUCENE
	const int MAX_CONV_SIZE = 1024 * 1024;
	target.append("lucene");
#endif
	// a nonzero status aborts before anything else has been allocated or changed
	int status = FileMgr::createParent(target+"/dummy");
	if (status) return -1;

	SWKey *saveKey = 0;
	SWKey *searchKey = 0;
	// NOTE(review): textkey is not referenced again in this function
	SWKey textkey;
	SWBuf c;


	// turn all filters to default values
	// (each filter's current value is remembered in filterSettings, in list
	// order, so it can be restored at the end; the accent / vowel-point
	// filters are explicitly forced to "Off")
	StringList filterSettings;
	for (OptionFilterList::iterator filter = optionFilters->begin(); filter != optionFilters->end(); filter++) {
		filterSettings.push_back((*filter)->getOptionValue());
		(*filter)->setOptionValue(*((*filter)->getOptionValues().begin()));

		if ( (!strcmp("Greek Accents", (*filter)->getOptionName())) ||
			(!strcmp("Hebrew Vowel Points", (*filter)->getOptionName())) ||
			(!strcmp("Arabic Vowel Points", (*filter)->getOptionName()))
		   ) {
			(*filter)->setOptionValue("Off");
		}
	}


	// be sure we give CLucene enough file handles
	FileMgr::getSystemFileMgr()->flush();

	// save key information so as not to disrupt original
	// module position
	if (!key->isPersist()) {
		saveKey = createKey();
		*saveKey = *key;
	}
	else	saveKey = key;

	// when the current key is persistent, iterate with a persistent clone of it
	// instead of the original key object
	searchKey = (key->isPersist())?key->clone():0;
	if (searchKey) {
		searchKey->setPersist(1);
		setKey(*searchKey);
	}

	bool includeKeyInSearch = getConfig().has("SearchOption", "IncludeKeyInSearch");

	// lets create or open our search index
#if defined USEXAPIAN
	Xapian::WritableDatabase database(target.c_str(), Xapian::DB_CREATE_OR_OPEN);
	Xapian::TermGenerator termGenerator;
	// a failure to set up a stemmer for the module language is ignored
	SWTRY {
		termGenerator.set_stemmer(Xapian::Stem(getLanguage()));
	} SWCATCH(...) {}

#elif defined USELUCENE
	// the index is first written into a RAMDirectory and merged into the
	// on-disk index after the main loop
	RAMDirectory *ramDir = 0;
	IndexWriter *coreWriter = 0;
	IndexWriter *fsWriter = 0;
	Directory *d = 0;

	// empty stop word list handed to the analyzer
	const TCHAR *stopWords[] = { 0 };
	standard::StandardAnalyzer *an = new standard::StandardAnalyzer(stopWords);

	ramDir = new RAMDirectory();
	coreWriter = new IndexWriter(ramDir, an, true);
	coreWriter->setMaxFieldLength(MAX_CONV_SIZE);
#endif




	char perc = 1;
	// find out which kind of key we iterate with; this selects how proximity
	// blocks are formed below
	VerseKey *vkcheck = 0;
	vkcheck = SWDYNAMIC_CAST(VerseKey, key);
	VerseKey *chapMax = 0;
	if (vkcheck) chapMax = (VerseKey *)vkcheck->clone();

	TreeKeyIdx *tkcheck = 0;
	tkcheck = SWDYNAMIC_CAST(TreeKeyIdx, key);


	// position at the last entry to learn the highest index; it is the
	// denominator of the progress computation
	*this = BOTTOM;
	long highIndex = key->getIndex();
	if (!highIndex)
		highIndex = 1;		// avoid division by zero errors.

	// entry attributes are read below, so attribute processing is switched on
	// for the duration of the indexing run
	bool savePEA = isProcessEntryAttributes();
	setProcessEntryAttributes(true);

	// prox chapter blocks
	// position module at the beginning
	*this = TOP;

	SWBuf proxBuf;
	SWBuf proxLem;
	SWBuf proxMorph;
	SWBuf strong;
	SWBuf morph;

	// main loop: one iteration per module entry, until the key reports an error
	char err = popError();
	while (!err) {
		long mindex = key->getIndex();

		proxBuf = "";
		proxLem = "";
		proxMorph = "";

		// compute percent complete so we can report to our progress callback
		float per = (float)mindex / highIndex;
		// between 5%-98%
		per *= 93; per += 5;
		char newperc = (char)per;
		// only report when the percentage has grown
		if (newperc > perc) {
			perc = newperc;
			(*percent)(perc, percentUserData);
		}

		// get "content" field
		const char *content = stripText();

		// set once this entry has anything worth writing to the index
		bool good = false;

		// start out entry
#if defined USEXAPIAN
		Xapian::Document doc;
		termGenerator.set_document(doc);
#elif defined USELUCENE
		Document *doc = new Document();
#endif
		// get "key" field
		SWBuf keyText = (vkcheck) ? vkcheck->getOSISRef() : getKeyText();
		if (content && *content) {
			good = true;


			// build "strong" field
			// (strong collects the lemmas, morph collects "lemma@morph" pairs,
			// both space separated)
			AttributeTypeList::iterator words;
			AttributeList::iterator word;
			AttributeValue::iterator strongVal;
			AttributeValue::iterator morphVal;

			strong="";
			morph="";
			words = getEntryAttributes().find("Word");
			if (words != getEntryAttributes().end()) {
				for (word = words->second.begin();word != words->second.end(); word++) {
					// a missing or zero PartCount is treated as a single part
					int partCount = atoi(word->second["PartCount"]);
					if (!partCount) partCount = 1;
					for (int i = 0; i < partCount; i++) {
						// multi-part words use the attribute names "Lemma.N" / "Morph.N"
						SWBuf tmp = "Lemma";
						if (partCount > 1) tmp.appendFormatted(".%d", i+1);
						strongVal = word->second.find(tmp);
						if (strongVal != word->second.end()) {
							// cheeze.  skip empty article tags that weren't assigned to any text
							if (strongVal->second == "G3588") {
								if (word->second.find("Text") == word->second.end())
									continue;	// no text? let's skip
							}
							strong.append(strongVal->second);
							morph.append(strongVal->second);
							morph.append('@');
							SWBuf tmp = "Morph";
							if (partCount > 1) tmp.appendFormatted(".%d", i+1);
							morphVal = word->second.find(tmp);
							if (morphVal != word->second.end()) {
								morph.append(morphVal->second);
							}
							strong.append(' ');
							morph.append(' ');
						}
					}
				}
			}

#if defined USEXAPIAN
			doc.set_data(keyText.c_str());
#elif defined USELUCENE
			doc->add(*_CLNEW Field(_T("key"), (wchar_t *)utf8ToWChar(keyText).getRawData(), Field::STORE_YES | Field::INDEX_UNTOKENIZED));
#endif

			// optionally make the key text itself searchable by prefixing it to
			// the content; c owns the combined string that content then points into
			if (includeKeyInSearch) {
				c = keyText;
				c += " ";
				c += content;
				content = c.c_str();
			}

#if defined USEXAPIAN
			termGenerator.index_text(content);
			termGenerator.index_text(content, 1, "C");
#elif defined USELUCENE
			doc->add(*_CLNEW Field(_T("content"), (wchar_t *)utf8ToWChar(content).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
#endif

			if (strong.length() > 0) {
#if defined USEXAPIAN
				termGenerator.index_text(strong.c_str(), 1, "L");
				termGenerator.index_text(morph.c_str(), 1, "M");
#elif defined USELUCENE
				doc->add(*_CLNEW Field(_T("lemma"), (wchar_t *)utf8ToWChar(strong).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
				doc->add(*_CLNEW Field(_T("morph"), (wchar_t *)utf8ToWChar(morph).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
#endif
//printf("setting fields (%s).\ncontent: %s\nlemma: %s\n", (const char *)*key, content, strong.c_str());
			}

//printf("setting fields (%s).\n", (const char *)*key);
//fflush(stdout);
		}
		// don't write yet, cuz we have to see if we're the first of a prox block (5:1 or chapter5/verse1

		// for VerseKeys use chapter
		if (vkcheck) {
			*chapMax = *vkcheck;
			// we're the first verse in a chapter
			if (vkcheck->getVerse() == 1) {
				*chapMax = MAXVERSE;
				// local VerseKey copy of the current position; it shadows the outer
				// saveKey pointer for the rest of this scope
				VerseKey saveKey = *vkcheck;
				// walk the whole chapter, accumulating its text, lemmas and morphology
				while ((!err) && (*vkcheck <= *chapMax)) {
//printf("building proxBuf from (%s).\nproxBuf.c_str(): %s\n", (const char *)*key, proxBuf.c_str());
//printf("building proxBuf from (%s).\n", (const char *)*key);

					content = stripText();
					if (content && *content) {
						// build "strong" field
						strong = "";
						morph = "";
						AttributeTypeList::iterator words;
						AttributeList::iterator word;
						AttributeValue::iterator strongVal;
						AttributeValue::iterator morphVal;

						words = getEntryAttributes().find("Word");
						if (words != getEntryAttributes().end()) {
							for (word = words->second.begin();word != words->second.end(); word++) {
								int partCount = atoi(word->second["PartCount"]);
								if (!partCount) partCount = 1;
								for (int i = 0; i < partCount; i++) {
									SWBuf tmp = "Lemma";
									if (partCount > 1) tmp.appendFormatted(".%d", i+1);
									strongVal = word->second.find(tmp);
									if (strongVal != word->second.end()) {
										// cheeze.  skip empty article tags that weren't assigned to any text
										if (strongVal->second == "G3588") {
											if (word->second.find("Text") == word->second.end())
												continue;	// no text? let's skip
										}
										strong.append(strongVal->second);
										morph.append(strongVal->second);
										morph.append('@');
										SWBuf tmp = "Morph";
										if (partCount > 1) tmp.appendFormatted(".%d", i+1);
										morphVal = word->second.find(tmp);
										if (morphVal != word->second.end()) {
											morph.append(morphVal->second);
										}
										strong.append(' ');
										morph.append(' ');
									}
								}
							}
						}
						proxBuf += content;
						proxBuf.append(' ');
						proxLem += strong;
						proxMorph += morph;
						if (proxLem.length()) {
							proxLem.append("\n");
							proxMorph.append("\n");
						}
					}
					(*this)++;
					err = popError();
				}
				// an error raised while walking the chapter is discarded and the
				// position is put back to the verse this iteration started on
				err = 0;
				*vkcheck = saveKey;
			}
		}

		// for TreeKeys use siblings if we have no children
		else if (tkcheck) {
			if (!tkcheck->hasChildren()) {
				// NOTE(review): previousSibling() failing presumably means this node is
				// the first of its siblings — confirm against TreeKeyIdx
				if (!tkcheck->previousSibling()) {
					do {
//printf("building proxBuf from (%s).\n", (const char *)*key);
//fflush(stdout);

						content = stripText();
						if (content && *content) {
							// build "strong" field
							strong = "";
							morph = "";
							AttributeTypeList::iterator words;
							AttributeList::iterator word;
							AttributeValue::iterator strongVal;
							AttributeValue::iterator morphVal;

							words = getEntryAttributes().find("Word");
							if (words != getEntryAttributes().end()) {
								for (word = words->second.begin();word != words->second.end(); word++) {
									int partCount = atoi(word->second["PartCount"]);
									if (!partCount) partCount = 1;
									for (int i = 0; i < partCount; i++) {
										SWBuf tmp = "Lemma";
										if (partCount > 1) tmp.appendFormatted(".%d", i+1);
										strongVal = word->second.find(tmp);
										if (strongVal != word->second.end()) {
											// cheeze.  skip empty article tags that weren't assigned to any text
											if (strongVal->second == "G3588") {
												if (word->second.find("Text") == word->second.end())
													continue;	// no text? let's skip
											}
											strong.append(strongVal->second);
											morph.append(strongVal->second);
											morph.append('@');
											SWBuf tmp = "Morph";
											if (partCount > 1) tmp.appendFormatted(".%d", i+1);
											morphVal = word->second.find(tmp);
											if (morphVal != word->second.end()) {
												morph.append(morphVal->second);
											}
											strong.append(' ');
											morph.append(' ');
										}
									}
								}
							}

							proxBuf += content;
							proxBuf.append(' ');
							proxLem += strong;
							proxMorph += morph;
							if (proxLem.length()) {
								proxLem.append("\n");
								proxMorph.append("\n");
							}
						}
					} while (tkcheck->nextSibling());
					// return to the first sibling, where this iteration started
					tkcheck->parent();
					tkcheck->firstChild();
				}
				else tkcheck->nextSibling();	// reposition from our previousSibling test
			}
		}

		// add the proximity-block fields, if a block was collected for this entry
		if (proxBuf.length() > 0) {

#if defined USEXAPIAN
			termGenerator.index_text(proxBuf.c_str(), 1, "P");
#elif defined USELUCENE
			doc->add(*_CLNEW Field(_T("prox"), (wchar_t *)utf8ToWChar(proxBuf).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
#endif
			good = true;
		}
		if (proxLem.length() > 0) {
#if defined USEXAPIAN
			termGenerator.index_text(proxLem.c_str(), 1, "PL");
			termGenerator.index_text(proxMorph.c_str(), 1, "PM");
#elif defined USELUCENE
			doc->add(*_CLNEW Field(_T("proxlem"), (wchar_t *)utf8ToWChar(proxLem).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED) );
			doc->add(*_CLNEW Field(_T("proxmorph"), (wchar_t *)utf8ToWChar(proxMorph).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED) );
#endif
			good = true;
		}
		// write the document only when it received content or proximity data
		if (good) {
//printf("writing (%s).\n", (const char *)*key);
//fflush(stdout);
#if defined USEXAPIAN
			// the document is stored under a "Q<index>" id term, replacing any
			// document already stored under that term
			SWBuf idTerm;
			idTerm.setFormatted("Q%ld", key->getIndex());
			doc.add_boolean_term(idTerm.c_str());
			database.replace_document(idTerm.c_str(), doc);
#elif defined USELUCENE
			coreWriter->addDocument(doc);
#endif
		}
#if defined USEXAPIAN
#elif defined USELUCENE
		// the Document is heap allocated once per iteration and released here
		delete doc;
#endif

		(*this)++;
		err = popError();
	}

	// Optimizing automatically happens with the call to addIndexes
	//coreWriter->optimize();
#if defined USEXAPIAN
#elif defined USELUCENE
	coreWriter->close();

	// open the on-disk index (appending when one already exists, creating it
	// otherwise) and merge the in-memory index into it
#ifdef CLUCENE2
	d = FSDirectory::getDirectory(target.c_str());
#endif
	if (IndexReader::indexExists(target.c_str())) {
#ifndef CLUCENE2
		d = FSDirectory::getDirectory(target.c_str(), false);
#endif
		// clear a lock left on the existing index before writing to it
		if (IndexReader::isLocked(d)) {
			IndexReader::unlock(d);
		}
		fsWriter = new IndexWriter( d, an, false);
	}
	else {
#ifndef CLUCENE2
		d = FSDirectory::getDirectory(target.c_str(), true);
#endif
		fsWriter = new IndexWriter(d, an, true);
	}

	Directory *dirs[] = { ramDir, 0 };
#ifdef CLUCENE2
	lucene::util::ConstValueArray< lucene::store::Directory *>dirsa(dirs, 1);
	fsWriter->addIndexes(dirsa);
#else
	fsWriter->addIndexes(dirs);
#endif
	fsWriter->close();

	delete ramDir;
	delete coreWriter;
	delete fsWriter;
	delete an;
#endif

	// reposition module back to where it was before we were called
	setKey(*saveKey);

	// only a copy made at the start is ours to delete
	if (!saveKey->isPersist())
		delete saveKey;

	if (searchKey)
		delete searchKey;

	delete chapMax;

	setProcessEntryAttributes(savePEA);

	// reset option filters back to original values
	// (filterSettings was filled in the same list order at the start)
	StringList::iterator origVal = filterSettings.begin();
	for (OptionFilterList::iterator filter = optionFilters->begin(); filter != optionFilters->end(); filter++) {
		(*filter)->setOptionValue(*origVal++);
	}

	return 0;
#else
	return SWSearchable::createSearchFramework(percent, percentUserData);
#endif
}
1579 
1580 /** OptionFilterBuffer a text buffer
1581  * @param filters the FilterList of filters to iterate
1582  * @param buf the buffer to filter
1583  * @param key key location from where this buffer was extracted
1584  */
1585 void SWModule::filterBuffer(OptionFilterList *filters, SWBuf &buf, const SWKey *key) const {
1586 	OptionFilterList::iterator it;
1587 	for (it = filters->begin(); it != filters->end(); it++) {
1588 		(*it)->processText(buf, key, this);
1589 	}
1590 }
1591 
1592 /** FilterBuffer a text buffer
1593  * @param filters the FilterList of filters to iterate
1594  * @param buf the buffer to filter
1595  * @param key key location from where this buffer was extracted
1596  */
1597 void SWModule::filterBuffer(FilterList *filters, SWBuf &buf, const SWKey *key) const {
1598 	FilterList::iterator it;
1599 	for (it = filters->begin(); it != filters->end(); it++) {
1600 		(*it)->processText(buf, key, this);
1601 	}
1602 }
1603 
/** Base implementation: ignores its argument and always returns -1.
 * NOTE(review): presumably module drivers that can create modules supply
 * their own version — confirm against the concrete drivers.
 */
signed char SWModule::createModule(const char*) {
	return -1;
}
1607 
/** Base implementation: both arguments are ignored and nothing is done.
 * NOTE(review): presumably overridden by writable module drivers — confirm.
 */
void SWModule::setEntry(const char*, long) {
}
1610 
/** Base implementation: the argument is ignored and nothing is done.
 * NOTE(review): presumably overridden by writable module drivers — confirm.
 */
void SWModule::linkEntry(const SWKey*) {
}
1613 
1614 
1615 /******************************************************************************
1616  * SWModule::prepText	- Prepares the text before returning it to external
1617  *					objects
1618  *
1619  * ENT:	buf	- buffer where text is stored and where to store the prep'd
1620  *				text.
1621  */
1622 
1623 void SWModule::prepText(SWBuf &buf) {
1624 	unsigned int to, from;
1625 	char space = 0, cr = 0, realdata = 0, nlcnt = 0;
1626 	char *rawBuf = buf.getRawData();
1627 	for (to = from = 0; rawBuf[from]; from++) {
1628 		switch (rawBuf[from]) {
1629 		case 10:
1630 			if (!realdata)
1631 				continue;
1632 			space = (cr) ? 0 : 1;
1633 			cr = 0;
1634 			nlcnt++;
1635 			if (nlcnt > 1) {
1636 //				*to++ = nl;
1637 				rawBuf[to++] = 10;
1638 //				*to++ = nl[1];
1639 //				nlcnt = 0;
1640 			}
1641 			continue;
1642 		case 13:
1643 			if (!realdata)
1644 				continue;
1645 //			*to++ = nl[0];
1646 			rawBuf[to++] = 10;
1647 			space = 0;
1648 			cr = 1;
1649 			continue;
1650 		}
1651 		realdata = 1;
1652 		nlcnt = 0;
1653 		if (space) {
1654 			space = 0;
1655 			if (rawBuf[from] != ' ') {
1656 				rawBuf[to++] = ' ';
1657 				from--;
1658 				continue;
1659 			}
1660 		}
1661 		rawBuf[to++] = rawBuf[from];
1662 	}
1663 	buf.setSize(to);
1664 
1665 	while (to > 1) {			// remove trailing excess
1666 		to--;
1667 		if ((rawBuf[to] == 10) || (rawBuf[to] == ' '))
1668 			buf.setSize(to);
1669 		else break;
1670 	}
1671 }
1672 
1673 SWORD_NAMESPACE_END
1674