/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "test.h"
#include <CLucene/search/MatchAllDocsQuery.h>
#include <stdio.h>

//checks if a merged index finds phrases correctly
void testIWmergePhraseSegments(CuTest *tc){
	char fsdir[CL_MAX_PATH];
	_snprintf(fsdir, CL_MAX_PATH, "%s/%s", cl_tempDir, "test.indexwriter");
	SimpleAnalyzer a;
	Directory* dir = FSDirectory::getDirectory(fsdir);

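	// first pass: build a fresh on-disk index containing the phrase "value0 value1"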
	IndexWriter ndx2(dir,&a,true);
	ndx2.setUseCompoundFile(false);
	Document doc0;
	doc0.add(
		*_CLNEW Field(
			_T("field0"),
			_T("value0 value1"),
			Field::STORE_YES | Field::INDEX_TOKENIZED
		)
	);
	ndx2.addDocument(&doc0);
	ndx2.optimize();
	ndx2.close();

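	// second pass: reopen the same index by path with create=false and append the reversed phrase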
	IndexWriter ndx(fsdir,&a,false);
	ndx.setUseCompoundFile(false);
	Document doc1;
	doc1.add(
		*_CLNEW Field(
			_T("field0"),
			_T("value1 value0"),
			Field::STORE_YES | Field::INDEX_TOKENIZED
		)
	);
	ndx.addDocument(&doc1);
	ndx.optimize();
	ndx.close();

	//test the index querying
	IndexSearcher searcher(fsdir);
	Query* query0 = QueryParser::parse(
		_T("\"value0 value1\""),
		_T("field0"),
		&a
	);
	Hits* hits0 = searcher.search(query0);
	CLUCENE_ASSERT(hits0->length() > 0);
	Query* query1 = QueryParser::parse(
		_T("\"value1 value0\""),
		_T("field0"),
		&a
	);
	Hits* hits1 = searcher.search(query1);
	CLUCENE_ASSERT(hits1->length() > 0);
	_CLDELETE(query0);
	_CLDELETE(query1);
	_CLDELETE(hits0);
	_CLDELETE(hits1);
	_CLDECDELETE(dir);
}

//checks that adding more than the min_merge value goes ok...
//checks for a mem leak that used to occur
void testIWmergeSegments1(CuTest *tc){
	RAMDirectory ram;
	SimpleAnalyzer a;

	IndexWriter ndx2(&ram,&a,true);
	ndx2.close();                   //test immediate closing bug reported

	IndexWriter ndx(&ram,&a,true);  //recreate the index (create=true) over the just-closed one

	ndx.setUseCompoundFile(false);
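	// a merge factor of 2 keeps only two segments per level, forcing frequent
	// cascading merges while the 1000 documents are added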
	ndx.setMergeFactor(2);
	TCHAR fld[1000];
	for ( int i=0;i<1000;i++ ){
		English::IntToEnglish(i,fld,1000);

		Document doc;

		doc.add ( *_CLNEW Field(_T("field0"),fld,Field::STORE_YES | Field::INDEX_TOKENIZED) );
		doc.add ( *_CLNEW Field(_T("field1"),fld,Field::STORE_YES | Field::INDEX_TOKENIZED) );
		doc.add ( *_CLNEW Field(_T("field2"),fld,Field::STORE_YES | Field::INDEX_TOKENIZED) );
		doc.add ( *_CLNEW Field(_T("field3"),fld,Field::STORE_YES | Field::INDEX_TOKENIZED) );
		ndx.addDocument(&doc);
	}
	//ndx.optimize(); //optimize so we can read terminfosreader with segmentreader
	ndx.close();

	//test the ram loading
	RAMDirectory ram2(&ram);
	IndexReader* reader2 = IndexReader::open(&ram2);
	Term* term = _CLNEW Term(_T("field0"),fld);
	TermEnum* en = reader2->terms(term);
	CLUCENE_ASSERT(en->next());
	_CLDELETE(en);
	_CLDECDELETE(term);
	_CLDELETE(reader2);
}

//checks if appending to an index works correctly
void testIWmergeSegments2(CuTest *tc){
	char fsdir[CL_MAX_PATH];
	_snprintf(fsdir, CL_MAX_PATH, "%s/%s", cl_tempDir, "test.indexwriter");
	SimpleAnalyzer a;
	Directory* dir = FSDirectory::getDirectory(fsdir);

	IndexWriter ndx2(dir,&a,true);
	ndx2.setUseCompoundFile(false);
	Document doc0;
	doc0.add(
		*_CLNEW Field(
			_T("field0"),
			_T("value0"),
			Field::STORE_YES | Field::INDEX_TOKENIZED
		)
	);
	ndx2.addDocument(&doc0);
	ndx2.optimize();
	ndx2.close();

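	// reopen the same index by path with create=false so the second document is
	// appended rather than replacing the index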
	IndexWriter ndx(fsdir,&a,false);
	ndx.setUseCompoundFile(false);
	Document doc1;
	doc1.add(
		*_CLNEW Field(
			_T("field0"),
			_T("value1"),
			Field::STORE_YES | Field::INDEX_TOKENIZED
		)
	);
	ndx.addDocument(&doc1);
	ndx.optimize();
	ndx.close();

	//test the index querying
	IndexSearcher searcher(fsdir);
	Term* term0 = _CLNEW Term(_T("field0"),_T("value1"));
	Query* query0 = QueryParser::parse(_T("value0"),_T("field0"),&a);
	Hits* hits0 = searcher.search(query0);
	CLUCENE_ASSERT(hits0->length() > 0);
	Term* term1 = _CLNEW Term(_T("field0"),_T("value0"));
	Query* query1 = QueryParser::parse(_T("value1"),_T("field0"),&a);
	Hits* hits1 = searcher.search(query1);
	CLUCENE_ASSERT(hits1->length() > 0);
	_CLDELETE(query0);
	_CLDELETE(query1);
	_CLDELETE(hits0);
	_CLDELETE(hits1);
	_CLDECDELETE(term0);
	_CLDECDELETE(term1);
	dir->close();
	_CLDECDELETE(dir);
}

void testAddIndexes(CuTest *tc){
  char reuters_origdirectory[1024];
  strcpy(reuters_origdirectory, clucene_data_location);
  strcat(reuters_origdirectory, "/reuters-21578-index");

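  // run the same merge twice: addIndexesNoOptimize first, then the optimizing
  // addIndexes; both must leave the writer with the combined document count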
  {
    RAMDirectory dir;
    WhitespaceAnalyzer a;
    IndexWriter w(&dir, &a, true);
    ValueArray<Directory*> dirs(2);
    dirs[0] = FSDirectory::getDirectory(reuters_origdirectory);
    dirs[1] = FSDirectory::getDirectory(reuters_origdirectory);
    w.addIndexesNoOptimize(dirs);
    w.flush();
    CLUCENE_ASSERT(w.docCount()==62); //31 docs in reuters...

    // TODO: Currently there is a double ref-counting mechanism in place for Directory objects,
    //       so we need to dec them both
    dirs[1]->close(); _CLDECDELETE(dirs[1]);
    dirs[0]->close(); _CLDECDELETE(dirs[0]);
  }
  {
    RAMDirectory dir;
    WhitespaceAnalyzer a;
    IndexWriter w(&dir, &a, true);
    ValueArray<Directory*> dirs(2);
    dirs[0] = FSDirectory::getDirectory(reuters_origdirectory);
    dirs[1] = FSDirectory::getDirectory(reuters_origdirectory);
    w.addIndexes(dirs);
    w.flush();
    CLUCENE_ASSERT(w.docCount()==62); //31 docs in reuters...

    // TODO: Currently there is a double ref-counting mechanism in place for Directory objects,
    //       so we need to dec them both
    dirs[1]->close(); _CLDECDELETE(dirs[1]);
    dirs[0]->close(); _CLDECDELETE(dirs[0]);
  }
}

void testHashingBug(CuTest* /*tc*/){
  //Manuel Freiholz's indexing bug

  CL_NS(document)::Document doc;
  CL_NS(document)::Field* field;
  CL_NS(analysis::standard)::StandardAnalyzer analyzer;
  CL_NS(store)::RAMDirectory dir;
  CL_NS(index)::IndexWriter writer(&dir, &analyzer, true, true);

  field = _CLNEW CL_NS(document)::Field( _T("CNS_VERSION"), _T("1"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
  doc.add( (*field) );

  field = _CLNEW CL_NS(document)::Field( _T("CNS_PID"), _T("5"), CL_NS(document)::Field::STORE_YES | CL_NS(document)::Field::INDEX_UNTOKENIZED );
  doc.add( (*field) );

  field = _CLNEW CL_NS(document)::Field( _T("CNS_DATE"), _T("20090722"), CL_NS(document)::Field::STORE_YES | CL_NS(document)::Field::INDEX_UNTOKENIZED );
  doc.add( (*field) );

  field = _CLNEW CL_NS(document)::Field( _T("CNS_SEARCHDATA"), _T("all kind of data"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_TOKENIZED );
  doc.add( (*field) );

  writer.addDocument( &doc ); // ADDING FIRST DOCUMENT. -> this works!

  doc.clear();

  field = _CLNEW CL_NS(document)::Field( _T("CNS_VERSION"), _T("1"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
  doc.add( (*field) );

  field = _CLNEW CL_NS(document)::Field( _T("CNS_PID"), _T("5"), CL_NS(document)::Field::STORE_YES | CL_NS(document)::Field::INDEX_UNTOKENIZED );
  doc.add( (*field) );

  field = _CLNEW CL_NS(document)::Field( _T("CNS_LINEID"), _T("20"), CL_NS(document)::Field::STORE_YES | CL_NS(document)::Field::INDEX_UNTOKENIZED );
  doc.add( (*field) );

  field = _CLNEW CL_NS(document)::Field( _T("CNS_VT_ORDER"), _T("456033000"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
  doc.add( (*field) );

  field = _CLNEW CL_NS(document)::Field( _T("CNS_VN_H"), _T("456033000"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
  doc.add( (*field) );

  field = _CLNEW CL_NS(document)::Field( _T("CNS_VN_HF"), _T("456033000"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
  doc.add( (*field) );

  field = _CLNEW CL_NS(document)::Field( _T("CNS_VN_D"), _T("456033000"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
  doc.add( (*field) );

  field = _CLNEW CL_NS(document)::Field( _T("CNS_VN_OD"), _T("456033000"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
  doc.add( (*field) );

  field = _CLNEW CL_NS(document)::Field( _T("CNS_VN_P1"), _T("456033000"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
  doc.add( (*field) );

  field = _CLNEW CL_NS(document)::Field( _T("CNS_VN_H1"), _T("456033000"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
  doc.add( (*field) ); // the problematic field!

  writer.addDocument( &doc ); // ADDING SECOND DOCUMENT - this call used to hang and never return
  writer.optimize();          // used to get stuck in lines 222-223
  writer.close();
  _CL_LDECREF(&dir);
}

class IWlargeScaleCorrectness_tester {
public:
	void invoke(Directory& storage, CuTest *tc);
};

void IWlargeScaleCorrectness_tester::invoke(
	Directory& storage,
	CuTest *tc
){
	SimpleAnalyzer a;

	IndexWriter* ndx = _CLNEW IndexWriter(&storage,&a,true);

	ndx->setUseCompoundFile(false);

	const long documents = 200;
	const long step = 23;
	const long inverted_step = 113;
	const long repetitions = 5;

	CLUCENE_ASSERT(0 == (step * inverted_step + 1) % documents);
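	// i.e. step * inverted_step == -1 (mod documents): the two strides are negated
	// modular inverses, so walking field1 in descending order advances field0 by
	// inverted_step modulo documents - exactly the stride the verification loop checks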

	long value0;
	long value1 = 0;

	long block_size = 1;
	long reopen = 1;

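	// add documents in progressively larger batches, closing and reopening the writer
	// between batches so the index accumulates segments of many different sizes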
	for (value0 = 0; value0 < documents * repetitions; value0++) {
		if (reopen == value0) {
			ndx->optimize();
			ndx->close();
			_CLDELETE(ndx);
			ndx = _CLNEW IndexWriter(&storage,&a,false);
			ndx->setUseCompoundFile(false);
			reopen += block_size;
			block_size++;
		}

		TCHAR* value0_string = NumberTools::longToString(value0 % documents);
		TCHAR* value1_string = NumberTools::longToString(value1);

		Document doc;

		doc.add (
			*_CLNEW Field(
				_T("field0"),
				value0_string,
				Field::STORE_YES | Field::INDEX_UNTOKENIZED
			)
		);
		doc.add (
			*_CLNEW Field(
				_T("field1"),
				value1_string,
				Field::STORE_YES | Field::INDEX_UNTOKENIZED
			)
		);
		ndx->addDocument(&doc);

		_CLDELETE_ARRAY(value0_string);
		_CLDELETE_ARRAY(value1_string);
		value1 = (value1 + step) % documents;
	}

	ndx->optimize();
	ndx->close();

	IndexSearcher searcher(&storage);
	Query* query0 = _CLNEW MatchAllDocsQuery;
	Sort by_value1(
		_CLNEW SortField(
			_T("field1"),
			SortField::STRING,
			true
		)
	);
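	// retrieve every document sorted by field1 in descending order; field0 must then
	// advance by inverted_step at each group boundary and stay constant within a group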
	Hits* hits0 = searcher.search(query0, &by_value1);
	long last = 0;
	for (long i = 0; i < hits0->length(); i++) {
		Document& retrieved = hits0->doc(i);
		TCHAR const* value = retrieved.get(_T("field0"));
		long current = NumberTools::stringToLong(value);
		long delta = (current + documents - last) % documents;
		if (0 == (i % repetitions)) {
			CLUCENE_ASSERT(inverted_step == delta);
		} else {
			CLUCENE_ASSERT(0 == delta);
		}
		last = current;
	}
	_CLDELETE(query0);
	_CLDELETE(hits0);
	_CLDELETE(ndx);
}

void testIWlargeScaleCorrectness(CuTest *tc){
	char fsdir[CL_MAX_PATH];
	_snprintf(fsdir, CL_MAX_PATH, "%s/%s", cl_tempDir, "test.search");
	RAMDirectory ram;
	FSDirectory* disk = FSDirectory::getDirectory(fsdir);
	IWlargeScaleCorrectness_tester().invoke(ram, tc);
	IWlargeScaleCorrectness_tester().invoke(*disk, tc);
	disk->close();
	_CLDECDELETE(disk);
}

void testExceptionFromTokenStream(CuTest *tc) {

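    // a token filter that throws an IO error on its sixth call to next(), simulating
    // an analyzer failure partway through indexing a document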
    class TokenFilterWithException : public TokenFilter
    {
    private:
        int count;

    public:
        TokenFilterWithException(TokenStream * in) :
            TokenFilter(in, true), count(0) {}

        Token* next(Token * pToken) {
            if (count++ == 5) {
                _CLTHROWA(CL_ERR_IO, "TokenFilterWithException testing IO exception");
            }
            return input->next(pToken);
        }
    };

    class AnalyzerWithException : public Analyzer
    {
        TokenStream* lastStream;
    public:
        AnalyzerWithException() { lastStream = NULL; }
        virtual ~AnalyzerWithException() { _CLDELETE( lastStream ); }

        TokenStream* tokenStream(const TCHAR * fieldName, Reader * reader) {
            return _CLNEW TokenFilterWithException(_CLNEW WhitespaceTokenizer(reader));
        }

        TokenStream* reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader)
        {
            _CLDELETE( lastStream );
            lastStream = _CLNEW TokenFilterWithException(_CLNEW WhitespaceTokenizer(reader));
            return lastStream;
        }
    };

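    // the first document yields nine whitespace tokens, so adding it must fail on the
    // sixth; the writer should mark the document deleted and remain usable afterwards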
    RAMDirectory * dir = _CLNEW RAMDirectory();
    AnalyzerWithException a;
    IndexWriter * writer = _CLNEW IndexWriter(dir, &a, true);

    Document* doc = _CLNEW Document();
    doc->add(* _CLNEW Field(_T("content"), _T("aa bb cc dd ee ff gg hh ii"),
        Field::STORE_NO | Field::INDEX_TOKENIZED));
    try {
        writer->addDocument(doc);
        CuFail(tc, _T("did not hit expected exception"));
    } catch (CLuceneError&) {
    }
    _CLLDELETE(doc);

    // Make sure we can add another normal document
    doc = _CLNEW Document();
    doc->add(* _CLNEW Field(_T("content"), _T("aa bb cc dd"), Field::STORE_NO | Field::INDEX_TOKENIZED));
    writer->addDocument(doc);
    _CLLDELETE(doc);

    // Make sure we can add yet another normal document
    doc = _CLNEW Document();
    doc->add(* _CLNEW Field(_T("content"), _T("aa bb cc dd"), Field::STORE_NO | Field::INDEX_TOKENIZED));
    writer->addDocument(doc);
    _CLLDELETE(doc);

    writer->close();
    _CLLDELETE(writer);

    IndexReader* reader = IndexReader::open(dir);
    Term* t = _CLNEW Term(_T("content"), _T("aa"));
    assertEquals(reader->docFreq(t), 3);

    // Make sure the doc that hit the exception was marked
    // as deleted:
    TermDocs* tdocs = reader->termDocs(t);
    int count = 0;
    while(tdocs->next()) {
        count++;
    }
    _CLLDELETE(tdocs);
    assertEquals(2, count);

    t->set(_T("content"), _T("gg"));
    assertEquals(reader->docFreq(t), 0);
    _CLDECDELETE(t);

    reader->close();
    _CLLDELETE(reader);

    dir->close();
    _CLDECDELETE(dir);
}

/**
* Make sure we skip wicked long terms.
*/
void testWickedLongTerm(CuTest *tc) {
    RAMDirectory* dir = _CLNEW RAMDirectory();
    StandardAnalyzer a;
    IndexWriter* writer = _CLNEW IndexWriter(dir, &a, true);

    TCHAR bigTerm[16383];
    for (int i=0; i<16382; i++)
        bigTerm[i]=_T('x');
    bigTerm[16382] = 0;

    Document* doc = _CLNEW Document();

    // Max length term is 16383, so this contents produces
    // a too-long term:
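    // (the trailing "x" of "abc xyz x" runs straight into bigTerm, so the analyzer
    // sees a single token that exceeds the maximum term length)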
    TCHAR* contents = _CL_NEWARRAY(TCHAR, 17000);
    _tcscpy(contents, _T("abc xyz x"));
    _tcscat(contents, bigTerm);
    _tcscat(contents, _T(" another term"));
    doc->add(* _CLNEW Field(_T("content"), contents, Field::STORE_NO | Field::INDEX_TOKENIZED));
    _CLDELETE_CARRAY(contents);
    writer->addDocument(doc);
    _CLLDELETE(doc);

    // Make sure we can add another normal document
    doc = _CLNEW Document();
    doc->add(* _CLNEW Field(_T("content"), _T("abc bbb ccc"), Field::STORE_NO | Field::INDEX_TOKENIZED));
    writer->addDocument(doc);
    _CLLDELETE(doc);
    writer->close();
    _CLDELETE(writer);

    IndexReader* reader = IndexReader::open(dir);

    // Make sure all terms < max size were indexed
    Term* t = _CLNEW Term(_T("content"), _T("abc"), true);
    assertEquals(2, reader->docFreq(t));
    t->set(_T("content"), _T("bbb"), true);
    assertEquals(1, reader->docFreq(t));
    t->set(_T("content"), _T("term"), true);
    assertEquals(1, reader->docFreq(t));
    t->set(_T("content"), _T("another"), true);
    assertEquals(1, reader->docFreq(t));

    // Make sure position is still incremented when
    // massive term is skipped:
    t->set(_T("content"), _T("another"), true);
    TermPositions* tps = reader->termPositions(t);
    assertTrue(tps->next());
    assertEquals(1, tps->freq());
    assertEquals(3, tps->nextPosition());
    _CLLDELETE(tps);

    // Make sure the doc that has the massive term is in
    // the index:
    assertEqualsMsg(_T("document with wicked long term is not in the index!"), 1, reader->numDocs());

    reader->close();
    _CLLDELETE(reader);

    // Make sure we can add a document with exactly the
    // maximum length term, and search on that term:
    doc = _CLNEW Document();
    doc->add(*_CLNEW Field(_T("content"), bigTerm, Field::STORE_NO | Field::INDEX_TOKENIZED));
    StandardAnalyzer sa;
    sa.setMaxTokenLength(100000);
    writer = _CLNEW IndexWriter(dir, &sa, true);
    writer->addDocument(doc);
    _CLLDELETE(doc);
    writer->close();
    reader = IndexReader::open(dir);
    t->set(_T("content"), bigTerm);
    assertEquals(1, reader->docFreq(t));
    reader->close();

    _CLDECDELETE(t);

    _CLLDELETE(writer);
    _CLLDELETE(reader);

    dir->close();
    _CLDECDELETE(dir);
}

void testDeleteDocument(CuTest* tc) {
    const int size = 205;
    RAMDirectory* dir = _CLNEW RAMDirectory();
    StandardAnalyzer a;
    IndexWriter* writer = _CLNEW IndexWriter(dir, &a, true);

    // build an index that is big enough that a deletions file is written
    // in the DGaps format
    for (int i = 0; i < size; i++) {
        Document* doc = _CLNEW Document();
        TCHAR* contents = _CL_NEWARRAY(TCHAR, (size / 10) + 1);
        _i64tot(i, contents, 10);
        doc->add(* _CLNEW Field(_T("content"), contents, Field::STORE_NO | Field::INDEX_TOKENIZED));
        _CLDELETE_CARRAY(contents);
        writer->addDocument(doc);
        _CLLDELETE(doc);
    }

    // assure that the index has only one segment
    writer->optimize();
    // close and flush index
    writer->close();
    _CLLDELETE( writer );

    // reopen the index and delete the document next to last
    writer = _CLNEW IndexWriter(dir, &a, false);
    TCHAR* contents = _CL_NEWARRAY(TCHAR, (size / 10) + 1);
    _i64tot(size - 2, contents, 10);
    Term* t = _CLNEW Term(_T("content"), contents);
    _CLDELETE_LARRAY( contents );
    writer->deleteDocuments(t);
    writer->close();

    // now the index has a deletions file in the DGaps format

    _CLLDELETE(writer);
    _CLDECDELETE(t);

    // open this index with a searcher to read the deletions file again
    IndexReader* reader = IndexReader::open(dir);
    IndexSearcher* searcher = _CLNEW IndexSearcher(reader);
    searcher->close();
    reader->close();
    _CLLDELETE(searcher);
    _CLLDELETE(reader);

    dir->close();
    _CLLDELETE( dir );
}

void testMergeIndex(CuTest* tc) {

    // This test reproduces a crash that depends on the following:
    // - The first document needs two differently named fields that set TERMVECTOR_YES.
    // - The IndexWriter needs to reopen an existing index.
    // - The reopened IndexWriter needs to call optimize() to force a merge
    //   on term vectors. This merging causes the crash.
    // Submitted by McCann

    RAMDirectory* dir = _CLNEW RAMDirectory();

    // open a new lucene index
    SimpleAnalyzer a;
    IndexWriter* writer = _CLNEW IndexWriter( dir, false, &a, true );
    writer->setUseCompoundFile( false );

    // add two fields to document
    Document* doc = _CLNEW Document();
    doc->add ( *_CLNEW Field(_T("field0"), _T("value0"), Field::STORE_NO | Field::TERMVECTOR_YES | Field::INDEX_TOKENIZED) );
    doc->add ( *_CLNEW Field(_T("field1"), _T("value1"), Field::STORE_NO | Field::TERMVECTOR_YES | Field::INDEX_TOKENIZED) );
    writer->addDocument(doc);
    _CLLDELETE(doc);

    // close and flush index
    writer->close();
    _CLLDELETE( writer );

    // open the previous lucene index
    writer = _CLNEW IndexWriter( dir, false, &a, false );
    writer->setUseCompoundFile( false );

    // add a field to document
    // note: the settings on this field don't seem to affect the crash
    doc = _CLNEW Document();
    doc->add ( *_CLNEW Field(_T("field"), _T("value"), Field::STORE_NO | Field::TERMVECTOR_YES | Field::INDEX_TOKENIZED) );
    writer->addDocument(doc);
    _CLLDELETE(doc);

    // optimize index to force a merge
    writer->optimize();
    // close and flush index
    writer->close();
    _CLLDELETE( writer );

    // Close directory
    dir->close();
    _CLLDELETE( dir );
}

CuSuite *testindexwriter(void)
{
    CuSuite *suite = CuSuiteNew(_T("CLucene IndexWriter Test"));
    SUITE_ADD_TEST(suite, testHashingBug);
    SUITE_ADD_TEST(suite, testAddIndexes);
    SUITE_ADD_TEST(suite, testIWmergeSegments1);
    SUITE_ADD_TEST(suite, testIWmergeSegments2);
    SUITE_ADD_TEST(suite, testIWmergePhraseSegments);
    SUITE_ADD_TEST(suite, testIWlargeScaleCorrectness);

    // TODO: This test fails due to differences between CLucene's StandardTokenizer and
    // JLucene's; it should pass once the tokenizer is brought up to date.
    //SUITE_ADD_TEST(suite, testWickedLongTerm);

    SUITE_ADD_TEST(suite, testExceptionFromTokenStream);
    SUITE_ADD_TEST(suite, testDeleteDocument);
    SUITE_ADD_TEST(suite, testMergeIndex);

    return suite;
}
// EOF