/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "test.h"
#include <CLucene/search/MatchAllDocsQuery.h>
#include <stdio.h>

11 //checks if a merged index finds phrases correctly
testIWmergePhraseSegments(CuTest * tc)12 void testIWmergePhraseSegments(CuTest *tc){
13 char fsdir[CL_MAX_PATH];
14 _snprintf(fsdir, CL_MAX_PATH, "%s/%s",cl_tempDir, "test.indexwriter");
15 SimpleAnalyzer a;
16 Directory* dir = FSDirectory::getDirectory(fsdir);
17
18 IndexWriter ndx2(dir,&a,true);
19 ndx2.setUseCompoundFile(false);
20 Document doc0;
21 doc0.add(
22 *_CLNEW Field(
23 _T("field0"),
24 _T("value0 value1"),
25 Field::STORE_YES | Field::INDEX_TOKENIZED
26 )
27 );
28 ndx2.addDocument(&doc0);
29 ndx2.optimize();
30 ndx2.close();
31
32 IndexWriter ndx(fsdir,&a,false);
33 ndx.setUseCompoundFile(false);
34 Document doc1;
35 doc1.add(
36 *_CLNEW Field(
37 _T("field0"),
38 _T("value1 value0"),
39 Field::STORE_YES | Field::INDEX_TOKENIZED
40 )
41 );
42 ndx.addDocument(&doc1);
43 ndx.optimize();
44 ndx.close();
45
46 //test the index querying
47 IndexSearcher searcher(fsdir);
48 Query* query0 = QueryParser::parse(
49 _T("\"value0 value1\""),
50 _T("field0"),
51 &a
52 );
53 Hits* hits0 = searcher.search(query0);
54 CLUCENE_ASSERT(hits0->length() > 0);
55 Query* query1 = QueryParser::parse(
56 _T("\"value1 value0\""),
57 _T("field0"),
58 &a
59 );
60 Hits* hits1 = searcher.search(query1);
61 CLUCENE_ASSERT(hits1->length() > 0);
62 _CLDELETE(query0);
63 _CLDELETE(query1);
64 _CLDELETE(hits0);
65 _CLDELETE(hits1);
66 _CLDECDELETE(dir);
67 }
68
69 //checks that adding more than the min_merge value goes ok...
70 //checks for a mem leak that used to occur
testIWmergeSegments1(CuTest * tc)71 void testIWmergeSegments1(CuTest *tc){
72 RAMDirectory ram;
73 SimpleAnalyzer a;
74
75 IndexWriter ndx2(&ram,&a,true);
76 ndx2.close(); //test immediate closing bug reported
77
78 IndexWriter ndx(&ram,&a,true); //set create to false
79
80 ndx.setUseCompoundFile(false);
81 ndx.setMergeFactor(2);
82 TCHAR fld[1000];
83 for ( int i=0;i<1000;i++ ){
84 English::IntToEnglish(i,fld,1000);
85
86 Document doc;
87
88 doc.add ( *_CLNEW Field(_T("field0"),fld,Field::STORE_YES | Field::INDEX_TOKENIZED) );
89 doc.add ( *_CLNEW Field(_T("field1"),fld,Field::STORE_YES | Field::INDEX_TOKENIZED) );
90 doc.add ( *_CLNEW Field(_T("field2"),fld,Field::STORE_YES | Field::INDEX_TOKENIZED) );
91 doc.add ( *_CLNEW Field(_T("field3"),fld,Field::STORE_YES | Field::INDEX_TOKENIZED) );
92 ndx.addDocument(&doc);
93 }
94 //ndx.optimize(); //optimize so we can read terminfosreader with segmentreader
95 ndx.close();
96
97 //test the ram loading
98 RAMDirectory ram2(&ram);
99 IndexReader* reader2 = IndexReader::open(&ram2);
100 Term* term = _CLNEW Term(_T("field0"),fld);
101 TermEnum* en = reader2->terms(term);
102 CLUCENE_ASSERT(en->next());
103 _CLDELETE(en);
104 _CLDECDELETE(term);
105 _CLDELETE(reader2);
106 }
107
108 //checks if appending to an index works correctly
testIWmergeSegments2(CuTest * tc)109 void testIWmergeSegments2(CuTest *tc){
110 char fsdir[CL_MAX_PATH];
111 _snprintf(fsdir, CL_MAX_PATH, "%s/%s",cl_tempDir, "test.indexwriter");
112 SimpleAnalyzer a;
113 Directory* dir = FSDirectory::getDirectory(fsdir);
114
115 IndexWriter ndx2(dir,&a,true);
116 ndx2.setUseCompoundFile(false);
117 Document doc0;
118 doc0.add(
119 *_CLNEW Field(
120 _T("field0"),
121 _T("value0"),
122 Field::STORE_YES | Field::INDEX_TOKENIZED
123 )
124 );
125 ndx2.addDocument(&doc0);
126 ndx2.optimize();
127 ndx2.close();
128
129 IndexWriter ndx(fsdir,&a,false);
130 ndx.setUseCompoundFile(false);
131 Document doc1;
132 doc1.add(
133 *_CLNEW Field(
134 _T("field0"),
135 _T("value1"),
136 Field::STORE_YES | Field::INDEX_TOKENIZED
137 )
138 );
139 ndx.addDocument(&doc1);
140 ndx.optimize();
141 ndx.close();
142
143 //test the ram querying
144 IndexSearcher searcher(fsdir);
145 Term* term0 = _CLNEW Term(_T("field0"),_T("value1"));
146 Query* query0 = QueryParser::parse(_T("value0"),_T("field0"),&a);
147 Hits* hits0 = searcher.search(query0);
148 CLUCENE_ASSERT(hits0->length() > 0);
149 Term* term1 = _CLNEW Term(_T("field0"),_T("value0"));
150 Query* query1 = QueryParser::parse(_T("value1"),_T("field0"),&a);
151 Hits* hits1 = searcher.search(query1);
152 CLUCENE_ASSERT(hits1->length() > 0);
153 _CLDELETE(query0);
154 _CLDELETE(query1);
155 _CLDELETE(hits0);
156 _CLDELETE(hits1);
157 _CLDECDELETE(term0);
158 _CLDECDELETE(term1);
159 dir->close();
160 _CLDECDELETE(dir);
161 }
162
testAddIndexes(CuTest * tc)163 void testAddIndexes(CuTest *tc){
164 char reuters_origdirectory[1024];
165 strcpy(reuters_origdirectory, clucene_data_location);
166 strcat(reuters_origdirectory, "/reuters-21578-index");
167
168 {
169 RAMDirectory dir;
170 WhitespaceAnalyzer a;
171 IndexWriter w(&dir, &a, true);
172 ValueArray<Directory*> dirs(2);
173 dirs[0] = FSDirectory::getDirectory(reuters_origdirectory);
174 dirs[1] = FSDirectory::getDirectory(reuters_origdirectory);
175 w.addIndexesNoOptimize(dirs);
176 w.flush();
177 CLUCENE_ASSERT(w.docCount()==62); //31 docs in reuters...
178
179 // TODO: Currently there is a double ref-counting mechanism in place for Directory objects,
180 // so we need to dec them both
181 dirs[1]->close();_CLDECDELETE(dirs[1]);
182 dirs[0]->close();_CLDECDELETE(dirs[0]);
183 }
184 {
185 RAMDirectory dir;
186 WhitespaceAnalyzer a;
187 IndexWriter w(&dir, &a, true);
188 ValueArray<Directory*> dirs(2);
189 dirs[0] = FSDirectory::getDirectory(reuters_origdirectory);
190 dirs[1] = FSDirectory::getDirectory(reuters_origdirectory);
191 w.addIndexes(dirs);
192 w.flush();
193 CLUCENE_ASSERT(w.docCount()==62); //31 docs in reuters...
194
195 // TODO: Currently there is a double ref-counting mechanism in place for Directory objects,
196 // so we need to dec them both
197 dirs[1]->close();_CLDECDELETE(dirs[1]);
198 dirs[0]->close();_CLDECDELETE(dirs[0]);
199 }
200 }
201
testHashingBug(CuTest *)202 void testHashingBug(CuTest* /*tc*/){
203 //Manuel Freiholz's indexing bug
204
205 CL_NS(document)::Document doc;
206 CL_NS(document)::Field* field;
207 CL_NS(analysis::standard)::StandardAnalyzer analyzer;
208 CL_NS(store)::RAMDirectory dir;
209 CL_NS(index)::IndexWriter writer(&dir, &analyzer, true, true );
210
211 field = _CLNEW CL_NS(document)::Field( _T("CNS_VERSION"), _T("1"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
212 doc.add( (*field) );
213
214 field = _CLNEW CL_NS(document)::Field( _T("CNS_PID"), _T("5"), CL_NS(document)::Field::STORE_YES | CL_NS(document)::Field::INDEX_UNTOKENIZED );
215 doc.add( (*field) );
216
217 field = _CLNEW CL_NS(document)::Field( _T("CNS_DATE"), _T("20090722"), CL_NS(document)::Field::STORE_YES | CL_NS(document)::Field::INDEX_UNTOKENIZED );
218 doc.add( (*field) );
219
220 field = _CLNEW CL_NS(document)::Field( _T("CNS_SEARCHDATA"), _T("all kind of data"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_TOKENIZED );
221 doc.add( (*field) );
222
223 writer.addDocument( &doc ); // ADDING FIRST DOCUMENT. -> this works!
224
225 doc.clear();
226
227 field = _CLNEW CL_NS(document)::Field( _T("CNS_VERSION"), _T("1"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
228 doc.add( (*field) );
229
230 field = _CLNEW CL_NS(document)::Field( _T("CNS_PID"), _T("5"), CL_NS(document)::Field::STORE_YES | CL_NS(document)::Field::INDEX_UNTOKENIZED );
231 doc.add( (*field) );
232
233 field = _CLNEW CL_NS(document)::Field( _T("CNS_LINEID"), _T("20"), CL_NS(document)::Field::STORE_YES | CL_NS(document)::Field::INDEX_UNTOKENIZED );
234 doc.add( (*field) );
235
236 field = _CLNEW CL_NS(document)::Field( _T("CNS_VT_ORDER"), _T("456033000"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
237 doc.add( (*field) );
238
239 field = _CLNEW CL_NS(document)::Field( _T("CNS_VN_H"), _T("456033000"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
240 doc.add( (*field) );
241
242 field = _CLNEW CL_NS(document)::Field( _T("CNS_VN_HF"), _T("456033000"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
243 doc.add( (*field) );
244
245 field = _CLNEW CL_NS(document)::Field( _T("CNS_VN_D"), _T("456033000"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
246 doc.add( (*field) );
247
248 field = _CLNEW CL_NS(document)::Field( _T("CNS_VN_OD"), _T("456033000"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
249 doc.add( (*field) );
250
251 field = _CLNEW CL_NS(document)::Field( _T("CNS_VN_P1"), _T("456033000"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
252 doc.add( (*field) );
253
254 field = _CLNEW CL_NS(document)::Field( _T("CNS_VN_H1"), _T("456033000"), CL_NS(document)::Field::STORE_NO | CL_NS(document)::Field::INDEX_UNTOKENIZED );
255 doc.add( (*field) ); // the problematic field!
256
257 writer.addDocument( &doc ); // ADDING SECOND DOCUMENT - will never return from this function
258 writer.optimize(); // stucks in line 222-223
259 writer.close();
260 _CL_LDECREF(&dir);
261 }
262
263 class IWlargeScaleCorrectness_tester {
264 public:
265 void invoke(Directory& storage, CuTest *tc);
266 };
267
invoke(Directory & storage,CuTest * tc)268 void IWlargeScaleCorrectness_tester::invoke(
269 Directory& storage,
270 CuTest *tc
271 ){
272 SimpleAnalyzer a;
273
274 IndexWriter* ndx = _CLNEW IndexWriter(&storage,&a,true);
275
276 ndx->setUseCompoundFile(false);
277
278 const long documents = 200;
279 const long step = 23;
280 const long inverted_step = 113;
281 const long repetitions = 5;
282
283 CLUCENE_ASSERT(0 == (step * inverted_step + 1) % documents);
284
285 long value0;
286 long value1 = 0;
287
288 long block_size = 1;
289 long reopen = 1;
290
291 for (value0 = 0; value0 < documents * repetitions; value0++) {
292 if (reopen == value0) {
293 ndx->optimize();
294 ndx->close();
295 _CLDELETE(ndx);
296 ndx = _CLNEW IndexWriter(&storage,&a,false);
297 ndx->setUseCompoundFile(false);
298 reopen += block_size;
299 block_size++;
300 }
301
302 TCHAR* value0_string = NumberTools::longToString(value0 % documents);
303 TCHAR* value1_string = NumberTools::longToString(value1);
304
305 Document doc;
306
307 doc.add (
308 *_CLNEW Field(
309 _T("field0"),
310 value0_string,
311 Field::STORE_YES | Field::INDEX_UNTOKENIZED
312 )
313 );
314 doc.add (
315 *_CLNEW Field(
316 _T("field1"),
317 value1_string,
318 Field::STORE_YES | Field::INDEX_UNTOKENIZED
319 )
320 );
321 ndx->addDocument(&doc);
322
323 _CLDELETE_ARRAY(value0_string);
324 _CLDELETE_ARRAY(value1_string);
325 value1 = (value1 + step) % documents;
326 }
327
328 ndx->optimize();
329 ndx->close();
330
331 IndexSearcher searcher(&storage);
332 Query* query0 = _CLNEW MatchAllDocsQuery;
333 Sort by_value1(
334 _CLNEW SortField(
335 _T("field1"),
336 SortField::STRING,
337 true
338 )
339 );
340 Hits* hits0 = searcher.search(query0, &by_value1);
341 long last = 0;
342 for (long i = 0; i < hits0->length(); i++) {
343 Document& retrieved = hits0->doc(i);
344 TCHAR const* value = retrieved.get(_T("field0"));
345 long current = NumberTools::stringToLong(value);
346 long delta = (current + documents - last) % documents;
347 if (0 == (i % repetitions)) {
348 CLUCENE_ASSERT(inverted_step == delta);
349 } else {
350 CLUCENE_ASSERT(0 == delta);
351 }
352 last = current;
353 }
354 _CLDELETE(query0);
355 _CLDELETE(hits0);
356 _CLDELETE(ndx);
357 }
358
testIWlargeScaleCorrectness(CuTest * tc)359 void testIWlargeScaleCorrectness(CuTest *tc){
360 char fsdir[CL_MAX_PATH];
361 _snprintf(fsdir,CL_MAX_PATH,"%s/%s",cl_tempDir, "test.search");
362 RAMDirectory ram;
363 FSDirectory* disk = FSDirectory::getDirectory(fsdir);
364 IWlargeScaleCorrectness_tester().invoke(ram, tc);
365 IWlargeScaleCorrectness_tester().invoke(*disk, tc);
366 disk->close();
367 _CLDECDELETE(disk);
368 }
369
testExceptionFromTokenStream(CuTest * tc)370 void testExceptionFromTokenStream(CuTest *tc) {
371
372 class TokenFilterWithException : public TokenFilter
373 {
374 private:
375 int count;
376
377 public:
378 TokenFilterWithException(TokenStream * in) :
379 TokenFilter(in, true), count(0) {};
380
381 Token* next(Token * pToken) {
382 if (count++ == 5) {
383 _CLTHROWA(CL_ERR_IO, "TokenFilterWithException testing IO exception");
384 }
385 return input->next(pToken);
386 };
387 };
388
389 class AnalyzerWithException : public Analyzer
390 {
391 TokenStream* lastStream;
392 public:
393 AnalyzerWithException() { lastStream = NULL; }
394 virtual ~AnalyzerWithException() { _CLDELETE( lastStream ); }
395 TokenStream* tokenStream(const TCHAR * fieldName, Reader * reader) {
396 return _CLNEW TokenFilterWithException(_CLNEW WhitespaceTokenizer(reader));
397 };
398
399 TokenStream* reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader)
400 {
401 _CLDELETE( lastStream );
402 lastStream = _CLNEW TokenFilterWithException(_CLNEW WhitespaceTokenizer(reader));
403 return lastStream;
404 }
405 };
406
407 RAMDirectory * dir = _CLNEW RAMDirectory();
408 AnalyzerWithException a;
409 IndexWriter * writer = _CLNEW IndexWriter(dir, &a, true);
410
411 Document* doc = _CLNEW Document();
412 doc->add(* _CLNEW Field(_T("content"), _T("aa bb cc dd ee ff gg hh ii"),
413 Field::STORE_NO | Field::INDEX_TOKENIZED));
414 try {
415 writer->addDocument(doc);
416 CuFail(tc, _T("did not hit expected exception"));
417 } catch (CLuceneError&) {
418 }
419 _CLLDELETE(doc);
420
421 // Make sure we can add another normal document
422 doc = _CLNEW Document();
423 doc->add(* _CLNEW Field(_T("content"), _T("aa bb cc dd"), Field::STORE_NO | Field::INDEX_TOKENIZED));
424 writer->addDocument(doc);
425 _CLLDELETE(doc);
426
427 // Make sure we can add another normal document
428 doc = _CLNEW Document();
429 doc->add(* _CLNEW Field(_T("content"), _T("aa bb cc dd"), Field::STORE_NO | Field::INDEX_TOKENIZED));
430 writer->addDocument(doc);
431 _CLLDELETE(doc);
432
433 writer->close();
434 _CLLDELETE(writer);
435
436 IndexReader* reader = IndexReader::open(dir);
437 Term* t = _CLNEW Term(_T("content"), _T("aa"));
438 assertEquals(reader->docFreq(t), 3);
439
440 // Make sure the doc that hit the exception was marked
441 // as deleted:
442 TermDocs* tdocs = reader->termDocs(t);
443 int count = 0;
444 while(tdocs->next()) {
445 count++;
446 }
447 _CLLDELETE(tdocs);
448 assertEquals(2, count);
449
450 t->set(_T("content"), _T("gg"));
451 assertEquals(reader->docFreq(t), 0);
452 _CLDECDELETE(t);
453
454 reader->close();
455 _CLLDELETE(reader);
456
457 dir->close();
458 _CLDECDELETE(dir);
459 }
460
461 /**
462 * Make sure we skip wicked long terms.
463 */
testWickedLongTerm(CuTest * tc)464 void testWickedLongTerm(CuTest *tc) {
465 RAMDirectory* dir = _CLNEW RAMDirectory();
466 StandardAnalyzer a;
467 IndexWriter* writer = _CLNEW IndexWriter(dir, &a, true);
468
469 TCHAR bigTerm[16383];
470 for (int i=0; i<16383; i++)
471 bigTerm[i]=_T('x');
472 bigTerm[16382] = 0;
473
474 Document* doc = _CLNEW Document();
475
476 // Max length term is 16383, so this contents produces
477 // a too-long term:
478 TCHAR* contents = _CL_NEWARRAY(TCHAR, 17000);
479 _tcscpy(contents, _T("abc xyz x"));
480 _tcscat(contents, bigTerm);
481 _tcscat(contents, _T(" another term"));
482 doc->add(* _CLNEW Field(_T("content"), contents, Field::STORE_NO | Field::INDEX_TOKENIZED));
483 _CLDELETE_CARRAY(contents);
484 writer->addDocument(doc);
485 _CLLDELETE(doc);
486
487 // Make sure we can add another normal document
488 doc = _CLNEW Document();
489 doc->add(* _CLNEW Field(_T("content"), _T("abc bbb ccc"), Field::STORE_NO | Field::INDEX_TOKENIZED));
490 writer->addDocument(doc);
491 _CLLDELETE(doc);
492 writer->close();
493 _CLDELETE(writer);
494
495 IndexReader* reader = IndexReader::open(dir);
496
497 // Make sure all terms < max size were indexed
498 Term* t = _CLNEW Term(_T("content"), _T("abc"), true);
499 assertEquals(2, reader->docFreq(t));
500 t->set(_T("content"), _T("bbb"), true);
501 assertEquals(1, reader->docFreq(t));
502 t->set(_T("content"), _T("term"), true);
503 assertEquals(1, reader->docFreq(t));
504 t->set(_T("content"), _T("another"), true);
505 assertEquals(1, reader->docFreq(t));
506
507 // Make sure position is still incremented when
508 // massive term is skipped:
509 t->set(_T("content"), _T("another"), true);
510 TermPositions* tps = reader->termPositions(t);
511 assertTrue(tps->next());
512 assertEquals(1, tps->freq());
513 assertEquals(3, tps->nextPosition());
514 _CLLDELETE(tps);
515
516 // Make sure the doc that has the massive term is in
517 // the index:
518 assertEqualsMsg(_T("document with wicked long term should is not in the index!"), 1, reader->numDocs());
519
520 reader->close();
521 _CLLDELETE(reader);
522
523 // Make sure we can add a document with exactly the
524 // maximum length term, and search on that term:
525 doc = _CLNEW Document();
526 doc->add(*_CLNEW Field(_T("content"), bigTerm, Field::STORE_NO | Field::INDEX_TOKENIZED));
527 StandardAnalyzer sa;
528 sa.setMaxTokenLength(100000);
529 writer = _CLNEW IndexWriter(dir, &sa, true);
530 writer->addDocument(doc);
531 _CLLDELETE(doc);
532 writer->close();
533 reader = IndexReader::open(dir);
534 t->set(_T("content"), bigTerm);
535 assertEquals(1, reader->docFreq(t));
536 reader->close();
537
538 _CLDECDELETE(t);
539
540 _CLLDELETE(writer);
541 _CLLDELETE(reader);
542
543 dir->close();
544 _CLDECDELETE(dir);
545 }
546
547
testDeleteDocument(CuTest * tc)548 void testDeleteDocument(CuTest* tc) {
549 const int size = 205;
550 RAMDirectory* dir = _CLNEW RAMDirectory();
551 StandardAnalyzer a;
552 IndexWriter* writer = _CLNEW IndexWriter(dir, &a, true);
553
554 // build an index that is big enough that a deletion files is written
555 // in the DGaps format
556 for (int i = 0; i < size; i++) {
557 Document* doc = _CLNEW Document();
558 TCHAR* contents = _CL_NEWARRAY(TCHAR, (size / 10) + 1);
559 _i64tot(i, contents, 10);
560 doc->add(* _CLNEW Field(_T("content"), contents, Field::STORE_NO | Field::INDEX_TOKENIZED));
561 _CLDELETE_CARRAY(contents);
562 writer->addDocument(doc);
563 _CLDELETE_ARRAY( contents );
564 _CLLDELETE(doc);
565 }
566
567 // assure that the index has only one segment
568 writer->optimize();
569 // close and flush index
570 writer->close();
571 _CLLDELETE( writer );
572
573 // reopen the index and delete the document next to last
574 writer = _CLNEW IndexWriter(dir, &a, false);
575 TCHAR* contents = _CL_NEWARRAY(TCHAR, (size / 10) + 1);
576 _i64tot(size - 2, contents, 10);
577 Term* t = _CLNEW Term(_T("content"), contents);
578 _CLDELETE_LARRAY( contents );
579 writer->deleteDocuments(t);
580 writer->close();
581
582 // now the index has a deletion file in the DGaps format
583
584 _CLLDELETE(writer);
585 _CLDECDELETE(t);
586
587 // open this index with a searcher to read the deletions file again
588 IndexReader* reader = IndexReader::open(dir);
589 IndexSearcher* searcher = _CLNEW IndexSearcher(reader);
590 searcher->close();
591 reader->close();
592 _CLLDELETE(searcher);
593 _CLLDELETE(reader);
594
595 dir->close();
596 _CLLDELETE( dir );
597 }
598
testMergeIndex(CuTest * tc)599 void testMergeIndex(CuTest* tc) {
600
601 // A crash depends on the following:
602 // - The first document needs two differently named fields that set TERMVECTOR_YES.
603 // - The IndexWriter needs to reopen an existing index.
604 // - The reopened IndexWriter needs to call optimize() to force a merge
605 // on term vectors. This merging causes the crash.
606 // Submitted by McCann
607
608 RAMDirectory* dir = _CLNEW RAMDirectory();
609
610 // open a new lucene index
611 SimpleAnalyzer a;
612 IndexWriter* writer = _CLNEW IndexWriter( dir, false, &a, true );
613 writer->setUseCompoundFile( false );
614
615 // add two fields to document
616 Document* doc = _CLNEW Document();
617 doc->add ( *_CLNEW Field(_T("field0"), _T("value0"), Field::STORE_NO | Field::TERMVECTOR_YES | Field::INDEX_TOKENIZED) );
618 doc->add ( *_CLNEW Field(_T("field1"), _T("value1"), Field::STORE_NO | Field::TERMVECTOR_YES | Field::INDEX_TOKENIZED) );
619 writer->addDocument(doc);
620 _CLLDELETE(doc);
621
622 // close and flush index
623 writer->close();
624 _CLLDELETE( writer );
625
626 // open the previous lucene index
627 writer = _CLNEW IndexWriter( dir, false, &a, false );
628 writer->setUseCompoundFile( false );
629
630 // add a field to document
631 // note: the settings on this field don't seem to affect the crash
632 doc = _CLNEW Document();
633 doc->add ( *_CLNEW Field(_T("field"), _T("value"), Field::STORE_NO | Field::TERMVECTOR_YES | Field::INDEX_TOKENIZED) );
634 writer->addDocument(doc);
635 _CLLDELETE(doc);
636
637 // optimize index to force a merge
638 writer->optimize();
639 // close and flush index
640 writer->close();
641 _CLLDELETE( writer );
642
643 // Close directory
644 dir->close();
645 _CLLDELETE( dir );
646 }
647
testindexwriter(void)648 CuSuite *testindexwriter(void)
649 {
650 CuSuite *suite = CuSuiteNew(_T("CLucene IndexWriter Test"));
651 SUITE_ADD_TEST(suite, testHashingBug);
652 SUITE_ADD_TEST(suite, testAddIndexes);
653 SUITE_ADD_TEST(suite, testIWmergeSegments1);
654 SUITE_ADD_TEST(suite, testIWmergeSegments2);
655 SUITE_ADD_TEST(suite, testIWmergePhraseSegments);
656 SUITE_ADD_TEST(suite, testIWlargeScaleCorrectness);
657
658 // TODO: This test fails due to differences between CLucene's StandardTokenizer and JLucene's; this test
659 // should work when the tokenizer will be brought up-to-date,
660 //SUITE_ADD_TEST(suite, testWickedLongTerm);
661
662 SUITE_ADD_TEST(suite, testExceptionFromTokenStream);
663 SUITE_ADD_TEST(suite, testDeleteDocument);
664 SUITE_ADD_TEST(suite, testMergeIndex);
665
666 return suite;
667 }
668 // EOF
669