1 /*
2 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3 *
4 * Distributable under the terms of either the Apache License (Version 2.0) or
5 * the GNU Lesser General Public License, as specified in the COPYING file.
6 *
7 * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
8 */
9 #include "CLucene/StdHeader.h"
10 #include "SegmentMerger.h"
11
12 CL_NS_USE(util)
13 CL_NS_USE(document)
14 CL_NS_USE(store)
15 CL_NS_DEF(index)
16
// Extensions of the files that make up an old-style (non-compound) segment.
// The table is one packed string of NUL-terminated, three-character entries,
// so entry i starts at byte offset i * 4.
int COMPOUND_EXTENSIONS_LENGTH = 7;
const char* COMPOUND_EXTENSIONS =
    "fnm\0"
    "frq\0"
    "prx\0"
    "fdx\0"
    "fdt\0"
    "tii\0"
    "tis\0";

// Extensions of the term vector files, packed the same way as above.
int VECTOR_EXTENSIONS_LENGTH = 3;
const char* VECTOR_EXTENSIONS =
    "tvx\0"
    "tvd\0"
    "tvf\0";
23
SegmentMerger(IndexWriter * writer,const QString & name)24 SegmentMerger::SegmentMerger(IndexWriter* writer, const QString& name)
25 {
26 //Func - Constructor
27 //Pre - dir holds a valid reference to a Directory
28 // name != NULL
29 //Post - Instance has been created
30
31 CND_PRECONDITION(!name.isEmpty(), "name is NULL");
32
33 freqOutput = NULL;
34 proxOutput = NULL;
35 termInfosWriter = NULL;
36 queue = NULL;
37 fieldInfos = NULL;
38 useCompoundFile = writer->getUseCompoundFile();
39 skipBuffer = _CLNEW CL_NS(store)::RAMIndexOutput();
40
41 segment = name;
42 directory = writer->getDirectory();
43 termIndexInterval = writer->getTermIndexInterval();
44
45 lastSkipDoc=0;
46 lastSkipFreqPointer=0;
47 lastSkipProxPointer=0;
48 skipInterval=0;
49 }
50
~SegmentMerger()51 SegmentMerger::~SegmentMerger()
52 {
53 //Func - Destructor
54 //Pre - true
55 //Post - The instance has been destroyed
56
57 //Clear the readers set
58 readers.clear();
59
60 //Delete field Infos
61 _CLDELETE(fieldInfos);
62 //Close and destroy the IndexOutput to the Frequency File
63 if (freqOutput != NULL) {
64 freqOutput->close();
65 _CLDELETE(freqOutput);
66 }
67 //Close and destroy the IndexOutput to the Prox File
68 if (proxOutput != NULL) {
69 proxOutput->close();
70 _CLDELETE(proxOutput);
71 }
72 //Close and destroy the termInfosWriter
73 if (termInfosWriter != NULL) {
74 termInfosWriter->close();
75 _CLDELETE(termInfosWriter);
76 }
77 //Close and destroy the queue
78 if (queue != NULL) {
79 queue->close();
80 _CLDELETE(queue);
81 }
82 //close and destory the skipBuffer
83 if (skipBuffer != NULL) {
84 skipBuffer->close();
85 _CLDELETE(skipBuffer);
86 }
87 }
88
add(IndexReader * reader)89 void SegmentMerger::add(IndexReader* reader)
90 {
91 //Func - Adds a IndexReader to the set of readers
92 //Pre - reader contains a valid reference to a IndexReader
93 //Post - The SegementReader reader has been added to the set of readers
94
95 readers.push_back(reader);
96 }
97
segmentReader(const int32_t i)98 IndexReader* SegmentMerger::segmentReader(const int32_t i)
99 {
100 //Func - Returns a reference to the i-th IndexReader
101 //Pre - 0 <= i < readers.size()
102 //Post - A reference to the i-th IndexReader has been returned
103
104 CND_PRECONDITION(i >= 0, "i is a negative number");
105 CND_PRECONDITION((size_t)i < readers.size(),
106 "i is bigger than the number of IndexReader instances");
107
108 //Retrieve the i-th IndexReader
109 IndexReader* ret = readers[i];
110 CND_CONDITION(ret != NULL, "No IndexReader found");
111
112 return ret;
113 }
114
merge()115 int32_t SegmentMerger::merge()
116 {
117 int32_t value = mergeFields();
118 mergeTerms();
119 mergeNorms();
120
121 if (fieldInfos->hasVectors())
122 mergeVectors();
123
124 return value;
125 }
126
closeReaders()127 void SegmentMerger::closeReaders()
128 {
129 for (uint32_t i = 0; i < readers.size(); i++) {
130 // close readers
131 IndexReader* reader = readers[i];
132 reader->close();
133 }
134 }
135
createCompoundFile(const QString & filename,QStringList & files)136 void SegmentMerger::createCompoundFile(const QString& filename, QStringList& files)
137 {
138 CompoundFileWriter* cfsWriter = _CLNEW CompoundFileWriter(directory, filename);
139
140 { //msvc6 scope fix
141 // Basic files
142 for (int32_t i = 0; i < COMPOUND_EXTENSIONS_LENGTH; i++) {
143 files.push_back(Misc::qjoin(segment, QLatin1String("."),
144 QLatin1String(COMPOUND_EXTENSIONS+(i*4))));
145 }
146 }
147
148 { //msvc6 scope fix
149 // Field norm files
150 for (int32_t i = 0; i < fieldInfos->size(); i++) {
151 FieldInfo* fi = fieldInfos->fieldInfo(i);
152 if (fi->isIndexed && !fi->omitNorms) {
153 TCHAR tbuf[10];
154 char abuf[10];
155 _i64tot(i, tbuf, 10);
156 STRCPY_TtoA(abuf, tbuf, 10);
157
158 files.push_back(Misc::qjoin(segment, QLatin1String(".f"),
159 QLatin1String(abuf)));
160 }
161 }
162 }
163
164 // Vector files
165 if (fieldInfos->hasVectors()) {
166 for (int32_t i = 0; i < VECTOR_EXTENSIONS_LENGTH; i++) {
167 files.push_back(Misc::qjoin(segment, QLatin1String("."),
168 QLatin1String(VECTOR_EXTENSIONS+(i*4))));
169 }
170 }
171
172 { //msvc6 scope fix
173 // Now merge all added files
174 for (size_t i=0;i<files.size();i++) {
175 cfsWriter->addFile(files[i]);
176 }
177 }
178
179 // Perform the merge
180 cfsWriter->close();
181 _CLDELETE(cfsWriter);
182 }
183
addIndexed(IndexReader * reader,FieldInfos * fieldInfos,StringArrayWithDeletor & names,bool storeTermVectors,bool storePositionWithTermVector,bool storeOffsetWithTermVector)184 void SegmentMerger::addIndexed(IndexReader* reader, FieldInfos* fieldInfos,
185 StringArrayWithDeletor& names, bool storeTermVectors,
186 bool storePositionWithTermVector, bool storeOffsetWithTermVector)
187 {
188 StringArrayWithDeletor::const_iterator itr = names.begin();
189 while (itr != names.end()) {
190 fieldInfos->add(*itr, true,
191 storeTermVectors, storePositionWithTermVector,
192 storeOffsetWithTermVector, !reader->hasNorms(*itr));
193 ++itr;
194 }
195 }
196
mergeFields()197 int32_t SegmentMerger::mergeFields()
198 {
199 //Func - Merge the fields of all segments
200 //Pre - true
201 //Post - The field infos and field values of all segments have been merged.
202
203 //Create a new FieldInfos
204 fieldInfos = _CLNEW FieldInfos(); // merge field names
205
206 //Condition check to see if fieldInfos points to a valid instance
207 CND_CONDITION(fieldInfos != NULL, "Memory allocation for fieldInfos failed");
208
209 IndexReader* reader = NULL;
210
211 int32_t docCount = 0;
212
213 //Iterate through all readers
214 for (uint32_t i = 0; i < readers.size(); i++) {
215 //get the i-th reader
216 reader = readers[i];
217 //Condition check to see if reader points to a valid instance
218 CND_CONDITION(reader != NULL,"No IndexReader found");
219
220 StringArrayWithDeletor tmp;
221
222 tmp.clear();
223 reader->getFieldNames(IndexReader::TERMVECTOR_WITH_POSITION_OFFSET, tmp);
224 addIndexed(reader, fieldInfos, tmp, true, true, true);
225
226 tmp.clear();
227 reader->getFieldNames(IndexReader::TERMVECTOR_WITH_POSITION, tmp);
228 addIndexed(reader, fieldInfos, tmp, true, true, false);
229
230 tmp.clear();
231 reader->getFieldNames(IndexReader::TERMVECTOR_WITH_OFFSET, tmp);
232 addIndexed(reader, fieldInfos, tmp, true, false, true);
233
234 tmp.clear();
235 reader->getFieldNames(IndexReader::TERMVECTOR, tmp);
236 addIndexed(reader, fieldInfos, tmp, true, false, false);
237
238 tmp.clear();
239 reader->getFieldNames(IndexReader::INDEXED, tmp);
240 addIndexed(reader, fieldInfos, tmp, false, false, false);
241
242 tmp.clear();
243 reader->getFieldNames(IndexReader::UNINDEXED, tmp);
244 if (tmp.size() > 0) {
245 TCHAR** arr = _CL_NEWARRAY(TCHAR*,tmp.size()+1);
246 tmp.toArray(arr);
247 fieldInfos->add((const TCHAR**)arr, false);
248 _CLDELETE_ARRAY(arr);
249 //no need to delete the contents, since tmp is responsible for it
250 }
251 }
252
253 //Create the filename of the new FieldInfos file
254 QString buf = Misc::segmentname(segment, QLatin1String(".fnm"));
255 //Write the new FieldInfos file to the directory
256 fieldInfos->write(directory, buf);
257
258 // merge field values
259 // Instantiate Fieldswriter which will write in directory for the segment
260 // name segment using the new merged fieldInfos
261 FieldsWriter* fieldsWriter = _CLNEW FieldsWriter(directory, segment, fieldInfos);
262
263 //Condition check to see if fieldsWriter points to a valid instance
264 CND_CONDITION(fieldsWriter != NULL, "Memory allocation for fieldsWriter failed");
265
266 try {
267 IndexReader* reader = NULL;
268 int32_t maxDoc = 0;
269 //Iterate through all readers
270 for (uint32_t i = 0; i < readers.size(); i++) {
271 // get the i-th reader
272 reader = readers[i];
273
274
275 // Condition check to see if reader points to a valid instance
276 CND_CONDITION(reader != NULL, "No IndexReader found");
277
278 // Get the total number documents including the documents that have
279 // been marked deleted
280 int32_t maxDoc = reader->maxDoc();
281
282 //document buffer
283 Document doc;
284
285 //Iterate through all the documents managed by the current reader
286 for (int32_t j = 0; j < maxDoc; j++) {
287 //Check if the j-th document has been deleted, if so skip it
288 if (!reader->isDeleted(j)) {
289 //Get the document
290 if (reader->document(j, &doc)) {
291 //Add the document to the new FieldsWriter
292 fieldsWriter->addDocument(&doc);
293 docCount++;
294 //doc is cleard for re-use
295 doc.clear();
296 }
297 }
298 }
299 }
300 } _CLFINALLY (
301 //Close the fieldsWriter
302 fieldsWriter->close();
303 //And have it deleted as it not used any more
304 _CLDELETE(fieldsWriter);
305 );
306
307 return docCount;
308 }
309
mergeVectors()310 void SegmentMerger::mergeVectors()
311 {
312 TermVectorsWriter* termVectorsWriter =
313 _CLNEW TermVectorsWriter(directory, segment, fieldInfos);
314
315 try {
316 for (uint32_t r = 0; r < readers.size(); r++) {
317 IndexReader* reader = readers[r];
318 int32_t maxDoc = reader->maxDoc();
319 for (int32_t docNum = 0; docNum < maxDoc; docNum++) {
320 // skip deleted docs
321 if (reader->isDeleted(docNum))
322 continue;
323
324 Array<TermFreqVector*> tmp;
325 if (reader->getTermFreqVectors(docNum, tmp))
326 termVectorsWriter->addAllDocVectors(tmp);
327 tmp.deleteAll();
328 }
329 }
330 } _CLFINALLY (
331 _CLDELETE(termVectorsWriter);
332 );
333 }
334
335
mergeTerms()336 void SegmentMerger::mergeTerms()
337 {
338 //Func - Merge the terms of all segments
339 //Pre - fieldInfos != NULL
340 //Post - The terms of all segments have been merged
341
342 CND_PRECONDITION(fieldInfos != NULL, "fieldInfos is NULL");
343
344 try{
345 //create a filename for the new Frequency File for segment
346 QString buf = Misc::segmentname(segment, QLatin1String(".frq"));
347 //Open an IndexOutput to the new Frequency File
348 freqOutput = directory->createOutput(buf);
349
350 //create a filename for the new Prox File for segment
351 buf = Misc::segmentname(segment, QLatin1String(".prx"));
352 //Open an IndexOutput to the new Prox File
353 proxOutput = directory->createOutput(buf);
354
355 //Instantiate a new termInfosWriter which will write in directory
356 //for the segment name segment using the new merged fieldInfos
357 termInfosWriter = _CLNEW TermInfosWriter(directory, segment, fieldInfos,
358 termIndexInterval);
359
360 //Condition check to see if termInfosWriter points to a valid instance
361 CND_CONDITION(termInfosWriter != NULL,
362 "Memory allocation for termInfosWriter failed");
363
364 skipInterval = termInfosWriter->skipInterval;
365 queue = _CLNEW SegmentMergeQueue(readers.size());
366
367 //And merge the Term Infos
368 mergeTermInfos();
369 } _CLFINALLY (
370 //Close and destroy the IndexOutput to the Frequency File
371 if (freqOutput != NULL) {
372 freqOutput->close(); _CLDELETE(freqOutput);
373 }
374
375 //Close and destroy the IndexOutput to the Prox File
376 if (proxOutput != NULL)
377 {
378 proxOutput->close();
379 _CLDELETE(proxOutput);
380 }
381
382 //Close and destroy the termInfosWriter
383 if (termInfosWriter != NULL) {
384 termInfosWriter->close();
385 _CLDELETE(termInfosWriter);
386 }
387
388 //Close and destroy the queue
389 if (queue != NULL) {
390 queue->close();
391 _CLDELETE(queue);
392 }
393 );
394 }
395
mergeTermInfos()396 void SegmentMerger::mergeTermInfos()
397 {
398 //Func - Merges all TermInfos into a single segment
399 //Pre - true
400 //Post - All TermInfos have been merged into a single segment
401
402 //Condition check to see if queue points to a valid instance
403 CND_CONDITION(queue != NULL, "Memory allocation for queue failed");
404
405 //base is the id of the first document in a segment
406 int32_t base = 0;
407
408 IndexReader* reader = NULL;
409 SegmentMergeInfo* smi = NULL;
410
411 //iterate through all the readers
412 for (uint32_t i = 0; i < readers.size(); i++) {
413 //Get the i-th reader
414 reader = readers[i];
415
416 //Condition check to see if reader points to a valid instance
417 CND_CONDITION(reader != NULL, "No IndexReader found");
418
419 //Get the term enumeration of the reader
420 TermEnum* termEnum = reader->terms();
421 //Instantiate a new SegmentMerginfo for the current reader and enumeration
422 smi = _CLNEW SegmentMergeInfo(base, termEnum, reader);
423
424 //Condition check to see if smi points to a valid instance
425 CND_CONDITION(smi != NULL, "Memory allocation for smi failed") ;
426
427 //Increase the base by the number of documents that have not been marked deleted
428 //so base will contain a new value for the first document of the next iteration
429 base += reader->numDocs();
430 //Get the next current term
431 if (smi->next()) {
432 //Store the SegmentMergeInfo smi with the initialized SegmentTermEnum TermEnum
433 //into the queue
434 queue->put(smi);
435 } else {
436 //Apparently the end of the TermEnum of the SegmentTerm has been reached so
437 //close the SegmentMergeInfo smi
438 smi->close();
439 //And destroy the instance and set smi to NULL (It will be used later in this method)
440 _CLDELETE(smi);
441 }
442 }
443
444 //Instantiate an array of SegmentMergeInfo instances called match
445 SegmentMergeInfo** match = _CL_NEWARRAY(SegmentMergeInfo*,readers.size()+1);
446
447 //Condition check to see if match points to a valid instance
448 CND_CONDITION(match != NULL, "Memory allocation for match failed") ;
449
450 SegmentMergeInfo* top = NULL;
451
452 //As long as there are SegmentMergeInfo instances stored in the queue
453 while (queue->size() > 0) {
454 int32_t matchSize = 0;
455
456 // pop matching terms
457
458 //Pop the first SegmentMergeInfo from the queue
459 match[matchSize++] = queue->pop();
460 //Get the Term of match[0]
461 Term* term = match[0]->term;
462
463 //Condition check to see if term points to a valid instance
464 CND_CONDITION(term != NULL,"term is NULL") ;
465
466 //Get the current top of the queue
467 top = queue->top();
468
469 //For each SegmentMergInfo still in the queue
470 //Check if term matches the term of the SegmentMergeInfo instances in the queue
471 while (top != NULL && term->equals(top->term)) {
472 //A match has been found so add the matching SegmentMergeInfo to the match array
473 match[matchSize++] = queue->pop();
474 //Get the next SegmentMergeInfo
475 top = queue->top();
476 }
477 match[matchSize]=NULL;
478
479 //add new TermInfo
480 mergeTermInfo(match); //matchSize
481
482 //Restore the SegmentTermInfo instances in the match array back into the queue
483 while (matchSize > 0) {
484 smi = match[--matchSize];
485
486 //Condition check to see if smi points to a valid instance
487 CND_CONDITION(smi != NULL, "smi is NULL");
488
489 //Move to the next term in the enumeration of SegmentMergeInfo smi
490 if (smi->next()) {
491 //There still are some terms so restore smi in the queue
492 queue->put(smi);
493
494 } else {
495 //Done with a segment
496 //No terms anymore so close this SegmentMergeInfo instance
497 smi->close();
498 _CLDELETE(smi);
499 }
500 }
501 }
502
503 _CLDELETE_ARRAY(match);
504 }
505
mergeTermInfo(SegmentMergeInfo ** smis)506 void SegmentMerger::mergeTermInfo(SegmentMergeInfo** smis)
507 {
508 //Func - Merge the TermInfo of a term found in one or more segments.
509 //Pre - smis != NULL and it contains segments that are positioned at the same term.
510 // n is equal to the number of SegmentMergeInfo instances in smis
511 // freqOutput != NULL
512 // proxOutput != NULL
513 //Post - The TermInfo of a term has been merged
514
515 CND_PRECONDITION(smis != NULL, "smis is NULL");
516 CND_PRECONDITION(freqOutput != NULL, "freqOutput is NULL");
517 CND_PRECONDITION(proxOutput != NULL, "proxOutput is NULL");
518
519 //Get the file pointer of the IndexOutput to the Frequency File
520 int64_t freqPointer = freqOutput->getFilePointer();
521 //Get the file pointer of the IndexOutput to the Prox File
522 int64_t proxPointer = proxOutput->getFilePointer();
523
524 //Process postings from multiple segments all positioned on the same term.
525 int32_t df = appendPostings(smis);
526
527 int64_t skipPointer = writeSkip();
528
529 //df contains the number of documents across all segments where this term was found
530 if (df > 0) {
531 //add an entry to the dictionary with pointers to prox and freq files
532 termInfo.set(df, freqPointer, proxPointer, (int32_t)(skipPointer - freqPointer));
533 //Precondition check for to be sure that the reference to
534 //smis[0]->term will be valid
535 CND_PRECONDITION(smis[0]->term != NULL, "smis[0]->term is NULL");
536 //Write a new TermInfo
537 termInfosWriter->add(smis[0]->term, &termInfo);
538 }
539 }
540
541
appendPostings(SegmentMergeInfo ** smis)542 int32_t SegmentMerger::appendPostings(SegmentMergeInfo** smis)
543 {
544 //Func - Process postings from multiple segments all positioned on the
545 // same term. Writes out merged entries into freqOutput and
546 // the proxOutput streams.
547 //Pre - smis != NULL and it contains segments that are positioned at the same term.
548 // n is equal to the number of SegmentMergeInfo instances in smis
549 // freqOutput != NULL
550 // proxOutput != NULL
551 //Post - Returns number of documents across all segments where this term was found
552
553 CND_PRECONDITION(smis != NULL, "smis is NULL");
554 CND_PRECONDITION(freqOutput != NULL, "freqOutput is NULL");
555 CND_PRECONDITION(proxOutput != NULL, "proxOutput is NULL");
556
557 int32_t lastDoc = 0;
558 int32_t df = 0; //Document Counter
559
560 resetSkip();
561 SegmentMergeInfo* smi = NULL;
562
563 //Iterate through all SegmentMergeInfo instances in smis
564 int32_t i = 0;
565 while ((smi=smis[i]) != NULL) {
566 //Get the i-th SegmentMergeInfo
567
568 //Condition check to see if smi points to a valid instance
569 CND_PRECONDITION(smi != NULL, " is NULL");
570
571 //Get the term positions
572 TermPositions* postings = smi->getPositions();
573 //Get the base of this segment
574 int32_t base = smi->base;
575 //Get the docMap so we can see which documents have been deleted
576 int32_t* docMap = smi->getDocMap();
577 //Seek the termpost
578 postings->seek(smi->termEnum);
579 while (postings->next()) {
580 int32_t doc = postings->doc();
581 //Check if there are deletions
582 if (docMap != NULL)
583 doc = docMap[doc]; // map around deletions
584 doc += base; // convert to merged space
585
586 //Condition check to see doc is eaqual to or bigger than lastDoc
587 CND_CONDITION(doc >= lastDoc,"docs out of order");
588
589 //Increase the total frequency over all segments
590 df++;
591
592 if ((df % skipInterval) == 0) {
593 bufferSkip(lastDoc);
594 }
595
596 //Calculate a new docCode
597 //use low bit to flag freq=1
598 int32_t docCode = (doc - lastDoc) << 1;
599 lastDoc = doc;
600
601 //Get the frequency of the Term
602 int32_t freq = postings->freq();
603 if (freq == 1) {
604 //write doc & freq=1
605 freqOutput->writeVInt(docCode | 1);
606 } else {
607 //write doc
608 freqOutput->writeVInt(docCode);
609 //write frequency in doc
610 freqOutput->writeVInt(freq);
611 }
612
613 int32_t lastPosition = 0;
614 // write position deltas
615 for (int32_t j = 0; j < freq; j++) {
616 //Get the next position
617 int32_t position = postings->nextPosition();
618 //Write the difference between position and the last position
619 proxOutput->writeVInt(position - lastPosition);
620 lastPosition = position;
621 }
622 }
623
624 i++;
625 }
626
627 //Return total number of documents across all segments where term was found
628 return df;
629 }
630
resetSkip()631 void SegmentMerger::resetSkip()
632 {
633 skipBuffer->reset();
634 lastSkipDoc = 0;
635 lastSkipFreqPointer = freqOutput->getFilePointer();
636 lastSkipProxPointer = proxOutput->getFilePointer();
637 }
638
bufferSkip(int32_t doc)639 void SegmentMerger::bufferSkip(int32_t doc)
640 {
641 int64_t freqPointer = freqOutput->getFilePointer();
642 int64_t proxPointer = proxOutput->getFilePointer();
643
644 skipBuffer->writeVInt(doc - lastSkipDoc);
645 skipBuffer->writeVInt((int32_t) (freqPointer - lastSkipFreqPointer));
646 skipBuffer->writeVInt((int32_t) (proxPointer - lastSkipProxPointer));
647
648 lastSkipDoc = doc;
649 lastSkipFreqPointer = freqPointer;
650 lastSkipProxPointer = proxPointer;
651 }
652
writeSkip()653 int64_t SegmentMerger::writeSkip()
654 {
655 int64_t skipPointer = freqOutput->getFilePointer();
656 skipBuffer->writeTo(freqOutput);
657 return skipPointer;
658 }
659
660 // Func - Merges the norms for all fields
661 // Pre - fieldInfos != NULL
662 // Post - The norms for all fields have been merged
mergeNorms()663 void SegmentMerger::mergeNorms()
664 {
665 CND_PRECONDITION(fieldInfos != NULL, "fieldInfos is NULL");
666
667 //iterate through all the Field Infos instances
668 for (int32_t i = 0; i < fieldInfos->size(); i++) {
669 //Get the i-th FieldInfo
670 FieldInfo* fi = fieldInfos->fieldInfo(i);
671 //Is this Field indexed?
672 if (fi->isIndexed && !fi->omitNorms) {
673 //Create and Instantiate an IndexOutput to that norm file
674 QString buf = Misc::segmentname(segment, QLatin1String(".f"), i);
675 IndexOutput* output = directory->createOutput(buf);
676
677 //Condition check to see if output points to a valid instance
678 CND_CONDITION(output != NULL, "No Outputstream retrieved");
679
680 uint8_t* input = NULL;
681 try {
682 for (uint32_t j = 0; j < readers.size(); ++j) {
683 // get the next index reader + condition check
684 IndexReader* reader = readers[j];
685 CND_CONDITION(reader != NULL, "No reader found");
686
687 // Get the total number of documents including the documents
688 // that have been marked deleted
689 int32_t maxDoc = reader->maxDoc();
690 if (maxDoc > 0) {
691 // if there are docs, allocate buffer to read it's norms
692 uint8_t* data = (uint8_t*)realloc(input, maxDoc *
693 sizeof(uint8_t));
694 if (data) {
695 input = data;
696 memset(input, 0, maxDoc * sizeof(uint8_t));
697 // Get an IndexInput to the norm file for this
698 // field in this segment
699 reader->norms(fi->name, input);
700
701 //Iterate through all the documents
702 for(int32_t k = 0; k < maxDoc; k++) {
703 //Check if document k is deleted
704 if (!reader->isDeleted(k)) {
705 //write the new norm
706 output->writeByte(input[k]);
707 }
708 }
709 }
710 }
711 }
712 } _CLFINALLY (
713 if (output != NULL) {
714 output->close();
715 _CLDELETE(output);
716 }
717 free(input);
718 );
719 }
720 }
721 }
722
723 CL_NS_END
724