1 /*
2  * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3  *
4  * Distributable under the terms of either the Apache License (Version 2.0) or
5  * the GNU Lesser General Public License, as specified in the COPYING file.
6  *
7  * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
8 */
9 #include "CLucene/StdHeader.h"
10 #include "SegmentMerger.h"
11 
12 CL_NS_USE(util)
13 CL_NS_USE(document)
14 CL_NS_USE(store)
15 CL_NS_DEF(index)
16 
// File extensions of old-style index files.
// The extensions are packed into one string: every entry is three characters
// followed by a NUL, so entry i starts at byte offset i * 4.
const char* COMPOUND_EXTENSIONS =
    "fnm\0"
    "frq\0"
    "prx\0"
    "fdx\0"
    "fdt\0"
    "tii\0"
    "tis\0";
// Number of entries packed into COMPOUND_EXTENSIONS
int COMPOUND_EXTENSIONS_LENGTH = 7;

// Extensions of the term vector files, packed the same way as above
const char* VECTOR_EXTENSIONS =
    "tvx\0"
    "tvd\0"
    "tvf\0";
// Number of entries packed into VECTOR_EXTENSIONS
int VECTOR_EXTENSIONS_LENGTH = 3;
23 
SegmentMerger(IndexWriter * writer,const QString & name)24 SegmentMerger::SegmentMerger(IndexWriter* writer, const QString& name)
25 {
26     //Func - Constructor
27     //Pre  - dir holds a valid reference to a Directory
28     //       name != NULL
29     //Post - Instance has been created
30 
31     CND_PRECONDITION(!name.isEmpty(), "name is NULL");
32 
33     freqOutput       = NULL;
34     proxOutput       = NULL;
35     termInfosWriter  = NULL;
36     queue            = NULL;
37     fieldInfos       = NULL;
38     useCompoundFile  = writer->getUseCompoundFile();
39     skipBuffer       = _CLNEW CL_NS(store)::RAMIndexOutput();
40 
41     segment          = name;
42     directory		 = writer->getDirectory();
43     termIndexInterval = writer->getTermIndexInterval();
44 
45     lastSkipDoc=0;
46     lastSkipFreqPointer=0;
47     lastSkipProxPointer=0;
48     skipInterval=0;
49 }
50 
~SegmentMerger()51 SegmentMerger::~SegmentMerger()
52 {
53     //Func - Destructor
54     //Pre  - true
55     //Post - The instance has been destroyed
56 
57     //Clear the readers set
58     readers.clear();
59 
60     //Delete field Infos
61     _CLDELETE(fieldInfos);
62     //Close and destroy the IndexOutput to the Frequency File
63     if (freqOutput != NULL) {
64         freqOutput->close();
65         _CLDELETE(freqOutput);
66     }
67     //Close and destroy the IndexOutput to the Prox File
68     if (proxOutput != NULL) {
69         proxOutput->close();
70         _CLDELETE(proxOutput);
71     }
72     //Close and destroy the termInfosWriter
73     if (termInfosWriter != NULL) {
74         termInfosWriter->close();
75         _CLDELETE(termInfosWriter);
76     }
77     //Close and destroy the queue
78     if (queue != NULL) {
79         queue->close();
80         _CLDELETE(queue);
81     }
82     //close and destory the skipBuffer
83     if (skipBuffer != NULL) {
84         skipBuffer->close();
85         _CLDELETE(skipBuffer);
86     }
87 }
88 
add(IndexReader * reader)89 void SegmentMerger::add(IndexReader* reader)
90 {
91     //Func - Adds a IndexReader to the set of readers
92     //Pre  - reader contains a valid reference to a IndexReader
93     //Post - The SegementReader reader has been added to the set of readers
94 
95     readers.push_back(reader);
96 }
97 
segmentReader(const int32_t i)98 IndexReader* SegmentMerger::segmentReader(const int32_t i)
99 {
100     //Func - Returns a reference to the i-th IndexReader
101     //Pre  - 0 <= i < readers.size()
102     //Post - A reference to the i-th IndexReader has been returned
103 
104     CND_PRECONDITION(i >= 0, "i is a negative number");
105     CND_PRECONDITION((size_t)i < readers.size(),
106         "i is bigger than the number of IndexReader instances");
107 
108     //Retrieve the i-th IndexReader
109     IndexReader* ret = readers[i];
110     CND_CONDITION(ret != NULL, "No IndexReader found");
111 
112     return ret;
113 }
114 
merge()115 int32_t SegmentMerger::merge()
116 {
117     int32_t value = mergeFields();
118     mergeTerms();
119     mergeNorms();
120 
121     if (fieldInfos->hasVectors())
122         mergeVectors();
123 
124     return value;
125 }
126 
closeReaders()127 void SegmentMerger::closeReaders()
128 {
129     for (uint32_t i = 0; i < readers.size(); i++) {
130         // close readers
131         IndexReader* reader = readers[i];
132         reader->close();
133     }
134 }
135 
createCompoundFile(const QString & filename,QStringList & files)136 void SegmentMerger::createCompoundFile(const QString& filename, QStringList& files)
137 {
138     CompoundFileWriter* cfsWriter = _CLNEW CompoundFileWriter(directory, filename);
139 
140     { //msvc6 scope fix
141         // Basic files
142         for (int32_t i = 0; i < COMPOUND_EXTENSIONS_LENGTH; i++) {
143             files.push_back(Misc::qjoin(segment, QLatin1String("."),
144                 QLatin1String(COMPOUND_EXTENSIONS+(i*4))));
145         }
146     }
147 
148     { //msvc6 scope fix
149         // Field norm files
150         for (int32_t i = 0; i < fieldInfos->size(); i++) {
151             FieldInfo* fi = fieldInfos->fieldInfo(i);
152             if (fi->isIndexed && !fi->omitNorms) {
153                 TCHAR tbuf[10];
154                 char abuf[10];
155                 _i64tot(i, tbuf, 10);
156                 STRCPY_TtoA(abuf, tbuf, 10);
157 
158                 files.push_back(Misc::qjoin(segment, QLatin1String(".f"),
159                     QLatin1String(abuf)));
160             }
161         }
162     }
163 
164     // Vector files
165     if (fieldInfos->hasVectors()) {
166         for (int32_t i = 0; i < VECTOR_EXTENSIONS_LENGTH; i++) {
167             files.push_back(Misc::qjoin(segment, QLatin1String("."),
168                 QLatin1String(VECTOR_EXTENSIONS+(i*4))));
169         }
170     }
171 
172     { //msvc6 scope fix
173         // Now merge all added files
174         for (size_t i=0;i<files.size();i++) {
175             cfsWriter->addFile(files[i]);
176         }
177     }
178 
179     // Perform the merge
180     cfsWriter->close();
181     _CLDELETE(cfsWriter);
182 }
183 
addIndexed(IndexReader * reader,FieldInfos * fieldInfos,StringArrayWithDeletor & names,bool storeTermVectors,bool storePositionWithTermVector,bool storeOffsetWithTermVector)184 void SegmentMerger::addIndexed(IndexReader* reader, FieldInfos* fieldInfos,
185     StringArrayWithDeletor& names, bool storeTermVectors,
186     bool storePositionWithTermVector, bool storeOffsetWithTermVector)
187 {
188     StringArrayWithDeletor::const_iterator itr = names.begin();
189     while (itr != names.end()) {
190         fieldInfos->add(*itr, true,
191             storeTermVectors, storePositionWithTermVector,
192             storeOffsetWithTermVector, !reader->hasNorms(*itr));
193         ++itr;
194     }
195 }
196 
mergeFields()197 int32_t SegmentMerger::mergeFields()
198 {
199     //Func - Merge the fields of all segments
200     //Pre  - true
201     //Post - The field infos and field values of all segments have been merged.
202 
203     //Create a new FieldInfos
204     fieldInfos = _CLNEW FieldInfos();		  // merge field names
205 
206     //Condition check to see if fieldInfos points to a valid instance
207     CND_CONDITION(fieldInfos != NULL, "Memory allocation for fieldInfos failed");
208 
209     IndexReader* reader = NULL;
210 
211     int32_t docCount = 0;
212 
213     //Iterate through all readers
214     for (uint32_t i = 0; i < readers.size(); i++) {
215         //get the i-th reader
216         reader = readers[i];
217         //Condition check to see if reader points to a valid instance
218         CND_CONDITION(reader != NULL,"No IndexReader found");
219 
220         StringArrayWithDeletor tmp;
221 
222         tmp.clear();
223         reader->getFieldNames(IndexReader::TERMVECTOR_WITH_POSITION_OFFSET, tmp);
224         addIndexed(reader, fieldInfos, tmp, true, true, true);
225 
226         tmp.clear();
227         reader->getFieldNames(IndexReader::TERMVECTOR_WITH_POSITION, tmp);
228         addIndexed(reader, fieldInfos, tmp, true, true, false);
229 
230         tmp.clear();
231         reader->getFieldNames(IndexReader::TERMVECTOR_WITH_OFFSET, tmp);
232         addIndexed(reader, fieldInfos, tmp, true, false, true);
233 
234         tmp.clear();
235         reader->getFieldNames(IndexReader::TERMVECTOR, tmp);
236         addIndexed(reader, fieldInfos, tmp, true, false, false);
237 
238         tmp.clear();
239         reader->getFieldNames(IndexReader::INDEXED, tmp);
240         addIndexed(reader, fieldInfos, tmp, false, false, false);
241 
242         tmp.clear();
243         reader->getFieldNames(IndexReader::UNINDEXED, tmp);
244         if (tmp.size() > 0) {
245             TCHAR** arr = _CL_NEWARRAY(TCHAR*,tmp.size()+1);
246             tmp.toArray(arr);
247             fieldInfos->add((const TCHAR**)arr, false);
248             _CLDELETE_ARRAY(arr);
249             //no need to delete the contents, since tmp is responsible for it
250         }
251     }
252 
253     //Create the filename of the new FieldInfos file
254     QString buf = Misc::segmentname(segment, QLatin1String(".fnm"));
255     //Write the new FieldInfos file to the directory
256     fieldInfos->write(directory, buf);
257 
258     // merge field values
259     // Instantiate Fieldswriter which will write in directory for the segment
260     // name segment using the new merged fieldInfos
261     FieldsWriter* fieldsWriter = _CLNEW FieldsWriter(directory, segment, fieldInfos);
262 
263     //Condition check to see if fieldsWriter points to a valid instance
264     CND_CONDITION(fieldsWriter != NULL, "Memory allocation for fieldsWriter failed");
265 
266     try {
267         IndexReader* reader = NULL;
268         int32_t maxDoc          = 0;
269         //Iterate through all readers
270         for (uint32_t i = 0; i < readers.size(); i++) {
271             // get the i-th reader
272             reader = readers[i];
273 
274 
275             // Condition check to see if reader points to a valid instance
276             CND_CONDITION(reader != NULL, "No IndexReader found");
277 
278             // Get the total number documents including the documents that have
279             // been marked deleted
280             int32_t maxDoc = reader->maxDoc();
281 
282             //document buffer
283             Document doc;
284 
285             //Iterate through all the documents managed by the current reader
286             for (int32_t j = 0; j < maxDoc; j++) {
287                 //Check if the j-th document has been deleted, if so skip it
288                 if (!reader->isDeleted(j)) {
289                     //Get the document
290                     if (reader->document(j, &doc)) {
291                         //Add the document to the new FieldsWriter
292                         fieldsWriter->addDocument(&doc);
293                         docCount++;
294                         //doc is cleard for re-use
295                         doc.clear();
296                     }
297                 }
298             }
299         }
300     } _CLFINALLY (
301         //Close the fieldsWriter
302         fieldsWriter->close();
303         //And have it deleted as it not used any more
304         _CLDELETE(fieldsWriter);
305     );
306 
307     return docCount;
308 }
309 
mergeVectors()310 void SegmentMerger::mergeVectors()
311 {
312     TermVectorsWriter* termVectorsWriter =
313         _CLNEW TermVectorsWriter(directory, segment, fieldInfos);
314 
315     try {
316         for (uint32_t r = 0; r < readers.size(); r++) {
317             IndexReader* reader = readers[r];
318             int32_t maxDoc = reader->maxDoc();
319             for (int32_t docNum = 0; docNum < maxDoc; docNum++) {
320                 // skip deleted docs
321                 if (reader->isDeleted(docNum))
322                     continue;
323 
324                 Array<TermFreqVector*> tmp;
325                 if (reader->getTermFreqVectors(docNum, tmp))
326                     termVectorsWriter->addAllDocVectors(tmp);
327                 tmp.deleteAll();
328             }
329         }
330     } _CLFINALLY (
331         _CLDELETE(termVectorsWriter);
332     );
333 }
334 
335 
mergeTerms()336 void SegmentMerger::mergeTerms()
337 {
338     //Func - Merge the terms of all segments
339     //Pre  - fieldInfos != NULL
340     //Post - The terms of all segments have been merged
341 
342     CND_PRECONDITION(fieldInfos != NULL, "fieldInfos is NULL");
343 
344     try{
345         //create a filename for the new Frequency File for segment
346         QString buf = Misc::segmentname(segment, QLatin1String(".frq"));
347         //Open an IndexOutput to the new Frequency File
348         freqOutput = directory->createOutput(buf);
349 
350         //create a filename for the new Prox File for segment
351         buf = Misc::segmentname(segment, QLatin1String(".prx"));
352         //Open an IndexOutput to the new Prox File
353         proxOutput = directory->createOutput(buf);
354 
355         //Instantiate  a new termInfosWriter which will write in directory
356         //for the segment name segment using the new merged fieldInfos
357         termInfosWriter = _CLNEW TermInfosWriter(directory, segment, fieldInfos,
358             termIndexInterval);
359 
360         //Condition check to see if termInfosWriter points to a valid instance
361         CND_CONDITION(termInfosWriter != NULL,
362             "Memory allocation for termInfosWriter failed");
363 
364         skipInterval = termInfosWriter->skipInterval;
365         queue = _CLNEW SegmentMergeQueue(readers.size());
366 
367         //And merge the Term Infos
368         mergeTermInfos();
369     } _CLFINALLY (
370         //Close and destroy the IndexOutput to the Frequency File
371         if (freqOutput != NULL) {
372             freqOutput->close(); _CLDELETE(freqOutput);
373         }
374 
375         //Close and destroy the IndexOutput to the Prox File
376         if (proxOutput != NULL)
377         {
378             proxOutput->close();
379             _CLDELETE(proxOutput);
380         }
381 
382         //Close and destroy the termInfosWriter
383         if (termInfosWriter != NULL) {
384             termInfosWriter->close();
385             _CLDELETE(termInfosWriter);
386         }
387 
388         //Close and destroy the queue
389         if (queue != NULL) {
390             queue->close();
391             _CLDELETE(queue);
392         }
393     );
394 }
395 
mergeTermInfos()396 void SegmentMerger::mergeTermInfos()
397 {
398     //Func - Merges all TermInfos into a single segment
399     //Pre  - true
400     //Post - All TermInfos have been merged into a single segment
401 
402     //Condition check to see if queue points to a valid instance
403     CND_CONDITION(queue != NULL, "Memory allocation for queue failed");
404 
405     //base is the id of the first document in a segment
406     int32_t base = 0;
407 
408     IndexReader* reader = NULL;
409     SegmentMergeInfo* smi = NULL;
410 
411     //iterate through all the readers
412     for (uint32_t i = 0; i < readers.size(); i++) {
413         //Get the i-th reader
414         reader = readers[i];
415 
416         //Condition check to see if reader points to a valid instance
417         CND_CONDITION(reader != NULL, "No IndexReader found");
418 
419         //Get the term enumeration of the reader
420         TermEnum* termEnum = reader->terms();
421         //Instantiate a new SegmentMerginfo for the current reader and enumeration
422         smi = _CLNEW SegmentMergeInfo(base, termEnum, reader);
423 
424         //Condition check to see if smi points to a valid instance
425         CND_CONDITION(smi != NULL, "Memory allocation for smi failed")	;
426 
427         //Increase the base by the number of documents that have not been marked deleted
428         //so base will contain a new value for the first document of the next iteration
429         base += reader->numDocs();
430         //Get the next current term
431         if (smi->next()) {
432             //Store the SegmentMergeInfo smi with the initialized SegmentTermEnum TermEnum
433             //into the queue
434             queue->put(smi);
435         } else {
436             //Apparently the end of the TermEnum of the SegmentTerm has been reached so
437             //close the SegmentMergeInfo smi
438             smi->close();
439             //And destroy the instance and set smi to NULL (It will be used later in this method)
440             _CLDELETE(smi);
441         }
442     }
443 
444     //Instantiate an array of SegmentMergeInfo instances called match
445     SegmentMergeInfo** match = _CL_NEWARRAY(SegmentMergeInfo*,readers.size()+1);
446 
447     //Condition check to see if match points to a valid instance
448     CND_CONDITION(match != NULL, "Memory allocation for match failed")	;
449 
450     SegmentMergeInfo* top = NULL;
451 
452     //As long as there are SegmentMergeInfo instances stored in the queue
453     while (queue->size() > 0) {
454         int32_t matchSize = 0;
455 
456         // pop matching terms
457 
458         //Pop the first SegmentMergeInfo from the queue
459         match[matchSize++] = queue->pop();
460         //Get the Term of match[0]
461         Term* term = match[0]->term;
462 
463         //Condition check to see if term points to a valid instance
464         CND_CONDITION(term != NULL,"term is NULL")	;
465 
466         //Get the current top of the queue
467         top = queue->top();
468 
469         //For each SegmentMergInfo still in the queue
470         //Check if term matches the term of the SegmentMergeInfo instances in the queue
471         while (top != NULL && term->equals(top->term)) {
472             //A match has been found so add the matching SegmentMergeInfo to the match array
473             match[matchSize++] = queue->pop();
474             //Get the next SegmentMergeInfo
475             top = queue->top();
476         }
477         match[matchSize]=NULL;
478 
479         //add new TermInfo
480         mergeTermInfo(match); //matchSize
481 
482         //Restore the SegmentTermInfo instances in the match array back into the queue
483         while (matchSize > 0) {
484             smi = match[--matchSize];
485 
486             //Condition check to see if smi points to a valid instance
487             CND_CONDITION(smi != NULL, "smi is NULL");
488 
489             //Move to the next term in the enumeration of SegmentMergeInfo smi
490             if (smi->next()) {
491                 //There still are some terms so restore smi in the queue
492                 queue->put(smi);
493 
494             } else {
495                 //Done with a segment
496                 //No terms anymore so close this SegmentMergeInfo instance
497                 smi->close();
498                 _CLDELETE(smi);
499             }
500         }
501     }
502 
503     _CLDELETE_ARRAY(match);
504 }
505 
mergeTermInfo(SegmentMergeInfo ** smis)506 void SegmentMerger::mergeTermInfo(SegmentMergeInfo** smis)
507 {
508     //Func - Merge the TermInfo of a term found in one or more segments.
509     //Pre  - smis != NULL and it contains segments that are positioned at the same term.
510     //       n is equal to the number of SegmentMergeInfo instances in smis
511     //       freqOutput != NULL
512     //       proxOutput != NULL
513     //Post - The TermInfo of a term has been merged
514 
515     CND_PRECONDITION(smis != NULL, "smis is NULL");
516     CND_PRECONDITION(freqOutput != NULL, "freqOutput is NULL");
517     CND_PRECONDITION(proxOutput != NULL, "proxOutput is NULL");
518 
519     //Get the file pointer of the IndexOutput to the Frequency File
520     int64_t freqPointer = freqOutput->getFilePointer();
521     //Get the file pointer of the IndexOutput to the Prox File
522     int64_t proxPointer = proxOutput->getFilePointer();
523 
524     //Process postings from multiple segments all positioned on the same term.
525     int32_t df = appendPostings(smis);
526 
527     int64_t skipPointer = writeSkip();
528 
529     //df contains the number of documents across all segments where this term was found
530     if (df > 0) {
531         //add an entry to the dictionary with pointers to prox and freq files
532         termInfo.set(df, freqPointer, proxPointer, (int32_t)(skipPointer - freqPointer));
533         //Precondition check for to be sure that the reference to
534         //smis[0]->term will be valid
535         CND_PRECONDITION(smis[0]->term != NULL, "smis[0]->term is NULL");
536         //Write a new TermInfo
537         termInfosWriter->add(smis[0]->term, &termInfo);
538     }
539 }
540 
541 
appendPostings(SegmentMergeInfo ** smis)542 int32_t SegmentMerger::appendPostings(SegmentMergeInfo** smis)
543 {
544     //Func - Process postings from multiple segments all positioned on the
545     //       same term. Writes out merged entries into freqOutput and
546     //       the proxOutput streams.
547     //Pre  - smis != NULL and it contains segments that are positioned at the same term.
548     //       n is equal to the number of SegmentMergeInfo instances in smis
549     //       freqOutput != NULL
550     //       proxOutput != NULL
551     //Post - Returns number of documents across all segments where this term was found
552 
553     CND_PRECONDITION(smis != NULL, "smis is NULL");
554     CND_PRECONDITION(freqOutput != NULL, "freqOutput is NULL");
555     CND_PRECONDITION(proxOutput != NULL, "proxOutput is NULL");
556 
557     int32_t lastDoc = 0;
558     int32_t df = 0;       //Document Counter
559 
560     resetSkip();
561     SegmentMergeInfo* smi = NULL;
562 
563     //Iterate through all SegmentMergeInfo instances in smis
564     int32_t i = 0;
565     while ((smi=smis[i]) != NULL) {
566         //Get the i-th SegmentMergeInfo
567 
568         //Condition check to see if smi points to a valid instance
569         CND_PRECONDITION(smi != NULL, "	 is NULL");
570 
571         //Get the term positions
572         TermPositions* postings = smi->getPositions();
573         //Get the base of this segment
574         int32_t base = smi->base;
575         //Get the docMap so we can see which documents have been deleted
576         int32_t* docMap = smi->getDocMap();
577         //Seek the termpost
578         postings->seek(smi->termEnum);
579         while (postings->next()) {
580             int32_t doc = postings->doc();
581             //Check if there are deletions
582             if (docMap != NULL)
583                 doc = docMap[doc]; // map around deletions
584             doc += base;                              // convert to merged space
585 
586             //Condition check to see doc is eaqual to or bigger than lastDoc
587             CND_CONDITION(doc >= lastDoc,"docs out of order");
588 
589             //Increase the total frequency over all segments
590             df++;
591 
592             if ((df % skipInterval) == 0) {
593                 bufferSkip(lastDoc);
594             }
595 
596             //Calculate a new docCode
597             //use low bit to flag freq=1
598             int32_t docCode = (doc - lastDoc) << 1;
599             lastDoc = doc;
600 
601             //Get the frequency of the Term
602             int32_t freq = postings->freq();
603             if (freq == 1) {
604                 //write doc & freq=1
605                 freqOutput->writeVInt(docCode | 1);
606             } else {
607                 //write doc
608                 freqOutput->writeVInt(docCode);
609                 //write frequency in doc
610                 freqOutput->writeVInt(freq);
611             }
612 
613             int32_t lastPosition = 0;
614             // write position deltas
615             for (int32_t j = 0; j < freq; j++) {
616                 //Get the next position
617                 int32_t position = postings->nextPosition();
618                 //Write the difference between position and the last position
619                 proxOutput->writeVInt(position - lastPosition);
620                 lastPosition = position;
621             }
622         }
623 
624         i++;
625     }
626 
627     //Return total number of documents across all segments where term was found
628     return df;
629 }
630 
resetSkip()631 void SegmentMerger::resetSkip()
632 {
633     skipBuffer->reset();
634     lastSkipDoc = 0;
635     lastSkipFreqPointer = freqOutput->getFilePointer();
636     lastSkipProxPointer = proxOutput->getFilePointer();
637 }
638 
bufferSkip(int32_t doc)639 void SegmentMerger::bufferSkip(int32_t doc)
640 {
641     int64_t freqPointer = freqOutput->getFilePointer();
642     int64_t proxPointer = proxOutput->getFilePointer();
643 
644     skipBuffer->writeVInt(doc - lastSkipDoc);
645     skipBuffer->writeVInt((int32_t) (freqPointer - lastSkipFreqPointer));
646     skipBuffer->writeVInt((int32_t) (proxPointer - lastSkipProxPointer));
647 
648     lastSkipDoc = doc;
649     lastSkipFreqPointer = freqPointer;
650     lastSkipProxPointer = proxPointer;
651 }
652 
writeSkip()653 int64_t SegmentMerger::writeSkip()
654 {
655     int64_t skipPointer = freqOutput->getFilePointer();
656     skipBuffer->writeTo(freqOutput);
657     return skipPointer;
658 }
659 
660 // Func - Merges the norms for all fields
661 // Pre  - fieldInfos != NULL
662 // Post - The norms for all fields have been merged
mergeNorms()663 void SegmentMerger::mergeNorms()
664 {
665     CND_PRECONDITION(fieldInfos != NULL, "fieldInfos is NULL");
666 
667     //iterate through all the Field Infos instances
668     for (int32_t i = 0; i < fieldInfos->size(); i++) {
669         //Get the i-th FieldInfo
670         FieldInfo* fi = fieldInfos->fieldInfo(i);
671         //Is this Field indexed?
672         if (fi->isIndexed && !fi->omitNorms) {
673             //Create and Instantiate an IndexOutput to that norm file
674             QString buf = Misc::segmentname(segment, QLatin1String(".f"), i);
675             IndexOutput* output = directory->createOutput(buf);
676 
677             //Condition check to see if output points to a valid instance
678             CND_CONDITION(output != NULL, "No Outputstream retrieved");
679 
680             uint8_t* input = NULL;
681             try {
682                 for (uint32_t j = 0; j < readers.size(); ++j) {
683                     // get the next index reader + condition check
684                     IndexReader* reader = readers[j];
685                     CND_CONDITION(reader != NULL, "No reader found");
686 
687                     // Get the total number of documents including the documents
688                     // that have been marked deleted
689                     int32_t maxDoc = reader->maxDoc();
690                     if (maxDoc > 0) {
691                         // if there are docs, allocate buffer to read it's norms
692                         uint8_t* data = (uint8_t*)realloc(input, maxDoc *
693                             sizeof(uint8_t));
694                         if (data) {
695                             input = data;
696                             memset(input, 0, maxDoc * sizeof(uint8_t));
697                             // Get an IndexInput to the norm file for this
698                             // field in this segment
699                             reader->norms(fi->name, input);
700 
701                             //Iterate through all the documents
702                             for(int32_t k = 0; k < maxDoc; k++) {
703                                 //Check if document k is deleted
704                                 if (!reader->isDeleted(k)) {
705                                     //write the new norm
706                                     output->writeByte(input[k]);
707                                 }
708                             }
709                         }
710                     }
711                 }
712             } _CLFINALLY (
713                 if (output != NULL) {
714                     output->close();
715                     _CLDELETE(output);
716                 }
717                 free(input);
718             );
719         }
720     }
721 }
722 
723 CL_NS_END
724