1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.IO;
21 using Lucene.Net.Support;
22 using AbstractField = Lucene.Net.Documents.AbstractField;
23 using Document = Lucene.Net.Documents.Document;
24 using Directory = Lucene.Net.Store.Directory;
25 using FSDirectory = Lucene.Net.Store.FSDirectory;
26 using IndexInput = Lucene.Net.Store.IndexInput;
27 
28 namespace Lucene.Net.Index
29 {
30 
31 	/// <summary> Basic tool and API to check the health of an index and
32 	/// write a new segments file that removes reference to
33 	/// problematic segments.
34 	///
35 	/// <p/>As this tool checks every byte in the index, on a large
36 	/// index it can take quite a long time to run.
37 	///
	/// <p/><b>WARNING</b>: this tool and API are new and
	/// experimental and may change suddenly in the
	/// next release.  Please make a complete backup of your
	/// index before using this to fix your index!
42 	/// </summary>
43 	public class CheckIndex
44 	{
45 		private StreamWriter infoStream;
46 		private readonly Directory dir;
47 
48 		/// <summary> Returned from <see cref="CheckIndex_Renamed_Method()" /> detailing the health and status of the index.
49 		///
50 		/// <p/><b>WARNING</b>: this API is new and experimental and is
51 		/// subject to suddenly change in the next release.
52 		///
53 		/// </summary>
54 
55 		public class Status
56 		{
57 
58 			/// <summary>True if no problems were found with the index. </summary>
59 			public bool clean;
60 
61 			/// <summary>True if we were unable to locate and load the segments_N file. </summary>
62 			public bool missingSegments;
63 
64 			/// <summary>True if we were unable to open the segments_N file. </summary>
65 			public bool cantOpenSegments;
66 
67 			/// <summary>True if we were unable to read the version number from segments_N file. </summary>
68 			public bool missingSegmentVersion;
69 
70 			/// <summary>Name of latest segments_N file in the index. </summary>
71 			public System.String segmentsFileName;
72 
73 			/// <summary>Number of segments in the index. </summary>
74 			public int numSegments;
75 
76 			/// <summary>String description of the version of the index. </summary>
77 			public System.String segmentFormat;
78 
79 			/// <summary>Empty unless you passed specific segments list to check as optional 3rd argument.</summary>
80 			/// <seealso>
81 			///   <cref>CheckIndex.CheckIndex_Renamed_Method(System.Collections.IList)</cref>
82 			/// </seealso>
83 			public List<string> segmentsChecked = new List<string>();
84 
85 			/// <summary>True if the index was created with a newer version of Lucene than the CheckIndex tool. </summary>
86 			public bool toolOutOfDate;
87 
88 			/// <summary>List of <see cref="SegmentInfoStatus" /> instances, detailing status of each segment. </summary>
89 			public IList<SegmentInfoStatus> segmentInfos = new List<SegmentInfoStatus>();
90 
91 			/// <summary>Directory index is in. </summary>
92 			public Directory dir;
93 
94 			/// <summary> SegmentInfos instance containing only segments that
95 			/// had no problems (this is used with the <see cref="CheckIndex.FixIndex" />
96 			/// method to repair the index.
97 			/// </summary>
98 			internal SegmentInfos newSegments;
99 
100 			/// <summary>How many documents will be lost to bad segments. </summary>
101 			public int totLoseDocCount;
102 
103 			/// <summary>How many bad segments were found. </summary>
104 			public int numBadSegments;
105 
106 			/// <summary>True if we checked only specific segments (<see cref="CheckIndex.CheckIndex_Renamed_Method(List{string})" />)
107 			/// was called with non-null
108 			/// argument).
109 			/// </summary>
110 			public bool partial;
111 
112 			/// <summary>Holds the userData of the last commit in the index </summary>
113             public IDictionary<string, string> userData;
114 
115 			/// <summary>Holds the status of each segment in the index.
116 			/// See <see cref="SegmentInfos" />.
117 			///
118 			/// <p/><b>WARNING</b>: this API is new and experimental and is
119 			/// subject to suddenly change in the next release.
120 			/// </summary>
121 			public class SegmentInfoStatus
122 			{
123 				/// <summary>Name of the segment. </summary>
124 				public System.String name;
125 
126 				/// <summary>Document count (does not take deletions into account). </summary>
127 				public int docCount;
128 
129 				/// <summary>True if segment is compound file format. </summary>
130 				public bool compound;
131 
132 				/// <summary>Number of files referenced by this segment. </summary>
133 				public int numFiles;
134 
135 				/// <summary>Net size (MB) of the files referenced by this
136 				/// segment.
137 				/// </summary>
138 				public double sizeMB;
139 
140 				/// <summary>Doc store offset, if this segment shares the doc
141 				/// store files (stored fields and term vectors) with
142 				/// other segments.  This is -1 if it does not share.
143 				/// </summary>
144 				public int docStoreOffset = - 1;
145 
146 				/// <summary>String of the shared doc store segment, or null if
147 				/// this segment does not share the doc store files.
148 				/// </summary>
149 				public System.String docStoreSegment;
150 
151 				/// <summary>True if the shared doc store files are compound file
152 				/// format.
153 				/// </summary>
154 				public bool docStoreCompoundFile;
155 
156 				/// <summary>True if this segment has pending deletions. </summary>
157 				public bool hasDeletions;
158 
159 				/// <summary>Name of the current deletions file name. </summary>
160 				public System.String deletionsFileName;
161 
162 				/// <summary>Number of deleted documents. </summary>
163 				public int numDeleted;
164 
165 				/// <summary>True if we were able to open a SegmentReader on this
166 				/// segment.
167 				/// </summary>
168 				public bool openReaderPassed;
169 
170 				/// <summary>Number of fields in this segment. </summary>
171 				internal int numFields;
172 
173 				/// <summary>True if at least one of the fields in this segment
174 				/// does not omitTermFreqAndPositions.
175 				/// </summary>
176 				/// <seealso cref="AbstractField.OmitTermFreqAndPositions">
177 				/// </seealso>
178 				public bool hasProx;
179 
180                 /// <summary>Map&lt;String, String&gt; that includes certain
181 				/// debugging details that IndexWriter records into
182 				/// each segment it creates
183 				/// </summary>
184                 public IDictionary<string, string> diagnostics;
185 
186 				/// <summary>Status for testing of field norms (null if field norms could not be tested). </summary>
187 				public FieldNormStatus fieldNormStatus;
188 
189 				/// <summary>Status for testing of indexed terms (null if indexed terms could not be tested). </summary>
190 				public TermIndexStatus termIndexStatus;
191 
192 				/// <summary>Status for testing of stored fields (null if stored fields could not be tested). </summary>
193 				public StoredFieldStatus storedFieldStatus;
194 
195 				/// <summary>Status for testing of term vectors (null if term vectors could not be tested). </summary>
196 				public TermVectorStatus termVectorStatus;
197 			}
198 
199 			/// <summary> Status from testing field norms.</summary>
200 			public sealed class FieldNormStatus
201 			{
202 				/// <summary>Number of fields successfully tested </summary>
203 				public long totFields = 0L;
204 
205 				/// <summary>Exception thrown during term index test (null on success) </summary>
206 				public System.Exception error = null;
207 			}
208 
209 			/// <summary> Status from testing term index.</summary>
210 			public sealed class TermIndexStatus
211 			{
212 				/// <summary>Total term count </summary>
213 				public long termCount = 0L;
214 
215 				/// <summary>Total frequency across all terms. </summary>
216 				public long totFreq = 0L;
217 
218 				/// <summary>Total number of positions. </summary>
219 				public long totPos = 0L;
220 
221 				/// <summary>Exception thrown during term index test (null on success) </summary>
222 				public System.Exception error = null;
223 			}
224 
225 			/// <summary> Status from testing stored fields.</summary>
226 			public sealed class StoredFieldStatus
227 			{
228 
229 				/// <summary>Number of documents tested. </summary>
230 				public int docCount = 0;
231 
232 				/// <summary>Total number of stored fields tested. </summary>
233 				public long totFields = 0;
234 
235 				/// <summary>Exception thrown during stored fields test (null on success) </summary>
236 				public System.Exception error = null;
237 			}
238 
239 			/// <summary> Status from testing stored fields.</summary>
240 			public sealed class TermVectorStatus
241 			{
242 
243 				/// <summary>Number of documents tested. </summary>
244 				public int docCount = 0;
245 
246 				/// <summary>Total number of term vectors tested. </summary>
247 				public long totVectors = 0;
248 
249 				/// <summary>Exception thrown during term vector test (null on success) </summary>
250 				public System.Exception error = null;
251 			}
252 		}
253 
254 		/// <summary>Create a new CheckIndex on the directory. </summary>
CheckIndex(Directory dir)255 		public CheckIndex(Directory dir)
256 		{
257 			this.dir = dir;
258 			infoStream = null;
259 		}
260 
261 		/// <summary>Set infoStream where messages should go.  If null, no
262 		/// messages are printed
263 		/// </summary>
264 		public virtual void  SetInfoStream(StreamWriter @out)
265 		{
266 			infoStream = @out;
267 		}
268 
Msg(System.String msg)269 		private void  Msg(System.String msg)
270 		{
271 			if (infoStream != null)
272 				infoStream.WriteLine(msg);
273 		}
274 
275 		private class MySegmentTermDocs:SegmentTermDocs
276 		{
277 
278 			internal int delCount;
279 
MySegmentTermDocs(SegmentReader p)280 			internal MySegmentTermDocs(SegmentReader p):base(p)
281 			{
282 			}
283 
Seek(Term term)284 			public override void  Seek(Term term)
285 			{
286 				base.Seek(term);
287 				delCount = 0;
288 			}
289 
SkippingDoc()290 			protected internal override void  SkippingDoc()
291 			{
292 				delCount++;
293 			}
294 		}
295 
296 		/// <summary>Returns a <see cref="Status" /> instance detailing
297 		/// the state of the index.
298 		///
299 		/// <p/>As this method checks every byte in the index, on a large
300 		/// index it can take quite a long time to run.
301 		///
302 		/// <p/><b>WARNING</b>: make sure
303 		/// you only call this when the index is not opened by any
304 		/// writer.
305 		/// </summary>
CheckIndex_Renamed_Method()306 		public virtual Status CheckIndex_Renamed_Method()
307 		{
308 			return CheckIndex_Renamed_Method(null);
309 		}
310 
311 		/// <summary>Returns a <see cref="Status" /> instance detailing
312 		/// the state of the index.
313 		///
314 		/// </summary>
315 		/// <param name="onlySegments">list of specific segment names to check
316 		///
317 		/// <p/>As this method checks every byte in the specified
318 		/// segments, on a large index it can take quite a long
319 		/// time to run.
320 		///
321 		/// <p/><b>WARNING</b>: make sure
322 		/// you only call this when the index is not opened by any
323 		/// writer.
324 		/// </param>
CheckIndex_Renamed_Method(List<string> onlySegments)325 		public virtual Status CheckIndex_Renamed_Method(List<string> onlySegments)
326 		{
327             System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
328 			SegmentInfos sis = new SegmentInfos();
329 			Status result = new Status();
330 			result.dir = dir;
331 			try
332 			{
333 				sis.Read(dir);
334 			}
335 			catch (System.Exception t)
336 			{
337 				Msg("ERROR: could not read any segments file in directory");
338 				result.missingSegments = true;
339 				if (infoStream != null)
340 					infoStream.WriteLine(t.StackTrace);
341 				return result;
342 			}
343 
344 			int numSegments = sis.Count;
345 			var segmentsFileName = sis.GetCurrentSegmentFileName();
346 			IndexInput input = null;
347 			try
348 			{
349 				input = dir.OpenInput(segmentsFileName);
350 			}
351 			catch (System.Exception t)
352 			{
353 				Msg("ERROR: could not open segments file in directory");
354 				if (infoStream != null)
355 					infoStream.WriteLine(t.StackTrace);
356 				result.cantOpenSegments = true;
357 				return result;
358 			}
359 			int format = 0;
360 			try
361 			{
362 				format = input.ReadInt();
363 			}
364 			catch (System.Exception t)
365 			{
366 				Msg("ERROR: could not read segment file version in directory");
367 				if (infoStream != null)
368 					infoStream.WriteLine(t.StackTrace);
369 				result.missingSegmentVersion = true;
370 				return result;
371 			}
372 			finally
373 			{
374 				if (input != null)
375 					input.Close();
376 			}
377 
378 			System.String sFormat = "";
379 			bool skip = false;
380 
381 			if (format == SegmentInfos.FORMAT)
382 				sFormat = "FORMAT [Lucene Pre-2.1]";
383 			if (format == SegmentInfos.FORMAT_LOCKLESS)
384 				sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
385 			else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
386 				sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
387 			else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
388 				sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
389 			else
390 			{
391 				if (format == SegmentInfos.FORMAT_CHECKSUM)
392 					sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
393 				else if (format == SegmentInfos.FORMAT_DEL_COUNT)
394 					sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
395 				else if (format == SegmentInfos.FORMAT_HAS_PROX)
396 					sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
397 				else if (format == SegmentInfos.FORMAT_USER_DATA)
398 					sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
399 				else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
400 					sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
401 				else if (format < SegmentInfos.CURRENT_FORMAT)
402 				{
403 					sFormat = "int=" + format + " [newer version of Lucene than this tool]";
404 					skip = true;
405 				}
406 				else
407 				{
408 					sFormat = format + " [Lucene 1.3 or prior]";
409 				}
410 			}
411 
412 			result.segmentsFileName = segmentsFileName;
413 			result.numSegments = numSegments;
414 			result.segmentFormat = sFormat;
415 			result.userData = sis.UserData;
416 			System.String userDataString;
417 			if (sis.UserData.Count > 0)
418 			{
419 				userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData);
420 			}
421 			else
422 			{
423 				userDataString = "";
424 			}
425 
426 			Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);
427 
428 			if (onlySegments != null)
429 			{
430 				result.partial = true;
431 				if (infoStream != null)
432 					infoStream.Write("\nChecking only these segments:");
433                 foreach(string s in onlySegments)
434 				{
435 					if (infoStream != null)
436 					{
437 						infoStream.Write(" " + s);
438 					}
439 				}
440                 result.segmentsChecked.AddRange(onlySegments);
441                 Msg(":");
442 			}
443 
444 			if (skip)
445 			{
446 				Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
447 				result.toolOutOfDate = true;
448 				return result;
449 			}
450 
451 
452 			result.newSegments = (SegmentInfos) sis.Clone();
453 			result.newSegments.Clear();
454 
455 			for (int i = 0; i < numSegments; i++)
456 			{
457 				SegmentInfo info = sis.Info(i);
458 				if (onlySegments != null && !onlySegments.Contains(info.name))
459 					continue;
460 				var segInfoStat = new Status.SegmentInfoStatus();
461 				result.segmentInfos.Add(segInfoStat);
462 				Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
463 				segInfoStat.name = info.name;
464 				segInfoStat.docCount = info.docCount;
465 
466 				int toLoseDocCount = info.docCount;
467 
468 				SegmentReader reader = null;
469 
470 				try
471 				{
472 					Msg("    compound=" + info.GetUseCompoundFile());
473 					segInfoStat.compound = info.GetUseCompoundFile();
474 					Msg("    hasProx=" + info.HasProx);
475 					segInfoStat.hasProx = info.HasProx;
476 					Msg("    numFiles=" + info.Files().Count);
477 					segInfoStat.numFiles = info.Files().Count;
478 					Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
479 					segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
480                     IDictionary<string, string> diagnostics = info.Diagnostics;
481 					segInfoStat.diagnostics = diagnostics;
482 					if (diagnostics.Count > 0)
483 					{
484 						Msg("    diagnostics = " + CollectionsHelper.CollectionToString(diagnostics));
485 					}
486 
487 					int docStoreOffset = info.DocStoreOffset;
488 					if (docStoreOffset != - 1)
489 					{
490 						Msg("    docStoreOffset=" + docStoreOffset);
491 						segInfoStat.docStoreOffset = docStoreOffset;
492 						Msg("    docStoreSegment=" + info.DocStoreSegment);
493 						segInfoStat.docStoreSegment = info.DocStoreSegment;
494 						Msg("    docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile);
495 						segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile;
496 					}
497 					System.String delFileName = info.GetDelFileName();
498 					if (delFileName == null)
499 					{
500 						Msg("    no deletions");
501 						segInfoStat.hasDeletions = false;
502 					}
503 					else
504 					{
505 						Msg("    has deletions [delFileName=" + delFileName + "]");
506 						segInfoStat.hasDeletions = true;
507 						segInfoStat.deletionsFileName = delFileName;
508 					}
509 					if (infoStream != null)
510 						infoStream.Write("    test: open reader.........");
511 					reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
512 
513 					segInfoStat.openReaderPassed = true;
514 
515 					int numDocs = reader.NumDocs();
516 					toLoseDocCount = numDocs;
517 					if (reader.HasDeletions)
518 					{
519 						if (reader.deletedDocs.Count() != info.GetDelCount())
520 						{
521 							throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
522 						}
523 						if (reader.deletedDocs.Count() > reader.MaxDoc)
524 						{
525 							throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
526 						}
527 						if (info.docCount - numDocs != info.GetDelCount())
528 						{
529 							throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
530 						}
531 						segInfoStat.numDeleted = info.docCount - numDocs;
532 						Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
533 					}
534 					else
535 					{
536 						if (info.GetDelCount() != 0)
537 						{
538 							throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
539 						}
540 						Msg("OK");
541 					}
542 					if (reader.MaxDoc != info.docCount)
543 						throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount);
544 
545 					// Test getFieldNames()
546 					if (infoStream != null)
547 					{
548 						infoStream.Write("    test: fields..............");
549 					}
550                     ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
551 					Msg("OK [" + fieldNames.Count + " fields]");
552 					segInfoStat.numFields = fieldNames.Count;
553 
554 					// Test Field Norms
555 					segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);
556 
557 					// Test the Term Index
558 					segInfoStat.termIndexStatus = TestTermIndex(info, reader);
559 
560 					// Test Stored Fields
561 					segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);
562 
563 					// Test Term Vectors
564 					segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);
565 
566 					// Rethrow the first exception we encountered
567 					//  This will cause stats for failed segments to be incremented properly
568 					if (segInfoStat.fieldNormStatus.error != null)
569 					{
570 						throw new SystemException("Field Norm test failed");
571 					}
572 					else if (segInfoStat.termIndexStatus.error != null)
573 					{
574 						throw new SystemException("Term Index test failed");
575 					}
576 					else if (segInfoStat.storedFieldStatus.error != null)
577 					{
578 						throw new SystemException("Stored Field test failed");
579 					}
580 					else if (segInfoStat.termVectorStatus.error != null)
581 					{
582 						throw new System.SystemException("Term Vector test failed");
583 					}
584 
585 					Msg("");
586 				}
587 				catch (System.Exception t)
588 				{
589 					Msg("FAILED");
590 					const string comment = "fixIndex() would remove reference to this segment";
591 					Msg("    WARNING: " + comment + "; full exception:");
592 					if (infoStream != null)
593 						infoStream.WriteLine(t.StackTrace);
594 					Msg("");
595 					result.totLoseDocCount += toLoseDocCount;
596 					result.numBadSegments++;
597 					continue;
598 				}
599 				finally
600 				{
601 					if (reader != null)
602 						reader.Close();
603 				}
604 
605 				// Keeper
606 				result.newSegments.Add((SegmentInfo)info.Clone());
607 			}
608 
609 			if (0 == result.numBadSegments)
610 			{
611 				result.clean = true;
612 				Msg("No problems were detected with this index.\n");
613 			}
614 			else
615 				Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
616 
617 			return result;
618 		}
619 
620 		/// <summary> Test field norms.</summary>
TestFieldNorms(IEnumerable<string> fieldNames, SegmentReader reader)621         private Status.FieldNormStatus TestFieldNorms(IEnumerable<string> fieldNames, SegmentReader reader)
622 		{
623 			var status = new Status.FieldNormStatus();
624 
625 			try
626 			{
627 				// Test Field Norms
628 				if (infoStream != null)
629 				{
630 					infoStream.Write("    test: field norms.........");
631 				}
632 
633 				var b = new byte[reader.MaxDoc];
634 				foreach(string fieldName in fieldNames)
635 				{
636                     if (reader.HasNorms(fieldName))
637                     {
638                         reader.Norms(fieldName, b, 0);
639                         ++status.totFields;
640                     }
641 				}
642 
643 				Msg("OK [" + status.totFields + " fields]");
644 			}
645 			catch (System.Exception e)
646 			{
647 				Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
648 				status.error = e;
649 				if (infoStream != null)
650 				{
651 					infoStream.WriteLine(e.StackTrace);
652 				}
653 			}
654 
655 			return status;
656 		}
657 
658 		/// <summary> Test the term index.</summary>
TestTermIndex(SegmentInfo info, SegmentReader reader)659 		private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
660 		{
661 			var status = new Status.TermIndexStatus();
662 
663 			try
664 			{
665 				if (infoStream != null)
666 				{
667 					infoStream.Write("    test: terms, freq, prox...");
668 				}
669 
670 				TermEnum termEnum = reader.Terms();
671 				TermPositions termPositions = reader.TermPositions();
672 
673 				// Used only to count up # deleted docs for this term
674 				var myTermDocs = new MySegmentTermDocs(reader);
675 
676 				int maxDoc = reader.MaxDoc;
677 
678 				while (termEnum.Next())
679 				{
680 					status.termCount++;
681 					Term term = termEnum.Term;
682 					int docFreq = termEnum.DocFreq();
683 					termPositions.Seek(term);
684 					int lastDoc = - 1;
685 					int freq0 = 0;
686 					status.totFreq += docFreq;
687 					while (termPositions.Next())
688 					{
689 						freq0++;
690 						int doc = termPositions.Doc;
691 						int freq = termPositions.Freq;
692 						if (doc <= lastDoc)
693 						{
694 							throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
695 						}
696 						if (doc >= maxDoc)
697 						{
698 							throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
699 						}
700 
701 						lastDoc = doc;
702 						if (freq <= 0)
703 						{
704 							throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
705 						}
706 
707 						int lastPos = - 1;
708 						status.totPos += freq;
709 						for (int j = 0; j < freq; j++)
710 						{
711 							int pos = termPositions.NextPosition();
712 							if (pos < - 1)
713 							{
714 								throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
715 							}
716 							if (pos < lastPos)
717 							{
718 								throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
719 							}
720 						    lastPos = pos;
721 						}
722 					}
723 
724 					// Now count how many deleted docs occurred in
725 					// this term:
726 					int delCount;
727 					if (reader.HasDeletions)
728 					{
729 						myTermDocs.Seek(term);
730 						while (myTermDocs.Next())
731 						{
732 						}
733 						delCount = myTermDocs.delCount;
734 					}
735 					else
736 					{
737 						delCount = 0;
738 					}
739 
740 					if (freq0 + delCount != docFreq)
741 					{
742 						throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
743 					}
744 				}
745 
746 				Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
747 			}
748 			catch (System.Exception e)
749 			{
750 				Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
751 				status.error = e;
752 				if (infoStream != null)
753 				{
754 					infoStream.WriteLine(e.StackTrace);
755 				}
756 			}
757 
758 			return status;
759 		}
760 
761 		/// <summary> Test stored fields for a segment.</summary>
TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)762 		private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
763 		{
764 			var status = new Status.StoredFieldStatus();
765 
766 			try
767 			{
768 				if (infoStream != null)
769 				{
770 					infoStream.Write("    test: stored fields.......");
771 				}
772 
773 				// Scan stored fields for all documents
774 				for (int j = 0; j < info.docCount; ++j)
775 				{
776 					if (!reader.IsDeleted(j))
777 					{
778 						status.docCount++;
779 						Document doc = reader.Document(j);
780 						status.totFields += doc.GetFields().Count;
781 					}
782 				}
783 
784 				// Validate docCount
785 				if (status.docCount != reader.NumDocs())
786 				{
787 					throw new System.SystemException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs");
788 				}
789 
790                 Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float) status.totFields) / status.docCount) }));
791             }
792 			catch (System.Exception e)
793 			{
794 				Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
795 				status.error = e;
796 				if (infoStream != null)
797 				{
798 					infoStream.WriteLine(e.StackTrace);
799 				}
800 			}
801 
802 			return status;
803 		}
804 
805 		/// <summary> Test term vectors for a segment.</summary>
TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)806         private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
807 		{
808 			var status = new Status.TermVectorStatus();
809 
810 			try
811 			{
812 				if (infoStream != null)
813 				{
814 					infoStream.Write("    test: term vectors........");
815 				}
816 
817 				for (int j = 0; j < info.docCount; ++j)
818 				{
819 					if (!reader.IsDeleted(j))
820 					{
821 						status.docCount++;
822 						ITermFreqVector[] tfv = reader.GetTermFreqVectors(j);
823 						if (tfv != null)
824 						{
825 							status.totVectors += tfv.Length;
826 						}
827 					}
828 				}
829 
830                 Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float) status.totVectors) / status.docCount) }));
831             }
832 			catch (System.Exception e)
833 			{
834 				Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
835 				status.error = e;
836 				if (infoStream != null)
837 				{
838 					infoStream.WriteLine(e.StackTrace);
839 				}
840 			}
841 
842 			return status;
843 		}
844 
845 		/// <summary>Repairs the index using previously returned result
846 		/// from <see cref="CheckIndex" />.  Note that this does not
847 		/// remove any of the unreferenced files after it's done;
848 		/// you must separately open an <see cref="IndexWriter" />, which
849 		/// deletes unreferenced files when it's created.
850 		///
851 		/// <p/><b>WARNING</b>: this writes a
852 		/// new segments file into the index, effectively removing
853 		/// all documents in broken segments from the index.
854 		/// BE CAREFUL.
855 		///
856 		/// <p/><b>WARNING</b>: Make sure you only call this when the
857 		/// index is not opened  by any writer.
858 		/// </summary>
FixIndex(Status result)859 		public virtual void  FixIndex(Status result)
860 		{
861 			if (result.partial)
862 				throw new System.ArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
863 			result.newSegments.Commit(result.dir);
864 		}
865 
866 		private static bool assertsOn;
867 
TestAsserts()868 		private static bool TestAsserts()
869 		{
870 			assertsOn = true;
871 			return true;
872 		}
873 
AssertsOn()874 		private static bool AssertsOn()
875 		{
876 			System.Diagnostics.Debug.Assert(TestAsserts());
877 			return assertsOn;
878 		}
879 
880 		/// <summary>Command-line interface to check and fix an index.
881 		/// <p/>
882 		/// Run it like this:
883         /// <code>
884 		/// java -ea:Lucene.Net... Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
885         /// </code>
886 		/// <list type="bullet">
887 		/// <item><c>-fix</c>: actually write a new segments_N file, removing any problematic segments</item>
888 		/// <item><c>-segment X</c>: only check the specified
889 		/// segment(s).  This can be specified multiple times,
890 		/// to check more than one segment, eg <c>-segment _2
891 		/// -segment _a</c>.  You can't use this with the -fix
892 		/// option.</item>
893 		/// </list>
894 		/// <p/><b>WARNING</b>: <c>-fix</c> should only be used on an emergency basis as it will cause
895 		/// documents (perhaps many) to be permanently removed from the index.  Always make
896 		/// a backup copy of your index before running this!  Do not run this tool on an index
897 		/// that is actively being written to.  You have been warned!
898 		/// <p/>                Run without -fix, this tool will open the index, report version information
899 		/// and report any exceptions it hits and what action it would take if -fix were
900 		/// specified.  With -fix, this tool will remove any segments that have issues and
901 		/// write a new segments_N file.  This means all documents contained in the affected
902 		/// segments will be removed.
903 		/// <p/>
904 		/// This tool exits with exit code 1 if the index cannot be opened or has any
905 		/// corruption, else 0.
906 		/// </summary>
907 		[STAThread]
Main(System.String[] args)908 		public static void  Main(System.String[] args)
909 		{
910 
911 			bool doFix = false;
912 			var onlySegments = new List<string>();
913 			System.String indexPath = null;
914 			int i = 0;
915 			while (i < args.Length)
916 			{
917 				if (args[i].Equals("-fix"))
918 				{
919 					doFix = true;
920 					i++;
921 				}
922 				else if (args[i].Equals("-segment"))
923 				{
924 					if (i == args.Length - 1)
925 					{
926 						System.Console.Out.WriteLine("ERROR: missing name for -segment option");
927 						System.Environment.Exit(1);
928 					}
929 					onlySegments.Add(args[i + 1]);
930 					i += 2;
931 				}
932 				else
933 				{
934 					if (indexPath != null)
935 					{
936 						System.Console.Out.WriteLine("ERROR: unexpected extra argument '" + args[i] + "'");
937 						System.Environment.Exit(1);
938 					}
939 					indexPath = args[i];
940 					i++;
941 				}
942 			}
943 
944 			if (indexPath == null)
945 			{
946 				System.Console.Out.WriteLine("\nERROR: index path not specified");
947 				System.Console.Out.WriteLine("\nUsage: java Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" + "\n" + "  -fix: actually write a new segments_N file, removing any problematic segments\n" + "  -segment X: only check the specified segments.  This can be specified multiple\n" + "              times, to check more than one segment, eg '-segment _2 -segment _a'.\n" + "              You can't use this with the -fix option\n" + "\n" + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" + "documents (perhaps many) to be permanently removed from the index.  Always make\n" + "a backup copy of your index before running this!  Do not run this tool on an index\n" + "that is actively being written to.  You have been warned!\n" + "\n" + "Run without -fix, this tool will open the index, report version information\n" + "and report any exceptions it hits and what action it would take if -fix were\n" + "specified.  With -fix, this tool will remove any segments that have issues and\n" + "write a new segments_N file.  This means all documents contained in the affected\n" + "segments will be removed.\n" + "\n" + "This tool exits with exit code 1 if the index cannot be opened or has any\n" + "corruption, else 0.\n");
948 				System.Environment.Exit(1);
949 			}
950 
951 			if (!AssertsOn())
952 				System.Console.Out.WriteLine("\nNOTE: testing will be more thorough if you run java with '-ea:Lucene.Net...', so assertions are enabled");
953 
954 			if (onlySegments.Count == 0)
955 				onlySegments = null;
956 			else if (doFix)
957 			{
958 				System.Console.Out.WriteLine("ERROR: cannot specify both -fix and -segment");
959 				System.Environment.Exit(1);
960 			}
961 
962 			System.Console.Out.WriteLine("\nOpening index @ " + indexPath + "\n");
963 			Directory dir = null;
964 			try
965 			{
966 				dir = FSDirectory.Open(new System.IO.DirectoryInfo(indexPath));
967 			}
968 			catch (Exception t)
969 			{
970 				Console.Out.WriteLine("ERROR: could not open directory \"" + indexPath + "\"; exiting");
971 				Console.Out.WriteLine(t.StackTrace);
972 				Environment.Exit(1);
973 			}
974 
975 			var checker = new CheckIndex(dir);
976 			var tempWriter = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding)
977 			                 	{AutoFlush = true};
978 			checker.SetInfoStream(tempWriter);
979 
980 			Status result = checker.CheckIndex_Renamed_Method(onlySegments);
981 			if (result.missingSegments)
982 			{
983 				System.Environment.Exit(1);
984 			}
985 
986 			if (!result.clean)
987 			{
988 				if (!doFix)
989 				{
990 					System.Console.Out.WriteLine("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
991 				}
992 				else
993 				{
994 					Console.Out.WriteLine("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
995 					Console.Out.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
996 					for (var s = 0; s < 5; s++)
997 					{
998 						System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000));
999 						System.Console.Out.WriteLine("  " + (5 - s) + "...");
1000 					}
1001 					Console.Out.WriteLine("Writing...");
1002 					checker.FixIndex(result);
1003 					Console.Out.WriteLine("OK");
1004 					Console.Out.WriteLine("Wrote new segments file \"" + result.newSegments.GetCurrentSegmentFileName() + "\"");
1005 				}
1006 			}
1007 			System.Console.Out.WriteLine("");
1008 
1009 			int exitCode;
1010 			if (result != null && result.clean == true)
1011 				exitCode = 0;
1012 			else
1013 				exitCode = 1;
1014 			System.Environment.Exit(exitCode);
1015 		}
1016 	}
1017 }