/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.Collections.Generic;
using System.IO;
using Lucene.Net.Support;
using AbstractField = Lucene.Net.Documents.AbstractField;
using Document = Lucene.Net.Documents.Document;
using Directory = Lucene.Net.Store.Directory;
using FSDirectory = Lucene.Net.Store.FSDirectory;
using IndexInput = Lucene.Net.Store.IndexInput;

namespace Lucene.Net.Index
{

    /// <summary> Basic tool and API to check the health of an index and
    /// write a new segments file that removes reference to
    /// problematic segments.
    ///
    /// <p/>As this tool checks every byte in the index, on a large
    /// index it can take quite a long time to run.
    ///
    /// <p/><b>WARNING</b>: this tool and API is new and
    /// experimental and is subject to suddenly change in the
    /// next release. Please make a complete backup of your
    /// index before using this to fix your index!
    /// </summary>
    public class CheckIndex
    {
        // Destination for progress/diagnostic messages; null means "print nothing".
        private StreamWriter infoStream;

        // Directory holding the index under test.
        private readonly Directory dir;

        /// <summary> Returned from <see cref="CheckIndex_Renamed_Method()" /> detailing the health and status of the index.
        ///
        /// <p/><b>WARNING</b>: this API is new and experimental and is
        /// subject to suddenly change in the next release.
        ///
        /// </summary>
        public class Status
        {

            /// <summary>True if no problems were found with the index. </summary>
            public bool clean;

            /// <summary>True if we were unable to locate and load the segments_N file. </summary>
            public bool missingSegments;

            /// <summary>True if we were unable to open the segments_N file. </summary>
            public bool cantOpenSegments;

            /// <summary>True if we were unable to read the version number from segments_N file. </summary>
            public bool missingSegmentVersion;

            /// <summary>Name of latest segments_N file in the index. </summary>
            public System.String segmentsFileName;

            /// <summary>Number of segments in the index. </summary>
            public int numSegments;

            /// <summary>String description of the version of the index. </summary>
            public System.String segmentFormat;

            /// <summary>Empty unless a specific list of segments to check was passed to
            /// <see cref="CheckIndex.CheckIndex_Renamed_Method(List{string})" />.
            /// </summary>
            /// <seealso cref="CheckIndex.CheckIndex_Renamed_Method(List{string})" />
            public List<string> segmentsChecked = new List<string>();

            /// <summary>True if the index was created with a newer version of Lucene than the CheckIndex tool. </summary>
            public bool toolOutOfDate;

            /// <summary>List of <see cref="SegmentInfoStatus" /> instances, detailing status of each segment. </summary>
            public IList<SegmentInfoStatus> segmentInfos = new List<SegmentInfoStatus>();

            /// <summary>Directory index is in. </summary>
            public Directory dir;

            /// <summary> SegmentInfos instance containing only segments that
            /// had no problems (this is used with the <see cref="CheckIndex.FixIndex" />
            /// method to repair the index). Remains null when the check
            /// returned before any segment was examined.
            /// </summary>
            internal SegmentInfos newSegments;

            /// <summary>How many documents will be lost to bad segments. </summary>
            public int totLoseDocCount;

            /// <summary>How many bad segments were found. </summary>
            public int numBadSegments;

            /// <summary>True if we checked only specific segments
            /// (<see cref="CheckIndex.CheckIndex_Renamed_Method(List{string})" />
            /// was called with non-null argument).
            /// </summary>
            public bool partial;

            /// <summary>Holds the userData of the last commit in the index </summary>
            public IDictionary<string, string> userData;

            /// <summary>Holds the status of each segment in the index.
            /// See <see cref="SegmentInfos" />.
            ///
            /// <p/><b>WARNING</b>: this API is new and experimental and is
            /// subject to suddenly change in the next release.
            /// </summary>
            public class SegmentInfoStatus
            {
                /// <summary>Name of the segment. </summary>
                public System.String name;

                /// <summary>Document count (does not take deletions into account). </summary>
                public int docCount;

                /// <summary>True if segment is compound file format. </summary>
                public bool compound;

                /// <summary>Number of files referenced by this segment. </summary>
                public int numFiles;

                /// <summary>Net size (MB) of the files referenced by this
                /// segment.
                /// </summary>
                public double sizeMB;

                /// <summary>Doc store offset, if this segment shares the doc
                /// store files (stored fields and term vectors) with
                /// other segments. This is -1 if it does not share.
                /// </summary>
                public int docStoreOffset = -1;

                /// <summary>String of the shared doc store segment, or null if
                /// this segment does not share the doc store files.
                /// </summary>
                public System.String docStoreSegment;

                /// <summary>True if the shared doc store files are compound file
                /// format.
                /// </summary>
                public bool docStoreCompoundFile;

                /// <summary>True if this segment has pending deletions. </summary>
                public bool hasDeletions;

                /// <summary>Name of the current deletions file name. </summary>
                public System.String deletionsFileName;

                /// <summary>Number of deleted documents. </summary>
                public int numDeleted;

                /// <summary>True if we were able to open a SegmentReader on this
                /// segment.
                /// </summary>
                public bool openReaderPassed;

                /// <summary>Number of fields in this segment. </summary>
                internal int numFields;

                /// <summary>True if at least one of the fields in this segment
                /// does not omitTermFreqAndPositions.
                /// </summary>
                /// <seealso cref="AbstractField.OmitTermFreqAndPositions">
                /// </seealso>
                public bool hasProx;

                /// <summary>Map&lt;String, String&gt; that includes certain
                /// debugging details that IndexWriter records into
                /// each segment it creates
                /// </summary>
                public IDictionary<string, string> diagnostics;

                /// <summary>Status for testing of field norms (null if field norms could not be tested). </summary>
                public FieldNormStatus fieldNormStatus;

                /// <summary>Status for testing of indexed terms (null if indexed terms could not be tested). </summary>
                public TermIndexStatus termIndexStatus;

                /// <summary>Status for testing of stored fields (null if stored fields could not be tested). </summary>
                public StoredFieldStatus storedFieldStatus;

                /// <summary>Status for testing of term vectors (null if term vectors could not be tested). </summary>
                public TermVectorStatus termVectorStatus;
            }

            /// <summary> Status from testing field norms.</summary>
            public sealed class FieldNormStatus
            {
                /// <summary>Number of fields successfully tested </summary>
                public long totFields = 0L;

                /// <summary>Exception thrown during field norm test (null on success) </summary>
                public System.Exception error = null;
            }

            /// <summary> Status from testing term index.</summary>
            public sealed class TermIndexStatus
            {
                /// <summary>Total term count </summary>
                public long termCount = 0L;

                /// <summary>Total frequency across all terms. </summary>
                public long totFreq = 0L;

                /// <summary>Total number of positions. </summary>
                public long totPos = 0L;

                /// <summary>Exception thrown during term index test (null on success) </summary>
                public System.Exception error = null;
            }

            /// <summary> Status from testing stored fields.</summary>
            public sealed class StoredFieldStatus
            {

                /// <summary>Number of documents tested. </summary>
                public int docCount = 0;

                /// <summary>Total number of stored fields tested. </summary>
                public long totFields = 0;

                /// <summary>Exception thrown during stored fields test (null on success) </summary>
                public System.Exception error = null;
            }

            /// <summary> Status from testing term vectors.</summary>
            public sealed class TermVectorStatus
            {

                /// <summary>Number of documents tested. </summary>
                public int docCount = 0;

                /// <summary>Total number of term vectors tested. </summary>
                public long totVectors = 0;

                /// <summary>Exception thrown during term vector test (null on success) </summary>
                public System.Exception error = null;
            }
        }

        /// <summary>Create a new CheckIndex on the directory. </summary>
        /// <param name="dir">Directory containing the index to check.</param>
        public CheckIndex(Directory dir)
        {
            this.dir = dir;
            infoStream = null;
        }

        /// <summary>Set infoStream where messages should go. If null, no
        /// messages are printed
        /// </summary>
        public virtual void SetInfoStream(StreamWriter @out)
        {
            infoStream = @out;
        }

        /// <summary>Writes one line to the info stream, if one has been set.</summary>
        private void Msg(System.String msg)
        {
            if (infoStream != null)
                infoStream.WriteLine(msg);
        }

        /// <summary>SegmentTermDocs that counts how often <see cref="SkippingDoc" />
        /// is invoked since the last <see cref="Seek(Term)" />; used by the term
        /// index test to count deleted documents for a term.
        /// </summary>
        private class MySegmentTermDocs : SegmentTermDocs
        {

            internal int delCount;

            internal MySegmentTermDocs(SegmentReader p) : base(p)
            {
            }

            public override void Seek(Term term)
            {
                base.Seek(term);
                // Restart the count for the newly selected term.
                delCount = 0;
            }

            protected internal override void SkippingDoc()
            {
                delCount++;
            }
        }

        /// <summary>Returns a <see cref="Status" /> instance detailing
        /// the state of the index.
        ///
        /// <p/>As this method checks every byte in the index, on a large
        /// index it can take quite a long time to run.
        ///
        /// <p/><b>WARNING</b>: make sure
        /// you only call this when the index is not opened by any
        /// writer.
        /// </summary>
        public virtual Status CheckIndex_Renamed_Method()
        {
            return CheckIndex_Renamed_Method(null);
        }

        /// <summary>Returns a <see cref="Status" /> instance detailing
        /// the state of the index.
        ///
        /// </summary>
        /// <param name="onlySegments">list of specific segment names to check
        /// (null checks every segment)
        ///
        /// <p/>As this method checks every byte in the specified
        /// segments, on a large index it can take quite a long
        /// time to run.
        ///
        /// <p/><b>WARNING</b>: make sure
        /// you only call this when the index is not opened by any
        /// writer.
        /// </param>
        /// <returns>The populated status. When the segments file cannot be read or opened,
        /// its version cannot be read, or the index is newer than this tool, the method
        /// returns early with the matching flag set and no per-segment results.</returns>
        public virtual Status CheckIndex_Renamed_Method(List<string> onlySegments)
        {
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis = new SegmentInfos();
            Status result = new Status();
            result.dir = dir;
            try
            {
                sis.Read(dir);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read any segments file in directory");
                result.missingSegments = true;
                if (infoStream != null)
                    infoStream.WriteLine(t.StackTrace);
                return result;
            }

            int numSegments = sis.Count;
            var segmentsFileName = sis.GetCurrentSegmentFileName();
            IndexInput input = null;
            try
            {
                input = dir.OpenInput(segmentsFileName);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not open segments file in directory");
                if (infoStream != null)
                    infoStream.WriteLine(t.StackTrace);
                result.cantOpenSegments = true;
                return result;
            }
            int format = 0;
            try
            {
                format = input.ReadInt();
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read segment file version in directory");
                if (infoStream != null)
                    infoStream.WriteLine(t.StackTrace);
                result.missingSegmentVersion = true;
                return result;
            }
            finally
            {
                if (input != null)
                    input.Close();
            }

            System.String sFormat = "";
            bool skip = false;

            // One exclusive chain: exactly one description is chosen. (The first two
            // tests used to be independent "if" statements, so a FORMAT index fell
            // through to the final fallback and was mislabelled.)
            if (format == SegmentInfos.FORMAT)
            {
                sFormat = "FORMAT [Lucene Pre-2.1]";
            }
            else if (format == SegmentInfos.FORMAT_LOCKLESS)
            {
                sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
            }
            else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
            {
                sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
            }
            else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
            }
            else if (format == SegmentInfos.FORMAT_CHECKSUM)
            {
                sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
            }
            else if (format == SegmentInfos.FORMAT_DEL_COUNT)
            {
                sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
            }
            else if (format == SegmentInfos.FORMAT_HAS_PROX)
            {
                sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
            }
            else if (format == SegmentInfos.FORMAT_USER_DATA)
            {
                sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
            }
            else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
            {
                sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
            }
            else if (format < SegmentInfos.CURRENT_FORMAT)
            {
                // A value below CURRENT_FORMAT is treated as written by a newer Lucene.
                sFormat = "int=" + format + " [newer version of Lucene than this tool]";
                skip = true;
            }
            else
            {
                sFormat = format + " [Lucene 1.3 or prior]";
            }

            result.segmentsFileName = segmentsFileName;
            result.numSegments = numSegments;
            result.segmentFormat = sFormat;
            result.userData = sis.UserData;
            System.String userDataString;
            if (sis.UserData.Count > 0)
            {
                userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData);
            }
            else
            {
                userDataString = "";
            }

            Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

            if (onlySegments != null)
            {
                result.partial = true;
                if (infoStream != null)
                {
                    infoStream.Write("\nChecking only these segments:");
                    foreach (string s in onlySegments)
                    {
                        infoStream.Write(" " + s);
                    }
                }
                result.segmentsChecked.AddRange(onlySegments);
                Msg(":");
            }

            if (skip)
            {
                Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                result.toolOutOfDate = true;
                return result;
            }

            // Start from a copy of the commit with no segments; healthy segments are
            // added back one by one below.
            result.newSegments = (SegmentInfos) sis.Clone();
            result.newSegments.Clear();

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = sis.Info(i);
                if (onlySegments != null && !onlySegments.Contains(info.name))
                    continue;
                var segInfoStat = new Status.SegmentInfoStatus();
                result.segmentInfos.Add(segInfoStat);
                Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
                segInfoStat.name = info.name;
                segInfoStat.docCount = info.docCount;

                // Until a reader is open, assume the whole segment would be lost.
                int toLoseDocCount = info.docCount;

                SegmentReader reader = null;

                try
                {
                    Msg(" compound=" + info.GetUseCompoundFile());
                    segInfoStat.compound = info.GetUseCompoundFile();
                    Msg(" hasProx=" + info.HasProx);
                    segInfoStat.hasProx = info.HasProx;
                    Msg(" numFiles=" + info.Files().Count);
                    segInfoStat.numFiles = info.Files().Count;
                    Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
                    segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
                    IDictionary<string, string> diagnostics = info.Diagnostics;
                    segInfoStat.diagnostics = diagnostics;
                    if (diagnostics.Count > 0)
                    {
                        Msg(" diagnostics = " + CollectionsHelper.CollectionToString(diagnostics));
                    }

                    int docStoreOffset = info.DocStoreOffset;
                    if (docStoreOffset != -1)
                    {
                        Msg(" docStoreOffset=" + docStoreOffset);
                        segInfoStat.docStoreOffset = docStoreOffset;
                        Msg(" docStoreSegment=" + info.DocStoreSegment);
                        segInfoStat.docStoreSegment = info.DocStoreSegment;
                        Msg(" docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile);
                        segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile;
                    }
                    System.String delFileName = info.GetDelFileName();
                    if (delFileName == null)
                    {
                        Msg(" no deletions");
                        segInfoStat.hasDeletions = false;
                    }
                    else
                    {
                        Msg(" has deletions [delFileName=" + delFileName + "]");
                        segInfoStat.hasDeletions = true;
                        segInfoStat.deletionsFileName = delFileName;
                    }
                    if (infoStream != null)
                        infoStream.Write(" test: open reader.........");
                    reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);

                    segInfoStat.openReaderPassed = true;

                    int numDocs = reader.NumDocs();
                    // With a reader open, only the live documents count as lost.
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions)
                    {
                        if (reader.deletedDocs.Count() != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (reader.deletedDocs.Count() > reader.MaxDoc)
                        {
                            throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (info.docCount - numDocs != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        segInfoStat.numDeleted = info.docCount - numDocs;
                        Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
                    }
                    else
                    {
                        if (info.GetDelCount() != 0)
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        Msg("OK");
                    }
                    if (reader.MaxDoc != info.docCount)
                        throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount);

                    // Test getFieldNames()
                    if (infoStream != null)
                    {
                        infoStream.Write(" test: fields..............");
                    }
                    ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
                    Msg("OK [" + fieldNames.Count + " fields]");
                    segInfoStat.numFields = fieldNames.Count;

                    // Test Field Norms
                    segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

                    // Test the Term Index
                    segInfoStat.termIndexStatus = TestTermIndex(info, reader);

                    // Test Stored Fields
                    segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

                    // Test Term Vectors
                    segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

                    // Rethrow the first exception we encountered
                    // This will cause stats for failed segments to be incremented properly
                    if (segInfoStat.fieldNormStatus.error != null)
                    {
                        throw new SystemException("Field Norm test failed");
                    }
                    else if (segInfoStat.termIndexStatus.error != null)
                    {
                        throw new SystemException("Term Index test failed");
                    }
                    else if (segInfoStat.storedFieldStatus.error != null)
                    {
                        throw new SystemException("Stored Field test failed");
                    }
                    else if (segInfoStat.termVectorStatus.error != null)
                    {
                        throw new System.SystemException("Term Vector test failed");
                    }

                    Msg("");
                }
                catch (System.Exception t)
                {
                    Msg("FAILED");
                    const string comment = "fixIndex() would remove reference to this segment";
                    Msg(" WARNING: " + comment + "; full exception:");
                    if (infoStream != null)
                        infoStream.WriteLine(t.StackTrace);
                    Msg("");
                    result.totLoseDocCount += toLoseDocCount;
                    result.numBadSegments++;
                    // Broken segment: do not add it to newSegments.
                    continue;
                }
                finally
                {
                    if (reader != null)
                        reader.Close();
                }

                // Keeper
                result.newSegments.Add((SegmentInfo) info.Clone());
            }

            if (0 == result.numBadSegments)
            {
                result.clean = true;
                Msg("No problems were detected with this index.\n");
            }
            else
            {
                Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
            }

            return result;
        }

        /// <summary> Test field norms.</summary>
        /// <returns>Status with the number of fields whose norms were read; any exception
        /// is recorded in <c>error</c> rather than propagated.</returns>
        private Status.FieldNormStatus TestFieldNorms(IEnumerable<string> fieldNames, SegmentReader reader)
        {
            var status = new Status.FieldNormStatus();

            try
            {
                // Test Field Norms
                if (infoStream != null)
                {
                    infoStream.Write(" test: field norms.........");
                }

                // Scratch buffer reused for every field: one byte per document.
                var b = new byte[reader.MaxDoc];
                foreach (string fieldName in fieldNames)
                {
                    if (reader.HasNorms(fieldName))
                    {
                        reader.Norms(fieldName, b, 0);
                        ++status.totFields;
                    }
                }

                Msg("OK [" + status.totFields + " fields]");
            }
            catch (System.Exception e)
            {
                Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
                status.error = e;
                if (infoStream != null)
                {
                    infoStream.WriteLine(e.StackTrace);
                }
            }

            return status;
        }

        /// <summary> Test the term index.</summary>
        /// <returns>Status with term, frequency and position totals; any exception
        /// is recorded in <c>error</c> rather than propagated.</returns>
        private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
        {
            var status = new Status.TermIndexStatus();

            try
            {
                if (infoStream != null)
                {
                    infoStream.Write(" test: terms, freq, prox...");
                }

                TermEnum termEnum = reader.Terms();
                TermPositions termPositions = reader.TermPositions();

                // Used only to count up # deleted docs for this term
                var myTermDocs = new MySegmentTermDocs(reader);

                int maxDoc = reader.MaxDoc;

                while (termEnum.Next())
                {
                    status.termCount++;
                    Term term = termEnum.Term;
                    int docFreq = termEnum.DocFreq();
                    termPositions.Seek(term);
                    int lastDoc = -1;
                    int freq0 = 0;
                    status.totFreq += docFreq;
                    while (termPositions.Next())
                    {
                        freq0++;
                        int doc = termPositions.Doc;
                        int freq = termPositions.Freq;
                        // Doc ids must be strictly increasing and below maxDoc.
                        if (doc <= lastDoc)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                        }
                        if (doc >= maxDoc)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                        }

                        lastDoc = doc;
                        if (freq <= 0)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                        }

                        int lastPos = -1;
                        status.totPos += freq;
                        for (int j = 0; j < freq; j++)
                        {
                            int pos = termPositions.NextPosition();
                            if (pos < -1)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                            }
                            // Positions may repeat but must never go backwards.
                            if (pos < lastPos)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                            }
                            lastPos = pos;
                        }
                    }

                    // Now count how many deleted docs occurred in
                    // this term:
                    int delCount;
                    if (reader.HasDeletions)
                    {
                        myTermDocs.Seek(term);
                        while (myTermDocs.Next())
                        {
                        }
                        delCount = myTermDocs.delCount;
                    }
                    else
                    {
                        delCount = 0;
                    }

                    if (freq0 + delCount != docFreq)
                    {
                        throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
                    }
                }

                Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
            }
            catch (System.Exception e)
            {
                Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
                status.error = e;
                if (infoStream != null)
                {
                    infoStream.WriteLine(e.StackTrace);
                }
            }

            return status;
        }

        /// <summary> Test stored fields for a segment.</summary>
        /// <returns>Status with the number of undeleted documents loaded and their total
        /// field count; any exception is recorded in <c>error</c> rather than propagated.</returns>
        private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
        {
            var status = new Status.StoredFieldStatus();

            try
            {
                if (infoStream != null)
                {
                    infoStream.Write(" test: stored fields.......");
                }

                // Scan stored fields for all documents
                for (int j = 0; j < info.docCount; ++j)
                {
                    if (!reader.IsDeleted(j))
                    {
                        status.docCount++;
                        Document doc = reader.Document(j);
                        status.totFields += doc.GetFields().Count;
                    }
                }

                // Validate docCount
                if (status.docCount != reader.NumDocs())
                {
                    // Report the reader's count against the count actually seen
                    // (the message used to print the seen count twice).
                    throw new System.SystemException("docCount=" + reader.NumDocs() + " but saw " + status.docCount + " undeleted docs");
                }

                Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float) status.totFields) / status.docCount) }));
            }
            catch (System.Exception e)
            {
                Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
                status.error = e;
                if (infoStream != null)
                {
                    infoStream.WriteLine(e.StackTrace);
                }
            }

            return status;
        }

        /// <summary> Test term vectors for a segment.</summary>
        /// <returns>Status with the number of undeleted documents visited and the total
        /// number of term vectors; any exception is recorded in <c>error</c> rather than propagated.</returns>
        private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
        {
            var status = new Status.TermVectorStatus();

            try
            {
                if (infoStream != null)
                {
                    infoStream.Write(" test: term vectors........");
                }

                for (int j = 0; j < info.docCount; ++j)
                {
                    if (!reader.IsDeleted(j))
                    {
                        status.docCount++;
                        ITermFreqVector[] tfv = reader.GetTermFreqVectors(j);
                        if (tfv != null)
                        {
                            status.totVectors += tfv.Length;
                        }
                    }
                }

                Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float) status.totVectors) / status.docCount) }));
            }
            catch (System.Exception e)
            {
                Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
                status.error = e;
                if (infoStream != null)
                {
                    infoStream.WriteLine(e.StackTrace);
                }
            }

            return status;
        }

        /// <summary>Repairs the index using previously returned result
        /// from <see cref="CheckIndex_Renamed_Method()" />. Note that this does not
        /// remove any of the unreferenced files after it's done;
        /// you must separately open an <see cref="IndexWriter" />, which
        /// deletes unreferenced files when it's created.
        ///
        /// <p/><b>WARNING</b>: this writes a
        /// new segments file into the index, effectively removing
        /// all documents in broken segments from the index.
        /// BE CAREFUL.
        ///
        /// <p/><b>WARNING</b>: Make sure you only call this when the
        /// index is not opened by any writer.
        /// </summary>
        /// <param name="result">Status of a full (non-partial) check that ran to completion.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="result" /> is null.</exception>
        /// <exception cref="System.ArgumentException">The status comes from a partial check,
        /// or from a check that returned before the list of healthy segments was built.</exception>
        public virtual void FixIndex(Status result)
        {
            if (result == null)
                throw new System.ArgumentNullException("result");
            if (result.partial)
                throw new System.ArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
            // newSegments is only assigned once the check gets past the segments-file
            // and version tests; without it there is nothing to commit.
            if (result.newSegments == null)
                throw new System.ArgumentException("can only fix an index whose check ran to completion (this status has no segment list)", "result");
            result.newSegments.Commit(result.dir);
        }

        private static bool assertsOn;

        private static bool TestAsserts()
        {
            assertsOn = true;
            return true;
        }

        /// <summary>Returns true only when Debug.Assert calls are compiled in:
        /// the flag is set as a side effect of evaluating the assertion argument.
        /// </summary>
        private static bool AssertsOn()
        {
            System.Diagnostics.Debug.Assert(TestAsserts());
            return assertsOn;
        }

        /// <summary>Command-line interface to check and fix an index.
        /// <p/>
        /// Run it like this:
        /// <code>
        /// java -ea:Lucene.Net... Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
        /// </code>
        /// <list type="bullet">
        /// <item><c>-fix</c>: actually write a new segments_N file, removing any problematic segments</item>
        /// <item><c>-segment X</c>: only check the specified
        /// segment(s). This can be specified multiple times,
        /// to check more than one segment, eg <c>-segment _2
        /// -segment _a</c>. You can't use this with the -fix
        /// option.</item>
        /// </list>
        /// <p/><b>WARNING</b>: <c>-fix</c> should only be used on an emergency basis as it will cause
        /// documents (perhaps many) to be permanently removed from the index. Always make
        /// a backup copy of your index before running this! Do not run this tool on an index
        /// that is actively being written to. You have been warned!
        /// <p/> Run without -fix, this tool will open the index, report version information
        /// and report any exceptions it hits and what action it would take if -fix were
        /// specified. With -fix, this tool will remove any segments that have issues and
        /// write a new segments_N file. This means all documents contained in the affected
        /// segments will be removed.
        /// <p/>
        /// This tool exits with exit code 1 if the index cannot be opened or has any
        /// corruption, else 0.
        /// </summary>
        [STAThread]
        public static void Main(System.String[] args)
        {

            bool doFix = false;
            var onlySegments = new List<string>();
            System.String indexPath = null;
            int i = 0;
            while (i < args.Length)
            {
                if (args[i].Equals("-fix"))
                {
                    doFix = true;
                    i++;
                }
                else if (args[i].Equals("-segment"))
                {
                    if (i == args.Length - 1)
                    {
                        System.Console.Out.WriteLine("ERROR: missing name for -segment option");
                        System.Environment.Exit(1);
                    }
                    onlySegments.Add(args[i + 1]);
                    i += 2;
                }
                else
                {
                    if (indexPath != null)
                    {
                        System.Console.Out.WriteLine("ERROR: unexpected extra argument '" + args[i] + "'");
                        System.Environment.Exit(1);
                    }
                    indexPath = args[i];
                    i++;
                }
            }

            if (indexPath == null)
            {
                System.Console.Out.WriteLine("\nERROR: index path not specified");
                System.Console.Out.WriteLine("\nUsage: java Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" + "\n" + " -fix: actually write a new segments_N file, removing any problematic segments\n" + " -segment X: only check the specified segments. This can be specified multiple\n" + " times, to check more than one segment, eg '-segment _2 -segment _a'.\n" + " You can't use this with the -fix option\n" + "\n" + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" + "documents (perhaps many) to be permanently removed from the index. Always make\n" + "a backup copy of your index before running this! Do not run this tool on an index\n" + "that is actively being written to. You have been warned!\n" + "\n" + "Run without -fix, this tool will open the index, report version information\n" + "and report any exceptions it hits and what action it would take if -fix were\n" + "specified. With -fix, this tool will remove any segments that have issues and\n" + "write a new segments_N file. This means all documents contained in the affected\n" + "segments will be removed.\n" + "\n" + "This tool exits with exit code 1 if the index cannot be opened or has any\n" + "corruption, else 0.\n");
                System.Environment.Exit(1);
            }

            if (!AssertsOn())
                System.Console.Out.WriteLine("\nNOTE: testing will be more thorough if you run java with '-ea:Lucene.Net...', so assertions are enabled");

            if (onlySegments.Count == 0)
                onlySegments = null;
            else if (doFix)
            {
                System.Console.Out.WriteLine("ERROR: cannot specify both -fix and -segment");
                System.Environment.Exit(1);
            }

            System.Console.Out.WriteLine("\nOpening index @ " + indexPath + "\n");
            Directory dir = null;
            try
            {
                dir = FSDirectory.Open(new System.IO.DirectoryInfo(indexPath));
            }
            catch (Exception t)
            {
                Console.Out.WriteLine("ERROR: could not open directory \"" + indexPath + "\"; exiting");
                Console.Out.WriteLine(t.StackTrace);
                Environment.Exit(1);
            }

            var checker = new CheckIndex(dir);
            var tempWriter = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding)
                                 {AutoFlush = true};
            checker.SetInfoStream(tempWriter);

            Status result = checker.CheckIndex_Renamed_Method(onlySegments);

            // The check can return before building the list of healthy segments
            // (segments file unreadable/unopenable, version unreadable, or index newer
            // than this tool). There is then nothing to report or repair, and -fix
            // would dereference a null segment list.
            if (result.missingSegments || result.newSegments == null)
            {
                System.Environment.Exit(1);
            }

            if (!result.clean)
            {
                if (!doFix)
                {
                    System.Console.Out.WriteLine("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
                }
                else
                {
                    Console.Out.WriteLine("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
                    Console.Out.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
                    for (var s = 0; s < 5; s++)
                    {
                        // One-second pause per countdown step.
                        System.Threading.Thread.Sleep(System.TimeSpan.FromSeconds(1));
                        System.Console.Out.WriteLine(" " + (5 - s) + "...");
                    }
                    Console.Out.WriteLine("Writing...");
                    checker.FixIndex(result);
                    Console.Out.WriteLine("OK");
                    Console.Out.WriteLine("Wrote new segments file \"" + result.newSegments.GetCurrentSegmentFileName() + "\"");
                }
            }
            System.Console.Out.WriteLine("");

            int exitCode;
            if (result != null && result.clean == true)
                exitCode = 0;
            else
                exitCode = 1;
            System.Environment.Exit(exitCode);
        }
    }
}