/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.Collections.Generic;
using System.IO;
using Lucene.Net.Support;
using Analyzer = Lucene.Net.Analysis.Analyzer;
using Document = Lucene.Net.Documents.Document;
using IndexingChain = Lucene.Net.Index.DocumentsWriter.IndexingChain;
using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
using Directory = Lucene.Net.Store.Directory;
using Lock = Lucene.Net.Store.Lock;
using LockObtainFailedException = Lucene.Net.Store.LockObtainFailedException;
using Constants = Lucene.Net.Util.Constants;
using Query = Lucene.Net.Search.Query;
using Similarity = Lucene.Net.Search.Similarity;

namespace Lucene.Net.Index
{

    /// <summary>An <c>IndexWriter</c> creates and maintains an index.
    /// <p/>The <c>create</c> argument to the
    /// <see cref="IndexWriter(Directory, Analyzer, bool, MaxFieldLength)">constructor</see> determines
    /// whether a new index is created, or whether an existing index is
    /// opened. Note that you can open an index with <c>create=true</c>
    /// even while readers are using the index. The old readers will
    /// continue to search the "point in time" snapshot they had opened,
    /// and won't see the newly created index until they re-open. There are
    /// also <see cref="IndexWriter(Directory, Analyzer, MaxFieldLength)">constructors</see>
    /// with no <c>create</c> argument which will create a new index
    /// if there is not already an index at the provided path and otherwise
    /// open the existing index.<p/>
    /// <p/>In either case, documents are added with <see cref="AddDocument(Document)" />
    /// and removed with <see cref="DeleteDocuments(Term)" /> or
    /// <see cref="DeleteDocuments(Query)" />. A document can be updated with
    /// <see cref="UpdateDocument(Term, Document)" /> (which just deletes
    /// and then adds the entire document). When finished adding, deleting
    /// and updating documents, <see cref="Close()" /> should be called.<p/>
    /// <a name="flush"></a>
    /// <p/>These changes are buffered in memory and periodically
    /// flushed to the <see cref="Directory" /> (during the above method
    /// calls). A flush is triggered when there are enough
    /// buffered deletes (see <see cref="SetMaxBufferedDeleteTerms" />)
    /// or enough added documents since the last flush, whichever
    /// is sooner. For the added documents, flushing is triggered
    /// either by RAM usage of the documents (see
    /// <see cref="SetRAMBufferSizeMB" />) or the number of added documents.
    /// The default is to flush when RAM usage hits 16 MB.
    /// For best indexing speed you should flush by RAM usage with a
    /// large RAM buffer. Note that flushing just moves the
    /// internal buffered state in IndexWriter into the index, but
    /// these changes are not visible to IndexReader until either
    /// <see cref="Commit()" /> or <see cref="Close()" /> is called. A flush may
    /// also trigger one or more segment merges, which by default
    /// run with a background thread so as not to block the
    /// addDocument calls (see <a href="#mergePolicy">below</a>
    /// for changing the <see cref="MergeScheduler" />).
    /// <p/>
    /// If an index will not have more documents added for a while and optimal search
    /// performance is desired, then either the full <see cref="Optimize()" />
    /// method or partial <see cref="Optimize(int)" /> method should be
    /// called before the index is closed.
    /// <p/>
    /// Opening an <c>IndexWriter</c> creates a lock file for the directory in use. Trying to open
    /// another <c>IndexWriter</c> on the same directory will lead to a
    /// <see cref="LockObtainFailedException" />. The <see cref="LockObtainFailedException" />
    /// is also thrown if an IndexReader on the same directory is used to delete documents
    /// from the index.<p/>
    /// </summary>
    /// <summary><a name="deletionPolicy"></a>
    /// <p/>Expert: <c>IndexWriter</c> allows an optional
    /// <see cref="IndexDeletionPolicy" /> implementation to be
    /// specified. You can use this to control when prior commits
    /// are deleted from the index. The default policy is <see cref="KeepOnlyLastCommitDeletionPolicy" />,
    /// which removes all prior
    /// commits as soon as a new commit is done (this matches
    /// behavior before 2.2). Creating your own policy can allow
    /// you to explicitly keep previous "point in time" commits
    /// alive in the index for some time, to allow readers to
    /// refresh to the new commit without having the old commit
    /// deleted out from under them. This is necessary on
    /// filesystems like NFS that do not support "delete on last
    /// close" semantics, which Lucene's "point in time" search
    /// normally relies on. <p/>
    /// <a name="mergePolicy"></a> <p/>Expert:
    /// <c>IndexWriter</c> allows you to separately change
    /// the <see cref="MergePolicy" /> and the <see cref="MergeScheduler" />.
    /// The <see cref="MergePolicy" /> is invoked whenever there are
    /// changes to the segments in the index. Its role is to
    /// select which merges to do, if any, and return a <see cref="Index.MergePolicy.MergeSpecification" />
    /// describing the merges. It
    /// also selects merges to do for optimize(). (The default is
    /// <see cref="LogByteSizeMergePolicy" />.) Then, the <see cref="MergeScheduler" />
    /// is invoked with the requested merges and
    /// it decides when and how to run the merges. The default is
    /// <see cref="ConcurrentMergeScheduler" />. <p/>
    /// <a name="OOME"></a><p/><b>NOTE</b>: if you hit an
    /// OutOfMemoryError then IndexWriter will quietly record this
    /// fact and block all future segment commits. This is a
    /// defensive measure in case any internal state (buffered
    /// documents and deletions) were corrupted. Any subsequent
    /// calls to <see cref="Commit()" /> will throw an
    /// IllegalStateException. The only course of action is to
    /// call <see cref="Close()" />, which internally will call <see cref="Rollback()" />,
    /// to undo any changes to the index since the
    /// last commit. You can also just call <see cref="Rollback()" />
    /// directly.<p/>
    /// <a name="thread-safety"></a><p/><b>NOTE</b>:
    /// <see cref="IndexWriter" /> instances are completely thread
    /// safe, meaning multiple threads can call any of its
    /// methods, concurrently. If your application requires
    /// external synchronization, you should <b>not</b>
    /// synchronize on the <c>IndexWriter</c> instance, as
    /// this may cause deadlock; use your own (non-Lucene) objects
    /// instead. <p/>
    /// <b>NOTE:</b> if you call
    /// <c>Thread.Interrupt()</c> on a thread that's within
    /// IndexWriter, IndexWriter will try to catch this (e.g., if
    /// it's in a Wait() or Thread.Sleep()), and will then throw
    /// the unchecked exception <see cref="System.Threading.ThreadInterruptedException"/>
    /// and <b>clear</b> the interrupt status on the thread.<p/>
    /// </summary>
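    /// <example>
    /// A minimal usage sketch (illustrative only; the analyzer and field
    /// choices here are assumptions, not requirements of this class):
    /// <code>
    /// // open (and create) an index, add one document, commit, close
    /// Directory dir = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo("index"));
    /// IndexWriter writer = new IndexWriter(dir, new Lucene.Net.Analysis.SimpleAnalyzer(),
    ///     true, IndexWriter.MaxFieldLength.UNLIMITED);
    /// Document doc = new Document();
    /// doc.Add(new Lucene.Net.Documents.Field("body", "hello world",
    ///     Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED));
    /// writer.AddDocument(doc);
    /// writer.Commit(); // make the change visible to newly opened readers
    /// writer.Close();
    /// </code>
    /// </example>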
    /*
     * Clarification: Check Points (and commits)
     * IndexWriter writes new index files to the directory without writing a new segments_N
     * file which references these new files. It also means that the state of
     * the in-memory SegmentInfos object is different than the most recent
     * segments_N file written to the directory.
     *
     * Each time the SegmentInfos is changed, and matches the (possibly
     * modified) directory files, we have a new "check point".
     * If the modified/new SegmentInfos is written to disk - as a new
     * (generation of) segments_N file - this check point is also an
     * IndexCommit.
     *
     * A new checkpoint always replaces the previous checkpoint and
     * becomes the new "front" of the index. This allows the IndexFileDeleter
     * to delete files that are referenced only by stale checkpoints
     * (files that were created since the last commit, but are no longer
     * referenced by the "front" of the index). For this, IndexFileDeleter
     * keeps track of the last non-commit checkpoint.
     */
    public class IndexWriter : System.IDisposable
    {
        private void InitBlock()
        {
            similarity = Search.Similarity.Default;
            mergePolicy = new LogByteSizeMergePolicy(this);
            readerPool = new ReaderPool(this);
        }

        /// <summary> Default value for the write lock timeout (1,000).</summary>
        /// <seealso cref="DefaultWriteLockTimeout">
        /// </seealso>
        public static long WRITE_LOCK_TIMEOUT = 1000;

        private long writeLockTimeout = WRITE_LOCK_TIMEOUT;

        /// <summary> Name of the write lock in the index.</summary>
        public const System.String WRITE_LOCK_NAME = "write.lock";

        /// <summary> Value to denote that a flush trigger is disabled.</summary>
        public const int DISABLE_AUTO_FLUSH = -1;

        /// <summary> Disabled by default (because IndexWriter flushes by RAM usage
        /// by default). Change using <see cref="SetMaxBufferedDocs(int)" />.
        /// </summary>
        public static readonly int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH;

        /// <summary> Default value is 16 MB (which means flush when buffered
        /// docs consume 16 MB RAM). Change using <see cref="SetRAMBufferSizeMB" />.
        /// </summary>
        public const double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;

        /// <summary> Disabled by default (because IndexWriter flushes by RAM usage
        /// by default). Change using <see cref="SetMaxBufferedDeleteTerms(int)" />.
        /// </summary>
        public static readonly int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH;

        /// <summary> Default value is 10,000.
        /// Change using <see cref="SetMaxFieldLength(int)" />.</summary>
        public const int DEFAULT_MAX_FIELD_LENGTH = 10000;

        /// <summary> Default value is 128. Change using <see cref="TermIndexInterval" />.</summary>
        public const int DEFAULT_TERM_INDEX_INTERVAL = 128;

        /// <summary> Absolute hard maximum length for a term. If a term
        /// arrives from the analyzer longer than this length, it
        /// is skipped and a message is printed to infoStream, if
        /// set (see <see cref="SetInfoStream" />).
        /// </summary>
        public static readonly int MAX_TERM_LENGTH;

        // The normal read buffer size defaults to 1024, but
        // increasing this during merging seems to yield
        // performance gains. However we don't want to increase
        // it too much because there are quite a few
        // BufferedIndexInputs created during merging. See
        // LUCENE-888 for details.
        private const int MERGE_READ_BUFFER_SIZE = 4096;

        // Used for printing messages
        private static System.Object MESSAGE_ID_LOCK = new System.Object();
        private static int MESSAGE_ID = 0;
        private int messageID = -1;
        private volatile bool hitOOM;

        private Directory directory; // where this index resides
        private Analyzer analyzer;   // how to analyze text

        private Similarity similarity; // how to normalize

        private volatile uint changeCount;  // increments every time a change is completed
        private long lastCommitChangeCount; // last changeCount that was committed

        private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fall back to if the commit fails
        private HashMap<SegmentInfo, int?> rollbackSegments;

        internal volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit())
        internal volatile uint pendingCommitChangeCount;

        private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fall back to if the commit fails
        private int localFlushedDocCount; // saved docWriter.getFlushedDocCount during local transaction

        private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
        private int optimizeMaxNumSegments;

        private DocumentsWriter docWriter;
        private IndexFileDeleter deleter;

        // used by optimize to note those segments needing optimization
        private ISet<SegmentInfo> segmentsToOptimize = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<SegmentInfo>();

        private Lock writeLock;

        private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;

        private bool closed;
        private bool closing;

        // Holds all SegmentInfo instances currently involved in
        // merges
        private HashSet<SegmentInfo> mergingSegments = new HashSet<SegmentInfo>();

        private MergePolicy mergePolicy;
        private MergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
        private LinkedList<MergePolicy.OneMerge> pendingMerges = new LinkedList<MergePolicy.OneMerge>();
        private ISet<MergePolicy.OneMerge> runningMerges = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<MergePolicy.OneMerge>();
        private IList<MergePolicy.OneMerge> mergeExceptions = new List<MergePolicy.OneMerge>();
        private long mergeGen;
        private bool stopMerges;

        private int flushCount;
        private int flushDeletesCount;

        // Used to only allow one addIndexes to proceed at once
        // TODO: use ReadWriteLock once we are on 5.0
        private int readCount; // count of how many threads are holding read lock
        private ThreadClass writeThread; // non-null if any thread holds write lock
        internal ReaderPool readerPool;
        private int upgradeCount;

        private int readerTermsIndexDivisor = IndexReader.DEFAULT_TERMS_INDEX_DIVISOR;

        // This is a "write once" variable (like the organic dye
        // on a DVD-R that may or may not be heated by a laser and
        // then cooled to permanently record the event): it's
        // false, until getReader() is called for the first time,
        // at which point it's switched to true and never changes
        // back to false. Once this is true, we hold open and
        // reuse SegmentReader instances internally for applying
        // deletes, doing merges, and reopening near real-time
        // readers.
        private volatile bool poolReaders;

        /// <summary> Expert: returns a readonly reader, covering all committed as well as
        /// un-committed changes to the index. This provides "near real-time"
        /// searching, in that changes made during an IndexWriter session can be
        /// quickly made available for searching without closing the writer nor
        /// calling <see cref="Commit()" />.
        ///
        /// <p/>
        /// Note that this is functionally equivalent to calling <see cref="Commit()" /> and then
        /// using <see cref="IndexReader.Open(Lucene.Net.Store.Directory, bool)" /> to open a new reader. But the turnaround
        /// time of this method should be faster since it avoids the potentially
        /// costly <see cref="Commit()" />.
        /// <p/>
        ///
        /// You must close the <see cref="IndexReader" /> returned by this method once you are done using it.
        ///
        /// <p/>
        /// It's <i>near</i> real-time because there is no hard
        /// guarantee on how quickly you can get a new reader after
        /// making changes with IndexWriter. You'll have to
        /// experiment in your situation to determine if it's
        /// fast enough. As this is a new and experimental
        /// feature, please report back on your findings so we can
        /// learn, improve and iterate.<p/>
        ///
        /// <p/>The resulting reader supports <see cref="IndexReader.Reopen()" />,
        /// but that call will simply forward
        /// back to this method (though this may change in the
        /// future).<p/>
        ///
        /// <p/>The very first time this method is called, this
        /// writer instance will make every effort to pool the
        /// readers that it opens for doing merges, applying
        /// deletes, etc. This means additional resources (RAM,
        /// file descriptors, CPU time) will be consumed.<p/>
        ///
        /// <p/>For lower latency on reopening a reader, you should set <see cref="MergedSegmentWarmer" /> to
        /// pre-warm a newly merged segment before it's committed
        /// to the index. This is important for minimizing index-to-search
        /// delay after a large merge.
        ///
        /// <p/>If an addIndexes* call is running in another thread,
        /// then this reader will only search those segments from
        /// the foreign index that have been successfully copied
        /// over, so far.<p/>
        ///
        /// <p/><b>NOTE</b>: Once the writer is closed, any
        /// outstanding readers may continue to be used. However,
        /// if you attempt to reopen any of those readers, you'll
        /// hit an <see cref="AlreadyClosedException" />.<p/>
        ///
        /// <p/><b>NOTE:</b> This API is experimental and might
        /// change in incompatible ways in the next release.<p/>
        ///
        /// </summary>
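        /// <example>
        /// A near real-time search sketch (illustrative only):
        /// <code>
        /// writer.AddDocument(doc);                 // not yet committed
        /// IndexReader reader = writer.GetReader(); // but visible to this reader
        /// var searcher = new Lucene.Net.Search.IndexSearcher(reader);
        /// // ... run searches ...
        /// searcher.Close();
        /// reader.Close(); // the caller must close the returned reader
        /// </code>
        /// </example>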
        /// <returns> IndexReader that covers the entire index plus all
        /// changes made so far by this IndexWriter instance
        /// </returns>
        /// <throws> IOException </throws>
        public virtual IndexReader GetReader()
        {
            return GetReader(readerTermsIndexDivisor);
        }

        /// <summary>Expert: like <see cref="GetReader()" />, except you can
        /// specify which termInfosIndexDivisor should be used for
        /// any newly opened readers.
        /// </summary>
        /// <param name="termInfosIndexDivisor">Subsamples which indexed
        /// terms are loaded into RAM. This has the same effect as <see cref="IndexWriter.TermIndexInterval" />
        /// except that setting
        /// must be done at indexing time while this setting can be
        /// set per reader. When set to N, then one in every
        /// N*termIndexInterval terms in the index is loaded into
        /// memory. By setting this to a value > 1 you can reduce
        /// memory usage, at the expense of higher latency when
        /// loading a TermInfo. The default value is 1. Set this
        /// to -1 to skip loading the terms index entirely.
        /// </param>
        public virtual IndexReader GetReader(int termInfosIndexDivisor)
        {
            EnsureOpen();

            if (infoStream != null)
            {
                Message("flush at getReader");
            }

            // Do this up front before flushing so that the readers
            // obtained during this flush are pooled, the first time
            // this method is called:
            poolReaders = true;

            // Prevent segmentInfos from changing while opening the
            // reader; in theory we could do similar retry logic,
            // just like we do when loading segments_N
            IndexReader r;
            lock (this)
            {
                Flush(false, true, true);
                r = new ReadOnlyDirectoryReader(this, segmentInfos, termInfosIndexDivisor);
            }
            MaybeMerge();
            return r;
        }
        /// <summary>Holds shared SegmentReader instances. IndexWriter uses
        /// SegmentReaders for 1) applying deletes, 2) doing
        /// merges, 3) handing out a real-time reader. This pool
        /// reuses instances of the SegmentReaders in all these
        /// places if it is in "near real-time mode" (getReader()
        /// has been called on this instance).
        /// </summary>
        internal class ReaderPool : IDisposable
        {
            public ReaderPool(IndexWriter enclosingInstance)
            {
                InitBlock(enclosingInstance);
            }

            private void InitBlock(IndexWriter enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
            }

            private IndexWriter enclosingInstance;

            public IndexWriter Enclosing_Instance
            {
                get { return enclosingInstance; }
            }

            private IDictionary<SegmentInfo, SegmentReader> readerMap = new HashMap<SegmentInfo, SegmentReader>();

            /// <summary>Forcefully clear changes for the specified segments,
            /// and remove them from the pool. This is called on a successful merge.
            /// </summary>
            internal virtual void Clear(SegmentInfos infos)
            {
                lock (this)
                {
                    if (infos == null)
                    {
                        foreach (KeyValuePair<SegmentInfo, SegmentReader> ent in readerMap)
                        {
                            ent.Value.hasChanges = false;
                        }
                    }
                    else
                    {
                        foreach (SegmentInfo info in infos)
                        {
                            if (readerMap.ContainsKey(info))
                            {
                                readerMap[info].hasChanges = false;
                            }
                        }
                    }
                }
            }

            // used only by asserts
            public virtual bool InfoIsLive(SegmentInfo info)
            {
                lock (this)
                {
                    int idx = Enclosing_Instance.segmentInfos.IndexOf(info);
                    System.Diagnostics.Debug.Assert(idx != -1);
                    System.Diagnostics.Debug.Assert(Enclosing_Instance.segmentInfos[idx] == info);
                    return true;
                }
            }

            public virtual SegmentInfo MapToLive(SegmentInfo info)
            {
                lock (this)
                {
                    int idx = Enclosing_Instance.segmentInfos.IndexOf(info);
                    if (idx != -1)
                    {
                        info = Enclosing_Instance.segmentInfos[idx];
                    }
                    return info;
                }
            }

            /// <summary> Release the segment reader (i.e., decRef it and close it if there
            /// are no more references).
            /// </summary>
            /// <param name="sr">
            /// </param>
            /// <throws> IOException </throws>
            public virtual void Release(SegmentReader sr)
            {
                lock (this)
                {
                    Release(sr, false);
                }
            }

            /// <summary> Release the segment reader (i.e., decRef it and close it if there
            /// are no more references).
            /// </summary>
            /// <param name="sr">
            /// </param>
            /// <param name="drop"></param>
            /// <throws> IOException </throws>
            public virtual void Release(SegmentReader sr, bool drop)
            {
                lock (this)
                {
                    bool pooled = readerMap.ContainsKey(sr.SegmentInfo);

                    System.Diagnostics.Debug.Assert(!pooled || readerMap[sr.SegmentInfo] == sr);

                    // Drop caller's ref; for an external reader (not
                    // pooled), this decRef will close it
                    sr.DecRef();

                    if (pooled && (drop || (!Enclosing_Instance.poolReaders && sr.RefCount == 1)))
                    {
                        // We invoke deleter.checkpoint below, so we must be
                        // sync'd on IW if there are changes:

                        // TODO: Java 1.5 has this assert; .NET cannot express it.
                        // System.Diagnostics.Debug.Assert(!sr.hasChanges || Thread.holdsLock(enclosingInstance));

                        // Discard (don't save) changes when we are dropping
                        // the reader; this is used only on the sub-readers
                        // after a successful merge.
                        sr.hasChanges &= !drop;

                        bool hasChanges = sr.hasChanges;

                        // Drop our ref -- this will commit any pending
                        // changes to the dir
                        sr.Close();

                        // We are the last ref to this reader; since we're
                        // not pooling readers, we release it:
                        readerMap.Remove(sr.SegmentInfo);

                        if (hasChanges)
                        {
                            // Must checkpoint w/ deleter, because this
                            // segment reader will have created new _X_N.del
                            // file.
                            enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false);
                        }
                    }
                }
            }

            /// <summary>Remove all our references to readers, and commit
            /// any pending changes.
            /// </summary>
            public void Dispose()
            {
                Dispose(true);
            }

            protected void Dispose(bool disposing)
            {
                if (disposing)
                {
                    // We invoke deleter.checkpoint below, so we must be
                    // sync'd on IW:
                    // TODO: assert Thread.holdsLock(IndexWriter.this);
                    // TODO: Should this class have bool _isDisposed?
                    lock (this)
                    {
                        foreach (var ent in readerMap)
                        {
                            SegmentReader sr = ent.Value;
                            if (sr.hasChanges)
                            {
                                System.Diagnostics.Debug.Assert(InfoIsLive(sr.SegmentInfo));
                                sr.DoCommit(null);
                                // Must checkpoint w/ deleter, because this
                                // segment reader will have created new _X_N.del
                                // file.
                                enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false);
                            }

                            // NOTE: it is allowed that this decRef does not
                            // actually close the SR; this can happen when a
                            // near real-time reader is kept open after the
                            // IndexWriter instance is closed
                            sr.DecRef();
                        }

                        readerMap.Clear();
                    }
                }
            }

            /// <summary> Commit all segment readers in the pool.</summary>
            /// <throws> IOException </throws>
            internal virtual void Commit()
            {
                // We invoke deleter.checkpoint below, so we must be
                // sync'd on IW:
                // TODO: assert Thread.holdsLock(IndexWriter.this);
                lock (this)
                {
                    foreach (KeyValuePair<SegmentInfo, SegmentReader> ent in readerMap)
                    {
                        SegmentReader sr = ent.Value;
                        if (sr.hasChanges)
                        {
                            System.Diagnostics.Debug.Assert(InfoIsLive(sr.SegmentInfo));
                            sr.DoCommit(null);
                            // Must checkpoint w/ deleter, because this
                            // segment reader will have created new _X_N.del
                            // file.
                            enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false);
                        }
                    }
                }
            }

            /// <summary> Returns a ref to a clone. NOTE: this clone is not
            /// enrolled in the pool, so you should simply close()
            /// it when you're done (i.e., do not call release()).
            /// </summary>
            public virtual SegmentReader GetReadOnlyClone(SegmentInfo info, bool doOpenStores, int termInfosIndexDivisor)
            {
                lock (this)
                {
                    SegmentReader sr = Get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor);
                    try
                    {
                        return (SegmentReader) sr.Clone(true);
                    }
                    finally
                    {
                        sr.DecRef();
                    }
                }
            }

            /// <summary> Obtain a SegmentReader from the readerPool. The reader
            /// must be returned by calling <see cref="Release(SegmentReader)" />.
            /// </summary>
            /// <seealso cref="Release(SegmentReader)">
            /// </seealso>
            /// <param name="info">
            /// </param>
            /// <param name="doOpenStores">
            /// </param>
            /// <throws> IOException </throws>
            public virtual SegmentReader Get(SegmentInfo info, bool doOpenStores)
            {
                lock (this)
                {
                    return Get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, enclosingInstance.readerTermsIndexDivisor);
                }
            }

            /// <summary> Obtain a SegmentReader from the readerPool.
            /// The reader
            /// must be returned by calling <see cref="Release(SegmentReader)" />.
            /// </summary>
            /// <seealso cref="Release(SegmentReader)">
            /// </seealso>
            /// <param name="info">
            /// </param>
            /// <param name="doOpenStores">
            /// </param>
            /// <param name="readBufferSize">
            /// </param>
            /// <param name="termsIndexDivisor">
            /// </param>
            /// <throws> IOException </throws>
            public virtual SegmentReader Get(SegmentInfo info, bool doOpenStores, int readBufferSize, int termsIndexDivisor)
            {
                lock (this)
                {
                    if (Enclosing_Instance.poolReaders)
                    {
                        readBufferSize = BufferedIndexInput.BUFFER_SIZE;
                    }

                    SegmentReader sr = readerMap[info];
                    if (sr == null)
                    {
                        // TODO: we may want to avoid doing this while
                        // synchronized
                        // Returns a ref, which we xfer to readerMap:
                        sr = SegmentReader.Get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor);
                        if (info.dir == enclosingInstance.directory)
                        {
                            // Only pool if reader is not external
                            readerMap[info] = sr;
                        }
                    }
                    else
                    {
                        if (doOpenStores)
                        {
                            sr.OpenDocStores();
                        }
                        if (termsIndexDivisor != -1 && !sr.TermsIndexLoaded())
                        {
                            // If this reader was originally opened because we
                            // needed to merge it, we didn't load the terms
                            // index. But now, if the caller wants the terms
                            // index (eg because it's doing deletes, or an NRT
                            // reader is being opened) we ask the reader to
                            // load its terms index.
                            sr.LoadTermsIndex(termsIndexDivisor);
                        }
                    }

                    // Return a ref to our caller
                    if (info.dir == enclosingInstance.directory)
                    {
                        // Only incRef if we pooled (reader is not external)
                        sr.IncRef();
                    }
                    return sr;
                }
            }

            // Returns a ref
            public virtual SegmentReader GetIfExists(SegmentInfo info)
            {
                lock (this)
                {
                    SegmentReader sr = readerMap[info];
                    if (sr != null)
                    {
                        sr.IncRef();
                    }
                    return sr;
                }
            }
        }
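        // Illustrative acquire/release pattern for the pool (a sketch, not a
        // method of this class); NumDeletedDocs below follows the same shape:
        //
        //   SegmentReader sr = readerPool.Get(info, doOpenStores);
        //   try
        //   {
        //       // ... use sr ...
        //   }
        //   finally
        //   {
        //       readerPool.Release(sr); // decRef; may close and checkpoint
        //   }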
        /// <summary> Obtain the number of deleted docs for a pooled reader.
        /// If the reader isn't being pooled, the segmentInfo's
        /// delCount is returned.
        /// </summary>
        public virtual int NumDeletedDocs(SegmentInfo info)
        {
            SegmentReader reader = readerPool.GetIfExists(info);
            try
            {
                if (reader != null)
                {
                    return reader.NumDeletedDocs;
                }
                else
                {
                    return info.GetDelCount();
                }
            }
            finally
            {
                if (reader != null)
                {
                    readerPool.Release(reader);
                }
            }
        }

        internal virtual void AcquireWrite()
        {
            lock (this)
            {
                System.Diagnostics.Debug.Assert(writeThread != ThreadClass.Current());
                while (writeThread != null || readCount > 0)
                    DoWait();

                // We could have been closed while we were waiting:
                EnsureOpen();

                writeThread = ThreadClass.Current();
            }
        }

        internal virtual void ReleaseWrite()
        {
            lock (this)
            {
                System.Diagnostics.Debug.Assert(ThreadClass.Current() == writeThread);
                writeThread = null;
                System.Threading.Monitor.PulseAll(this);
            }
        }

        internal virtual void AcquireRead()
        {
            lock (this)
            {
                ThreadClass current = ThreadClass.Current();
                while (writeThread != null && writeThread != current)
                    DoWait();

                readCount++;
            }
        }

        // Allows one readLock to upgrade to a writeLock even if
        // there are other readLocks as long as all other
        // readLocks are also blocked in this method:
        internal virtual void UpgradeReadToWrite()
        {
            lock (this)
            {
                System.Diagnostics.Debug.Assert(readCount > 0);
                upgradeCount++;
                while (readCount > upgradeCount || writeThread != null)
                {
                    DoWait();
                }

                writeThread = ThreadClass.Current();
                readCount--;
                upgradeCount--;
            }
        }

        internal virtual void ReleaseRead()
        {
            lock (this)
            {
                readCount--;
                System.Diagnostics.Debug.Assert(readCount >= 0);
                System.Threading.Monitor.PulseAll(this);
            }
        }
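        // Sketch of how the read/write lock above is meant to be used
        // (illustrative only; real callers such as addIndexes also handle
        // exceptions and track which lock they currently hold):
        //
        //   AcquireRead();
        //   // ... work that tolerates concurrent readers ...
        //   UpgradeReadToWrite(); // trade the read ref for sole write access
        //   // ... work that must exclude all other threads ...
        //   ReleaseWrite();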
        internal bool IsOpen(bool includePendingClose)
        {
            lock (this)
            {
                return !(closed || (includePendingClose && closing));
            }
        }

        /// <summary> Used internally to throw an <see cref="AlreadyClosedException" />
        /// if this IndexWriter has been closed.
        /// </summary>
        /// <throws> AlreadyClosedException if this IndexWriter is closed </throws>
        protected internal void EnsureOpen(bool includePendingClose)
        {
            lock (this)
            {
                if (!IsOpen(includePendingClose))
                {
                    throw new AlreadyClosedException("this IndexWriter is closed");
                }
            }
        }

        protected internal void EnsureOpen()
        {
            lock (this)
            {
                EnsureOpen(true);
            }
        }

        /// <summary> Prints a message to the infoStream (if non-null),
        /// prefixed with the identifying information for this
        /// writer and the thread that's calling it.
        /// </summary>
        public virtual void Message(System.String message)
        {
            if (infoStream != null)
                infoStream.WriteLine("IW " + messageID + " [" + DateTime.Now.ToString() + "; " + ThreadClass.Current().Name + "]: " + message);
        }

        private void SetMessageID(System.IO.StreamWriter infoStream)
        {
            lock (this)
            {
                if (infoStream != null && messageID == -1)
                {
                    lock (MESSAGE_ID_LOCK)
                    {
                        messageID = MESSAGE_ID++;
                    }
                }
                this.infoStream = infoStream;
            }
        }

        /// <summary> Casts the current mergePolicy to LogMergePolicy, and throws
        /// an exception if the mergePolicy is not a LogMergePolicy.
        /// </summary>
        private LogMergePolicy LogMergePolicy
        {
            get
            {
                if (mergePolicy is LogMergePolicy)
                    return (LogMergePolicy) mergePolicy;

                throw new System.ArgumentException(
                    "this method can only be called when the merge policy is the default LogMergePolicy");
            }
        }

        /// <summary><p/>Gets or sets the current setting of whether newly flushed
        /// segments will use the compound file format. Note that
        /// this just returns the value previously set with
        /// setUseCompoundFile(boolean), or the default value
        /// (true). You cannot use this to query the status of
        /// previously flushed segments.<p/>
        ///
        /// <p/>Note that this method is a convenience method: it
        /// just calls mergePolicy.getUseCompoundFile as long as
        /// mergePolicy is an instance of <see cref="LogMergePolicy" />.
        /// Otherwise an IllegalArgumentException is thrown.<p/>
        /// </summary>
        public virtual bool UseCompoundFile
        {
            get { return LogMergePolicy.GetUseCompoundFile(); }
            set
            {
                LogMergePolicy.SetUseCompoundFile(value);
                LogMergePolicy.SetUseCompoundDocStore(value);
            }
        }

        /// <summary>Expert: Set the Similarity implementation used by this IndexWriter.
        /// </summary>
        public virtual void SetSimilarity(Similarity similarity)
        {
            EnsureOpen();
            this.similarity = similarity;
            docWriter.SetSimilarity(similarity);
        }

        /// <summary>Expert: Return the Similarity implementation used by this IndexWriter.
        ///
        /// <p/>This defaults to the current value of <see cref="Search.Similarity.Default" />.
        /// </summary>
        public virtual Similarity Similarity
        {
            get
            {
                EnsureOpen();
                return this.similarity;
            }
        }

        /// <summary>Expert: Gets or sets the interval between indexed terms. Large values cause less
        /// memory to be used by IndexReader, but slow random-access to terms. Small
        /// values cause more memory to be used by an IndexReader, and speed
        /// random-access to terms.
        ///
        /// This parameter determines the amount of computation required per query
        /// term, regardless of the number of documents that contain that term. In
        /// particular, it is the maximum number of other terms that must be
        /// scanned before a term is located and its frequency and position information
        /// may be processed. In a large index with user-entered query terms, query
        /// processing time is likely to be dominated not by term lookup but rather
        /// by the processing of frequency and positional data. In a small index
        /// or when many uncommon query terms are generated (e.g., by wildcard
        /// queries) term lookup may become a dominant cost.
        ///
        /// In particular, <c>numUniqueTerms/interval</c> terms are read into
        /// memory by an IndexReader, and, on average, <c>interval/2</c> terms
        /// must be scanned for each random term access.
        /// </summary>
        /// <seealso cref="DEFAULT_TERM_INDEX_INTERVAL">
        /// </seealso>
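        /// <example>
        /// Worked example (illustrative): with 10,000,000 unique terms and the
        /// default interval of 128, a reader loads 10,000,000 / 128 = 78,125
        /// index terms into RAM, and a random term lookup scans on average
        /// 128 / 2 = 64 terms.
        /// </example>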
        public virtual int TermIndexInterval
        {
            get
            {
                // We pass false because this method is called by SegmentMerger while we are in the process of closing
                EnsureOpen(false);
                return termIndexInterval;
            }
            set
            {
                EnsureOpen();
                this.termIndexInterval = value;
            }
        }

        /// <summary> Constructs an IndexWriter for the index in <c>d</c>.
        /// Text will be analyzed with <c>a</c>. If <c>create</c>
        /// is true, then a new, empty index will be created in
        /// <c>d</c>, replacing the index already there, if any.
        /// </summary>
        /// <param name="d">the index directory
        /// </param>
        /// <param name="a">the analyzer to use
        /// </param>
        /// <param name="create"><c>true</c> to create the index or overwrite
        /// the existing one; <c>false</c> to append to the existing
        /// index
        /// </param>
        /// <param name="mfl">Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
        /// via the MaxFieldLength constructor.
        /// </param>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> LockObtainFailedException if another writer </throws>
        /// <summary> has this index open (<c>write.lock</c> could not
        /// be obtained)
        /// </summary>
        /// <throws> IOException if the directory cannot be read/written to, or </throws>
        /// <summary> if it does not exist and <c>create</c> is
        /// <c>false</c> or if there is any other low-level
        /// IO error
        /// </summary>
        public IndexWriter(Directory d, Analyzer a, bool create, MaxFieldLength mfl)
        {
            InitBlock();
            Init(d, a, create, null, mfl.Limit, null, null);
        }

        /// <summary> Constructs an IndexWriter for the index in
        /// <c>d</c>, first creating it if it does not
        /// already exist.
        /// </summary>
        /// <param name="d">the index directory
        /// </param>
        /// <param name="a">the analyzer to use
        /// </param>
        /// <param name="mfl">Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
        /// via the MaxFieldLength constructor.
        /// </param>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> LockObtainFailedException if another writer </throws>
        /// <summary> has this index open (<c>write.lock</c> could not
        /// be obtained)
        /// </summary>
        /// <throws> IOException if the directory cannot be </throws>
        /// <summary> read/written to or if there is any other low-level
        /// IO error
        /// </summary>
        public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl)
        {
            InitBlock();
            Init(d, a, null, mfl.Limit, null, null);
        }

        /// <summary> Expert: constructs an IndexWriter with a custom <see cref="IndexDeletionPolicy" />,
        /// for the index in <c>d</c>,
        /// first creating it if it does not already exist. Text
        /// will be analyzed with <c>a</c>.
        /// </summary>
        /// <param name="d">the index directory
        /// </param>
        /// <param name="a">the analyzer to use
        /// </param>
        /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a>
        /// </param>
        /// <param name="mfl">whether or not to limit field lengths
        /// </param>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> LockObtainFailedException if another writer </throws>
        /// <summary> has this index open (<c>write.lock</c> could not
        /// be obtained)
        /// </summary>
        /// <throws> IOException if the directory cannot be </throws>
        /// <summary> read/written to or if there is any other low-level
        /// IO error
        /// </summary>
        public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
        {
            InitBlock();
            Init(d, a, deletionPolicy, mfl.Limit, null, null);
        }

        /// <summary> Expert: constructs an IndexWriter with a custom <see cref="IndexDeletionPolicy" />,
        /// for the index in <c>d</c>.
        /// Text will be analyzed with <c>a</c>. If
        /// <c>create</c> is true, then a new, empty index
        /// will be created in <c>d</c>, replacing the index
        /// already there, if any.
        /// </summary>
        /// <param name="d">the index directory
        /// </param>
        /// <param name="a">the analyzer to use
        /// </param>
        /// <param name="create"><c>true</c> to create the index or overwrite
        /// the existing one; <c>false</c> to append to the existing
        /// index
        /// </param>
        /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a>
        /// </param>
        /// <param name="mfl"><see cref="Lucene.Net.Index.IndexWriter.MaxFieldLength" />, whether or not to limit field lengths. Value is in number of terms/tokens
        /// </param>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> LockObtainFailedException if another writer </throws>
        /// <summary> has this index open (<c>write.lock</c> could not
        /// be obtained)
        /// </summary>
        /// <throws> IOException if the directory cannot be read/written to, or </throws>
        /// <summary> if it does not exist and <c>create</c> is
        /// <c>false</c> or if there is any other low-level
        /// IO error
        /// </summary>
        public IndexWriter(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
        {
            InitBlock();
            Init(d, a, create, deletionPolicy, mfl.Limit, null, null);
        }

        /// <summary> Expert: constructs an IndexWriter with a custom <see cref="IndexDeletionPolicy" />
        /// and <see cref="IndexingChain" />,
        /// for the index in <c>d</c>.
        /// Text will be analyzed with <c>a</c>. If
        /// <c>create</c> is true, then a new, empty index
        /// will be created in <c>d</c>, replacing the index
        /// already there, if any.
        /// </summary>
        /// <param name="d">the index directory
        /// </param>
        /// <param name="a">the analyzer to use
        /// </param>
        /// <param name="create"><c>true</c> to create the index or overwrite
        /// the existing one; <c>false</c> to append to the existing
        /// index
        /// </param>
        /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a>
        /// </param>
        /// <param name="mfl">whether or not to limit field lengths, value is in number of terms/tokens. See <see cref="Lucene.Net.Index.IndexWriter.MaxFieldLength" />.
        /// </param>
        /// <param name="indexingChain">the <see cref="DocConsumer" /> chain to be used to
        /// process documents
        /// </param>
        /// <param name="commit">which commit to open
        /// </param>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> LockObtainFailedException if another writer </throws>
        /// <summary> has this index open (<c>write.lock</c> could not
        /// be obtained)
        /// </summary>
        /// <throws> IOException if the directory cannot be read/written to, or </throws>
        /// <summary> if it does not exist and <c>create</c> is
        /// <c>false</c> or if there is any other low-level
        /// IO error
        /// </summary>
        internal IndexWriter(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain, IndexCommit commit)
        {
            InitBlock();
            Init(d, a, create, deletionPolicy, mfl.Limit, indexingChain, commit);
        }

        /// <summary> Expert: constructs an IndexWriter on a specific commit
        /// point, with a custom <see cref="IndexDeletionPolicy" />, for
        /// the index in <c>d</c>. Text will be analyzed
        /// with <c>a</c>.
        ///
        /// <p/> This is only meaningful if you've used an <see cref="IndexDeletionPolicy" />
        /// in the past that keeps more than
        /// just the last commit.
        ///
        /// <p/>This operation is similar to <see cref="Rollback()" />,
        /// except that method can only roll back what's been done
        /// with the current instance of IndexWriter since its last
        /// commit, whereas this method can roll back to an
        /// arbitrary commit point from the past, assuming the
        /// <see cref="IndexDeletionPolicy" /> has preserved past
        /// commits.
        /// </summary>
        /// <param name="d">the index directory
        /// </param>
        /// <param name="a">the analyzer to use
        /// </param>
        /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a>
        /// </param>
        /// <param name="mfl">whether or not to limit field lengths, value is in number of terms/tokens. See <see cref="Lucene.Net.Index.IndexWriter.MaxFieldLength" />.
        /// </param>
        /// <param name="commit">which commit to open
        /// </param>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> LockObtainFailedException if another writer </throws>
        /// <summary> has this index open (<c>write.lock</c> could not
        /// be obtained)
        /// </summary>
        /// <throws> IOException if the directory cannot be read/written to, or </throws>
        /// <summary> if it does not exist and <c>create</c> is
        /// <c>false</c> or if there is any other low-level
        /// IO error
        /// </summary>
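        /// <example>
        /// An illustrative sketch of reopening at a past commit (assumes
        /// <c>dir</c>, <c>analyzer</c> and <c>keepAllPolicy</c> already exist,
        /// and that a commit-preserving deletion policy was used previously):
        /// <code>
        /// IndexCommit past = null;
        /// foreach (IndexCommit c in IndexReader.ListCommits(dir))
        ///     past = c; // pick a commit by your own criteria
        /// IndexWriter writer = new IndexWriter(dir, analyzer, keepAllPolicy,
        ///     IndexWriter.MaxFieldLength.UNLIMITED, past);
        /// writer.Commit(); // makes the rollback to that commit permanent
        /// </code>
        /// </example>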
        public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit)
        {
            InitBlock();
            Init(d, a, false, deletionPolicy, mfl.Limit, null, commit);
        }

        private void Init(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, int maxFieldLength, IndexingChain indexingChain, IndexCommit commit)
        {
            if (IndexReader.IndexExists(d))
            {
                Init(d, a, false, deletionPolicy, maxFieldLength, indexingChain, commit);
            }
            else
            {
                Init(d, a, true, deletionPolicy, maxFieldLength, indexingChain, commit);
            }
        }

        private void Init(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, int maxFieldLength, IndexingChain indexingChain, IndexCommit commit)
        {
            directory = d;
            analyzer = a;
            SetMessageID(defaultInfoStream);
            this.maxFieldLength = maxFieldLength;

            if (indexingChain == null)
                indexingChain = DocumentsWriter.DefaultIndexingChain;

            if (create)
            {
                // Clear the write lock in case it's leftover:
                directory.ClearLock(WRITE_LOCK_NAME);
            }

            Lock writeLock = directory.MakeLock(WRITE_LOCK_NAME);
            if (!writeLock.Obtain(writeLockTimeout)) // obtain write lock
            {
                throw new LockObtainFailedException("Index locked for write: " + writeLock);
            }
            this.writeLock = writeLock; // save it

            bool success = false;
            try
            {
                if (create)
                {
                    // Try to read first. This is to allow create
                    // against an index that's currently open for
                    // searching. In this case we write the next
                    // segments_N file with no segments:
                    bool doCommit;
                    try
                    {
                        segmentInfos.Read(directory);
                        segmentInfos.Clear();
                        doCommit = false;
                    }
                    catch (System.IO.IOException)
                    {
                        // Likely this means it's a fresh directory
                        doCommit = true;
                    }

                    if (doCommit)
                    {
                        // Only commit if there is no segments file
                        // in this dir already.
                        segmentInfos.Commit(directory);
                        synced.UnionWith(segmentInfos.Files(directory, true));
                    }
                    else
                    {
                        // Record that we have a change (zero out all
                        // segments) pending:
                        changeCount++;
                    }
                }
                else
                {
                    segmentInfos.Read(directory);

                    if (commit != null)
                    {
                        // Swap out all segments, but, keep metadata in
                        // SegmentInfos, like version & generation, to
                        // preserve write-once.
                        // This is important if
                        // readers are open against the future commit
                        // points.
                        if (commit.Directory != directory)
                            throw new System.ArgumentException("IndexCommit's directory doesn't match my directory");
                        SegmentInfos oldInfos = new SegmentInfos();
                        oldInfos.Read(directory, commit.SegmentsFileName);
                        segmentInfos.Replace(oldInfos);
                        changeCount++;
                        if (infoStream != null)
                            Message("init: loaded commit \"" + commit.SegmentsFileName + "\"");
                    }

                    // We assume that this segments_N was previously
                    // properly sync'd:
                    synced.UnionWith(segmentInfos.Files(directory, true));
                }

                SetRollbackSegmentInfos(segmentInfos);

                docWriter = new DocumentsWriter(directory, this, indexingChain);
                docWriter.SetInfoStream(infoStream);
                docWriter.SetMaxFieldLength(maxFieldLength);

                // Default deleter (for backwards compatibility) is
                // KeepOnlyLastCommitDeleter:
                deleter = new IndexFileDeleter(directory, deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy, segmentInfos, infoStream, docWriter, synced);

                if (deleter.startingCommitDeleted)
                {
                    // Deletion policy deleted the "head" commit point.
                    // We have to mark ourself as changed so that if we
                    // are closed w/o any further changes we write a new
                    // segments_N file.
                    changeCount++;
                }

                PushMaxBufferedDocs();

                if (infoStream != null)
                {
                    Message("init: create=" + create);
                    MessageState();
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    if (infoStream != null)
                    {
                        Message("init: hit exception on init; releasing write lock");
                    }
                    try
                    {
                        writeLock.Release();
                    }
                    catch (Exception)
                    {
                        // don't mask the original exception
                    }
                    writeLock = null;
                }
            }
        }

        private void SetRollbackSegmentInfos(SegmentInfos infos)
        {
            lock (this)
            {
                rollbackSegmentInfos = (SegmentInfos) infos.Clone();
                System.Diagnostics.Debug.Assert(!rollbackSegmentInfos.HasExternalSegments(directory));
                rollbackSegments = new HashMap<SegmentInfo, int?>();
                int size = rollbackSegmentInfos.Count;
                for (int i = 0; i < size; i++)
                    rollbackSegments[rollbackSegmentInfos.Info(i)] = i;
            }
        }

        /// <summary> Expert: set the merge policy used by this writer.</summary>
        public virtual void SetMergePolicy(MergePolicy mp)
        {
            EnsureOpen();
            if (mp == null)
                throw new System.NullReferenceException("MergePolicy must be non-null");

            if (mergePolicy != mp)
                mergePolicy.Close();
            mergePolicy = mp;
            PushMaxBufferedDocs();
            if (infoStream != null)
            {
                Message("setMergePolicy " + mp);
            }
        }

        /// <summary> Expert: returns the current MergePolicy in use by this writer.</summary>
        /// <seealso cref="SetMergePolicy">
        /// </seealso>
        public virtual MergePolicy MergePolicy
        {
            get
            {
                EnsureOpen();
                return mergePolicy;
            }
        }

        /// <summary> Expert: set the merge scheduler used by this writer.</summary>
        public virtual void SetMergeScheduler(MergeScheduler mergeScheduler)
        {
            lock (this)
            {
                EnsureOpen();
                if (mergeScheduler == null)
                    throw new System.NullReferenceException("MergeScheduler must be non-null");

                if (this.mergeScheduler != mergeScheduler)
                {
                    FinishMerges(true);
                    this.mergeScheduler.Close();
                }
                this.mergeScheduler = mergeScheduler;
                if (infoStream != null)
                {
                    Message("setMergeScheduler " + mergeScheduler);
                }
            }
        }

        /// <summary> Expert: returns the current MergeScheduler in use by this
        /// writer.
        /// </summary>
        /// <seealso cref="SetMergeScheduler">
        /// </seealso>
        public virtual MergeScheduler MergeScheduler
        {
            get
            {
                EnsureOpen();
                return mergeScheduler;
            }
        }

        /// <summary> <p/>Gets or sets the largest segment (measured by document
        /// count) that may be merged with other segments.
        /// <p/>
        /// Small values (e.g., less than 10,000) are best for
        /// interactive indexing, as this limits the length of
        /// pauses while indexing to a few seconds. Larger values
        /// are best for batched indexing and speedier
        /// searches.
        /// <p/>
        /// The default value is <see cref="int.MaxValue" />.
        /// <p/>
        /// Note that this method is a convenience method: it
        /// just calls mergePolicy.getMaxMergeDocs as long as
        /// mergePolicy is an instance of <see cref="LogMergePolicy" />.
        /// Otherwise an IllegalArgumentException is thrown.<p/>
        ///
        /// The default merge policy (<see cref="LogByteSizeMergePolicy" />)
        /// also allows you to set this
        /// limit by net size (in MB) of the segment, using
        /// <see cref="LogByteSizeMergePolicy.MaxMergeMB" />.<p/>
        /// </summary>
        /// <seealso cref="MaxMergeDocs">
        /// </seealso>
        public virtual int MaxMergeDocs
        {
            get { return LogMergePolicy.MaxMergeDocs; }
            set { LogMergePolicy.MaxMergeDocs = value; }
        }

        /// <summary> The maximum number of terms that will be indexed for a single field in a
        /// document. This limits the amount of memory required for indexing, so that
        /// collections with very large files will not crash the indexing process by
        /// running out of memory. This setting refers to the number of running terms,
        /// not to the number of different terms.<p/>
        /// <strong>Note:</strong> this silently truncates large documents, excluding from the
        /// index all terms that occur further in the document. If you know your source
        /// documents are large, be sure to set this value high enough to accommodate
        /// the expected size. If you set it to Integer.MAX_VALUE, then the only limit
        /// is your memory, but you should anticipate an OutOfMemoryError.<p/>
        /// By default, no more than <see cref="DEFAULT_MAX_FIELD_LENGTH" /> terms
        /// will be indexed for a field.
        /// </summary>
        public virtual void SetMaxFieldLength(int maxFieldLength)
        {
            EnsureOpen();
            this.maxFieldLength = maxFieldLength;
            docWriter.SetMaxFieldLength(maxFieldLength);
            if (infoStream != null)
                Message("setMaxFieldLength " + maxFieldLength);
        }

        /// <summary> Returns the maximum number of terms that will be
        /// indexed for a single field in a document.
        /// </summary>
        /// <seealso cref="SetMaxFieldLength">
        /// </seealso>
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
        public virtual int GetMaxFieldLength()
        {
            EnsureOpen();
            return maxFieldLength;
        }

        /// <summary>Gets or sets the termsIndexDivisor passed to any readers that
        /// IndexWriter opens, for example when applying deletes
        /// or creating a near-real-time reader in
        /// <see cref="GetReader()"/>. Default value is
        /// <see cref="IndexReader.DEFAULT_TERMS_INDEX_DIVISOR"/>.
        /// </summary>
        public int ReaderTermsIndexDivisor
        {
            get
            {
                EnsureOpen();
                return readerTermsIndexDivisor;
            }
            set
            {
                EnsureOpen();
                if (value <= 0)
                {
                    throw new ArgumentException("divisor must be >= 1 (got " + value + ")");
                }
                readerTermsIndexDivisor = value;
                if (infoStream != null)
                {
                    Message("setReaderTermsIndexDivisor " + readerTermsIndexDivisor);
                }
            }
        }

        /// <summary>Determines the minimal number of documents required
        /// before the buffered in-memory documents are flushed as
        /// a new Segment. Large values generally give faster
        /// indexing.
        ///
        /// <p/>When this is set, the writer will flush every
        /// maxBufferedDocs added documents. Pass in <see cref="DISABLE_AUTO_FLUSH" />
        /// to prevent triggering a flush due
        /// to number of buffered documents. Note that if flushing
        /// by RAM usage is also enabled, then the flush will be
        /// triggered by whichever comes first.<p/>
        ///
        /// <p/>Disabled by default (writer flushes by RAM usage).<p/>
        /// </summary>
        /// <throws> IllegalArgumentException if maxBufferedDocs is </throws>
        /// <summary> enabled but smaller than 2, or it disables maxBufferedDocs
        /// when ramBufferSize is already disabled
        /// </summary>
        /// <seealso cref="SetRAMBufferSizeMB">
        /// </seealso>
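        /// <example>
        /// Illustrative: flush by document count instead of RAM usage (the
        /// count of 1,000 is an arbitrary assumption, not a recommendation):
        /// <code>
        /// writer.SetMaxBufferedDocs(1000); // flush every 1,000 added docs
        /// writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); // optionally disable the RAM trigger
        /// </code>
        /// </example>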
        /// </summary>
        private void PushMaxBufferedDocs()
        {
            if (docWriter.MaxBufferedDocs != DISABLE_AUTO_FLUSH)
            {
                MergePolicy mp = mergePolicy;
                if (mp is LogDocMergePolicy)
                {
                    LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
                    int maxBufferedDocs = docWriter.MaxBufferedDocs;
                    if (lmp.MinMergeDocs != maxBufferedDocs)
                    {
                        if (infoStream != null)
                            Message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
                        lmp.MinMergeDocs = maxBufferedDocs;
                    }
                }
            }
        }

        /// <summary> Returns the number of buffered added documents that will
        /// trigger a flush if enabled.
        /// </summary>
        /// <seealso cref="SetMaxBufferedDocs">
        /// </seealso>
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
        public virtual int GetMaxBufferedDocs()
        {
            EnsureOpen();
            return docWriter.MaxBufferedDocs;
        }

        /// <summary>Determines the amount of RAM that may be used for
        /// buffering added documents and deletions before they are
        /// flushed to the Directory. Generally for faster
        /// indexing performance it's best to flush by RAM usage
        /// instead of document count and use as large a RAM buffer
        /// as you can.
        ///
        /// <p/>When this is set, the writer will flush whenever
        /// buffered documents and deletions use this much RAM.
        /// Pass in <see cref="DISABLE_AUTO_FLUSH" /> to prevent
        /// triggering a flush due to RAM usage. Note that if
        /// flushing by document count is also enabled, then the
        /// flush will be triggered by whichever comes first.<p/>
        ///
        /// <p/> <b>NOTE</b>: the accounting of RAM usage for pending
        /// deletions is only approximate. Specifically, if you
        /// delete by Query, Lucene currently has no way to measure
        /// the RAM usage of individual Queries, so the accounting
        /// will under-estimate; you should compensate by either
        /// calling commit() periodically yourself, or by using
        /// <see cref="SetMaxBufferedDeleteTerms" /> to flush by count
        /// instead of RAM usage (each buffered delete Query counts
        /// as one).
        ///
        /// <p/>
        /// <b>NOTE</b>: because IndexWriter uses <c>int</c>s when managing its
        /// internal storage, the absolute maximum value for this setting is somewhat
        /// less than 2048 MB. The precise limit depends on various factors, such as
        /// how large your documents are, how many fields have norms, etc., so it's
        /// best to set this value comfortably under 2048.
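        /// <p/>For example (a minimal sketch; the 48 MB figure is arbitrary
        /// and assumes an already constructed <c>writer</c>):
        /// <code>
        /// writer.SetRAMBufferSizeMB(48.0);                           // flush by RAM usage
        /// writer.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH); // not by doc count
        /// </code>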
        /// <p/>
        ///
        /// <p/> The default value is <see cref="DEFAULT_RAM_BUFFER_SIZE_MB" />.<p/>
        ///
        /// </summary>
        /// <throws> IllegalArgumentException if ramBufferSize is
        /// enabled but non-positive, or it disables ramBufferSize
        /// when maxBufferedDocs is already disabled
        /// </throws>
        public virtual void SetRAMBufferSizeMB(double mb)
        {
            if (mb > 2048.0)
            {
                throw new System.ArgumentException("ramBufferSize " + mb + " is too large; should be comfortably less than 2048");
            }
            if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0)
                throw new System.ArgumentException("ramBufferSize should be > 0.0 MB when enabled");
            if (mb == DISABLE_AUTO_FLUSH && GetMaxBufferedDocs() == DISABLE_AUTO_FLUSH)
                throw new System.ArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
            docWriter.SetRAMBufferSizeMB(mb);
            if (infoStream != null)
                Message("setRAMBufferSizeMB " + mb);
        }

        /// <summary> Returns the value set by <see cref="SetRAMBufferSizeMB" /> if enabled.</summary>
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
        public virtual double GetRAMBufferSizeMB()
        {
            return docWriter.GetRAMBufferSizeMB();
        }

        /// <summary> <p/>Determines the minimal number of delete terms required before the buffered
        /// in-memory delete terms are applied and flushed. If there are documents
        /// buffered in memory at the time, they are merged and a new segment is
        /// created.<p/>
        /// <p/>Disabled by default (writer flushes by RAM usage).<p/>
        ///
        /// </summary>
        /// <throws> IllegalArgumentException if maxBufferedDeleteTerms
        /// is enabled but smaller than 1
        /// </throws>
        /// <seealso cref="SetRAMBufferSizeMB">
        /// </seealso>
        public virtual void SetMaxBufferedDeleteTerms(int maxBufferedDeleteTerms)
        {
            EnsureOpen();
            if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH && maxBufferedDeleteTerms < 1)
                throw new System.ArgumentException("maxBufferedDeleteTerms must at least be 1 when enabled");
            docWriter.MaxBufferedDeleteTerms = maxBufferedDeleteTerms;
            if (infoStream != null)
                Message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
        }

        /// <summary> Returns the number of buffered deleted terms that will
        /// trigger a flush if enabled.
        /// </summary>
        /// <seealso cref="SetMaxBufferedDeleteTerms">
        /// </seealso>
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
        public virtual int GetMaxBufferedDeleteTerms()
        {
            EnsureOpen();
            return docWriter.MaxBufferedDeleteTerms;
        }

        /// <summary>Gets or sets the number of segments that are merged at
        /// once and also controls the total number of segments
        /// allowed to accumulate in the index.
        /// <p/>Determines how often segment indices are merged by addDocument(). With
        /// smaller values, less RAM is used while indexing, and searches on
        /// unoptimized indices are faster, but indexing speed is slower. With larger
        /// values, more RAM is used during indexing, and while searches on unoptimized
        /// indices are slower, indexing is faster. Thus larger values (&gt; 10) are best
        /// for batch index creation, and smaller values (&lt; 10) for indices that are
        /// interactively maintained.
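        ///
        /// <p/>For example (illustrative values only):
        /// <code>
        /// writer.MergeFactor = 30; // batch indexing: fewer, larger merges
        /// writer.MergeFactor = 5;  // interactive indexing: shorter pauses
        /// </code>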
        ///
        /// <p/>Note that this is a convenience property: it
        /// just forwards to <c>mergePolicy.MergeFactor</c> as long as
        /// mergePolicy is an instance of <see cref="LogMergePolicy" />.
        /// Otherwise an IllegalArgumentException is thrown.<p/>
        ///
        /// <p/>This must never be less than 2. The default value is 10.
        /// </summary>
        public virtual int MergeFactor
        {
            set { LogMergePolicy.MergeFactor = value; }
            get { return LogMergePolicy.MergeFactor; }
        }

        /// <summary>Gets or sets the default info stream.
        /// If non-null, this will be the default infoStream used
        /// by a newly instantiated IndexWriter.
        /// </summary>
        /// <seealso cref="SetInfoStream">
        /// </seealso>
        public static StreamWriter DefaultInfoStream
        {
            set { IndexWriter.defaultInfoStream = value; }
            get { return IndexWriter.defaultInfoStream; }
        }

        /// <summary>If non-null, information about merges, deletes and a
        /// message when maxFieldLength is reached will be printed
        /// to this.
        /// </summary>
        public virtual void SetInfoStream(System.IO.StreamWriter infoStream)
        {
            EnsureOpen();
            SetMessageID(infoStream);
            docWriter.SetInfoStream(infoStream);
            deleter.SetInfoStream(infoStream);
            if (infoStream != null)
                MessageState();
        }

        private void MessageState()
        {
            Message("setInfoStream: dir=" + directory +
                    " mergePolicy=" + mergePolicy +
                    " mergeScheduler=" + mergeScheduler +
                    " ramBufferSizeMB=" + docWriter.GetRAMBufferSizeMB() +
                    " maxBufferedDocs=" + docWriter.MaxBufferedDocs +
                    " maxBufferedDeleteTerms=" + docWriter.MaxBufferedDeleteTerms +
                    " maxFieldLength=" + maxFieldLength +
                    " index=" + SegString());
        }

        /// <summary> Returns the current infoStream in use by this writer.</summary>
        /// <seealso cref="SetInfoStream">
        /// </seealso>
        public virtual StreamWriter InfoStream
        {
            get
            {
                EnsureOpen();
                return infoStream;
            }
        }

        /// <summary>Returns true if verbose output is enabled (i.e., infoStream != null). </summary>
        public virtual bool Verbose
        {
            get { return infoStream != null; }
        }

        /// <summary>Gets or sets the allowed timeout (in milliseconds) when acquiring the write lock.</summary>
        public virtual long WriteLockTimeout
        {
            get
            {
                EnsureOpen();
                return writeLockTimeout;
            }
            set
            {
                EnsureOpen();
                this.writeLockTimeout = value;
            }
        }

        /// <summary> Gets or sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
        /// milliseconds).
        /// </summary>
        public static long DefaultWriteLockTimeout
        {
            set { IndexWriter.WRITE_LOCK_TIMEOUT = value; }
            get { return IndexWriter.WRITE_LOCK_TIMEOUT; }
        }

        /// <summary> Commits all changes to an index and closes all
        /// associated files. Note that this may be a costly
        /// operation, so try to re-use a single writer instead of
        /// closing and opening a new one. See <see cref="Commit()" /> for
        /// caveats about write caching done by some IO devices.
        ///
        /// <p/> If an Exception is hit during close, eg due to disk
        /// full or some other reason, then both the on-disk index
        /// and the internal state of the IndexWriter instance will
        /// be consistent. However, the close will not be complete
        /// even though part of it (flushing buffered documents)
        /// may have succeeded, so the write lock will still be
        /// held.<p/>
        ///
        /// <p/> If you can correct the underlying cause (eg free up
        /// some disk space) then you can call close() again.
        /// Failing that, if you want to force the write lock to be
        /// released (dangerous, because you may then lose buffered
        /// docs in the IndexWriter instance) then you can do
        /// something like this:<p/>
        ///
        /// <code>
        /// try {
        ///     writer.close();
        /// } finally {
        ///     if (IndexWriter.isLocked(directory)) {
        ///         IndexWriter.unlock(directory);
        ///     }
        /// }
        /// </code>
        ///
        /// after which, you must be certain not to use the writer
        /// instance anymore.<p/>
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer, again. See <a
        /// href="#OOME">above</a> for details.<p/>
        ///
        /// </summary>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> IOException if there is a low-level IO error </throws>
        [Obsolete("Use Dispose() instead")]
        public void Close()
        {
            Dispose(true);
        }

        /// <summary> Commits all changes to an index and closes all
        /// associated files. Note that this may be a costly
        /// operation, so try to re-use a single writer instead of
        /// closing and opening a new one. See <see cref="Commit()" /> for
        /// caveats about write caching done by some IO devices.
        ///
        /// <p/> If an Exception is hit during close, eg due to disk
        /// full or some other reason, then both the on-disk index
        /// and the internal state of the IndexWriter instance will
        /// be consistent. However, the close will not be complete
        /// even though part of it (flushing buffered documents)
        /// may have succeeded, so the write lock will still be
        /// held.<p/>
        ///
        /// <p/> If you can correct the underlying cause (eg free up
        /// some disk space) then you can call close() again.
        /// Failing that, if you want to force the write lock to be
        /// released (dangerous, because you may then lose buffered
        /// docs in the IndexWriter instance) then you can do
        /// something like this:<p/>
        ///
        /// <code>
        /// try {
        ///     writer.close();
        /// } finally {
        ///     if (IndexWriter.isLocked(directory)) {
        ///         IndexWriter.unlock(directory);
        ///     }
        /// }
        /// </code>
        ///
        /// after which, you must be certain not to use the writer
        /// instance anymore.<p/>
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer, again. See <a
        /// href="#OOME">above</a> for details.<p/>
        ///
        /// </summary>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> IOException if there is a low-level IO error </throws>
        public virtual void Dispose()
        {
            Dispose(true);
        }

        /// <summary> Closes the index with or without waiting for currently
        /// running merges to finish. This is only meaningful when
        /// using a MergeScheduler that runs merges in background
        /// threads.
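        ///
        /// <p/>For example (a hedged sketch; whether aborting merges is
        /// acceptable depends on your application):
        /// <code>
        /// writer.Dispose(true);  // block until all background merges finish
        /// writer.Dispose(false); // ask running merges to abort, then return
        /// </code>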
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer, again. See <a
        /// href="#OOME">above</a> for details.<p/>
        ///
        /// <p/><b>NOTE</b>: it is dangerous to always call
        /// close(false), especially when IndexWriter is not open
        /// for very long, because this can result in "merge
        /// starvation" whereby long merges will never have a
        /// chance to finish. This will cause too many segments in
        /// your index over time.<p/>
        ///
        /// </summary>
        /// <param name="waitForMerges">if true, this call will block
        /// until all merges complete; else, it will ask all
        /// running merges to abort, wait until those merges have
        /// finished (which should be at most a few seconds), and
        /// then return.
        /// </param>
        public virtual void Dispose(bool waitForMerges)
        {
            Dispose(true, waitForMerges);
        }

        protected virtual void Dispose(bool disposing, bool waitForMerges)
        {
            if (disposing)
            {
                // Ensure that only one thread actually gets to do the closing:
                if (ShouldClose())
                {
                    // If any methods have hit OutOfMemoryError, then abort
                    // on close, in case the internal state of IndexWriter
                    // or DocumentsWriter is corrupt
                    if (hitOOM)
                        RollbackInternal();
                    else
                        CloseInternal(waitForMerges);
                }
            }
        }

        /// <summary> Closes the index with or without waiting for currently
        /// running merges to finish. This is only meaningful when
        /// using a MergeScheduler that runs merges in background
        /// threads.
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer, again. See <a
        /// href="#OOME">above</a> for details.<p/>
        ///
        /// <p/><b>NOTE</b>: it is dangerous to always call
        /// close(false), especially when IndexWriter is not open
        /// for very long, because this can result in "merge
        /// starvation" whereby long merges will never have a
        /// chance to finish. This will cause too many segments in
        /// your index over time.<p/>
        ///
        /// </summary>
        /// <param name="waitForMerges">if true, this call will block
        /// until all merges complete; else, it will ask all
        /// running merges to abort, wait until those merges have
        /// finished (which should be at most a few seconds), and
        /// then return.
        /// </param>
        [Obsolete("Use Dispose(bool) instead")]
        public virtual void Close(bool waitForMerges)
        {
            Dispose(waitForMerges);
        }

        // Returns true if this thread should attempt to close, or
        // false if IndexWriter is now closed; else, waits until
        // another thread finishes closing
        private bool ShouldClose()
        {
            lock (this)
            {
                while (true)
                {
                    if (!closed)
                    {
                        if (!closing)
                        {
                            closing = true;
                            return true;
                        }
                        else
                        {
                            // Another thread is presently trying to close;
                            // wait until it finishes one way (closes
                            // successfully) or another (fails to close)
                            DoWait();
                        }
                    }
                    else
                        return false;
                }
            }
        }

        private void CloseInternal(bool waitForMerges)
        {
            docWriter.PauseAllThreads();

            try
            {
                if (infoStream != null)
                    Message("now flush at close");

                docWriter.Dispose();

                // Only allow a new merge to be triggered if we are
                // going to wait for merges:
                if (!hitOOM)
                {
                    Flush(waitForMerges, true, true);
                }

                if (waitForMerges)
                    // Give merge scheduler last chance to run, in case
                    // any pending merges are waiting:
                    mergeScheduler.Merge(this);

                mergePolicy.Close();

                FinishMerges(waitForMerges);
                stopMerges = true;

                mergeScheduler.Close();

                if (infoStream != null)
                    Message("now call final commit()");

                if (!hitOOM)
                {
                    Commit(0);
                }

                if (infoStream != null)
                    Message("at close: " + SegString());

                lock (this)
                {
                    readerPool.Dispose();
                    docWriter = null;
                    deleter.Dispose();
                }

                if (writeLock != null)
                {
                    writeLock.Release(); // release write lock
                    writeLock = null;
                }
                lock (this)
                {
                    closed = true;
                }
            }
            catch (System.OutOfMemoryException oom)
            {
                HandleOOM(oom, "closeInternal");
            }
            finally
            {
                lock (this)
                {
                    closing = false;
                    System.Threading.Monitor.PulseAll(this);
                    if (!closed)
                    {
                        if (docWriter != null)
                            docWriter.ResumeAllThreads();
                        if (infoStream != null)
                            Message("hit exception while closing");
                    }
                }
            }
        }

        /// <summary>Tells the docWriter to close its currently open shared
        /// doc stores (stored fields &amp; vectors files). The return
        /// value specifies whether new doc store files are compound or not.
        /// </summary>
        private bool FlushDocStores()
        {
            lock (this)
            {
                if (infoStream != null)
                {
                    Message("flushDocStores segment=" + docWriter.DocStoreSegment);
                }

                bool useCompoundDocStore = false;
                if (infoStream != null)
                {
                    Message("closeDocStores segment=" + docWriter.DocStoreSegment);
                }

                System.String docStoreSegment;

                bool success = false;
                try
                {
                    docStoreSegment = docWriter.CloseDocStore();
                    success = true;
                }
                finally
                {
                    if (!success && infoStream != null)
                    {
                        Message("hit exception closing doc store segment");
                    }
                }

                if (infoStream != null)
                {
                    Message("flushDocStores files=" + docWriter.ClosedFiles());
                }

                useCompoundDocStore = mergePolicy.UseCompoundDocStore(segmentInfos);

                if (useCompoundDocStore && docStoreSegment != null && docWriter.ClosedFiles().Count != 0)
                {
                    // Now build compound doc store file

                    if (infoStream != null)
                    {
                        Message("create compound file " + docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
                    }

                    success = false;

                    int numSegments = segmentInfos.Count;
                    System.String compoundFileName = docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION;

                    try
                    {
                        CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName);
                        foreach (string file in docWriter.closedFiles)
                        {
                            cfsWriter.AddFile(file);
                        }

                        // Perform the merge
                        cfsWriter.Close();
                        success = true;
                    }
                    finally
                    {
                        if (!success)
                        {
                            if (infoStream != null)
                                Message("hit exception building compound file doc store for segment " + docStoreSegment);
                            deleter.DeleteFile(compoundFileName);
                            docWriter.Abort();
                        }
                    }

                    for (int i = 0; i < numSegments; i++)
                    {
                        SegmentInfo si = segmentInfos.Info(i);
                        if (si.DocStoreOffset != -1 && si.DocStoreSegment.Equals(docStoreSegment))
                            si.DocStoreIsCompoundFile = true;
                    }

                    Checkpoint();

                    // In case the files we just merged into a CFS were
                    // not previously checkpointed:
                    deleter.DeleteNewFiles(docWriter.ClosedFiles());
                }

                return useCompoundDocStore;
            }
        }

        /// <summary>Returns the Directory used by this index. </summary>
        public virtual Directory Directory
        {
            get
            {
                // Pass false because the flush during closing calls getDirectory
                EnsureOpen(false);
                return directory;
            }
        }

        /// <summary>Returns the analyzer used by this index. </summary>
        public virtual Analyzer Analyzer
        {
            get
            {
                EnsureOpen();
                return analyzer;
            }
        }

        /// <summary>Returns the total number of docs in this index, including
        /// docs not yet flushed (still in the RAM buffer),
        /// not counting deletions.
        /// </summary>
        /// <seealso cref="NumDocs">
        /// </seealso>
        public virtual int MaxDoc()
        {
            lock (this)
            {
                int count;
                if (docWriter != null)
                    count = docWriter.NumDocsInRAM;
                else
                    count = 0;

                for (int i = 0; i < segmentInfos.Count; i++)
                    count += segmentInfos.Info(i).docCount;
                return count;
            }
        }

        /// <summary>Returns the total number of docs in this index, including
        /// docs not yet flushed (still in the RAM buffer), and
        /// taking deletions into account (deleted docs are not
        /// counted).
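        /// <p/>For example (a hedged sketch; the numbers are illustrative):
        /// <code>
        /// // after adding 100 docs and deleting 10 of them (deletes flushed):
        /// int max = writer.MaxDoc();  // 100 -- deletions not subtracted
        /// int num = writer.NumDocs(); // 90  -- deletions subtracted
        /// </code>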
        /// <b>NOTE:</b> buffered deletions
        /// are not counted. If you really need these to be
        /// counted you should call <see cref="Commit()" /> first.
        /// </summary>
        /// <seealso cref="NumDocs">
        /// </seealso>
        public virtual int NumDocs()
        {
            lock (this)
            {
                int count;
                if (docWriter != null)
                    count = docWriter.NumDocsInRAM;
                else
                    count = 0;

                for (int i = 0; i < segmentInfos.Count; i++)
                {
                    SegmentInfo info = segmentInfos.Info(i);
                    count += info.docCount - info.GetDelCount();
                }
                return count;
            }
        }

        public virtual bool HasDeletions()
        {
            lock (this)
            {
                EnsureOpen();
                if (docWriter.HasDeletes())
                    return true;
                for (int i = 0; i < segmentInfos.Count; i++)
                    if (segmentInfos.Info(i).HasDeletions())
                        return true;
                return false;
            }
        }

        /// <summary> The maximum number of terms that will be indexed for a single field in a
        /// document. This limits the amount of memory required for indexing, so that
        /// collections with very large files will not crash the indexing process by
        /// running out of memory.<p/>
        /// Note that this effectively truncates large documents, excluding from the
        /// index terms that occur further in the document. If you know your source
        /// documents are large, be sure to set this value high enough to accommodate
        /// the expected size. If you set it to <see cref="int.MaxValue" />, then the only limit
        /// is your memory, but you should anticipate an OutOfMemoryError.<p/>
        /// By default, no more than 10,000 terms will be indexed for a field.
        ///
        /// </summary>
        /// <seealso cref="MaxFieldLength">
        /// </seealso>
        private int maxFieldLength;

        /// <summary> Adds a document to this index. If the document contains more than
        /// <see cref="SetMaxFieldLength(int)" /> terms for a given field, the remainder are
        /// discarded.
        ///
        /// <p/> Note that if an Exception is hit (for example disk full)
        /// then the index will be consistent, but this document
        /// may not have been added. Furthermore, it's possible
        /// the index will have one segment in non-compound format
        /// even when using compound files (when a merge has
        /// partially succeeded).<p/>
        ///
        /// <p/> This method periodically flushes pending documents
        /// to the Directory (see <a href="#flush">above</a>), and
        /// also periodically triggers segment merges in the index
        /// according to the <see cref="MergePolicy" /> in use.<p/>
        ///
        /// <p/>Merges temporarily consume space in the
        /// directory. The amount of space required is up to 1X the
        /// size of all segments being merged, when no
        /// readers/searchers are open against the index, and up to
        /// 2X the size of all segments being merged when
        /// readers/searchers are open against the index (see
        /// <see cref="Optimize()" /> for details). The sequence of
        /// primitive merge operations performed is governed by the
        /// merge policy.
        ///
        /// <p/>Note that each term in the document can be no longer
        /// than 16383 characters, otherwise an
        /// IllegalArgumentException will be thrown.<p/>
        ///
        /// <p/>Note that it's possible to create an invalid Unicode
        /// string in java if a UTF16 surrogate pair is malformed.
        /// In this case, the invalid characters are silently
        /// replaced with the Unicode replacement character
        /// U+FFFD.<p/>
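        ///
        /// <p/>For example (a minimal sketch; the field name, value and
        /// storage options are illustrative, not prescribed by this API):
        /// <code>
        /// Document doc = new Document();
        /// doc.Add(new Field("content", "hello world", Field.Store.YES, Field.Index.ANALYZED));
        /// writer.AddDocument(doc);
        /// </code>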
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer. See <a
        /// href="#OOME">above</a> for details.<p/>
        ///
        /// </summary>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> IOException if there is a low-level IO error </throws>
        public virtual void AddDocument(Document doc)
        {
            AddDocument(doc, analyzer);
        }

        /// <summary> Adds a document to this index, using the provided analyzer instead of the
        /// value of <see cref="Analyzer" />. If the document contains more than
        /// <see cref="SetMaxFieldLength(int)" /> terms for a given field, the remainder are
        /// discarded.
        ///
        /// <p/>See <see cref="AddDocument(Document)" /> for details on
        /// index and IndexWriter state after an Exception, and
        /// flushing/merging temporary free space requirements.<p/>
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer. See <a
        /// href="#OOME">above</a> for details.<p/>
        ///
        /// </summary>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> IOException if there is a low-level IO error </throws>
        public virtual void AddDocument(Document doc, Analyzer analyzer)
        {
            EnsureOpen();
            bool doFlush = false;
            bool success = false;
            try
            {
                try
                {
                    doFlush = docWriter.AddDocument(doc, analyzer);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        if (infoStream != null)
                            Message("hit exception adding document");

                        lock (this)
                        {
                            // If docWriter has some aborted files that were
                            // never incref'd, then we clean them up here
                            if (docWriter != null)
                            {
                                ICollection<string> files = docWriter.AbortedFiles();
                                if (files != null)
                                    deleter.DeleteNewFiles(files);
                            }
                        }
                    }
                }
                if (doFlush)
                    Flush(true, false, false);
            }
            catch (System.OutOfMemoryException oom)
            {
                HandleOOM(oom, "addDocument");
            }
        }

        /// <summary> Deletes the document(s) containing <c>term</c>.
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer. See <a
        /// href="#OOME">above</a> for details.<p/>
        ///
        /// </summary>
        /// <param name="term">the term to identify the documents to be deleted
        /// </param>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> IOException if there is a low-level IO error </throws>
        public virtual void DeleteDocuments(Term term)
        {
            EnsureOpen();
            try
            {
                bool doFlush = docWriter.BufferDeleteTerm(term);
                if (doFlush)
                    Flush(true, false, false);
            }
            catch (System.OutOfMemoryException oom)
            {
                HandleOOM(oom, "deleteDocuments(Term)");
            }
        }

        /// <summary> Deletes the document(s) containing any of the
        /// terms. All deletes are flushed at the same time.
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer. See <a
        /// href="#OOME">above</a> for details.<p/>
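        ///
        /// <p/>For example (a hedged sketch; the field/value pairs are
        /// illustrative):
        /// <code>
        /// writer.DeleteDocuments(new Term("id", "42"), new Term("id", "43"));
        /// </code>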
        ///
        /// </summary>
        /// <param name="terms">array of terms to identify the documents
        /// to be deleted
        /// </param>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> IOException if there is a low-level IO error </throws>
        public virtual void DeleteDocuments(params Term[] terms)
        {
            EnsureOpen();
            try
            {
                bool doFlush = docWriter.BufferDeleteTerms(terms);
                if (doFlush)
                    Flush(true, false, false);
            }
            catch (System.OutOfMemoryException oom)
            {
                HandleOOM(oom, "deleteDocuments(params Term[])");
            }
        }

        /// <summary> Deletes the document(s) matching the provided query.
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer. See <a
        /// href="#OOME">above</a> for details.<p/>
        ///
        /// </summary>
        /// <param name="query">the query to identify the documents to be deleted
        /// </param>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> IOException if there is a low-level IO error </throws>
        public virtual void DeleteDocuments(Query query)
        {
            EnsureOpen();
            bool doFlush = docWriter.BufferDeleteQuery(query);
            if (doFlush)
                Flush(true, false, false);
        }

        /// <summary> Deletes the document(s) matching any of the provided queries.
        /// All deletes are flushed at the same time.
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer. See <a
        /// href="#OOME">above</a> for details.<p/>
        ///
        /// </summary>
        /// <param name="queries">array of queries to identify the documents
        /// to be deleted
        /// </param>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> IOException if there is a low-level IO error </throws>
        public virtual void DeleteDocuments(params Query[] queries)
        {
            EnsureOpen();
            bool doFlush = docWriter.BufferDeleteQueries(queries);
            if (doFlush)
                Flush(true, false, false);
        }

        /// <summary> Updates a document by first deleting the document(s)
        /// containing <c>term</c> and then adding the new
        /// document. The delete and then add are atomic as seen
        /// by a reader on the same index (flush may happen only after
        /// the add).
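        ///
        /// <p/>For example (a hedged sketch; a unique <c>"id"</c> field is an
        /// application convention, not an API requirement):
        /// <code>
        /// writer.UpdateDocument(new Term("id", "42"), newVersionOfDoc);
        /// </code>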
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer. See <a
        /// href="#OOME">above</a> for details.<p/>
        ///
        /// </summary>
        /// <param name="term">the term to identify the document(s) to be
        /// deleted
        /// </param>
        /// <param name="doc">the document to be added
        /// </param>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> IOException if there is a low-level IO error </throws>
        public virtual void UpdateDocument(Term term, Document doc)
        {
            EnsureOpen();
            UpdateDocument(term, doc, Analyzer);
        }

        /// <summary> Updates a document by first deleting the document(s)
        /// containing <c>term</c> and then adding the new
        /// document. The delete and then add are atomic as seen
        /// by a reader on the same index (flush may happen only after
        /// the add).
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer. See <a
        /// href="#OOME">above</a> for details.<p/>
        ///
        /// </summary>
        /// <param name="term">the term to identify the document(s) to be
        /// deleted
        /// </param>
        /// <param name="doc">the document to be added
        /// </param>
        /// <param name="analyzer">the analyzer to use when analyzing the document
        /// </param>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> IOException if there is a low-level IO error </throws>
        public virtual void UpdateDocument(Term term, Document doc, Analyzer analyzer)
        {
            EnsureOpen();
            try
            {
                bool doFlush = false;
                bool success = false;
                try
                {
                    doFlush = docWriter.UpdateDocument(term, doc, analyzer);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        if (infoStream != null)
                            Message("hit exception updating document");

                        lock (this)
                        {
                            // If docWriter has some aborted files that were
                            // never incref'd, then we clean them up here
                            ICollection<string> files = docWriter.AbortedFiles();
                            if (files != null)
                                deleter.DeleteNewFiles(files);
                        }
                    }
                }
                if (doFlush)
                    Flush(true, false, false);
            }
            catch (System.OutOfMemoryException oom)
            {
                HandleOOM(oom, "updateDocument");
            }
        }

        // for test purposes
        internal int GetSegmentCount()
        {
            lock (this)
            {
                return segmentInfos.Count;
            }
        }

        // for test purposes
        internal int GetNumBufferedDocuments()
        {
            lock (this)
            {
                return docWriter.NumDocsInRAM;
            }
        }

        // for test purposes
        public /*internal*/ int GetDocCount(int i)
        {
            lock (this)
            {
                if (i >= 0 && i < segmentInfos.Count)
                {
                    return segmentInfos.Info(i).docCount;
                }
                else
                {
                    return -1;
                }
            }
        }

        // for test purposes
        internal int GetFlushCount()
        {
            lock (this)
            {
                return flushCount;
            }
        }

        // for test purposes
        internal int GetFlushDeletesCount()
        {
            lock (this)
            {
                return flushDeletesCount;
            }
        }

        internal System.String NewSegmentName()
        {
            // Cannot synchronize on IndexWriter because that causes
            // deadlock
            lock (segmentInfos)
            {
                // Important to increment changeCount so that the
                // segmentInfos is written on close. Otherwise we
                // could close, re-open and re-return the same segment
                // name that was previously returned which can cause
                // problems at least with ConcurrentMergeScheduler.
                changeCount++;
                return "_" + Number.ToString(segmentInfos.counter++);
            }
        }

        /// <summary>If non-null, information about merges will be printed to this.</summary>
        private System.IO.StreamWriter infoStream = null;
        private static System.IO.StreamWriter defaultInfoStream = null;

        /// <summary> Requests an "optimize" operation on an index, priming the index
        /// for the fastest available search. Traditionally this has meant
        /// merging all segments into a single segment as is done in the
        /// default merge policy, but individual merge policies may implement
        /// optimize in different ways.
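        ///
        /// <p/>For example (a hedged sketch of a typical end-of-batch
        /// sequence):
        /// <code>
        /// writer.Optimize(); // merge segments for fastest searches
        /// writer.Close();    // commit the changes and release the write lock
        /// </code>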
        ///
        /// <p/>It is recommended that this method be called upon completion of indexing. In
        /// environments with frequent updates, optimize is best done during low volume times, if at all.
        ///
        /// <p/>See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion.<p/>
        ///
        /// <p/>Note that optimize requires 2X the index size free
        /// space in your Directory (3X if you're using compound
        /// file format). For example, if your index
        /// size is 10 MB then you need 20 MB free for optimize to
        /// complete (30 MB if you're using compound file format).<p/>
        ///
        /// <p/>If some but not all readers re-open while an
        /// optimize is underway, this will cause &gt; 2X temporary
        /// space to be consumed as those new readers will then
        /// hold open the partially optimized segments at that
        /// time. It is best not to re-open readers while optimize
        /// is running.<p/>
        ///
        /// <p/>The actual temporary usage could be much less than
        /// these figures (it depends on many factors).<p/>
        ///
        /// <p/>In general, once the optimize completes, the total size of the
        /// index will be less than the size of the starting index.
        /// It could be quite a bit smaller (if there were many
        /// pending deletes) or just slightly smaller.<p/>
        ///
        /// <p/>If an Exception is hit during optimize(), for example
        /// due to disk full, the index will not be corrupt and no
        /// documents will have been lost. However, it may have
        /// been partially optimized (some segments were merged but
        /// not all), and it's possible that one of the segments in
        /// the index will be in non-compound format even when
        /// using compound file format. This will occur when the
        /// Exception is hit during conversion of the segment into
        /// compound format.<p/>
        ///
        /// <p/>This call will optimize those segments present in
        /// the index when the call started. If other threads are
        /// still adding documents and flushing segments, those
        /// newly created segments will not be optimized unless you
        /// call optimize again.<p/>
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer. See <a
        /// href="#OOME">above</a> for details.<p/>
        ///
        /// </summary>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> IOException if there is a low-level IO error </throws>
        /// <seealso cref="Index.LogMergePolicy.FindMergesForOptimize">
        /// </seealso>
        public virtual void Optimize()
        {
            Optimize(true);
        }

        /// <summary> Optimize the index down to &lt;= maxNumSegments. If
        /// maxNumSegments==1 then this is the same as <see cref="Optimize()" />.
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer.
        /// See <a href="#OOME">above</a> for details.<p/>
        ///
        /// </summary>
        /// <param name="maxNumSegments">maximum number of segments left
        /// in the index after optimization finishes
        /// </param>
        public virtual void Optimize(int maxNumSegments)
        {
            Optimize(maxNumSegments, true);
        }

        /// <summary>Just like <see cref="Optimize()" />, except you can specify
        /// whether the call should block until the optimize
        /// completes. This is only meaningful with a
        /// <see cref="MergeScheduler" /> that is able to run merges in
        /// background threads.
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer. See <a
        /// href="#OOME">above</a> for details.<p/>
        /// </summary>
        public virtual void Optimize(bool doWait)
        {
            Optimize(1, doWait);
        }

        /// <summary>Just like <see cref="Optimize(int)" />, except you can
        /// specify whether the call should block until the
        /// optimize completes. This is only meaningful with a
        /// <see cref="MergeScheduler" /> that is able to run merges in
        /// background threads.
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer. See <a
        /// href="#OOME">above</a> for details.<p/>
        /// </summary>
        public virtual void Optimize(int maxNumSegments, bool doWait)
        {
            EnsureOpen();

            if (maxNumSegments < 1)
                throw new System.ArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments);

            if (infoStream != null)
                Message("optimize: index now " + SegString());

            Flush(true, false, true);

            lock (this)
            {
                ResetMergeExceptions();
                segmentsToOptimize = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<SegmentInfo>();
                optimizeMaxNumSegments = maxNumSegments;
                int numSegments = segmentInfos.Count;
                for (int i = 0; i < numSegments; i++)
                    segmentsToOptimize.Add(segmentInfos.Info(i));

                // Now mark all pending & running merges as optimize
                // merge:
                foreach (MergePolicy.OneMerge merge in pendingMerges)
                {
                    merge.optimize = true;
                    merge.maxNumSegmentsOptimize = maxNumSegments;
                }

                foreach (MergePolicy.OneMerge merge in runningMerges)
                {
                    merge.optimize = true;
                    merge.maxNumSegmentsOptimize = maxNumSegments;
                }
            }

            MaybeMerge(maxNumSegments, true);

            if (doWait)
            {
                lock (this)
                {
                    while (true)
                    {
                        if (hitOOM)
                        {
                            throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete optimize");
                        }

                        if (mergeExceptions.Count > 0)
                        {
                            // Forward any exceptions in background merge
                            // threads to the current thread:
                            int size = mergeExceptions.Count;
                            for (int i = 0; i < size; i++)
                            {
                                MergePolicy.OneMerge merge = mergeExceptions[i];
                                if (merge.optimize)
                                {
                                    System.IO.IOException err;
                                    System.Exception t = merge.GetException();
                                    if (t != null)
                                        err = new System.IO.IOException("background merge hit exception: " + merge.SegString(directory), t);
                                    else
                                        err = new System.IO.IOException("background merge hit exception: " + merge.SegString(directory));
                                    throw err;
                                }
                            }
                        }

                        if (OptimizeMergesPending())
                            DoWait();
                        else
                            break;
                    }
                }

                // If close is called while we are still
                // running, throw an exception so the calling
                // thread will know the optimize did not
                // complete
                EnsureOpen();
            }

            // NOTE: in the ConcurrentMergeScheduler case, when
            // doWait is false, we can return immediately while
            // background threads accomplish the optimization
        }

        /// <summary>Returns true if any merges in pendingMerges or
        /// runningMerges are optimization merges.
        /// </summary>
        private bool OptimizeMergesPending()
        {
            lock (this)
            {
                foreach (MergePolicy.OneMerge merge in pendingMerges)
                {
                    if (merge.optimize)
                        return true;
                }

                foreach (MergePolicy.OneMerge merge in runningMerges)
                {
                    if (merge.optimize)
                        return true;
                }

                return false;
            }
        }

        /// <summary>Just like <see cref="ExpungeDeletes()" />, except you can
        /// specify whether the call should block until the
        /// operation completes. This is only meaningful with a
        /// <see cref="MergeScheduler" /> that is able to run merges in
        /// background threads.
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer. See <a
        /// href="#OOME">above</a> for details.<p/>
        /// </summary>
        public virtual void ExpungeDeletes(bool doWait)
        {
            EnsureOpen();

            if (infoStream != null)
                Message("expungeDeletes: index now " + SegString());

            MergePolicy.MergeSpecification spec;

            lock (this)
            {
                spec = mergePolicy.FindMergesToExpungeDeletes(segmentInfos);
                if (spec != null)
                {
                    int numMerges = spec.merges.Count;
                    for (int i = 0; i < numMerges; i++)
                        RegisterMerge(spec.merges[i]);
                }
            }

            mergeScheduler.Merge(this);

            if (spec != null && doWait)
            {
                int numMerges = spec.merges.Count;
                lock (this)
                {
                    bool running = true;
                    while (running)
                    {
                        if (hitOOM)
                        {
                            throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete expungeDeletes");
                        }

                        // Check each merge that MergePolicy asked us to
                        // do, to see if any of them are still running and
                        // if any of them have hit an exception.
                        running = false;
                        for (int i = 0; i < numMerges; i++)
                        {
                            MergePolicy.OneMerge merge = spec.merges[i];
                            if (pendingMerges.Contains(merge) || runningMerges.Contains(merge))
                                running = true;
                            System.Exception t = merge.GetException();
                            if (t != null)
                            {
                                System.IO.IOException ioe = new System.IO.IOException("background merge hit exception: " + merge.SegString(directory), t);
                                throw ioe;
                            }
                        }

                        // If any of our merges are still running, wait:
                        if (running)
                            DoWait();
                    }
                }
            }

            // NOTE: in the ConcurrentMergeScheduler case, when
            // doWait is false, we can return immediately while
            // background threads accomplish the merges
        }

        /// <summary>Expunges all deletes from the index. When an index
        /// has many document deletions (or updates to existing
        /// documents), it's best to either call optimize or
        /// expungeDeletes to remove all unused data in the index
        /// associated with the deleted documents. To see how
        /// many deletions you have pending in your index, call
        /// <see cref="IndexReader.NumDeletedDocs" />.
        /// This saves disk space and memory usage while
        /// searching.
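        /// <p/>For example (a hedged sketch; note the explicit commit of the
        /// buffered deletion first):
        /// <code>
        /// writer.DeleteDocuments(new Term("id", "42"));
        /// writer.Commit();         // flush/commit the buffered delete
        /// writer.ExpungeDeletes(); // reclaim the space it occupied
        /// </code>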
        /// expungeDeletes should be somewhat faster
        /// than optimize since it does not insist on reducing the
        /// index to a single segment (though, this depends on the
        /// <see cref="MergePolicy" />; see <see cref="Index.MergePolicy.FindMergesToExpungeDeletes" />). Note that
        /// this call does not first commit any buffered
        /// documents, so you must do so yourself if necessary.
        /// See also <seealso cref="ExpungeDeletes(bool)" />
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer. See <a
        /// href="#OOME">above</a> for details.<p/>
        /// </summary>
        public virtual void ExpungeDeletes()
        {
            ExpungeDeletes(true);
        }

        /// <summary> Expert: asks the mergePolicy whether any merges are
        /// necessary now and, if so, runs the requested merges and
        /// then iterates (testing again whether merges are needed) until no
        /// more merges are returned by the mergePolicy.
        ///
        /// Explicit calls to maybeMerge() are usually not
        /// necessary. The most common case is when merge policy
        /// parameters have changed.
        ///
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer. See <a
        /// href="#OOME">above</a> for details.<p/>
        /// </summary>
        public void MaybeMerge()
        {
            MaybeMerge(false);
        }

        private void MaybeMerge(bool optimize)
        {
            MaybeMerge(1, optimize);
        }

        private void MaybeMerge(int maxNumSegmentsOptimize, bool optimize)
        {
            UpdatePendingMerges(maxNumSegmentsOptimize, optimize);
            mergeScheduler.Merge(this);
        }

        private void UpdatePendingMerges(int maxNumSegmentsOptimize, bool optimize)
        {
            lock (this)
            {
                System.Diagnostics.Debug.Assert(!optimize || maxNumSegmentsOptimize > 0);

                if (stopMerges)
                {
                    return;
                }

                // Do not start new merges if we've hit OOME
                if (hitOOM)
                {
                    return;
                }

                MergePolicy.MergeSpecification spec;
                if (optimize)
                {
                    spec = mergePolicy.FindMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, segmentsToOptimize);

                    if (spec != null)
                    {
                        int numMerges = spec.merges.Count;
                        for (int i = 0; i < numMerges; i++)
                        {
                            MergePolicy.OneMerge merge = spec.merges[i];
                            merge.optimize = true;
                            merge.maxNumSegmentsOptimize = maxNumSegmentsOptimize;
                        }
                    }
                }
                else
                {
                    spec = mergePolicy.FindMerges(segmentInfos);
                }

                if (spec != null)
                {
                    int numMerges = spec.merges.Count;
                    for (int i = 0; i < numMerges; i++)
                        RegisterMerge(spec.merges[i]);
                }
            }
        }

        /// <summary>Expert: the <see cref="MergeScheduler" /> calls this method
        /// to retrieve the next merge requested by the
        /// MergePolicy
        /// </summary>
        internal virtual MergePolicy.OneMerge GetNextMerge()
        {
            lock (this)
            {
                if (pendingMerges.Count == 0)
                    return null;
                else
                {
                    // Advance the merge from pending to running
                    MergePolicy.OneMerge merge = pendingMerges.First.Value;
                    pendingMerges.RemoveFirst();
                    runningMerges.Add(merge);
                    return merge;
                }
            }
        }

        /// <summary>Like GetNextMerge() except only returns a merge if it's
        /// external.
        /// </summary>
        private MergePolicy.OneMerge GetNextExternalMerge()
        {
            lock (this)
            {
                if (pendingMerges.Count == 0)
                    return null;
                else
                {
                    var it = pendingMerges.GetEnumerator();
                    while (it.MoveNext())
                    {
                        MergePolicy.OneMerge merge = it.Current;
                        if (merge.isExternal)
                        {
                            // Advance the merge from pending to running
                            pendingMerges.Remove(merge); // {{Aroush-2.9}} From Mike Garski: this is an O(n) op... is that an issue?
                            runningMerges.Add(merge);
                            return merge;
                        }
                    }

                    // None of the pending merges involve external segments
                    return null;
                }
            }
        }

        /*
         * Begin a transaction. During a transaction, any segment
         * merges that happen (or ram segments flushed) will not
         * write a new segments file and will not remove any files
         * that were present at the start of the transaction. You
         * must make a matched (try/finally) call to
         * commitTransaction() or rollbackTransaction() to finish
         * the transaction.
         *
         * Note that buffered documents and delete terms are not handled
         * within the transactions, so they must be flushed before the
         * transaction is started.
         */
        private void StartTransaction(bool haveReadLock)
        {
            lock (this)
            {
                bool success = false;
                try
                {
                    if (infoStream != null)
                        Message("now start transaction");

                    System.Diagnostics.Debug.Assert(docWriter.GetNumBufferedDeleteTerms() == 0,
                        "calling startTransaction with buffered delete terms not supported: numBufferedDeleteTerms=" + docWriter.GetNumBufferedDeleteTerms());
                    System.Diagnostics.Debug.Assert(docWriter.NumDocsInRAM == 0,
                        "calling startTransaction with buffered documents not supported: numDocsInRAM=" + docWriter.NumDocsInRAM);

                    EnsureOpen();

                    // If a transaction is trying to roll back (because
                    // addIndexes hit an exception) then wait here until
                    // that's done:
                    lock (this)
                    {
                        while (stopMerges)
                            DoWait();
                    }
                    success = true;
                }
                finally
                {
                    // Release the read lock if our caller held it, on
                    // hitting an exception
                    if (!success && haveReadLock)
                        ReleaseRead();
                }

                if (haveReadLock)
                {
                    UpgradeReadToWrite();
                }
                else
                {
                    AcquireWrite();
                }

                success = false;
                try
                {
                    localRollbackSegmentInfos = (SegmentInfos) segmentInfos.Clone();

                    System.Diagnostics.Debug.Assert(!HasExternalSegments());

                    localFlushedDocCount = docWriter.GetFlushedDocCount();

                    // IncRef the current segments so the deleter won't
                    // delete their files while the transaction is open
                    // (the matching DecRef happens in commitTransaction
                    // or rollbackTransaction):
                    deleter.IncRef(segmentInfos, false);

                    success = true;
                }
                finally
                {
                    if (!success)
                        FinishAddIndexes();
                }
            }
        }

        /*
         * Rolls back the transaction and restores state to where
         * we were at the start.
         */
        private void RollbackTransaction()
        {
            lock (this)
            {
                if (infoStream != null)
                    Message("now rollback transaction");

                if (docWriter != null)
                {
                    docWriter.SetFlushedDocCount(localFlushedDocCount);
                }

                // Must finish merges before rolling back segmentInfos
                // so merges don't hit exceptions on trying to commit
                // themselves, don't get files deleted out from under
                // them, etc:
                FinishMerges(false);

                // Keep the same segmentInfos instance but replace all
                // of its SegmentInfo instances. This is so the next
                // attempt to commit using this instance of IndexWriter
                // will always write to a new generation ("write once").
                segmentInfos.Clear();
                segmentInfos.AddRange(localRollbackSegmentInfos);
                localRollbackSegmentInfos = null;

                // This must come after we rollback segmentInfos, so
                // that if a commit() kicks off it does not see the
                // segmentInfos with external segments
                FinishAddIndexes();

                // Ask deleter to locate unreferenced files we had
                // created & remove them:
                deleter.Checkpoint(segmentInfos, false);

                // Remove the incRef we did in startTransaction:
                deleter.DecRef(segmentInfos);

                // Also ask deleter to remove any newly created files
                // that were never incref'd; this "garbage" is created
                // when a merge kicks off but aborts part way through
                // before it had a chance to incRef the files it had
                // partially created
                deleter.Refresh();

                System.Threading.Monitor.PulseAll(this);

                System.Diagnostics.Debug.Assert(!HasExternalSegments());
            }
        }

        /*
         * Commits the transaction. This will write the new
         * segments file and remove any pending deletions we have
         * accumulated during the transaction.
         */
        private void CommitTransaction()
        {
            lock (this)
            {
                if (infoStream != null)
                    Message("now commit transaction");

                // Give deleter a chance to remove files now:
                Checkpoint();

                // Remove the incRef we did in startTransaction.
                deleter.DecRef(localRollbackSegmentInfos);

                localRollbackSegmentInfos = null;

                System.Diagnostics.Debug.Assert(!HasExternalSegments());

                FinishAddIndexes();
            }
        }

        /// <summary> Close the <c>IndexWriter</c> without committing
        /// any changes that have occurred since the last commit
        /// (or since it was opened, if commit hasn't been called).
        /// This removes any temporary files that had been created,
        /// after which the state of the index will be the same as
        /// it was when commit() was last called or when this
        /// writer was first opened. This also clears a previous
        /// call to <see cref="PrepareCommit()" />.
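        ///
        /// <p/>For example (a hedged sketch of discarding a failed batch; the
        /// writer is closed by this call and must not be reused):
        /// <code>
        /// try {
        ///     // ... add, update or delete documents ...
        /// } catch (Exception) {
        ///     writer.Rollback(); // index reverts to the last commit
        ///     throw;
        /// }
        /// </code>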
        /// </summary>
        /// <throws> IOException if there is a low-level IO error </throws>
        public virtual void Rollback()
        {
            EnsureOpen();

            // Ensure that only one thread actually gets to do the closing:
            if (ShouldClose())
                RollbackInternal();
        }

        private void RollbackInternal()
        {
            bool success = false;

            if (infoStream != null)
            {
                Message("rollback");
            }

            docWriter.PauseAllThreads();

            try
            {
                FinishMerges(false);

                // Must pre-close these two, in case they increment
                // changeCount so that we can then set it to false
                // before calling closeInternal
                mergePolicy.Close();
                mergeScheduler.Close();

                lock (this)
                {
                    if (pendingCommit != null)
                    {
                        pendingCommit.RollbackCommit(directory);
                        deleter.DecRef(pendingCommit);
                        pendingCommit = null;
                        System.Threading.Monitor.PulseAll(this);
                    }

                    // Keep the same segmentInfos instance but replace all
                    // of its SegmentInfo instances. This is so the next
                    // attempt to commit using this instance of IndexWriter
                    // will always write to a new generation ("write
                    // once").
                    segmentInfos.Clear();
                    segmentInfos.AddRange(rollbackSegmentInfos);

                    System.Diagnostics.Debug.Assert(!HasExternalSegments());

                    docWriter.Abort();

                    System.Diagnostics.Debug.Assert(TestPoint("rollback before checkpoint"));

                    // Ask deleter to locate unreferenced files & remove
                    // them:
                    deleter.Checkpoint(segmentInfos, false);
                    deleter.Refresh();
                }

                // Don't bother saving any changes in our segmentInfos
                readerPool.Clear(null);

                lastCommitChangeCount = changeCount;

                success = true;
            }
            catch (System.OutOfMemoryException oom)
            {
                HandleOOM(oom, "rollbackInternal");
            }
            finally
            {
                lock (this)
                {
                    if (!success)
                    {
                        docWriter.ResumeAllThreads();
                        closing = false;
                        System.Threading.Monitor.PulseAll(this);
                        if (infoStream != null)
                            Message("hit exception during rollback");
                    }
                }
            }

            CloseInternal(false);
        }

        /// <summary> Delete all documents in the index.
        ///
        /// <p/>This method will drop all buffered documents and will
        /// remove all segments from the index. This change will not be
        /// visible until a <see cref="Commit()" /> has been called. This method
        /// can be rolled back using <see cref="Rollback()" />.<p/>
        ///
        /// <p/>NOTE: this method is much faster than using deleteDocuments( new MatchAllDocsQuery() ).<p/>
        ///
        /// <p/>NOTE: this method will forcefully abort all merges
        /// in progress. If other threads are running <see cref="Optimize()" />
        /// or any of the addIndexes methods, they
        /// will receive <see cref="Index.MergePolicy.MergeAbortedException" />s.
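        ///
        /// <p/>For example (a hedged sketch of a full re-index in place):
        /// <code>
        /// writer.DeleteAll(); // drop everything, buffered and committed
        /// // ... re-add all documents ...
        /// writer.Commit();    // make the new contents visible to readers
        /// </code>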
        /// <summary> Delete all documents in the index.
        /// 
        /// <p/>This method will drop all buffered documents and will
        /// remove all segments from the index.  This change will not be
        /// visible until a <see cref="Commit()" /> has been called.  This method
        /// can be rolled back using <see cref="Rollback()" />.<p/>
        /// 
        /// <p/>NOTE: this method is much faster than using deleteDocuments( new MatchAllDocsQuery() ).<p/>
        /// 
        /// <p/>NOTE: this method will forcefully abort all merges
        /// in progress.  If other threads are running <see cref="Optimize()" />
        /// or any of the addIndexes methods, they
        /// will receive <see cref="Index.MergePolicy.MergeAbortedException" />s.
        /// </summary>
        public virtual void DeleteAll()
        {
            lock (this)
            {
                docWriter.PauseAllThreads();
                bool success = false;
                try
                {
                    
                    // Abort any running merges
                    FinishMerges(false);
                    
                    // Remove any buffered docs
                    docWriter.Abort();
                    docWriter.SetFlushedDocCount(0);
                    
                    // Remove all segments
                    segmentInfos.Clear();
                    
                    // Ask deleter to locate unreferenced files & remove them:
                    deleter.Checkpoint(segmentInfos, false);
                    deleter.Refresh();
                    
                    // Don't bother saving any changes in our segmentInfos
                    readerPool.Clear(null);
                    
                    // Mark that the index has changed
                    ++changeCount;
                    success = true;
                }
                catch (System.OutOfMemoryException oom)
                {
                    HandleOOM(oom, "deleteAll");
                }
                finally
                {
                    docWriter.ResumeAllThreads();
                    // Only log the failure message if we actually failed:
                    if (!success && infoStream != null)
                    {
                        Message("hit exception during deleteAll");
                    }
                }
            }
        }
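        // Example (an illustrative sketch, not part of the original source):
        // clearing an index atomically.  DeleteAll() only becomes visible on
        // commit, so unlike deleting by query it can still be undone:
        //
        //   writer.DeleteAll();
        //   if (shouldUndo)            // hypothetical condition
        //       writer.Rollback();     // back to the last commit point
        //   else
        //       writer.Commit();       // readers now see an empty index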
        private void FinishMerges(bool waitForMerges)
        {
            lock (this)
            {
                if (!waitForMerges)
                {
                    
                    stopMerges = true;
                    
                    // Abort all pending & running merges:
                    foreach (MergePolicy.OneMerge merge in pendingMerges)
                    {
                        if (infoStream != null)
                            Message("now abort pending merge " + merge.SegString(directory));
                        merge.Abort();
                        MergeFinish(merge);
                    }
                    pendingMerges.Clear();
                    
                    foreach (MergePolicy.OneMerge merge in runningMerges)
                    {
                        if (infoStream != null)
                            Message("now abort running merge " + merge.SegString(directory));
                        merge.Abort();
                    }
                    
                    // Ensure any running addIndexes finishes.  It's fine
                    // if a new one attempts to start because its merges
                    // will quickly see that stopMerges == true and abort.
                    AcquireRead();
                    ReleaseRead();
                    
                    // These merges periodically check whether they have
                    // been aborted, and stop if so.  We wait here to make
                    // sure they all stop.  It should not take very long
                    // because the merge threads periodically check if
                    // they are aborted.
                    while (runningMerges.Count > 0)
                    {
                        if (infoStream != null)
                            Message("now wait for " + runningMerges.Count + " running merge(s) to abort");
                        DoWait();
                    }
                    
                    stopMerges = false;
                    System.Threading.Monitor.PulseAll(this);
                    
                    System.Diagnostics.Debug.Assert(0 == mergingSegments.Count);
                    
                    if (infoStream != null)
                        Message("all running merges have aborted");
                }
                else
                {
                    // waitForMerges() will ensure any running addIndexes finishes.
                    // It's fine if a new one attempts to start because from our
                    // caller above the call will see that we are in the
                    // process of closing, and will throw an
                    // AlreadyClosedException.
                    WaitForMerges();
                }
            }
        }
        
        /// <summary> Wait for any currently outstanding merges to finish.
        /// 
        /// <p/>It is guaranteed that any merges started prior to calling this method
        /// will have completed once this method completes.<p/>
        /// </summary>
        public virtual void WaitForMerges()
        {
            lock (this)
            {
                // Ensure any running addIndexes finishes.
                AcquireRead();
                ReleaseRead();
                
                while (pendingMerges.Count > 0 || runningMerges.Count > 0)
                {
                    DoWait();
                }
                
                // sanity check
                System.Diagnostics.Debug.Assert(0 == mergingSegments.Count);
            }
        }
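        // Example (an illustrative sketch, not part of the original source):
        // reaching a quiescent state before, say, benchmarking searches, so
        // background merges don't compete for I/O ("docs" is hypothetical):
        //
        //   foreach (Document doc in docs)
        //       writer.AddDocument(doc);   // flushes may queue merges
        //   writer.WaitForMerges();        // block until they complete
        //   writer.Commit();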
        /*
         * Called whenever the SegmentInfos has been updated and
         * the index files referenced exist (correctly) in the
         * index directory.
         */
        private void Checkpoint()
        {
            lock (this)
            {
                changeCount++;
                deleter.Checkpoint(segmentInfos, false);
            }
        }
        
        private void FinishAddIndexes()
        {
            ReleaseWrite();
        }
        
        private void BlockAddIndexes(bool includePendingClose)
        {
            
            AcquireRead();
            
            bool success = false;
            try
            {
                
                // Make sure we are still open since we could have
                // waited quite a while for last addIndexes to finish
                EnsureOpen(includePendingClose);
                success = true;
            }
            finally
            {
                if (!success)
                    ReleaseRead();
            }
        }
        
        private void ResumeAddIndexes()
        {
            ReleaseRead();
        }
        
        private void ResetMergeExceptions()
        {
            lock (this)
            {
                mergeExceptions = new List<MergePolicy.OneMerge>();
                mergeGen++;
            }
        }
        
        private void NoDupDirs(Directory[] dirs)
        {
            HashSet<Directory> dups = new HashSet<Directory>();
            for (int i = 0; i < dirs.Length; i++)
            {
                if (dups.Contains(dirs[i]))
                {
                    throw new System.ArgumentException("Directory " + dirs[i] + " appears more than once");
                }
                if (dirs[i] == directory)
                    throw new System.ArgumentException("Cannot add directory to itself");
                dups.Add(dirs[i]);
            }
        }
        
        /// <summary> Merges all segments from an array of indexes into this
        /// index.
        /// 
        /// <p/>This may be used to parallelize batch indexing.  A large document
        /// collection can be broken into sub-collections.  Each sub-collection can be
        /// indexed in parallel, on a different thread, process or machine.  The
        /// complete index can then be created by merging sub-collection indexes
        /// with this method.
        /// 
        /// <p/><b>NOTE:</b> the index in each Directory must not be
        /// changed (opened by a writer) while this method is
        /// running.  This method does not acquire a write lock in
        /// each input Directory, so it is up to the caller to
        /// enforce this.
        /// 
        /// <p/><b>NOTE:</b> while this is running, any attempts to
        /// add or delete documents (with another thread) will be
        /// paused until this method completes.
        /// 
        /// <p/>This method is transactional in how Exceptions are
        /// handled: it does not commit a new segments_N file until
        /// all indexes are added.  This means if an Exception
        /// occurs (for example disk full), then either no indexes
        /// will have been added or they all will have been.<p/>
        /// 
        /// <p/>Note that this requires temporary free space in the
        /// Directory up to 2X the sum of all input indexes
        /// (including the starting index).  If readers/searchers
        /// are open against the starting index, then temporary
        /// free space required will be higher by the size of the
        /// starting index (see <see cref="Optimize()" /> for details).
        /// <p/>
        /// 
        /// <p/>Once this completes, the final size of the index
        /// will be less than the sum of all input index sizes
        /// (including the starting index).  It could be quite a
        /// bit smaller (if there were many pending deletes) or
        /// just slightly smaller.<p/>
        /// 
        /// <p/>
        /// This requires this index not be among those to be added.
        /// 
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer.  See <a
        /// href="#OOME">above</a> for details.<p/>
        /// 
        /// </summary>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> IOException if there is a low-level IO error </throws>
        public virtual void AddIndexesNoOptimize(params Directory[] dirs)
        {
            
            EnsureOpen();
            
            NoDupDirs(dirs);
            
            // Do not allow add docs or deletes while we are running:
            docWriter.PauseAllThreads();
            
            try
            {
                if (infoStream != null)
                    Message("flush at addIndexesNoOptimize");
                Flush(true, false, true);
                
                bool success = false;
                
                StartTransaction(false);
                
                try
                {
                    
                    int docCount = 0;
                    lock (this)
                    {
                        EnsureOpen();
                        
                        for (int i = 0; i < dirs.Length; i++)
                        {
                            if (directory == dirs[i])
                            {
                                // cannot add this index: segments may be deleted in merge before added
                                throw new System.ArgumentException("Cannot add this index to itself");
                            }
                            
                            SegmentInfos sis = new SegmentInfos(); // read infos from dir
                            sis.Read(dirs[i]);
                            for (int j = 0; j < sis.Count; j++)
                            {
                                SegmentInfo info = sis.Info(j);
                                System.Diagnostics.Debug.Assert(!segmentInfos.Contains(info), "dup info dir=" + info.dir + " name=" + info.name);
                                docCount += info.docCount;
                                segmentInfos.Add(info); // add each info
                            }
                        }
                    }
                    
                    // Notify DocumentsWriter that the flushed count just increased
                    docWriter.UpdateFlushedDocCount(docCount);
                    
                    MaybeMerge();
                    
                    EnsureOpen();
                    
                    // If after merging there remain segments in the index
                    // that are in a different directory, just copy these
                    // over into our index.  This is necessary (before
                    // finishing the transaction) to avoid leaving the
                    // index in an unusable (inconsistent) state.
                    ResolveExternalSegments();
                    
                    EnsureOpen();
                    
                    success = true;
                }
                finally
                {
                    if (success)
                    {
                        CommitTransaction();
                    }
                    else
                    {
                        RollbackTransaction();
                    }
                }
            }
            catch (System.OutOfMemoryException oom)
            {
                HandleOOM(oom, "addIndexesNoOptimize");
            }
            finally
            {
                if (docWriter != null)
                {
                    docWriter.ResumeAllThreads();
                }
            }
        }
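        // Example (an illustrative sketch, not part of the original source):
        // combining per-shard indexes that were built in parallel.  The
        // shardDir1/shardDir2 variables are assumed to be Directory
        // instances whose own writers have already been closed:
        //
        //   writer.AddIndexesNoOptimize(shardDir1, shardDir2);
        //   writer.Commit();   // the added segments become visible
        //
        // No writer may be open on the input directories while this runs;
        // as documented above, this method does not take their write locks.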
        private bool HasExternalSegments()
        {
            return segmentInfos.HasExternalSegments(directory);
        }
        
        /* If any of our segments are using a directory != ours
         * then we have to either copy them over one by one, merge
         * them (if merge policy has chosen to) or wait until
         * currently running merges (in the background) complete.
         * We don't return until the SegmentInfos has no more
         * external segments.  Currently this is only used by
         * addIndexesNoOptimize().
         */
        private void ResolveExternalSegments()
        {
            
            bool any = false;
            
            bool done = false;
            
            while (!done)
            {
                SegmentInfo info = null;
                MergePolicy.OneMerge merge = null;
                lock (this)
                {
                    
                    if (stopMerges)
                        throw new MergePolicy.MergeAbortedException("rollback() was called or addIndexes* hit an unhandled exception");
                    
                    int numSegments = segmentInfos.Count;
                    
                    done = true;
                    for (int i = 0; i < numSegments; i++)
                    {
                        info = segmentInfos.Info(i);
                        if (info.dir != directory)
                        {
                            done = false;
                            MergePolicy.OneMerge newMerge = new MergePolicy.OneMerge(segmentInfos.Range(i, 1 + i), mergePolicy is LogMergePolicy && UseCompoundFile);
                            
                            // Returns true if no running merge conflicts
                            // with this one (and, records this merge as
                            // pending), ie, this segment is not currently
                            // being merged:
                            if (RegisterMerge(newMerge))
                            {
                                merge = newMerge;
                                
                                // If this segment is not currently being
                                // merged, then advance it to running & run
                                // the merge ourself (below):
                                pendingMerges.Remove(merge); // {{Aroush-2.9}} From Mike Garski: this is an O(n) op... is that an issue?
                                runningMerges.Add(merge);
                                break;
                            }
                        }
                    }
                    
                    if (!done && merge == null)
                        // We are not yet done (external segments still
                        // exist in segmentInfos), yet, all such segments
                        // are currently "covered" by a pending or running
                        // merge.  We now try to grab any pending merge
                        // that involves external segments:
                        merge = GetNextExternalMerge();
                    
                    if (!done && merge == null)
                        // We are not yet done, and, all external segments
                        // fall under merges that the merge scheduler is
                        // currently running.  So, we now wait and check
                        // back to see if the merge has completed.
                        DoWait();
                }
                
                if (merge != null)
                {
                    any = true;
                    Merge(merge);
                }
            }
            
            if (any)
                // Sometimes, on copying an external segment over,
                // more merges may become necessary:
                mergeScheduler.Merge(this);
        }
        
        /// <summary>Merges the provided indexes into this index.
        /// <p/>After this completes, the index is optimized. <p/>
        /// <p/>The provided IndexReaders are not closed.<p/>
        /// 
        /// <p/><b>NOTE:</b> while this is running, any attempts to
        /// add or delete documents (with another thread) will be
        /// paused until this method completes.
        /// 
        /// <p/>See <see cref="AddIndexesNoOptimize(Directory[])" /> for
        /// details on transactional semantics, temporary free
        /// space required in the Directory, and non-CFS segments
        /// on an Exception.<p/>
        /// 
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer.  See <a
        /// href="#OOME">above</a> for details.<p/>
        /// 
        /// </summary>
        /// <throws> CorruptIndexException if the index is corrupt </throws>
        /// <throws> IOException if there is a low-level IO error </throws>
        public virtual void AddIndexes(params IndexReader[] readers)
        {
            
            EnsureOpen();
            
            // Do not allow add docs or deletes while we are running:
            docWriter.PauseAllThreads();
            
            // We must pre-acquire a read lock here (and upgrade to
            // write lock in startTransaction below) so that no
            // other addIndexes is allowed to start up after we have
            // flushed & optimized but before we then start our
            // transaction.  This is because the merging below
            // requires that only one segment is present in the
            // index:
            AcquireRead();
            
            try
            {
                
                SegmentInfo info = null;
                System.String mergedName = null;
                SegmentMerger merger = null;
                
                bool success = false;
                
                try
                {
                    Flush(true, false, true);
                    Optimize(); // start with zero or 1 seg
                    success = true;
                }
                finally
                {
                    // Take care to release the read lock if we hit an
                    // exception before starting the transaction
                    if (!success)
                        ReleaseRead();
                }
                
                // true means we already have a read lock; if this
                // call hits an exception it will release the write
                // lock:
                StartTransaction(true);
                
                try
                {
                    mergedName = NewSegmentName();
                    merger = new SegmentMerger(this, mergedName, null);
                    
                    SegmentReader sReader = null;
                    lock (this)
                    {
                        if (segmentInfos.Count == 1)
                        {
                            // add existing index, if any
                            sReader = readerPool.Get(segmentInfos.Info(0), true, BufferedIndexInput.BUFFER_SIZE, -1);
                        }
                    }
                    
                    success = false;
                    
                    try
                    {
                        if (sReader != null)
                            merger.Add(sReader);
                        
                        for (int i = 0; i < readers.Length; i++)
                            // add new indexes
                            merger.Add(readers[i]);
                        
                        int docCount = merger.Merge(); // merge 'em
                        
                        lock (this)
                        {
                            segmentInfos.Clear(); // pop old infos & add new
                            info = new SegmentInfo(mergedName, docCount, directory, false, true, -1, null, false, merger.HasProx());
                            SetDiagnostics(info, "addIndexes(params IndexReader[])");
                            segmentInfos.Add(info);
                        }
                        
                        // Notify DocumentsWriter that the flushed count just increased
                        docWriter.UpdateFlushedDocCount(docCount);
                        
                        success = true;
                    }
                    finally
                    {
                        if (sReader != null)
                        {
                            readerPool.Release(sReader);
                        }
                    }
                }
                finally
                {
                    if (!success)
                    {
                        if (infoStream != null)
                            Message("hit exception in addIndexes during merge");
                        RollbackTransaction();
                    }
                    else
                    {
                        CommitTransaction();
                    }
                }
                
                if (mergePolicy is LogMergePolicy && UseCompoundFile)
                {
                    
                    IList<string> files = null;
                    
                    lock (this)
                    {
                        // Must incRef our files so that if another thread
                        // is running merge/optimize, it doesn't delete our
                        // segment's files before we have a chance to
                        // finish making the compound file.
                        if (segmentInfos.Contains(info))
                        {
                            files = info.Files();
                            deleter.IncRef(files);
                        }
                    }
                    
                    if (files != null)
                    {
                        
                        success = false;
                        
                        StartTransaction(false);
                        
                        try
                        {
                            merger.CreateCompoundFile(mergedName + ".cfs");
                            lock (this)
                            {
                                info.SetUseCompoundFile(true);
                            }
                            
                            success = true;
                        }
                        finally
                        {
                            lock (this)
                            {
                                deleter.DecRef(files);
                            }
                            
                            if (!success)
                            {
                                if (infoStream != null)
                                    Message("hit exception building compound file in addIndexes during merge");
                                
                                RollbackTransaction();
                            }
                            else
                            {
                                CommitTransaction();
                            }
                        }
                    }
                }
            }
            catch (System.OutOfMemoryException oom)
            {
                HandleOOM(oom, "addIndexes(params IndexReader[])");
            }
            finally
            {
                if (docWriter != null)
                {
                    docWriter.ResumeAllThreads();
                }
            }
        }
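        // Example (an illustrative sketch, not part of the original source):
        // merging an already-open reader into this index.  The reader stays
        // open and must be closed by the caller ("otherDir" is hypothetical):
        //
        //   IndexReader r = IndexReader.Open(otherDir, true); // read-only
        //   try
        //   {
        //       writer.AddIndexes(r);   // ends with an optimized index
        //       writer.Commit();
        //   }
        //   finally
        //   {
        //       r.Close();
        //   }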
        ///<summary>
        /// A hook for extending classes to execute operations after pending added and
        /// deleted documents have been flushed to the Directory but before the change
        /// is committed (new segments_N file written).
        ///</summary>
        protected virtual void DoAfterFlush()
        {
        }
        
        ///<summary>
        /// A hook for extending classes to execute operations before pending added and
        /// deleted documents are flushed to the Directory.
        ///</summary>
        protected virtual void DoBeforeFlush()
        {
        }
        
        /// <summary>Expert: prepare for commit.
        /// 
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer.  See <a
        /// href="#OOME">above</a> for details.<p/>
        /// 
        /// </summary>
        /// <seealso cref="PrepareCommit(IDictionary{string,string})">
        /// </seealso>
        public void PrepareCommit()
        {
            EnsureOpen();
            PrepareCommit(null);
        }
        
        /// <summary><p/>Expert: prepare for commit, specifying a
        /// commitUserData Map (String -> String).  This does the
        /// first phase of a 2-phase commit.  This method does all steps
        /// necessary to commit changes since this writer was
        /// opened: it flushes pending added and deleted docs, syncs
        /// the index files, and writes most of the next segments_N file.
        /// After calling this you must call either <see cref="Commit()" />
        /// to finish the commit, or <see cref="Rollback()" />
        /// to revert the commit and undo all changes
        /// done since the writer was opened.<p/>
        /// 
        /// You can also just call <see cref="Commit(IDictionary{string,string})" /> directly
        /// without prepareCommit first, in which case that method
        /// will internally call prepareCommit.
        /// 
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer.  See <a
        /// href="#OOME">above</a> for details.<p/>
        /// 
        /// </summary>
        /// <param name="commitUserData">Opaque Map (String -> String)
        /// that's recorded into the segments file in the index,
        /// and retrievable by <see cref="IndexReader.GetCommitUserData" />.
        /// Note that when IndexWriter commits itself, during <see cref="Close()" />, the
        /// commitUserData is unchanged (just carried over from
        /// the prior commit).  If this is null then the previous
        /// commitUserData is kept.  Also, the commitUserData will
        /// only "stick" if there are actually changes in the
        /// index to commit.
        /// </param>
        private void PrepareCommit(IDictionary<string, string> commitUserData)
        {
            if (hitOOM)
            {
                throw new System.SystemException("this writer hit an OutOfMemoryError; cannot commit");
            }
            
            if (pendingCommit != null)
                throw new System.SystemException("prepareCommit was already called with no corresponding call to commit");
            
            if (infoStream != null)
                Message("prepareCommit: flush");
            
            Flush(true, true, true);
            
            StartCommit(0, commitUserData);
        }
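        // Example (an illustrative sketch, not part of the original source):
        // a simple two-phase commit across this index and another
        // transactional resource ("db" is hypothetical):
        //
        //   writer.PrepareCommit();   // phase 1: flush, sync, write pending segments_N
        //   try
        //   {
        //       db.Prepare();         // prepare the other resource
        //       writer.Commit();      // phase 2: publish the prepared commit
        //       db.Commit();
        //   }
        //   catch (System.Exception)
        //   {
        //       writer.Rollback();    // also discards the prepared commit
        //       db.Rollback();
        //       throw;
        //   }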
        // Used only by commit, below; lock order is commitLock -> IW
        private Object commitLock = new Object();
        
        private void Commit(long sizeInBytes)
        {
            lock (commitLock)
            {
                StartCommit(sizeInBytes, null);
                FinishCommit();
            }
        }
        
        /// <summary> <p/>Commits all pending changes (added &amp; deleted
        /// documents, optimizations, segment merges, added
        /// indexes, etc.) to the index, and syncs all referenced
        /// index files, such that a reader will see the changes
        /// and the index updates will survive an OS or machine
        /// crash or power loss.  Note that this does not wait for
        /// any running background merges to finish.  This may be a
        /// costly operation, so you should test the cost in your
        /// application and do it only when really necessary.<p/>
        /// 
        /// <p/> Note that this operation calls Directory.sync on
        /// the index files.  That call should not return until the
        /// file contents &amp; metadata are on stable storage.  For
        /// FSDirectory, this calls the OS's fsync.  But, beware:
        /// some hardware devices may in fact cache writes even
        /// during fsync, and return before the bits are actually
        /// on stable storage, to give the appearance of faster
        /// performance.  If you have such a device, and it does
        /// not have a battery backup (for example) then on power
        /// loss it may still lose data.  Lucene cannot guarantee
        /// consistency on such devices.  <p/>
        /// 
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer.  See <a
        /// href="#OOME">above</a> for details.<p/>
        /// 
        /// </summary>
        /// <seealso cref="PrepareCommit()">
        /// </seealso>
        /// <seealso cref="Commit(IDictionary{string,string})">
        /// </seealso>
        public void Commit()
        {
            Commit(null);
        }
        
        /// <summary>Commits all changes to the index, specifying a
        /// commitUserData Map (String -> String).  This just
        /// calls <see cref="PrepareCommit(IDictionary{string, string})" /> (if you didn't
        /// already call it) and then <see cref="FinishCommit" />.
        /// 
        /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
        /// you should immediately close the writer.  See <a
        /// href="#OOME">above</a> for details.<p/>
        /// </summary>
        public void Commit(IDictionary<string, string> commitUserData)
        {
            EnsureOpen();
            
            if (infoStream != null)
            {
                Message("commit: start");
            }
            
            lock (commitLock)
            {
                if (infoStream != null)
                {
                    Message("commit: enter lock");
                }
                if (pendingCommit == null)
                {
                    if (infoStream != null)
                    {
                        Message("commit: now prepare");
                    }
                    PrepareCommit(commitUserData);
                }
                else if (infoStream != null)
                {
                    Message("commit: already prepared");
                }
                
                FinishCommit();
            }
        }
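        // Example (an illustrative sketch, not part of the original source):
        // recording a resume point in the commit itself, so an indexer can
        // pick up where the last durable commit left off:
        //
        //   IDictionary<string, string> userData = new Dictionary<string, string>();
        //   userData["lastDocId"] = "12345";   // hypothetical cursor value
        //   writer.Commit(userData);
        //
        // The map can later be read back via IndexReader.GetCommitUserData,
        // as referenced in the docs above.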
        private void FinishCommit()
        {
            lock (this)
            {
                
                if (pendingCommit != null)
                {
                    try
                    {
                        if (infoStream != null)
                            Message("commit: pendingCommit != null");
                        pendingCommit.FinishCommit(directory);
                        if (infoStream != null)
                            Message("commit: wrote segments file \"" + pendingCommit.GetCurrentSegmentFileName() + "\"");
                        lastCommitChangeCount = pendingCommitChangeCount;
                        segmentInfos.UpdateGeneration(pendingCommit);
                        segmentInfos.UserData = pendingCommit.UserData;
                        SetRollbackSegmentInfos(pendingCommit);
                        deleter.Checkpoint(pendingCommit, true);
                    }
                    finally
                    {
                        deleter.DecRef(pendingCommit);
                        pendingCommit = null;
                        System.Threading.Monitor.PulseAll(this);
                    }
                }
                else if (infoStream != null)
                {
                    Message("commit: pendingCommit == null; skip");
                }
                
                if (infoStream != null)
                {
                    Message("commit: done");
                }
            }
        }
        
        /// <summary> Flush all in-memory buffered updates (adds and deletes)
        /// to the Directory.
        /// </summary>
        /// <param name="triggerMerge">if true, we may merge segments (if
        /// deletes or docs were flushed) if necessary
        /// </param>
        /// <param name="flushDocStores">if false we are allowed to keep
        /// doc stores open to share with the next segment
        /// </param>
        /// <param name="flushDeletes">whether pending deletes should also
        /// be flushed
        /// </param>
        public /*protected internal*/ void Flush(bool triggerMerge, bool flushDocStores, bool flushDeletes)
        {
            // We can be called during close, when closing==true, so we must pass false to ensureOpen:
            EnsureOpen(false);
            if (DoFlush(flushDocStores, flushDeletes) && triggerMerge)
                MaybeMerge();
        }
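        // Example (an illustrative sketch, not part of the original source):
        // forcing buffered docs and deletes to disk without committing.
        // The flushed segment stays invisible to readers until Commit():
        //
        //   writer.Flush(true /* triggerMerge */,
        //                true /* flushDocStores */,
        //                true /* flushDeletes */);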
        // TODO: this method should not have to be entirely
        // synchronized, ie, merges should be allowed to commit
        // even while a flush is happening
        private bool DoFlush(bool flushDocStores, bool flushDeletes)
        {
            lock (this)
            {
                try
                {
                    try
                    {
                        return DoFlushInternal(flushDocStores, flushDeletes);
                    }
                    finally
                    {
                        if (docWriter.DoBalanceRAM())
                        {
                            docWriter.BalanceRAM();
                        }
                    }
                }
                finally
                {
                    docWriter.ClearFlushPending();
                }
            }
        }
        
        // TODO: this method should not have to be entirely
        // synchronized, ie, merges should be allowed to commit
        // even while a flush is happening
        private bool DoFlushInternal(bool flushDocStores, bool flushDeletes)
        {
            lock (this)
            {
                if (hitOOM)
                {
                    throw new System.SystemException("this writer hit an OutOfMemoryError; cannot flush");
                }
                
                EnsureOpen(false);
                
                System.Diagnostics.Debug.Assert(TestPoint("startDoFlush"));
                
                DoBeforeFlush();
                
                flushCount++;
                
                // If we are flushing because too many deletes
                // accumulated, then we should apply the deletes to free
                // RAM:
                flushDeletes |= docWriter.DoApplyDeletes();
                
                // Make sure no threads are actively adding a document.
                // Returns true if docWriter is currently aborting, in
                // which case we skip flushing this segment
                if (infoStream != null)
                {
                    Message("flush: now pause all indexing threads");
                }
                if (docWriter.PauseAllThreads())
                {
                    docWriter.ResumeAllThreads();
                    return false;
                }
                
                try
                {
                    
                    SegmentInfo newSegment = null;
                    
                    int numDocs = docWriter.NumDocsInRAM;
                    
                    // Always flush docs if there are any
                    bool flushDocs = numDocs > 0;
                    
                    System.String docStoreSegment = docWriter.DocStoreSegment;
                    
                    System.Diagnostics.Debug.Assert(docStoreSegment != null || numDocs == 0, "dss=" + docStoreSegment + " numDocs=" + numDocs);
                    
                    if (docStoreSegment == null)
                        flushDocStores = false;
                    
                    int docStoreOffset = docWriter.DocStoreOffset;
                    
                    bool docStoreIsCompoundFile = false;
                    
                    if (infoStream != null)
                    {
                        Message("  flush: segment=" + docWriter.Segment + " docStoreSegment=" + docWriter.DocStoreSegment + " docStoreOffset=" + docStoreOffset + " flushDocs=" + flushDocs + " flushDeletes=" + flushDeletes + " flushDocStores=" + flushDocStores + " numDocs=" + numDocs + " numBufDelTerms=" + docWriter.GetNumBufferedDeleteTerms());
                        Message("  index before flush " + SegString());
                    }
                    
                    // Check if the doc stores must be separately flushed
                    // because other segments, besides the one we are about
                    // to flush, reference it
                    if (flushDocStores && (!flushDocs || !docWriter.Segment.Equals(docWriter.DocStoreSegment)))
                    {
                        // We must separately flush the doc store
                        if (infoStream != null)
                            Message("  flush shared docStore segment " + docStoreSegment);
                        
                        docStoreIsCompoundFile = FlushDocStores();
                        flushDocStores = false;
                    }
                    
                    System.String segment = docWriter.Segment;
                    
                    // If we are flushing docs, segment must not be null:
                    System.Diagnostics.Debug.Assert(segment != null || !flushDocs);
                    
                    if (flushDocs)
                    {
                        
                        bool success = false;
                        int flushedDocCount;
                        
                        try
                        {
                            flushedDocCount = docWriter.Flush(flushDocStores);
                            if (infoStream != null)
                            {
                                Message("flushedFiles=" + docWriter.GetFlushedFiles());
                            }
                            success = true;
                        }
                        finally
                        {
                            if (!success)
                            {
                                if (infoStream != null)
                                    Message("hit exception flushing segment " + segment);
                                deleter.Refresh(segment);
                            }
                        }
                        
                        if (0 == docStoreOffset && flushDocStores)
                        {
                            // This means we are flushing private doc stores
                            // with this segment, so it will not be shared
                            // with other segments
                            System.Diagnostics.Debug.Assert(docStoreSegment != null);
                            System.Diagnostics.Debug.Assert(docStoreSegment.Equals(segment));
                            docStoreOffset = -1;
                            docStoreIsCompoundFile = false;
                            docStoreSegment = null;
                        }
                        
                        // Create new SegmentInfo, but do not add to our
                        // segmentInfos until deletes are flushed
                        // successfully.
                        newSegment = new SegmentInfo(segment, flushedDocCount, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, docWriter.HasProx());
                        SetDiagnostics(newSegment, "flush");
                    }
                    
                    docWriter.PushDeletes();
                    
                    if (flushDocs)
                    {
                        segmentInfos.Add(newSegment);
                        Checkpoint();
                    }
                    
                    if (flushDocs && mergePolicy.UseCompoundFile(segmentInfos, newSegment))
                    {
                        // Now build compound file
                        bool success = false;
                        try
                        {
                            docWriter.CreateCompoundFile(segment);
                            success = true;
                        }
                        finally
                        {
                            if (!success)
                            {
                                if (infoStream != null)
                                    Message("hit exception creating compound file for newly flushed segment " + segment);
                                deleter.DeleteFile(segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
                            }
                        }
                        
                        newSegment.SetUseCompoundFile(true);
                        Checkpoint();
                    }
                    
                    if (flushDeletes)
                    {
                        ApplyDeletes();
                    }
                    
                    if (flushDocs)
                        Checkpoint();
                    
                    DoAfterFlush();
                    
                    return flushDocs;
                }
                catch (System.OutOfMemoryException oom)
                {
                    HandleOOM(oom, "doFlush");
                    // never hit
                    return false;
                }
                finally
                {
                    docWriter.ResumeAllThreads();
                }
            }
        }
        /// <summary>Expert:  Return the total size of all index files currently cached in memory.
        /// Useful for size management with flushRamDocs()
        /// </summary>
        public long RamSizeInBytes()
        {
            EnsureOpen();
            return docWriter.GetRAMUsed();
        }
        
        /// <summary>Expert:  Return the number of documents currently
        /// buffered in RAM.
        /// </summary>
        public int NumRamDocs()
        {
            lock (this)
            {
                EnsureOpen();
                return docWriter.NumDocsInRAM;
            }
        }
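        // Example (an illustrative sketch, not part of the original source):
        // watching writer RAM while indexing and flushing once a budget is
        // exceeded.  Normally SetRAMBufferSizeMB handles this automatically;
        // a manual loop like this only makes sense for custom policies:
        //
        //   const long Budget = 64 * 1024 * 1024;   // 64 MB, hypothetical
        //   foreach (Document doc in docs)          // "docs" is hypothetical
        //   {
        //       writer.AddDocument(doc);
        //       if (writer.RamSizeInBytes() > Budget)
        //           writer.Flush(true, true, true); // see Flush(...) above
        //   }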
        private int EnsureContiguousMerge(MergePolicy.OneMerge merge)
        {
            
            int first = segmentInfos.IndexOf(merge.segments.Info(0));
            if (first == -1)
                throw new MergePolicy.MergeException("could not find segment " + merge.segments.Info(0).name + " in current index " + SegString(), directory);
            
            int numSegments = segmentInfos.Count;
            
            int numSegmentsToMerge = merge.segments.Count;
            for (int i = 0; i < numSegmentsToMerge; i++)
            {
                SegmentInfo info = merge.segments.Info(i);
                
                if (first + i >= numSegments || !segmentInfos.Info(first + i).Equals(info))
                {
                    if (segmentInfos.IndexOf(info) == -1)
                        throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + SegString(), directory);
                    else
                        throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.SegString(directory) + " vs " + SegString() + "), which IndexWriter (currently) cannot handle", directory);
                }
            }
            
            return first;
        }
        
        /// <summary>Carefully merges deletes for the segments we just
        /// merged.  This is tricky because, although merging will
        /// clear all deletes (compacts the documents), new
        /// deletes may have been flushed to the segments since
        /// the merge was started.  This method "carries over"
        /// such new deletes onto the newly merged segment, and
        /// saves the resulting deletes file (incrementing the
        /// delete generation for merge.info).  If no deletes were
        /// flushed, no new deletes file is saved.
        /// </summary>
        private void CommitMergedDeletes(MergePolicy.OneMerge merge, SegmentReader mergeReader)
        {
            lock (this)
            {
                
                System.Diagnostics.Debug.Assert(TestPoint("startCommitMergeDeletes"));
                
                SegmentInfos sourceSegments = merge.segments;
                
                if (infoStream != null)
                    Message("commitMergeDeletes " + merge.SegString(directory));
                
                // Carefully merge deletes that occurred after we
                // started merging:
                int docUpto = 0;
                int delCount = 0;
                
                for (int i = 0; i < sourceSegments.Count; i++)
                {
                    SegmentInfo info = sourceSegments.Info(i);
                    int docCount = info.docCount;
                    SegmentReader previousReader = merge.readersClone[i];
                    SegmentReader currentReader = merge.readers[i];
                    if (previousReader.HasDeletions)
                    {
                        
                        // There were deletes on this segment when the merge
                        // started.  The merge has collapsed away those
                        // deletes, but, if new deletes were flushed since
                        // the merge started, we must now carefully keep any
                        // newly flushed deletes while mapping them to the new
                        // docIDs.
                        
                        if (currentReader.NumDeletedDocs > previousReader.NumDeletedDocs)
                        {
                            // This means this segment has had new deletes
                            // committed since we started the merge, so we
                            // must merge them:
                            for (int j = 0; j < docCount; j++)
                            {
                                if (previousReader.IsDeleted(j))
                                {
                                    System.Diagnostics.Debug.Assert(currentReader.IsDeleted(j));
                                }
                                else
                                {
                                    if (currentReader.IsDeleted(j))
                                    {
                                        mergeReader.DoDelete(docUpto);
                                        delCount++;
                                    }
                                    docUpto++;
                                }
                            }
                        }
                        else
                        {
                            docUpto += docCount - previousReader.NumDeletedDocs;
                        }
                    }
                    else if (currentReader.HasDeletions)
                    {
                        // This segment had no deletes before but now it
                        // does:
                        for (int j = 0; j < docCount; j++)
                        {
                            if (currentReader.IsDeleted(j))
                            {
                                mergeReader.DoDelete(docUpto);
                                delCount++;
                            }
                            docUpto++;
                        }
                    }
                    // No deletes before or after
                    else
                        docUpto += info.docCount;
                }
                
                System.Diagnostics.Debug.Assert(mergeReader.NumDeletedDocs == delCount);
                
                mergeReader.hasChanges = delCount > 0;
            }
        }
aborted"); 4567 4568 return false; 4569 } 4570 4571 int start = EnsureContiguousMerge(merge); 4572 4573 CommitMergedDeletes(merge, mergedReader); 4574 docWriter.RemapDeletes(segmentInfos, merger.GetDocMaps(), merger.GetDelCounts(), merge, mergedDocCount); 4575 4576 // If the doc store we are using has been closed and 4577 // is in now compound format (but wasn't when we 4578 // started), then we will switch to the compound 4579 // format as well: 4580 SetMergeDocStoreIsCompoundFile(merge); 4581 4582 merge.info.HasProx = merger.HasProx(); 4583 4584 segmentInfos.RemoveRange(start, start + merge.segments.Count - start); 4585 System.Diagnostics.Debug.Assert(!segmentInfos.Contains(merge.info)); 4586 segmentInfos.Insert(start, merge.info); 4587 4588 CloseMergeReaders(merge, false); 4589 4590 // Must note the change to segmentInfos so any commits 4591 // in-flight don't lose it: 4592 Checkpoint(); 4593 4594 // If the merged segments had pending changes, clear 4595 // them so that they don't bother writing them to 4596 // disk, updating SegmentInfo, etc.: 4597 readerPool.Clear(merge.segments); 4598 4599 if (merge.optimize) 4600 { 4601 // cascade the optimize: 4602 segmentsToOptimize.Add(merge.info); 4603 } 4604 return true; 4605 } 4606 } 4607 HandleMergeException(System.Exception t, MergePolicy.OneMerge merge)4608 private void HandleMergeException(System.Exception t, MergePolicy.OneMerge merge) 4609 { 4610 4611 if (infoStream != null) 4612 { 4613 Message("handleMergeException: merge=" + merge.SegString(directory) + " exc=" + t); 4614 } 4615 4616 // Set the exception on the merge, so if 4617 // optimize() is waiting on us it sees the root 4618 // cause exception: 4619 merge.SetException(t); 4620 AddMergeException(merge); 4621 4622 if (t is MergePolicy.MergeAbortedException) 4623 { 4624 // We can ignore this exception (it happens when 4625 // close(false) or rollback is called), unless the 4626 // merge involves segments from external directories, 4627 // in which case we must throw it so, for example, the 4628 // rollbackTransaction code in addIndexes* is 4629 // executed. 4630 if (merge.isExternal) 4631 throw t; 4632 } 4633 else if (t is System.IO.IOException || t is System.SystemException || t is System.ApplicationException) 4634 { 4635 throw t; 4636 } 4637 else 4638 { 4639 // Should not get here 4640 System.Diagnostics.Debug.Fail("Exception is not expected type!"); 4641 throw new System.SystemException(null, t); 4642 } 4643 } 4644 Merge_ForNUnit(MergePolicy.OneMerge merge)4645 public void Merge_ForNUnit(MergePolicy.OneMerge merge) 4646 { 4647 Merge(merge); 4648 } 4649 /// <summary> Merges the indicated segments, replacing them in the stack with a 4650 /// single segment. 
        public void Merge_ForNUnit(MergePolicy.OneMerge merge)
        {
            Merge(merge);
        }
        
        /// <summary> Merges the indicated segments, replacing them in the stack with a
        /// single segment.
        /// </summary>
        internal void Merge(MergePolicy.OneMerge merge)
        {
            
            bool success = false;
            
            try
            {
                try
                {
                    try
                    {
                        MergeInit(merge);
                        
                        if (infoStream != null)
                        {
                            Message("now merge\n  merge=" + merge.SegString(directory) + "\n  merge=" + merge + "\n  index=" + SegString());
                        }
                        
                        MergeMiddle(merge);
                        MergeSuccess(merge);
                        success = true;
                    }
                    catch (System.Exception t)
                    {
                        HandleMergeException(t, merge);
                    }
                }
                finally
                {
                    lock (this)
                    {
                        MergeFinish(merge);
                        
                        if (!success)
                        {
                            if (infoStream != null)
                                Message("hit exception during merge");
                            if (merge.info != null && !segmentInfos.Contains(merge.info))
                                deleter.Refresh(merge.info.name);
                        }
                        
                        // This merge (and, generally, any change to the
                        // segments) may now enable new merges, so we call
                        // merge policy & update pending merges.
                        if (success && !merge.IsAborted() && !closed && !closing)
                            UpdatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize);
                    }
                }
            }
            catch (System.OutOfMemoryException oom)
            {
                HandleOOM(oom, "merge");
            }
        }
        /// <summary>Hook that's called when the specified merge is complete. </summary>
        internal virtual void MergeSuccess(MergePolicy.OneMerge merge)
        {
        }
        
        /// <summary>Checks whether this merge involves any segments
        /// already participating in a merge.  If not, this merge
        /// is "registered", meaning we record that its segments
        /// are now participating in a merge, and true is
        /// returned.  Else (the merge conflicts) false is
        /// returned.
        /// </summary>
        internal bool RegisterMerge(MergePolicy.OneMerge merge)
        {
            lock (this)
            {
                
                if (merge.registerDone)
                    return true;
                
                if (stopMerges)
                {
                    merge.Abort();
                    throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.SegString(directory));
                }
                
                int count = merge.segments.Count;
                bool isExternal = false;
                for (int i = 0; i < count; i++)
                {
                    SegmentInfo info = merge.segments.Info(i);
                    if (mergingSegments.Contains(info))
                    {
                        return false;
                    }
                    if (segmentInfos.IndexOf(info) == -1)
                    {
                        return false;
                    }
                    if (info.dir != directory)
                    {
                        isExternal = true;
                    }
                    if (segmentsToOptimize.Contains(info))
                    {
                        merge.optimize = true;
                        merge.maxNumSegmentsOptimize = optimizeMaxNumSegments;
                    }
                }
                
                EnsureContiguousMerge(merge);
                
                pendingMerges.AddLast(merge);
                
                if (infoStream != null)
                    Message("add merge to pendingMerges: " + merge.SegString(directory) + " [total " + pendingMerges.Count + " pending]");
                
                merge.mergeGen = mergeGen;
                merge.isExternal = isExternal;
                
                // OK it does not conflict; now record that this merge
                // is running (while synchronized) to avoid the race
                // condition where two conflicting merges from different
                // threads start
                for (int i = 0; i < count; i++)
                {
                    SegmentInfo si = merge.segments.Info(i);
                    mergingSegments.Add(si);
                }
                
                // Merge is now registered
                merge.registerDone = true;
                return true;
            }
        }
        /// <summary>Does initial setup for a merge, which is fast but holds
        /// the synchronized lock on IndexWriter instance.
        /// </summary>
        internal void MergeInit(MergePolicy.OneMerge merge)
        {
            lock (this)
            {
                bool success = false;
                try
                {
                    _MergeInit(merge);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        MergeFinish(merge);
                    }
                }
            }
        }
        
        private void _MergeInit(MergePolicy.OneMerge merge)
        {
            lock (this)
            {
                
                System.Diagnostics.Debug.Assert(TestPoint("startMergeInit"));
                
                System.Diagnostics.Debug.Assert(merge.registerDone);
                System.Diagnostics.Debug.Assert(!merge.optimize || merge.maxNumSegmentsOptimize > 0);
                
                if (hitOOM)
                {
                    throw new System.SystemException("this writer hit an OutOfMemoryError; cannot merge");
                }
                
                if (merge.info != null)
                    // mergeInit already done
                    return;
                
                if (merge.IsAborted())
                    return;
                
                ApplyDeletes();
                
                SegmentInfos sourceSegments = merge.segments;
                int end = sourceSegments.Count;
                
                // Check whether this merge will allow us to skip
                // merging the doc stores (stored field & vectors).
                // This is a very substantial optimization (saves tons
                // of IO).
                
                Directory lastDir = directory;
                System.String lastDocStoreSegment = null;
                int next = -1;
                
                bool mergeDocStores = false;
                bool doFlushDocStore = false;
                System.String currentDocStoreSegment = docWriter.DocStoreSegment;
                
                // Test each segment to be merged: check if we need to
                // flush/merge doc stores
                for (int i = 0; i < end; i++)
                {
                    SegmentInfo si = sourceSegments.Info(i);
                    
                    // If it has deletions we must merge the doc stores
                    if (si.HasDeletions())
                        mergeDocStores = true;
                    
                    // If it has its own (private) doc stores we must
                    // merge the doc stores
                    if (-1 == si.DocStoreOffset)
                        mergeDocStores = true;
                    
                    // If it has a different doc store segment than
                    // previous segments, we must merge the doc stores
                    System.String docStoreSegment = si.DocStoreSegment;
                    if (docStoreSegment == null)
                        mergeDocStores = true;
                    else if (lastDocStoreSegment == null)
                        lastDocStoreSegment = docStoreSegment;
                    else if (!lastDocStoreSegment.Equals(docStoreSegment))
                        mergeDocStores = true;
                    
                    // Segments' docStoreOffsets must be in-order,
                    // contiguous.  For the default merge policy now
                    // this will always be the case but for an arbitrary
                    // merge policy this may not be the case
                    if (-1 == next)
                        next = si.DocStoreOffset + si.docCount;
                    else if (next != si.DocStoreOffset)
                        mergeDocStores = true;
                    else
                        next = si.DocStoreOffset + si.docCount;
                    
                    // If the segment comes from a different directory
                    // we must merge
                    if (lastDir != si.dir)
                        mergeDocStores = true;
                    
                    // If the segment is referencing the current "live"
                    // doc store outputs then we must merge
                    if (si.DocStoreOffset != -1 && currentDocStoreSegment != null && si.DocStoreSegment.Equals(currentDocStoreSegment))
                    {
                        doFlushDocStore = true;
                    }
                }
                
                // if a mergedSegmentWarmer is installed, we must merge
                // the doc stores because we will open a full
                // SegmentReader on the merged segment:
                if (!mergeDocStores && mergedSegmentWarmer != null && currentDocStoreSegment != null && lastDocStoreSegment != null && lastDocStoreSegment.Equals(currentDocStoreSegment))
                {
                    mergeDocStores = true;
                }
                
                int docStoreOffset;
                System.String docStoreSegment2;
                bool docStoreIsCompoundFile;
                
                if (mergeDocStores)
                {
                    docStoreOffset = -1;
                    docStoreSegment2 = null;
                    docStoreIsCompoundFile = false;
                }
                else
                {
                    SegmentInfo si = sourceSegments.Info(0);
                    docStoreOffset = si.DocStoreOffset;
                    docStoreSegment2 = si.DocStoreSegment;
                    docStoreIsCompoundFile = si.DocStoreIsCompoundFile;
                }
                
                if (mergeDocStores && doFlushDocStore)
                {
                    // SegmentMerger intends to merge the doc stores
                    // (stored fields, vectors), and at least one of the
                    // segments to be merged refers to the currently
                    // live doc stores.
                    
                    // TODO: if we know we are about to merge away these
                    // newly flushed doc store files then we should not
                    // make compound file out of them...
                    if (infoStream != null)
                        Message("now flush at merge");
                    DoFlush(true, false);
                }
                
                merge.mergeDocStores = mergeDocStores;
                
                // Bind a new segment name here so even with
                // ConcurrentMergeScheduler we keep deterministic segment
                // names.
                merge.info = new SegmentInfo(NewSegmentName(), 0, directory, false, true, docStoreOffset, docStoreSegment2, docStoreIsCompoundFile, false);
                
                IDictionary<string, string> details = new Dictionary<string, string>();
                details["optimize"] = merge.optimize + "";
                details["mergeFactor"] = end + "";
                details["mergeDocStores"] = mergeDocStores + "";
                SetDiagnostics(merge.info, "merge", details);
                
                // Also enroll the merged segment into mergingSegments;
                // this prevents it from getting selected for a merge
                // after our merge is done but while we are building the
                // CFS:
                mergingSegments.Add(merge.info);
            }
        }
        private void SetDiagnostics(SegmentInfo info, System.String source)
        {
            SetDiagnostics(info, source, null);
        }
        
        private void SetDiagnostics(SegmentInfo info, System.String source, IDictionary<string, string> details)
        {
            IDictionary<string, string> diagnostics = new Dictionary<string, string>();
            diagnostics["source"] = source;
            diagnostics["lucene.version"] = Constants.LUCENE_VERSION;
            diagnostics["os"] = Constants.OS_NAME + "";
            diagnostics["os.arch"] = Constants.OS_ARCH + "";
            diagnostics["os.version"] = Constants.OS_VERSION + "";
            diagnostics["java.version"] = Constants.JAVA_VERSION + "";
            diagnostics["java.vendor"] = Constants.JAVA_VENDOR + "";
            if (details != null)
            {
                foreach (string key in details.Keys)
                {
                    diagnostics[key] = details[key];
                }
            }
            info.Diagnostics = diagnostics;
        }
        
        /// <summary>Does finishing for a merge, which is fast but holds
        /// the synchronized lock on IndexWriter instance.
        /// </summary>
        internal void MergeFinish(MergePolicy.OneMerge merge)
        {
            lock (this)
            {
                
                // Optimize, addIndexes or finishMerges may be waiting
                // on merges to finish.
                System.Threading.Monitor.PulseAll(this);
                
                // It's possible we are called twice, eg if there was an
                // exception inside mergeInit
                if (merge.registerDone)
                {
                    SegmentInfos sourceSegments = merge.segments;
                    int end = sourceSegments.Count;
                    for (int i = 0; i < end; i++)
                        mergingSegments.Remove(sourceSegments.Info(i));
                    if (merge.info != null)
                        mergingSegments.Remove(merge.info);
                    merge.registerDone = false;
                }
                
                runningMerges.Remove(merge);
            }
        }
        
        private void SetMergeDocStoreIsCompoundFile(MergePolicy.OneMerge merge)
        {
            lock (this)
            {
                string mergeDocStoreSegment = merge.info.DocStoreSegment;
                if (mergeDocStoreSegment != null && !merge.info.DocStoreIsCompoundFile)
                {
                    int size = segmentInfos.Count;
                    for (int i = 0; i < size; i++)
                    {
                        SegmentInfo info = segmentInfos.Info(i);
                        string docStoreSegment = info.DocStoreSegment;
                        if (docStoreSegment != null &&
                            docStoreSegment.Equals(mergeDocStoreSegment) &&
                            info.DocStoreIsCompoundFile)
                        {
                            merge.info.DocStoreIsCompoundFile = true;
                            break;
                        }
                    }
                }
            }
        }
        
        private void CloseMergeReaders(MergePolicy.OneMerge merge, bool suppressExceptions)
        {
            lock (this)
            {
                int numSegments = merge.segments.Count;
                if (suppressExceptions)
                {
                    // Suppress any new exceptions so we throw the
                    // original cause
                    for (int i = 0; i < numSegments; i++)
                    {
                        if (merge.readers[i] != null)
                        {
                            try
                            {
                                readerPool.Release(merge.readers[i], false);
                            }
                            catch (Exception)
                            {
                            }
                            merge.readers[i] = null;
                        }
                        
                        if (merge.readersClone[i] != null)
                        {
                            try
                            {
                                merge.readersClone[i].Close();
                            }
                            catch (Exception)
                            {
                            }
                            // This was a private clone and we had the
                            // only reference
                            System.Diagnostics.Debug.Assert(merge.readersClone[i].RefCount == 0); // "refCount should be 0 but is " + merge.readersClone[i].RefCount
                            merge.readersClone[i] = null;
                        }
                    }
                }
                else
                {
                    for (int i = 0; i < numSegments; i++)
                    {
                        if (merge.readers[i] != null)
                        {
                            readerPool.Release(merge.readers[i], true);
                            merge.readers[i] = null;
                        }
                        
                        if (merge.readersClone[i] != null)
                        {
                            merge.readersClone[i].Close();
                            // This was a private clone and we had the only reference
                            System.Diagnostics.Debug.Assert(merge.readersClone[i].RefCount == 0);
                            merge.readersClone[i] = null;
                        }
                    }
                }
            }
        }
        
        /// <summary>Does the actual (time-consuming) work of the merge,
        /// but without holding synchronized lock on IndexWriter
        /// instance
        /// </summary>
        private int MergeMiddle(MergePolicy.OneMerge merge)
        {
            
            merge.CheckAborted(directory);
            
            System.String mergedName = merge.info.name;
            
            SegmentMerger merger = null;
            
            int mergedDocCount = 0;
            
            SegmentInfos sourceSegments = merge.segments;
            int numSegments = sourceSegments.Count;
            
            if (infoStream != null)
                Message("merging " + merge.SegString(directory));
            
            merger = new SegmentMerger(this, mergedName, merge);
            
            merge.readers = new SegmentReader[numSegments];
            merge.readersClone = new SegmentReader[numSegments];
            
            bool mergeDocStores = false;
            
            String currentDocStoreSegment;
            lock (this)
            {
                currentDocStoreSegment = docWriter.DocStoreSegment;
            }
            bool currentDSSMerged = false;
            
            // This is try/finally to make sure merger's readers are
            // closed:
            bool success = false;
            try
            {
                int totDocCount = 0;
                
                for (int i = 0; i < numSegments; i++)
                {
                    
                    SegmentInfo info = sourceSegments.Info(i);
                    
                    // Hold onto the "live" reader; we will use this to
                    // commit merged deletes
                    SegmentReader reader = merge.readers[i] = readerPool.Get(info, merge.mergeDocStores, MERGE_READ_BUFFER_SIZE, -1);
                    
                    // We clone the segment readers because other
                    // deletes may come in while we're merging so we
                    // need readers that will not change
                    SegmentReader clone = merge.readersClone[i] = (SegmentReader) reader.Clone(true);
                    merger.Add(clone);
                    
                    if (clone.HasDeletions)
                    {
                        mergeDocStores = true;
                    }
                    
                    if (info.DocStoreOffset != -1 && currentDocStoreSegment != null)
                    {
                        currentDSSMerged |= currentDocStoreSegment.Equals(info.DocStoreSegment);
                    }
                    
                    totDocCount += clone.NumDocs();
                }
                
                if (infoStream != null)
                {
                    Message("merge: total " + totDocCount + " docs");
                }
                
                merge.CheckAborted(directory);
                
                // If deletions have arrived and it has now become
                // necessary to merge doc stores, go and open them:
                if (mergeDocStores && !merge.mergeDocStores)
                {
                    merge.mergeDocStores = true;
                    lock (this)
                    {
                        if (currentDSSMerged)
                        {
                            if (infoStream != null)
                            {
                                Message("now flush at mergeMiddle");
                            }
                            DoFlush(true, false);
                        }
                    }
                    
                    for (int i = 0; i < numSegments; i++)
                    {
                        merge.readersClone[i].OpenDocStores();
                    }
                    
                    // Clear DSS
                    merge.info.SetDocStore(-1, null, false);
                }
                
                // This is where all the work happens:
                mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores);
                
                System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount);
                
                if (merge.useCompoundFile)
                {
                    
                    success = false;
                    string compoundFileName = IndexFileNames.SegmentFileName(mergedName, IndexFileNames.COMPOUND_FILE_EXTENSION);
                    
                    try
                    {
                        if (infoStream != null)
                        {
                            Message("create compound file " + compoundFileName);
                        }
                        merger.CreateCompoundFile(compoundFileName);
                        success = true;
                    }
                    catch (System.IO.IOException ioe)
                    {
                        lock (this)
                        {
                            if (merge.IsAborted())
                            {
                                // This can happen if rollback or close(false)
                                // is called -- fall through to logic below to
                                // remove the partially created CFS:
                            }
                            else
                            {
                                HandleMergeException(ioe, merge);
                            }
                        }
                    }
                    catch (Exception t)
                    {
                        HandleMergeException(t, merge);
                    }
                    finally
                    {
                        if (!success)
                        {
                            if (infoStream != null)
                            {
                                Message("hit exception creating compound file during merge");
                            }
                            
                            lock (this)
                            {
                                deleter.DeleteFile(compoundFileName);
                                deleter.DeleteNewFiles(merger.GetMergedFiles());
                            }
                        }
                    }
                    
                    success = false;
                    
                    lock (this)
                    {
                        
                        // delete new non-CFS files directly: they were never
                        // registered with IFD
                        deleter.DeleteNewFiles(merger.GetMergedFiles());
                        
                        if (merge.IsAborted())
                        {
                            if (infoStream != null)
                    success = false;

                    lock (this)
                    {
                        // delete new non cfs files directly: they were never
                        // registered with IFD
                        deleter.DeleteNewFiles(merger.GetMergedFiles());

                        if (merge.IsAborted())
                        {
                            if (infoStream != null)
                            {
                                Message("abort merge after building CFS");
                            }
                            deleter.DeleteFile(compoundFileName);
                            return 0;
                        }
                    }

                    merge.info.SetUseCompoundFile(true);
                }

                int termsIndexDivisor;
                bool loadDocStores;

                // if the merged segment warmer was not installed when
                // this merge was started, causing us to not force
                // the docStores to close, we can't warm it now
                bool canWarm = merge.info.DocStoreSegment == null || currentDocStoreSegment == null || !merge.info.DocStoreSegment.Equals(currentDocStoreSegment);

                if (poolReaders && mergedSegmentWarmer != null && canWarm)
                {
                    // Load terms index & doc stores so the segment
                    // warmer can run searches, load documents/term
                    // vectors
                    termsIndexDivisor = readerTermsIndexDivisor;
                    loadDocStores = true;
                }
                else
                {
                    termsIndexDivisor = -1;
                    loadDocStores = false;
                }

                // TODO: in the non-realtime case, we may want to only
                // keep deletes (it's costly to open entire reader
                // when we just need deletes)

                SegmentReader mergedReader = readerPool.Get(merge.info, loadDocStores, BufferedIndexInput.BUFFER_SIZE, termsIndexDivisor);
                try
                {
                    if (poolReaders && mergedSegmentWarmer != null)
                    {
                        mergedSegmentWarmer.Warm(mergedReader);
                    }
                    if (!CommitMerge(merge, merger, mergedDocCount, mergedReader))
                    {
                        // commitMerge will return false if this merge was aborted
                        return 0;
                    }
                }
                finally
                {
                    lock (this)
                    {
                        readerPool.Release(mergedReader);
                    }
                }

                success = true;
            }
            finally
            {
                // Readers are already closed in commitMerge if we didn't hit
                // an exc:
                if (!success)
                {
                    CloseMergeReaders(merge, true);
                }
            }

            return mergedDocCount;
        }

        internal virtual void AddMergeException(MergePolicy.OneMerge merge)
        {
            lock (this)
            {
                System.Diagnostics.Debug.Assert(merge.GetException() != null);
                if (!mergeExceptions.Contains(merge) && mergeGen == merge.mergeGen)
                    mergeExceptions.Add(merge);
            }
        }

        // Apply buffered deletes to all segments.
        private bool ApplyDeletes()
        {
            lock (this)
            {
                System.Diagnostics.Debug.Assert(TestPoint("startApplyDeletes"));
                flushDeletesCount++;

                bool success = false;
                bool changed;
                try
                {
                    changed = docWriter.ApplyDeletes(segmentInfos);
                    success = true;
                }
                finally
                {
                    if (!success && infoStream != null)
                    {
                        Message("hit exception flushing deletes");
                    }
                }

                if (changed)
                    Checkpoint();
                return changed;
            }
        }
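        // Illustrative sketch (hypothetical caller code, not part of this
        // class): buffered deletes only become visible once a flush runs
        // ApplyDeletes against the current segments and checkpoints:
        //
        //   writer.DeleteDocuments(new Term("id", "42")); // buffered only
        //   writer.Commit(); // flush applies the deletes, then checkpoints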
        // For test purposes.
        internal int GetBufferedDeleteTermsSize()
        {
            lock (this)
            {
                return docWriter.GetBufferedDeleteTerms().Count;
            }
        }

        // For test purposes.
        internal int GetNumBufferedDeleteTerms()
        {
            lock (this)
            {
                return docWriter.GetNumBufferedDeleteTerms();
            }
        }

        // utility routines for tests
        public /*internal*/ virtual SegmentInfo NewestSegment()
        {
            return segmentInfos.Count > 0 ? segmentInfos.Info(segmentInfos.Count - 1) : null;
        }

        public virtual System.String SegString()
        {
            lock (this)
            {
                return SegString(segmentInfos);
            }
        }

        private System.String SegString(SegmentInfos infos)
        {
            lock (this)
            {
                System.Text.StringBuilder buffer = new System.Text.StringBuilder();
                int count = infos.Count;
                for (int i = 0; i < count; i++)
                {
                    if (i > 0)
                    {
                        buffer.Append(' ');
                    }
                    SegmentInfo info = infos.Info(i);
                    buffer.Append(info.SegString(directory));
                    if (info.dir != directory)
                        buffer.Append("**");
                }
                return buffer.ToString();
            }
        }

        // Files that have been sync'd already
        private HashSet<string> synced = new HashSet<string>();

        // Files that are now being sync'd
        private HashSet<string> syncing = new HashSet<string>();

        private bool StartSync(System.String fileName, ICollection<string> pending)
        {
            lock (synced)
            {
                if (!synced.Contains(fileName))
                {
                    if (!syncing.Contains(fileName))
                    {
                        syncing.Add(fileName);
                        return true;
                    }
                    else
                    {
                        pending.Add(fileName);
                        return false;
                    }
                }
                else
                    return false;
            }
        }

        private void FinishSync(System.String fileName, bool success)
        {
            lock (synced)
            {
                System.Diagnostics.Debug.Assert(syncing.Contains(fileName));
                syncing.Remove(fileName);
                if (success)
                    synced.Add(fileName);
                System.Threading.Monitor.PulseAll(synced);
            }
        }

        /// <summary>Blocks until all files in syncing are sync'd </summary>
        private bool WaitForAllSynced(ICollection<System.String> syncing)
        {
            lock (synced)
            {
                IEnumerator<string> it = syncing.GetEnumerator();
                while (it.MoveNext())
                {
                    System.String fileName = it.Current;
                    while (!synced.Contains(fileName))
                    {
                        // Check the live syncing set (the 'this.' qualifier
                        // matters: the parameter shadows the field). If the
                        // file is no longer being synced but never appeared
                        // in synced, another thread's sync failed:
                        if (!this.syncing.Contains(fileName))
                            return false;
                        else
                            System.Threading.Monitor.Wait(synced);
                    }
                }
                return true;
            }
        }
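        // Illustrative sketch (not called anywhere; it mirrors the loop in
        // StartCommit below) of how the three methods above cooperate when
        // several threads sync overlapping file sets:
        //
        //   ICollection<string> pending = new List<string>();
        //   if (StartSync("_1.cfs", pending))         // we own this sync
        //   {
        //       bool ok = false;
        //       try { directory.Sync("_1.cfs"); ok = true; }
        //       finally { FinishSync("_1.cfs", ok); }  // pulses any waiters
        //   }
        //   // Files owned by other threads landed in 'pending'; block until
        //   // they appear in 'synced', retrying if any of them failed:
        //   bool allDone = WaitForAllSynced(pending);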
        private void DoWait()
        {
            lock (this)
            {
                // NOTE: the callers of this method should in theory
                // be able to do simply wait(), but, as a defense
                // against thread timing hazards where notifyAll()
                // fails to be called, we wait for at most 1 second
                // and then return so the caller can check if wait
                // conditions are satisfied:
                System.Threading.Monitor.Wait(this, TimeSpan.FromMilliseconds(1000));
            }
        }

        /// <summary>Walk through all files referenced by the current
        /// segmentInfos and ask the Directory to sync each file,
        /// if it wasn't already. If that succeeds, then we
        /// prepare a new segments_N file but do not fully commit
        /// it.
        /// </summary>
        private void StartCommit(long sizeInBytes, IDictionary<string, string> commitUserData)
        {
            System.Diagnostics.Debug.Assert(TestPoint("startStartCommit"));

            // TODO: as of LUCENE-2095, we can simplify this method,
            // since only 1 thread can be in here at once

            if (hitOOM)
            {
                throw new System.SystemException("this writer hit an OutOfMemoryError; cannot commit");
            }

            try
            {
                if (infoStream != null)
                    Message("startCommit(): start sizeInBytes=" + sizeInBytes);

                SegmentInfos toSync = null;
                long myChangeCount;

                lock (this)
                {
                    // Wait for any running addIndexes to complete
                    // first, then block any from running until we've
                    // copied the segmentInfos we intend to sync:
                    BlockAddIndexes(false);

                    // On commit the segmentInfos must never
                    // reference a segment in another directory:
                    System.Diagnostics.Debug.Assert(!HasExternalSegments());

                    try
                    {
                        System.Diagnostics.Debug.Assert(lastCommitChangeCount <= changeCount);
                        myChangeCount = changeCount;

                        if (changeCount == lastCommitChangeCount)
                        {
                            if (infoStream != null)
                                Message(" skip startCommit(): no changes pending");
                            return;
                        }

                        // First, we clone & incref the segmentInfos we intend
                        // to sync, then, without locking, we sync() each file
                        // referenced by toSync, in the background. Multiple
                        // threads can be doing this at once, if say a large
                        // merge and a small merge finish at the same time:

                        if (infoStream != null)
                            Message("startCommit index=" + SegString(segmentInfos) + " changeCount=" + changeCount);

                        readerPool.Commit();

                        // It's possible another flush (that did not close
                        // the open doc stores) snuck in after the flush we
                        // just did, so we remove any tail segments
                        // referencing the open doc store from the
                        // SegmentInfos we are about to sync (the main
                        // SegmentInfos will keep them):
                        toSync = (SegmentInfos) segmentInfos.Clone();
                        string dss = docWriter.DocStoreSegment;
                        if (dss != null)
                        {
                            while (true)
                            {
                                String dss2 = toSync.Info(toSync.Count - 1).DocStoreSegment;
                                if (dss2 == null || !dss2.Equals(dss))
                                {
                                    break;
                                }
                                toSync.RemoveAt(toSync.Count - 1);
                                changeCount++;
                            }
                        }

                        if (commitUserData != null)
                            toSync.UserData = commitUserData;

                        deleter.IncRef(toSync, false);

                        ICollection<string> files = toSync.Files(directory, false);
                        foreach (string fileName in files)
                        {
                            System.Diagnostics.Debug.Assert(directory.FileExists(fileName), "file " + fileName + " does not exist");
                            // If this trips it means we are missing a call to
                            // .checkpoint somewhere, because by the time we
                            // are called, deleter should know about every
                            // file referenced by the current head
                            // segmentInfos:
                            System.Diagnostics.Debug.Assert(deleter.Exists(fileName));
                        }
                    }
                    finally
                    {
                        ResumeAddIndexes();
                    }
                }

                System.Diagnostics.Debug.Assert(TestPoint("midStartCommit"));

                bool setPending = false;
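                // What follows is the first half of a two-phase commit:
                // every file referenced by toSync is fsync'd, and only then
                // is a new segments_N prepared -- written via PrepareCommit
                // but not yet made visible. The finishing step that publishes
                // pendingCommit happens elsewhere in this class, so a failure
                // here leaves the previous commit point intact.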
                try
                {
                    // Loop until all files toSync references are sync'd:
                    while (true)
                    {
                        ICollection<string> pending = new List<string>();

                        IEnumerator<string> it = toSync.Files(directory, false).GetEnumerator();
                        while (it.MoveNext())
                        {
                            string fileName = it.Current;
                            if (StartSync(fileName, pending))
                            {
                                bool success = false;
                                try
                                {
                                    // Because we incRef'd this commit point, above,
                                    // the file had better exist:
                                    System.Diagnostics.Debug.Assert(directory.FileExists(fileName), "file '" + fileName + "' does not exist dir=" + directory);
                                    if (infoStream != null)
                                        Message("now sync " + fileName);
                                    directory.Sync(fileName);
                                    success = true;
                                }
                                finally
                                {
                                    FinishSync(fileName, success);
                                }
                            }
                        }

                        // All files that I require are either synced or being
                        // synced by other threads. If they are being synced,
                        // we must at this point block until they are done.
                        // If this returns false, that means an error in
                        // another thread resulted in failing to actually
                        // sync one of our files, so we repeat:
                        if (WaitForAllSynced(pending))
                            break;
                    }

                    System.Diagnostics.Debug.Assert(TestPoint("midStartCommit2"));

                    lock (this)
                    {
                        // If someone saved a newer version of segments file
                        // since I first started syncing my version, I can
                        // safely skip saving myself since I've been
                        // superseded:

                        while (true)
                        {
                            if (myChangeCount <= lastCommitChangeCount)
                            {
                                if (infoStream != null)
                                {
                                    Message("sync superseded by newer infos");
                                }
                                break;
                            }
                            else if (pendingCommit == null)
                            {
                                // My turn to commit

                                if (segmentInfos.Generation > toSync.Generation)
                                    toSync.UpdateGeneration(segmentInfos);

                                bool success = false;
                                try
                                {
                                    // Exception here means nothing is prepared
                                    // (this method unwinds everything it did on
                                    // an exception)
                                    try
                                    {
                                        toSync.PrepareCommit(directory);
                                    }
                                    finally
                                    {
                                        // Have our master segmentInfos record the
                                        // generations we just prepared. We do this
                                        // on error or success so we don't
                                        // double-write a segments_N file.
                                        segmentInfos.UpdateGeneration(toSync);
                                    }

                                    System.Diagnostics.Debug.Assert(pendingCommit == null);
                                    setPending = true;
                                    pendingCommit = toSync;
                                    pendingCommitChangeCount = (uint) myChangeCount;
                                    success = true;
                                }
                                finally
                                {
                                    if (!success && infoStream != null)
                                        Message("hit exception committing segments file");
                                }
                                break;
                            }
                            else
                            {
                                // Must wait for other commit to complete
                                DoWait();
                            }
                        }
                    }

                    if (infoStream != null)
                        Message("done all syncs");

                    System.Diagnostics.Debug.Assert(TestPoint("midStartCommitSuccess"));
                }
                finally
                {
                    lock (this)
                    {
                        if (!setPending)
                            deleter.DecRef(toSync);
                    }
                }
            }
            catch (System.OutOfMemoryException oom)
            {
                HandleOOM(oom, "startCommit");
            }
            System.Diagnostics.Debug.Assert(TestPoint("finishStartCommit"));
        }

        /// <summary> Returns <c>true</c> iff the index in the named directory is
        /// currently locked.
        /// </summary>
        /// <param name="directory">the directory to check for a lock
        /// </param>
        /// <throws> IOException if there is a low-level IO error </throws>
        public static bool IsLocked(Directory directory)
        {
            return directory.MakeLock(WRITE_LOCK_NAME).IsLocked();
        }

        /// <summary> Forcibly unlocks the index in the named directory.
        /// <p/>
        /// Caution: this should only be used by failure recovery code,
        /// when it is known that no other process nor thread is in fact
        /// currently accessing this index.
        /// </summary>
        public static void Unlock(Directory directory)
        {
            directory.MakeLock(IndexWriter.WRITE_LOCK_NAME).Release();
        }
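        // A minimal usage sketch for the two lock helpers above (the index
        // path is hypothetical): after a crashed process leaves a stale
        // write lock behind, and only when no other writer can still be
        // running, the lock can be cleared before opening a new writer:
        //
        //   Directory dir = Lucene.Net.Store.FSDirectory.Open(
        //       new System.IO.DirectoryInfo("/path/to/index"));
        //   if (IndexWriter.IsLocked(dir))
        //   {
        //       IndexWriter.Unlock(dir); // safe only in failure recovery
        //   }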
        /// <summary> Specifies maximum field length (in number of tokens/terms) in <see cref="IndexWriter" /> constructors.
        /// <see cref="SetMaxFieldLength(int)" /> overrides the value set by
        /// the constructor.
        /// </summary>
        public sealed class MaxFieldLength
        {
            private int limit;
            private System.String name;

            /// <summary> Private type-safe-enum-pattern constructor.
            /// </summary>
            /// <param name="name">instance name
            /// </param>
            /// <param name="limit">maximum field length
            /// </param>
            internal MaxFieldLength(System.String name, int limit)
            {
                this.name = name;
                this.limit = limit;
            }

            /// <summary> Public constructor to allow users to specify the maximum field size limit.
            /// </summary>
            /// <param name="limit">The maximum field length
            /// </param>
            public MaxFieldLength(int limit) : this("User-specified", limit)
            {
            }

            public int Limit
            {
                get { return limit; }
            }

            public override System.String ToString()
            {
                return name + ":" + limit;
            }

            /// <summary>Sets the maximum field length to <see cref="int.MaxValue" />. </summary>
            public static readonly MaxFieldLength UNLIMITED = new MaxFieldLength("UNLIMITED", System.Int32.MaxValue);

            /// <summary> Sets the maximum field length to
            /// <see cref="DEFAULT_MAX_FIELD_LENGTH" />
            /// </summary>
            public static readonly MaxFieldLength LIMITED;

            static MaxFieldLength()
            {
                LIMITED = new MaxFieldLength("LIMITED", Lucene.Net.Index.IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
            }
        }

        /// <summary>If <see cref="GetReader()" /> has been called (ie, this writer
        /// is in near real-time mode), then after a merge
        /// completes, this class can be invoked to warm the
        /// reader on the newly merged segment, before the merge
        /// commits. This is not required for near real-time
        /// search, but will reduce search latency on opening a
        /// new near real-time reader after a merge completes.
        ///
        /// <p/><b>NOTE:</b> This API is experimental and might
        /// change in incompatible ways in the next release.<p/>
        ///
        /// <p/><b>NOTE</b>: warm is called before any deletes have
        /// been carried over to the merged segment.
        /// </summary>
        public abstract class IndexReaderWarmer
        {
            public abstract void Warm(IndexReader reader);
        }
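        // A minimal warmer sketch (the query and field name are made up):
        // subclassing IndexReaderWarmer and installing it via the
        // MergedSegmentWarmer property below lets a near real-time setup pay
        // the cost of the first search before the merge commits:
        //
        //   internal sealed class SimpleWarmer : IndexWriter.IndexReaderWarmer
        //   {
        //       public override void Warm(IndexReader reader)
        //       {
        //           // run a representative query against the new segment
        //           var searcher = new Lucene.Net.Search.IndexSearcher(reader);
        //           searcher.Search(new Lucene.Net.Search.TermQuery(
        //               new Term("body", "lucene")), 10);
        //       }
        //   }
        //
        //   writer.MergedSegmentWarmer = new SimpleWarmer();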
        private IndexReaderWarmer mergedSegmentWarmer;

        /// <summary>Gets or sets the merged segment warmer. See <see cref="IndexReaderWarmer" />.
        /// </summary>
        public virtual IndexReaderWarmer MergedSegmentWarmer
        {
            set { mergedSegmentWarmer = value; }
            get { return mergedSegmentWarmer; }
        }

        private void HandleOOM(System.OutOfMemoryException oom, System.String location)
        {
            if (infoStream != null)
            {
                Message("hit OutOfMemoryError inside " + location);
            }
            hitOOM = true;
            throw oom;
        }

        // Used only by assert for testing. Current points:
        //   startDoFlush
        //   startCommitMerge
        //   startStartCommit
        //   midStartCommit
        //   midStartCommit2
        //   midStartCommitSuccess
        //   finishStartCommit
        //   startCommitMergeDeletes
        //   startMergeInit
        //   startApplyDeletes
        //   DocumentsWriter.ThreadState.init start
        public /*internal*/ virtual bool TestPoint(System.String name)
        {
            return true;
        }

        internal virtual bool NrtIsCurrent(SegmentInfos infos)
        {
            lock (this)
            {
                if (!infos.Equals(segmentInfos))
                {
                    // if any structural changes (new segments), we are
                    // stale
                    return false;
                }
                else if (infos.Generation != segmentInfos.Generation)
                {
                    // if any commit took place since we were opened, we
                    // are stale
                    return false;
                }
                else
                {
                    return !docWriter.AnyChanges;
                }
            }
        }

        internal virtual bool IsClosed()
        {
            lock (this)
            {
                return closed;
            }
        }

        static IndexWriter()
        {
            MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH;
        }
    }
}