1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 using System; 19 using System.Collections.Generic; 20 using Lucene.Net.Documents; 21 using Document = Lucene.Net.Documents.Document; 22 using FieldSelector = Lucene.Net.Documents.FieldSelector; 23 using Lucene.Net.Store; 24 using Similarity = Lucene.Net.Search.Similarity; 25 26 namespace Lucene.Net.Index 27 { 28 29 /// <summary>IndexReader is an abstract class, providing an interface for accessing an 30 /// index. Search of an index is done entirely through this abstract interface, 31 /// so that any subclass which implements it is searchable. 32 /// <p/> Concrete subclasses of IndexReader are usually constructed with a call to 33 /// one of the static <c>open()</c> methods, e.g. <see cref="Open(Lucene.Net.Store.Directory, bool)" /> 34 ///. 35 /// <p/> For efficiency, in this API documents are often referred to via 36 /// <i>document numbers</i>, non-negative integers which each name a unique 37 /// document in the index. These document numbers are ephemeral--they may change 38 /// as documents are added to and deleted from an index. Clients should thus not 39 /// rely on a given document having the same number between sessions. 
/// <p/> An IndexReader can be opened on a directory for which an IndexWriter is
    /// opened already, but it cannot be used to delete documents from the index then.
    /// <p/>
    /// <b>NOTE</b>: for backwards API compatibility, several methods are not listed
    /// as abstract, but have no useful implementations in this base class and
    /// instead always throw NotSupportedException. Subclasses are
    /// strongly encouraged to override these methods, but in many cases may not
    /// need to.
    /// <p/>
    /// <p/>
    /// <b>NOTE</b>: as of 2.4, it's possible to open a read-only
    /// IndexReader using the static open methods that accept the
    /// boolean readOnly parameter. Such a reader has
    /// better concurrency as it's not necessary to synchronize on the
    /// isDeleted method. You must explicitly specify false
    /// if you want to make changes with the resulting IndexReader.
    /// <p/>
    /// <a name="thread-safety"></a><p/><b>NOTE</b>: <see cref="IndexReader" />
    /// instances are completely thread
    /// safe, meaning multiple threads can call any of their methods
    /// concurrently. If your application requires external
    /// synchronization, you should <b>not</b> synchronize on the
    /// <c>IndexReader</c> instance; use your own
    /// (non-Lucene) objects instead.
/// </summary>
    public abstract class IndexReader : System.ICloneable, System.IDisposable
    {
        /// <summary>Segments-file lookup whose body returns the value reported by
        /// <c>FileModified</c> on the captured directory for the segments file
        /// name it is handed.
        /// </summary>
        private class AnonymousClassFindSegmentsFile : SegmentInfos.FindSegmentsFile
        {
            // Directory that is asked for the modification value in DoBody.
            private Lucene.Net.Store.Directory directory2;

            internal AnonymousClassFindSegmentsFile(Lucene.Net.Store.Directory directory2, Lucene.Net.Store.Directory Param1)
                : base(Param1)
            {
                this.directory2 = directory2;
            }

            /// <summary>Returns the boxed <c>long</c> produced by
            /// <c>FileModified</c> for <paramref name="segmentFileName"/>.
            /// </summary>
            public override System.Object DoBody(System.String segmentFileName)
            {
                long modified = (long) directory2.FileModified(segmentFileName);
                return modified;
            }
        }

        /// <summary> Constants describing field properties, for example used for
        /// <see cref="IndexReader.GetFieldNames(FieldOption)" />.
        /// </summary>
        public sealed class FieldOption
        {
            /// <summary>All fields </summary>
            public static readonly FieldOption ALL = new FieldOption("ALL");
            /// <summary>All indexed fields </summary>
            public static readonly FieldOption INDEXED = new FieldOption("INDEXED");
            /// <summary>All fields that store payloads </summary>
            public static readonly FieldOption STORES_PAYLOADS = new FieldOption("STORES_PAYLOADS");
            /// <summary>All fields that omit tf </summary>
            public static readonly FieldOption OMIT_TERM_FREQ_AND_POSITIONS = new FieldOption("OMIT_TERM_FREQ_AND_POSITIONS");
            /// <summary>All fields which are not indexed </summary>
            public static readonly FieldOption UNINDEXED = new FieldOption("UNINDEXED");
            /// <summary>All fields which are indexed with termvectors enabled </summary>
            public static readonly FieldOption INDEXED_WITH_TERMVECTOR = new FieldOption("INDEXED_WITH_TERMVECTOR");
            /// <summary>All fields which are indexed but don't have termvectors enabled </summary>
            public static readonly FieldOption INDEXED_NO_TERMVECTOR = new FieldOption("INDEXED_NO_TERMVECTOR");
            /// <summary>All fields with termvectors enabled. Please note that only standard termvector fields are returned </summary>
            public static readonly FieldOption TERMVECTOR = new FieldOption("TERMVECTOR");
            /// <summary>All fields with termvectors with position values enabled </summary>
            public static readonly FieldOption TERMVECTOR_WITH_POSITION = new FieldOption("TERMVECTOR_WITH_POSITION");
            /// <summary>All fields with termvectors with offset values enabled </summary>
            public static readonly FieldOption TERMVECTOR_WITH_OFFSET = new FieldOption("TERMVECTOR_WITH_OFFSET");
            /// <summary>All fields with termvectors with offset values and position values enabled </summary>
            public static readonly FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption("TERMVECTOR_WITH_POSITION_OFFSET");

            // Text handed back by ToString(); stays null when the
            // parameterless constructor is used.
            private readonly string option;

            internal FieldOption()
            {
            }

            internal FieldOption(string option)
            {
                this.option = option;
            }

            /// <summary>Returns the option text supplied at construction.</summary>
            public override string ToString()
            {
                return option;
            }
        }

        private bool closed;
        protected internal bool hasChanges;

        private int refCount;

        protected internal static int DEFAULT_TERMS_INDEX_DIVISOR = 1;

        /// <summary>Expert: returns the current refCount for this reader </summary>
        public virtual int RefCount
        {
            get
            {
                int current;
                lock (this)
                {
                    current = refCount;
                }
                return current;
            }
        }

        /// <summary> Expert: increments the refCount of this IndexReader
        /// instance. RefCounts are used to determine when a
        /// reader can be closed safely, i.e. as soon as there are
        /// no more references. Be sure to always call a
        /// corresponding <see cref="DecRef" />, in a finally clause;
        /// otherwise the reader may never be closed.
/// Note that
        /// <see cref="Close" /> simply calls decRef(), which means that
        /// the IndexReader will not really be closed until <see cref="DecRef" />
        /// has been called for all outstanding
        /// references.
        ///
        /// </summary>
        /// <seealso cref="DecRef">
        /// </seealso>
        public virtual void IncRef()
        {
            lock (this)
            {
                System.Diagnostics.Debug.Assert(0 < refCount);
                EnsureOpen();
                refCount += 1;
            }
        }

        /// <summary> Expert: decreases the refCount of this IndexReader
        /// instance. If the refCount drops to 0, then pending
        /// changes (if any) are committed to the index and this
        /// reader is closed.
        /// </summary>
        /// <exception cref="System.IO.IOException">in case an IOException occurs in commit() or doClose()</exception>
        /// <seealso cref="IncRef">
        /// </seealso>
        public virtual void DecRef()
        {
            lock (this)
            {
                System.Diagnostics.Debug.Assert(0 < refCount);
                EnsureOpen();

                bool releasingLastReference = (refCount == 1);
                if (releasingLastReference)
                {
                    // Commit first, then close; the count is only lowered
                    // afterwards, so it is untouched if either call throws.
                    Commit();
                    DoClose();
                }

                refCount -= 1;
            }
        }

        /// <summary>Creates the reader holding a single reference (refCount of 1).</summary>
        protected internal IndexReader()
        {
            this.refCount = 1;
        }

        /// <summary>Verifies that the reference count is still positive.</summary>
        /// <exception cref="AlreadyClosedException">if this IndexReader is closed</exception>
        protected internal void EnsureOpen()
        {
            if (refCount > 0)
            {
                return;
            }

            throw new AlreadyClosedException("this IndexReader is closed");
        }

        /// <summary>Returns an IndexReader reading the index in the given
        /// Directory. You should pass readOnly=true, since it
        /// gives much better concurrent performance, unless you
        /// intend to do write operations (delete documents or
        /// change norms) with the reader.
/// </summary>
        /// <param name="directory">the index directory</param>
        /// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader</param>
        /// <exception cref="CorruptIndexException">CorruptIndexException if the index is corrupt</exception>
        /// <exception cref="System.IO.IOException">IOException if there is a low-level IO error</exception>
        public static IndexReader Open(Directory directory, bool readOnly)
        {
            return Open(directory, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
        }

        /// <summary>Expert: returns an IndexReader reading the index in the given
        /// <see cref="IndexCommit" />. You should pass readOnly=true, since it
        /// gives much better concurrent performance, unless you
        /// intend to do write operations (delete documents or
        /// change norms) with the reader.
        /// </summary>
        /// <param name="commit">the commit point to open; must not be null
        /// </param>
        /// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader
        /// </param>
        /// <exception cref="System.ArgumentNullException">if <paramref name="commit"/> is null</exception>
        /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
        /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
        public static IndexReader Open(IndexCommit commit, bool readOnly)
        {
            // commit.Directory is dereferenced below; reject null explicitly so the
            // caller gets an argument error instead of a NullReferenceException.
            if (object.ReferenceEquals(commit, null))
            {
                throw new ArgumentNullException("commit");
            }

            return Open(commit.Directory, null, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
        }

        /// <summary>Expert: returns an IndexReader reading the index in
        /// the given Directory, with a custom <see cref="IndexDeletionPolicy" />.
        /// You should pass readOnly=true,
        /// since it gives much better concurrent performance,
        /// unless you intend to do write operations (delete
        /// documents or change norms) with the reader.
/// </summary>
        /// <param name="directory">the index directory
        /// </param>
        /// <param name="deletionPolicy">a custom deletion policy (only used
        /// if you use this reader to perform deletes or to set
        /// norms); see <see cref="IndexWriter" /> for details.
        /// </param>
        /// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader
        /// </param>
        /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
        /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
        public static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, bool readOnly)
        {
            // No specific commit is requested and the default divisor is used.
            IndexCommit noCommit = null;
            return Open(directory, deletionPolicy, noCommit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
        }

        /// <summary>Expert: returns an IndexReader reading the index in
        /// the given Directory, with a custom <see cref="IndexDeletionPolicy" />.
        /// You should pass readOnly=true,
        /// since it gives much better concurrent performance,
        /// unless you intend to do write operations (delete
        /// documents or change norms) with the reader.
        /// </summary>
        /// <param name="directory">the index directory
        /// </param>
        /// <param name="deletionPolicy">a custom deletion policy (only used
        /// if you use this reader to perform deletes or to set
        /// norms); see <see cref="IndexWriter" /> for details.
        /// </param>
        /// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader
        /// </param>
        /// <param name="termInfosIndexDivisor">Subsamples which indexed
        /// terms are loaded into RAM. This has the same effect as
        /// <c>IndexWriter.SetTermIndexInterval</c>
        /// except that setting
        /// must be done at indexing time while this setting can be
        /// set per reader.
/// When set to N, then one in every
        /// N*termIndexInterval terms in the index is loaded into
        /// memory. By setting this to a value > 1 you can reduce
        /// memory usage, at the expense of higher latency when
        /// loading a TermInfo. The default value is 1. Set this
        /// to -1 to skip loading the terms index entirely.
        /// </param>
        /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
        /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
        public static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
        {
            // No specific commit is requested; the caller's divisor is passed through.
            IndexCommit noCommit = null;
            return Open(directory, deletionPolicy, noCommit, readOnly, termInfosIndexDivisor);
        }

        /// <summary>Expert: returns an IndexReader reading the index in
        /// the given Directory, using a specific commit and with
        /// a custom <see cref="IndexDeletionPolicy" />. You should pass
        /// readOnly=true, since it gives much better concurrent
        /// performance, unless you intend to do write operations
        /// (delete documents or change norms) with the reader.
        /// </summary>
        /// <param name="commit">the specific <see cref="IndexCommit" /> to open;
        /// see <see cref="IndexReader.ListCommits" /> to list all commits
        /// in a directory
        /// </param>
        /// <param name="deletionPolicy">a custom deletion policy (only used
        /// if you use this reader to perform deletes or to set
        /// norms); see <see cref="IndexWriter" /> for details.
/// </param>
        /// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader
        /// </param>
        /// <exception cref="System.ArgumentNullException">if <paramref name="commit"/> is null</exception>
        /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
        /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
        public static IndexReader Open(IndexCommit commit, IndexDeletionPolicy deletionPolicy, bool readOnly)
        {
            // commit.Directory is dereferenced below; reject null explicitly so the
            // caller gets an argument error instead of a NullReferenceException.
            if (object.ReferenceEquals(commit, null))
            {
                throw new ArgumentNullException("commit");
            }

            return Open(commit.Directory, deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
        }

        /// <summary>Expert: returns an IndexReader reading the index in
        /// the given Directory, using a specific commit and with
        /// a custom <see cref="IndexDeletionPolicy" />. You should pass
        /// readOnly=true, since it gives much better concurrent
        /// performance, unless you intend to do write operations
        /// (delete documents or change norms) with the reader.
        /// </summary>
        /// <param name="commit">the specific <see cref="IndexCommit" /> to open;
        /// see <see cref="IndexReader.ListCommits" /> to list all commits
        /// in a directory
        /// </param>
        /// <param name="deletionPolicy">a custom deletion policy (only used
        /// if you use this reader to perform deletes or to set
        /// norms); see <see cref="IndexWriter" /> for details.
        /// </param>
        /// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader
        /// </param>
        /// <param name="termInfosIndexDivisor">Subsamples which indexed
        /// terms are loaded into RAM. This has the same effect as
        /// <c>IndexWriter.SetTermIndexInterval</c>
        /// except that setting
        /// must be done at indexing time while this setting can be
        /// set per reader. When set to N, then one in every
        /// N*termIndexInterval terms in the index is loaded into
        /// memory.
/// By setting this to a value > 1 you can reduce
        /// memory usage, at the expense of higher latency when
        /// loading a TermInfo. The default value is 1. Set this
        /// to -1 to skip loading the terms index entirely.
        /// </param>
        /// <exception cref="System.ArgumentNullException">if <paramref name="commit"/> is null</exception>
        /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
        /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
        public static IndexReader Open(IndexCommit commit, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
        {
            // commit.Directory is dereferenced below; reject null explicitly so the
            // caller gets an argument error instead of a NullReferenceException.
            if (object.ReferenceEquals(commit, null))
            {
                throw new ArgumentNullException("commit");
            }

            return Open(commit.Directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor);
        }

        /// <summary>Shared target of the public <c>Open</c> overloads: forwards
        /// every argument unchanged to <c>DirectoryReader.Open</c>.
        /// </summary>
        private static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, IndexCommit commit, bool readOnly, int termInfosIndexDivisor)
        {
            return DirectoryReader.Open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor);
        }

        /// <summary> Refreshes an IndexReader if the index has changed since this instance
        /// was (re)opened.
        /// <p/>
        /// Opening an IndexReader is an expensive operation. This method can be used
        /// to refresh an existing IndexReader to reduce these costs. This method
        /// tries to only load segments that have changed or were created after the
        /// IndexReader was (re)opened.
        /// <p/>
        /// If the index has not changed since this instance was (re)opened, then this
        /// call is a NOOP and returns this instance. Otherwise, a new instance is
        /// returned. The old instance is <b>not</b> closed and remains usable.<br/>
        /// <p/>
        /// If the reader is reopened, even though they share
        /// resources internally, it's safe to make changes
        /// (deletions, norms) with the new reader.
/// All shared
        /// mutable state obeys "copy on write" semantics to ensure
        /// the changes are not seen by other readers.
        /// <p/>
        /// You can determine whether a reader was actually reopened by comparing the
        /// old instance with the instance returned by this method:
        /// <code>
        /// IndexReader reader = ...
        /// ...
        /// IndexReader newReader = reader.Reopen();
        /// if (newReader != reader) {
        /// ... // reader was reopened
        /// reader.close();
        /// }
        /// reader = newReader;
        /// ...
        /// </code>
        ///
        /// Be sure to synchronize that code so that other threads,
        /// if present, can never use reader after it has been
        /// closed and before it's switched to newReader.
        ///
        /// <p/><b>NOTE</b>: If this reader is a near real-time
        /// reader (obtained from <see cref="IndexWriter.GetReader()" />),
        /// reopen() will simply call writer.getReader() again for
        /// you, though this may change in the future.
        ///
        /// </summary>
        /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
        /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
        public virtual IndexReader Reopen()
        {
            lock (this)
            {
                // The base class has no reopen support; this implementation always throws.
                NotSupportedException unsupported = new NotSupportedException("This reader does not support reopen().");
                throw unsupported;
            }
        }


        /// <summary>Just like <see cref="Reopen()" />, except you can change the
        /// readOnly of the original reader. If the index is
        /// unchanged but readOnly is different then a new reader
        /// will be returned.
        /// </summary>
        public virtual IndexReader Reopen(bool openReadOnly)
        {
            lock (this)
            {
                // The base class has no reopen support; this implementation always throws.
                NotSupportedException unsupported = new NotSupportedException("This reader does not support reopen().");
                throw unsupported;
            }
        }

        /// <summary>Expert: reopen this reader on a specific commit point.
        /// This always returns a readOnly reader.
/// If the
        /// specified commit point matches what this reader is
        /// already on, and this reader is already readOnly, then
        /// this same instance is returned; if it is not already
        /// readOnly, a readOnly clone is returned.
        /// </summary>
        public virtual IndexReader Reopen(IndexCommit commit)
        {
            lock (this)
            {
                // The base class has no reopen support; this implementation always throws.
                NotSupportedException unsupported = new NotSupportedException("This reader does not support reopen(IndexCommit).");
                throw unsupported;
            }
        }

        /// <summary> Efficiently clones the IndexReader (sharing most
        /// internal state).
        /// <p/>
        /// On cloning a reader with pending changes (deletions,
        /// norms), the original reader transfers its write lock to
        /// the cloned reader. This means only the cloned reader
        /// may make further changes to the index, and commit the
        /// changes to the index on close, but the old reader still
        /// reflects all changes made up until it was cloned.
        /// <p/>
        /// Like <see cref="Reopen()" />, it's safe to make changes to
        /// either the original or the cloned reader: all shared
        /// mutable state obeys "copy on write" semantics to ensure
        /// the changes are not seen by other readers.
        /// <p/>
        /// </summary>
        /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
        /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
        public virtual object Clone()
        {
            // The base class does not implement cloning; this implementation always throws.
            NotSupportedException unsupported = new NotSupportedException("This reader does not implement clone()");
            throw unsupported;
        }

        /// <summary> Clones the IndexReader and optionally changes readOnly. A readOnly
        /// reader cannot open a writeable reader.
/// </summary>
        /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
        /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
        public virtual IndexReader Clone(bool openReadOnly)
        {
            lock (this)
            {
                // The base class does not implement cloning; this implementation always throws.
                NotSupportedException unsupported = new NotSupportedException("This reader does not implement clone()");
                throw unsupported;
            }
        }

        /// <summary> Returns the directory associated with this index. The Default
        /// implementation returns the directory specified by subclasses when
        /// delegating to the IndexReader(Directory) constructor, or throws a
        /// NotSupportedException if one was not specified.
        /// </summary>
        /// <exception cref="System.NotSupportedException">if no directory</exception>
        public virtual Directory Directory()
        {
            this.EnsureOpen();
            throw new NotSupportedException("This reader does not support this method.");
        }

        /// <summary> Returns the time the index in the named directory was last modified.
        /// Do not use this to check whether the reader is still up-to-date, use
        /// <see cref="IsCurrent()" /> instead.
        /// </summary>
        /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
        /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
        public static long LastModified(Directory directory2)
        {
            // The same directory is both the one searched for the segments file
            // and the one asked for the modification value.
            AnonymousClassFindSegmentsFile finder = new AnonymousClassFindSegmentsFile(directory2, directory2);
            return (long) finder.Run();
        }

        /// <summary> Reads version number from segments files. The version number is
        /// initialized with a timestamp and then increased by one for each change of
        /// the index.
        ///
        /// </summary>
        /// <param name="directory">where the index resides.
        /// </param>
        /// <returns> version number.
/// </returns>
        /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
        /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
        public static long GetCurrentVersion(Directory directory)
        {
            long version = SegmentInfos.ReadCurrentVersion(directory);
            return version;
        }

        /// <summary> Reads commitUserData, previously passed to
        /// <see cref="IndexWriter.Commit(System.Collections.Generic.IDictionary{string, string})" />,
        /// from current index segments file. This will return null if
        /// <see cref="IndexWriter.Commit(System.Collections.Generic.IDictionary{string, string})" />
        /// has never been called for this index.
        /// </summary>
        /// <param name="directory">where the index resides.
        /// </param>
        /// <returns> commit userData.
        /// </returns>
        /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
        /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
        /// <seealso cref="CommitUserData">
        /// </seealso>
        public static IDictionary<string, string> GetCommitUserData(Directory directory)
        {
            IDictionary<string, string> userData = SegmentInfos.ReadCurrentUserData(directory);
            return userData;
        }

        /// <summary> Version number when this IndexReader was opened. Not implemented in the
        /// IndexReader base class.
        ///
        /// <p/>
        /// If this reader is based on a Directory (ie, was created by calling
        /// <see cref="Open(Lucene.Net.Store.Directory, bool)" />, or <see cref="Reopen()" />
        /// on a reader based on a Directory), then
        /// this method returns the version recorded in the commit that the reader
        /// opened. This version is advanced every time <see cref="IndexWriter.Commit()" /> is
        /// called.
/// <p/>
        ///
        /// <p/>
        /// If instead this reader is a near real-time reader (ie, obtained by a call
        /// to <see cref="IndexWriter.GetReader()" />, or by calling <see cref="Reopen()" /> on a near
        /// real-time reader), then this method returns the version of the last
        /// commit done by the writer. Note that even as further changes are made
        /// with the writer, the version will not change until a commit is
        /// completed. Thus, you should not rely on this method to determine when a
        /// near real-time reader should be opened. Use <see cref="IsCurrent" /> instead.
        /// <p/>
        ///
        /// </summary>
        /// <exception cref="System.NotSupportedException">unless overridden in subclass</exception>
        public virtual long Version
        {
            get
            {
                throw new NotSupportedException("This reader does not support this method.");
            }
        }

        /// <summary> Retrieve the String userData optionally passed to
        /// <see cref="IndexWriter.Commit(System.Collections.Generic.IDictionary{string, string})" />.
        /// This will return null if
        /// <see cref="IndexWriter.Commit(System.Collections.Generic.IDictionary{string, string})" />
        /// has never been called for this index.
        /// </summary>
        /// <exception cref="System.NotSupportedException">always, in this base implementation</exception>
        /// <seealso cref="GetCommitUserData(Lucene.Net.Store.Directory)">
        /// </seealso>
        public virtual IDictionary<string, string> CommitUserData
        {
            get
            {
                throw new NotSupportedException("This reader does not support this method.");
            }
        }

        /// <summary> Check whether any new changes have occurred to the index since this
        /// reader was opened.
///
        /// <p/>
        /// If this reader is based on a Directory (ie, was created by calling
        /// <see cref="Open(Lucene.Net.Store.Directory, bool)" />,
        /// or <see cref="Reopen()" /> on a reader based on a Directory), then
        /// this method checks if any further commits (see <see cref="IndexWriter.Commit()" />)
        /// have occurred in that directory.
        /// <p/>
        ///
        /// <p/>
        /// If instead this reader is a near real-time reader (ie, obtained by a call
        /// to <see cref="IndexWriter.GetReader()" />, or by calling <see cref="Reopen()" /> on a near
        /// real-time reader), then this method checks if either a new commit has
        /// occurred, or any new uncommitted changes have taken place via the writer.
        /// Note that even if the writer has only performed merging, this method will
        /// still return false.
        /// <p/>
        ///
        /// <p/>
        /// In any event, if this returns false, you should call <see cref="Reopen()" /> to
        /// get a new reader that sees the changes.
        /// <p/>
        ///
        /// </summary>
        /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
        /// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
        /// <exception cref="System.NotSupportedException">unless overridden in subclass</exception>
        public virtual bool IsCurrent()
        {
            // The base class cannot answer this; this implementation always throws.
            NotSupportedException unsupported = new NotSupportedException("This reader does not support this method.");
            throw unsupported;
        }

        /// <summary> Checks if the index is optimized (if it has a single segment and
        /// no deletions). Not implemented in the IndexReader base class.
/// </summary>
        /// <returns> <c>true</c> if the index is optimized; <c>false</c> otherwise </returns>
        /// <exception cref="System.NotSupportedException">unless overridden in subclass</exception>
        public virtual bool IsOptimized()
        {
            // The base class cannot answer this; this implementation always throws.
            NotSupportedException unsupported = new NotSupportedException("This reader does not support this method.");
            throw unsupported;
        }

        /// <summary> Return an array of term frequency vectors for the specified document.
        /// The array contains a vector for each vectorized field in the document.
        /// Each vector contains terms and frequencies for all terms in a given vectorized field.
        /// If no such fields existed, the method returns null. The term vectors that are
        /// returned may either be of type <see cref="ITermFreqVector" />
        /// or of type <see cref="TermPositionVector" /> if
        /// positions or offsets have been stored.
        ///
        /// </summary>
        /// <param name="docNumber">document for which term frequency vectors are returned
        /// </param>
        /// <returns> array of term frequency vectors. May be null if no term vectors have been
        /// stored for the specified document.
        /// </returns>
        /// <exception cref="System.IO.IOException">if index cannot be accessed</exception>
        /// <seealso cref="Lucene.Net.Documents.Field.TermVector">
        /// </seealso>
        public abstract ITermFreqVector[] GetTermFreqVectors(int docNumber);


        /// <summary> Return a term frequency vector for the specified document and field. The
        /// returned vector contains terms and frequencies for the terms in
        /// the specified field of this document, if the field had the storeTermVector
        /// flag set. If termvectors had been stored with positions or offsets, a
        /// <see cref="TermPositionVector" /> is returned.
///
        /// </summary>
        /// <param name="docNumber">document for which the term frequency vector is returned
        /// </param>
        /// <param name="field">field for which the term frequency vector is returned.
        /// </param>
        /// <returns> term frequency vector. May be null if field does not exist in the specified
        /// document or term vector was not stored.
        /// </returns>
        /// <exception cref="System.IO.IOException">if index cannot be accessed</exception>
        /// <seealso cref="Lucene.Net.Documents.Field.TermVector">
        /// </seealso>
        public abstract ITermFreqVector GetTermFreqVector(int docNumber, string field);

        /// <summary> Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of
        /// the <see cref="ITermFreqVector" />.
        /// </summary>
        /// <param name="docNumber">The number of the document to load the vector for
        /// </param>
        /// <param name="field">The name of the field to load
        /// </param>
        /// <param name="mapper">The <see cref="TermVectorMapper" /> to process the vector. Must not be null
        /// </param>
        /// <exception cref="System.IO.IOException">if term vectors cannot be accessed or if they do not exist on the field and doc. specified.</exception>
        public abstract void GetTermFreqVector(int docNumber, string field, TermVectorMapper mapper);

        /// <summary> Map all the term vectors for all fields in a Document</summary>
        /// <param name="docNumber">The number of the document to load the vector for
        /// </param>
        /// <param name="mapper">The <see cref="TermVectorMapper" /> to process the vector. Must not be null
        /// </param>
        /// <throws> IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified.
</throws> GetTermFreqVector(int docNumber, TermVectorMapper mapper)686 abstract public void GetTermFreqVector(int docNumber, TermVectorMapper mapper); 687 688 /// <summary> Returns <c>true</c> if an index exists at the specified directory. 689 /// If the directory does not exist or if there is no index in it. 690 /// </summary> 691 /// <param name="directory">the directory to check for an index 692 /// </param> 693 /// <returns> <c>true</c> if an index exists; <c>false</c> otherwise 694 /// </returns> 695 /// <throws> IOException if there is a problem with accessing the index </throws> IndexExists(Directory directory)696 public static bool IndexExists(Directory directory) 697 { 698 return SegmentInfos.GetCurrentSegmentGeneration(directory) != - 1; 699 } 700 701 /// <summary>Returns the number of documents in this index. </summary> 702 [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] NumDocs()703 public abstract int NumDocs(); 704 705 /// <summary>Returns one greater than the largest possible document number. 706 /// This may be used to, e.g., determine how big to allocate an array which 707 /// will have an element for every document number in an index. 708 /// </summary> 709 public abstract int MaxDoc { get; } 710 711 /// <summary>Returns the number of deleted documents. </summary> 712 public virtual int NumDeletedDocs 713 { 714 get { return MaxDoc - NumDocs(); } 715 } 716 717 /// <summary> Returns the stored fields of the <c>n</c><sup>th</sup> 718 /// <c>Document</c> in this index. 719 /// <p/> 720 /// <b>NOTE:</b> for performance reasons, this method does not check if the 721 /// requested document is deleted, and therefore asking for a deleted document 722 /// may yield unspecified results. Usually this is not required, however you 723 /// can call <see cref="IsDeleted(int)" /> with the requested document ID to verify 724 /// the document is not deleted. 
///
/// </summary>
/// <exception cref="CorruptIndexException">If the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public virtual Document Document(int n)
{
    EnsureOpen();

    // A null selector means "load every stored field".
    FieldSelector loadAllFields = null;
    return Document(n, loadAllFields);
}

/// <summary>
/// Indexer shorthand for <see cref="Document(int)" />: returns the stored
/// fields of the document with the given number.
/// <p/>
/// <b>NOTE:</b> as with <see cref="Document(int)" />, no check is made that
/// the requested document is not deleted; call <see cref="IsDeleted(int)" />
/// first if that matters.
/// </summary>
/// <exception cref="CorruptIndexException">If the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public Document this[int doc]
{
    get
    {
        var stored = Document(doc);
        return stored;
    }
}

/// <summary>
/// Gets the <see cref="Lucene.Net.Documents.Document" /> at the <c>n</c><sup>th</sup>
/// position. The <see cref="FieldSelector" /> may be used to determine which
/// <see cref="Lucene.Net.Documents.Field" />s to load and how they are loaded.
/// <b>NOTE:</b> If this reader (more specifically, the underlying
/// <c>FieldsReader</c>) is closed before a lazy
/// <see cref="Lucene.Net.Documents.Field" /> is loaded, an exception may be
/// thrown. To keep the value of a lazy field available after closing, load it
/// explicitly or fetch the document again with a new loader.
/// <p/>
/// <b>NOTE:</b> for performance reasons, this method does not check if the
/// requested document is deleted, and therefore asking for a deleted document
/// may yield unspecified results. Call <see cref="IsDeleted(int)" /> with the
/// requested document ID first if you need to be sure.
/// </summary>
/// <param name="n">Position of the document to fetch.</param>
/// <param name="fieldSelector">
/// The <see cref="FieldSelector" /> that decides which fields are loaded on the
/// document. May be null, in which case all fields are loaded.
/// </param>
/// <returns>
/// The stored fields of the <see cref="Lucene.Net.Documents.Document" /> at the nth position.
/// </returns>
/// <exception cref="CorruptIndexException">If the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
/// <seealso cref="IFieldable" />
/// <seealso cref="Lucene.Net.Documents.FieldSelector" />
/// <seealso cref="Lucene.Net.Documents.SetBasedFieldSelector" />
/// <seealso cref="Lucene.Net.Documents.LoadFirstFieldSelector" />
// TODO (1.5): When we convert to JDK 1.5 make this Set<String>
public abstract Document Document(int n, FieldSelector fieldSelector);

/// <summary>Returns true if document <i>n</i> has been deleted.</summary>
public abstract bool IsDeleted(int n);

/// <summary>Returns true if any documents have been deleted.</summary>
public abstract bool HasDeletions { get; }

/// <summary>Returns true if there are norms stored for this field.</summary>
public virtual bool HasNorms(string field)
{
    // Backward-compatible fallback: ask for the norms and test for null.
    // SegmentReader has an efficient implementation.
    EnsureOpen();
    byte[] norms = Norms(field);
    return norms != null;
}

/// <summary>
/// Returns the byte-encoded normalization factor for the named field of
/// every document. The search code uses it to score documents.
/// </summary>
/// <seealso cref="Lucene.Net.Documents.AbstractField.Boost" />
public abstract byte[] Norms(string field);

/// <summary>
/// Reads the byte-encoded normalization factor for the named field of every
/// document into <paramref name="bytes" />. The search code uses it to score
/// documents.
/// </summary>
/// <seealso cref="Lucene.Net.Documents.AbstractField.Boost" />
public abstract void Norms(string field, byte[] bytes, int offset);

/// <summary>Expert: Resets the normalization factor for the named field of the named
/// document. The norm represents the product of the field's <see cref="IFieldable.Boost">boost</see>
/// and its <see cref="Similarity.LengthNorm(String,int)">length normalization</see>. Thus, to preserve the length normalization
/// values when resetting this, one should base the new value upon the old.
///
/// <b>NOTE:</b> If this field does not store norms, then
/// this method call will silently do nothing.
/// </summary>
/// <seealso cref="Norms(String)" />
/// <seealso cref="Similarity.DecodeNorm(byte)" />
/// <exception cref="StaleReaderException">If the index has changed since this reader was opened</exception>
/// <exception cref="CorruptIndexException">If the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">
/// If another writer has this index open (<c>write.lock</c> could not be obtained)
/// </exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public virtual void SetNorm(int doc, string field, byte value)
{
    lock (this)
    {
        EnsureOpen();

        // Take the write lock before flagging the reader as modified, then
        // let the subclass apply the new norm.
        AcquireWriteLock();
        hasChanges = true;
        DoSetNorm(doc, field, value);
    }
}

/// <summary>
/// Subclass hook that performs the norm update requested through
/// <see cref="SetNorm(int, String, byte)" />.
/// </summary>
protected internal abstract void DoSetNorm(int doc, string field, byte value_Renamed);

/// <summary>
/// Expert: Resets the normalization factor for the named field of the named document.
/// </summary>
/// <seealso cref="Norms(String)" />
/// <seealso cref="Similarity.DecodeNorm(byte)" />
/// <exception cref="StaleReaderException">If the index has changed since this reader was opened</exception>
/// <exception cref="CorruptIndexException">If the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">
/// If another writer has this index open (<c>write.lock</c> could not be obtained)
/// </exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public virtual void SetNorm(int doc, string field, float value)
{
    EnsureOpen();

    // Encode the float norm, then delegate to the overload that takes the encoded form.
    var encoded = Similarity.EncodeNorm(value);
    SetNorm(doc, field, encoded);
}

/// <summary>
/// Returns an enumeration of all the terms in the index, ordered by
/// Term.compareTo(): each term is greater than all that precede it.
/// Note that <see cref="TermEnum.Next()" /> must be called on the resulting
/// enumeration before calling other methods such as <see cref="TermEnum.Term" />.
/// </summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public abstract TermEnum Terms();

/// <summary>Returns an enumeration of all terms starting at a given term. If
/// the given term does not exist, the enumeration is positioned at the
/// first term greater than the supplied term. The enumeration is
/// ordered by Term.compareTo(). Each term is greater than all that
/// precede it in the enumeration.
/// </summary>
/// <exception cref="System.IO.IOException">
/// If there is a low-level IO error
/// </exception>
public abstract TermEnum Terms(Term t);

/// <summary>Returns the number of documents containing the term <c>t</c>.</summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public abstract int DocFreq(Term t);

/// <summary>
/// Returns an enumeration of all the documents which contain <c>term</c>.
/// For each document, the document number and the frequency of the term in
/// that document are provided, for use in search scoring. If term is null,
/// then all non-deleted docs are returned with freq=1.
/// Thus, this method implements the mapping:
/// <p/>
/// Term =&gt; &lt;docNum, freq&gt;<sup>*</sup>
/// <p/>
/// The enumeration is ordered by document number. Each document number is
/// greater than all that precede it in the enumeration.
/// </summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public virtual TermDocs TermDocs(Term term)
{
    EnsureOpen();

    // Start from an unpositioned enumerator and seek it to the requested term.
    var enumerator = TermDocs();
    enumerator.Seek(term);
    return enumerator;
}

/// <summary>Returns an unpositioned <see cref="Lucene.Net.Index.TermDocs" /> enumerator.</summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public abstract TermDocs TermDocs();

/// <summary>
/// Returns an enumeration of all the documents which contain <c>term</c>.
/// For each document, in addition to the document number and the frequency of
/// the term in that document, a list of all of the ordinal positions of the
/// term in the document is available. Thus, this method implements the mapping:
/// <p/>
/// Term =&gt; &lt;docNum, freq,
/// &lt;pos<sub>1</sub>, pos<sub>2</sub>, ... pos<sub>freq-1</sub>&gt;
/// &gt;<sup>*</sup>
/// <p/>
/// This positional information facilitates phrase and proximity searching.
/// <p/>
/// The enumeration is ordered by document number. Each document number is
/// greater than all that precede it in the enumeration.
/// </summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public virtual TermPositions TermPositions(Term term)
{
    EnsureOpen();

    // Same approach as TermDocs(Term): obtain an unpositioned enumerator, then seek.
    var enumerator = TermPositions();
    enumerator.Seek(term);
    return enumerator;
}

/// <summary>Returns an unpositioned <see cref="Lucene.Net.Index.TermPositions" /> enumerator.</summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public abstract TermPositions TermPositions();



/// <summary>
/// Deletes the document numbered <c>docNum</c>. Once a document is
/// deleted it will not appear in TermDocs or TermPositions enumerations.
/// Attempts to read its field with the <see cref="Document(int)" />
/// method will result in an error. The presence of this document may still be
/// reflected in the <see cref="DocFreq" /> statistic, though
/// this will be corrected eventually as the index is further modified.
/// </summary>
/// <exception cref="StaleReaderException">If the index has changed since this reader was opened</exception>
/// <exception cref="CorruptIndexException">If the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">
/// If another writer has this index open (<c>write.lock</c> could not be obtained)
/// </exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public virtual void DeleteDocument(int docNum)
{
    lock (this)
    {
        EnsureOpen();

        // Take the write lock before flagging the reader as modified, then
        // let the subclass perform the actual deletion.
        AcquireWriteLock();
        hasChanges = true;
        DoDelete(docNum);
    }
}


/// <summary>
/// Subclass hook that deletes the document numbered <c>docNum</c>.
/// Applications should call <see cref="DeleteDocument(int)" /> or
/// <see cref="DeleteDocuments(Term)" /> instead of this method.
/// </summary>
protected internal abstract void DoDelete(int docNum);


/// <summary>
/// Deletes all documents that have a given <c>term</c> indexed.
/// This is useful if one uses a document field to hold a unique ID string for
/// the document. Then to delete such a document, one merely constructs a
/// term with the appropriate field and the unique ID string as its text and
/// passes it to this method.
/// See <see cref="DeleteDocument(int)" /> for information about when this deletion will
/// become effective.
/// </summary>
/// <returns>The number of documents deleted</returns>
/// <exception cref="StaleReaderException">If the index has changed since this reader was opened</exception>
/// <exception cref="CorruptIndexException">If the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">
/// If another writer has this index open (<c>write.lock</c> could not be obtained)
/// </exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public virtual int DeleteDocuments(Term term)
{
    EnsureOpen();

    TermDocs matches = TermDocs(term);
    if (matches == null)
    {
        return 0;
    }

    int deleted = 0;
    try
    {
        // The counter advances only after a document was deleted successfully.
        for (; matches.Next(); deleted++)
        {
            DeleteDocument(matches.Doc);
        }
    }
    finally
    {
        // Release the enumerator even when a deletion fails.
        matches.Close();
    }

    return deleted;
}

/// <summary>Undeletes all documents currently marked as deleted in this index.</summary>
/// <exception cref="StaleReaderException">If the index has changed since this reader was opened</exception>
/// <exception cref="CorruptIndexException">If the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">
/// If another writer has this index open (<c>write.lock</c> could not be obtained)
/// </exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public virtual void UndeleteAll()
{
    lock (this)
    {
        EnsureOpen();

        // Take the write lock before flagging the reader as modified, then
        // let the subclass restore the deleted documents.
        AcquireWriteLock();
        hasChanges = true;
        DoUndeleteAll();
    }
}

/// <summary>Subclass hook that performs the work of <see cref="UndeleteAll()" />.</summary>
protected internal abstract void DoUndeleteAll();

/// <summary>
/// Does nothing by default (apart from briefly entering this reader's
/// monitor). Subclasses that require a write lock for index modifications
/// must implement this method.
/// </summary>
protected internal virtual void AcquireWriteLock()
{
    lock (this)
    {
        /* NOOP */
    }
}

/// <summary>
/// Verifies that the reader is still open and then commits pending changes
/// via <see cref="Commit()" />.
/// </summary>
/// <exception cref="System.IO.IOException" />
public void Flush()
{
    lock (this)
    {
        EnsureOpen();
        Commit();
    }
}

/// <summary>
/// Verifies that the reader is still open and then commits pending changes
/// together with the supplied user data.
/// </summary>
/// <param name="commitUserData">Opaque Map (String -> String)
/// that's recorded into the segments file in the index,
/// and retrievable by <see cref="IndexReader.GetCommitUserData" />
/// </param>
/// <exception cref="System.IO.IOException" />
public void Flush(IDictionary<string, string> commitUserData)
{
    lock (this)
    {
        EnsureOpen();
        Commit(commitUserData);
    }
}

/// <summary>
/// Commits changes resulting from delete, undeleteAll, or setNorm
/// operations, without any commit user data.
///
/// If an exception is hit, then either no changes or all changes will have
/// been committed to the index (transactional semantics).
/// </summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public /*protected internal*/ void Commit()
{
    lock (this)
    {
        // Typed null selects the overload that takes commit user data.
        IDictionary<string, string> noUserData = null;
        Commit(noUserData);
    }
}

/// <summary> Commit changes resulting from delete, undeleteAll, or
/// setNorm operations
///
/// If an exception is hit, then either no changes or all
/// changes will have been committed to the index
/// (transactional semantics).
/// </summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public void Commit(IDictionary<string, string> commitUserData)
{
    lock (this)
    {
        // Only touch the index when something was modified. The flag is
        // cleared afterwards; if DoCommit throws it stays set.
        if (hasChanges)
        {
            DoCommit(commitUserData);
        }

        hasChanges = false;
    }
}

/// <summary>Subclass hook that performs the actual commit.</summary>
protected internal abstract void DoCommit(IDictionary<string, string> commitUserData);

/// <summary>Obsolete alias that forwards to <see cref="Dispose()" />.</summary>
[Obsolete("Use Dispose() instead")]
public void Close()
{
    Dispose();
}

/// <summary>
/// Closes files associated with this index and also saves any new deletions
/// to disk. No other methods should be called after this has been called.
/// </summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public void Dispose()
{
    Dispose(true);
}

/// <summary>
/// Releases this reader's own reference through <c>DecRef()</c> the first
/// time it is called with <paramref name="disposing" /> set to
/// <c>true</c>; every later call, and any call with <c>false</c>, does nothing.
/// </summary>
/// <param name="disposing"><c>true</c> when invoked from <see cref="Dispose()" />.</param>
protected virtual void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    lock (this)
    {
        if (closed)
        {
            return;
        }

        // Mark as closed only after the reference was released successfully.
        DecRef();
        closed = true;
    }
}

/// <summary>Subclass hook that implements close.</summary>
protected internal abstract void DoClose();


/// <summary> Get a list of unique field names that exist in this index and have the specified
/// field option information.
/// </summary>
/// <param name="fldOption">specifies which field option should be available for the returned fields
/// </param>
/// <returns> Collection of Strings indicating the names of the fields.
/// </returns>
/// <seealso cref="IndexReader.FieldOption" />
public abstract ICollection<string> GetFieldNames(FieldOption fldOption);

/// <summary>
/// Expert: returns the IndexCommit that this reader has opened. Only readers
/// that correspond to a Directory with its own segments_N file implement
/// this; the base implementation always throws.
/// <p/>
/// <b>WARNING</b>: this API is new and experimental and may suddenly change.
/// </summary>
/// <exception cref="NotSupportedException">Always, unless overridden by a subclass.</exception>
public virtual IndexCommit IndexCommit
{
    get
    {
        const string message = "This reader does not support this method.";
        throw new NotSupportedException(message);
    }
}

/// <summary> Prints the filename and size of each file within a given compound file.
/// Add the -extract flag to extract files to the current working directory.
/// In order to make the extracted version of the index work, you have to copy
/// the segments file from the compound index into the directory where the extracted files are stored.
/// </summary>
/// <param name="args">Usage: Lucene.Net.Index.IndexReader [-extract] &lt;cfsfile&gt;
/// </param>
[STAThread]
public static void Main(String[] args)
{
    string filename = null;
    bool extract = false;

    // The first non-flag argument names the compound file; later ones are ignored.
    foreach (string arg in args)
    {
        if (arg.Equals("-extract"))
        {
            extract = true;
        }
        else if (filename == null)
        {
            filename = arg;
        }
    }

    if (filename == null)
    {
        System.Console.Out.WriteLine("Usage: Lucene.Net.Index.IndexReader [-extract] <cfsfile>");
        return;
    }

    Directory dir = null;
    CompoundFileReader cfr = null;

    try
    {
        // Split the argument into the containing directory and the bare file name.
        var file = new System.IO.FileInfo(filename);
        string dirname = file.DirectoryName;
        filename = file.Name;
        dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirname));
        cfr = new CompoundFileReader(dir, filename);

        string[] files = cfr.ListAll();
        System.Array.Sort(files); // sort the file names so that the output is more readable

        foreach (string name in files)
        {
            long len = cfr.FileLength(name);

            if (!extract)
            {
                System.Console.Out.WriteLine(name + ": " + len + " bytes");
                continue;
            }

            System.Console.Out.WriteLine("extract " + name + " with " + len + " bytes to local directory...");
            IndexInput ii = cfr.OpenInput(name);
            try
            {
                // 'using' guarantees the output file is closed even if a read or write fails.
                using (var f = new System.IO.FileStream(name, System.IO.FileMode.Create))
                {
                    // Copy with a small buffer, which is more effective than reading byte by byte.
                    var buffer = new byte[1024];
                    while (len > 0)
                    {
                        var bufLen = (int) System.Math.Min(buffer.Length, len);
                        ii.ReadBytes(buffer, 0, bufLen);
                        f.Write(buffer, 0, bufLen);
                        len -= bufLen;
                    }
                }
            }
            finally
            {
                // Always release the input, also when the copy was interrupted.
                ii.Close();
            }
        }
    }
    catch (System.IO.IOException ioe)
    {
        // Print the whole exception (type, message and stack trace), not just the trace.
        System.Console.Error.WriteLine(ioe);
    }
    finally
    {
        // Close the compound reader before the directory it was opened from, and
        // close each one independently so a failure in one cannot leak the other.
        try
        {
            if (cfr != null)
            {
                cfr.Close();
            }
        }
        catch (System.IO.IOException ioe)
        {
            System.Console.Error.WriteLine(ioe);
        }

        try
        {
            if (dir != null)
            {
                dir.Close();
            }
        }
        catch (System.IO.IOException ioe)
        {
            System.Console.Error.WriteLine(ioe);
        }
    }
}

/// <summary>
/// Returns all commit points that exist in the Directory. Normally, because
/// the default is <see cref="KeepOnlyLastCommitDeletionPolicy" />, there
/// would be only one commit point. But if you're using a custom
/// <see cref="IndexDeletionPolicy" /> then there could be many commits.
/// Once you have a given commit, you can open a reader on it by calling
/// <see cref="IndexReader.Open(IndexCommit,bool)" />.
/// There must be at least one commit in the Directory, else this method
/// throws <see cref="System.IO.IOException" />. Note that if a commit is in
/// progress while this method is running, that commit may or may not be
/// part of the returned collection.
/// </summary>
public static System.Collections.Generic.ICollection<IndexCommit> ListCommits(Directory dir)
{
    return DirectoryReader.ListCommits(dir);
}

/// <summary>
/// Expert: returns the sequential sub readers that this reader is logically
/// composed of. For example, IndexSearcher uses this API to drive searching
/// by one sub reader at a time. If this reader is not composed of sequential
/// child readers, it should return null (as this base implementation does).
/// If this method returns an empty array, that means this reader is a null
/// reader (for example a MultiReader that has no sub readers).
/// <p/>
/// NOTE: You should not try using sub-readers returned by this method to
/// make any changes (setNorm, deleteDocument, etc.). While this might
/// succeed for one composite reader (like MultiReader), it will most likely
/// lead to index corruption for other readers (like DirectoryReader obtained
/// through <see cref="IndexReader.Open(Lucene.Net.Store.Directory,bool)" />).
/// Use the parent reader directly.
/// </summary>
public virtual IndexReader[] GetSequentialSubReaders()
{
    return null;
}

/// <summary>Expert. This base implementation returns the reader itself.</summary>
public virtual object FieldCacheKey
{
    get { return this; }
}

/// <summary>
/// Expert. This base implementation returns the reader itself.
/// NOTE(review): the original comment warns that this "returns null if the
/// reader has no deletions"; the base class never does, so the warning
/// presumably describes overriding subclasses. Confirm against them.
/// </summary>
public virtual object DeletesCacheKey
{
    get { return this; }
}

/// <summary>
/// Returns the number of unique terms (across all fields) in this reader.
///
/// This property is a long, even though internally Lucene cannot handle
/// more than 2^31 unique terms, for a possible future when this limitation
/// is removed.
/// </summary>
/// <exception cref="NotSupportedException">
/// If this count cannot be easily determined (eg Multi*Readers); this base
/// implementation always throws. Instead, you should call
/// <see cref="GetSequentialSubReaders" /> and ask each sub reader for its
/// unique term count.
/// </exception>
public virtual long UniqueTermCount
{
    get { throw new System.NotSupportedException("this reader does not implement getUniqueTermCount()"); }
}

/// <summary>
/// For IndexReader implementations that use TermInfosReader to read terms,
/// this returns the current indexDivisor as specified when the reader was
/// opened. This base implementation always throws.
/// </summary>
/// <exception cref="NotSupportedException">Always, unless overridden by a subclass.</exception>
public virtual int TermInfosIndexDivisor
{
    get { throw new NotSupportedException("This reader does not support this method."); }
}
} // class IndexReader
} // namespace Lucene.Net.Index