1 /*------------------------------------------------------------------------------
2 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3 *
4 * Distributable under the terms of either the Apache License (Version 2.0) or
5 * the GNU Lesser General Public License, as specified in the COPYING file.
6 ------------------------------------------------------------------------------*/
7 #ifndef _lucene_index_TermInfosReader_
8 #define _lucene_index_TermInfosReader_
9
10
11 //#include "Terms.h"
12 #include "_SegmentTermEnum.h"
CL_CLASS_DEF(store,Directory)13 CL_CLASS_DEF(store,Directory)
14 //CL_CLASS_DEF(store,IndexInput)
15 #include "CLucene/util/_ThreadLocal.h"
16 //#include "FieldInfos.h"
17 //#include "TermInfo.h"
18 //#include "TermInfosWriter.h"
19
20 CL_NS_DEF(index)
21 /** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
22 * Directory. Pairs are accessed either by Term or by ordinal position in the
23 * set.
24 *
25 * PORT STATUS: 365707 (jlucene 1.9) -- started port to JLucene 2.3.2
26 */
27 class TermInfosReader :LUCENE_BASE{ // reader over the TermInfos file (.tis) and its index file (.tii); see the constructor comment
28 private:
29 CL_NS(store)::Directory* directory; // NOTE(review): presumably the `dir` passed to the constructor -- confirm in the implementation
30 const char* segment; // NOTE(review): presumably the `segment` passed to the constructor; ownership is not visible in this header
31 FieldInfos* fieldInfos; // NOTE(review): presumably the `fis` passed to the constructor -- confirm in the implementation
32
33 CL_NS(util)::ThreadLocal<SegmentTermEnum*,
34 CL_NS(util)::Deletor::Object<SegmentTermEnum> > enumerators; // ThreadLocal holding SegmentTermEnum pointers, declared with an Object<SegmentTermEnum> deletor
35
36 SegmentTermEnum* getEnum(); // NOTE(review): presumably fetches the calling thread's entry from `enumerators` -- confirm
37 SegmentTermEnum* origEnum; // NOTE(review): presumably the enumerator over the .tis file -- confirm
38 SegmentTermEnum* indexEnum; // NOTE(review): presumably the enumerator over the .tii index file -- confirm
39 int64_t _size; // NOTE(review): presumably the value reported by size() -- confirm
40
41 Term* indexTerms; //note: this is a list of objects, not arrays!
42 int32_t indexTermsLength; // NOTE(review): presumably the number of entries in indexTerms -- confirm
43 TermInfo* indexInfos; // NOTE(review): presumably one TermInfo per entry of indexTerms -- confirm
44 int64_t* indexPointers; // NOTE(review): presumably one file pointer per entry of indexTerms -- confirm
45
46 int32_t indexDivisor; // see setIndexDivisor() / getIndexDivisor() below
47 int32_t totalIndexInterval; // NOTE(review): presumably the index interval scaled by indexDivisor -- confirm
48
49 DEFINE_MUTEX(THIS_LOCK) // NOTE(review): presumably declares a mutex named THIS_LOCK; which methods lock it is not visible in this header
50
51 public:
52 /**
53 * Constructor.
54 * Reads the TermInfos file (.tis) and eventually the Term Info Index file (.tii)
55 */
56 TermInfosReader(CL_NS(store)::Directory* dir, const char* segment, FieldInfos* fis,
57 const int32_t readBufferSize = CL_NS(store)::BufferedIndexInput::BUFFER_SIZE); // NOTE(review): readBufferSize is presumably the buffer size used when opening the term files -- confirm
58 ~TermInfosReader(); // destructor; what it releases is defined in the implementation, not in this header
59
60 int32_t getSkipInterval() const; // NOTE(review): presumably reports the skip interval of the underlying term enumerator -- confirm
61 int32_t getMaxSkipLevels() const; // NOTE(review): presumably reports the max skip levels of the underlying term enumerator -- confirm
62
63 /**
64 * <p>Sets the indexDivisor, which subsamples the number
65 * of indexed terms loaded into memory. This has a
66 * similar effect as {@link
67 * IndexWriter#setTermIndexInterval} except that setting
68 * must be done at indexing time while this setting can be
69 * set per reader. When set to N, then one in every
70 * N*termIndexInterval terms in the index is loaded into
71 * memory. By setting this to a value > 1 you can reduce
72 * memory usage, at the expense of higher latency when
73 * loading a TermInfo. The default value is 1.</p>
74 *
75 * <b>NOTE:</b> you must call this before the term
76 * index is loaded. If the index is already loaded,
77 * an IllegalStateException is thrown.
78 *
79 * @throws IllegalStateException if the term index has
80 * already been loaded into memory.
81 */
82 void setIndexDivisor(const int32_t _indexDivisor);
83
84 /** Returns the indexDivisor.
85 * @see #setIndexDivisor
86 */
87 int32_t getIndexDivisor() const;
88
89 /** Close the enumeration of TermInfos */
90 void close();
91
92 /** Returns the number of term/value pairs in the set. */
93 int64_t size() const;
94
95 /**
96 * Returns an enumeration of terms starting at or after the named term.
97 * If no term is specified (the default, NULL), an enumeration of all the Terms
98 * and TermInfos in the set is returned.
99 */
100 SegmentTermEnum* terms(const Term* term=NULL);
101
102 /** Returns the TermInfo for a Term in the set, or null. */
103 TermInfo* get(const Term* term);
104 private:
105 /** Reads the term info index file or .tii file. */
106 void ensureIndexIsRead();
107
108 /** Returns the offset of the greatest index entry which is less than or equal to term.*/
109 int32_t getIndexOffset(const Term* term);
110
111 /** Repositions the current Term and TermInfo to indexOffset */
112 void seekEnum(const int32_t indexOffset);
113
114 /** Scans the enumeration of terms for term and returns the corresponding TermInfo instance if found.
115 * The search is started from the current term.
116 */
117 TermInfo* scanEnum(const Term* term);
118
119 /** Scans the enumeration to the requested position and returns the Term located at that position */
120 Term* scanEnum(const int32_t position);
121
122 /** Returns the position of a Term in the set or -1. */
123 int64_t getPosition(const Term* term);
124
125 /** Returns the nth term in the set. synchronized */
126 Term* get(const int32_t position);
127
128 };
129 CL_NS_END
130 #endif
131