1 /*------------------------------------------------------------------------------
2 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3 *
4 * Distributable under the terms of either the Apache License (Version 2.0) or
5 * the GNU Lesser General Public License, as specified in the COPYING file.
6 ------------------------------------------------------------------------------*/
7 #ifndef _lucene_index_TermInfosReader_
8 #define _lucene_index_TermInfosReader_
9 
10 
11 //#include "Terms.h"
12 #include "_SegmentTermEnum.h"
CL_CLASS_DEF(store,Directory)13 CL_CLASS_DEF(store,Directory)
14 //CL_CLASS_DEF(store,IndexInput)
15 #include "CLucene/util/_ThreadLocal.h"
16 //#include "FieldInfos.h"
17 //#include "TermInfo.h"
18 //#include "TermInfosWriter.h"
19 
20 CL_NS_DEF(index)
21 /** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
22 * Directory.  Pairs are accessed either by Term or by ordinal position in the
23 * set.
24 *
25 * PORT STATUS: 365707 (jlucene 1.9) -- started port to JLucene 2.3.2
*
* Public lookups go through terms() and get(const Term*); the ordinal-position
* accessors (get(int32_t), getPosition(), scanEnum(int32_t)) are declared
* private in this header.
*
* NOTE(review): the class stores raw pointers and declares no copy constructor
* or assignment operator here; whether LUCENE_BASE restricts copying is not
* visible from this header -- confirm before copying instances.
26 */
27 	class TermInfosReader :LUCENE_BASE{
28 	private:
		// NOTE(review): presumably the dir / segment / fis constructor arguments;
		// ownership of these pointers is not visible in this header -- confirm in
		// the implementation file.
29 		CL_NS(store)::Directory* directory;
30 		const char* segment;
31 		FieldInfos* fieldInfos;
32 
		// ThreadLocal slot of SegmentTermEnum*, declared with
		// Deletor::Object<SegmentTermEnum> as its deletor type.
33 		CL_NS(util)::ThreadLocal<SegmentTermEnum*,
34 			CL_NS(util)::Deletor::Object<SegmentTermEnum> > enumerators;
35 
		// NOTE(review): presumably hands back the entry of `enumerators` that
		// belongs to the calling thread -- confirm in the implementation file.
36 		SegmentTermEnum* getEnum();
		// NOTE(review): presumably enumerators over the .tis file (origEnum) and
		// the .tii file (indexEnum) named in the constructor comment -- confirm.
37 		SegmentTermEnum* origEnum;
38 		SegmentTermEnum* indexEnum;
		// NOTE(review): presumably the count reported by size() -- confirm.
39 		int64_t _size;
40 
		// Term index state. NOTE(review): indexTermsLength is presumably the
		// element count of indexTerms (and of indexInfos / indexPointers, if
		// they are parallel to it), filled in by ensureIndexIsRead() -- confirm.
41 		Term* indexTerms; //note: this is a list of objects, not arrays!
42     int32_t indexTermsLength;
43 		TermInfo* indexInfos;
44 		int64_t* indexPointers;
45 
		// NOTE(review): indexDivisor is presumably the value behind
		// setIndexDivisor() / getIndexDivisor(); how totalIndexInterval is
		// derived is not visible in this header -- confirm.
46 		int32_t indexDivisor;
47 		int32_t totalIndexInterval;
48 
		// Declares THIS_LOCK through the project's DEFINE_MUTEX macro; which
		// members it guards is not visible in this header.
49 		DEFINE_MUTEX(THIS_LOCK)
50 
51 	public:
52 		/**
53 		* Constructor.
54         * Reads the TermInfos file (.tis) and eventually the Term Info Index file (.tii)
		*
		* @param dir directory handle (NOTE(review): presumably where the
		*        segment's files are opened -- confirm)
		* @param segment segment identifier (NOTE(review): presumably the base
		*        name of the .tis/.tii files -- confirm)
		* @param fis field infos (NOTE(review): presumably those of the same
		*        segment -- confirm)
		* @param readBufferSize buffer size; defaults to
		*        CL_NS(store)::BufferedIndexInput::BUFFER_SIZE
55 		*/
56 		TermInfosReader(CL_NS(store)::Directory* dir, const char* segment, FieldInfos* fis,
57 			const int32_t readBufferSize = CL_NS(store)::BufferedIndexInput::BUFFER_SIZE);
		// Destructor. NOTE(review): whether it calls close() is not visible in
		// this header -- confirm.
58 		~TermInfosReader();
59 
		// NOTE(review): undocumented in this header; presumably report the skip
		// settings of the underlying term enumeration -- confirm.
60 		int32_t getSkipInterval() const;
61 		int32_t getMaxSkipLevels() const;
62 
63 		/**
64 		* <p>Sets the indexDivisor, which subsamples the number
65 		* of indexed terms loaded into memory.  This has a
66 		* similar effect as {@link
67 		* IndexWriter#setTermIndexInterval} except that setting
68 		* must be done at indexing time while this setting can be
69 		* set per reader.  When set to N, then one in every
70 		* N*termIndexInterval terms in the index is loaded into
71 		* memory.  By setting this to a value > 1 you can reduce
72 		* memory usage, at the expense of higher latency when
73 		* loading a TermInfo.  The default value is 1.</p>
74 		*
75 		* <b>NOTE:</b> you must call this before the term
76 		* index is loaded.  If the index is already loaded,
77 		* an IllegalStateException is thrown.
78 		*
		* @param _indexDivisor the divisor N described above
79 		* @throws IllegalStateException if the term index has
80 		* already been loaded into memory.
81 		*/
82 		void setIndexDivisor(const int32_t _indexDivisor);
83 
84 		/** Returns the indexDivisor.
85 		* @see #setIndexDivisor
86 		*/
87 		int32_t getIndexDivisor() const;
88 
89 		/** Close the enumeration of TermInfos */
90 		void close();
91 
92 		/** Returns the number of term/value pairs in the set. */
93 		int64_t size() const;
94 
95 		/**
96 		* Returns an enumeration of terms starting at or after the named term.
97 		* If no term is specified, an enumeration of all the Terms
98 		* and TermInfos in the set is returned.
		*
		* @param term term to start at; defaults to NULL (no term specified)
		*
		* NOTE(review): ownership of the returned enumerator is not stated in
		* this header -- confirm before deleting or retaining it.
99 		*/
100 		SegmentTermEnum* terms(const Term* term=NULL);
101 
102 		/** Returns the TermInfo for a Term in the set, or null. */
103 		TermInfo* get(const Term* term);
104 	private:
105 		/** Reads the term info index file or .tii file. */
106 		void ensureIndexIsRead();
107 
108 		/** Returns the offset of the greatest index entry which is less than or equal to term.*/
109 		int32_t getIndexOffset(const Term* term);
110 
111 		/** Reposition the current Term and TermInfo to indexOffset */
112 		void seekEnum(const int32_t indexOffset);
113 
114 		/** Scans the Enumeration of terms for term and returns the corresponding TermInfo instance if found.
115         * The search is started from the current term.
116 		*/
117 		TermInfo* scanEnum(const Term* term);
118 
119         /** Scans the enumeration to the requested position and returns the Term located at that position */
120 		Term* scanEnum(const int32_t position);
121 
122 		/** Returns the position of a Term in the set or -1. */
123 		int64_t getPosition(const Term* term);
124 
125 		/** Returns the nth term in the set. synchronized
		* NOTE(review): "synchronized" presumably means the implementation takes
		* THIS_LOCK -- confirm in the implementation file.
		*/
126 		Term* get(const int32_t position);
127 
128 	};
129 CL_NS_END
130 #endif
131