1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6 
7 #ifndef ABSTRACTFIELD_H
8 #define ABSTRACTFIELD_H
9 
10 #include "Fieldable.h"
11 
12 namespace Lucene {
13 
/// Common base for field implementations.  Declares the Store / Index / TermVector option enums and holds
/// the field name, option flags, boost and value storage as protected members.  Both constructors are
/// protected.
14 class LPPAPI AbstractField : public Fieldable, public LuceneObject {
15 public:
16     /// Specifies whether and how a field should be stored.
17     enum Store {
18         /// Store the original field value in the index. This is useful for short texts like a document's title
19         /// which should be displayed with the results. The value is stored in its original form, i.e. no analyzer
20         /// is used before it is stored.
21         STORE_YES,
22 
23         /// Do not store the field value in the index.
24         STORE_NO
25     };
26 
27     /// Specifies whether and how a field should be indexed.
28     enum Index {
29         /// Do not index the field value. This field can thus not be searched, but one can still access its
30         /// contents provided it is stored (see {@link Store}).
31         INDEX_NO,
32 
33         /// Index the tokens produced by running the field's value through an Analyzer.  This is useful for
34         /// common text.
35         INDEX_ANALYZED,
36 
37         /// Index the field's value without using an Analyzer, so it can be searched.  As no analyzer is used
38         /// the value will be stored as a single term. This is useful for unique Ids like product numbers.
39         INDEX_NOT_ANALYZED,
40 
41         /// Index the field's value without an Analyzer, and also disable the storing of norms.  Note that you
42         /// can also separately enable/disable norms by calling {@link #setOmitNorms}.  No norms means
43         /// that index-time field and document boosting and field length normalization are disabled.  The benefit
44         /// is less memory usage as norms take up one byte of RAM per indexed field for every document in the
45         /// index, during searching.  Note that once you index a given field with norms enabled, disabling norms
46         /// will have no effect.  In other words, for this to have the above described effect on a field, all
47         /// instances of that field must be indexed with INDEX_NOT_ANALYZED_NO_NORMS from the beginning.
48         INDEX_NOT_ANALYZED_NO_NORMS,
49 
50         /// Index the tokens produced by running the field's value through an Analyzer, and also separately
51         /// disable the storing of norms.  See {@link #INDEX_NOT_ANALYZED_NO_NORMS} for what norms are and why
52         /// you may want to disable them.
53         INDEX_ANALYZED_NO_NORMS
54     };
55 
56     /// Specifies whether and how a field should have term vectors.
57     enum TermVector {
58         /// Do not store term vectors.
59         TERM_VECTOR_NO,
60 
61         /// Store the term vectors of each document. A term vector is a list of the document's terms and their
62         /// number of occurrences in that document.
63         TERM_VECTOR_YES,
64 
65         /// Store the term vector + token position information
66         /// @see #TERM_VECTOR_YES
67         TERM_VECTOR_WITH_POSITIONS,
68 
69         /// Store the term vector + token offset information
70         /// @see #TERM_VECTOR_YES
71         TERM_VECTOR_WITH_OFFSETS,
72 
73         /// Store the term vector + token position and offset information
74         /// @see #TERM_VECTOR_YES
75         /// @see #TERM_VECTOR_WITH_POSITIONS
76         /// @see #TERM_VECTOR_WITH_OFFSETS
77         TERM_VECTOR_WITH_POSITIONS_OFFSETS
78     };
79 
80 public:
81     virtual ~AbstractField();
82 
    // Project macro defined outside this header; its expansion is not visible here.
83     LUCENE_CLASS(AbstractField);
84 
85 protected:
    /// Protected constructors; the second takes the field name plus one option from each enum above.
    /// NOTE(review): how the options map onto the flags below is defined in the implementation file - confirm.
86     AbstractField();
87     AbstractField(const String& name, Store store, Index index, TermVector termVector);
88 
    // Field name, option flags and boost.  NOTE(review): presumably each backs the like-named accessor
    // declared further down (e.g. _isStored -> isStored()) - confirm against the implementation.
89     String _name;
90     bool storeTermVector;
91     bool storeOffsetWithTermVector;
92     bool storePositionWithTermVector;
93     bool _omitNorms;
94     bool _isStored;
95     bool _isIndexed;
96     bool _isTokenized;
97     bool _isBinary;
98     bool lazy;
99     bool omitTermFreqAndPositions;
100     double boost;
101 
102     // the data object for all different kinds of field values
103     FieldsData fieldsData;
104 
105     // pre-analyzed tokenStream for indexed fields
106     TokenStreamPtr tokenStream;
107 
108     // length/offset for all primitive types (presumably what getBinaryLength() / getBinaryOffset() report)
109     int32_t binaryLength;
110     int32_t binaryOffset;
111 
112 public:
113     /// Sets the boost factor for hits on this field.  This value will be multiplied into the score of all
114     /// hits on this field of this document.
115     ///
116     /// The boost is multiplied by {@link Document#getBoost()} of the document containing this field.
117     /// If a document has multiple fields with the same name, all such values are multiplied together.
118     /// This product is then used to compute the norm factor for the field.  By default, in the {@link
119     /// Similarity#computeNorm(String, FieldInvertState)} method, the boost value is multiplied by the
120     /// {@link Similarity#lengthNorm(String,int)} and then rounded by {@link Similarity#encodeNorm(double)}
121     /// before it is stored in the index.  One should attempt to ensure that this product does not overflow
122     /// the range of that encoding.
123     ///
124     /// @see Document#setBoost(double)
125     /// @see Similarity#computeNorm(String, FieldInvertState)
126     /// @see Similarity#encodeNorm(double)
127     virtual void setBoost(double boost);
128 
129     /// Returns the boost factor for hits for this field.
130     ///
131     /// The default value is 1.0.
132     ///
133     /// Note: this value is not stored directly with the document in the index. Documents returned from
134     /// {@link IndexReader#document(int)} and {@link Searcher#doc(int)} may thus not have the same value
135     /// present as when this field was indexed.
136     virtual double getBoost();
137 
138     /// Returns the name of the field as a String. For example "date", "title", "body", ...
139     virtual String name();
140 
141     /// True if the value of the field is to be stored in the index for return with search hits. It is an
142     /// error for this to be true if a field is Reader-valued.
143     virtual bool isStored();
144 
145     /// True if the value of the field is to be indexed, so that it may be searched on.
146     virtual bool isIndexed();
147 
148     /// True if the value of the field should be tokenized as text prior to indexing.  Un-tokenized fields
149     /// are indexed as a single word and may not be Reader-valued.
150     virtual bool isTokenized();
151 
152     /// True if the term or terms used to index this field are stored as a term vector, available from
153     /// {@link IndexReader#getTermFreqVector(int,String)}.  These methods do not provide access to the
154     /// original content of the field, only to terms used to index it. If the original content must be
155     /// preserved, use the stored attribute instead.
156     virtual bool isTermVectorStored();
157 
158     /// True if terms are stored as term vector together with their offsets (start and end position in
159     /// source text).
160     virtual bool isStoreOffsetWithTermVector();
161 
162     /// True if terms are stored as term vector together with their token positions.
163     virtual bool isStorePositionWithTermVector();
164 
165     /// True if the value of the field is stored as binary.
166     virtual bool isBinary();
167 
168     /// Return the raw bytes of the binary field.  Note that you must also call {@link #getBinaryLength}
169     /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field.
170     /// @return the Field value as a ByteArray.
171     virtual ByteArray getBinaryValue();
172 
173     /// Overload of {@link #getBinaryValue()} that takes a caller-supplied ByteArray.  As above, call
174     /// {@link #getBinaryLength} and {@link #getBinaryOffset} to know which range of bytes belong to the field.
    /// @param result NOTE(review): presumably a buffer the implementation may reuse for the value - confirm.
175     /// @return the Field value as a ByteArray.
176     virtual ByteArray getBinaryValue(ByteArray result);
177 
178     /// Returns the length of the byte segment that is used as value; if the Field is not binary the
179     /// returned value is undefined.
180     /// @return length of the byte segment that represents this Field value.
181     virtual int32_t getBinaryLength();
182 
183     /// Returns the offset into the byte segment that is used as value; if the Field is not binary the
184     /// returned value is undefined.
185     /// @return index of the first byte in the byte segment that represents this Field value.
186     virtual int32_t getBinaryOffset();
187 
188     /// True if norms are omitted for this indexed field.
189     virtual bool getOmitNorms();
190 
191     /// @see #setOmitTermFreqAndPositions
192     virtual bool getOmitTermFreqAndPositions();
193 
194     /// If set, omit normalization factors associated with this indexed field.
195     /// This effectively disables indexing boosts and length normalization for this field.
196     virtual void setOmitNorms(bool omitNorms);
197 
198     /// If set, omit term freq, positions and payloads from postings for this field.
199     ///
200     /// NOTE: While this option reduces storage space required in the index, it also means any query requiring
201     /// positional information, such as {@link PhraseQuery} or {@link SpanQuery} subclasses will silently fail
202     /// to find results.
203     virtual void setOmitTermFreqAndPositions(bool omitTermFreqAndPositions);
204 
205     /// Indicates whether a Field is Lazy or not.  The semantics of Lazy loading are such that if a Field
206     /// is lazily loaded, retrieving its values via {@link #stringValue()} or {@link #getBinaryValue()}
207     /// is only valid as long as the {@link IndexReader} that retrieved the {@link Document} is still open.
208     ///
209     /// @return true if this field can be loaded lazily
210     virtual bool isLazy();
211 
212     /// Prints a Field for human consumption.
213     virtual String toString();
214 
215 protected:
    /// Takes a TermVector option.  NOTE(review): presumably updates storeTermVector, storeOffsetWithTermVector
    /// and storePositionWithTermVector to match - the definition is not in this header; confirm there.
216     void setStoreTermVector(TermVector termVector);
217 };
218 
219 }
220 
221 #endif
222