/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
/////////////////////////////////////////////////////////////////////////////

#ifndef ABSTRACTFIELD_H
#define ABSTRACTFIELD_H

#include "Fieldable.h"

namespace Lucene {

/// Declares the state shared by document fields (name, store/index/term vector flags, boost, value
/// data and an optional token stream) together with the accessors that expose it. It derives from
/// both Fieldable and LuceneObject. The constructors are protected, so instances are created through
/// derived classes.
class LPPAPI AbstractField : public Fieldable, public LuceneObject {
public:
    /// Specifies whether and how a field should be stored.
    enum Store {
        /// Store the original field value in the index. This is useful for short texts like a document's title
        /// which should be displayed with the results. The value is stored in its original form, ie. no analyzer
        /// is used before it is stored.
        STORE_YES,

        /// Do not store the field value in the index.
        STORE_NO
    };

    /// Specifies whether and how a field should be indexed.
    enum Index {
        /// Do not index the field value. This field can thus not be searched, but one can still access its
        /// contents provided it is {@link Field.Store stored}.
        INDEX_NO,

        /// Index the tokens produced by running the field's value through an Analyzer. This is useful for
        /// common text.
        INDEX_ANALYZED,

        /// Index the field's value without using an Analyzer, so it can be searched. As no analyzer is used
        /// the value will be stored as a single term. This is useful for unique Ids like product numbers.
        INDEX_NOT_ANALYZED,

        /// Index the field's value without an Analyzer, and also disable the storing of norms. Note that you
        /// can also separately enable/disable norms by calling {@link Field#setOmitNorms}. No norms means
        /// that index-time field and document boosting and field length normalization are disabled. The benefit
        /// is less memory usage as norms take up one byte of RAM per indexed field for every document in the
        /// index, during searching. Note that once you index a given field with norms enabled, disabling norms
        /// will have no effect. In other words, for this to have the above described effect on a field, all
        /// instances of that field must be indexed with INDEX_NOT_ANALYZED_NO_NORMS from the beginning.
        INDEX_NOT_ANALYZED_NO_NORMS,

        /// Index the tokens produced by running the field's value through an Analyzer, and also separately
        /// disable the storing of norms. See {@link #INDEX_NOT_ANALYZED_NO_NORMS} for what norms are and why
        /// you may want to disable them.
        INDEX_ANALYZED_NO_NORMS
    };

    /// Specifies whether and how a field should have term vectors.
    enum TermVector {
        /// Do not store term vectors.
        TERM_VECTOR_NO,

        /// Store the term vectors of each document. A term vector is a list of the document's terms and their
        /// number of occurrences in that document.
        TERM_VECTOR_YES,

        /// Store the term vector + token position information
        /// @see #TERM_VECTOR_YES
        TERM_VECTOR_WITH_POSITIONS,

        /// Store the term vector + token offset information
        /// @see #TERM_VECTOR_YES
        TERM_VECTOR_WITH_OFFSETS,

        /// Store the term vector + token position and offset information
        /// @see #TERM_VECTOR_YES
        /// @see #TERM_VECTOR_WITH_POSITIONS
        /// @see #TERM_VECTOR_WITH_OFFSETS
        TERM_VECTOR_WITH_POSITIONS_OFFSETS
    };

public:
    virtual ~AbstractField();

    LUCENE_CLASS(AbstractField);

protected:
    /// Default constructor; protected, so it is only callable from derived classes.
    /// NOTE(review): the initial values of the members below are set in the implementation file, which is
    /// not visible from this header.
    AbstractField();

    /// Constructs a field from its name and its store, index and term vector options.
    /// NOTE(review): presumably translates the three options into the boolean flags declared below;
    /// confirm against the implementation.
    AbstractField(const String& name, Store store, Index index, TermVector termVector);

    // Field state. NOTE(review): these members presumably back the accessors of matching name declared
    // further down (e.g. _isStored / isStored()); the definitions are not visible in this header.
    String _name;
    bool storeTermVector;
    bool storeOffsetWithTermVector;
    bool storePositionWithTermVector;
    bool _omitNorms;
    bool _isStored;
    bool _isIndexed;
    bool _isTokenized;
    bool _isBinary;
    bool lazy;
    bool omitTermFreqAndPositions;
    double boost;

    // the data object for all the different kinds of field values
    FieldsData fieldsData;

    // pre-analyzed tokenStream for indexed fields
    TokenStreamPtr tokenStream;

    // length of and offset into the binary value (see getBinaryLength() and getBinaryOffset())
    int32_t binaryLength;
    int32_t binaryOffset;

public:
    /// Sets the boost factor for hits on this field. This value will be multiplied into the score of all
    /// hits on this field of this document.
    ///
    /// The boost is multiplied by {@link Document#getBoost()} of the document containing this field.
    /// If a document has multiple fields with the same name, all such values are multiplied together.
    /// This product is then used to compute the norm factor for the field. By default, in the {@link
    /// Similarity#computeNorm(String, FieldInvertState)} method, the boost value is multiplied by the
    /// {@link Similarity#lengthNorm(String,int)} and then rounded by {@link Similarity#encodeNorm(double)}
    /// before it is stored in the index. One should attempt to ensure that this product does not overflow
    /// the range of that encoding.
    ///
    /// @see Document#setBoost(double)
    /// @see Similarity#computeNorm(String, FieldInvertState)
    /// @see Similarity#encodeNorm(double)
    virtual void setBoost(double boost);

    /// Returns the boost factor for hits for this field.
    ///
    /// The default value is 1.0.
    ///
    /// Note: this value is not stored directly with the document in the index. Documents returned from
    /// {@link IndexReader#document(int)} and {@link Searcher#doc(int)} may thus not have the same value
    /// present as when this field was indexed.
    virtual double getBoost();

    /// Returns the name of the field as an interned string. For example "date", "title", "body", ...
    virtual String name();

    /// True if the value of the field is to be stored in the index for return with search hits. It is an
    /// error for this to be true if a field is Reader-valued.
    virtual bool isStored();

    /// True if the value of the field is to be indexed, so that it may be searched on.
    virtual bool isIndexed();

    /// True if the value of the field should be tokenized as text prior to indexing. Un-tokenized fields
    /// are indexed as a single word and may not be Reader-valued.
    virtual bool isTokenized();

    /// True if the term or terms used to index this field are stored as a term vector, available from
    /// {@link IndexReader#getTermFreqVector(int,String)}. These methods do not provide access to the
    /// original content of the field, only to terms used to index it. If the original content must be
    /// preserved, use the stored attribute instead.
    virtual bool isTermVectorStored();

    /// True if terms are stored as term vector together with their offsets (start and end position in
    /// source text).
    virtual bool isStoreOffsetWithTermVector();

    /// True if terms are stored as term vector together with their token positions.
    virtual bool isStorePositionWithTermVector();

    /// True if the value of the field is stored as binary.
    virtual bool isBinary();

    /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength}
    /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field.
    /// @return reference to the Field value as byte[].
    virtual ByteArray getBinaryValue();

    /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength}
    /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field.
    /// @param result NOTE(review): presumably a caller-supplied array that the implementation may use for
    /// the returned value; its exact role is not visible in this header - confirm against the implementation.
    /// @return reference to the Field value as byte[].
    virtual ByteArray getBinaryValue(ByteArray result);

    /// Returns length of byte[] segment that is used as value, if Field is not binary returned value is
    /// undefined.
    /// @return length of byte[] segment that represents this Field value.
    virtual int32_t getBinaryLength();

    /// Returns offset into byte[] segment that is used as value, if Field is not binary returned value is
    /// undefined.
    /// @return index of the first character in byte[] segment that represents this Field value.
    virtual int32_t getBinaryOffset();

    /// True if norms are omitted for this indexed field.
    virtual bool getOmitNorms();

    /// @see #setOmitTermFreqAndPositions
    virtual bool getOmitTermFreqAndPositions();

    /// If set, omit normalization factors associated with this indexed field.
    /// This effectively disables indexing boosts and length normalization for this field.
    virtual void setOmitNorms(bool omitNorms);

    /// If set, omit term freq, positions and payloads from postings for this field.
    ///
    /// NOTE: While this option reduces storage space required in the index, it also means any query requiring
    /// positional information, such as {@link PhraseQuery} or {@link SpanQuery} subclasses will silently fail
    /// to find results.
    virtual void setOmitTermFreqAndPositions(bool omitTermFreqAndPositions);

    /// Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field
    /// is lazily loaded, retrieving its values via {@link #stringValue()} or {@link #getBinaryValue()}
    /// is only valid as long as the {@link IndexReader} that retrieved the {@link Document} is still open.
    ///
    /// @return true if this field can be loaded lazily
    virtual bool isLazy();

    /// Prints a Field for human consumption.
    virtual String toString();

protected:
    /// Applies the given term vector option to this field.
    /// NOTE(review): presumably updates storeTermVector, storePositionWithTermVector and
    /// storeOffsetWithTermVector to match the option; the definition is not visible in this header.
    void setStoreTermVector(TermVector termVector);
};

}

#endif