1 ///////////////////////////////////////////////////////////////////////////// 2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 3 // Distributable under the terms of either the Apache License (Version 2.0) 4 // or the GNU Lesser General Public License. 5 ///////////////////////////////////////////////////////////////////////////// 6 7 #ifndef FIELDSREADER_H 8 #define FIELDSREADER_H 9 10 #include "AbstractField.h" 11 #include "CloseableThreadLocal.h" 12 13 namespace Lucene { 14 15 /// Class responsible for access to stored document fields. It uses <segment>.fdt and <segment>.fdx; files. 16 class LPPAPI FieldsReader : public LuceneObject { 17 public: 18 /// Used only by clone 19 FieldsReader(const FieldInfosPtr& fieldInfos, int32_t numTotalDocs, int32_t size, int32_t format, int32_t formatSize, 20 int32_t docStoreOffset, const IndexInputPtr& cloneableFieldsStream, const IndexInputPtr& cloneableIndexStream); 21 FieldsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn); 22 FieldsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn, int32_t readBufferSize, int32_t docStoreOffset = -1, int32_t size = 0); 23 24 virtual ~FieldsReader(); 25 26 LUCENE_CLASS(FieldsReader); 27 28 protected: 29 FieldInfosPtr fieldInfos; 30 31 // The main fieldStream, used only for cloning. 32 IndexInputPtr cloneableFieldsStream; 33 34 // This is a clone of cloneableFieldsStream used for reading documents. It should not be cloned outside of a 35 // synchronized context. 36 IndexInputPtr fieldsStream; 37 38 IndexInputPtr cloneableIndexStream; 39 IndexInputPtr indexStream; 40 int32_t numTotalDocs; 41 int32_t _size; 42 bool closed; 43 int32_t format; 44 int32_t formatSize; 45 46 // The docID offset where our docs begin in the index file. This will be 0 if we have our own private file. 47 int32_t docStoreOffset; 48 49 CloseableThreadLocal<IndexInput> fieldsStreamTL; 50 bool isOriginal; 51 52 public: 53 /// Returns a cloned FieldsReader that shares open IndexInputs with the original one. It is the caller's job not to 54 /// close the original FieldsReader until all clones are called (eg, currently SegmentReader manages this logic). 55 virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); 56 57 /// Closes the underlying {@link IndexInput} streams, including any ones associated with a lazy implementation of a 58 /// Field. This means that the Fields values will not be accessible. 59 void close(); 60 61 int32_t size(); 62 63 bool canReadRawDocs(); 64 65 DocumentPtr doc(int32_t n, const FieldSelectorPtr& fieldSelector); 66 67 /// Returns the length in bytes of each raw document in a contiguous range of length numDocs starting with startDocID. 68 /// Returns the IndexInput (the fieldStream), already seeked to the starting point for startDocID. 69 IndexInputPtr rawDocs(Collection<int32_t> lengths, int32_t startDocID, int32_t numDocs); 70 71 protected: 72 void ConstructReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn, int32_t readBufferSize, int32_t docStoreOffset, int32_t size); 73 74 void ensureOpen(); 75 76 void seekIndex(int32_t docID); 77 78 /// Skip the field. We still have to read some of the information about the field, but can skip past the actual content. 79 /// This will have the most payoff on large fields. 80 void skipField(bool binary, bool compressed); 81 void skipField(bool binary, bool compressed, int32_t toRead); 82 83 void addFieldLazy(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed, bool tokenize); 84 void addField(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed, bool tokenize); 85 86 /// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes). 87 /// Read just the size - caller must skip the field content to continue reading fields. Return the size in bytes or chars, 88 /// depending on field type. 89 int32_t addFieldSize(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed); 90 91 ByteArray uncompress(ByteArray b); 92 String uncompressString(ByteArray b); 93 94 friend class LazyField; 95 }; 96 97 class LazyField : public AbstractField { 98 public: 99 LazyField(const FieldsReaderPtr& reader, const String& name, Store store, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed); 100 LazyField(const FieldsReaderPtr& reader, const String& name, Store store, Index index, TermVector termVector, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed); 101 virtual ~LazyField(); 102 103 LUCENE_CLASS(LazyField); 104 105 protected: 106 FieldsReaderWeakPtr _reader; 107 int32_t toRead; 108 int64_t pointer; 109 110 /// @deprecated Only kept for backward-compatibility with <3.0 indexes. 111 bool isCompressed; 112 113 public: 114 /// The value of the field as a Reader, or null. If null, the String value, binary value, or TokenStream value is used. 115 /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set. 116 ReaderPtr readerValue(); 117 118 /// The value of the field as a TokenStream, or null. If null, the Reader value, String value, or binary value is used. 119 /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set. 120 TokenStreamPtr tokenStreamValue(); 121 122 /// The value of the field as a String, or null. If null, the Reader value, binary value, or TokenStream value is used. 123 /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set. 124 String stringValue(); 125 126 int64_t getPointer(); 127 void setPointer(int64_t pointer); 128 int32_t getToRead(); 129 void setToRead(int32_t toRead); 130 131 /// Return the raw byte[] for the binary field. 132 virtual ByteArray getBinaryValue(ByteArray result); 133 134 protected: 135 IndexInputPtr getFieldStream(); 136 }; 137 138 } 139 140 #endif 141