1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6 
7 #ifndef FIELDSREADER_H
8 #define FIELDSREADER_H
9 
10 #include "AbstractField.h"
11 #include "CloseableThreadLocal.h"
12 
13 namespace Lucene {
14 
15 /// Class responsible for access to stored document fields.  It uses the <segment>.fdt and <segment>.fdx files.
16 class LPPAPI FieldsReader : public LuceneObject {
17 public:
18     /// Used only by clone
19     FieldsReader(const FieldInfosPtr& fieldInfos, int32_t numTotalDocs, int32_t size, int32_t format, int32_t formatSize,
20                  int32_t docStoreOffset, const IndexInputPtr& cloneableFieldsStream, const IndexInputPtr& cloneableIndexStream);
       /// Creates a reader for the stored fields of `segment` in directory `d`; `fn` supplies the field infos.
       /// NOTE(review): buffer size, docStoreOffset and size are presumably defaulted by the implementation - confirm
       /// in the .cpp.
21     FieldsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn);
       /// As above, with an explicit read buffer size.  docStoreOffset defaults to -1 and size to 0 when omitted.
       /// NOTE(review): -1 presumably means the segment does not share a doc store - confirm in the .cpp.
22     FieldsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn, int32_t readBufferSize, int32_t docStoreOffset = -1, int32_t size = 0);
23 
24     virtual ~FieldsReader();
25 
26     LUCENE_CLASS(FieldsReader);
27 
28 protected:
       // Field infos for this reader.  NOTE(review): presumably the value handed to the constructor - confirm.
29     FieldInfosPtr fieldInfos;
30 
31     // The main fieldStream, used only for cloning.
32     IndexInputPtr cloneableFieldsStream;
33 
34     // This is a clone of cloneableFieldsStream used for reading documents.  It should not be cloned outside of a
35     // synchronized context.
36     IndexInputPtr fieldsStream;
37 
       // Index-stream counterparts of the two fields streams above.  NOTE(review): presumably the stream over the
       // <segment>.fdx file and its working clone - confirm in the .cpp.
38     IndexInputPtr cloneableIndexStream;
39     IndexInputPtr indexStream;
40     int32_t numTotalDocs;
       // NOTE(review): presumably the value reported by size() - confirm.
41     int32_t _size;
       // NOTE(review): presumably set by close() and checked by ensureOpen() - confirm.
42     bool closed;
43     int32_t format;
44     int32_t formatSize;
45 
46     // The docID offset where our docs begin in the index file.  This will be 0 if we have our own private file.
47     int32_t docStoreOffset;
48 
       // Thread-local holder for an IndexInput.  NOTE(review): presumably hands each thread its own clone of the
       // fields stream for lazily loaded fields - confirm against LazyField::getFieldStream().
49     CloseableThreadLocal<IndexInput> fieldsStreamTL;
       // NOTE(review): presumably false for instances produced by clone() - confirm.
50     bool isOriginal;
51 
52 public:
53     /// Returns a cloned FieldsReader that shares open IndexInputs with the original one.  It is the caller's job not to
54     /// close the original FieldsReader until all clones are closed (eg, currently SegmentReader manages this logic).
55     virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr());
56 
57     /// Closes the underlying {@link IndexInput} streams, including any ones associated with a lazy implementation of a
58     /// Field.  This means that the Fields values will not be accessible.
59     void close();
60 
       /// NOTE(review): presumably the number of documents this reader covers - confirm.
61     int32_t size();
62 
       /// NOTE(review): presumably reports whether the on-disk format permits rawDocs() bulk access - confirm.
63     bool canReadRawDocs();
64 
       /// Returns the stored document with number n.  NOTE(review): fieldSelector presumably decides which fields are
       /// loaded, lazily loaded or skipped (cf. addField / addFieldLazy / skipField below) - confirm.
65     DocumentPtr doc(int32_t n, const FieldSelectorPtr& fieldSelector);
66 
67     /// Returns the length in bytes of each raw document in a contiguous range of length numDocs starting with startDocID.
68     /// Returns the IndexInput (the fieldStream), already seeked to the starting point for startDocID.
       /// NOTE(review): the per-document lengths are presumably written into `lengths` - confirm.
69     IndexInputPtr rawDocs(Collection<int32_t> lengths, int32_t startDocID, int32_t numDocs);
70 
71 protected:
       /// Takes the same arguments as the six-parameter constructor.  NOTE(review): presumably the shared
       /// initialisation routine behind the directory-based constructors - confirm.
72     void ConstructReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn, int32_t readBufferSize, int32_t docStoreOffset, int32_t size);
73 
       /// NOTE(review): presumably raises an error if the reader has already been closed - confirm.
74     void ensureOpen();
75 
       /// NOTE(review): presumably positions indexStream at the entry for docID - confirm.
76     void seekIndex(int32_t docID);
77 
78     /// Skip the field.  We still have to read some of the information about the field, but can skip past the actual content.
79     /// This will have the most payoff on large fields.
80     void skipField(bool binary, bool compressed);
81     void skipField(bool binary, bool compressed, int32_t toRead);
82 
       /// Add field `fi` to `doc`; the flags describe how the stored value is encoded.  NOTE(review): the Lazy variant
       /// presumably attaches a LazyField instead of reading the value immediately - confirm.
83     void addFieldLazy(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed, bool tokenize);
84     void addField(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed, bool tokenize);
85 
86     /// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes).
87     /// Read just the size - caller must skip the field content to continue reading fields.  Return the size in bytes or chars,
88     /// depending on field type.
89     int32_t addFieldSize(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed);
90 
       /// Decompress a stored value, yielding raw bytes or a String respectively.  NOTE(review): presumably only needed
       /// for compressed fields of older indexes - confirm.
91     ByteArray uncompress(ByteArray b);
92     String uncompressString(ByteArray b);
93 
       // Grants LazyField access to the protected members declared above.
94     friend class LazyField;
95 };
96 
   /// Field implementation tied to a FieldsReader.  NOTE(review): the value is presumably read from the reader's
   /// field stream on first access rather than at document load time - confirm in the .cpp.
97 class LazyField : public AbstractField {
98 public:
        /// Both constructors bind the field to `reader` and record toRead / pointer; the second also takes explicit
        /// Index and TermVector options.  NOTE(review): toRead is presumably the length of the stored value and pointer
        /// its offset in the fields stream - confirm.
99     LazyField(const FieldsReaderPtr& reader, const String& name, Store store, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed);
100     LazyField(const FieldsReaderPtr& reader, const String& name, Store store, Index index, TermVector termVector, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed);
101     virtual ~LazyField();
102 
103     LUCENE_CLASS(LazyField);
104 
105 protected:
        // Weak-pointer handle to the reader this field was created from.
106     FieldsReaderWeakPtr _reader;
        // NOTE(review): presumably the amount to read (bytes or chars depending on field type) - confirm.
107     int32_t toRead;
        // NOTE(review): presumably the position of the value within the fields stream - confirm.
108     int64_t pointer;
109 
110     /// @deprecated Only kept for backward-compatibility with <3.0 indexes.
111     bool isCompressed;
112 
113 public:
114     /// The value of the field as a Reader, or null.  If null, the String value, binary value, or TokenStream value is used.
115     /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set.
116     ReaderPtr readerValue();
117 
118     /// The value of the field as a TokenStream, or null.  If null, the Reader value, String value, or binary value is used.
119     /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set.
120     TokenStreamPtr tokenStreamValue();
121 
122     /// The value of the field as a String, or null.  If null, the Reader value, binary value, or TokenStream value is used.
123     /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set.
124     String stringValue();
125 
        /// Accessors for the pointer and toRead members (going by their names; the definitions are not in this header).
126     int64_t getPointer();
127     void setPointer(int64_t pointer);
128     int32_t getToRead();
129     void setToRead(int32_t toRead);
130 
131     /// Return the raw byte[] for the binary field.
        /// NOTE(review): `result` is presumably an optional caller-supplied buffer to reuse - confirm.
132     virtual ByteArray getBinaryValue(ByteArray result);
133 
134 protected:
        /// NOTE(review): presumably returns the calling thread's IndexInput obtained via the reader's fieldsStreamTL -
        /// confirm.
135     IndexInputPtr getFieldStream();
136 };
137 
138 }
139 
140 #endif
141