1 /** @file
2  * @brief A TermList in a chert database.
3  */
4 /* Copyright (C) 2007,2008,2010,2011 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19  */
20 
21 #ifndef XAPIAN_INCLUDED_CHERT_TERMLIST_H
22 #define XAPIAN_INCLUDED_CHERT_TERMLIST_H
23 
24 #include <string>
25 
26 #include "xapian/intrusive_ptr.h"
27 #include <xapian/positioniterator.h>
28 #include <xapian/types.h>
29 
// Forward declaration: this header only takes ExpandStats by reference (see
// ChertTermList::accumulate_stats()), so the full class definition isn't
// needed here.
namespace Xapian {
    namespace Internal {
	class ExpandStats;
    }
}
35 
36 #include "chert_database.h"
37 #include "api/termlist.h"
38 #include "chert_table.h"
39 
/** A TermList in a chert database.
 *
 *  Provides access to the terms in the termlist of a single document
 *  (@a did), decoding them from the tag value held in @a data.
 *
 *  Objects of this class can't be copied or assigned: the copy constructor
 *  and assignment operator are declared private and no definition for them
 *  appears in this header.
 */
class ChertTermList : public TermList {
    /// Don't allow assignment.
    void operator=(const ChertTermList &);

    /// Don't allow copying.
    ChertTermList(const ChertTermList &);

    /// The database we're reading data from.
    Xapian::Internal::intrusive_ptr<const ChertDatabase> db;

    /// The document id that this TermList is for.
    Xapian::docid did;

    /// The length of document @a did.
    chert_doclen_t doclen;

    /// The number of entries in this termlist.
    Xapian::termcount termlist_size;

    /// The tag value from the termlist table which holds the encoded termlist.
    std::string data;

    /** Current position within the encoded tag value held in @a data.
     *
     *  If we've iterated to the end of the list, this gets set to NULL.
     */
    const char *pos;

    /// Pointer to the end of the encoded tag value.
    const char *end;

    /** The termname at the current position.
     *
     *  There is no current position until next() has been called (see
     *  next()).
     */
    std::string current_term;

    /// The wdf for the term at the current position.
    Xapian::termcount current_wdf;

    /** The term frequency for the term at the current position.
     *
     *  This will have the value 0 if the term frequency has not yet been
     *  looked up in the database (so it needs to be mutable).
     */
    mutable Xapian::doccount current_termfreq;

  public:
    /** Create a new ChertTermList object for document @a did_ in DB @a db_
     *
     *  @param db_	The database to read the termlist from.
     *  @param did_	The document id to read the termlist for.
     */
    ChertTermList(Xapian::Internal::intrusive_ptr<const ChertDatabase> db_,
		  Xapian::docid did_);

    /** Return the length of this document.
     *
     *  This is a non-virtual method, used by ChertDatabase.
     */
    chert_doclen_t get_doclength() const;

    /** Return approximate size of this termlist.
     *
     *  For a ChertTermList, this value will always be exact.
     */
    Xapian::termcount get_approx_size() const;

    /** Collate weighting information for the current term.
     *
     *  @param stats	The ExpandStats object to collate the information into.
     */
    void accumulate_stats(Xapian::Internal::ExpandStats & stats) const;

    /** Return the termname at the current position.
     *
     *  next() must have been called first (see next()).
     */
    std::string get_termname() const;

    /** Return the wdf for the term at the current position.
     *
     *  next() must have been called first (see next()).
     */
    Xapian::termcount get_wdf() const;

    /** Return the term frequency for the term at the current position.
     *
     *  In order to be able to support updating databases efficiently, we can't
     *  store this value in the termlist table, so it has to be read from the
     *  postlist table, which is relatively expensive (compared to reading the
     *  wdf for example).
     */
    Xapian::doccount get_termfreq() const;

    /** Advance the current position to the next term in the termlist.
     *
     *  The list starts before the first term in the list, so next()
     *  must be called before any methods which need the context of
     *  the current position.
     *
     *  @return Always returns 0 for a ChertTermList.
     */
    TermList * next();

    /** Skip forward to the specified term.
     *
     *  NOTE(review): the exact semantics aren't visible in this header -
     *  presumably this advances to the first entry which is >= @a term (or
     *  to the end of the list if there is no such entry), and returns 0 as
     *  next() does.  Confirm against the implementation and the TermList
     *  base class.
     *
     *  @param term	The term to skip forward to.
     */
    TermList * skip_to(const std::string & term);

    /// Return true if the current position is past the last term in this list.
    bool at_end() const;

    /// Return the length of the position list for the current position.
    Xapian::termcount positionlist_count() const;

    /// Return a PositionIterator for the current position.
    Xapian::PositionIterator positionlist_begin() const;
};
141 
142 #endif // XAPIAN_INCLUDED_CHERT_TERMLIST_H
143