/* chert_alltermslist.cc: A termlist containing all terms in a chert database.
 *
 * Copyright (C) 2005,2007,2008,2009,2010 Olly Betts
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */

#include <config.h>

#include "chert_alltermslist.h"
#include "chert_postlist.h"

#include "debuglog.h"
#include "pack.h"
#include "stringutils.h"

30 void
read_termfreq_and_collfreq() const31 ChertAllTermsList::read_termfreq_and_collfreq() const
32 {
33 LOGCALL_VOID(DB, "ChertAllTermsList::read_termfreq_and_collfreq", NO_ARGS);
34 Assert(!current_term.empty());
35 Assert(!at_end());
36
37 // Unpack the termfreq and collfreq from the tag. Only do this if
38 // one or other is actually read.
39 cursor->read_tag();
40 const char *p = cursor->current_tag.data();
41 const char *pend = p + cursor->current_tag.size();
42 ChertPostList::read_number_of_entries(&p, pend, &termfreq, &collfreq);
43 }
44
~ChertAllTermsList()45 ChertAllTermsList::~ChertAllTermsList()
46 {
47 LOGCALL_DTOR(DB, "ChertAllTermsList");
48 delete cursor;
49 }
50
51 string
get_termname() const52 ChertAllTermsList::get_termname() const
53 {
54 LOGCALL(DB, string, "ChertAllTermsList::get_termname", NO_ARGS);
55 Assert(!current_term.empty());
56 Assert(!at_end());
57 RETURN(current_term);
58 }
59
60 Xapian::doccount
get_termfreq() const61 ChertAllTermsList::get_termfreq() const
62 {
63 LOGCALL(DB, Xapian::doccount, "ChertAllTermsList::get_termfreq", NO_ARGS);
64 Assert(!current_term.empty());
65 Assert(!at_end());
66 if (termfreq == 0) read_termfreq_and_collfreq();
67 RETURN(termfreq);
68 }
69
70 Xapian::termcount
get_collection_freq() const71 ChertAllTermsList::get_collection_freq() const
72 {
73 LOGCALL(DB, Xapian::termcount, "ChertAllTermsList::get_collection_freq", NO_ARGS);
74 Assert(!current_term.empty());
75 Assert(!at_end());
76 if (termfreq == 0) read_termfreq_and_collfreq();
77 RETURN(collfreq);
78 }
79
80 TermList *
next()81 ChertAllTermsList::next()
82 {
83 LOGCALL(DB, TermList *, "ChertAllTermsList::next", NO_ARGS);
84 Assert(!at_end());
85 // Set termfreq to 0 to indicate no termfreq/collfreq have been read for
86 // the current term.
87 termfreq = 0;
88
89 if (rare(!cursor)) {
90 cursor = database->postlist_table.cursor_get();
91 Assert(cursor); // The postlist table isn't optional.
92
93 if (prefix.empty()) {
94 (void)cursor->find_entry_ge(string("\x00\xff", 2));
95 } else {
96 const string & key = pack_chert_postlist_key(prefix);
97 if (cursor->find_entry_ge(key)) {
98 // The exact term we asked for is there, so just copy it rather
99 // than wasting effort unpacking it from the key.
100 current_term = prefix;
101 RETURN(NULL);
102 }
103 }
104 goto first_time;
105 }
106
107 while (true) {
108 cursor->next();
109 first_time:
110 if (cursor->after_end()) {
111 current_term.resize(0);
112 RETURN(NULL);
113 }
114
115 const char *p = cursor->current_key.data();
116 const char *pend = p + cursor->current_key.size();
117 if (!unpack_string_preserving_sort(&p, pend, current_term)) {
118 throw Xapian::DatabaseCorruptError("PostList table key has unexpected format");
119 }
120
121 // If this key is for the first chunk of a postlist, we're done.
122 // Otherwise we need to skip past continuation chunks until we find the
123 // first chunk of the next postlist.
124 if (p == pend) break;
125 }
126
127 if (!startswith(current_term, prefix)) {
128 // We've reached the end of the prefixed terms.
129 cursor->to_end();
130 current_term.resize(0);
131 }
132
133 RETURN(NULL);
134 }
135
136 TermList *
skip_to(const string & term)137 ChertAllTermsList::skip_to(const string &term)
138 {
139 LOGCALL(DB, TermList *, "ChertAllTermsList::skip_to", term);
140 Assert(!at_end());
141 // Set termfreq to 0 to indicate no termfreq/collfreq have been read for
142 // the current term.
143 termfreq = 0;
144
145 if (rare(!cursor)) {
146 cursor = database->postlist_table.cursor_get();
147 Assert(cursor); // The postlist table isn't optional.
148 }
149
150 string key = pack_chert_postlist_key(term);
151 if (cursor->find_entry_ge(key)) {
152 // The exact term we asked for is there, so just copy it rather than
153 // wasting effort unpacking it from the key.
154 current_term = term;
155 } else {
156 if (cursor->after_end()) {
157 current_term.resize(0);
158 RETURN(NULL);
159 }
160
161 const char *p = cursor->current_key.data();
162 const char *pend = p + cursor->current_key.size();
163 if (!unpack_string_preserving_sort(&p, pend, current_term)) {
164 throw Xapian::DatabaseCorruptError("PostList table key has unexpected format");
165 }
166 }
167
168 if (!startswith(current_term, prefix)) {
169 // We've reached the end of the prefixed terms.
170 cursor->to_end();
171 current_term.resize(0);
172 }
173
174 RETURN(NULL);
175 }
176
177 bool
at_end() const178 ChertAllTermsList::at_end() const
179 {
180 LOGCALL(DB, bool, "ChertAllTermsList::at_end", NO_ARGS);
181 RETURN(cursor && cursor->after_end());
182 }
183