1 /* chert_alltermslist.cc: A termlist containing all terms in a chert database.
2  *
3  * Copyright (C) 2005,2007,2008,2009,2010 Olly Betts
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation; either version 2 of the
8  * License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
18  * USA
19  */
20 
21 #include <config.h>
22 
23 #include "chert_alltermslist.h"
24 #include "chert_postlist.h"
25 
26 #include "debuglog.h"
27 #include "pack.h"
28 #include "stringutils.h"
29 
30 void
read_termfreq_and_collfreq() const31 ChertAllTermsList::read_termfreq_and_collfreq() const
32 {
33     LOGCALL_VOID(DB, "ChertAllTermsList::read_termfreq_and_collfreq", NO_ARGS);
34     Assert(!current_term.empty());
35     Assert(!at_end());
36 
37     // Unpack the termfreq and collfreq from the tag.  Only do this if
38     // one or other is actually read.
39     cursor->read_tag();
40     const char *p = cursor->current_tag.data();
41     const char *pend = p + cursor->current_tag.size();
42     ChertPostList::read_number_of_entries(&p, pend, &termfreq, &collfreq);
43 }
44 
~ChertAllTermsList()45 ChertAllTermsList::~ChertAllTermsList()
46 {
47     LOGCALL_DTOR(DB, "ChertAllTermsList");
48     delete cursor;
49 }
50 
51 string
get_termname() const52 ChertAllTermsList::get_termname() const
53 {
54     LOGCALL(DB, string, "ChertAllTermsList::get_termname", NO_ARGS);
55     Assert(!current_term.empty());
56     Assert(!at_end());
57     RETURN(current_term);
58 }
59 
60 Xapian::doccount
get_termfreq() const61 ChertAllTermsList::get_termfreq() const
62 {
63     LOGCALL(DB, Xapian::doccount, "ChertAllTermsList::get_termfreq", NO_ARGS);
64     Assert(!current_term.empty());
65     Assert(!at_end());
66     if (termfreq == 0) read_termfreq_and_collfreq();
67     RETURN(termfreq);
68 }
69 
70 Xapian::termcount
get_collection_freq() const71 ChertAllTermsList::get_collection_freq() const
72 {
73     LOGCALL(DB, Xapian::termcount, "ChertAllTermsList::get_collection_freq", NO_ARGS);
74     Assert(!current_term.empty());
75     Assert(!at_end());
76     if (termfreq == 0) read_termfreq_and_collfreq();
77     RETURN(collfreq);
78 }
79 
80 TermList *
next()81 ChertAllTermsList::next()
82 {
83     LOGCALL(DB, TermList *, "ChertAllTermsList::next", NO_ARGS);
84     Assert(!at_end());
85     // Set termfreq to 0 to indicate no termfreq/collfreq have been read for
86     // the current term.
87     termfreq = 0;
88 
89     if (rare(!cursor)) {
90 	cursor = database->postlist_table.cursor_get();
91 	Assert(cursor); // The postlist table isn't optional.
92 
93 	if (prefix.empty()) {
94 	    (void)cursor->find_entry_ge(string("\x00\xff", 2));
95 	} else {
96 	    const string & key = pack_chert_postlist_key(prefix);
97 	    if (cursor->find_entry_ge(key)) {
98 		// The exact term we asked for is there, so just copy it rather
99 		// than wasting effort unpacking it from the key.
100 		current_term = prefix;
101 		RETURN(NULL);
102 	    }
103 	}
104 	goto first_time;
105     }
106 
107     while (true) {
108 	cursor->next();
109 first_time:
110 	if (cursor->after_end()) {
111 	    current_term.resize(0);
112 	    RETURN(NULL);
113 	}
114 
115 	const char *p = cursor->current_key.data();
116 	const char *pend = p + cursor->current_key.size();
117 	if (!unpack_string_preserving_sort(&p, pend, current_term)) {
118 	    throw Xapian::DatabaseCorruptError("PostList table key has unexpected format");
119 	}
120 
121 	// If this key is for the first chunk of a postlist, we're done.
122 	// Otherwise we need to skip past continuation chunks until we find the
123 	// first chunk of the next postlist.
124 	if (p == pend) break;
125     }
126 
127     if (!startswith(current_term, prefix)) {
128 	// We've reached the end of the prefixed terms.
129 	cursor->to_end();
130 	current_term.resize(0);
131     }
132 
133     RETURN(NULL);
134 }
135 
136 TermList *
skip_to(const string & term)137 ChertAllTermsList::skip_to(const string &term)
138 {
139     LOGCALL(DB, TermList *, "ChertAllTermsList::skip_to", term);
140     Assert(!at_end());
141     // Set termfreq to 0 to indicate no termfreq/collfreq have been read for
142     // the current term.
143     termfreq = 0;
144 
145     if (rare(!cursor)) {
146 	cursor = database->postlist_table.cursor_get();
147 	Assert(cursor); // The postlist table isn't optional.
148     }
149 
150     string key = pack_chert_postlist_key(term);
151     if (cursor->find_entry_ge(key)) {
152 	// The exact term we asked for is there, so just copy it rather than
153 	// wasting effort unpacking it from the key.
154 	current_term = term;
155     } else {
156 	if (cursor->after_end()) {
157 	    current_term.resize(0);
158 	    RETURN(NULL);
159 	}
160 
161 	const char *p = cursor->current_key.data();
162 	const char *pend = p + cursor->current_key.size();
163 	if (!unpack_string_preserving_sort(&p, pend, current_term)) {
164 	    throw Xapian::DatabaseCorruptError("PostList table key has unexpected format");
165 	}
166     }
167 
168     if (!startswith(current_term, prefix)) {
169 	// We've reached the end of the prefixed terms.
170 	cursor->to_end();
171 	current_term.resize(0);
172     }
173 
174     RETURN(NULL);
175 }
176 
177 bool
at_end() const178 ChertAllTermsList::at_end() const
179 {
180     LOGCALL(DB, bool, "ChertAllTermsList::at_end", NO_ARGS);
181     RETURN(cursor && cursor->after_end());
182 }
183