1 /** @file
2  * @brief A vector-like container of terms which can be iterated.
3  */
4 /* Copyright (C) 2011,2012,2017 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19  */
20 
21 #ifndef XAPIAN_INCLUDED_VECTORTERMLIST_H
22 #define XAPIAN_INCLUDED_VECTORTERMLIST_H
23 
24 #include "xapian/types.h"
25 
26 #include "net/length.h"
27 #include "termlist.h"
28 
29 /** This class stores a list of terms.
30  *
31  *  To be memory efficient, we store the terms in a single string using a
32  *  suitable simple encoding.  This way the number of bytes needed will
33  *  usually be the sum of the lengths of all the terms plus the number of
34  *  terms.  If we used std::vector<std::string> here like we used to, that
35  *  would need something like an additional 30 bytes per term (30 calculated
36  *  for GCC 4.x on x86_64).
37  */
38 class VectorTermList : public TermList {
39     /// The encoded terms.
40     std::string data;
41 
42     /// Pointer to the next term's data, or NULL if we are at end.
43     const char * p;
44 
45     /// The number of terms in the list.
46     Xapian::termcount num_terms;
47 
48     /// The current term.
49     std::string current_term;
50 
51   public:
52     template<typename I>
VectorTermList(I begin,I end)53     VectorTermList(I begin, I end) : num_terms(0)
54     {
55 	// First calculate how much space we'll need so we can reserve it.
56 	size_t total_size = 0;
57 	for (I i = begin; i != end; ++i) {
58 	    ++num_terms;
59 	    const std::string & s = *i;
60 	    total_size += s.size() + 1;
61 	    if (s.size() >= 255) {
62 		// Not a common case, so just assume the worst case rather than
63 		// trying to carefully calculate the exact size.
64 		total_size += 5;
65 	    }
66 	}
67 	data.reserve(total_size);
68 
69 	// Now encode all the terms into data.
70 	for (I i = begin; i != end; ++i) {
71 	    const std::string & s = *i;
72 	    data += encode_length(s.size());
73 	    data += s;
74 	}
75 
76 	p = data.data();
77     }
78 
79     Xapian::termcount get_approx_size() const;
80 
81     std::string get_termname() const;
82 
83     Xapian::termcount get_wdf() const;
84 
85     Xapian::doccount get_termfreq() const;
86 
87     TermList * next();
88 
89     TermList * skip_to(const std::string &);
90 
91     bool at_end() const;
92 
93     Xapian::termcount positionlist_count() const;
94 
95     Xapian::PositionIterator positionlist_begin() const;
96 };
97 
98 #endif // XAPIAN_INCLUDED_VECTORTERMLIST_H
99