1 /** @file 2 * @brief A vector-like container of terms which can be iterated. 3 */ 4 /* Copyright (C) 2011,2012,2017 Olly Betts 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License as 8 * published by the Free Software Foundation; either version 2 of the 9 * License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21 #ifndef XAPIAN_INCLUDED_VECTORTERMLIST_H 22 #define XAPIAN_INCLUDED_VECTORTERMLIST_H 23 24 #include "xapian/types.h" 25 26 #include "net/length.h" 27 #include "termlist.h" 28 29 /** This class stores a list of terms. 30 * 31 * To be memory efficient, we store the terms in a single string using a 32 * suitable simple encoding. This way the number of bytes needed will 33 * usually be the sum of the lengths of all the terms plus the number of 34 * terms. If we used std::vector<std::string> here like we used to, that 35 * would need something like an additional 30 bytes per term (30 calculated 36 * for GCC 4.x on x86_64). 37 */ 38 class VectorTermList : public TermList { 39 /// The encoded terms. 40 std::string data; 41 42 /// Pointer to the next term's data, or NULL if we are at end. 43 const char * p; 44 45 /// The number of terms in the list. 46 Xapian::termcount num_terms; 47 48 /// The current term. 49 std::string current_term; 50 51 public: 52 template<typename I> VectorTermList(I begin,I end)53 VectorTermList(I begin, I end) : num_terms(0) 54 { 55 // First calculate how much space we'll need so we can reserve it. 56 size_t total_size = 0; 57 for (I i = begin; i != end; ++i) { 58 ++num_terms; 59 const std::string & s = *i; 60 total_size += s.size() + 1; 61 if (s.size() >= 255) { 62 // Not a common case, so just assume the worst case rather than 63 // trying to carefully calculate the exact size. 64 total_size += 5; 65 } 66 } 67 data.reserve(total_size); 68 69 // Now encode all the terms into data. 70 for (I i = begin; i != end; ++i) { 71 const std::string & s = *i; 72 data += encode_length(s.size()); 73 data += s; 74 } 75 76 p = data.data(); 77 } 78 79 Xapian::termcount get_approx_size() const; 80 81 std::string get_termname() const; 82 83 Xapian::termcount get_wdf() const; 84 85 Xapian::doccount get_termfreq() const; 86 87 TermList * next(); 88 89 TermList * skip_to(const std::string &); 90 91 bool at_end() const; 92 93 Xapian::termcount positionlist_count() const; 94 95 Xapian::PositionIterator positionlist_begin() const; 96 }; 97 98 #endif // XAPIAN_INCLUDED_VECTORTERMLIST_H 99