1 /** @file 2 * @brief Compact a database, or merge and compact several. 3 */ 4 /* Copyright (C) 2003,2004,2005,2006,2007,2008,2009,2010,2011,2013,2014,2015,2018 Olly Betts 5 * Copyright (C) 2008 Lemur Consulting Ltd 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License as 9 * published by the Free Software Foundation; either version 2 of the 10 * License, or (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 20 * USA 21 */ 22 23 #ifndef XAPIAN_INCLUDED_COMPACTOR_H 24 #define XAPIAN_INCLUDED_COMPACTOR_H 25 26 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD 27 # error Never use <xapian/compactor.h> directly; include <xapian.h> instead. 28 #endif 29 30 #include <xapian/constants.h> 31 #include <xapian/deprecated.h> 32 #include <xapian/intrusive_ptr.h> 33 #include <xapian/visibility.h> 34 #include <string> 35 36 namespace Xapian { 37 38 class Database; 39 40 /** Compact a database, or merge and compact several. 41 */ 42 class XAPIAN_VISIBILITY_DEFAULT Compactor { 43 public: 44 /// Class containing the implementation. 45 class Internal; 46 47 /** Compaction level. */ 48 typedef enum { 49 /** Don't split items unnecessarily. */ 50 STANDARD = 0, 51 /** Split items whenever it saves space (the default). */ 52 FULL = 1, 53 /** Allow oversize items to save more space (not recommended if you 54 * ever plan to update the compacted database). */ 55 FULLER = 2 56 } compaction_level; 57 58 private: 59 /// @internal Reference counted internals. 60 Xapian::Internal::intrusive_ptr<Internal> internal; 61 62 void set_flags_(unsigned flags, unsigned mask = 0); 63 64 public: 65 Compactor(); 66 67 virtual ~Compactor(); 68 69 /** Set the block size to use for tables in the output database. 70 * 71 * @param block_size The block size to use. Valid block sizes are 72 * currently powers of two between 2048 and 65536, 73 * with the default being 8192, but the valid 74 * sizes and default may change in the future. 75 */ 76 XAPIAN_DEPRECATED(void set_block_size(size_t block_size)); 77 78 /** Set whether to preserve existing document id values. 79 * 80 * @param renumber The default is true, which means that document ids will 81 * be renumbered - currently by applying the same offset 82 * to all the document ids in a particular source 83 * database. 84 * 85 * If false, then the document ids must be unique over all 86 * source databases. Currently the ranges of document ids 87 * in each source must not overlap either, though this 88 * restriction may be removed in the future. 89 */ XAPIAN_DEPRECATED(void set_renumber (bool renumber))90 XAPIAN_DEPRECATED(void set_renumber(bool renumber)) { 91 set_flags_(renumber ? 0 : DBCOMPACT_NO_RENUMBER, 92 ~unsigned(DBCOMPACT_NO_RENUMBER)); 93 } 94 95 /** Set whether to merge postlists in multiple passes. 96 * 97 * @param multipass If true and merging more than 3 databases, 98 * merge the postlists in multiple passes, which is generally faster but 99 * requires more disk space for temporary files. By default we don't do 100 * this. 101 */ XAPIAN_DEPRECATED(void set_multipass (bool multipass))102 XAPIAN_DEPRECATED(void set_multipass(bool multipass)) { 103 set_flags_(multipass ? DBCOMPACT_MULTIPASS : 0, 104 ~unsigned(DBCOMPACT_MULTIPASS)); 105 } 106 107 /** Set the compaction level. 108 * 109 * @param compaction Available values are: 110 * - Xapian::Compactor::STANDARD - Don't split items unnecessarily. 111 * - Xapian::Compactor::FULL - Split items whenever it saves space 112 * (the default). 113 * - Xapian::Compactor::FULLER - Allow oversize items to save more space 114 * (not recommended if you ever plan to update the compacted database). 115 */ XAPIAN_DEPRECATED(void set_compaction_level (compaction_level compaction))116 XAPIAN_DEPRECATED(void set_compaction_level(compaction_level compaction)) { 117 set_flags_(compaction, ~unsigned(STANDARD|FULL|FULLER)); 118 } 119 120 /** Set where to write the output. 121 * 122 * @deprecated Use Database::compact(destdir[, compactor]) instead. 123 * 124 * @param destdir Output path. This can be the same as an input if that 125 * input is a stub database (in which case the database(s) 126 * listed in the stub will be compacted to a new database 127 * and then the stub will be atomically updated to point 128 * to this new database). 129 */ 130 XAPIAN_DEPRECATED(void set_destdir(const std::string & destdir)); 131 132 /** Add a source database. 133 * 134 * @deprecated Use Database::compact(destdir[, compactor]) instead. 135 * 136 * @param srcdir The path to the source database to add. 137 */ 138 XAPIAN_DEPRECATED(void add_source(const std::string & srcdir)); 139 140 /** Perform the actual compaction/merging operation. 141 * 142 * @deprecated Use Database::compact(destdir[, compactor]) instead. 143 */ 144 XAPIAN_DEPRECATED(void compact()); 145 146 /** Update progress. 147 * 148 * Subclass this method if you want to get progress updates during 149 * compaction. This is called for each table first with empty status, 150 * And then one or more times with non-empty status. 151 * 152 * The default implementation does nothing. 153 * 154 * @param table The table currently being compacted. 155 * @param status A status message. 156 */ 157 virtual void 158 set_status(const std::string & table, const std::string & status); 159 160 /** Resolve multiple user metadata entries with the same key. 161 * 162 * When merging, if the same user metadata key is set in more than one 163 * input, then this method is called to allow this to be resolving in 164 * an appropriate way. 165 * 166 * The default implementation just returns tags[0]. 167 * 168 * For multipass this will currently get called multiple times for the 169 * same key if there are duplicates to resolve in each pass, but this 170 * may change in the future. 171 * 172 * Since 1.4.6, an implementation of this method can return an empty 173 * string to indicate that the appropriate result is to not set a value 174 * for this user metadata key in the output database. In older versions, 175 * you should not return an empty string. 176 * 177 * @param key The metadata key with duplicate entries. 178 * @param num_tags How many tags there are. 179 * @param tags An array of num_tags strings containing the tags to 180 * merge. 181 */ 182 virtual std::string 183 resolve_duplicate_metadata(const std::string & key, 184 size_t num_tags, const std::string tags[]); 185 }; 186 187 } 188 189 #endif /* XAPIAN_INCLUDED_COMPACTOR_H */ 190