1 /** @file
2  * @brief Compact a database, or merge and compact several.
3  */
4 /* Copyright (C) 2003,2004,2005,2006,2007,2008,2009,2010,2011,2013,2014,2015,2018 Olly Betts
5  * Copyright (C) 2008 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
20  * USA
21  */
22 
23 #ifndef XAPIAN_INCLUDED_COMPACTOR_H
24 #define XAPIAN_INCLUDED_COMPACTOR_H
25 
26 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
27 # error Never use <xapian/compactor.h> directly; include <xapian.h> instead.
28 #endif
29 
30 #include <xapian/constants.h>
31 #include <xapian/deprecated.h>
32 #include <xapian/intrusive_ptr.h>
33 #include <xapian/visibility.h>
34 #include <string>
35 
36 namespace Xapian {
37 
38 class Database;
39 
40 /** Compact a database, or merge and compact several.
41  */
42 class XAPIAN_VISIBILITY_DEFAULT Compactor {
43   public:
44     /// Class containing the implementation.
45     class Internal;
46 
47     /** Compaction level. */
48     typedef enum {
49 	/** Don't split items unnecessarily. */
50 	STANDARD = 0,
51 	/** Split items whenever it saves space (the default). */
52 	FULL = 1,
53 	/** Allow oversize items to save more space (not recommended if you
54 	 *  ever plan to update the compacted database). */
55 	FULLER = 2
56     } compaction_level;
57 
58   private:
59     /// @internal Reference counted internals.
60     Xapian::Internal::intrusive_ptr<Internal> internal;
61 
62     void set_flags_(unsigned flags, unsigned mask = 0);
63 
64   public:
65     Compactor();
66 
67     virtual ~Compactor();
68 
69     /** Set the block size to use for tables in the output database.
70      *
71      *  @param block_size	The block size to use.  Valid block sizes are
72      *				currently powers of two between 2048 and 65536,
73      *				with the default being 8192, but the valid
74      *				sizes and default may change in the future.
75      */
76     XAPIAN_DEPRECATED(void set_block_size(size_t block_size));
77 
78     /** Set whether to preserve existing document id values.
79      *
80      *  @param renumber	The default is true, which means that document ids will
81      *			be renumbered - currently by applying the same offset
82      *			to all the document ids in a particular source
83      *			database.
84      *
85      *			If false, then the document ids must be unique over all
86      *			source databases.  Currently the ranges of document ids
87      *			in each source must not overlap either, though this
88      *			restriction may be removed in the future.
89      */
XAPIAN_DEPRECATED(void set_renumber (bool renumber))90     XAPIAN_DEPRECATED(void set_renumber(bool renumber)) {
91 	set_flags_(renumber ? 0 : DBCOMPACT_NO_RENUMBER,
92 		   ~unsigned(DBCOMPACT_NO_RENUMBER));
93     }
94 
95     /** Set whether to merge postlists in multiple passes.
96      *
97      *  @param multipass	If true and merging more than 3 databases,
98      *  merge the postlists in multiple passes, which is generally faster but
99      *  requires more disk space for temporary files.  By default we don't do
100      *  this.
101      */
XAPIAN_DEPRECATED(void set_multipass (bool multipass))102     XAPIAN_DEPRECATED(void set_multipass(bool multipass)) {
103 	set_flags_(multipass ? DBCOMPACT_MULTIPASS : 0,
104 		   ~unsigned(DBCOMPACT_MULTIPASS));
105     }
106 
107     /** Set the compaction level.
108      *
109      *  @param compaction Available values are:
110      *  - Xapian::Compactor::STANDARD - Don't split items unnecessarily.
111      *  - Xapian::Compactor::FULL     - Split items whenever it saves space
112      *    (the default).
113      *  - Xapian::Compactor::FULLER   - Allow oversize items to save more space
114      *    (not recommended if you ever plan to update the compacted database).
115      */
XAPIAN_DEPRECATED(void set_compaction_level (compaction_level compaction))116     XAPIAN_DEPRECATED(void set_compaction_level(compaction_level compaction)) {
117 	set_flags_(compaction, ~unsigned(STANDARD|FULL|FULLER));
118     }
119 
120     /** Set where to write the output.
121      *
122      *  @deprecated Use Database::compact(destdir[, compactor]) instead.
123      *
124      *  @param destdir	Output path.  This can be the same as an input if that
125      *			input is a stub database (in which case the database(s)
126      *			listed in the stub will be compacted to a new database
127      *			and then the stub will be atomically updated to point
128      *			to this new database).
129      */
130     XAPIAN_DEPRECATED(void set_destdir(const std::string & destdir));
131 
132     /** Add a source database.
133      *
134      *  @deprecated Use Database::compact(destdir[, compactor]) instead.
135      *
136      *  @param srcdir	The path to the source database to add.
137      */
138     XAPIAN_DEPRECATED(void add_source(const std::string & srcdir));
139 
140     /** Perform the actual compaction/merging operation.
141      *
142      *  @deprecated Use Database::compact(destdir[, compactor]) instead.
143      */
144     XAPIAN_DEPRECATED(void compact());
145 
146     /** Update progress.
147      *
148      *  Subclass this method if you want to get progress updates during
149      *  compaction.  This is called for each table first with empty status,
150      *  And then one or more times with non-empty status.
151      *
152      *  The default implementation does nothing.
153      *
154      *  @param table	The table currently being compacted.
155      *  @param status	A status message.
156      */
157     virtual void
158     set_status(const std::string & table, const std::string & status);
159 
160     /** Resolve multiple user metadata entries with the same key.
161      *
162      *  When merging, if the same user metadata key is set in more than one
163      *  input, then this method is called to allow this to be resolving in
164      *  an appropriate way.
165      *
166      *  The default implementation just returns tags[0].
167      *
168      *  For multipass this will currently get called multiple times for the
169      *  same key if there are duplicates to resolve in each pass, but this
170      *  may change in the future.
171      *
172      *  Since 1.4.6, an implementation of this method can return an empty
173      *  string to indicate that the appropriate result is to not set a value
174      *  for this user metadata key in the output database.  In older versions,
175      *  you should not return an empty string.
176      *
177      *  @param key	The metadata key with duplicate entries.
178      *  @param num_tags	How many tags there are.
179      *  @param tags	An array of num_tags strings containing the tags to
180      *			merge.
181      */
182     virtual std::string
183     resolve_duplicate_metadata(const std::string & key,
184 			       size_t num_tags, const std::string tags[]);
185 };
186 
187 }
188 
189 #endif /* XAPIAN_INCLUDED_COMPACTOR_H */
190