1 /** @file xapian-compact.cc
2  * @brief Compact a database, or merge and compact several.
3  */
4 /* Copyright (C) 2003,2004,2005,2006,2007,2008,2009,2010 Olly Betts
5  * Copyright (C) 2008 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 
25 #include <xapian.h>
26 
27 #include <cstdlib>
28 #include <iostream>
29 
30 #include "gnu_getopt.h"
31 
32 using namespace std;
33 
34 #define PROG_NAME "xapian-compact"
35 #define PROG_DESC "Compact a database, or merge and compact several"
36 
37 #define OPT_HELP 1
38 #define OPT_VERSION 2
39 #define OPT_NO_RENUMBER 3
40 
show_usage()41 static void show_usage() {
42     cout << "Usage: " PROG_NAME " [OPTIONS] SOURCE_DATABASE... DESTINATION_DATABASE\n\n"
43 "Options:\n"
44 "  -b, --blocksize=B  Set the blocksize in bytes (e.g. 4096) or K (e.g. 4K)\n"
45 "                     (must be between 2K and 64K and a power of 2, default 8K)\n"
46 "  -n, --no-full      Disable full compaction\n"
47 "  -F, --fuller       Enable fuller compaction (not recommended if you plan to\n"
48 "                     update the compacted database)\n"
49 "  -m, --multipass    If merging more than 3 databases, merge the postlists in\n"
50 "                     multiple passes (which is generally faster but requires\n"
51 "                     more disk space for temporary files)\n"
52 "      --no-renumber  Preserve the numbering of document ids (useful if you have\n"
53 "                     external references to them, or have set them to match\n"
54 "                     unique ids from an external source).  Currently this\n"
55 "                     option is only supported when merging databases if they\n"
56 "                     have disjoint ranges of used document ids\n"
57 "  --help             display this help and exit\n"
58 "  --version          output version information and exit" << endl;
59 }
60 
61 class MyCompactor : public Xapian::Compactor {
62     bool quiet;
63 
64   public:
MyCompactor()65     MyCompactor() : quiet(false) { }
66 
set_quiet(bool quiet_)67     void set_quiet(bool quiet_) { quiet = quiet_; }
68 
69     void set_status(const string & table, const string & status);
70 
71     string
72     resolve_duplicate_metadata(const string & key,
73 			       size_t n,
74 			       const string tags[]);
75 };
76 
77 void
set_status(const string & table,const string & status)78 MyCompactor::set_status(const string & table, const string & status)
79 {
80     if (quiet)
81 	return;
82     if (!status.empty())
83 	cout << '\r' << table << ": " << status << endl;
84     else
85 	cout << table << " ..." << flush;
86 }
87 
88 string
resolve_duplicate_metadata(const string & key,size_t n,const string tags[])89 MyCompactor::resolve_duplicate_metadata(const string & key,
90 					size_t n,
91 					const string tags[])
92 {
93     (void)key;
94     while (--n) {
95 	if (tags[0] != tags[n]) {
96 	    cerr << "Warning: duplicate user metadata key with different tag value - picking value from first source database with a non-empty value" << endl;
97 	    break;
98 	}
99     }
100     return tags[0];
101 }
102 
103 int
main(int argc,char ** argv)104 main(int argc, char **argv)
105 {
106     const char * opts = "b:nFmq";
107     const struct option long_opts[] = {
108 	{"fuller",	no_argument, 0, 'F'},
109 	{"no-full",	no_argument, 0, 'n'},
110 	{"multipass",	no_argument, 0, 'm'},
111 	{"blocksize",	required_argument, 0, 'b'},
112 	{"no-renumber", no_argument, 0, OPT_NO_RENUMBER},
113 	{"quiet",	no_argument, 0, 'q'},
114 	{"help",	no_argument, 0, OPT_HELP},
115 	{"version",	no_argument, 0, OPT_VERSION},
116 	{NULL,		0, 0, 0}
117     };
118 
119     MyCompactor compactor;
120 
121     int c;
122     while ((c = gnu_getopt_long(argc, argv, opts, long_opts, 0)) != -1) {
123 	switch (c) {
124 	    case 'b': {
125 		char *p;
126 		size_t block_size = strtoul(optarg, &p, 10);
127 		if (block_size <= 64 && (*p == 'K' || *p == 'k')) {
128 		    ++p;
129 		    block_size *= 1024;
130 		}
131 		if (*p || block_size < 2048 || block_size > 65536 ||
132 		    (block_size & (block_size - 1)) != 0) {
133 		    cerr << PROG_NAME": Bad value '" << optarg
134 			 << "' passed for blocksize, must be a power of 2 between 2K and 64K"
135 			 << endl;
136 		    exit(1);
137 		}
138 		compactor.set_block_size(block_size);
139 		break;
140 	    }
141 	    case 'n':
142 		compactor.set_compaction_level(compactor.STANDARD);
143 		break;
144 	    case 'F':
145 		compactor.set_compaction_level(compactor.FULLER);
146 		break;
147 	    case 'm':
148 		compactor.set_multipass(true);
149 		break;
150 	    case OPT_NO_RENUMBER:
151 		compactor.set_renumber(false);
152 		break;
153 	    case 'q':
154 		compactor.set_quiet(true);
155 		break;
156 	    case OPT_HELP:
157 		cout << PROG_NAME " - " PROG_DESC "\n\n";
158 		show_usage();
159 		exit(0);
160 	    case OPT_VERSION:
161 		cout << PROG_NAME " - " PACKAGE_STRING << endl;
162 		exit(0);
163 	    default:
164 		show_usage();
165 		exit(1);
166 	}
167     }
168 
169     if (argc - optind < 2) {
170 	show_usage();
171 	exit(1);
172     }
173 
174     // Path to the database to create.
175     compactor.set_destdir(argv[argc - 1]);
176 
177     try {
178 	for (int i = optind; i < argc - 1; ++i) {
179 	    compactor.add_source(argv[i]);
180 	}
181 
182 	compactor.compact();
183     } catch (const Xapian::Error &error) {
184 	cerr << argv[0] << ": " << error.get_description() << endl;
185 	exit(1);
186     } catch (const char * msg) {
187 	cerr << argv[0] << ": " << msg << endl;
188 	exit(1);
189     }
190 }
191