1 /** @file
2 * @brief Check the consistency of a database or table.
3 */
4 /* Copyright 1999,2000,2001 BrightStation PLC
5 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2019 Olly Betts
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 * USA
21 */
22
23 #include <config.h>
24 #include "xapian/database.h"
25
26 #include "xapian/constants.h"
27 #include "xapian/error.h"
28
29 #ifdef XAPIAN_HAS_GLASS_BACKEND
30 #include "glass/glass_changes.h"
31 #include "glass/glass_dbcheck.h"
32 #include "glass/glass_version.h"
33 #endif
34 #ifdef XAPIAN_HAS_CHERT_BACKEND
35 #include "chert/chert_database.h"
36 #include "chert/chert_dbcheck.h"
37 #include "chert/chert_types.h"
38 #include "chert/chert_version.h"
39 #endif
40
41 #include "backends.h"
42 #include "databasehelpers.h"
43 #include "filetests.h"
44 #include "omassert.h"
45 #include "stringutils.h"
46
47 #include <ostream>
48 #include <stdexcept>
49
50 using namespace std;
51
52 #ifdef XAPIAN_HAS_GLASS_BACKEND
// The tables making up a glass database, listed in the order in which they
// get checked.
//
// "termlist" has to come before "postlist": checking the termlist first is
// what makes it possible to cross-check the document lengths.
static const struct {
    char name[9];
} glass_tables[] = {
    { "docdata" }, { "termlist" }, { "postlist" },
    { "position" }, { "spelling" }, { "synonym" },
};
63 #endif
64
// FIXME: We don't currently cross-check wdf between postlist and termlist.
// It's hard to see how to do so efficiently.  We do cross-check doclens, but
// that "only" requires (4 * last_docid()) bytes.
68
#if defined XAPIAN_HAS_CHERT_BACKEND || defined XAPIAN_HAS_GLASS_BACKEND
/** Try to reserve space in @a doclens for cross-checking document lengths.
 *
 *  Capacity for (last_docid + 1) entries is requested.  If that would need
 *  1GB or more, or if the reservation fails, an explanatory message is
 *  written to @a out (when it is non-NULL) and the function returns without
 *  propagating any exception.
 *
 *  @param doclens     Vector to reserve space in
 *  @param last_docid  Highest docid the vector needs to be able to hold
 *  @param out         std::ostream to write messages to (or NULL for no
 *                     messages)
 */
static void
reserve_doclens(std::vector<Xapian::termcount>& doclens,
                Xapian::docid last_docid,
                std::ostream * out)
{
    // All the diagnostics below are optional and reported the same way.
    auto report = [out](const char* message) {
        if (out)
            *out << message << std::endl;
    };

    // Number of entries at which the vector's memory block reaches 1GB.
    const auto entry_limit = 0x40000000ul / sizeof(Xapian::termcount);
    if (last_docid >= entry_limit) {
        report("Cross-checking document lengths between the postlist and "
               "termlist tables would use more than 1GB of memory, so "
               "skipping that check");
        return;
    }

    try {
        doclens.reserve(last_docid + 1);
    } catch (const std::bad_alloc &) {
        // Failed to allocate the required memory.
        report("Couldn't allocate enough memory for cross-checking document "
               "lengths between the postlist and termlist tables, so "
               "skipping that check");
    } catch (const std::length_error &) {
        // There are too many elements for the vector to handle!
        report("Couldn't allocate enough elements for cross-checking document "
               "lengths between the postlist and termlist tables, so "
               "skipping that check");
    }
}
#endif
99
100 static size_t
check_db_dir(const string & path,int opts,std::ostream * out)101 check_db_dir(const string & path, int opts, std::ostream *out)
102 {
103 struct stat sb;
104 if (stat((path + "/iamchert").c_str(), &sb) == 0) {
105 #ifndef XAPIAN_HAS_CHERT_BACKEND
106 (void)opts;
107 (void)out;
108 throw Xapian::FeatureUnavailableError("Chert database support isn't enabled");
109 #else
110 // Check a whole chert database directory.
111 vector<Xapian::termcount> doclens;
112 size_t errors = 0;
113
114 // If we can't read the doccount or last docid, set them to their
115 // maximum values to suppress errors.
116 Xapian::doccount doccount = Xapian::doccount(-1);
117 Xapian::docid db_last_docid = CHERT_MAX_DOCID;
118
119 chert_revision_number_t rev = 0;
120 chert_revision_number_t * rev_ptr = &rev;
121 try {
122 // Open at the lower level so we can get the revision number.
123 ChertDatabase db(path);
124 doccount = db.get_doccount();
125 db_last_docid = db.get_lastdocid();
126 reserve_doclens(doclens, db_last_docid, out);
127 rev = db.get_revision_number();
128 } catch (const Xapian::Error & e) {
129 // Ignore so we can check a database too broken to open.
130 if (out)
131 *out << "Database couldn't be opened for reading: "
132 << e.get_description()
133 << "\nContinuing check anyway" << endl;
134 ++errors;
135 }
136
137 size_t pre_table_check_errors = errors;
138
139 // Check all the btrees.
140 //
141 // Note: it's important to check "termlist" before "postlist" so
142 // that we can cross-check the document lengths; also we check
143 // "record" first as that's the last committed, so has the most
144 // reliable rootblock revision in DBCHECK_FIX mode.
145 static const struct { char name[9]; } tables[] = {
146 { "record" },
147 { "termlist" },
148 { "postlist" },
149 { "position" },
150 { "spelling" },
151 { "synonym" }
152 };
153 for (auto t : tables) {
154 const char * name = t.name;
155 if (out)
156 *out << name << ":\n";
157 if (strcmp(name, "record") != 0 && strcmp(name, "postlist") != 0) {
158 // Other tables are created lazily, so may not exist.
159 string table(path);
160 table += '/';
161 table += name;
162 table += ".DB";
163 if (!file_exists(table)) {
164 if (out) {
165 if (strcmp(name, "termlist") == 0) {
166 *out << "Not present.\n";
167 } else {
168 *out << "Lazily created, and not yet used.\n";
169 }
170 *out << endl;
171 }
172 continue;
173 }
174 }
175 errors += check_chert_table(name, path, rev_ptr, opts, doclens,
176 doccount, db_last_docid, out);
177 }
178
179 if (errors == pre_table_check_errors && (opts & Xapian::DBCHECK_FIX)) {
180 // Check the version file is OK and if not, recreate it.
181 ChertVersion iam(path);
182 try {
183 iam.read_and_check();
184 } catch (const Xapian::DatabaseError &) {
185 iam.create();
186 }
187 }
188 return errors;
189 #endif
190 }
191
192 if (stat((path + "/iamglass").c_str(), &sb) == 0) {
193 #ifndef XAPIAN_HAS_GLASS_BACKEND
194 (void)opts;
195 (void)out;
196 throw Xapian::FeatureUnavailableError("Glass database support isn't enabled");
197 #else
198 // Check a whole glass database directory.
199 vector<Xapian::termcount> doclens;
200 size_t errors = 0;
201
202 try {
203 // Check if the database can actually be opened.
204 Xapian::Database db(path);
205 } catch (const Xapian::Error & e) {
206 // Continue - we can still usefully look at how it is broken.
207 if (out)
208 *out << "Database couldn't be opened for reading: "
209 << e.get_description()
210 << "\nContinuing check anyway" << endl;
211 ++errors;
212 }
213
214 GlassVersion version_file(path);
215 version_file.read();
216 for (glass_revision_number_t r = version_file.get_revision(); r != 0; --r) {
217 string changes_file = path;
218 changes_file += "/changes";
219 changes_file += str(r);
220 if (file_exists(changes_file))
221 GlassChanges::check(changes_file);
222 }
223
224 Xapian::docid doccount = version_file.get_doccount();
225 Xapian::docid db_last_docid = version_file.get_last_docid();
226 if (db_last_docid < doccount) {
227 if (out)
228 *out << "last_docid = " << db_last_docid << " < doccount = "
229 << doccount << endl;
230 ++errors;
231 }
232 reserve_doclens(doclens, db_last_docid, out);
233
234 // Check all the tables.
235 for (auto t : glass_tables) {
236 errors += check_glass_table(t.name, path, version_file, opts,
237 doclens, out);
238 }
239 return errors;
240 #endif
241 }
242
243 if (stat((path + "/iamflint").c_str(), &sb) == 0) {
244 // Flint is no longer supported as of Xapian 1.3.0.
245 throw Xapian::FeatureUnavailableError("Flint database support was removed in Xapian 1.3.0");
246 }
247
248 if (stat((path + "/iambrass").c_str(), &sb) == 0) {
249 // Brass was renamed to glass as of Xapian 1.3.2.
250 throw Xapian::FeatureUnavailableError("Brass database support was removed in Xapian 1.3.2");
251 }
252
253 if (stat((path + "/record_DB").c_str(), &sb) == 0) {
254 // Quartz is no longer supported as of Xapian 1.1.0.
255 throw Xapian::FeatureUnavailableError("Quartz database support was removed in Xapian 1.1.0");
256 }
257
258 throw Xapian::DatabaseOpeningError(
259 "Directory does not contain a Xapian database");
260 }
261
262 /** Check a database table.
263 *
264 * @param filename The filename of the table (only used to get the directory and
265 * @param opts Xapian::check() options
266 * @param out std::ostream to write messages to (or NULL for no messages)
267 * @param backend Backend type (a BACKEND_XXX constant)
268 */
269 static size_t
check_db_table(const string & filename,int opts,std::ostream * out,int backend)270 check_db_table(const string& filename, int opts, std::ostream* out, int backend)
271 {
272 size_t p = filename.find_last_of(DIR_SEPS);
273 // If we found a directory separator, advance p to the next character. If
274 // we didn't, incrementing string::npos will give us 0, which is what we
275 // want.
276 ++p;
277
278 string dir(filename, 0, p);
279
280 string tablename;
281 while (p != filename.size()) {
282 char ch = filename[p++];
283 if (ch == '.') break;
284 tablename += C_tolower(ch);
285 }
286
287 #if defined XAPIAN_HAS_CHERT_BACKEND || defined XAPIAN_HAS_GLASS_BACKEND
288 vector<Xapian::termcount> doclens;
289 #else
290 (void)opts;
291 (void)out;
292 #endif
293
294 switch (backend) {
295 case BACKEND_GLASS: {
296 #ifndef XAPIAN_HAS_GLASS_BACKEND
297 auto msg = "Glass database support isn't enabled";
298 throw Xapian::FeatureUnavailableError(msg);
299 #else
300 GlassVersion version_file(dir);
301 version_file.read();
302 return check_glass_table(tablename.c_str(), dir, version_file, opts,
303 doclens, out);
304 #endif
305 }
306
307 case BACKEND_CHERT:
308 break;
309
310 default:
311 Assert(false);
312 break;
313 }
314
315 // Flint and brass also used the extension ".DB", so check that we
316 // haven't been passed a single table in a flint or brass database.
317 struct stat sb;
318 if (stat((dir + "/iamflint").c_str(), &sb) == 0) {
319 // Flint is no longer supported as of Xapian 1.3.0.
320 throw Xapian::FeatureUnavailableError("Flint database support was removed in Xapian 1.3.0");
321 }
322 if (stat((dir + "/iambrass").c_str(), &sb) == 0) {
323 // Brass was renamed to glass as of Xapian 1.3.2.
324 throw Xapian::FeatureUnavailableError("Brass database support was removed in Xapian 1.3.2");
325 }
326 #ifndef XAPIAN_HAS_CHERT_BACKEND
327 throw Xapian::FeatureUnavailableError("Chert database support isn't enabled");
328 #else
329 // Set the doccount and the last docid to their maximum values to suppress
330 // errors.
331 return check_chert_table(tablename.c_str(), dir, NULL, opts, doclens,
332 Xapian::doccount(-1), CHERT_MAX_DOCID, out);
333 #endif
334 }
335
336 /** Check a single file DB from an fd.
337 *
338 * Closes the fd.
339 */
340 static size_t
check_db_fd(int fd,int opts,std::ostream * out,int backend)341 check_db_fd(int fd, int opts, std::ostream* out, int backend)
342 {
343 if (backend == BACKEND_UNKNOWN) {
344 // FIXME: Actually probe.
345 backend = BACKEND_GLASS;
346 }
347
348 size_t errors = 0;
349 switch (backend) {
350 case BACKEND_GLASS: {
351 // Check a single-file glass database.
352 #ifdef XAPIAN_HAS_GLASS_BACKEND
353 // GlassVersion's destructor will close fd.
354 GlassVersion version_file(fd);
355 version_file.read();
356
357 Xapian::docid doccount = version_file.get_doccount();
358 Xapian::docid db_last_docid = version_file.get_last_docid();
359 if (db_last_docid < doccount) {
360 if (out)
361 *out << "last_docid = " << db_last_docid << " < doccount = "
362 << doccount << endl;
363 ++errors;
364 }
365 vector<Xapian::termcount> doclens;
366 reserve_doclens(doclens, db_last_docid, out);
367
368 // Check all the tables.
369 for (auto t : glass_tables) {
370 errors += check_glass_table(t.name, fd, version_file.get_offset(),
371 version_file, opts, doclens,
372 out);
373 }
374 break;
375 #else
376 (void)opts;
377 (void)out;
378 ::close(fd);
379 throw Xapian::FeatureUnavailableError("Glass database support isn't enabled");
380 #endif
381 }
382 default:
383 Assert(false);
384 }
385 return errors;
386 }
387
388 namespace Xapian {
389
390 static size_t
check_stub(const string & stub_path,int opts,std::ostream * out)391 check_stub(const string& stub_path, int opts, std::ostream* out)
392 {
393 size_t errors = 0;
394 read_stub_file(stub_path,
395 [&errors, opts, out](const string& path) {
396 errors += Database::check(path, opts, out);
397 },
398 [&errors, opts, out](const string& path) {
399 // FIXME: Doesn't check the database type is chert.
400 errors += Database::check(path, opts, out);
401 },
402 [&errors, opts, out](const string& path) {
403 // FIXME: Doesn't check the database type is glass.
404 errors += Database::check(path, opts, out);
405 },
406 [](const string&, const string&) {
407 auto msg = "Remote database checking not implemented";
408 throw Xapian::UnimplementedError(msg);
409 },
410 [](const string&, unsigned) {
411 auto msg = "Remote database checking not implemented";
412 throw Xapian::UnimplementedError(msg);
413 },
414 []() {
415 auto msg = "InMemory database checking not implemented";
416 throw Xapian::UnimplementedError(msg);
417 });
418 return errors;
419 }
420
421 size_t
check_(const string * path_ptr,int fd,int opts,std::ostream * out)422 Database::check_(const string * path_ptr, int fd, int opts, std::ostream *out)
423 {
424 if (!out) {
425 // If we have nowhere to write output, then disable all the options
426 // which only affect what we output.
427 opts &= Xapian::DBCHECK_FIX;
428 }
429
430 if (path_ptr == NULL) {
431 return check_db_fd(fd, opts, out, BACKEND_UNKNOWN);
432 }
433
434 const string & path = *path_ptr;
435 struct stat sb;
436 if (stat(path.c_str(), &sb) == 0) {
437 if (S_ISDIR(sb.st_mode)) {
438 return check_db_dir(path, opts, out);
439 }
440
441 if (S_ISREG(sb.st_mode)) {
442 int backend = test_if_single_file_db(sb, path, &fd);
443 if (backend != BACKEND_UNKNOWN) {
444 return check_db_fd(fd, opts, out, backend);
445 }
446 // Could be a single table or a stub database file. Look at the
447 // extension to determine the type.
448 if (endswith(path, ".DB")) {
449 // It could also be flint or brass, but we check for those below.
450 backend = BACKEND_CHERT;
451 } else if (endswith(path, "." GLASS_TABLE_EXTENSION)) {
452 backend = BACKEND_GLASS;
453 } else {
454 return check_stub(path, opts, out);
455 }
456
457 return check_db_table(path, opts, out, backend);
458 }
459
460 throw Xapian::DatabaseOpeningError("Not a regular file or directory");
461 }
462
463 // The filename passed doesn't exist - see if it's the basename of the
464 // table (perhaps with "." after it), so the user can do xapian-check on
465 // "foo/termlist" or "foo/termlist." (which you would get from filename
466 // completion with older backends).
467 string filename = path;
468 if (endswith(filename, '.')) {
469 filename.resize(filename.size() - 1);
470 }
471
472 int backend = BACKEND_UNKNOWN;
473 if (stat((filename + ".DB").c_str(), &sb) == 0) {
474 // It could also be flint or brass, but we check for those below.
475 backend = BACKEND_CHERT;
476 } else if (stat((filename + "." GLASS_TABLE_EXTENSION).c_str(), &sb) == 0) {
477 backend = BACKEND_GLASS;
478 } else {
479 auto msg = "Couldn't find Xapian database or table to check";
480 throw Xapian::DatabaseOpeningError(msg, ENOENT);
481 }
482
483 return check_db_table(path, opts, out, backend);
484 }
485
486 }
487