1 /** @file
2  * @brief Check the consistency of a database or table.
3  */
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2019 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 #include "xapian/database.h"
25 
26 #include "xapian/constants.h"
27 #include "xapian/error.h"
28 
29 #ifdef XAPIAN_HAS_GLASS_BACKEND
30 #include "glass/glass_changes.h"
31 #include "glass/glass_dbcheck.h"
32 #include "glass/glass_version.h"
33 #endif
34 #ifdef XAPIAN_HAS_CHERT_BACKEND
35 #include "chert/chert_database.h"
36 #include "chert/chert_dbcheck.h"
37 #include "chert/chert_types.h"
38 #include "chert/chert_version.h"
39 #endif
40 
41 #include "backends.h"
42 #include "databasehelpers.h"
43 #include "filetests.h"
44 #include "omassert.h"
45 #include "stringutils.h"
46 
47 #include <ostream>
48 #include <stdexcept>
49 
50 using namespace std;
51 
52 #ifdef XAPIAN_HAS_GLASS_BACKEND
// The tables which make up a glass database, in the order they get checked.
// The order matters: "termlist" has to come before "postlist" so that the
// document lengths can be cross-checked between the two.
static const struct { char name[9]; } glass_tables[] = {
    { "docdata" }, { "termlist" }, { "postlist" },
    { "position" }, { "spelling" }, { "synonym" }
};
63 #endif
64 
// FIXME: We don't currently cross-check wdf between postlist and termlist.
// It's hard to see how to do so efficiently.  We do cross-check doclens, but
// that "only" requires (4 * last_docid()) bytes.
68 
69 #if defined XAPIAN_HAS_CHERT_BACKEND || defined XAPIAN_HAS_GLASS_BACKEND
70 static void
reserve_doclens(vector<Xapian::termcount> & doclens,Xapian::docid last_docid,ostream * out)71 reserve_doclens(vector<Xapian::termcount>& doclens, Xapian::docid last_docid,
72 		ostream * out)
73 {
74     if (last_docid >= 0x40000000ul / sizeof(Xapian::termcount)) {
75 	// The memory block needed by the vector would be >= 1GB.
76 	if (out)
77 	    *out << "Cross-checking document lengths between the postlist and "
78 		    "termlist tables would use more than 1GB of memory, so "
79 		    "skipping that check" << endl;
80 	return;
81     }
82     try {
83 	doclens.reserve(last_docid + 1);
84     } catch (const std::bad_alloc &) {
85 	// Failed to allocate the required memory.
86 	if (out)
87 	    *out << "Couldn't allocate enough memory for cross-checking document "
88 		    "lengths between the postlist and termlist tables, so "
89 		    "skipping that check" << endl;
90     } catch (const std::length_error &) {
91 	// There are too many elements for the vector to handle!
92 	if (out)
93 	    *out << "Couldn't allocate enough elements for cross-checking document "
94 		    "lengths between the postlist and termlist tables, so "
95 		    "skipping that check" << endl;
96     }
97 }
98 #endif
99 
100 static size_t
check_db_dir(const string & path,int opts,std::ostream * out)101 check_db_dir(const string & path, int opts, std::ostream *out)
102 {
103     struct stat sb;
104     if (stat((path + "/iamchert").c_str(), &sb) == 0) {
105 #ifndef XAPIAN_HAS_CHERT_BACKEND
106 	(void)opts;
107 	(void)out;
108 	throw Xapian::FeatureUnavailableError("Chert database support isn't enabled");
109 #else
110 	// Check a whole chert database directory.
111 	vector<Xapian::termcount> doclens;
112 	size_t errors = 0;
113 
114 	// If we can't read the doccount or last docid, set them to their
115 	// maximum values to suppress errors.
116 	Xapian::doccount doccount = Xapian::doccount(-1);
117 	Xapian::docid db_last_docid = CHERT_MAX_DOCID;
118 
119 	chert_revision_number_t rev = 0;
120 	chert_revision_number_t * rev_ptr = &rev;
121 	try {
122 	    // Open at the lower level so we can get the revision number.
123 	    ChertDatabase db(path);
124 	    doccount = db.get_doccount();
125 	    db_last_docid = db.get_lastdocid();
126 	    reserve_doclens(doclens, db_last_docid, out);
127 	    rev = db.get_revision_number();
128 	} catch (const Xapian::Error & e) {
129 	    // Ignore so we can check a database too broken to open.
130 	    if (out)
131 		*out << "Database couldn't be opened for reading: "
132 		     << e.get_description()
133 		     << "\nContinuing check anyway" << endl;
134 	    ++errors;
135 	}
136 
137 	size_t pre_table_check_errors = errors;
138 
139 	// Check all the btrees.
140 	//
141 	// Note: it's important to check "termlist" before "postlist" so
142 	// that we can cross-check the document lengths; also we check
143 	// "record" first as that's the last committed, so has the most
144 	// reliable rootblock revision in DBCHECK_FIX mode.
145 	static const struct { char name[9]; } tables[] = {
146 	    { "record" },
147 	    { "termlist" },
148 	    { "postlist" },
149 	    { "position" },
150 	    { "spelling" },
151 	    { "synonym" }
152 	};
153 	for (auto t : tables) {
154 	    const char * name = t.name;
155 	    if (out)
156 		*out << name << ":\n";
157 	    if (strcmp(name, "record") != 0 && strcmp(name, "postlist") != 0) {
158 		// Other tables are created lazily, so may not exist.
159 		string table(path);
160 		table += '/';
161 		table += name;
162 		table += ".DB";
163 		if (!file_exists(table)) {
164 		    if (out) {
165 			if (strcmp(name, "termlist") == 0) {
166 			    *out << "Not present.\n";
167 			} else {
168 			    *out << "Lazily created, and not yet used.\n";
169 			}
170 			*out << endl;
171 		    }
172 		    continue;
173 		}
174 	    }
175 	    errors += check_chert_table(name, path, rev_ptr, opts, doclens,
176 					doccount, db_last_docid, out);
177 	}
178 
179 	if (errors == pre_table_check_errors && (opts & Xapian::DBCHECK_FIX)) {
180 	    // Check the version file is OK and if not, recreate it.
181 	    ChertVersion iam(path);
182 	    try {
183 		iam.read_and_check();
184 	    } catch (const Xapian::DatabaseError &) {
185 		iam.create();
186 	    }
187 	}
188 	return errors;
189 #endif
190     }
191 
192     if (stat((path + "/iamglass").c_str(), &sb) == 0) {
193 #ifndef XAPIAN_HAS_GLASS_BACKEND
194 	(void)opts;
195 	(void)out;
196 	throw Xapian::FeatureUnavailableError("Glass database support isn't enabled");
197 #else
198 	// Check a whole glass database directory.
199 	vector<Xapian::termcount> doclens;
200 	size_t errors = 0;
201 
202 	try {
203 	    // Check if the database can actually be opened.
204 	    Xapian::Database db(path);
205 	} catch (const Xapian::Error & e) {
206 	    // Continue - we can still usefully look at how it is broken.
207 	    if (out)
208 		*out << "Database couldn't be opened for reading: "
209 		     << e.get_description()
210 		     << "\nContinuing check anyway" << endl;
211 	    ++errors;
212 	}
213 
214 	GlassVersion version_file(path);
215 	version_file.read();
216 	for (glass_revision_number_t r = version_file.get_revision(); r != 0; --r) {
217 	    string changes_file = path;
218 	    changes_file += "/changes";
219 	    changes_file += str(r);
220 	    if (file_exists(changes_file))
221 		GlassChanges::check(changes_file);
222 	}
223 
224 	Xapian::docid doccount = version_file.get_doccount();
225 	Xapian::docid db_last_docid = version_file.get_last_docid();
226 	if (db_last_docid < doccount) {
227 	    if (out)
228 		*out << "last_docid = " << db_last_docid << " < doccount = "
229 		     << doccount << endl;
230 	    ++errors;
231 	}
232 	reserve_doclens(doclens, db_last_docid, out);
233 
234 	// Check all the tables.
235 	for (auto t : glass_tables) {
236 	    errors += check_glass_table(t.name, path, version_file, opts,
237 					doclens, out);
238 	}
239 	return errors;
240 #endif
241     }
242 
243     if (stat((path + "/iamflint").c_str(), &sb) == 0) {
244 	// Flint is no longer supported as of Xapian 1.3.0.
245 	throw Xapian::FeatureUnavailableError("Flint database support was removed in Xapian 1.3.0");
246     }
247 
248     if (stat((path + "/iambrass").c_str(), &sb) == 0) {
249 	// Brass was renamed to glass as of Xapian 1.3.2.
250 	throw Xapian::FeatureUnavailableError("Brass database support was removed in Xapian 1.3.2");
251     }
252 
253     if (stat((path + "/record_DB").c_str(), &sb) == 0) {
254 	// Quartz is no longer supported as of Xapian 1.1.0.
255 	throw Xapian::FeatureUnavailableError("Quartz database support was removed in Xapian 1.1.0");
256     }
257 
258     throw Xapian::DatabaseOpeningError(
259 	    "Directory does not contain a Xapian database");
260 }
261 
262 /** Check a database table.
263  *
264  *  @param filename	The filename of the table (only used to get the directory and
265  *  @param opts		Xapian::check() options
266  *  @param out		std::ostream to write messages to (or NULL for no messages)
267  *  @param backend	Backend type (a BACKEND_XXX constant)
268  */
269 static size_t
check_db_table(const string & filename,int opts,std::ostream * out,int backend)270 check_db_table(const string& filename, int opts, std::ostream* out, int backend)
271 {
272     size_t p = filename.find_last_of(DIR_SEPS);
273     // If we found a directory separator, advance p to the next character.  If
274     // we didn't, incrementing string::npos will give us 0, which is what we
275     // want.
276     ++p;
277 
278     string dir(filename, 0, p);
279 
280     string tablename;
281     while (p != filename.size()) {
282 	char ch = filename[p++];
283 	if (ch == '.') break;
284 	tablename += C_tolower(ch);
285     }
286 
287 #if defined XAPIAN_HAS_CHERT_BACKEND || defined XAPIAN_HAS_GLASS_BACKEND
288     vector<Xapian::termcount> doclens;
289 #else
290     (void)opts;
291     (void)out;
292 #endif
293 
294     switch (backend) {
295       case BACKEND_GLASS: {
296 #ifndef XAPIAN_HAS_GLASS_BACKEND
297 	auto msg = "Glass database support isn't enabled";
298 	throw Xapian::FeatureUnavailableError(msg);
299 #else
300 	GlassVersion version_file(dir);
301 	version_file.read();
302 	return check_glass_table(tablename.c_str(), dir, version_file, opts,
303 				 doclens, out);
304 #endif
305       }
306 
307       case BACKEND_CHERT:
308 	break;
309 
310       default:
311 	Assert(false);
312 	break;
313     }
314 
315     // Flint and brass also used the extension ".DB", so check that we
316     // haven't been passed a single table in a flint or brass database.
317     struct stat sb;
318     if (stat((dir + "/iamflint").c_str(), &sb) == 0) {
319 	// Flint is no longer supported as of Xapian 1.3.0.
320 	throw Xapian::FeatureUnavailableError("Flint database support was removed in Xapian 1.3.0");
321     }
322     if (stat((dir + "/iambrass").c_str(), &sb) == 0) {
323 	// Brass was renamed to glass as of Xapian 1.3.2.
324 	throw Xapian::FeatureUnavailableError("Brass database support was removed in Xapian 1.3.2");
325     }
326 #ifndef XAPIAN_HAS_CHERT_BACKEND
327     throw Xapian::FeatureUnavailableError("Chert database support isn't enabled");
328 #else
329     // Set the doccount and the last docid to their maximum values to suppress
330     // errors.
331     return check_chert_table(tablename.c_str(), dir, NULL, opts, doclens,
332 			     Xapian::doccount(-1), CHERT_MAX_DOCID, out);
333 #endif
334 }
335 
336 /** Check a single file DB from an fd.
337  *
338  *  Closes the fd.
339  */
340 static size_t
check_db_fd(int fd,int opts,std::ostream * out,int backend)341 check_db_fd(int fd, int opts, std::ostream* out, int backend)
342 {
343     if (backend == BACKEND_UNKNOWN) {
344 	// FIXME: Actually probe.
345 	backend = BACKEND_GLASS;
346     }
347 
348     size_t errors = 0;
349     switch (backend) {
350       case BACKEND_GLASS: {
351 	// Check a single-file glass database.
352 #ifdef XAPIAN_HAS_GLASS_BACKEND
353 	// GlassVersion's destructor will close fd.
354 	GlassVersion version_file(fd);
355 	version_file.read();
356 
357 	Xapian::docid doccount = version_file.get_doccount();
358 	Xapian::docid db_last_docid = version_file.get_last_docid();
359 	if (db_last_docid < doccount) {
360 	    if (out)
361 		*out << "last_docid = " << db_last_docid << " < doccount = "
362 		     << doccount << endl;
363 	    ++errors;
364 	}
365 	vector<Xapian::termcount> doclens;
366 	reserve_doclens(doclens, db_last_docid, out);
367 
368 	// Check all the tables.
369 	for (auto t : glass_tables) {
370 	    errors += check_glass_table(t.name, fd, version_file.get_offset(),
371 					version_file, opts, doclens,
372 					out);
373 	}
374 	break;
375 #else
376 	(void)opts;
377 	(void)out;
378 	::close(fd);
379 	throw Xapian::FeatureUnavailableError("Glass database support isn't enabled");
380 #endif
381       }
382       default:
383 	Assert(false);
384     }
385     return errors;
386 }
387 
388 namespace Xapian {
389 
390 static size_t
check_stub(const string & stub_path,int opts,std::ostream * out)391 check_stub(const string& stub_path, int opts, std::ostream* out)
392 {
393     size_t errors = 0;
394     read_stub_file(stub_path,
395 		   [&errors, opts, out](const string& path) {
396 		       errors += Database::check(path, opts, out);
397 		   },
398 		   [&errors, opts, out](const string& path) {
399 		       // FIXME: Doesn't check the database type is chert.
400 		       errors += Database::check(path, opts, out);
401 		   },
402 		   [&errors, opts, out](const string& path) {
403 		       // FIXME: Doesn't check the database type is glass.
404 		       errors += Database::check(path, opts, out);
405 		   },
406 		   [](const string&, const string&) {
407 		       auto msg = "Remote database checking not implemented";
408 		       throw Xapian::UnimplementedError(msg);
409 		   },
410 		   [](const string&, unsigned) {
411 		       auto msg = "Remote database checking not implemented";
412 		       throw Xapian::UnimplementedError(msg);
413 		   },
414 		   []() {
415 		       auto msg = "InMemory database checking not implemented";
416 		       throw Xapian::UnimplementedError(msg);
417 		   });
418     return errors;
419 }
420 
421 size_t
check_(const string * path_ptr,int fd,int opts,std::ostream * out)422 Database::check_(const string * path_ptr, int fd, int opts, std::ostream *out)
423 {
424     if (!out) {
425 	// If we have nowhere to write output, then disable all the options
426 	// which only affect what we output.
427 	opts &= Xapian::DBCHECK_FIX;
428     }
429 
430     if (path_ptr == NULL) {
431 	return check_db_fd(fd, opts, out, BACKEND_UNKNOWN);
432     }
433 
434     const string & path = *path_ptr;
435     struct stat sb;
436     if (stat(path.c_str(), &sb) == 0) {
437 	if (S_ISDIR(sb.st_mode)) {
438 	    return check_db_dir(path, opts, out);
439 	}
440 
441 	if (S_ISREG(sb.st_mode)) {
442 	    int backend = test_if_single_file_db(sb, path, &fd);
443 	    if (backend != BACKEND_UNKNOWN) {
444 		return check_db_fd(fd, opts, out, backend);
445 	    }
446 	    // Could be a single table or a stub database file.  Look at the
447 	    // extension to determine the type.
448 	    if (endswith(path, ".DB")) {
449 		// It could also be flint or brass, but we check for those below.
450 		backend = BACKEND_CHERT;
451 	    } else if (endswith(path, "." GLASS_TABLE_EXTENSION)) {
452 		backend = BACKEND_GLASS;
453 	    } else {
454 		return check_stub(path, opts, out);
455 	    }
456 
457 	    return check_db_table(path, opts, out, backend);
458 	}
459 
460 	throw Xapian::DatabaseOpeningError("Not a regular file or directory");
461     }
462 
463     // The filename passed doesn't exist - see if it's the basename of the
464     // table (perhaps with "." after it), so the user can do xapian-check on
465     // "foo/termlist" or "foo/termlist." (which you would get from filename
466     // completion with older backends).
467     string filename = path;
468     if (endswith(filename, '.')) {
469 	filename.resize(filename.size() - 1);
470     }
471 
472     int backend = BACKEND_UNKNOWN;
473     if (stat((filename + ".DB").c_str(), &sb) == 0) {
474 	// It could also be flint or brass, but we check for those below.
475 	backend = BACKEND_CHERT;
476     } else if (stat((filename + "." GLASS_TABLE_EXTENSION).c_str(), &sb) == 0) {
477 	backend = BACKEND_GLASS;
478     } else {
479 	auto msg = "Couldn't find Xapian database or table to check";
480 	throw Xapian::DatabaseOpeningError(msg, ENOENT);
481     }
482 
483     return check_db_table(path, opts, out, backend);
484 }
485 
486 }
487