1 /** @file
2  * @brief GlassVersion class
3  */
4 /* Copyright (C) 2006,2007,2008,2009,2010,2013,2014,2015,2016,2017 Olly Betts
5  * Copyright (C) 2011 Dan Colish
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
20  */
21 
22 #include <config.h>
23 
24 #include "glass_version.h"
25 
26 #include "debuglog.h"
27 #include "fd.h"
28 #include "io_utils.h"
29 #include "omassert.h"
30 #include "pack.h"
31 #include "posixy_wrapper.h"
32 #include "stringutils.h" // For STRINGIZE() and CONST_STRLEN().
33 
34 #include <cerrno>
35 #include <cstring> // For memcmp().
36 #include <string>
37 #include <sys/types.h>
38 #include "safesysstat.h"
39 #include "safefcntl.h"
40 #include "safeunistd.h"
41 #include "str.h"
42 #include "stringutils.h"
43 
44 #include "backends/uuids.h"
45 
46 #include "xapian/constants.h"
47 #include "xapian/error.h"
48 
49 using namespace std;
50 
/// Convert date <-> version number.  Dates up to 2141-12-31 fit in 2 bytes.
///
/// The day occupies the low 5 bits, the month the next 4 bits, and the number
/// of years since 2014 the bits above those.
#define DATE_TO_VERSION(Y,M,D) \
	(((unsigned(Y) - 2014) << 9) | (unsigned(M) << 5) | unsigned(D))
#define VERSION_TO_DAY(V) (unsigned(V) & 0x1f)
#define VERSION_TO_MONTH(V) ((unsigned(V) >> 5) & 0x0f)
#define VERSION_TO_YEAR(V) ((unsigned(V) >> 9) + 2014)

/// Glass format version (date of change):
#define GLASS_FORMAT_VERSION DATE_TO_VERSION(2016,03,14)
// 2016,03,14 1.3.5 compress_min in version file; partly eliminate component_of
// 2015,12,24 1.3.4 2 bytes "components_of" per item eliminated, and much more
// 2014,11,21 1.3.2 Brass renamed to Glass

/// Length in bytes of the magic string alone.
#define GLASS_VERSION_MAGIC_LEN 14
/// Length in bytes of the magic string plus the 2 byte format version.
#define GLASS_VERSION_MAGIC_AND_VERSION_LEN 16

/// Magic string followed by the format version, most significant byte first.
static const char GLASS_VERSION_MAGIC[GLASS_VERSION_MAGIC_AND_VERSION_LEN] = {
    '\x0f', '\x0d',
    'X', 'a', 'p', 'i', 'a', 'n',
    ' ',
    'G', 'l', 'a', 's', 's',
    char((GLASS_FORMAT_VERSION >> 8) & 0xff),
    char(GLASS_FORMAT_VERSION & 0xff)
};
71 
GlassVersion(int fd_)72 GlassVersion::GlassVersion(int fd_)
73     : rev(0), fd(fd_), offset(0), db_dir(), changes(NULL),
74       doccount(0), total_doclen(0), last_docid(0),
75       doclen_lbound(0), doclen_ubound(0),
76       wdf_ubound(0), spelling_wordfreq_ubound(0),
77       oldest_changeset(0)
78 {
79     offset = lseek(fd, 0, SEEK_CUR);
80     if (rare(offset < 0)) {
81 	string msg = "lseek failed on file descriptor ";
82 	msg += str(fd);
83 	throw Xapian::DatabaseOpeningError(msg, errno);
84     }
85 }
86 
~GlassVersion()87 GlassVersion::~GlassVersion()
88 {
89     // Either this is a single-file database, or this fd is from opening a new
90     // version file in write(), but sync() was never called.
91     if (fd != -1)
92 	(void)::close(fd);
93 }
94 
95 void
read()96 GlassVersion::read()
97 {
98     LOGCALL_VOID(DB, "GlassVersion::read", NO_ARGS);
99     FD close_fd(-1);
100     int fd_in;
101     if (single_file()) {
102 	if (rare(lseek(fd, offset, SEEK_SET) < 0)) {
103 	    string msg = "Failed to rewind file descriptor ";
104 	    msg += str(fd);
105 	    throw Xapian::DatabaseOpeningError(msg, errno);
106 	}
107 	fd_in = fd;
108     } else {
109 	string filename = db_dir;
110 	filename += "/iamglass";
111 	fd_in = posixy_open(filename.c_str(), O_RDONLY|O_BINARY);
112 	if (rare(fd_in < 0)) {
113 	    string msg = filename;
114 	    msg += ": Failed to open glass revision file for reading";
115 	    if (errno == ENOENT || errno == ENOTDIR) {
116 		throw Xapian::DatabaseNotFoundError(msg, errno);
117 	    }
118 	    throw Xapian::DatabaseOpeningError(msg, errno);
119 	}
120 	close_fd = fd_in;
121     }
122 
123     char buf[256];
124 
125     const char * p = buf;
126     const char * end = p + io_read(fd_in, buf, sizeof(buf), 33);
127 
128     if (memcmp(buf, GLASS_VERSION_MAGIC, GLASS_VERSION_MAGIC_LEN) != 0)
129 	throw Xapian::DatabaseCorruptError("Rev file magic incorrect");
130 
131     unsigned version;
132     version = static_cast<unsigned char>(buf[GLASS_VERSION_MAGIC_LEN]);
133     version <<= 8;
134     version |= static_cast<unsigned char>(buf[GLASS_VERSION_MAGIC_LEN + 1]);
135     if (version != GLASS_FORMAT_VERSION) {
136 	string msg;
137 	if (!single_file()) {
138 	    msg = db_dir;
139 	    msg += ": ";
140 	}
141 	msg += "Database is format version ";
142 	msg += str(VERSION_TO_YEAR(version) * 10000 +
143 		   VERSION_TO_MONTH(version) * 100 +
144 		   VERSION_TO_DAY(version));
145 	msg += " but I only understand ";
146 	msg += str(VERSION_TO_YEAR(GLASS_FORMAT_VERSION) * 10000 +
147 		   VERSION_TO_MONTH(GLASS_FORMAT_VERSION) * 100 +
148 		   VERSION_TO_DAY(GLASS_FORMAT_VERSION));
149 	throw Xapian::DatabaseVersionError(msg);
150     }
151 
152     p += GLASS_VERSION_MAGIC_AND_VERSION_LEN;
153     uuid.assign(p);
154     p += uuid.BINARY_SIZE;
155 
156     if (!unpack_uint(&p, end, &rev))
157 	throw Xapian::DatabaseCorruptError("Rev file failed to decode revision");
158 
159     for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
160 	if (!root[table_no].unserialise(&p, end)) {
161 	    throw Xapian::DatabaseCorruptError("Rev file root_info missing");
162 	}
163 	old_root[table_no] = root[table_no];
164     }
165 
166     // For a single-file database, this will assign extra data.  We read
167     // sizeof(buf) above, then skip GLASS_VERSION_MAGIC_AND_VERSION_LEN,
168     // then 16, then the size of the serialised root info.
169     serialised_stats.assign(p, end);
170     unserialise_stats();
171 }
172 
173 void
serialise_stats()174 GlassVersion::serialise_stats()
175 {
176     serialised_stats.resize(0);
177     pack_uint(serialised_stats, doccount);
178     // last_docid must always be >= doccount.
179     pack_uint(serialised_stats, last_docid - doccount);
180     pack_uint(serialised_stats, doclen_lbound);
181     pack_uint(serialised_stats, wdf_ubound);
182     // doclen_ubound should always be >= wdf_ubound, so we store the
183     // difference as it may encode smaller.  wdf_ubound is likely to
184     // be larger than doclen_lbound.
185     pack_uint(serialised_stats, doclen_ubound - wdf_ubound);
186     pack_uint(serialised_stats, oldest_changeset);
187     pack_uint(serialised_stats, total_doclen);
188     pack_uint(serialised_stats, spelling_wordfreq_ubound);
189 }
190 
191 void
unserialise_stats()192 GlassVersion::unserialise_stats()
193 {
194     const char * p = serialised_stats.data();
195     const char * end = p + serialised_stats.size();
196     if (p == end) {
197 	doccount = 0;
198 	total_doclen = 0;
199 	last_docid = 0;
200 	doclen_lbound = 0;
201 	doclen_ubound = 0;
202 	wdf_ubound = 0;
203 	oldest_changeset = 0;
204 	spelling_wordfreq_ubound = 0;
205 	return;
206     }
207 
208     if (!unpack_uint(&p, end, &doccount) ||
209 	!unpack_uint(&p, end, &last_docid) ||
210 	!unpack_uint(&p, end, &doclen_lbound) ||
211 	!unpack_uint(&p, end, &wdf_ubound) ||
212 	!unpack_uint(&p, end, &doclen_ubound) ||
213 	!unpack_uint(&p, end, &oldest_changeset) ||
214 	!unpack_uint(&p, end, &total_doclen) ||
215 	!unpack_uint(&p, end, &spelling_wordfreq_ubound)) {
216 	const char * m = p ?
217 	    "Bad serialised DB stats (overflowed)" :
218 	    "Bad serialised DB stats (out of data)";
219 	throw Xapian::DatabaseCorruptError(m);
220     }
221 
222     // In the single-file DB case, there will be extra data in
223     // serialised_stats, so suppress this check.
224     if (p != end && !single_file())
225 	throw Xapian::DatabaseCorruptError("Rev file has junk at end");
226 
227     // last_docid must always be >= doccount.
228     last_docid += doccount;
229     // doclen_ubound should always be >= wdf_ubound, so we store the
230     // difference as it may encode smaller.  wdf_ubound is likely to
231     // be larger than doclen_lbound.
232     doclen_ubound += wdf_ubound;
233 }
234 
235 void
merge_stats(const GlassVersion & o)236 GlassVersion::merge_stats(const GlassVersion & o)
237 {
238     doccount += o.get_doccount();
239     if (doccount < o.get_doccount()) {
240 	throw Xapian::DatabaseError("doccount overflowed!");
241     }
242 
243     Xapian::termcount o_doclen_lbound = o.get_doclength_lower_bound();
244     if (o_doclen_lbound > 0) {
245 	if (doclen_lbound == 0 || o_doclen_lbound < doclen_lbound)
246 	    doclen_lbound = o_doclen_lbound;
247     }
248 
249     doclen_ubound = max(doclen_ubound, o.get_doclength_upper_bound());
250     wdf_ubound = max(wdf_ubound, o.get_wdf_upper_bound());
251     total_doclen += o.get_total_doclen();
252     if (total_doclen < o.get_total_doclen()) {
253 	throw Xapian::DatabaseError("Total document length overflowed!");
254     }
255 
256     // The upper bounds might be on the same word, so we must sum them.
257     spelling_wordfreq_ubound += o.get_spelling_wordfreq_upper_bound();
258 }
259 
260 void
cancel()261 GlassVersion::cancel()
262 {
263     LOGCALL_VOID(DB, "GlassVersion::cancel", NO_ARGS);
264     for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
265 	root[table_no] = old_root[table_no];
266     }
267     unserialise_stats();
268 }
269 
270 const string
write(glass_revision_number_t new_rev,int flags)271 GlassVersion::write(glass_revision_number_t new_rev, int flags)
272 {
273     LOGCALL(DB, const string, "GlassVersion::write", new_rev|flags);
274 
275     string s(GLASS_VERSION_MAGIC, GLASS_VERSION_MAGIC_AND_VERSION_LEN);
276     s.append(uuid.data(), uuid.BINARY_SIZE);
277 
278     pack_uint(s, new_rev);
279 
280     for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
281 	root[table_no].serialise(s);
282     }
283 
284     // Serialise database statistics.
285     serialise_stats();
286     s += serialised_stats;
287 
288     string tmpfile;
289     if (!single_file()) {
290 	tmpfile = db_dir;
291 	// In dangerous mode, just write the new version file in place.
292 	if (flags & Xapian::DB_DANGEROUS)
293 	    tmpfile += "/iamglass";
294 	else
295 	    tmpfile += "/v.tmp";
296 
297 	fd = posixy_open(tmpfile.c_str(), O_CREAT|O_TRUNC|O_WRONLY|O_BINARY, 0666);
298 	if (rare(fd < 0))
299 	    throw Xapian::DatabaseOpeningError("Couldn't write new rev file: " + tmpfile,
300 					       errno);
301 
302 	if (flags & Xapian::DB_DANGEROUS)
303 	    tmpfile = string();
304     }
305 
306     try {
307 	io_write(fd, s.data(), s.size());
308     } catch (...) {
309 	if (!single_file())
310 	    (void)close(fd);
311 	throw;
312     }
313 
314     if (changes) {
315 	string changes_buf;
316 	changes_buf += '\xfe';
317 	pack_uint(changes_buf, new_rev);
318 	pack_uint(changes_buf, s.size());
319 	changes->write_block(changes_buf);
320 	changes->write_block(s);
321     }
322 
323     RETURN(tmpfile);
324 }
325 
326 bool
sync(const string & tmpfile,glass_revision_number_t new_rev,int flags)327 GlassVersion::sync(const string & tmpfile,
328 		   glass_revision_number_t new_rev, int flags)
329 {
330     Assert(new_rev > rev || rev == 0);
331 
332     if (single_file()) {
333 	if ((flags & Xapian::DB_NO_SYNC) == 0 &&
334 	    ((flags & Xapian::DB_FULL_SYNC) ?
335 	      !io_full_sync(fd) :
336 	      !io_sync(fd))) {
337 	    // FIXME what to do?
338 	}
339     } else {
340 	int fd_to_close = fd;
341 	fd = -1;
342 	if ((flags & Xapian::DB_NO_SYNC) == 0 &&
343 	    ((flags & Xapian::DB_FULL_SYNC) ?
344 	      !io_full_sync(fd_to_close) :
345 	      !io_sync(fd_to_close))) {
346 	    int save_errno = errno;
347 	    (void)close(fd_to_close);
348 	    if (!tmpfile.empty())
349 		(void)unlink(tmpfile.c_str());
350 	    errno = save_errno;
351 	    return false;
352 	}
353 
354 	if (close(fd_to_close) != 0) {
355 	    if (!tmpfile.empty()) {
356 		int save_errno = errno;
357 		(void)unlink(tmpfile.c_str());
358 		errno = save_errno;
359 	    }
360 	    return false;
361 	}
362 
363 	if (!tmpfile.empty()) {
364 	    if (!io_tmp_rename(tmpfile, db_dir + "/iamglass")) {
365 		return false;
366 	    }
367 	}
368     }
369 
370     for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
371 	old_root[table_no] = root[table_no];
372     }
373 
374     rev = new_rev;
375     return true;
376 }
377 
378 /* Only try to compress tags strictly longer than this many bytes.
379  *
380  * This can theoretically usefully be set as low as 4, but in practical terms
381  * zlib can't compress in very many cases for short inputs and even when it can
382  * the savings are small, so we default to a higher threshold to save CPU time
383  * for marginal size reductions.
384  */
385 const size_t COMPRESS_MIN = 18;
386 
387 static const uint4 compress_min_tab[] = {
388     0, // POSTLIST
389     COMPRESS_MIN, // DOCDATA
390     COMPRESS_MIN, // TERMLIST
391     0, // POSITION
392     COMPRESS_MIN, // SPELLING
393     COMPRESS_MIN  // SYNONYM
394 };
395 
396 void
create(unsigned blocksize)397 GlassVersion::create(unsigned blocksize)
398 {
399     AssertRel(blocksize,>=,2048);
400     uuid.generate();
401     for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
402 	root[table_no].init(blocksize, compress_min_tab[table_no]);
403     }
404 }
405 
406 namespace Glass {
407 
408 void
init(unsigned blocksize_,uint4 compress_min_)409 RootInfo::init(unsigned blocksize_, uint4 compress_min_)
410 {
411     AssertRel(blocksize_,>=,2048);
412     root = 0;
413     level = 0;
414     num_entries = 0;
415     root_is_fake = true;
416     sequential = true;
417     blocksize = blocksize_;
418     compress_min = compress_min_;
419     fl_serialised.resize(0);
420 }
421 
422 void
serialise(string & s) const423 RootInfo::serialise(string &s) const
424 {
425     pack_uint(s, root);
426     unsigned val = level << 2;
427     if (sequential) val |= 0x02;
428     if (root_is_fake) val |= 0x01;
429     pack_uint(s, val);
430     pack_uint(s, num_entries);
431     pack_uint(s, blocksize >> 11);
432     pack_uint(s, compress_min);
433     pack_string(s, fl_serialised);
434 }
435 
436 bool
unserialise(const char ** p,const char * end)437 RootInfo::unserialise(const char ** p, const char * end)
438 {
439     unsigned val;
440     if (!unpack_uint(p, end, &root) ||
441 	!unpack_uint(p, end, &val) ||
442 	!unpack_uint(p, end, &num_entries) ||
443 	!unpack_uint(p, end, &blocksize) ||
444 	!unpack_uint(p, end, &compress_min) ||
445 	!unpack_string(p, end, fl_serialised)) return false;
446     level = val >> 2;
447     sequential = val & 0x02;
448     root_is_fake = val & 0x01;
449     blocksize <<= 11;
450     AssertRel(blocksize,>=,2048);
451     // Map old default to new default.
452     if (compress_min == 4) {
453 	compress_min = COMPRESS_MIN;
454     }
455     return true;
456 }
457 
458 }
459