1 /** @file
2 * @brief GlassVersion class
3 */
4 /* Copyright (C) 2006,2007,2008,2009,2010,2013,2014,2015,2016,2017 Olly Betts
5 * Copyright (C) 2011 Dan Colish
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include <config.h>
23
24 #include "glass_version.h"
25
26 #include "debuglog.h"
27 #include "fd.h"
28 #include "io_utils.h"
29 #include "omassert.h"
30 #include "pack.h"
31 #include "posixy_wrapper.h"
32 #include "stringutils.h" // For STRINGIZE() and CONST_STRLEN().
33
34 #include <cerrno>
35 #include <cstring> // For memcmp().
36 #include <string>
37 #include <sys/types.h>
38 #include "safesysstat.h"
39 #include "safefcntl.h"
40 #include "safeunistd.h"
41 #include "str.h"
42 #include "stringutils.h"
43
44 #include "backends/uuids.h"
45
46 #include "xapian/constants.h"
47 #include "xapian/error.h"
48
49 using namespace std;
50
/// Convert date <-> version number.  Dates up to 2141-12-31 fit in 2 bytes.
///
/// The year (relative to 2014) is stored from bit 9 upwards, the month in
/// bits 5-8 and the day of the month in bits 0-4.
#define DATE_TO_VERSION(Y,M,D) \
    ((unsigned(Y) - 2014) << 9 | unsigned(M) << 5 | unsigned(D))
#define VERSION_TO_YEAR(V) ((unsigned(V) >> 9) + 2014)
#define VERSION_TO_MONTH(V) ((unsigned(V) >> 5) & 0x0f)
#define VERSION_TO_DAY(V) (unsigned(V) & 0x1f)

/// Glass format version (date of change):
#define GLASS_FORMAT_VERSION DATE_TO_VERSION(2016,03,14)
// 2016,03,14 1.3.5 compress_min in version file; partly eliminate component_of
// 2015,12,24 1.3.4 2 bytes "components_of" per item eliminated, and much more
// 2014,11,21 1.3.2 Brass renamed to Glass

/// Number of bytes of fixed magic at the start of the version file.
#define GLASS_VERSION_MAGIC_LEN 14
/// The fixed magic plus the two bytes of format version which follow it.
#define GLASS_VERSION_MAGIC_AND_VERSION_LEN (GLASS_VERSION_MAGIC_LEN + 2)

/// Fixed magic followed by the current format version, high byte first.
static const char GLASS_VERSION_MAGIC[GLASS_VERSION_MAGIC_AND_VERSION_LEN] = {
    '\x0f', '\x0d',
    'X', 'a', 'p', 'i', 'a', 'n', ' ', 'G', 'l', 'a', 's', 's',
    char((GLASS_FORMAT_VERSION >> 8) & 0xff),
    char(GLASS_FORMAT_VERSION & 0xff)
};
71
GlassVersion(int fd_)72 GlassVersion::GlassVersion(int fd_)
73 : rev(0), fd(fd_), offset(0), db_dir(), changes(NULL),
74 doccount(0), total_doclen(0), last_docid(0),
75 doclen_lbound(0), doclen_ubound(0),
76 wdf_ubound(0), spelling_wordfreq_ubound(0),
77 oldest_changeset(0)
78 {
79 offset = lseek(fd, 0, SEEK_CUR);
80 if (rare(offset < 0)) {
81 string msg = "lseek failed on file descriptor ";
82 msg += str(fd);
83 throw Xapian::DatabaseOpeningError(msg, errno);
84 }
85 }
86
~GlassVersion()87 GlassVersion::~GlassVersion()
88 {
89 // Either this is a single-file database, or this fd is from opening a new
90 // version file in write(), but sync() was never called.
91 if (fd != -1)
92 (void)::close(fd);
93 }
94
95 void
read()96 GlassVersion::read()
97 {
98 LOGCALL_VOID(DB, "GlassVersion::read", NO_ARGS);
99 FD close_fd(-1);
100 int fd_in;
101 if (single_file()) {
102 if (rare(lseek(fd, offset, SEEK_SET) < 0)) {
103 string msg = "Failed to rewind file descriptor ";
104 msg += str(fd);
105 throw Xapian::DatabaseOpeningError(msg, errno);
106 }
107 fd_in = fd;
108 } else {
109 string filename = db_dir;
110 filename += "/iamglass";
111 fd_in = posixy_open(filename.c_str(), O_RDONLY|O_BINARY);
112 if (rare(fd_in < 0)) {
113 string msg = filename;
114 msg += ": Failed to open glass revision file for reading";
115 if (errno == ENOENT || errno == ENOTDIR) {
116 throw Xapian::DatabaseNotFoundError(msg, errno);
117 }
118 throw Xapian::DatabaseOpeningError(msg, errno);
119 }
120 close_fd = fd_in;
121 }
122
123 char buf[256];
124
125 const char * p = buf;
126 const char * end = p + io_read(fd_in, buf, sizeof(buf), 33);
127
128 if (memcmp(buf, GLASS_VERSION_MAGIC, GLASS_VERSION_MAGIC_LEN) != 0)
129 throw Xapian::DatabaseCorruptError("Rev file magic incorrect");
130
131 unsigned version;
132 version = static_cast<unsigned char>(buf[GLASS_VERSION_MAGIC_LEN]);
133 version <<= 8;
134 version |= static_cast<unsigned char>(buf[GLASS_VERSION_MAGIC_LEN + 1]);
135 if (version != GLASS_FORMAT_VERSION) {
136 string msg;
137 if (!single_file()) {
138 msg = db_dir;
139 msg += ": ";
140 }
141 msg += "Database is format version ";
142 msg += str(VERSION_TO_YEAR(version) * 10000 +
143 VERSION_TO_MONTH(version) * 100 +
144 VERSION_TO_DAY(version));
145 msg += " but I only understand ";
146 msg += str(VERSION_TO_YEAR(GLASS_FORMAT_VERSION) * 10000 +
147 VERSION_TO_MONTH(GLASS_FORMAT_VERSION) * 100 +
148 VERSION_TO_DAY(GLASS_FORMAT_VERSION));
149 throw Xapian::DatabaseVersionError(msg);
150 }
151
152 p += GLASS_VERSION_MAGIC_AND_VERSION_LEN;
153 uuid.assign(p);
154 p += uuid.BINARY_SIZE;
155
156 if (!unpack_uint(&p, end, &rev))
157 throw Xapian::DatabaseCorruptError("Rev file failed to decode revision");
158
159 for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
160 if (!root[table_no].unserialise(&p, end)) {
161 throw Xapian::DatabaseCorruptError("Rev file root_info missing");
162 }
163 old_root[table_no] = root[table_no];
164 }
165
166 // For a single-file database, this will assign extra data. We read
167 // sizeof(buf) above, then skip GLASS_VERSION_MAGIC_AND_VERSION_LEN,
168 // then 16, then the size of the serialised root info.
169 serialised_stats.assign(p, end);
170 unserialise_stats();
171 }
172
173 void
serialise_stats()174 GlassVersion::serialise_stats()
175 {
176 serialised_stats.resize(0);
177 pack_uint(serialised_stats, doccount);
178 // last_docid must always be >= doccount.
179 pack_uint(serialised_stats, last_docid - doccount);
180 pack_uint(serialised_stats, doclen_lbound);
181 pack_uint(serialised_stats, wdf_ubound);
182 // doclen_ubound should always be >= wdf_ubound, so we store the
183 // difference as it may encode smaller. wdf_ubound is likely to
184 // be larger than doclen_lbound.
185 pack_uint(serialised_stats, doclen_ubound - wdf_ubound);
186 pack_uint(serialised_stats, oldest_changeset);
187 pack_uint(serialised_stats, total_doclen);
188 pack_uint(serialised_stats, spelling_wordfreq_ubound);
189 }
190
191 void
unserialise_stats()192 GlassVersion::unserialise_stats()
193 {
194 const char * p = serialised_stats.data();
195 const char * end = p + serialised_stats.size();
196 if (p == end) {
197 doccount = 0;
198 total_doclen = 0;
199 last_docid = 0;
200 doclen_lbound = 0;
201 doclen_ubound = 0;
202 wdf_ubound = 0;
203 oldest_changeset = 0;
204 spelling_wordfreq_ubound = 0;
205 return;
206 }
207
208 if (!unpack_uint(&p, end, &doccount) ||
209 !unpack_uint(&p, end, &last_docid) ||
210 !unpack_uint(&p, end, &doclen_lbound) ||
211 !unpack_uint(&p, end, &wdf_ubound) ||
212 !unpack_uint(&p, end, &doclen_ubound) ||
213 !unpack_uint(&p, end, &oldest_changeset) ||
214 !unpack_uint(&p, end, &total_doclen) ||
215 !unpack_uint(&p, end, &spelling_wordfreq_ubound)) {
216 const char * m = p ?
217 "Bad serialised DB stats (overflowed)" :
218 "Bad serialised DB stats (out of data)";
219 throw Xapian::DatabaseCorruptError(m);
220 }
221
222 // In the single-file DB case, there will be extra data in
223 // serialised_stats, so suppress this check.
224 if (p != end && !single_file())
225 throw Xapian::DatabaseCorruptError("Rev file has junk at end");
226
227 // last_docid must always be >= doccount.
228 last_docid += doccount;
229 // doclen_ubound should always be >= wdf_ubound, so we store the
230 // difference as it may encode smaller. wdf_ubound is likely to
231 // be larger than doclen_lbound.
232 doclen_ubound += wdf_ubound;
233 }
234
235 void
merge_stats(const GlassVersion & o)236 GlassVersion::merge_stats(const GlassVersion & o)
237 {
238 doccount += o.get_doccount();
239 if (doccount < o.get_doccount()) {
240 throw Xapian::DatabaseError("doccount overflowed!");
241 }
242
243 Xapian::termcount o_doclen_lbound = o.get_doclength_lower_bound();
244 if (o_doclen_lbound > 0) {
245 if (doclen_lbound == 0 || o_doclen_lbound < doclen_lbound)
246 doclen_lbound = o_doclen_lbound;
247 }
248
249 doclen_ubound = max(doclen_ubound, o.get_doclength_upper_bound());
250 wdf_ubound = max(wdf_ubound, o.get_wdf_upper_bound());
251 total_doclen += o.get_total_doclen();
252 if (total_doclen < o.get_total_doclen()) {
253 throw Xapian::DatabaseError("Total document length overflowed!");
254 }
255
256 // The upper bounds might be on the same word, so we must sum them.
257 spelling_wordfreq_ubound += o.get_spelling_wordfreq_upper_bound();
258 }
259
260 void
cancel()261 GlassVersion::cancel()
262 {
263 LOGCALL_VOID(DB, "GlassVersion::cancel", NO_ARGS);
264 for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
265 root[table_no] = old_root[table_no];
266 }
267 unserialise_stats();
268 }
269
270 const string
write(glass_revision_number_t new_rev,int flags)271 GlassVersion::write(glass_revision_number_t new_rev, int flags)
272 {
273 LOGCALL(DB, const string, "GlassVersion::write", new_rev|flags);
274
275 string s(GLASS_VERSION_MAGIC, GLASS_VERSION_MAGIC_AND_VERSION_LEN);
276 s.append(uuid.data(), uuid.BINARY_SIZE);
277
278 pack_uint(s, new_rev);
279
280 for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
281 root[table_no].serialise(s);
282 }
283
284 // Serialise database statistics.
285 serialise_stats();
286 s += serialised_stats;
287
288 string tmpfile;
289 if (!single_file()) {
290 tmpfile = db_dir;
291 // In dangerous mode, just write the new version file in place.
292 if (flags & Xapian::DB_DANGEROUS)
293 tmpfile += "/iamglass";
294 else
295 tmpfile += "/v.tmp";
296
297 fd = posixy_open(tmpfile.c_str(), O_CREAT|O_TRUNC|O_WRONLY|O_BINARY, 0666);
298 if (rare(fd < 0))
299 throw Xapian::DatabaseOpeningError("Couldn't write new rev file: " + tmpfile,
300 errno);
301
302 if (flags & Xapian::DB_DANGEROUS)
303 tmpfile = string();
304 }
305
306 try {
307 io_write(fd, s.data(), s.size());
308 } catch (...) {
309 if (!single_file())
310 (void)close(fd);
311 throw;
312 }
313
314 if (changes) {
315 string changes_buf;
316 changes_buf += '\xfe';
317 pack_uint(changes_buf, new_rev);
318 pack_uint(changes_buf, s.size());
319 changes->write_block(changes_buf);
320 changes->write_block(s);
321 }
322
323 RETURN(tmpfile);
324 }
325
326 bool
sync(const string & tmpfile,glass_revision_number_t new_rev,int flags)327 GlassVersion::sync(const string & tmpfile,
328 glass_revision_number_t new_rev, int flags)
329 {
330 Assert(new_rev > rev || rev == 0);
331
332 if (single_file()) {
333 if ((flags & Xapian::DB_NO_SYNC) == 0 &&
334 ((flags & Xapian::DB_FULL_SYNC) ?
335 !io_full_sync(fd) :
336 !io_sync(fd))) {
337 // FIXME what to do?
338 }
339 } else {
340 int fd_to_close = fd;
341 fd = -1;
342 if ((flags & Xapian::DB_NO_SYNC) == 0 &&
343 ((flags & Xapian::DB_FULL_SYNC) ?
344 !io_full_sync(fd_to_close) :
345 !io_sync(fd_to_close))) {
346 int save_errno = errno;
347 (void)close(fd_to_close);
348 if (!tmpfile.empty())
349 (void)unlink(tmpfile.c_str());
350 errno = save_errno;
351 return false;
352 }
353
354 if (close(fd_to_close) != 0) {
355 if (!tmpfile.empty()) {
356 int save_errno = errno;
357 (void)unlink(tmpfile.c_str());
358 errno = save_errno;
359 }
360 return false;
361 }
362
363 if (!tmpfile.empty()) {
364 if (!io_tmp_rename(tmpfile, db_dir + "/iamglass")) {
365 return false;
366 }
367 }
368 }
369
370 for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
371 old_root[table_no] = root[table_no];
372 }
373
374 rev = new_rev;
375 return true;
376 }
377
378 /* Only try to compress tags strictly longer than this many bytes.
379 *
380 * This can theoretically usefully be set as low as 4, but in practical terms
381 * zlib can't compress in very many cases for short inputs and even when it can
382 * the savings are small, so we default to a higher threshold to save CPU time
383 * for marginal size reductions.
384 */
385 const size_t COMPRESS_MIN = 18;
386
387 static const uint4 compress_min_tab[] = {
388 0, // POSTLIST
389 COMPRESS_MIN, // DOCDATA
390 COMPRESS_MIN, // TERMLIST
391 0, // POSITION
392 COMPRESS_MIN, // SPELLING
393 COMPRESS_MIN // SYNONYM
394 };
395
396 void
create(unsigned blocksize)397 GlassVersion::create(unsigned blocksize)
398 {
399 AssertRel(blocksize,>=,2048);
400 uuid.generate();
401 for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
402 root[table_no].init(blocksize, compress_min_tab[table_no]);
403 }
404 }
405
406 namespace Glass {
407
408 void
init(unsigned blocksize_,uint4 compress_min_)409 RootInfo::init(unsigned blocksize_, uint4 compress_min_)
410 {
411 AssertRel(blocksize_,>=,2048);
412 root = 0;
413 level = 0;
414 num_entries = 0;
415 root_is_fake = true;
416 sequential = true;
417 blocksize = blocksize_;
418 compress_min = compress_min_;
419 fl_serialised.resize(0);
420 }
421
422 void
serialise(string & s) const423 RootInfo::serialise(string &s) const
424 {
425 pack_uint(s, root);
426 unsigned val = level << 2;
427 if (sequential) val |= 0x02;
428 if (root_is_fake) val |= 0x01;
429 pack_uint(s, val);
430 pack_uint(s, num_entries);
431 pack_uint(s, blocksize >> 11);
432 pack_uint(s, compress_min);
433 pack_string(s, fl_serialised);
434 }
435
436 bool
unserialise(const char ** p,const char * end)437 RootInfo::unserialise(const char ** p, const char * end)
438 {
439 unsigned val;
440 if (!unpack_uint(p, end, &root) ||
441 !unpack_uint(p, end, &val) ||
442 !unpack_uint(p, end, &num_entries) ||
443 !unpack_uint(p, end, &blocksize) ||
444 !unpack_uint(p, end, &compress_min) ||
445 !unpack_string(p, end, fl_serialised)) return false;
446 level = val >> 2;
447 sequential = val & 0x02;
448 root_is_fake = val & 0x01;
449 blocksize <<= 11;
450 AssertRel(blocksize,>=,2048);
451 // Map old default to new default.
452 if (compress_min == 4) {
453 compress_min = COMPRESS_MIN;
454 }
455 return true;
456 }
457
458 }
459