/** @file
 * @brief Inspect the contents of a glass table for development or debugging.
 */
/* Copyright (C) 2007,2008,2009,2010,2011,2012,2017,2018 Olly Betts
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */
20 
#include <config.h>

#include <cstdio> // For sprintf().
#include <ios>
#include <iostream>
#include <memory> // For unique_ptr.
#include <string>

#include "glass_cursor.h"
#include "glass_table.h"
#include "glass_version.h"
#include "filetests.h"
#include "stringutils.h"

#include <xapian.h>

#include "gnu_getopt.h"
37 
38 using namespace std;
39 
// Program name and one-line description, used in the usage, --help and
// --version output.  These stay as macros so they can be pasted together
// with adjacent string literals.
#define PROG_NAME "xapian-inspect"
#define PROG_DESC "Inspect the contents of a glass table for development or debugging"

// Codes returned by gnu_getopt_long() for the long-only options.
#define OPT_HELP 1
#define OPT_VERSION 2

// Whether show_entry() prints each entry's key; flipped by the "keys" command.
static bool keys = true;
// Whether show_entry() prints each entry's tag; flipped by the "tags" command.
static bool tags = true;
47 
show_usage()48 static void show_usage() {
49     cout << "Usage: " PROG_NAME " [OPTIONS] TABLE\n"
50 	    "       " PROG_NAME " [OPTIONS] -t TABLE DB\n\n"
51 "Options:\n"
52 "  -t, --table=TABLE  which table to inspect\n"
53 "  --help             display this help and exit\n"
54 "  --version          output version information and exit" << endl;
55 }
56 
/** Write @a data to stdout in a printable, reversible form.
 *
 *  Printable ASCII bytes (32..126) are written as-is, except that a backslash
 *  is doubled.  Newline, carriage return and tab are written as \n, \r and
 *  \t; every other byte is written as \xHH with two lowercase hex digits.
 *
 *  @param data  The raw bytes to display.
 */
static void
display_nicely(const string& data)
{
    for (auto it = data.begin(); it != data.end(); ++it) {
	// Work with the unsigned value so bytes >= 0x80 compare as 128..255.
	const unsigned char byte = static_cast<unsigned char>(*it);

	const bool printable = (byte >= 32 && byte < 127);
	if (printable) {
	    if (byte == '\\') {
		cout << "\\\\";
	    } else {
		cout << byte;
	    }
	    continue;
	}

	if (byte == '\n') {
	    cout << "\\n";
	} else if (byte == '\r') {
	    cout << "\\r";
	} else if (byte == '\t') {
	    cout << "\\t";
	} else {
	    // Generic hex escape for anything without a short form.
	    char hex[20];
	    sprintf(hex, "\\x%02x", int(byte));
	    cout << hex;
	}
    }
}
79 
80 // Reverse display_nicely() encoding.
81 static string
unescape(const string & s)82 unescape(const string& s)
83 {
84     auto bslash = s.find('\\');
85     if (bslash == string::npos)
86 	return s;
87     string r(s, 0, bslash);
88     for (auto i = s.begin() + bslash; i != s.end(); ++i) {
89 	char ch = *i;
90 	if (ch == '\\') {
91 	    if (++i == s.end())
92 		goto bad_escaping;
93 	    ch = *i;
94 	    switch (ch) {
95 		case '\\':
96 		    break;
97 		case '0':
98 		    // \0 is not output by display_nicely(), but would
99 		    // reasonably be expected to work.
100 		    ch = '\0';
101 		    break;
102 		case 'n':
103 		    ch = '\n';
104 		    break;
105 		case 'r':
106 		    ch = '\r';
107 		    break;
108 		case 't':
109 		    ch = '\t';
110 		    break;
111 		case 'x': {
112 		    if (++i == s.end())
113 			goto bad_escaping;
114 		    char ch1 = *i;
115 		    if (++i == s.end())
116 			goto bad_escaping;
117 		    char ch2 = *i;
118 		    if (!C_isxdigit(ch1) || !C_isxdigit(ch2))
119 			goto bad_escaping;
120 		    ch = hex_digit(ch1) << 4 | hex_digit(ch2);
121 		    break;
122 		}
123 		default:
124 		    goto bad_escaping;
125 	    }
126 	}
127 	r += ch;
128     }
129     return r;
130 
131 bad_escaping:
132     cout << "Bad escaping in specified key value, assuming literal"
133 	 << endl;
134     return s;
135 }
136 
/** Write the list of interactive commands to stdout and flush it. */
static void
show_help()
{
    // One entry per command, in the order shown to the user.
    static const char help_text[] =
	"Commands:\n"
	"next   : Next entry (alias 'n' or '')\n"
	"prev   : Previous entry (alias 'p')\n"
	"first  : First entry (alias 'f')\n"
	"last   : Last entry (alias 'l')\n"
	"goto K : Goto entry with key K (alias 'g')\n"
	"until K: Display entries until key K (alias 'u')\n"
	"until  : Display entries until end (alias 'u')\n"
	"open T : Open table T instead (alias 'o') - e.g. open postlist\n"
	"keys   : Toggle showing keys (default: true) (alias 'k')\n"
	"tags   : Toggle showing tags (default: true) (alias 't')\n"
	"help   : Show this (alias 'h' or '?')\n"
	"quit   : Quit this utility (alias 'q')";
    cout << help_text << endl;
}
154 
155 static void
show_entry(GlassCursor & cursor)156 show_entry(GlassCursor& cursor)
157 {
158     if (cursor.after_end()) {
159 	cout << "After end" << endl;
160 	return;
161     }
162     if (keys) {
163 	cout << "Key: ";
164 	display_nicely(cursor.current_key);
165 	cout << endl;
166     }
167     if (tags) {
168 	cout << "Tag: ";
169 	cursor.read_tag();
170 	display_nicely(cursor.current_tag);
171 	cout << endl;
172     }
173 }
174 
175 static void
do_until(GlassCursor & cursor,const string & target)176 do_until(GlassCursor& cursor, const string& target)
177 {
178     if (cursor.after_end()) {
179 	cout << "At end already." << endl;
180 	return;
181     }
182 
183     if (!target.empty()) {
184 	int cmp = target.compare(cursor.current_key);
185 	if (cmp <= 0) {
186 	    if (cmp)
187 		cout << "Already after specified key." << endl;
188 	    else
189 		cout << "Already at specified key." << endl;
190 	    return;
191 	}
192     }
193 
194     size_t count = 0;
195     while (cursor.next()) {
196 	int cmp = 1;
197 	if (!target.empty()) {
198 	    cmp = target.compare(cursor.current_key);
199 	    if (cmp < 0) {
200 		cout << "No exact match, stopping at entry before, "
201 			"having advanced by " << count << " entries." << endl;
202 		cursor.find_entry_lt(cursor.current_key);
203 		return;
204 	    }
205 	}
206 	++count;
207 	show_entry(cursor);
208 	if (cmp == 0) {
209 	    cout << "Advanced by " << count << " entries." << endl;
210 	    return;
211 	}
212     }
213 
214     cout << "Reached end, having advanced by " << count << " entries." << endl;
215 }
216 
217 int
main(int argc,char ** argv)218 main(int argc, char** argv)
219 {
220     static const struct option long_opts[] = {
221 	{"table",	required_argument, 0, 't'},
222 	{"help",	no_argument, 0, OPT_HELP},
223 	{"version",	no_argument, 0, OPT_VERSION},
224 	{NULL,		0, 0, 0}
225     };
226 
227     string table_name;
228 
229     int c;
230     while ((c = gnu_getopt_long(argc, argv, "t:", long_opts, 0)) != -1) {
231 	switch (c) {
232 	    case 't':
233 		table_name = optarg;
234 		break;
235 	    case OPT_HELP:
236 		cout << PROG_NAME " - " PROG_DESC "\n\n";
237 		show_usage();
238 		exit(0);
239 	    case OPT_VERSION:
240 		cout << PROG_NAME " - " PACKAGE_STRING << endl;
241 		exit(0);
242 	    default:
243 		show_usage();
244 		exit(1);
245 	}
246     }
247 
248     if (argc - optind != 1) {
249 	show_usage();
250 	exit(1);
251     }
252 
253     // Path to the DB to inspect (possibly with a table name appended).
254     string db_path(argv[optind]);
255     bool arg_is_directory = dir_exists(db_path);
256     if (arg_is_directory && table_name.empty()) {
257 	cerr << argv[0]
258 	     << ": You need to specify a table name to inspect with --table."
259 	     << endl;
260 	exit(1);
261     }
262     int single_file_fd = -1;
263     if (table_name.empty()) {
264 	// db_path should be a path to a table, possibly without the extension
265 	// or with just a trailing '.' (supported mostly for historical
266 	// reasons).  First normalise away any extension or trailing '.'.
267 	if (endswith(db_path, "." GLASS_TABLE_EXTENSION)) {
268 	    db_path.resize(db_path.size() -
269 			   CONST_STRLEN(GLASS_TABLE_EXTENSION) - 1);
270 	} else if (endswith(db_path, '.')) {
271 	    db_path.resize(db_path.size() - 1);
272 	}
273 	size_t slash = db_path.find_last_of(DIR_SEPS);
274 	// If slash is std::string::npos, this assigns the whole of db_path to
275 	// table_name, which is what we want.
276 	table_name.assign(db_path, slash + 1, string::npos);
277 	if (slash != string::npos) {
278 	    db_path.resize(slash);
279 	} else {
280 	    db_path.resize(0);
281 	}
282     } else if (!arg_is_directory) {
283 	single_file_fd = open(db_path.c_str(), O_RDONLY | O_BINARY);
284 	if (single_file_fd < 0) {
285 	    cerr << argv[0] << ": Couldn't open file '" << db_path << "'"
286 		 << endl;
287 	    exit(1);
288 	}
289     }
290 
291     GlassVersion* version_file_ptr;
292     if (single_file_fd < 0) {
293 	version_file_ptr = new GlassVersion(db_path);
294     } else {
295 	version_file_ptr = new GlassVersion(single_file_fd);
296     }
297     GlassVersion& version_file = *version_file_ptr;
298 
299     version_file.read();
300     glass_revision_number_t rev = version_file.get_revision();
301 
302     show_help();
303     cout << endl;
304 
305 open_different_table:
306     try {
307 	Glass::table_type table_code;
308 	if (table_name == "docdata") {
309 	    table_code = Glass::DOCDATA;
310 	} else if (table_name == "spelling") {
311 	    table_code = Glass::SPELLING;
312 	} else if (table_name == "synonym") {
313 	    table_code = Glass::SYNONYM;
314 	} else if (table_name == "termlist") {
315 	    table_code = Glass::TERMLIST;
316 	} else if (table_name == "position") {
317 	    table_code = Glass::POSITION;
318 	} else if (table_name == "postlist") {
319 	    table_code = Glass::POSTLIST;
320 	} else {
321 	    cerr << "Unknown table: '" << table_name << "'" << endl;
322 	    exit(1);
323 	}
324 
325 	GlassTable* table_ptr;
326 	if (single_file_fd < 0) {
327 	    string table_path = db_path;
328 	    table_path += '/';
329 	    table_path += table_name;
330 	    table_path += '.';
331 	    table_ptr = new GlassTable("", table_path, true);
332 	} else {
333 	    auto offset = version_file.get_offset();
334 	    table_ptr = new GlassTable("", single_file_fd, offset, true);
335 	}
336 	GlassTable& table = *table_ptr;
337 
338 	table.open(0, version_file.get_root(table_code), rev);
339 	if (table.empty()) {
340 	    cout << "No entries!" << endl;
341 	    exit(0);
342 	}
343 	cout << "Table has " << table.get_entry_count() << " entries" << endl;
344 
345 	GlassCursor cursor(&table);
346 	cursor.find_entry(string());
347 	cursor.next();
348 
349 	while (!cin.eof()) {
350 	    show_entry(cursor);
351 wait_for_input:
352 	    cout << "? " << flush;
353 
354 	    string input;
355 	    getline(cin, input);
356 	    if (cin.eof()) break;
357 
358 	    if (endswith(input, '\r'))
359 		input.resize(input.size() - 1);
360 
361 	    if (input.empty() || input == "n" || input == "next") {
362 		if (cursor.after_end() || !cursor.next()) {
363 		    cout << "At end already." << endl;
364 		    goto wait_for_input;
365 		}
366 		continue;
367 	    } else if (input == "p" || input == "prev") {
368 		// If the cursor has fallen off the end, point it back at
369 		// the last entry.
370 		if (cursor.after_end()) cursor.find_entry(cursor.current_key);
371 		cursor.find_entry_lt(cursor.current_key);
372 		if (cursor.current_key.empty()) {
373 		    cout << "At start already." << endl;
374 		    goto wait_for_input;
375 		}
376 		continue;
377 	    } else if (startswith(input, "u ")) {
378 		do_until(cursor, unescape(input.substr(2)));
379 		goto wait_for_input;
380 	    } else if (startswith(input, "until ")) {
381 		do_until(cursor, unescape(input.substr(6)));
382 		goto wait_for_input;
383 	    } else if (input == "u" || input == "until") {
384 		do_until(cursor, string());
385 		goto wait_for_input;
386 	    } else if (input == "f" || input == "first") {
387 		cursor.find_entry(string());
388 		cursor.next();
389 		continue;
390 	    } else if (input == "l" || input == "last") {
391 		// To position on the last key we just search for a key with
392 		// the longest possible length consisting entirely of the
393 		// highest sorting byte value.
394 		cursor.find_entry(string(GLASS_BTREE_MAX_KEY_LEN, '\xff'));
395 		continue;
396 	    } else if (startswith(input, "g ")) {
397 		if (!cursor.find_entry(unescape(input.substr(2)))) {
398 		    cout << "No exact match, going to entry before." << endl;
399 		}
400 		continue;
401 	    } else if (startswith(input, "goto ")) {
402 		if (!cursor.find_entry(unescape(input.substr(5)))) {
403 		    cout << "No exact match, going to entry before." << endl;
404 		}
405 		continue;
406 	    } else if (startswith(input, "o ") || startswith(input, "open ")) {
407 		size_t trim = (input[1] == ' ' ? 2 : 5);
408 		table_name.assign(input, trim, string::npos);
409 		if (endswith(table_name, "." GLASS_TABLE_EXTENSION))
410 		    table_name.resize(table_name.size() -
411 				      CONST_STRLEN(GLASS_TABLE_EXTENSION) - 1);
412 		else if (endswith(table_name, '.'))
413 		    table_name.resize(table_name.size() - 1);
414 		goto open_different_table;
415 	    } else if (input == "t" || input == "tags") {
416 		tags = !tags;
417 		cout << "Showing tags: " << boolalpha << tags << endl;
418 	    } else if (input == "k" || input == "keys") {
419 		keys = !keys;
420 		cout << "Showing keys: " << boolalpha << keys << endl;
421 	    } else if (input == "q" || input == "quit") {
422 		break;
423 	    } else if (input == "h" || input == "help" || input == "?") {
424 		show_help();
425 		goto wait_for_input;
426 	    } else {
427 		cout << "Unknown command." << endl;
428 		goto wait_for_input;
429 	    }
430 	}
431     } catch (const Xapian::Error& error) {
432 	cerr << argv[0] << ": " << error.get_description() << endl;
433 	exit(1);
434     }
435 }
436