1 /** @file
2  * @brief index arbitrary data as described by an index script
3  */
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2001 Sam Liddicott
6  * Copyright 2001,2002 Ananova Ltd
7  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015,2017,2018 Olly Betts
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
22  * USA
23  */
24 
25 #include <config.h>
26 
27 #include <xapian.h>
28 
29 #include <algorithm>
30 #include <fstream>
31 #include <iostream>
32 #include <list>
33 #include <map>
34 #include <memory>
35 #include <string>
36 #include <unordered_set>
37 #include <vector>
38 #include <cstring>
39 
40 #include <cerrno>
41 #include <cstdio>
42 #include <cstdlib>
43 #include <ctime>
44 
45 #include "commonhelp.h"
46 #include "hashterm.h"
47 #include "loadfile.h"
48 #include "myhtmlparse.h"
49 #include "parseint.h"
50 #include "setenv.h"
51 #include "str.h"
52 #include "stringutils.h"
53 #include "timegm.h"
54 #include "utf8truncate.h"
55 #include "utils.h"
56 #include "values.h"
57 
58 #ifndef HAVE_STRPTIME
59 #include "portability/strptime.h"
60 #endif
61 
62 #include "gnu_getopt.h"
63 
64 using namespace std;
65 
// Program name and one-line description.
// NOTE(review): presumably consumed by the help/version output, which is
// outside the part of the file visible here - confirm.
#define PROG_NAME "scriptindex"
#define PROG_DESC "index arbitrary data as described by an index script"

// Run-wide state.  These have static storage duration so they start out as
// false/zero; the initialisers below just make that explicit.
// NOTE(review): going by the names, the counters track documents added,
// replaced and deleted - their users are outside the visible part of the file.
static bool verbose = false;
static int addcount = 0;
static int repcount = 0;
static int delcount = 0;
73 
74 static inline bool
prefix_needs_colon(const string & prefix,unsigned ch)75 prefix_needs_colon(const string & prefix, unsigned ch)
76 {
77     if (!C_isupper(ch) && ch != ':') return false;
78     string::size_type len = prefix.length();
79     return (len > 1 && prefix[len - 1] != ':');
80 }
81 
// Printable name for each Action::type value.  The array is indexed by the
// enum value, so the entries must be kept in exactly the same order as the
// enumerators of Action::type.
const char * action_names[] = {
    // Internal codes which no index script keyword maps to.
    "bad",
    "new",
    // Keywords recognised in index scripts, in alphabetical order.
    "boolean",
    "date",
    "field",
    "gap",
    "hash",
    "hextobin",
    "index",
    "indexnopos",
    "load",
    "lower",
    "parsedate",
    "spell",
    "split",
    "truncate",
    "unhtml",
    "unique",
    "value",
    "valuenumeric",
    "valuepacked",
    "weight"
};
88 
// Debugging aid: write Action A to stdout as "name(string_arg,num_arg)"
// followed by a newline.  The expansion deliberately has no trailing
// semicolon, so the caller supplies it.
#define DUMP_ACTION(A) \
    cout << action_names[(A).get_action()] \
	 << "(" << (A).get_string_arg() \
	 << "," << (A).get_num_arg() << ")" \
	 << endl
91 
/** One step in the list of actions applied to a field's value.
 *
 *  An Action records which operation to perform, an optional string
 *  argument, an integer argument, and the offset of the action within its
 *  index script line (used when reporting diagnostics).
 */
class Action {
  public:
    /// The operation to perform.  Values are used to index action_names[].
    enum type {
	BAD, NEW,
	BOOLEAN, DATE, FIELD, GAP, HASH, HEXTOBIN, INDEX, INDEXNOPOS, LOAD,
	LOWER, PARSEDATE, SPELL, SPLIT, TRUNCATE, UNHTML, UNIQUE, VALUE,
	VALUENUMERIC, VALUEPACKED, WEIGHT
    };

    /// Post-processing modes for SPLIT (stored as the integer argument).
    enum { SPLIT_NONE, SPLIT_DEDUP, SPLIT_SORT, SPLIT_PREFIXES };

    /// Action with no arguments: integer argument 0, empty string argument.
    Action(type action_, size_t pos_)
	: action(action_), num_arg(0), pos(pos_) { }

    /// Action with a string argument; the integer argument is atoi() of it.
    Action(type action_, size_t pos_, const string & arg)
	: action(action_), num_arg(atoi(arg.c_str())), string_arg(arg),
	  pos(pos_) { }

    /// Action with independent string and integer arguments.
    Action(type action_, size_t pos_, const string & arg, int num)
	: action(action_), num_arg(num), string_arg(arg), pos(pos_) { }

    type get_action() const { return action; }

    int get_num_arg() const { return num_arg; }

    void set_num_arg(int num) { num_arg = num; }

    const string & get_string_arg() const { return string_arg; }

    /// Offset of this action in its index script line.
    size_t get_pos() const { return pos; }

  private:
    type action;
    int num_arg;
    string string_arg;
    // Offset into indexscript line.
    size_t pos;
};
122 
123 // These allow searching for an Action with a particular Action::type using
124 // std::find().
125 
126 inline bool
operator ==(const Action & a,Action::type t)127 operator==(const Action& a, Action::type t) { return a.get_action() == t; }
128 
129 inline bool
operator ==(Action::type t,const Action & a)130 operator==(Action::type t, const Action& a) { return a.get_action() == t; }
131 
132 inline bool
operator !=(const Action & a,Action::type t)133 operator!=(const Action& a, Action::type t) { return !(a == t); }
134 
135 inline bool
operator !=(Action::type t,const Action & a)136 operator!=(Action::type t, const Action& a) { return !(t == a); }
137 
/// Severity of a diagnostic message.
enum diag_type {
    DIAG_ERROR,
    DIAG_WARN,
    DIAG_NOTE
};

/** Write the location prefix of a diagnostic to stderr.
 *
 *  The output is "FILENAME[:LINE][:COLUMN]: SEVERITY: " and the caller is
 *  expected to follow it with the message text.
 *
 *  @param type      Severity, which selects "error", "warning" or "note".
 *  @param filename  Name of the file the diagnostic relates to.
 *  @param line      Line number; 0 means there is none to report.
 *  @param pos       Zero-based column; string::npos means there is none to
 *		     report.
 */
static void
report_location(enum diag_type type,
		const string& filename,
		size_t line = 0,
		size_t pos = string::npos)
{
    const char* severity = nullptr;
    switch (type) {
	case DIAG_ERROR:
	    severity = ": error: ";
	    break;
	case DIAG_WARN:
	    severity = ": warning: ";
	    break;
	case DIAG_NOTE:
	    severity = ": note: ";
	    break;
    }

    cerr << filename;
    if (line != 0)
	cerr << ':' << line;
    // Columns are stored zero-based but reported with the first column
    // numbered 1.
    if (pos != string::npos)
	cerr << ':' << pos + 1;
    if (severity)
	cerr << severity;
}
166 
167 static void
report_useless_action(const string & file,size_t line,size_t pos,const string & action)168 report_useless_action(const string &file, size_t line, size_t pos,
169 		      const string &action)
170 {
171     report_location(DIAG_WARN, file, line, pos);
172     cerr << "Index action '" << action << "' has no effect" << endl;
173 
174     static bool given_left_to_right_warning = false;
175     if (!given_left_to_right_warning) {
176 	given_left_to_right_warning = true;
177 	report_location(DIAG_NOTE, file, line, pos);
178 	cerr << "Actions are executed from left to right" << endl;
179     }
180 }
181 
182 static map<string, vector<Action>> index_spec;
183 
184 static void
parse_index_script(const string & filename)185 parse_index_script(const string &filename)
186 {
187     ifstream script(filename.c_str());
188     if (!script.is_open()) {
189 	report_location(DIAG_ERROR, filename);
190 	cerr << strerror(errno) << endl;
191 	exit(1);
192     }
193     string line;
194     size_t line_no = 0;
195     bool had_unique = false;
196     while (getline(script, line)) {
197 	++line_no;
198 	vector<string> fields;
199 	vector<Action> actions;
200 	string::const_iterator i, j;
201 	const string &s = line;
202 	i = find_if(s.begin(), s.end(), [](char ch) { return !C_isspace(ch); });
203 	if (i == s.end() || *i == '#') {
204 	    // Blank line or comment.
205 	    continue;
206 	}
207 	while (true) {
208 	    if (!C_isalnum(*i)) {
209 		report_location(DIAG_ERROR, filename, line_no, i - s.begin());
210 		cerr << "field name must start with alphanumeric" << endl;
211 		exit(1);
212 	    }
213 	    j = find_if(i, s.end(),
214 			[](char ch) { return !C_isalnum(ch) && ch != '_'; });
215 	    fields.push_back(string(i, j));
216 	    i = find_if(j, s.end(), [](char ch) { return !C_isspace(ch); });
217 	    if (i == s.end()) break;
218 	    if (*i == ':') {
219 		++i;
220 		i = find_if(i, s.end(), [](char ch) { return !C_isspace(ch); });
221 		break;
222 	    }
223 	    if (i == j) {
224 		report_location(DIAG_ERROR, filename, line_no, i - s.begin());
225 		cerr << "bad character '" << *i << "' in fieldname" << endl;
226 		exit(1);
227 	    }
228 	}
229 	Xapian::termcount weight = 1;
230 	size_t useless_weight_pos = string::npos;
231 	map<string, Action::type> boolmap;
232 	j = i;
233 	while (j != s.end()) {
234 	    size_t action_pos = j - s.begin();
235 	    i = find_if(j, s.end(), [](char ch) { return !C_isalnum(ch); });
236 	    string action(s, j - s.begin(), i - j);
237 	    Action::type code = Action::BAD;
238 	    unsigned min_args = 0, max_args = 0;
239 	    bool takes_integer_argument = false;
240 	    if (!action.empty()) {
241 		switch (action[0]) {
242 		    case 'b':
243 			if (action == "boolean") {
244 			    code = Action::BOOLEAN;
245 			    max_args = 1;
246 			}
247 			break;
248 		    case 'd':
249 			if (action == "date") {
250 			    code = Action::DATE;
251 			    min_args = max_args = 1;
252 			}
253 			break;
254 		    case 'f':
255 			if (action == "field") {
256 			    code = Action::FIELD;
257 			    max_args = 1;
258 			}
259 			break;
260 		    case 'g':
261 			if (action == "gap") {
262 			    code = Action::GAP;
263 			    max_args = 1;
264 			    takes_integer_argument = true;
265 			}
266 			break;
267 		    case 'h':
268 			if (action == "hash") {
269 			    code = Action::HASH;
270 			    max_args = 1;
271 			    takes_integer_argument = true;
272 			} else if (action == "hextobin") {
273 			    code = Action::HEXTOBIN;
274 			}
275 			break;
276 		    case 'i':
277 			if (action == "index") {
278 			    code = Action::INDEX;
279 			    max_args = 1;
280 			} else if (action == "indexnopos") {
281 			    code = Action::INDEXNOPOS;
282 			    max_args = 1;
283 			}
284 			break;
285 		    case 'l':
286 			if (action == "lower") {
287 			    code = Action::LOWER;
288 			} else if (action == "load") {
289 			    code = Action::LOAD;
290 			}
291 			break;
292 		    case 'p':
293 			if (action == "parsedate") {
294 			    code = Action::PARSEDATE;
295 			    min_args = max_args = 1;
296 			}
297 			break;
298 		    case 's':
299 			if (action == "spell") {
300 			    code = Action::SPELL;
301 			} else if (action == "split") {
302 			    code = Action::SPLIT;
303 			    min_args = 1;
304 			    max_args = 2;
305 			}
306 			break;
307 		    case 't':
308 			if (action == "truncate") {
309 			    code = Action::TRUNCATE;
310 			    min_args = max_args = 1;
311 			    takes_integer_argument = true;
312 			}
313 			break;
314 		    case 'u':
315 			if (action == "unhtml") {
316 			    code = Action::UNHTML;
317 			} else if (action == "unique") {
318 			    code = Action::UNIQUE;
319 			    min_args = max_args = 1;
320 			}
321 			break;
322 		    case 'v':
323 			if (action == "value") {
324 			    code = Action::VALUE;
325 			    min_args = max_args = 1;
326 			    takes_integer_argument = true;
327 			} else if (action == "valuenumeric") {
328 			    code = Action::VALUENUMERIC;
329 			    min_args = max_args = 1;
330 			    takes_integer_argument = true;
331 			} else if (action == "valuepacked") {
332 			    code = Action::VALUEPACKED;
333 			    min_args = max_args = 1;
334 			    takes_integer_argument = true;
335 			}
336 			break;
337 		    case 'w':
338 			if (action == "weight") {
339 			    code = Action::WEIGHT;
340 			    min_args = max_args = 1;
341 			    takes_integer_argument = true;
342 			}
343 			break;
344 		}
345 	    }
346 	    if (code == Action::BAD) {
347 		report_location(DIAG_ERROR, filename, line_no, action_pos);
348 		cerr << "Unknown index action '" << action << "'" << endl;
349 		exit(1);
350 	    }
351 	    auto i_after_action = i;
352 	    i = find_if(i, s.end(), [](char ch) { return !C_isspace(ch); });
353 
354 	    if (i != s.end() && *i == '=') {
355 		if (i != i_after_action) {
356 		    report_location(DIAG_WARN, filename, line_no,
357 				    i_after_action - s.begin());
358 		    cerr << "putting spaces between the action and '=' is "
359 			    "deprecated." << endl;
360 		}
361 
362 		if (max_args == 0) {
363 		    report_location(DIAG_ERROR, filename, line_no,
364 				    i - s.begin());
365 		    cerr << "Index action '" << action
366 			 << "' doesn't take an argument" << endl;
367 		    exit(1);
368 		}
369 
370 		++i;
371 		j = find_if(i, s.end(), [](char ch) { return !C_isspace(ch); });
372 		if (i != j) {
373 		    report_location(DIAG_WARN, filename, line_no,
374 				    i - s.begin());
375 		    cerr << "putting spaces between '=' and the argument is "
376 			    "deprecated." << endl;
377 		}
378 
379 		vector<string> vals;
380 		while (true) {
381 		    if (j != s.end() && *j == '"') {
382 			// Quoted argument.
383 			++j;
384 			string arg;
385 			while (true) {
386 			    i = find_if(j, s.end(),
387 					[](char ch) {
388 					    return ch == '"' || ch == '\\';
389 					});
390 			    if (i == s.end()) {
391 				report_location(DIAG_ERROR, filename, line_no,
392 						s.size());
393 				cerr << "No closing quote" << endl;
394 				exit(1);
395 			    }
396 			    arg.append(j, i);
397 			    if (*i++ == '"')
398 				break;
399 
400 			    // Escape sequence.
401 			    if (i == s.end()) {
402 bad_escaping:
403 				report_location(DIAG_ERROR, filename, line_no,
404 						i - s.begin());
405 				cerr << "Bad escaping in quoted action argument"
406 				     << endl;
407 				exit(1);
408 			    }
409 
410 			    char ch = *i;
411 			    switch (ch) {
412 				case '\\':
413 				case '"':
414 				    break;
415 				case '0':
416 				    ch = '\0';
417 				    break;
418 				case 'n':
419 				    ch = '\n';
420 				    break;
421 				case 'r':
422 				    ch = '\r';
423 				    break;
424 				case 't':
425 				    ch = '\t';
426 				    break;
427 				case 'x': {
428 				    if (++i == s.end())
429 					goto bad_escaping;
430 				    char ch1 = *i;
431 				    if (++i == s.end())
432 					goto bad_escaping;
433 				    char ch2 = *i;
434 				    if (!C_isxdigit(ch1) ||
435 					!C_isxdigit(ch2))
436 					goto bad_escaping;
437 				    ch = hex_digit(ch1) << 4 |
438 					 hex_digit(ch2);
439 				    break;
440 				}
441 				default:
442 				    goto bad_escaping;
443 			    }
444 			    arg += ch;
445 			    j = i + 1;
446 			}
447 			vals.emplace_back(std::move(arg));
448 			if (i == s.end() || C_isspace(*i)) break;
449 			if (*i != ',') {
450 			    report_location(DIAG_ERROR, filename, line_no,
451 					    i - s.begin());
452 			    cerr << "Unexpected character '" << *i
453 				 << "' after closing quote" << endl;
454 			    exit(1);
455 			}
456 			++i;
457 		    } else if (max_args > 1) {
458 			// Unquoted argument, split on comma.
459 			i = find_if(j, s.end(),
460 				    [](char ch) {
461 					return C_isspace(ch) || ch == ',';
462 				    });
463 			vals.emplace_back(j, i);
464 			if (*i != ',') break;
465 			++i;
466 		    } else {
467 			// Unquoted argument, including any commas.
468 			i = find_if(j, s.end(),
469 				    [](char ch) { return C_isspace(ch); });
470 			vals.emplace_back(j, i);
471 			break;
472 		    }
473 		    j = i;
474 
475 		    if (vals.size() == max_args) {
476 			report_location(DIAG_ERROR, filename, line_no,
477 					i - s.begin());
478 			cerr << "Index action '" << action
479 			     << "' takes at most " << max_args << " arguments"
480 			     << endl;
481 			exit(1);
482 		    }
483 		}
484 
485 		if (vals.size() < min_args) {
486 		    report_location(DIAG_ERROR, filename, line_no,
487 				    i - s.begin());
488 		    if (min_args == max_args) {
489 			cerr << "Index action '" << action
490 			     << "' requires " << min_args << " arguments"
491 			     << endl;
492 			exit(1);
493 		    }
494 		    cerr << "Index action '" << action
495 			 << "' requires at least " << min_args << " arguments"
496 			 << endl;
497 		    exit(1);
498 		}
499 
500 		string val;
501 		if (!vals.empty()) {
502 		    val = vals.front();
503 		}
504 
505 		if (takes_integer_argument) {
506 		    auto dot = val.find('.');
507 		    if (dot != string::npos) {
508 			report_location(DIAG_WARN, filename, line_no,
509 					j - s.begin() + dot);
510 			cerr << "Index action '" << action
511 			     << "' takes an integer argument" << endl;
512 		    }
513 		}
514 		switch (code) {
515 		    case Action::DATE:
516 			if (val != "unix" &&
517 			    val != "unixutc" &&
518 			    val != "yyyymmdd") {
519 			    report_location(DIAG_ERROR, filename, line_no);
520 			    cerr << "Invalid parameter '" << val << "' for "
521 				    "action 'date'" << endl;
522 			    exit(1);
523 			}
524 			actions.emplace_back(code, action_pos, val);
525 			break;
526 		    case Action::INDEX:
527 		    case Action::INDEXNOPOS:
528 			actions.emplace_back(code, action_pos, val, weight);
529 			useless_weight_pos = string::npos;
530 			break;
531 		    case Action::WEIGHT:
532 			// We don't push an Action for WEIGHT - instead we
533 			// store it ready to use in the INDEX and INDEXNOPOS
534 			// Actions.
535 			weight = atoi(val.c_str());
536 			if (useless_weight_pos != string::npos) {
537 			    report_useless_action(filename, line_no,
538 						  useless_weight_pos, action);
539 			}
540 			useless_weight_pos = action_pos;
541 			break;
542 		    case Action::PARSEDATE: {
543 			if (val.find("%Z") != val.npos) {
544 			    report_location(DIAG_ERROR, filename, line_no);
545 			    cerr << "Parsing timezone names with %Z is not supported" << endl;
546 			    exit(1);
547 			}
548 #ifndef HAVE_STRUCT_TM_TM_GMTOFF
549 			if (val.find("%z") != val.npos) {
550 			    report_location(DIAG_ERROR, filename, line_no);
551 			    cerr << "Parsing timezone offsets with %z is not supported on "
552 				    "this platform" << endl;
553 			    exit(1);
554 			}
555 #endif
556 			actions.emplace_back(code, action_pos, val);
557 			break;
558 		    }
559 		    case Action::SPLIT: {
560 			if (val.empty()) {
561 			    report_location(DIAG_ERROR, filename, line_no);
562 			    cerr << "Split delimiter can't be empty" << endl;
563 			    exit(1);
564 			}
565 			int operation = Action::SPLIT_NONE;
566 			if (vals.size() >= 2) {
567 			    if (vals[1] == "dedup") {
568 				operation = Action::SPLIT_DEDUP;
569 			    } else if (vals[1] == "sort") {
570 				operation = Action::SPLIT_SORT;
571 			    } else if (vals[1] == "none") {
572 				operation = Action::SPLIT_NONE;
573 			    } else if (vals[1] == "prefixes") {
574 				operation = Action::SPLIT_PREFIXES;
575 			    } else {
576 				report_location(DIAG_ERROR, filename, line_no);
577 				cerr << "Bad split operation '" << vals[1]
578 				     << "'" << endl;
579 				exit(1);
580 			    }
581 			}
582 			actions.emplace_back(code, action_pos, val, operation);
583 			break;
584 		    }
585 		    case Action::TRUNCATE:
586 			if (!actions.empty() &&
587 			    actions.back().get_action() == Action::LOAD) {
588 			    /* Turn "load truncate=n" into "load" with
589 			     * num_arg n, so that we don't needlessly
590 			     * allocate memory and read data we're just
591 			     * going to ignore.
592 			     */
593 			    actions.pop_back();
594 			    code = Action::LOAD;
595 			}
596 			actions.emplace_back(code, action_pos, val);
597 			break;
598 		    case Action::UNIQUE:
599 			if (had_unique) {
600 			    report_location(DIAG_ERROR, filename, line_no,
601 					    action_pos);
602 			    cerr << "Index action 'unique' used more than once"
603 				 << endl;
604 			    exit(1);
605 			}
606 			had_unique = true;
607 			if (boolmap.find(val) == boolmap.end())
608 			    boolmap[val] = Action::UNIQUE;
609 			actions.emplace_back(code, action_pos, val);
610 			break;
611 		    case Action::GAP: {
612 			actions.emplace_back(code, action_pos, val);
613 			auto& obj = actions.back();
614 			auto gap_size = obj.get_num_arg();
615 			if (gap_size <= 0) {
616 			    report_location(DIAG_ERROR, filename, line_no,
617 					    obj.get_pos() + 3 + 1);
618 			    cerr << "Index action 'gap' takes a strictly "
619 				    "positive integer argument" << endl;
620 			    exit(1);
621 			}
622 			break;
623 		    }
624 		    case Action::HASH: {
625 			actions.emplace_back(code, action_pos, val);
626 			auto& obj = actions.back();
627 			auto max_length = obj.get_num_arg();
628 			if (max_length < 6) {
629 			    report_location(DIAG_ERROR, filename, line_no,
630 					    obj.get_pos() + 4 + 1);
631 			    cerr << "Index action 'hash' takes an integer "
632 				    "argument which must be at least 6" << endl;
633 			    exit(1);
634 			}
635 			break;
636 		    }
637 		    case Action::BOOLEAN:
638 			boolmap[val] = Action::BOOLEAN;
639 			/* FALLTHRU */
640 		    default:
641 			actions.emplace_back(code, action_pos, val);
642 		}
643 		i = find_if(i, s.end(), [](char ch) { return !C_isspace(ch); });
644 	    } else {
645 		if (min_args > 0) {
646 		    report_location(DIAG_ERROR, filename, line_no,
647 				    i_after_action - s.begin());
648 		    if (min_args == max_args) {
649 			cerr << "Index action '" << action << "' requires "
650 			     << min_args << " arguments" << endl;
651 			exit(1);
652 		    }
653 		    cerr << "Index action '" << action << "' requires at least "
654 			 << min_args << " arguments" << endl;
655 		    exit(1);
656 		}
657 		if (code == Action::INDEX || code == Action::INDEXNOPOS) {
658 		    useless_weight_pos = string::npos;
659 		    actions.emplace_back(code, action_pos, "", weight);
660 		} else if (code == Action::GAP) {
661 		    actions.emplace_back(code, action_pos, "", 100);
662 		} else if (code == Action::HASH) {
663 		    actions.emplace_back(code, action_pos, "",
664 					 MAX_SAFE_TERM_LENGTH - 1);
665 		} else {
666 		    actions.emplace_back(code, action_pos);
667 		}
668 	    }
669 	    j = i;
670 	}
671 
672 	if (useless_weight_pos != string::npos) {
673 	    report_useless_action(filename, line_no, useless_weight_pos,
674 				  "weight");
675 	}
676 
677 	while (!actions.empty()) {
678 	    bool done = true;
679 	    Action::type action = actions.back().get_action();
680 	    switch (action) {
681 		case Action::HASH:
682 		case Action::HEXTOBIN:
683 		case Action::LOWER:
684 		case Action::PARSEDATE:
685 		case Action::SPELL:
686 		case Action::TRUNCATE:
687 		case Action::UNHTML:
688 		    done = false;
689 		    report_useless_action(filename, line_no,
690 					  actions.back().get_pos(),
691 					  action_names[action]);
692 		    actions.pop_back();
693 		    break;
694 		default:
695 		    break;
696 	    }
697 	    if (done) break;
698 	}
699 
700 	map<string, Action::type>::const_iterator boolpfx;
701 	for (boolpfx = boolmap.begin(); boolpfx != boolmap.end(); ++boolpfx) {
702 	    if (boolpfx->second == Action::UNIQUE) {
703 		report_location(DIAG_WARN, filename, line_no);
704 		cerr << "Index action 'unique=" << boolpfx->first
705 		     << "' without 'boolean=" << boolpfx->first << "'" << endl;
706 		static bool given_doesnt_imply_boolean_warning = false;
707 		if (!given_doesnt_imply_boolean_warning) {
708 		    given_doesnt_imply_boolean_warning = true;
709 		    report_location(DIAG_NOTE, filename, line_no);
710 		    cerr << "'unique' doesn't implicitly add a boolean term"
711 			 << endl;
712 		}
713 	    }
714 	}
715 
716 	vector<string>::const_iterator field;
717 	for (field = fields.begin(); field != fields.end(); ++field) {
718 	    vector<Action> &v = index_spec[*field];
719 	    if (v.empty()) {
720 		if (fields.size() == 1) {
721 		    // Optimise common case where there's only one fieldname
722 		    // for a list of actions.
723 		    v = std::move(actions);
724 		} else {
725 		    v = actions;
726 		}
727 	    } else {
728 		v.emplace_back(Action::NEW, string::npos);
729 		v.insert(v.end(), actions.begin(), actions.end());
730 	    }
731 	}
732     }
733 
734     if (index_spec.empty()) {
735 	report_location(DIAG_ERROR, filename, line_no);
736 	cerr << "No rules found in index script" << endl;
737 	exit(1);
738     }
739 }
740 
741 static bool
run_actions(vector<Action>::const_iterator action_it,vector<Action>::const_iterator action_end,Xapian::WritableDatabase & database,Xapian::TermGenerator & indexer,const string & old_value,bool & this_field_is_content,Xapian::Document & doc,map<string,list<string>> & fields,string & field,const char * fname,size_t line_no,Xapian::docid & docid)742 run_actions(vector<Action>::const_iterator action_it,
743 	    vector<Action>::const_iterator action_end,
744 	    Xapian::WritableDatabase& database,
745 	    Xapian::TermGenerator& indexer,
746 	    const string& old_value,
747 	    bool& this_field_is_content, Xapian::Document& doc,
748 	    map<string, list<string>>& fields,
749 	    string& field, const char* fname,
750 	    size_t line_no, Xapian::docid& docid)
751 {
752     string value = old_value;
753     while (action_it != action_end) {
754 	auto& action = *action_it++;
755 	switch (action.get_action()) {
756 	    case Action::BAD:
757 		abort();
758 	    case Action::NEW:
759 		value = old_value;
760 		// We're processing the same field again - give it a reprieve.
761 		this_field_is_content = true;
762 		break;
763 	    case Action::FIELD:
764 		if (!value.empty()) {
765 		    string f = action.get_string_arg();
766 		    if (f.empty()) f = field;
767 		    // replace newlines with spaces
768 		    string s = value;
769 		    string::size_type j = 0;
770 		    while ((j = s.find('\n', j)) != string::npos)
771 			s[j] = ' ';
772 		    fields[f].push_back(s);
773 		}
774 		break;
775 	    case Action::INDEX:
776 		indexer.index_text(value,
777 				   action.get_num_arg(),
778 				   action.get_string_arg());
779 		break;
780 	    case Action::INDEXNOPOS:
781 		// No positional information so phrase searching won't work.
782 		// However, the database will use much less diskspace.
783 		indexer.index_text_without_positions(value,
784 						     action.get_num_arg(),
785 						     action.get_string_arg());
786 		break;
787 	    case Action::BOOLEAN: {
788 		// Do nothing if there's no text.
789 		if (value.empty()) break;
790 
791 		string term = action.get_string_arg();
792 		if (prefix_needs_colon(term, value[0])) term += ':';
793 		term += value;
794 
795 		doc.add_boolean_term(term);
796 		break;
797 	    }
798 	    case Action::GAP:
799 		indexer.increase_termpos(action.get_num_arg());
800 		break;
801 	    case Action::HASH: {
802 		unsigned int max_length = action.get_num_arg();
803 		if (value.length() > max_length)
804 		    value = hash_long_term(value, max_length);
805 		break;
806 	    }
807 	    case Action::HEXTOBIN: {
808 		size_t len = value.length();
809 		if (len & 1) {
810 		    report_location(DIAG_ERROR, fname, line_no);
811 		    cerr << "hextobin: input must have even length"
812 			 << endl;
813 		} else {
814 		    string output;
815 		    output.reserve(len / 2);
816 		    for (size_t j = 0; j < len; j += 2) {
817 			char a = value[j];
818 			char b = value[j + 1];
819 			if (!C_isxdigit(a) || !C_isxdigit(b)) {
820 			    report_location(DIAG_ERROR, fname, line_no);
821 			    cerr << "hextobin: input must be all hex "
822 				    "digits" << endl;
823 			    goto badhex;
824 			}
825 			char r = (hex_digit(a) << 4) | hex_digit(b);
826 			output.push_back(r);
827 		    }
828 		    value = std::move(output);
829 		}
830 badhex:
831 		break;
832 	    }
833 	    case Action::LOWER:
834 		value = Xapian::Unicode::tolower(value);
835 		break;
836 	    case Action::LOAD: {
837 		// If there's no input, just issue a warning.
838 		if (value.empty()) {
839 		    report_location(DIAG_WARN, fname, line_no);
840 		    cerr << "Empty filename in LOAD action" << endl;
841 		    break;
842 		}
843 		bool truncated = false;
844 		string filename = std::move(value);
845 		// FIXME: Use NOATIME if we own the file or are root.
846 		if (!load_file(filename, action.get_num_arg(), NOCACHE,
847 			       value, truncated)) {
848 		    report_location(DIAG_ERROR, fname, line_no);
849 		    cerr << "Couldn't load file '" << filename << "': "
850 			 << strerror(errno) << endl;
851 		    value.resize(0);
852 		    break;
853 		}
854 		if (!truncated) break;
855 	    }
856 	    /* FALLTHRU */
857 	    case Action::TRUNCATE:
858 		utf8_truncate(value, action.get_num_arg());
859 		break;
860 	    case Action::SPELL:
861 		indexer.set_flags(indexer.FLAG_SPELLING);
862 		break;
863 	    case Action::SPLIT: {
864 		// Find the end of the actions which split should execute.
865 		auto split_end = find(action_it, action_end, Action::NEW);
866 
867 		int split_type = action.get_num_arg();
868 		if (value.empty()) {
869 		    // Nothing to do.
870 		} else if (split_type != Action::SPLIT_SORT) {
871 		    // Generate split as we consume it.
872 		    const string& delimiter = action.get_string_arg();
873 
874 		    unique_ptr<unordered_set<string>> seen;
875 		    if (split_type == Action::SPLIT_DEDUP) {
876 			seen.reset(new unordered_set<string>);
877 		    }
878 
879 		    if (delimiter.size() == 1) {
880 			// Special case for common single character delimiter.
881 			char ch = delimiter[0];
882 			string::size_type i = 0;
883 			while (true) {
884 			    string::size_type j = value.find(ch, i);
885 			    if (split_type == Action::SPLIT_PREFIXES) {
886 				if (j > 0) {
887 				    string val(value, 0, j);
888 				    run_actions(action_it, split_end,
889 						database, indexer,
890 						val,
891 						this_field_is_content, doc,
892 						fields,
893 						field, fname, line_no,
894 						docid);
895 				}
896 			    } else if (i != j) {
897 				string val(value, i, j - i);
898 				if (!seen.get() || seen->insert(val).second) {
899 				    run_actions(action_it, split_end,
900 						database, indexer,
901 						val,
902 						this_field_is_content, doc,
903 						fields,
904 						field, fname, line_no,
905 						docid);
906 				}
907 			    }
908 			    if (j == string::npos) break;
909 			    i = j + 1;
910 			}
911 		    } else {
912 			string::size_type i = 0;
913 			while (true) {
914 			    string::size_type j = value.find(delimiter, i);
915 			    if (split_type == Action::SPLIT_PREFIXES) {
916 				if (j > 0) {
917 				    string val(value, 0, j);
918 				    run_actions(action_it, split_end,
919 						database, indexer,
920 						val,
921 						this_field_is_content, doc,
922 						fields,
923 						field, fname, line_no,
924 						docid);
925 				}
926 			    } else if (i != j) {
927 				string val(value, i, j - i);
928 				if (!seen.get() || seen->insert(val).second) {
929 				    run_actions(action_it, split_end,
930 						database, indexer,
931 						val,
932 						this_field_is_content, doc,
933 						fields,
934 						field, fname, line_no,
935 						docid);
936 				}
937 			    }
938 			    if (j == string::npos) break;
939 			    i = j + delimiter.size();
940 			}
941 		    }
942 		} else {
943 		    vector<string> split_values;
944 		    const string& delimiter = action.get_string_arg();
945 		    if (delimiter.size() == 1) {
946 			// Special case for common single character delimiter.
947 			char ch = delimiter[0];
948 			string::size_type i = 0;
949 			while (true) {
950 			    string::size_type j = value.find(ch, i);
951 			    if (i != j) {
952 				split_values.emplace_back(value, i, j - i);
953 			    }
954 			    if (j == string::npos) break;
955 			    i = j + 1;
956 			}
957 		    } else {
958 			string::size_type i = 0;
959 			while (true) {
960 			    string::size_type j = value.find(delimiter, i);
961 			    if (i != j) {
962 				split_values.emplace_back(value, i, j - i);
963 			    }
964 			    if (j == string::npos) break;
965 			    i = j + delimiter.size();
966 			}
967 		    }
968 
969 		    sort(split_values.begin(), split_values.end());
970 
971 		    for (auto&& val : split_values) {
972 			run_actions(action_it, split_end,
973 				    database, indexer, val,
974 				    this_field_is_content, doc, fields,
975 				    field, fname, line_no,
976 				    docid);
977 		    }
978 		}
979 
980 		action_it = split_end;
981 		break;
982 	    }
983 	    case Action::UNHTML: {
984 		MyHtmlParser p;
985 		try {
986 		    // Default HTML character set is latin 1, though
987 		    // not specifying one is deprecated these days.
988 		    p.parse_html(value, "iso-8859-1", false);
989 		} catch (const string & newcharset) {
990 		    p.reset();
991 		    p.parse_html(value, newcharset, true);
992 		}
993 		if (p.indexing_allowed)
994 		    value = p.dump;
995 		else
996 		    value = "";
997 		break;
998 	    }
999 	    case Action::UNIQUE: {
1000 		// If there's no text, just issue a warning.
1001 		if (value.empty()) {
1002 		    report_location(DIAG_WARN, fname, line_no);
1003 		    cerr << "Ignoring UNIQUE action on empty text"
1004 			 << endl;
1005 		    break;
1006 		}
1007 
1008 		// Ensure that the value of this field is unique.
1009 		// If a record already exists with the same value,
1010 		// it will be replaced with the new record.
1011 
1012 		// Unique fields aren't considered content - if
1013 		// there are no other fields in the document, the
1014 		// document is to be deleted.
1015 		this_field_is_content = false;
1016 
1017 		// Argument is the prefix to add to the field value
1018 		// to get the unique term.
1019 		string t = action.get_string_arg();
1020 		if (prefix_needs_colon(t, value[0])) t += ':';
1021 		t += value;
1022 		Xapian::PostingIterator p = database.postlist_begin(t);
1023 		if (p != database.postlist_end(t)) {
1024 		    docid = *p;
1025 		}
1026 		break;
1027 	    }
1028 	    case Action::VALUE:
1029 		if (!value.empty())
1030 		    doc.add_value(action.get_num_arg(), value);
1031 		break;
1032 	    case Action::VALUENUMERIC: {
1033 		if (value.empty()) break;
1034 		char * end;
1035 		double dbl = strtod(value.c_str(), &end);
1036 		if (*end) {
1037 		    report_location(DIAG_WARN, fname, line_no);
1038 		    cerr << "Trailing characters in VALUENUMERIC: '"
1039 			 << value << "'" << endl;
1040 		}
1041 		doc.add_value(action.get_num_arg(),
1042 			      Xapian::sortable_serialise(dbl));
1043 		break;
1044 	    }
1045 	    case Action::VALUEPACKED: {
1046 		uint32_t word = 0;
1047 		if (value.empty() || !C_isdigit(value[0])) {
1048 		    // strtoul() accepts leading whitespace and negated
1049 		    // values, neither of which we want to allow.
1050 		    errno = EINVAL;
1051 		} else {
1052 		    errno = 0;
1053 		    char* q;
1054 		    word = strtoul(value.c_str(), &q, 10);
1055 		    if (!errno && *q != '\0') {
1056 			// Trailing characters after converted value.
1057 			errno = EINVAL;
1058 		    }
1059 		}
1060 		if (errno) {
1061 		    report_location(DIAG_WARN, fname, line_no);
1062 		    cerr << "valuepacked \"" << value << "\" ";
1063 		    if (errno == ERANGE) {
1064 			cerr << "out of range";
1065 		    } else {
1066 			cerr << "not an unsigned integer";
1067 		    }
1068 		    cerr << endl;
1069 		}
1070 		int valueslot = action.get_num_arg();
1071 		doc.add_value(valueslot, int_to_binary_string(word));
1072 		break;
1073 	    }
1074 	    case Action::DATE: {
1075 		// Do nothing for empty input.
1076 		if (value.empty()) break;
1077 
1078 		const string & type = action.get_string_arg();
1079 		string yyyymmdd;
1080 		if (type == "unix") {
1081 		    time_t t;
1082 		    if (!parse_signed(value.c_str(), t)) {
1083 			report_location(DIAG_WARN, fname, line_no);
1084 			cerr << "Date value (in secs) for action DATE "
1085 				"must be an integer - ignoring" << endl;
1086 			break;
1087 		    }
1088 		    struct tm *tm = localtime(&t);
1089 		    int y = tm->tm_year + 1900;
1090 		    int m = tm->tm_mon + 1;
1091 		    yyyymmdd = date_to_string(y, m, tm->tm_mday);
1092 		} else if (type == "unixutc") {
1093 		    time_t t;
1094 		    if (!parse_signed(value.c_str(), t)) {
1095 			report_location(DIAG_WARN, fname, line_no);
1096 			cerr << "Date value (in secs) for action DATE "
1097 				"must be an integer - ignoring" << endl;
1098 			break;
1099 		    }
1100 		    struct tm *tm = gmtime(&t);
1101 		    int y = tm->tm_year + 1900;
1102 		    int m = tm->tm_mon + 1;
1103 		    yyyymmdd = date_to_string(y, m, tm->tm_mday);
1104 		} else if (type == "yyyymmdd") {
1105 		    if (value.length() != 8) {
1106 			report_location(DIAG_WARN, fname, line_no);
1107 			cerr << "date=yyyymmdd expects an 8 character value "
1108 				"- ignoring" << endl;
1109 			break;
1110 		    }
1111 		    yyyymmdd = value;
1112 		}
1113 
1114 		// Date (YYYYMMDD)
1115 		doc.add_boolean_term("D" + yyyymmdd);
1116 		yyyymmdd.resize(6);
1117 		// Month (YYYYMM)
1118 		doc.add_boolean_term("M" + yyyymmdd);
1119 		yyyymmdd.resize(4);
1120 		// Year (YYYY)
1121 		doc.add_boolean_term("Y" + yyyymmdd);
1122 		break;
1123 	    }
1124 	    case Action::PARSEDATE: {
1125 		string dateformat = action.get_string_arg();
1126 		struct tm tm;
1127 		memset(&tm, 0, sizeof(tm));
1128 		auto ret = strptime(value.c_str(), dateformat.c_str(), &tm);
1129 		if (ret == NULL) {
1130 		    report_location(DIAG_WARN, fname, line_no);
1131 		    cerr << "\"" << value << "\" doesn't match format "
1132 			    "\"" << dateformat << '\"' << endl;
1133 		    break;
1134 		}
1135 
1136 		if (*ret != '\0') {
1137 		    report_location(DIAG_WARN, fname, line_no);
1138 		    cerr << "\"" << value << "\" not fully matched by "
1139 			    "format \"" << dateformat << "\" "
1140 			    "(\"" << ret << "\" left over) but "
1141 			    "indexing anyway" << endl;
1142 		}
1143 #ifdef HAVE_STRUCT_TM_TM_GMTOFF
1144 		auto gmtoff = tm.tm_gmtoff;
1145 #endif
1146 		auto secs_since_epoch = timegm(&tm);
1147 #ifdef HAVE_STRUCT_TM_TM_GMTOFF
1148 		secs_since_epoch -= gmtoff;
1149 #endif
1150 		value = str(secs_since_epoch);
1151 		break;
1152 	    }
1153 	    default:
1154 		/* Empty default case to avoid "unhandled enum value"
1155 		 * warnings. */
1156 		break;
1157 	}
1158     }
1159     return true;
1160 }
1161 
1162 static void
index_file(const char * fname,istream & stream,Xapian::WritableDatabase & database,Xapian::TermGenerator & indexer)1163 index_file(const char *fname, istream &stream,
1164 	   Xapian::WritableDatabase &database, Xapian::TermGenerator &indexer)
1165 {
1166     string line;
1167     size_t line_no = 0;
1168     while (!stream.eof() && getline(stream, line)) {
1169 	++line_no;
1170 	Xapian::Document doc;
1171 	indexer.set_document(doc);
1172 	Xapian::docid docid = 0;
1173 	map<string, list<string>> fields;
1174 	bool seen_content = false;
1175 	while (!line.empty()) {
1176 	    // Cope with files from MS Windows (\r\n end of lines).
1177 	    // Trim multiple \r characters, since that seems the best way
1178 	    // to handle that case.
1179 	    string::size_type last = line.find_last_not_of('\r');
1180 	    if (last == string::npos) break;
1181 	    line.resize(last + 1);
1182 
1183 	    string::size_type eq = line.find('=');
1184 	    if (eq == string::npos && !line.empty()) {
1185 		report_location(DIAG_ERROR, fname, line_no, line.size());
1186 		cerr << "expected = somewhere in this line" << endl;
1187 		// FIXME: die or what?
1188 	    }
1189 	    string field(line, 0, eq);
1190 	    string value(line, eq + 1, string::npos);
1191 	    while (getline(stream, line)) {
1192 		++line_no;
1193 		if (line.empty() || line[0] != '=') break;
1194 		// Cope with files from MS Windows (\r\n end of lines).
1195 		// Trim multiple \r characters, since that seems the best way
1196 		// to handle that case.
1197 		last = line.find_last_not_of('\r');
1198 		// line[0] == '=', so last != string::npos.
1199 		// Replace the '=' with a '\n' so we don't have to use substr.
1200 		line[0] = '\n';
1201 		line.resize(last + 1);
1202 		value += line;
1203 	    }
1204 
1205 	    // Default to not indexing spellings.
1206 	    indexer.set_flags(Xapian::TermGenerator::flags(0));
1207 
1208 	    bool this_field_is_content = true;
1209 	    const vector<Action>& v = index_spec[field];
1210 	    run_actions(v.begin(), v.end(),
1211 			database, indexer, value,
1212 			this_field_is_content, doc, fields,
1213 			field, fname, line_no,
1214 			docid);
1215 	    if (this_field_is_content) seen_content = true;
1216 	    if (stream.eof()) break;
1217 	}
1218 
1219 	// If we haven't seen any fields (other than unique identifiers)
1220 	// the document is to be deleted.
1221 	if (!seen_content) {
1222 	    if (docid) {
1223 		database.delete_document(docid);
1224 		if (verbose) cout << "Del: " << docid << endl;
1225 		++delcount;
1226 	    }
1227 	} else {
1228 	    string data;
1229 	    for (auto&& i : fields) {
1230 		for (auto&& field_val : i.second) {
1231 		    data += i.first;
1232 		    data += '=';
1233 		    data += field_val;
1234 		    data += '\n';
1235 		}
1236 	    }
1237 
1238 	    // Put the data in the document
1239 	    doc.set_data(data);
1240 
1241 	    // Add the document to the database
1242 	    if (docid) {
1243 		database.replace_document(docid, doc);
1244 		if (verbose) cout << "Replace: " << docid << endl;
1245 		++repcount;
1246 	    } else {
1247 		docid = database.add_document(doc);
1248 		if (verbose) cout << "Add: " << docid << endl;
1249 		++addcount;
1250 	    }
1251 	}
1252     }
1253 
1254     // Commit after each file to make sure all changes from that file make it
1255     // in.
1256     if (verbose) cout << "Committing: " << endl;
1257     database.commit();
1258 }
1259 
1260 static void
show_help(int exit_code)1261 show_help(int exit_code)
1262 {
1263     cout << PROG_NAME " - " PROG_DESC "\n"
1264 "Usage: " PROG_NAME " [OPTIONS] DATABASE INDEXER_SCRIPT [INPUT_FILE]...\n"
1265 "\n"
1266 "Creates or updates a Xapian database with the data from the input files listed\n"
1267 "on the command line.  If no files are specified, data is read from stdin.\n"
1268 "\n"
1269 "See https://xapian.org/docs/omega/scriptindex.html for documentation of the\n"
1270 "format for INDEXER_SCRIPT.\n"
1271 "\n"
1272 "Options:\n"
1273 "  -v, --verbose       display additional messages to aid debugging\n"
1274 "      --overwrite     create the database anew (the default is to update if\n"
1275 "                      the database already exists)\n";
1276     print_stemmer_help("");
1277     print_help_and_version_help("");
1278     exit(exit_code);
1279 }
1280 
1281 int
main(int argc,char ** argv)1282 main(int argc, char **argv)
1283 try {
1284     // If the database already exists, default to updating not overwriting.
1285     int database_mode = Xapian::DB_CREATE_OR_OPEN;
1286     verbose = false;
1287     Xapian::Stem stemmer("english");
1288 
1289     // Without this, strptime() seems to treat formats without a timezone as
1290     // being local time, including %s.
1291     setenv("TZ", "UTC", 1);
1292 
1293     constexpr auto NO_ARG = no_argument;
1294     constexpr auto REQ_ARG = required_argument;
1295     static const struct option longopts[] = {
1296 	{ "help",	NO_ARG,		NULL, 'h' },
1297 	{ "version",	NO_ARG,		NULL, 'V' },
1298 	{ "stemmer",	REQ_ARG,	NULL, 's' },
1299 	{ "overwrite",	NO_ARG,		NULL, 'o' },
1300 	{ "verbose",	NO_ARG,		NULL, 'v' },
1301 	{ 0, 0, NULL, 0 }
1302     };
1303 
1304     int getopt_ret;
1305     while ((getopt_ret = gnu_getopt_long(argc, argv, "vs:hV",
1306 					 longopts, NULL)) != -1) {
1307 	switch (getopt_ret) {
1308 	    default:
1309 		show_help(1);
1310 		break;
1311 	    case 'h': // --help
1312 		show_help(0);
1313 		break;
1314 	    case 'V': // --version
1315 		print_package_info(PROG_NAME);
1316 		return 0;
1317 	    case 'o': // --overwrite
1318 		database_mode = Xapian::DB_CREATE_OR_OVERWRITE;
1319 		break;
1320 	    case 'v':
1321 		verbose = true;
1322 		break;
1323 	    case 's':
1324 		try {
1325 		    stemmer = Xapian::Stem(optarg);
1326 		} catch (const Xapian::InvalidArgumentError &) {
1327 		    cerr << "Unknown stemming language '" << optarg << "'.\n";
1328 		    cerr << "Available language names are: "
1329 			 << Xapian::Stem::get_available_languages() << endl;
1330 		    return 1;
1331 		}
1332 		break;
1333 	}
1334     }
1335 
1336     argv += optind;
1337     argc -= optind;
1338     if (argc < 2) {
1339 	show_help(1);
1340     }
1341 
1342     parse_index_script(argv[1]);
1343 
1344     // Open the database.  If another process is currently updating the
1345     // database, wait for the lock to become available.
1346     auto flags = database_mode | Xapian::DB_RETRY_LOCK;
1347     Xapian::WritableDatabase database(argv[0], flags);
1348 
1349     Xapian::TermGenerator indexer;
1350     indexer.set_stemmer(stemmer);
1351     // Set the database for spellings to be added to by the "spell" action.
1352     indexer.set_database(database);
1353 
1354     addcount = 0;
1355     repcount = 0;
1356     delcount = 0;
1357 
1358     if (argc == 2) {
1359 	// Read from stdin.
1360 	index_file("<stdin>", cin, database, indexer);
1361     } else {
1362 	// Read file(s) listed on the command line.
1363 	for (int i = 2; i < argc; ++i) {
1364 	    ifstream stream(argv[i]);
1365 	    if (stream) {
1366 		index_file(argv[i], stream, database, indexer);
1367 	    } else {
1368 		cerr << "Can't open file " << argv[i] << endl;
1369 	    }
1370 	}
1371     }
1372 
1373     cout << "records (added, replaced, deleted) = (" << addcount << ", "
1374 	 << repcount << ", " << delcount << ")" << endl;
1375 } catch (const Xapian::Error &error) {
1376     cerr << "Exception: " << error.get_description() << endl;
1377     exit(1);
1378 } catch (const std::bad_alloc &) {
1379     cerr << "Exception: std::bad_alloc" << endl;
1380     exit(1);
1381 } catch (...) {
1382     cerr << "Unknown Exception" << endl;
1383     exit(1);
1384 }
1385