1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 
5 #include "input/line_readers.h"
6 
7 #include "lib/mlr_arch.h"
8 #include "lib/mlrutil.h"
9 #include "lib/mlr_globals.h"
10 #include "lib/mtrand.h"
11 #include "containers/slls.h"
12 #include "containers/lhmss.h"
13 #include "containers/lhmsll.h"
14 #include "input/lrec_readers.h"
15 #include "dsl/function_manager.h"
16 #include "dsl/mlr_dsl_cst.h"
17 #include "mapping/mappers.h"
18 #include "output/lrec_writers.h"
19 #include "cli/mlrcli.h"
20 #include "cli/quoting.h"
21 #include "cli/argparse.h"
22 #include "auxents/aux_entries.h"
23 
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #define VERSION_STRING PACKAGE_VERSION
27 #else
28 #include "mlrvers.h"
29 #define VERSION_STRING MLR_VERSION
30 #endif
31 
32 // ----------------------------------------------------------------
// Built-in default values for command-line options.
// NOTE(review): the code that consumes these is outside the visible part of this
// file -- confirm usage at the use sites.
#define DEFAULT_OFMT                     "%lf"
#define DEFAULT_OQUOTING                 QUOTE_MINIMAL
#define DEFAULT_JSON_FLATTEN_SEPARATOR   ":"
#define DEFAULT_OOSVAR_FLATTEN_SEPARATOR ":"
#define DEFAULT_COMMENT_STRING           "#"

// ASCII 1f and 1e
// (Presumably FS = field separator and RS = record separator -- TODO confirm.)
#define ASV_FS "\x1f"
#define ASV_RS "\x1e"

// Human-readable spellings of the two bytes above; going by the names, these are
// meant for help output.
#define ASV_FS_FOR_HELP "0x1f"
#define ASV_RS_FOR_HELP "0x1e"

// Unicode code points U+241F and U+241E, encoded as UTF-8.
#define USV_FS "\xe2\x90\x9f"
#define USV_RS "\xe2\x90\x9e"

// Human-readable spellings of the two UTF-8 sequences above.
#define USV_FS_FOR_HELP "U+241F (UTF-8 0xe2909f)"
#define USV_RS_FOR_HELP "U+241E (UTF-8 0xe2909e)"
52 
53 // ----------------------------------------------------------------
// Table of all verb setups, one pointer per verb. list_all_verbs_raw() and
// list_all_verbs() walk this table in order and print each entry's 'verb' name, so
// the order here is the order shown to the user. Entries are currently kept in
// alphabetical order.
// NOTE(review): look_up_mapper_setup() presumably searches this table as well; its
// definition is not visible here -- confirm.
static mapper_setup_t* mapper_lookup_table[] = {

	&mapper_altkv_setup,
	&mapper_bar_setup,
	&mapper_bootstrap_setup,
	&mapper_cat_setup,
	&mapper_check_setup,
	&mapper_clean_whitespace_setup,
	&mapper_count_setup,
	&mapper_count_distinct_setup,
	&mapper_count_similar_setup,
	&mapper_cut_setup,
	&mapper_decimate_setup,
	&mapper_fill_down_setup,
	&mapper_filter_setup,
	&mapper_format_values_setup,
	&mapper_fraction_setup,
	&mapper_grep_setup,
	&mapper_group_by_setup,
	&mapper_group_like_setup,
	&mapper_having_fields_setup,
	&mapper_head_setup,
	&mapper_histogram_setup,
	&mapper_join_setup,
	&mapper_label_setup,
	&mapper_least_frequent_setup,
	&mapper_merge_fields_setup,
	&mapper_most_frequent_setup,
	&mapper_nest_setup,
	&mapper_nothing_setup,
	&mapper_put_setup,
	&mapper_regularize_setup,
	&mapper_remove_empty_columns_setup,
	&mapper_rename_setup,
	&mapper_reorder_setup,
	&mapper_repeat_setup,
	&mapper_reshape_setup,
	&mapper_sample_setup,
	&mapper_sec2gmt_setup,
	&mapper_sec2gmtdate_setup,
	&mapper_seqgen_setup,
	&mapper_shuffle_setup,
	&mapper_skip_trivial_records_setup,
	&mapper_sort_setup,
	&mapper_sort_within_records_setup,
	&mapper_stats1_setup,
	&mapper_stats2_setup,
	&mapper_step_setup,
	&mapper_tac_setup,
	&mapper_tail_setup,
	&mapper_tee_setup,
	&mapper_top_setup,
	&mapper_uniq_setup,
	&mapper_unsparsify_setup,

};
// Number of entries in mapper_lookup_table, computed at compile time.
static int mapper_lookup_table_length = sizeof(mapper_lookup_table) / sizeof(mapper_lookup_table[0]);
111 
112 // ----------------------------------------------------------------
113 static void cli_load_mlrrc(cli_opts_t* popts);
114 static void cli_try_load_mlrrc(cli_opts_t* popts, char* path);
115 static int handle_mlrrc_line_1(cli_opts_t* popts, char* line);
116 static int handle_mlrrc_line_2(cli_opts_t* popts, char* line);
117 static int handle_mlrrc_line_3(cli_opts_t* popts, char* line);
118 static int handle_mlrrc_line_4(cli_opts_t* popts, char** argv, int argc);
119 
120 static int cli_handle_misc_options(char** argv, int argc, int *pargi, cli_opts_t* popts);
121 
122 static lhmss_t* get_desc_to_chars_map();
123 static lhmsll_t* get_default_repeat_ifses();
124 static lhmsll_t* get_default_repeat_ipses();
125 static lhmss_t* get_default_fses();
126 static lhmss_t* get_default_pses();
127 static lhmss_t* get_default_rses();
128 static void free_opt_singletons();
129 static char* rebackslash(char* sep);
130 
131 static void main_usage_long(FILE* o, char* argv0);
132 static void main_usage_short(FILE* o, char* argv0);
133 static void main_usage_synopsis(FILE* o, char* argv0);
134 static void main_usage_examples(FILE* o, char* argv0, char* leader);
135 static void list_all_verbs_raw(FILE* o);
136 static void list_all_verbs(FILE* o, char* leader);
137 static void main_usage_help_options(FILE* o, char* argv0);
138 static void main_usage_mlrrc(FILE* o, char* argv0);
139 static void main_usage_functions(FILE* o, char* argv0, char* leader);
140 static void main_usage_data_format_examples(FILE* o, char* argv0);
141 static void main_usage_data_format_options(FILE* o, char* argv0);
142 static void main_usage_comments_in_data(FILE* o, char* argv0);
143 static void main_usage_format_conversion_keystroke_saver_options(FILE* o, char* argv0);
144 static void main_usage_compressed_data_options(FILE* o, char* argv0);
145 static void main_usage_separator_options(FILE* o, char* argv0);
146 static void main_usage_csv_options(FILE* o, char* argv0);
147 static void main_usage_double_quoting(FILE* o, char* argv0);
148 static void main_usage_numerical_formatting(FILE* o, char* argv0);
149 static void main_usage_other_options(FILE* o, char* argv0);
150 static void main_usage_then_chaining(FILE* o, char* argv0);
151 static void main_usage_auxents(FILE* o, char* argv0);
152 static void main_usage_see_also(FILE* o, char* argv0);
153 static void print_type_arithmetic_info(FILE* o, char* argv0);
154 static void usage_all_verbs(char* argv0);
155 static void usage_unrecognized_verb(char* argv0, char* arg);
156 
157 static void check_arg_count(char** argv, int argi, int argc, int n);
158 static mapper_setup_t* look_up_mapper_setup(char* verb);
159 
160 static int handle_terminal_usage(char** argv, int argc, int argi);
161 
162 static char* lhmss_get_or_die(lhmss_t* pmap, char* key);
163 static int lhmsll_get_or_die(lhmsll_t* pmap, char* key);
164 
165 // ----------------------------------------------------------------
parse_command_line(int argc,char ** argv,sllv_t ** ppmapper_list)166 cli_opts_t* parse_command_line(int argc, char** argv, sllv_t** ppmapper_list) {
167 	cli_opts_t* popts = mlr_malloc_or_die(sizeof(cli_opts_t));
168 
169 	int argi = 1;
170 
171 	// Set defaults for options
172 	cli_opts_init(popts);
173 
174 	// Try .mlrrc overrides (then command-line on top of that).
175 	// A --norc flag (if provided) must come before all other options.
176 	// Or, they can set the environment variable MLRRC="__none__".
177 	if (argc >= 2 && streq(argv[1], "--norc")) {
178 		argi++;
179 	} else {
180 		cli_load_mlrrc(popts);
181 	}
182 
183 	for (; argi < argc; /* variable increment: 1 or 2 depending on flag */) {
184 
185 		if (argv[argi][0] != '-') {
186 			break; // No more flag options to process
187 		} else if (handle_terminal_usage(argv, argc, argi)) {
188 			exit(0);
189 		} else if (cli_handle_reader_options(argv, argc, &argi, &popts->reader_opts)) {
190 			// handled
191 		} else if (cli_handle_writer_options(argv, argc, &argi, &popts->writer_opts)) {
192 			// handled
193 		} else if (cli_handle_reader_writer_options(argv, argc, &argi, &popts->reader_opts, &popts->writer_opts)) {
194 			// handled
195 		} else if (cli_handle_misc_options(argv, argc, &argi, popts)) {
196 			// handled
197 		} else {
198 			// unhandled
199 			usage_unrecognized_verb(MLR_GLOBALS.bargv0, argv[argi]);
200 		}
201 	}
202 
203 	cli_apply_defaults(popts);
204 
205 	lhmss_t* default_rses = get_default_rses();
206 	lhmss_t* default_fses = get_default_fses();
207 	lhmss_t* default_pses = get_default_pses();
208 	lhmsll_t* default_repeat_ifses = get_default_repeat_ifses();
209 	lhmsll_t* default_repeat_ipses = get_default_repeat_ipses();
210 
211 	if (popts->reader_opts.irs == NULL)
212 		popts->reader_opts.irs = lhmss_get_or_die(default_rses, popts->reader_opts.ifile_fmt);
213 	if (popts->reader_opts.ifs == NULL)
214 		popts->reader_opts.ifs = lhmss_get_or_die(default_fses, popts->reader_opts.ifile_fmt);
215 	if (popts->reader_opts.ips == NULL)
216 		popts->reader_opts.ips = lhmss_get_or_die(default_pses, popts->reader_opts.ifile_fmt);
217 
218 	if (popts->reader_opts.allow_repeat_ifs == NEITHER_TRUE_NOR_FALSE)
219 		popts->reader_opts.allow_repeat_ifs = lhmsll_get_or_die(default_repeat_ifses, popts->reader_opts.ifile_fmt);
220 	if (popts->reader_opts.allow_repeat_ips == NEITHER_TRUE_NOR_FALSE)
221 		popts->reader_opts.allow_repeat_ips = lhmsll_get_or_die(default_repeat_ipses, popts->reader_opts.ifile_fmt);
222 
223 	if (popts->writer_opts.ors == NULL)
224 		popts->writer_opts.ors = lhmss_get_or_die(default_rses, popts->writer_opts.ofile_fmt);
225 	if (popts->writer_opts.ofs == NULL)
226 		popts->writer_opts.ofs = lhmss_get_or_die(default_fses, popts->writer_opts.ofile_fmt);
227 	if (popts->writer_opts.ops == NULL)
228 		popts->writer_opts.ops = lhmss_get_or_die(default_pses, popts->writer_opts.ofile_fmt);
229 
230 	if (streq(popts->writer_opts.ofile_fmt, "pprint") && strlen(popts->writer_opts.ofs) != 1) {
231 		fprintf(stderr, "%s: OFS for PPRINT format must be single-character; got \"%s\".\n",
232 			MLR_GLOBALS.bargv0, popts->writer_opts.ofs);
233 		return NULL;
234 	}
235 
236 	// Construct the mapper list for single use, e.g. the normal streaming case wherein the
237 	// mappers operate on all input files. Also retain information needed to construct them
238 	// for each input file, for in-place mode.
239 	popts->mapper_argb = argi;
240 	popts->original_argv = argv;
241 	popts->non_in_place_argv = copy_argv(argv);
242 	popts->argc = argc;
243 	*ppmapper_list = cli_parse_mappers(popts->non_in_place_argv, &argi, argc, popts);
244 
245 	for ( ; argi < argc; argi++) {
246 		slls_append(popts->filenames, argv[argi], NO_FREE);
247 	}
248 
249 	if (popts->no_input) {
250 		slls_free(popts->filenames);
251 		popts->filenames = NULL;
252 	}
253 
254 	if (popts->do_in_place && (popts->filenames == NULL || popts->filenames->length == 0)) {
255 		fprintf(stderr, "%s: -I option (in-place operation) requires input files.\n", MLR_GLOBALS.bargv0);
256 		exit(1);
257 	}
258 
259 	if (popts->have_rand_seed) {
260 		mtrand_init(popts->rand_seed);
261 	} else {
262 		mtrand_init_default();
263 	}
264 
265 	return popts;
266 }
267 
268 // ----------------------------------------------------------------
269 // Returns a list of mappers, from the starting point in argv given by *pargi. Bumps *pargi to
270 // point to remaining post-mapper-setup args, i.e. filenames.
cli_parse_mappers(char ** argv,int * pargi,int argc,cli_opts_t * popts)271 sllv_t* cli_parse_mappers(char** argv, int* pargi, int argc, cli_opts_t* popts) {
272 	sllv_t* pmapper_list = sllv_alloc();
273 	int argi = *pargi;
274 
275 	// Allow then-chains to start with an initial 'then': 'mlr verb1 then verb2 then verb3' or
276 	// 'mlr then verb1 then verb2 then verb3'. Particuarly useful in backslashy scripting contexts.
277 	if ((argc - argi) >= 1 && streq(argv[argi], "then")) {
278 		argi++;
279 	}
280 
281 	if ((argc - argi) < 1) {
282 		fprintf(stderr, "%s: no verb supplied.\n", MLR_GLOBALS.bargv0);
283 		main_usage_short(stderr, MLR_GLOBALS.bargv0);
284 		exit(1);
285 	}
286 
287 	// Note that the command-line parsers can operate destructively on argv, e.g. verbs
288 	// which take comma-delimited field names splitting on commas.  For this reason we
289 	// need to duplicate argv on each in-place run within the streamer module. But before
290 	// that ever happens, here we run through the verb-parsers once to find out where it
291 	// is on the command line that the verbs and their arguments end and the filenames
292 	// begin.
293 
294 	while (TRUE) {
295 		check_arg_count(argv, argi, argc, 1);
296 		char* verb = argv[argi];
297 
298 		mapper_setup_t* pmapper_setup = look_up_mapper_setup(verb);
299 		if (pmapper_setup == NULL) {
300 			fprintf(stderr, "%s: verb \"%s\" not found. Please use \"%s --help\" for a list.\n",
301 				MLR_GLOBALS.bargv0, verb, MLR_GLOBALS.bargv0);
302 			exit(1);
303 		}
304 
305 		if ((argc - argi) >= 2) {
306 			if (streq(argv[argi+1], "-h") || streq(argv[argi+1], "--help")) {
307 				pmapper_setup->pusage_func(stdout, MLR_GLOBALS.bargv0, verb);
308 				exit(0);
309 			}
310 		}
311 
312 		// It's up to the parse func to print its usage on CLI-parse failure.
313 		// Also note: this assumes main reader/writer opts are all parsed
314 		// *before* mapper parse-CLI methods are invoked.
315 		mapper_t* pmapper = pmapper_setup->pparse_func(&argi, argc, argv,
316 			&popts->reader_opts, &popts->writer_opts);
317 		if (pmapper == NULL) {
318 			exit(1);
319 		}
320 
321 		if (pmapper_setup->ignores_input && pmapper_list->length == 0) {
322 			// e.g. then-chain starts with seqgen
323 			popts->no_input = TRUE;
324 		}
325 
326 		sllv_append(pmapper_list, pmapper);
327 
328 		if (argi >= argc || !streq(argv[argi], "then"))
329 			break;
330 		argi++;
331 	}
332 
333 	*pargi = argi;
334 	return pmapper_list;
335 }
336 
337 // ----------------------------------------------------------------
// Releases an options object: its filename list, its argv copy, and the struct
// itself. Also tears down this file's lazily-built singleton lookup maps.
// A NULL argument is a no-op (the singletons are left alone in that case too).
void cli_opts_free(cli_opts_t* popts) {
	if (popts != NULL) {
		slls_free(popts->filenames);
		free_argv_copy(popts->non_in_place_argv);
		free(popts);
		free_opt_singletons();
	}
}
347 
348 // ----------------------------------------------------------------
349 static lhmss_t* singleton_pdesc_to_chars_map = NULL;
get_desc_to_chars_map()350 static lhmss_t* get_desc_to_chars_map() {
351 	if (singleton_pdesc_to_chars_map == NULL) {
352 		singleton_pdesc_to_chars_map = lhmss_alloc();
353 		lhmss_put(singleton_pdesc_to_chars_map, "cr",        "\r",       NO_FREE);
354 		lhmss_put(singleton_pdesc_to_chars_map, "crcr",      "\r\r",     NO_FREE);
355 		lhmss_put(singleton_pdesc_to_chars_map, "newline",   "\n",       NO_FREE);
356 		lhmss_put(singleton_pdesc_to_chars_map, "lf",        "\n",       NO_FREE);
357 		lhmss_put(singleton_pdesc_to_chars_map, "lflf",      "\n\n",     NO_FREE);
358 		lhmss_put(singleton_pdesc_to_chars_map, "crlf",      "\r\n",     NO_FREE);
359 		lhmss_put(singleton_pdesc_to_chars_map, "crlfcrlf",  "\r\n\r\n", NO_FREE);
360 		lhmss_put(singleton_pdesc_to_chars_map, "tab",       "\t",       NO_FREE);
361 		lhmss_put(singleton_pdesc_to_chars_map, "space",     " ",        NO_FREE);
362 		lhmss_put(singleton_pdesc_to_chars_map, "comma",     ",",        NO_FREE);
363 		lhmss_put(singleton_pdesc_to_chars_map, "newline",   "\n",       NO_FREE);
364 		lhmss_put(singleton_pdesc_to_chars_map, "pipe",      "|",        NO_FREE);
365 		lhmss_put(singleton_pdesc_to_chars_map, "slash",     "/",        NO_FREE);
366 		lhmss_put(singleton_pdesc_to_chars_map, "colon",     ":",        NO_FREE);
367 		lhmss_put(singleton_pdesc_to_chars_map, "semicolon", ";",        NO_FREE);
368 		lhmss_put(singleton_pdesc_to_chars_map, "equals",    "=",        NO_FREE);
369 	}
370 	return singleton_pdesc_to_chars_map;
371 }
// Converts a separator given on the command line into the separator characters.
// The return value is always a fresh copy, so the caller can free it unconditionally.
char* cli_sep_from_arg(char* arg) {
	char* aliased_chars = lhmss_get(get_desc_to_chars_map(), arg);
	if (aliased_chars == NULL) {
		// Not a known alias: the argument is the separator itself, possibly written
		// with backslash sequences. E.g. '\r\n'
		return mlr_alloc_unbackslash(arg);
	}
	// A known alias. E.g. crlf
	return mlr_strdup_or_die(aliased_chars);
}
380 
381 // ----------------------------------------------------------------
382 static lhmss_t* singleton_default_rses = NULL;
383 static lhmss_t* singleton_default_fses = NULL;
384 static lhmss_t* singleton_default_pses = NULL;
385 static lhmsll_t* singleton_default_repeat_ifses = NULL;
386 static lhmsll_t* singleton_default_repeat_ipses = NULL;
387 
get_default_rses()388 static lhmss_t* get_default_rses() {
389 	if (singleton_default_rses == NULL) {
390 		singleton_default_rses = lhmss_alloc();
391 
392 		lhmss_put(singleton_default_rses, "gen",      "N/A",  NO_FREE);
393 		lhmss_put(singleton_default_rses, "dkvp",     "auto",  NO_FREE);
394 		lhmss_put(singleton_default_rses, "json",     "auto",  NO_FREE);
395 		lhmss_put(singleton_default_rses, "nidx",     "auto",  NO_FREE);
396 		lhmss_put(singleton_default_rses, "csv",      "auto",  NO_FREE);
397 		lhmss_put(singleton_default_rses, "csvlite",  "auto",  NO_FREE);
398 		lhmss_put(singleton_default_rses, "markdown", "auto",  NO_FREE);
399 		lhmss_put(singleton_default_rses, "pprint",   "auto",  NO_FREE);
400 		lhmss_put(singleton_default_rses, "xtab",     "(N/A)", NO_FREE);
401 	}
402 	return singleton_default_rses;
403 }
404 
get_default_fses()405 static lhmss_t* get_default_fses() {
406 	if (singleton_default_fses == NULL) {
407 		singleton_default_fses = lhmss_alloc();
408 		lhmss_put(singleton_default_fses, "gen",      "(N/A)",  NO_FREE);
409 		lhmss_put(singleton_default_fses, "dkvp",     ",",      NO_FREE);
410 		lhmss_put(singleton_default_fses, "json",     "(N/A)",  NO_FREE);
411 		lhmss_put(singleton_default_fses, "nidx",     " ",      NO_FREE);
412 		lhmss_put(singleton_default_fses, "csv",      ",",      NO_FREE);
413 		lhmss_put(singleton_default_fses, "csvlite",  ",",      NO_FREE);
414 		lhmss_put(singleton_default_fses, "markdown", "(N/A)",  NO_FREE);
415 		lhmss_put(singleton_default_fses, "pprint",   " ",      NO_FREE);
416 		lhmss_put(singleton_default_fses, "xtab",     "auto",   NO_FREE);
417 	}
418 	return singleton_default_fses;
419 }
420 
get_default_pses()421 static lhmss_t* get_default_pses() {
422 	if (singleton_default_pses == NULL) {
423 		singleton_default_pses = lhmss_alloc();
424 		lhmss_put(singleton_default_pses, "gen",      "(N/A)", NO_FREE);
425 		lhmss_put(singleton_default_pses, "dkvp",     "=",     NO_FREE);
426 		lhmss_put(singleton_default_pses, "json",     "(N/A)", NO_FREE);
427 		lhmss_put(singleton_default_pses, "nidx",     "(N/A)", NO_FREE);
428 		lhmss_put(singleton_default_pses, "csv",      "(N/A)", NO_FREE);
429 		lhmss_put(singleton_default_pses, "csvlite",  "(N/A)", NO_FREE);
430 		lhmss_put(singleton_default_pses, "markdown", "(N/A)", NO_FREE);
431 		lhmss_put(singleton_default_pses, "pprint",   "(N/A)", NO_FREE);
432 		lhmss_put(singleton_default_pses, "xtab",     " ",     NO_FREE);
433 	}
434 	return singleton_default_pses;
435 }
436 
get_default_repeat_ifses()437 static lhmsll_t* get_default_repeat_ifses() {
438 	if (singleton_default_repeat_ifses == NULL) {
439 		singleton_default_repeat_ifses = lhmsll_alloc();
440 		lhmsll_put(singleton_default_repeat_ifses, "gen",      FALSE, NO_FREE);
441 		lhmsll_put(singleton_default_repeat_ifses, "dkvp",     FALSE, NO_FREE);
442 		lhmsll_put(singleton_default_repeat_ifses, "json",     FALSE, NO_FREE);
443 		lhmsll_put(singleton_default_repeat_ifses, "csv",      FALSE, NO_FREE);
444 		lhmsll_put(singleton_default_repeat_ifses, "csvlite",  FALSE, NO_FREE);
445 		lhmsll_put(singleton_default_repeat_ifses, "markdown", FALSE, NO_FREE);
446 		lhmsll_put(singleton_default_repeat_ifses, "nidx",     FALSE, NO_FREE);
447 		lhmsll_put(singleton_default_repeat_ifses, "xtab",     FALSE, NO_FREE);
448 		lhmsll_put(singleton_default_repeat_ifses, "pprint",   TRUE,  NO_FREE);
449 	}
450 	return singleton_default_repeat_ifses;
451 }
452 
get_default_repeat_ipses()453 static lhmsll_t* get_default_repeat_ipses() {
454 	if (singleton_default_repeat_ipses == NULL) {
455 		singleton_default_repeat_ipses = lhmsll_alloc();
456 		lhmsll_put(singleton_default_repeat_ipses, "gen",      FALSE, NO_FREE);
457 		lhmsll_put(singleton_default_repeat_ipses, "dkvp",     FALSE, NO_FREE);
458 		lhmsll_put(singleton_default_repeat_ipses, "json",     FALSE, NO_FREE);
459 		lhmsll_put(singleton_default_repeat_ipses, "csv",      FALSE, NO_FREE);
460 		lhmsll_put(singleton_default_repeat_ipses, "csvlite",  FALSE, NO_FREE);
461 		lhmsll_put(singleton_default_repeat_ipses, "markdown", FALSE, NO_FREE);
462 		lhmsll_put(singleton_default_repeat_ipses, "nidx",     FALSE, NO_FREE);
463 		lhmsll_put(singleton_default_repeat_ipses, "xtab",     TRUE,  NO_FREE);
464 		lhmsll_put(singleton_default_repeat_ipses, "pprint",   FALSE, NO_FREE);
465 	}
466 	return singleton_default_repeat_ipses;
467 }
468 
free_opt_singletons()469 static void free_opt_singletons() {
470 	lhmss_free(singleton_pdesc_to_chars_map);
471 	lhmss_free(singleton_default_rses);
472 	lhmss_free(singleton_default_fses);
473 	lhmss_free(singleton_default_pses);
474 	lhmsll_free(singleton_default_repeat_ifses);
475 	lhmsll_free(singleton_default_repeat_ipses);
476 }
477 
// Gives a printable spelling of a separator, for showing default separators in the
// on-line help. Separators without a special spelling are returned unchanged (the
// very same pointer); the special spellings are string literals. Either way the
// caller must not free the result.
static char* rebackslash(char* sep) {
	// {separator, printable spelling}
	static char* const spellings[][2] = {
		{"\r",   "\\r"},
		{"\n",   "\\n"},
		{"\r\n", "\\r\\n"},
		{"\t",   "\\t"},
		{" ",    "space"},
	};
	int nspellings = sizeof(spellings) / sizeof(spellings[0]);

	for (int i = 0; i < nspellings; i++) {
		if (streq(sep, spellings[i][0]))
			return spellings[i][1];
	}
	return sep;
}
493 
494 // ----------------------------------------------------------------
// Prints a one-line pointer to the detailed help, then exits the process with
// status 1. This function does not return.
//
// * fp: stream to write to. If NULL, stderr is used.
// * argv0: program name to show in the message.
static void main_usage_short(FILE* fp, char* argv0) {
	FILE* out = (fp != NULL) ? fp : stderr;
	fprintf(out, "Please run \"%s --help\" for detailed usage information.\n", argv0);
	exit(1);
}
499 
500 // ----------------------------------------------------------------
// Prints the complete main help text to 'o'. Each section's body lives in its own
// function so that the manpage autogenerator can request sections individually; this
// function supplies the section headings and the blank lines between sections.
static void main_usage_long(FILE* o, char* argv0) {
	main_usage_synopsis(o, argv0);

	fputs("\nCommand-line-syntax examples:\n", o);
	main_usage_examples(o, argv0, "  ");

	fputs("\nData-format examples:\n", o);
	main_usage_data_format_examples(o, argv0);

	fputs("\nHelp options:\n", o);
	main_usage_help_options(o, argv0);

	fputs("\nCustomization via .mlrrc:\n", o);
	main_usage_mlrrc(o, argv0);

	fputs("\nVerbs:\n", o);
	list_all_verbs(o, "  ");

	fputs("\nFunctions for the filter and put verbs:\n", o);
	main_usage_functions(o, argv0, "  ");

	fputs("\nData-format options, for input, output, or both:\n", o);
	main_usage_data_format_options(o, argv0);

	fputs("\nComments in data:\n", o);
	main_usage_comments_in_data(o, argv0);

	fputs("\nFormat-conversion keystroke-saver options, for input, output, or both:\n", o);
	main_usage_format_conversion_keystroke_saver_options(o, argv0);

	fputs("\nCompressed-data options:\n", o);
	main_usage_compressed_data_options(o, argv0);

	fputs("\nSeparator options, for input, output, or both:\n", o);
	main_usage_separator_options(o, argv0);

	fputs("\nRelevant to CSV/CSV-lite input only:\n", o);
	main_usage_csv_options(o, argv0);

	fputs("\nDouble-quoting for CSV output:\n", o);
	main_usage_double_quoting(o, argv0);

	fputs("\nNumerical formatting:\n", o);
	main_usage_numerical_formatting(o, argv0);

	fputs("\nOther options:\n", o);
	main_usage_other_options(o, argv0);

	fputs("\nThen-chaining:\n", o);
	main_usage_then_chaining(o, argv0);

	fputs("\nAuxiliary commands:\n", o);
	main_usage_auxents(o, argv0);

	// The final section prints without a heading from here.
	fputs("\n", o);
	main_usage_see_also(o, argv0);
}
578 
// Prints the one-line "Usage: ..." synopsis to 'o', with argv0 as the program name.
static void main_usage_synopsis(FILE* o, char* argv0) {
	fprintf(o, "Usage: %s %s\n",
		argv0,
		"[I/O options] {verb} [verb-dependent options ...] {zero or more file names}");
}
582 
// Prints sample command lines to 'o'. Lines which start a command are prefixed with
// 'leader' and use argv0 as the program name; the continuation lines of the
// multi-line put example carry their own fixed indentation.
static void main_usage_examples(FILE* o, char* argv0, char* leader) {
	// Every entry is a printf format which is given (leader, argv0). Entries with no
	// conversion specifications simply leave those arguments unused, which is
	// well-defined: excess printf arguments are ignored.
	static const char* const example_formats[] = {
		"%s%s --csv cut -f hostname,uptime mydata.csv\n",
		"%s%s --tsv --rs lf filter '$status != \"down\" && $upsec >= 10000' *.tsv\n",
		"%s%s --nidx put '$sum = $7 < 0.0 ? 3.5 : $7 + 2.1*$8' *.dat\n",
		"%sgrep -v '^#' /etc/group | %s --ifs : --nidx --opprint label group,pass,gid,member then sort -f group\n",
		"%s%s join -j account_id -f accounts.dat then group-by account_name balances.dat\n",
		"%s%s --json put '$attr = sub($attr, \"([0-9]+)_([0-9]+)_.*\", \"\\1:\\2\")' data/*.json\n",
		"%s%s stats1 -a min,mean,max,p10,p50,p90 -f flag,u,v data/*\n",
		"%s%s stats2 -a linreg-pca -f u,v -g shape data/*\n",
		"%s%s put -q '@sum[$a][$b] += $x; end {emit @sum, \"a\", \"b\"}' data/*\n",
		"%s%s --from estimates.tbl put '\n",
		"  for (k,v in $*) {\n",
		"    if (is_numeric(v) && k =~ \"^[t-z].*$\") {\n",
		"      $sum += v; $count += 1\n",
		"    }\n",
		"  }\n",
		"  $mean = $sum / $count # no assignment if count unset'\n",
		"%s%s --from infile.dat put -f analyze.mlr\n",
		"%s%s --from infile.dat put 'tee > \"./taps/data-\".$a.\"-\".$b, $*'\n",
		"%s%s --from infile.dat put 'tee | \"gzip > ./taps/data-\".$a.\"-\".$b.\".gz\", $*'\n",
		"%s%s --from infile.dat put -q '@v=$*; dump | \"jq .[]\"'\n",
		"%s%s --from infile.dat put  '(NR %% 1000 == 0) { print > stderr, \"Checkpoint \".NR}'\n",
	};
	int nformats = sizeof(example_formats) / sizeof(example_formats[0]);

	for (int i = 0; i < nformats; i++) {
		fprintf(o, example_formats[i], leader, argv0);
	}
}
608 
list_all_verbs_raw(FILE * o)609 static void list_all_verbs_raw(FILE* o) {
610 	for (int i = 0; i < mapper_lookup_table_length; i++) {
611 		fprintf(o, "%s\n", mapper_lookup_table[i]->verb);
612 	}
613 }
614 
list_all_verbs(FILE * o,char * leader)615 static void list_all_verbs(FILE* o, char* leader) {
616 	char* separator = " ";
617 	int leaderlen = strlen(leader);
618 	int separatorlen = strlen(separator);
619 	int linelen = leaderlen;
620 	int j = 0;
621 	for (int i = 0; i < mapper_lookup_table_length; i++) {
622 		char* verb = mapper_lookup_table[i]->verb;
623 		int verblen = strlen(verb);
624 		linelen += separatorlen + verblen;
625 		if (linelen >= 80) {
626 			fprintf(o, "\n");
627 			linelen = leaderlen + separatorlen + verblen;
628 			j = 0;
629 		}
630 		if (j == 0)
631 			fprintf(o, "%s", leader);
632 		fprintf(o, "%s%s", separator, verb);
633 		j++;
634 	}
635 	fprintf(o, "\n");
636 }
637 
// Prints the list of help-related command-line flags to 'o'. The text is fixed;
// argv0 is not used.
static void main_usage_help_options(FILE* o, char* argv0) {
	fputs(
		"  -h or --help                 Show this message.\n"
		"  --version                    Show the software version.\n"
		"  {verb name} --help           Show verb-specific help.\n"
		"  --help-all-verbs             Show help on all verbs.\n"
		"  -l or --list-all-verbs       List only verb names.\n"
		"  -L                           List only verb names, one per line.\n"
		"  -f or --help-all-functions   Show help on all built-in functions.\n"
		"  -F                           Show a bare listing of built-in functions by name.\n"
		"  -k or --help-all-keywords    Show help on all keywords.\n"
		"  -K                           Show a bare listing of keywords by name.\n",
		o);
}
650 
// Prints the help section describing .mlrrc files to 'o': what they are for, their
// line format, a sample, and how the file location is chosen. The text is fixed;
// argv0 is not used.
static void main_usage_mlrrc(FILE* o, char* argv0) {
	fputs(
		"You can set up personal defaults via a $HOME/.mlrrc and/or ./.mlrrc.\n"
		"For example, if you usually process CSV, then you can put \"--csv\" in your .mlrrc file\n"
		"and that will be the default input/output format unless otherwise specified on the command line.\n"
		"\n"
		"The .mlrrc file format is one \"--flag\" or \"--option value\" per line, with the leading \"--\" optional.\n"
		"Hash-style comments and blank lines are ignored.\n"
		"\n"
		"Sample .mlrrc:\n"
		"# Input and output formats are CSV by default (unless otherwise specified\n"
		"# on the mlr command line):\n"
		"csv\n"
		"# These are no-ops for CSV, but when I do use JSON output, I want these\n"
		"# pretty-printing options to be used:\n"
		"jvstack\n"
		"jlistwrap\n"
		"\n"
		"How to specify location of .mlrrc:\n"
		"* If $MLRRC is set:\n"
		"  o If its value is \"__none__\" then no .mlrrc files are processed.\n"
		"  o Otherwise, its value (as a filename) is loaded and processed. If there are syntax\n"
		"    errors, they abort mlr with a usage message (as if you had mistyped something on the\n"
		"    command line). If the file can't be loaded at all, though, it is silently skipped.\n"
		"  o Any .mlrrc in your home directory or current directory is ignored whenever $MLRRC is\n"
		"    set in the environment.\n"
		"* Otherwise:\n"
		"  o If $HOME/.mlrrc exists, it's then processed as above.\n"
		"  o If ./.mlrrc exists, it's then also processed as above.\n"
		"  (I.e. current-directory .mlrrc defaults are stacked over home-directory .mlrrc defaults.)\n"
		"\n"
		"See also:\n"
		"https://johnkerl.org/miller/doc/customization.html\n",
		o);
}
684 
main_usage_functions(FILE * o,char * argv0,char * leader)685 static void main_usage_functions(FILE* o, char* argv0, char* leader) {
686 	fmgr_t* pfmgr = fmgr_alloc();
687 	fmgr_list_functions(pfmgr, o, leader);
688 	fmgr_free(pfmgr, NULL);
689 	fprintf(o, "\n");
690 	fprintf(o, "Please use \"%s --help-function {function name}\" for function-specific help.\n", argv0);
691 }
692 
// Prints a boxed illustration of each supported data format to 'o', showing sample
// file contents on the left and the records they represent on the right. The text is
// fixed; argv0 is not used.
//
// The text is written with fputs rather than used as a printf format, so it is
// emitted verbatim regardless of its contents.
static void main_usage_data_format_examples(FILE* o, char* argv0) {
	fputs(
		"  DKVP: delimited key-value pairs (Miller default format)\n"
		"  +---------------------+\n"
		"  | apple=1,bat=2,cog=3 | Record 1: \"apple\" => \"1\", \"bat\" => \"2\", \"cog\" => \"3\"\n"
		"  | dish=7,egg=8,flint  | Record 2: \"dish\" => \"7\", \"egg\" => \"8\", \"3\" => \"flint\"\n"
		"  +---------------------+\n"
		"\n"
		"  NIDX: implicitly numerically indexed (Unix-toolkit style)\n"
		"  +---------------------+\n"
		"  | the quick brown     | Record 1: \"1\" => \"the\", \"2\" => \"quick\", \"3\" => \"brown\"\n"
		"  | fox jumped          | Record 2: \"1\" => \"fox\", \"2\" => \"jumped\"\n"
		"  +---------------------+\n"
		"\n"
		"  CSV/CSV-lite: comma-separated values with separate header line\n"
		"  +---------------------+\n"
		"  | apple,bat,cog       |\n"
		"  | 1,2,3               | Record 1: \"apple\" => \"1\", \"bat\" => \"2\", \"cog\" => \"3\"\n"
		"  | 4,5,6               | Record 2: \"apple\" => \"4\", \"bat\" => \"5\", \"cog\" => \"6\"\n"
		"  +---------------------+\n"
		"\n"
		"  Tabular JSON: nested objects are supported, although arrays within them are not:\n"
		"  +---------------------+\n"
		"  | {                   |\n"
		"  |  \"apple\": 1,        | Record 1: \"apple\" => \"1\", \"bat\" => \"2\", \"cog\" => \"3\"\n"
		"  |  \"bat\": 2,          |\n"
		"  |  \"cog\": 3           |\n"
		"  | }                   |\n"
		"  | {                   |\n"
		"  |   \"dish\": {         | Record 2: \"dish:egg\" => \"7\", \"dish:flint\" => \"8\", \"garlic\" => \"\"\n"
		"  |     \"egg\": 7,       |\n"
		"  |     \"flint\": 8      |\n"
		"  |   },                |\n"
		"  |   \"garlic\": \"\"      |\n"
		"  | }                   |\n"
		"  +---------------------+\n"
		"\n"
		"  PPRINT: pretty-printed tabular\n"
		"  +---------------------+\n"
		"  | apple bat cog       |\n"
		"  | 1     2   3         | Record 1: \"apple\" => \"1\", \"bat\" => \"2\", \"cog\" => \"3\"\n"
		"  | 4     5   6         | Record 2: \"apple\" => \"4\", \"bat\" => \"5\", \"cog\" => \"6\"\n"
		"  +---------------------+\n"
		"\n"
		"  XTAB: pretty-printed transposed tabular\n"
		"  +---------------------+\n"
		"  | apple 1             | Record 1: \"apple\" => \"1\", \"bat\" => \"2\", \"cog\" => \"3\"\n"
		"  | bat   2             |\n"
		"  | cog   3             |\n"
		"  |                     |\n"
		"  | dish 7              | Record 2: \"dish\" => \"7\", \"egg\" => \"8\"\n"
		"  | egg  8              |\n"
		"  +---------------------+\n"
		"\n"
		"  Markdown tabular (supported for output only):\n"
		"  +-----------------------+\n"
		"  | | apple | bat | cog | |\n"
		"  | | ---   | --- | --- | |\n"
		"  | | 1     | 2   | 3   | | Record 1: \"apple\" => \"1\", \"bat\" => \"2\", \"cog\" => \"3\"\n"
		"  | | 4     | 5   | 6   | | Record 2: \"apple\" => \"4\", \"bat\" => \"5\", \"cog\" => \"6\"\n"
		"  +-----------------------+\n",
		o);
}
755 
main_usage_data_format_options(FILE * o,char * argv0)756 static void main_usage_data_format_options(FILE* o, char* argv0) {
757 	fprintf(o, "  --idkvp   --odkvp   --dkvp      Delimited key-value pairs, e.g \"a=1,b=2\"\n");
758 	fprintf(o, "                                  (this is Miller's default format).\n");
759 	fprintf(o, "\n");
760 	fprintf(o, "  --inidx   --onidx   --nidx      Implicitly-integer-indexed fields\n");
761 	fprintf(o, "                                  (Unix-toolkit style).\n");
762 	fprintf(o, "  -T                              Synonymous with \"--nidx --fs tab\".\n");
763 	fprintf(o, "\n");
764 	fprintf(o, "  --icsv    --ocsv    --csv       Comma-separated value (or tab-separated\n");
765 	fprintf(o, "                                  with --fs tab, etc.)\n");
766 	fprintf(o, "\n");
767 	fprintf(o, "  --itsv    --otsv    --tsv       Keystroke-savers for \"--icsv --ifs tab\",\n");
768 	fprintf(o, "                                  \"--ocsv --ofs tab\", \"--csv --fs tab\".\n");
769 	fprintf(o, "  --iasv    --oasv    --asv       Similar but using ASCII FS %s and RS %s\n",
770 		ASV_FS_FOR_HELP, ASV_RS_FOR_HELP);
771 	fprintf(o, "  --iusv    --ousv    --usv       Similar but using Unicode FS %s\n",
772 		USV_FS_FOR_HELP);
773 	fprintf(o, "                                  and RS %s\n",
774 		USV_RS_FOR_HELP);
775 	fprintf(o, "\n");
776 	fprintf(o, "  --icsvlite --ocsvlite --csvlite Comma-separated value (or tab-separated\n");
777 	fprintf(o, "                                  with --fs tab, etc.). The 'lite' CSV does not handle\n");
778 	fprintf(o, "                                  RFC-CSV double-quoting rules; is slightly faster;\n");
779 	fprintf(o, "                                  and handles heterogeneity in the input stream via\n");
780 	fprintf(o, "                                  empty newline followed by new header line. See also\n");
781 	fprintf(o, "                                  http://johnkerl.org/miller/doc/file-formats.html#CSV/TSV/etc.\n");
782 	fprintf(o, "\n");
783 	fprintf(o, "  --itsvlite --otsvlite --tsvlite Keystroke-savers for \"--icsvlite --ifs tab\",\n");
784 	fprintf(o, "                                  \"--ocsvlite --ofs tab\", \"--csvlite --fs tab\".\n");
785 	fprintf(o, "  -t                              Synonymous with --tsvlite.\n");
786 	fprintf(o, "  --iasvlite --oasvlite --asvlite Similar to --itsvlite et al. but using ASCII FS %s and RS %s\n",
787 		ASV_FS_FOR_HELP, ASV_RS_FOR_HELP);
788 	fprintf(o, "  --iusvlite --ousvlite --usvlite Similar to --itsvlite et al. but using Unicode FS %s\n",
789 		USV_FS_FOR_HELP);
790 	fprintf(o, "                                  and RS %s\n",
791 		USV_RS_FOR_HELP);
792 	fprintf(o, "\n");
793 	fprintf(o, "  --ipprint --opprint --pprint    Pretty-printed tabular (produces no\n");
794 	fprintf(o, "                                  output until all input is in).\n");
795 	fprintf(o, "                      --right     Right-justifies all fields for PPRINT output.\n");
796 	fprintf(o, "                      --barred    Prints a border around PPRINT output\n");
797 	fprintf(o, "                                  (only available for output).\n");
798 	fprintf(o, "\n");
799 	fprintf(o, "            --omd                 Markdown-tabular (only available for output).\n");
800 	fprintf(o, "\n");
801 	fprintf(o, "  --ixtab   --oxtab   --xtab      Pretty-printed vertical-tabular.\n");
802 	fprintf(o, "                      --xvright   Right-justifies values for XTAB format.\n");
803 	fprintf(o, "\n");
804 	fprintf(o, "  --ijson   --ojson   --json      JSON tabular: sequence or list of one-level\n");
805 	fprintf(o, "                                  maps: {...}{...} or [{...},{...}].\n");
806 	fprintf(o, "    --json-map-arrays-on-input    JSON arrays are unmillerable. --json-map-arrays-on-input\n");
807 	fprintf(o, "    --json-skip-arrays-on-input   is the default: arrays are converted to integer-indexed\n");
808 	fprintf(o, "    --json-fatal-arrays-on-input  maps. The other two options cause them to be skipped, or\n");
809 	fprintf(o, "                                  to be treated as errors.  Please use the jq tool for full\n");
810 	fprintf(o, "                                  JSON (pre)processing.\n");
811 	fprintf(o, "                      --jvstack   Put one key-value pair per line for JSON\n");
812 	fprintf(o, "                                  output.\n");
813 	fprintf(o, "                --jsonx --ojsonx  Keystroke-savers for --json --jvstack\n");
814 	fprintf(o, "                --jsonx --ojsonx  and --ojson --jvstack, respectively.\n");
815 	fprintf(o, "                      --jlistwrap Wrap JSON output in outermost [ ].\n");
816 	fprintf(o, "                    --jknquoteint Do not quote non-string map keys in JSON output.\n");
817 	fprintf(o, "                     --jvquoteall Quote map values in JSON output, even if they're\n");
818 	fprintf(o, "                                  numeric.\n");
819 	fprintf(o, "              --jflatsep {string} Separator for flattening multi-level JSON keys,\n");
820 	fprintf(o, "                                  e.g. '{\"a\":{\"b\":3}}' becomes a:b => 3 for\n");
821 	fprintf(o, "                                  non-JSON formats. Defaults to %s.\n",
822 		DEFAULT_JSON_FLATTEN_SEPARATOR);
823 	fprintf(o, "\n");
824 	fprintf(o, "  -p is a keystroke-saver for --nidx --fs space --repifs\n");
825 	fprintf(o, "\n");
826 	fprintf(o, "  Examples: --csv for CSV-formatted input and output; --idkvp --opprint for\n");
827 	fprintf(o, "  DKVP-formatted input and pretty-printed output.\n");
828 	fprintf(o, "\n");
829 	fprintf(o, "  Please use --iformat1 --oformat2 rather than --format1 --oformat2.\n");
830 	fprintf(o, "  The latter sets up input and output flags for format1, not all of which\n");
831 	fprintf(o, "  are overridden in all cases by setting output format to format2.\n");
832 }
833 
main_usage_comments_in_data(FILE * o,char * argv0)834 static void main_usage_comments_in_data(FILE* o, char* argv0) {
835 	fprintf(o, "  --skip-comments                 Ignore commented lines (prefixed by \"%s\")\n",
836 		DEFAULT_COMMENT_STRING);
837 	fprintf(o, "                                  within the input.\n");
838 	fprintf(o, "  --skip-comments-with {string}   Ignore commented lines within input, with\n");
839 	fprintf(o, "                                  specified prefix.\n");
840 	fprintf(o, "  --pass-comments                 Immediately print commented lines (prefixed by \"%s\")\n",
841 		DEFAULT_COMMENT_STRING);
842 	fprintf(o, "                                  within the input.\n");
843 	fprintf(o, "  --pass-comments-with {string}   Immediately print commented lines within input, with\n");
844 	fprintf(o, "                                  specified prefix.\n");
845 	fprintf(o, "Notes:\n");
846 	fprintf(o, "* Comments are only honored at the start of a line.\n");
847 	fprintf(o, "* In the absence of any of the above four options, comments are data like\n");
848 	fprintf(o, "  any other text.\n");
849 	fprintf(o, "* When pass-comments is used, comment lines are written to standard output\n");
850 	fprintf(o, "  immediately upon being read; they are not part of the record stream.\n");
851 	fprintf(o, "  Results may be counterintuitive. A suggestion is to place comments at the\n");
852 	fprintf(o, "  start of data files.\n");
853 }
854 
// Writes to o the grid of two-letter format-conversion shorthand flags
// (--c2t, --j2p, ...) and the legend for the letters. argv0 is not used.
static void main_usage_format_conversion_keystroke_saver_options(FILE* o, char* argv0) {
	fputs(
		"As keystroke-savers for format-conversion you may use the following:\n"
		"        --c2t --c2d --c2n --c2j --c2x --c2p --c2m\n"
		"  --t2c       --t2d --t2n --t2j --t2x --t2p --t2m\n"
		"  --d2c --d2t       --d2n --d2j --d2x --d2p --d2m\n"
		"  --n2c --n2t --n2d       --n2j --n2x --n2p --n2m\n"
		"  --j2c --j2t --j2d --j2n       --j2x --j2p --j2m\n"
		"  --x2c --x2t --x2d --x2n --x2j       --x2p --x2m\n"
		"  --p2c --p2t --p2d --p2n --p2j --p2x       --p2m\n"
		"The letters c t d n j x p m refer to formats CSV, TSV, DKVP, NIDX, JSON, XTAB,\n"
		"PPRINT, and markdown, respectively. Note that markdown format is available for\n"
		"output only.\n",
		o);
}
868 
// Writes to o the help text for --prepipe and its shorthands. argv0 is
// substituted into the example command lines.
static void main_usage_compressed_data_options(FILE* o, char* argv0) {
	// Flag portions of the example invocations, printed one per line after argv0.
	static const char* const example_flags[] = {
		"--prepipe 'gunzip'",
		"--prepipe 'zcat -cf'",
		"--prepipe 'xz -cd'",
		"--prepipe cat",
		"--prepipe-gunzip",
		"--prepipe-zcat",
	};
	const size_t num_examples = sizeof(example_flags) / sizeof(example_flags[0]);

	fprintf(o,
		"  --prepipe {command} This allows Miller to handle compressed inputs. You can do\n"
		"  without this for single input files, e.g. \"gunzip < myfile.csv.gz | %s ...\".\n"
		"\n"
		"  However, when multiple input files are present, between-file separations are\n"
		"  lost; also, the FILENAME variable doesn't iterate. Using --prepipe you can\n"
		"  specify an action to be taken on each input file. This pre-pipe command must\n"
		"  be able to read from standard input; it will be invoked with\n"
		"    {command} < {filename}.\n"
		"  Examples:\n",
		argv0);

	for (size_t i = 0; i < num_examples; i++) {
		fprintf(o, "    %s %s\n", argv0, example_flags[i]);
	}

	fprintf(o,
		"  Note that this feature is quite general and is not limited to decompression\n"
		"  utilities. You can use it to apply per-file filters of your choice.\n"
		"  For output compression (or other) utilities, simply pipe the output:\n"
		"    %s ... | {your compression command}\n"
		"\n"
		"  There are shorthands --prepipe-zcat and --prepipe-gunzip which are\n"
		"  valid in .mlrrc files. The --prepipe flag is not valid in .mlrrc\n"
		"  files since that would put execution of the prepipe command under \n"
		"  control of the .mlrrc file.\n",
		argv0);
}
896 
main_usage_separator_options(FILE * o,char * argv0)897 static void main_usage_separator_options(FILE* o, char* argv0) {
898 	fprintf(o, "  --rs     --irs     --ors              Record separators, e.g. 'lf' or '\\r\\n'\n");
899 	fprintf(o, "  --fs     --ifs     --ofs  --repifs    Field separators, e.g. comma\n");
900 	fprintf(o, "  --ps     --ips     --ops              Pair separators, e.g. equals sign\n");
901 	fprintf(o, "\n");
902 	fprintf(o, "  Notes about line endings:\n");
903 	fprintf(o, "  * Default line endings (--irs and --ors) are \"auto\" which means autodetect from\n");
904 	fprintf(o, "    the input file format, as long as the input file(s) have lines ending in either\n");
905 	fprintf(o, "    LF (also known as linefeed, '\\n', 0x0a, Unix-style) or CRLF (also known as\n");
906 	fprintf(o, "    carriage-return/linefeed pairs, '\\r\\n', 0x0d 0x0a, Windows style).\n");
907 	fprintf(o, "  * If both irs and ors are auto (which is the default) then LF input will lead to LF\n");
908 	fprintf(o, "    output and CRLF input will lead to CRLF output, regardless of the platform you're\n");
909 	fprintf(o, "    running on.\n");
910 	fprintf(o, "  * The line-ending autodetector triggers on the first line ending detected in the input\n");
911 	fprintf(o, "    stream. E.g. if you specify a CRLF-terminated file on the command line followed by an\n");
912 	fprintf(o, "    LF-terminated file then autodetected line endings will be CRLF.\n");
913 	fprintf(o, "  * If you use --ors {something else} with (default or explicitly specified) --irs auto\n");
914 	fprintf(o, "    then line endings are autodetected on input and set to what you specify on output.\n");
915 	fprintf(o, "  * If you use --irs {something else} with (default or explicitly specified) --ors auto\n");
916 	fprintf(o, "    then the output line endings used are LF on Unix/Linux/BSD/MacOSX, and CRLF on Windows.\n");
917 	fprintf(o, "\n");
918 	fprintf(o, "  Notes about all other separators:\n");
919 	fprintf(o, "  * IPS/OPS are only used for DKVP and XTAB formats, since only in these formats\n");
920 	fprintf(o, "    do key-value pairs appear juxtaposed.\n");
921 	fprintf(o, "  * IRS/ORS are ignored for XTAB format. Nominally IFS and OFS are newlines;\n");
922 	fprintf(o, "    XTAB records are separated by two or more consecutive IFS/OFS -- i.e.\n");
923 	fprintf(o, "    a blank line. Everything above about --irs/--ors/--rs auto becomes --ifs/--ofs/--fs\n");
924 	fprintf(o, "    auto for XTAB format. (XTAB's default IFS/OFS are \"auto\".)\n");
925 	fprintf(o, "  * OFS must be single-character for PPRINT format. This is because it is used\n");
926 	fprintf(o, "    with repetition for alignment; multi-character separators would make\n");
927 	fprintf(o, "    alignment impossible.\n");
928 	fprintf(o, "  * OPS may be multi-character for XTAB format, in which case alignment is\n");
929 	fprintf(o, "    disabled.\n");
930 	fprintf(o, "  * TSV is simply CSV using tab as field separator (\"--fs tab\").\n");
931 	fprintf(o, "  * FS/PS are ignored for markdown format; RS is used.\n");
932 	fprintf(o, "  * All FS and PS options are ignored for JSON format, since they are not relevant\n");
933 	fprintf(o, "    to the JSON format.\n");
934 	fprintf(o, "  * You can specify separators in any of the following ways, shown by example:\n");
935 	fprintf(o, "    - Type them out, quoting as necessary for shell escapes, e.g.\n");
936 	fprintf(o, "      \"--fs '|' --ips :\"\n");
937 	fprintf(o, "    - C-style escape sequences, e.g. \"--rs '\\r\\n' --fs '\\t'\".\n");
938 	fprintf(o, "    - To avoid backslashing, you can use any of the following names:\n");
939 	fprintf(o, "     ");
940 	lhmss_t* pmap = get_desc_to_chars_map();
941 	for (lhmsse_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
942 		fprintf(o, " %s", pe->key);
943 	}
944 	fprintf(o, "\n");
945 	fprintf(o, "  * Default separators by format:\n");
946 	fprintf(o, "      %-12s %-8s %-8s %s\n", "File format", "RS", "FS", "PS");
947 	lhmss_t* default_rses = get_default_rses();
948 	lhmss_t* default_fses = get_default_fses();
949 	lhmss_t* default_pses = get_default_pses();
950 	for (lhmsse_t* pe = default_rses->phead; pe != NULL; pe = pe->pnext) {
951 		char* filefmt = pe->key;
952 		char* rs = pe->value;
953 		char* fs = lhmss_get(default_fses, filefmt);
954 		char* ps = lhmss_get(default_pses, filefmt);
955 		fprintf(o, "      %-12s %-8s %-8s %s\n", filefmt, rebackslash(rs), rebackslash(fs), rebackslash(ps));
956 	}
957 }
958 
// Writes to o the help text for the CSV-only flags. argv0 is not used.
static void main_usage_csv_options(FILE* o, char* argv0) {
	fputs(
		"  --implicit-csv-header Use 1,2,3,... as field labels, rather than from line 1\n"
		"                     of input files. Tip: combine with \"label\" to recreate\n"
		"                     missing headers.\n"
		"  --allow-ragged-csv-input|--ragged If a data line has fewer fields than the header line,\n"
		"                     fill remaining keys with empty string. If a data line has more\n"
		"                     fields than the header line, use integer field labels as in\n"
		"                     the implicit-header case.\n"
		"  --headerless-csv-output   Print only CSV data lines.\n"
		"  -N                 Keystroke-saver for --implicit-csv-header --headerless-csv-output.\n",
		o);
}
970 
// Writes to o the help text for the output-quoting flags. argv0 is not used.
static void main_usage_double_quoting(FILE* o, char* argv0) {
	fputs(
		"  --quote-all        Wrap all fields in double quotes\n"
		"  --quote-none       Do not wrap any fields in double quotes, even if they have\n"
		"                     OFS or ORS in them\n"
		"  --quote-minimal    Wrap fields in double quotes only if they have OFS or ORS\n"
		"                     in them (default)\n"
		"  --quote-numeric    Wrap fields in double quotes only if they have numbers\n"
		"                     in them\n"
		"  --quote-original   Wrap fields in double quotes if and only if they were\n"
		"                     quoted on input. This isn't sticky for computed fields:\n"
		"                     e.g. if fields a and b were quoted on input and you do\n"
		"                     \"put '$c = $a . $b'\" then field c won't inherit a or b's\n"
		"                     was-quoted-on-input flag.\n",
		o);
}
985 
main_usage_numerical_formatting(FILE * o,char * argv0)986 static void main_usage_numerical_formatting(FILE* o, char* argv0) {
987 	fprintf(o, "  --ofmt {format}    E.g. %%.18lf, %%.0lf. Please use sprintf-style codes for\n");
988 	fprintf(o, "                     double-precision. Applies to verbs which compute new\n");
989 	fprintf(o, "                     values, e.g. put, stats1, stats2. See also the fmtnum\n");
990 	fprintf(o, "                     function within mlr put (mlr --help-all-functions).\n");
991 	fprintf(o, "                     Defaults to %s.\n", DEFAULT_OFMT);
992 }
993 
// Writes to o the help text for the miscellaneous main flags (--seed,
// --nr-progress-mod, --from, -n, -I). argv0 is substituted into the examples.
static void main_usage_other_options(FILE* o, char* argv0) {
	// --seed and --nr-progress-mod: fixed text.
	fputs(
		"  --seed {n} with n of the form 12345678 or 0xcafefeed. For put/filter\n"
		"                     urand()/urandint()/urand32().\n"
		"  --nr-progress-mod {m}, with m a positive integer: print filename and record\n"
		"                     count to stderr every m input records.\n",
		o);

	// --from: two example command lines.
	fprintf(o,
		"  --from {filename}  Use this to specify an input file before the verb(s),\n"
		"                     rather than after. May be used more than once. Example:\n"
		"                     \"%s --from a.dat --from b.dat cat\" is the same as\n"
		"                     \"%s cat a.dat b.dat\".\n",
		argv0, argv0);

	// -n: two mentions of the executable name.
	fprintf(o,
		"  -n                 Process no input files, nor standard input either. Useful\n"
		"                     for %s put with begin/end statements only. (Same as --from\n"
		"                     /dev/null.) Also useful in \"%s -n put -v '...'\" for\n"
		"                     analyzing abstract syntax trees (if that's your thing).\n",
		argv0, argv0);

	// -I: fixed text.
	fputs(
		"  -I                 Process files in-place. For each file name on the command\n"
		"                     line, output is written to a temp file in the same\n"
		"                     directory, which is then renamed over the original. Each\n"
		"                     file is processed in isolation: if the output format is\n"
		"                     CSV, CSV headers will be present in each output file;\n"
		"                     statistics are only over each file's own records; and so on.\n",
		o);
}
1014 
// Writes to o a two-line note on chaining verbs with "then"; argv0 appears
// in the example command.
static void main_usage_then_chaining(FILE* o, char* argv0) {
	fprintf(o,
		"Output of one verb may be chained as input to another using \"then\", e.g.\n"
		"  %s stats1 -a min,mean,max -f flag,u,v -g color then sort -f color\n",
		argv0);
}
1019 
// Writes to o a short introduction to the bundled auxiliary executables and
// then delegates the listing itself to show_aux_entries. argv0 is not used.
static void main_usage_auxents(FILE* o, char* argv0) {
	fputs(
		"Miller has a few otherwise-standalone executables packaged within it.\n"
		"They do not participate in any other parts of Miller.\n",
		o);
	show_aux_entries(o);
}
1025 
main_usage_see_also(FILE * o,char * argv0)1026 static void main_usage_see_also(FILE* o, char* argv0) {
1027 	fprintf(o, "For more information please see http://johnkerl.org/miller/doc and/or\n");
1028 	fprintf(o, "http://github.com/johnkerl/miller.");
1029 	fprintf(o, " This is Miller version %s.\n", VERSION_STRING);
1030 }
1031 
print_type_arithmetic_info(FILE * o,char * argv0)1032 static void print_type_arithmetic_info(FILE* o, char* argv0) {
1033 	for (int i = -2; i < MT_DIM; i++) {
1034 		mv_t a = (mv_t) {.type = i, .free_flags = NO_FREE, .u.intv = 0};
1035 		if (i == -2)
1036 			printf("%-6s |", "(+)");
1037 		else if (i == -1)
1038 			printf("%-6s +", "------");
1039 		else
1040 			printf("%-6s |", mt_describe_type_simple(a.type));
1041 
1042 		for (int j = 0; j < MT_DIM; j++) {
1043 			mv_t b = (mv_t) {.type = j, .free_flags = NO_FREE, .u.intv = 0};
1044 			if (i == -2) {
1045 				printf(" %-6s", mt_describe_type_simple(b.type));
1046 			} else if (i == -1) {
1047 				printf(" %-6s", "------");
1048 			} else {
1049 				mv_t c = x_xx_plus_func(&a, &b);
1050 				printf(" %-6s", mt_describe_type_simple(c.type));
1051 			}
1052 		}
1053 
1054 		fprintf(o, "\n");
1055 	}
1056 }
1057 
1058 // ----------------------------------------------------------------
usage_all_verbs(char * argv0)1059 static void usage_all_verbs(char* argv0) {
1060 	char* separator = "================================================================";
1061 
1062 	for (int i = 0; i < mapper_lookup_table_length; i++) {
1063 		fprintf(stdout, "%s\n", separator);
1064 		mapper_lookup_table[i]->pusage_func(stdout, argv0, mapper_lookup_table[i]->verb);
1065 		fprintf(stdout, "\n");
1066 	}
1067 	fprintf(stdout, "%s\n", separator);
1068 	exit(0);
1069 }
1070 
// Reports arg as unrecognized on stderr, suggests "--help", and exits with
// status 1. Does not return.
static void usage_unrecognized_verb(char* argv0, char* arg) {
	fprintf(stderr,
		"%s: option \"%s\" not recognized.\n"
		"Please run \"%s --help\" for usage information.\n",
		argv0, arg, argv0);
	exit(1);
}
1076 
check_arg_count(char ** argv,int argi,int argc,int n)1077 static void check_arg_count(char** argv, int argi, int argc, int n) {
1078 	if ((argc - argi) < n) {
1079 		fprintf(stderr, "%s: option \"%s\" missing argument(s).\n", MLR_GLOBALS.bargv0, argv[argi]);
1080 		main_usage_short(stderr, MLR_GLOBALS.bargv0);
1081 		exit(1);
1082 	}
1083 }
1084 
look_up_mapper_setup(char * verb)1085 static mapper_setup_t* look_up_mapper_setup(char* verb) {
1086 	mapper_setup_t* pmapper_setup = NULL;
1087 	for (int i = 0; i < mapper_lookup_table_length; i++) {
1088 		if (streq(mapper_lookup_table[i]->verb, verb))
1089 			return mapper_lookup_table[i];
1090 	}
1091 
1092 	return pmapper_setup;
1093 }
1094 
1095 // ----------------------------------------------------------------
// Puts *popts into its initial state: zero-fills the struct, runs the
// reader/writer sub-initializers, allocates an empty filename list, and
// sets the remaining scalar fields to their "not specified" values.
void cli_opts_init(cli_opts_t* popts) {
	// Zero-fill first so fields not named below still start from a known value.
	memset(popts, 0, sizeof(*popts));

	// Nested option groups have their own initializers.
	cli_reader_opts_init(&popts->reader_opts);
	cli_writer_opts_init(&popts->writer_opts);

	// Verb-chain position and input files.
	popts->filenames = slls_alloc();
	popts->mapper_argb = 0;

	// Output number format and progress reporting.
	popts->ofmt = NULL;
	popts->nr_progress_mod = 0LL;

	// Input-handling modes.
	popts->no_input = FALSE;
	popts->do_in_place = FALSE;

	// Random seeding.
	popts->rand_seed = 0;
	popts->have_rand_seed = FALSE;
}
1114 
1115 // ----------------------------------------------------------------
1116 // * If $MLRRC is set, use it and only it.
1117 // * Otherwise try first $HOME/.mlrrc and then ./.mlrrc but let them
1118 //   stack: e.g. $HOME/.mlrrc is lots of settings and maybe in one
1119 //   subdir you want to override just a setting or two.
// * The special value "__none__" for $MLRRC means: load no .mlrrc at all.
// * Files that cannot be opened are skipped by cli_try_load_mlrrc.
static void cli_load_mlrrc(cli_opts_t* popts) {
	char* mlrrc_from_env = getenv("MLRRC");
	if (mlrrc_from_env != NULL) {
		// $MLRRC is exclusive: either it names the one file to load, or it
		// opts out entirely. The default locations are never consulted.
		if (!streq(mlrrc_from_env, "__none__")) {
			cli_try_load_mlrrc(popts, mlrrc_from_env);
		}
		return;
	}

	// Home-directory settings first ...
	char* home_dir = getenv("HOME");
	if (home_dir != NULL) {
		char* home_mlrrc = mlr_paste_2_strings(home_dir, "/.mlrrc");
		cli_try_load_mlrrc(popts, home_mlrrc);
		free(home_mlrrc);
	}

	// ... then the current directory's, which is applied afterward.
	cli_try_load_mlrrc(popts, "./.mlrrc");
}
1139 
// Applies the settings in the file at path to *popts, one line at a time.
//
// * A file that cannot be opened is silently skipped.
// * Each line is handed to handle_mlrrc_line_1 as a scratch copy, since the
//   handlers modify the text in place; the pristine line is kept for the
//   error message.
// * A line the handlers reject (FALSE return) is reported on stderr with
//   file name and line number, and the process exits with status 1.
static void cli_try_load_mlrrc(cli_opts_t* popts, char* path) {
	FILE* fp = fopen(path, "r");
	if (fp == NULL) {
		return;
	}

	char* line = NULL;
	size_t linecap = 0;
	int lineno = 0;

	// getline returns -1 at end of file or on error. Its result is compared
	// directly rather than being narrowed into an int.
	while (getline(&line, &linecap, fp) != -1) {
		lineno++;

		char* line_to_destroy = strdup(line);
		if (line_to_destroy == NULL) {
			fprintf(stderr, "Out of memory while reading file \"%s\" line %d.\n",
				path, lineno);
			exit(1);
		}

		if (!handle_mlrrc_line_1(popts, line_to_destroy)) {
			fprintf(stderr, "Parse error at file \"%s\" line %d: %s\n",
				path, lineno, line);
			exit(1);
		}
		free(line_to_destroy);
	}

	fclose(fp);
	free(line); // free(NULL) is a no-op, so no guard is needed
}
1167 
1168 // Chomps trailing CR, LF, or CR/LF; comment-strips; left-right trims.
// The line is modified in place. Returns TRUE when nothing is left after
// stripping (blank or comment-only line); otherwise returns the result of
// handle_mlrrc_line_2 on the trimmed text.
static int handle_mlrrc_line_1(cli_opts_t* popts, char* line) {
	// Chomp one trailing CR/LF pair, or a lone CR or LF.
	size_t len = strlen(line);
	if (len >= 2 && line[len-2] == '\r' && line[len-1] == '\n') {
		line[len-2] = 0;
	} else if (len >= 1 && (line[len-1] == '\r' || line[len-1] == '\n')) {
		line[len-1] = 0;
	}

	// Comment-strip: everything from the first '#' onward is dropped.
	char* phash = strchr(line, '#');
	if (phash != NULL) {
		*phash = 0;
	}

	// Left-trim
	char* start = line;
	while (*start == ' ' || *start == '\t') {
		start++;
	}

	// Nothing but whitespace and/or a comment: accept and ignore. Checking
	// the length here avoids computing &start[len-1] with len == 0, which
	// would form a pointer before the start of the buffer.
	len = strlen(start);
	if (len == 0) {
		return TRUE;
	}

	// Right-trim. start[0] is known to be non-blank at this point, so the
	// scan stops at or after start.
	char* end = &start[len-1];
	while (end > start && (*end == ' ' || *end == '\t')) {
		*end = 0;
		end--;
	}

	return handle_mlrrc_line_2(popts, start);
}
1203 
1204 // Prepends initial "--" if it's not already there
// A line that already begins with '-' (a "--flag", or a single-dash flag) is
// passed on unchanged; any other line gets "--" prepended. Returns the result
// of handle_mlrrc_line_3 on the resulting string.
static int handle_mlrrc_line_2(cli_opts_t* popts, char* line) {
	char* dashed_line = NULL;

	// Only the first character decides. The earlier test,
	// (len >= 2 && line[0] != '-' && line[1] != '-'), skipped the prefix for
	// undashed names that were one character long or had '-' as their second
	// character, so such lines could never match a "--" option.
	if (line[0] != '-') {
		dashed_line = mlr_paste_2_strings("--", line);
	} else {
		dashed_line = strdup(line);
	}
	if (dashed_line == NULL) {
		fprintf(stderr, "Out of memory while parsing .mlrrc line \"%s\".\n", line);
		exit(1);
	}

	int rc = handle_mlrrc_line_3(popts, dashed_line);

	// Do not free dashed_line. The command-line parsers can retain pointers into argv strings
	// (rather than copying), resulting in freed-memory reads later in the data-processing verbs.
	//
	// It would be possible to be diligent about making sure all current command-line-parsing
	// callsites copy strings rather than pointing to them -- but it would be easy to miss some, and
	// also any future codemods might make the same mistake as well.
	//
	// It's safer (and no big leak) to simply leave these parsed mlrrc lines unfreed.
	return rc;
}
1229 
1230 // Splits line into argv array
// The split is at the first space or tab: the text before it becomes argv[0]
// and the text after it (leading blanks skipped) becomes argv[1]. A line with
// no space or tab is a one-element argv. The line is modified in place and
// argv points into it. Returns the result of handle_mlrrc_line_4.
static int handle_mlrrc_line_3(cli_opts_t* popts, char* line) {
	// Start as the flag-only case; upgraded below if a value is present.
	char* argv[3] = { line, NULL, NULL };
	int argc = 1;

	char* pblank = strpbrk(line, " \t");
	if (pblank != NULL) {
		// Terminate the flag name, then step over the run of blanks
		// separating it from its value.
		*pblank = 0;
		char* value = pblank + 1;
		value += strspn(value, " \t");

		argv[1] = value;
		argc = 2;
	}

	return handle_mlrrc_line_4(popts, argv, argc);
}
1252 
// Offers one parsed .mlrrc line (as argv/argc) to the option handlers in
// order: reader, writer, reader-writer, misc. Returns TRUE if one of them
// accepts it, FALSE if none does. "--prepipe" is always rejected.
static int handle_mlrrc_line_4(cli_opts_t* popts, char** argv, int argc) {
	// Don't allow code execution via .mlrrc
	if (streq(argv[0], "--prepipe")) {
		return FALSE;
	}

	int argi = 0;
	cli_reader_opts_t* preader_opts = &popts->reader_opts;
	cli_writer_opts_t* pwriter_opts = &popts->writer_opts;

	// Short-circuit evaluation: handlers are tried in order and the first
	// one to accept the option ends the chain.
	int was_handled =
		cli_handle_reader_options(argv, argc, &argi, preader_opts)
		|| cli_handle_writer_options(argv, argc, &argi, pwriter_opts)
		|| cli_handle_reader_writer_options(argv, argc, &argi, preader_opts, pwriter_opts)
		|| cli_handle_misc_options(argv, argc, &argi, popts);

	return was_handled ? TRUE : FALSE;
}
1274 
1275 // ----------------------------------------------------------------
// Puts *preader_opts into its "nothing specified yet" state. Only the fields
// named below are written.
void cli_reader_opts_init(cli_reader_opts_t* preader_opts) {
	cli_reader_opts_t* p = preader_opts;

	// Settings that start out NULL.
	p->ifile_fmt = NULL;
	p->irs = NULL;
	p->ifs = NULL;
	p->ips = NULL;
	p->input_json_flatten_separator = NULL;
	p->prepipe = NULL;
	p->comment_string = NULL;

	// Settings with a dedicated initial enumerator.
	p->json_array_ingest = JSON_ARRAY_INGEST_UNSPECIFIED;
	p->comment_handling = COMMENTS_ARE_DATA;

	// Flags that start out neither true nor false.
	p->allow_repeat_ifs = NEITHER_TRUE_NOR_FALSE;
	p->allow_repeat_ips = NEITHER_TRUE_NOR_FALSE;
	p->use_implicit_csv_header = NEITHER_TRUE_NOR_FALSE;
	p->allow_ragged_csv_input = NEITHER_TRUE_NOR_FALSE;

	// Generator settings: field "i", start 0, stop 100, step 1.
	p->generator_opts.field_name = "i";
	p->generator_opts.start = 0LL;
	p->generator_opts.stop = 100LL;
	p->generator_opts.step = 1LL;
}
1298 
// Puts *pwriter_opts into its "nothing specified yet" state. Only the fields
// named below are written.
void cli_writer_opts_init(cli_writer_opts_t* pwriter_opts) {
	cli_writer_opts_t* p = pwriter_opts;

	// Settings that start out NULL.
	p->ofile_fmt = NULL;
	p->ors = NULL;
	p->ofs = NULL;
	p->ops = NULL;
	p->output_json_flatten_separator = NULL;
	p->oosvar_flatten_separator = NULL;

	// Quoting has its own "unspecified" enumerator.
	p->oquoting = QUOTE_UNSPECIFIED;

	// Flags that start out neither true nor false.
	p->headerless_csv_output = NEITHER_TRUE_NOR_FALSE;
	p->right_justify_xtab_value = NEITHER_TRUE_NOR_FALSE;
	p->right_align_pprint = NEITHER_TRUE_NOR_FALSE;
	p->pprint_barred = NEITHER_TRUE_NOR_FALSE;
	p->stack_json_output_vertically = NEITHER_TRUE_NOR_FALSE;
	p->wrap_json_output_in_outer_list = NEITHER_TRUE_NOR_FALSE;
	p->json_quote_int_keys = NEITHER_TRUE_NOR_FALSE;
	p->json_quote_non_string_values = NEITHER_TRUE_NOR_FALSE;
}
1319 
// Fills in defaults for whatever is still unspecified in the main options:
// first the reader options, then the writer options, and finally ofmt.
void cli_apply_defaults(cli_opts_t* popts) {
	cli_apply_reader_defaults(&popts->reader_opts);
	cli_apply_writer_defaults(&popts->writer_opts);

	// An ofmt which has already been set is left alone.
	if (!popts->ofmt) {
		popts->ofmt = DEFAULT_OFMT;
	}
}
1329 
// Replaces reader options which are still unspecified by their built-in
// defaults. Only the fields named below are touched; in particular the
// separators and the repeat-IFS/IPS flags are not defaulted here.
void cli_apply_reader_defaults(cli_reader_opts_t* preader_opts) {
	cli_reader_opts_t* p = preader_opts;

	if (!p->ifile_fmt) {
		p->ifile_fmt = "dkvp";
	}

	if (JSON_ARRAY_INGEST_UNSPECIFIED == p->json_array_ingest) {
		p->json_array_ingest = JSON_ARRAY_INGEST_AS_MAP;
	}

	if (NEITHER_TRUE_NOR_FALSE == p->use_implicit_csv_header) {
		p->use_implicit_csv_header = FALSE;
	}

	if (NEITHER_TRUE_NOR_FALSE == p->allow_ragged_csv_input) {
		p->allow_ragged_csv_input = FALSE;
	}

	if (!p->input_json_flatten_separator) {
		p->input_json_flatten_separator = DEFAULT_JSON_FLATTEN_SEPARATOR;
	}
}
1346 
// Replaces writer options which are still unspecified by their built-in
// defaults. The output separators (ors/ofs/ops) are not defaulted here.
void cli_apply_writer_defaults(cli_writer_opts_t* pwriter_opts) {
	cli_writer_opts_t* p = pwriter_opts;

	if (!p->ofile_fmt) {
		p->ofile_fmt = "dkvp";
	}

	// Tri-state flags. All of them default to FALSE with the single exception
	// of json_quote_int_keys, which defaults to TRUE.
	if (NEITHER_TRUE_NOR_FALSE == p->headerless_csv_output) {
		p->headerless_csv_output = FALSE;
	}
	if (NEITHER_TRUE_NOR_FALSE == p->right_justify_xtab_value) {
		p->right_justify_xtab_value = FALSE;
	}
	if (NEITHER_TRUE_NOR_FALSE == p->right_align_pprint) {
		p->right_align_pprint = FALSE;
	}
	if (NEITHER_TRUE_NOR_FALSE == p->pprint_barred) {
		p->pprint_barred = FALSE;
	}
	if (NEITHER_TRUE_NOR_FALSE == p->stack_json_output_vertically) {
		p->stack_json_output_vertically = FALSE;
	}
	if (NEITHER_TRUE_NOR_FALSE == p->wrap_json_output_in_outer_list) {
		p->wrap_json_output_in_outer_list = FALSE;
	}
	if (NEITHER_TRUE_NOR_FALSE == p->json_quote_int_keys) {
		p->json_quote_int_keys = TRUE;
	}
	if (NEITHER_TRUE_NOR_FALSE == p->json_quote_non_string_values) {
		p->json_quote_non_string_values = FALSE;
	}

	// Flatten separators
	if (!p->output_json_flatten_separator) {
		p->output_json_flatten_separator = DEFAULT_JSON_FLATTEN_SEPARATOR;
	}
	if (!p->oosvar_flatten_separator) {
		p->oosvar_flatten_separator = DEFAULT_OOSVAR_FLATTEN_SEPARATOR;
	}

	// Quoting
	if (QUOTE_UNSPECIFIED == p->oquoting) {
		p->oquoting = DEFAULT_OQUOTING;
	}
}
1384 
1385 // ----------------------------------------------------------------
1386 // For mapper join which has its own input-format overrides.
1387 //
1388 // Mainly this just takes the main-opts flag whenever the join-opts flag was not
1389 // specified by the user. But it's a bit more complex when main and join input
1390 // formats are different. Example: main input format is CSV, for which IPS is
1391 // "(N/A)", and join input format is DKVP. Then we should not use "(N/A)"
1392 // for DKVP IPS. However if main input format were DKVP with IPS set to ":",
1393 // then we should take that.
1394 //
1395 // The logic is:
1396 //
1397 // * If the join input format was unspecified, take all unspecified values from
1398 //   main opts.
1399 //
1400 // * If the join input format was specified and is the same as main input
1401 //   format, take unspecified values from main opts.
1402 //
1403 // * If the join input format was specified and is not the same as main input
1404 //   format, take unspecified values from defaults for the join input format.
1405 
cli_merge_reader_opts(cli_reader_opts_t * pfunc_opts,cli_reader_opts_t * pmain_opts)1406 void cli_merge_reader_opts(cli_reader_opts_t* pfunc_opts, cli_reader_opts_t* pmain_opts) {
1407 
1408 	if (pfunc_opts->ifile_fmt == NULL) {
1409 		pfunc_opts->ifile_fmt = pmain_opts->ifile_fmt;
1410 	}
1411 
1412 	if (streq(pfunc_opts->ifile_fmt, pmain_opts->ifile_fmt)) {
1413 
1414 		if (pfunc_opts->irs == NULL)
1415 			pfunc_opts->irs = pmain_opts->irs;
1416 		if (pfunc_opts->ifs == NULL)
1417 			pfunc_opts->ifs = pmain_opts->ifs;
1418 		if (pfunc_opts->ips == NULL)
1419 			pfunc_opts->ips = pmain_opts->ips;
1420 		if (pfunc_opts->allow_repeat_ifs  == NEITHER_TRUE_NOR_FALSE)
1421 			pfunc_opts->allow_repeat_ifs = pmain_opts->allow_repeat_ifs;
1422 		if (pfunc_opts->allow_repeat_ips  == NEITHER_TRUE_NOR_FALSE)
1423 			pfunc_opts->allow_repeat_ips = pmain_opts->allow_repeat_ips;
1424 
1425 	} else {
1426 
1427 		if (pfunc_opts->irs == NULL)
1428 			pfunc_opts->irs = lhmss_get_or_die(get_default_rses(), pfunc_opts->ifile_fmt);
1429 		if (pfunc_opts->ifs == NULL)
1430 			pfunc_opts->ifs = lhmss_get_or_die(get_default_fses(), pfunc_opts->ifile_fmt);
1431 		if (pfunc_opts->ips == NULL)
1432 			pfunc_opts->ips = lhmss_get_or_die(get_default_pses(), pfunc_opts->ifile_fmt);
1433 		if (pfunc_opts->allow_repeat_ifs  == NEITHER_TRUE_NOR_FALSE)
1434 			pfunc_opts->allow_repeat_ifs = lhmsll_get_or_die(get_default_repeat_ifses(), pfunc_opts->ifile_fmt);
1435 		if (pfunc_opts->allow_repeat_ips  == NEITHER_TRUE_NOR_FALSE)
1436 			pfunc_opts->allow_repeat_ips = lhmsll_get_or_die(get_default_repeat_ipses(), pfunc_opts->ifile_fmt);
1437 
1438 	}
1439 
1440 	if (pfunc_opts->json_array_ingest == JSON_ARRAY_INGEST_UNSPECIFIED)
1441 		pfunc_opts->json_array_ingest = pmain_opts->json_array_ingest;
1442 
1443 	if (pfunc_opts->use_implicit_csv_header == NEITHER_TRUE_NOR_FALSE)
1444 		pfunc_opts->use_implicit_csv_header = pmain_opts->use_implicit_csv_header;
1445 
1446 	if (pfunc_opts->allow_ragged_csv_input == NEITHER_TRUE_NOR_FALSE)
1447 		pfunc_opts->allow_ragged_csv_input = pmain_opts->allow_ragged_csv_input;
1448 
1449 	if (pfunc_opts->input_json_flatten_separator == NULL)
1450 		pfunc_opts->input_json_flatten_separator = pmain_opts->input_json_flatten_separator;
1451 }
1452 
1453 // Similar to cli_merge_reader_opts but for mapper tee & mapper put which have their
1454 // own output-format overrides.
cli_merge_writer_opts(cli_writer_opts_t * pfunc_opts,cli_writer_opts_t * pmain_opts)1455 void cli_merge_writer_opts(cli_writer_opts_t* pfunc_opts, cli_writer_opts_t* pmain_opts) {
1456 
1457 	if (pfunc_opts->ofile_fmt == NULL) {
1458 		pfunc_opts->ofile_fmt = pmain_opts->ofile_fmt;
1459 	}
1460 
1461 	if (streq(pfunc_opts->ofile_fmt, pmain_opts->ofile_fmt)) {
1462 		if (pfunc_opts->ors == NULL)
1463 			pfunc_opts->ors = pmain_opts->ors;
1464 		if (pfunc_opts->ofs == NULL)
1465 			pfunc_opts->ofs = pmain_opts->ofs;
1466 		if (pfunc_opts->ops == NULL)
1467 			pfunc_opts->ops = pmain_opts->ops;
1468 	} else {
1469 		if (pfunc_opts->ors == NULL)
1470 			pfunc_opts->ors = lhmss_get_or_die(get_default_rses(), pfunc_opts->ofile_fmt);
1471 		if (pfunc_opts->ofs == NULL)
1472 			pfunc_opts->ofs = lhmss_get_or_die(get_default_fses(), pfunc_opts->ofile_fmt);
1473 		if (pfunc_opts->ops == NULL)
1474 			pfunc_opts->ops = lhmss_get_or_die(get_default_pses(), pfunc_opts->ofile_fmt);
1475 	}
1476 
1477 	if (pfunc_opts->headerless_csv_output == NEITHER_TRUE_NOR_FALSE)
1478 		pfunc_opts->headerless_csv_output = pmain_opts->headerless_csv_output;
1479 
1480 	if (pfunc_opts->right_justify_xtab_value == NEITHER_TRUE_NOR_FALSE)
1481 		pfunc_opts->right_justify_xtab_value = pmain_opts->right_justify_xtab_value;
1482 
1483 	if (pfunc_opts->right_align_pprint == NEITHER_TRUE_NOR_FALSE)
1484 		pfunc_opts->right_align_pprint = pmain_opts->right_align_pprint;
1485 
1486 	if (pfunc_opts->pprint_barred == NEITHER_TRUE_NOR_FALSE)
1487 		pfunc_opts->pprint_barred = pmain_opts->pprint_barred;
1488 
1489 	if (pfunc_opts->stack_json_output_vertically == NEITHER_TRUE_NOR_FALSE)
1490 		pfunc_opts->stack_json_output_vertically = pmain_opts->stack_json_output_vertically;
1491 
1492 	if (pfunc_opts->wrap_json_output_in_outer_list == NEITHER_TRUE_NOR_FALSE)
1493 		pfunc_opts->wrap_json_output_in_outer_list = pmain_opts->wrap_json_output_in_outer_list;
1494 
1495 	if (pfunc_opts->json_quote_int_keys == NEITHER_TRUE_NOR_FALSE)
1496 		pfunc_opts->json_quote_int_keys = pmain_opts->json_quote_int_keys;
1497 
1498 	if (pfunc_opts->json_quote_non_string_values == NEITHER_TRUE_NOR_FALSE)
1499 		pfunc_opts->json_quote_non_string_values = pmain_opts->json_quote_non_string_values;
1500 
1501 	if (pfunc_opts->output_json_flatten_separator == NULL)
1502 		pfunc_opts->output_json_flatten_separator = pmain_opts->output_json_flatten_separator;
1503 
1504 	if (pfunc_opts->oosvar_flatten_separator == NULL)
1505 		pfunc_opts->oosvar_flatten_separator = pmain_opts->oosvar_flatten_separator;
1506 
1507 	if (pfunc_opts->oquoting == QUOTE_UNSPECIFIED)
1508 		pfunc_opts->oquoting = pmain_opts->oquoting;
1509 }
1510 
1511 // ----------------------------------------------------------------
handle_terminal_usage(char ** argv,int argc,int argi)1512 static int handle_terminal_usage(char** argv, int argc, int argi) {
1513 	if (streq(argv[argi], "--version")) {
1514 		printf("Miller %s\n", VERSION_STRING);
1515 		return TRUE;
1516 	} else if (streq(argv[argi], "-h")) {
1517 		main_usage_long(stdout, MLR_GLOBALS.bargv0);
1518 		return TRUE;
1519 	} else if (streq(argv[argi], "--help")) {
1520 		main_usage_long(stdout, MLR_GLOBALS.bargv0);
1521 		return TRUE;
1522 	} else if (streq(argv[argi], "--print-type-arithmetic-info")) {
1523 		print_type_arithmetic_info(stdout, MLR_GLOBALS.bargv0);
1524 		return TRUE;
1525 
1526 	} else if (streq(argv[argi], "--help-all-verbs")) {
1527 		usage_all_verbs(MLR_GLOBALS.bargv0);
1528 	} else if (streq(argv[argi], "--list-all-verbs") || streq(argv[argi], "-l")) {
1529 		list_all_verbs(stdout, "");
1530 		return TRUE;
1531 	} else if (streq(argv[argi], "--list-all-verbs-raw") || streq(argv[argi], "-L")) {
1532 		list_all_verbs_raw(stdout);
1533 		return TRUE;
1534 
1535 	} else if (streq(argv[argi], "--list-all-functions-raw") || streq(argv[argi], "-F")) {
1536 		fmgr_t* pfmgr = fmgr_alloc();
1537 		fmgr_list_all_functions_raw(pfmgr, stdout);
1538 		fmgr_free(pfmgr, NULL);
1539 		return TRUE;
1540 	} else if (streq(argv[argi], "--list-all-functions-as-table")) {
1541 		fmgr_t* pfmgr = fmgr_alloc();
1542 		fmgr_list_all_functions_as_table(pfmgr, stdout);
1543 		fmgr_free(pfmgr, NULL);
1544 		return TRUE;
1545 	} else if (streq(argv[argi], "--help-all-functions") || streq(argv[argi], "-f")) {
1546 		fmgr_t* pfmgr = fmgr_alloc();
1547 		fmgr_function_usage(pfmgr, stdout, NULL);
1548 		fmgr_free(pfmgr, NULL);
1549 		return TRUE;
1550 	} else if (streq(argv[argi], "--help-function") || streq(argv[argi], "--hf")) {
1551 		check_arg_count(argv, argi, argc, 2);
1552 		fmgr_t* pfmgr = fmgr_alloc();
1553 		fmgr_function_usage(pfmgr, stdout, argv[argi+1]);
1554 		fmgr_free(pfmgr, NULL);
1555 		return TRUE;
1556 
1557 	} else if (streq(argv[argi], "--list-all-keywords-raw") || streq(argv[argi], "-K")) {
1558 		mlr_dsl_list_all_keywords_raw(stdout);
1559 		return TRUE;
1560 	} else if (streq(argv[argi], "--help-all-keywords") || streq(argv[argi], "-k")) {
1561 		mlr_dsl_keyword_usage(stdout, NULL);
1562 		return TRUE;
1563 	} else if (streq(argv[argi], "--help-keyword") || streq(argv[argi], "--hk")) {
1564 		check_arg_count(argv, argi, argc, 2);
1565 		mlr_dsl_keyword_usage(stdout, argv[argi+1]);
1566 		return TRUE;
1567 
1568 	// main-usage subsections, individually accessible for the benefit of
1569 	// the manpage-autogenerator
1570 	} else if (streq(argv[argi], "--usage-synopsis")) {
1571 		main_usage_synopsis(stdout, MLR_GLOBALS.bargv0);
1572 		return TRUE;
1573 	} else if (streq(argv[argi], "--usage-examples")) {
1574 		main_usage_examples(stdout, MLR_GLOBALS.bargv0, "");
1575 		return TRUE;
1576 	} else if (streq(argv[argi], "--usage-list-all-verbs")) {
1577 		list_all_verbs(stdout, "");
1578 		return TRUE;
1579 	} else if (streq(argv[argi], "--usage-help-options")) {
1580 		main_usage_help_options(stdout, MLR_GLOBALS.bargv0);
1581 		return TRUE;
1582 	} else if (streq(argv[argi], "--usage-mlrrc")) {
1583 		main_usage_mlrrc(stdout, MLR_GLOBALS.bargv0);
1584 		return TRUE;
1585 	} else if (streq(argv[argi], "--usage-functions")) {
1586 		main_usage_functions(stdout, MLR_GLOBALS.bargv0, "");
1587 		return TRUE;
1588 	} else if (streq(argv[argi], "--usage-data-format-examples")) {
1589 		main_usage_data_format_examples(stdout, MLR_GLOBALS.bargv0);
1590 		return TRUE;
1591 	} else if (streq(argv[argi], "--usage-data-format-options")) {
1592 		main_usage_data_format_options(stdout, MLR_GLOBALS.bargv0);
1593 		return TRUE;
1594 	} else if (streq(argv[argi], "--usage-comments-in-data")) {
1595 		main_usage_comments_in_data(stdout, MLR_GLOBALS.bargv0);
1596 		return TRUE;
1597 	} else if (streq(argv[argi], "--usage-format-conversion-keystroke-saver-options")) {
1598 		main_usage_format_conversion_keystroke_saver_options(stdout, MLR_GLOBALS.bargv0);
1599 		return TRUE;
1600 	} else if (streq(argv[argi], "--usage-compressed-data-options")) {
1601 		main_usage_compressed_data_options(stdout, MLR_GLOBALS.bargv0);
1602 		return TRUE;
1603 	} else if (streq(argv[argi], "--usage-separator-options")) {
1604 		main_usage_separator_options(stdout, MLR_GLOBALS.bargv0);
1605 		return TRUE;
1606 	} else if (streq(argv[argi], "--usage-csv-options")) {
1607 		main_usage_csv_options(stdout, MLR_GLOBALS.bargv0);
1608 		return TRUE;
1609 	} else if (streq(argv[argi], "--usage-double-quoting")) {
1610 		main_usage_double_quoting(stdout, MLR_GLOBALS.bargv0);
1611 		return TRUE;
1612 	} else if (streq(argv[argi], "--usage-numerical-formatting")) {
1613 		main_usage_numerical_formatting(stdout, MLR_GLOBALS.bargv0);
1614 		return TRUE;
1615 	} else if (streq(argv[argi], "--usage-other-options")) {
1616 		main_usage_other_options(stdout, MLR_GLOBALS.bargv0);
1617 		return TRUE;
1618 	} else if (streq(argv[argi], "--usage-then-chaining")) {
1619 		main_usage_then_chaining(stdout, MLR_GLOBALS.bargv0);
1620 		return TRUE;
1621 	} else if (streq(argv[argi], "--usage-auxents")) {
1622 		main_usage_auxents(stdout, MLR_GLOBALS.bargv0);
1623 		return TRUE;
1624 	} else if (streq(argv[argi], "--usage-see-also")) {
1625 		main_usage_see_also(stdout, MLR_GLOBALS.bargv0);
1626 		return TRUE;
1627 	}
1628 	return FALSE;
1629 }
1630 
1631 // Returns TRUE if the current flag was handled.
cli_handle_reader_options(char ** argv,int argc,int * pargi,cli_reader_opts_t * preader_opts)1632 int cli_handle_reader_options(char** argv, int argc, int *pargi, cli_reader_opts_t* preader_opts) {
1633 	int argi = *pargi;
1634 	int oargi = argi;
1635 
1636 	if (streq(argv[argi], "--irs")) {
1637 		check_arg_count(argv, argi, argc, 2);
1638 		preader_opts->irs = cli_sep_from_arg(argv[argi+1]);
1639 		argi += 2;
1640 
1641 	} else if (streq(argv[argi], "--ifs")) {
1642 		check_arg_count(argv, argi, argc, 2);
1643 		preader_opts->ifs = cli_sep_from_arg(argv[argi+1]);
1644 		argi += 2;
1645 
1646 	} else if (streq(argv[argi], "--repifs")) {
1647 		preader_opts->allow_repeat_ifs = TRUE;
1648 		argi += 1;
1649 
1650 	} else if (streq(argv[argi], "--json-fatal-arrays-on-input")) {
1651 		preader_opts->json_array_ingest = JSON_ARRAY_INGEST_FATAL;
1652 		argi += 1;
1653 	} else if (streq(argv[argi], "--json-skip-arrays-on-input")) {
1654 		preader_opts->json_array_ingest = JSON_ARRAY_INGEST_SKIP;
1655 		argi += 1;
1656 	} else if (streq(argv[argi], "--json-map-arrays-on-input")) {
1657 		preader_opts->json_array_ingest = JSON_ARRAY_INGEST_AS_MAP;
1658 		argi += 1;
1659 
1660 	} else if (streq(argv[argi], "--implicit-csv-header")) {
1661 		preader_opts->use_implicit_csv_header = TRUE;
1662 		argi += 1;
1663 
1664 	} else if (streq(argv[argi], "--no-implicit-csv-header")) {
1665 		preader_opts->use_implicit_csv_header = FALSE;
1666 		argi += 1;
1667 
1668 	} else if (streq(argv[argi], "--allow-ragged-csv-input") || streq(argv[argi], "--ragged")) {
1669 		preader_opts->allow_ragged_csv_input = TRUE;
1670 		argi += 1;
1671 
1672 	} else if (streq(argv[argi], "--ips")) {
1673 		check_arg_count(argv, argi, argc, 2);
1674 		preader_opts->ips = cli_sep_from_arg(argv[argi+1]);
1675 		argi += 2;
1676 
1677 	} else if (streq(argv[argi], "-i")) {
1678 		check_arg_count(argv, argi, argc, 2);
1679 		if (!lhmss_has_key(get_default_rses(), argv[argi+1])) {
1680 			fprintf(stderr, "%s: unrecognized input format \"%s\".\n",
1681 				MLR_GLOBALS.bargv0, argv[argi+1]);
1682 			exit(1);
1683 		}
1684 		preader_opts->ifile_fmt = argv[argi+1];
1685 		argi += 2;
1686 
1687 	} else if (streq(argv[argi], "--igen")) {
1688 		preader_opts->ifile_fmt = "gen";
1689 		argi += 1;
1690 	} else if (streq(argv[argi], "--gen-start")) {
1691 		preader_opts->ifile_fmt = "gen";
1692 		check_arg_count(argv, argi, argc, 2);
1693 		if (sscanf(argv[argi+1], "%lld", &preader_opts->generator_opts.start) != 1) {
1694 			fprintf(stderr, "%s: could not scan \"%s\".\n",
1695 				MLR_GLOBALS.bargv0, argv[argi+1]);
1696 		}
1697 		argi += 2;
1698 	} else if (streq(argv[argi], "--gen-stop")) {
1699 		preader_opts->ifile_fmt = "gen";
1700 		check_arg_count(argv, argi, argc, 2);
1701 		if (sscanf(argv[argi+1], "%lld", &preader_opts->generator_opts.stop) != 1) {
1702 			fprintf(stderr, "%s: could not scan \"%s\".\n",
1703 				MLR_GLOBALS.bargv0, argv[argi+1]);
1704 		}
1705 		argi += 2;
1706 	} else if (streq(argv[argi], "--gen-step")) {
1707 		preader_opts->ifile_fmt = "gen";
1708 		check_arg_count(argv, argi, argc, 2);
1709 		if (sscanf(argv[argi+1], "%lld", &preader_opts->generator_opts.step) != 1) {
1710 			fprintf(stderr, "%s: could not scan \"%s\".\n",
1711 				MLR_GLOBALS.bargv0, argv[argi+1]);
1712 		}
1713 		argi += 2;
1714 
1715 	} else if (streq(argv[argi], "--icsv")) {
1716 		preader_opts->ifile_fmt = "csv";
1717 		argi += 1;
1718 
1719 	} else if (streq(argv[argi], "--icsvlite")) {
1720 		preader_opts->ifile_fmt = "csvlite";
1721 		argi += 1;
1722 
1723 	} else if (streq(argv[argi], "--itsv")) {
1724 		preader_opts->ifile_fmt = "csv";
1725 		preader_opts->ifs = "\t";
1726 		argi += 1;
1727 
1728 	} else if (streq(argv[argi], "--itsvlite")) {
1729 		preader_opts->ifile_fmt = "csvlite";
1730 		preader_opts->ifs = "\t";
1731 		argi += 1;
1732 
1733 	} else if (streq(argv[argi], "--iasv")) {
1734 		preader_opts->ifile_fmt = "csv";
1735 		preader_opts->ifs = ASV_FS;
1736 		preader_opts->irs = ASV_RS;
1737 		argi += 1;
1738 
1739 	} else if (streq(argv[argi], "--iasvlite")) {
1740 		preader_opts->ifile_fmt = "csvlite";
1741 		preader_opts->ifs = ASV_FS;
1742 		preader_opts->irs = ASV_RS;
1743 		argi += 1;
1744 
1745 	} else if (streq(argv[argi], "--iusv")) {
1746 		preader_opts->ifile_fmt = "csv";
1747 		preader_opts->ifs = USV_FS;
1748 		preader_opts->irs = USV_RS;
1749 		argi += 1;
1750 
1751 	} else if (streq(argv[argi], "--iusvlite")) {
1752 		preader_opts->ifile_fmt = "csvlite";
1753 		preader_opts->ifs = USV_FS;
1754 		preader_opts->irs = USV_RS;
1755 		argi += 1;
1756 
1757 	} else if (streq(argv[argi], "--idkvp")) {
1758 		preader_opts->ifile_fmt = "dkvp";
1759 		argi += 1;
1760 
1761 	} else if (streq(argv[argi], "--ijson")) {
1762 		preader_opts->ifile_fmt = "json";
1763 		argi += 1;
1764 
1765 	} else if (streq(argv[argi], "--inidx")) {
1766 		preader_opts->ifile_fmt = "nidx";
1767 		argi += 1;
1768 
1769 	} else if (streq(argv[argi], "--ixtab")) {
1770 		preader_opts->ifile_fmt = "xtab";
1771 		argi += 1;
1772 
1773 	} else if (streq(argv[argi], "--ipprint")) {
1774 		preader_opts->ifile_fmt        = "csvlite";
1775 		preader_opts->ifs              = " ";
1776 		preader_opts->allow_repeat_ifs = TRUE;
1777 		argi += 1;
1778 
1779 	} else if (streq(argv[argi], "--mmap")) {
1780 		// No-op as of 5.6.3 (mmap is being abandoned) but don't break
1781 		// the command-line user experience.
1782 		argi += 1;
1783 
1784 	} else if (streq(argv[argi], "--no-mmap")) {
1785 		// No-op as of 5.6.3 (mmap is being abandoned) but don't break
1786 		// the command-line user experience.
1787 		argi += 1;
1788 
1789 	} else if (streq(argv[argi], "--prepipe")) {
1790 		check_arg_count(argv, argi, argc, 2);
1791 		preader_opts->prepipe = argv[argi+1];
1792 		argi += 2;
1793 
1794 	} else if (streq(argv[argi], "--prepipe-gunzip")) {
1795 		preader_opts->prepipe = "gunzip";
1796 		argi += 1;
1797 
1798 	} else if (streq(argv[argi], "--prepipe-zcat")) {
1799 		preader_opts->prepipe = "zcat";
1800 		argi += 1;
1801 
1802 	} else if (streq(argv[argi], "--skip-comments")) {
1803 		preader_opts->comment_string = DEFAULT_COMMENT_STRING;
1804 		preader_opts->comment_handling = SKIP_COMMENTS;
1805 		argi += 1;
1806 
1807 	} else if (streq(argv[argi], "--skip-comments-with")) {
1808 		check_arg_count(argv, argi, argc, 2);
1809 		preader_opts->comment_string = argv[argi+1];
1810 		preader_opts->comment_handling = SKIP_COMMENTS;
1811 		argi += 2;
1812 
1813 	} else if (streq(argv[argi], "--pass-comments")) {
1814 		preader_opts->comment_string = DEFAULT_COMMENT_STRING;
1815 		preader_opts->comment_handling = PASS_COMMENTS;
1816 		argi += 1;
1817 
1818 	} else if (streq(argv[argi], "--pass-comments-with")) {
1819 		check_arg_count(argv, argi, argc, 2);
1820 		preader_opts->comment_string = argv[argi+1];
1821 		preader_opts->comment_handling = PASS_COMMENTS;
1822 		argi += 2;
1823 
1824 	}
1825 	*pargi = argi;
1826 	return argi != oargi;
1827 }
1828 
1829 // Returns TRUE if the current flag was handled.
cli_handle_writer_options(char ** argv,int argc,int * pargi,cli_writer_opts_t * pwriter_opts)1830 int cli_handle_writer_options(char** argv, int argc, int *pargi, cli_writer_opts_t* pwriter_opts) {
1831 	int argi = *pargi;
1832 	int oargi = argi;
1833 
1834 	if (streq(argv[argi], "--ors")) {
1835 		check_arg_count(argv, argi, argc, 2);
1836 		pwriter_opts->ors = cli_sep_from_arg(argv[argi+1]);
1837 		argi += 2;
1838 
1839 	} else if (streq(argv[argi], "--ofs")) {
1840 		check_arg_count(argv, argi, argc, 2);
1841 		pwriter_opts->ofs = cli_sep_from_arg(argv[argi+1]);
1842 		argi += 2;
1843 
1844 	} else if (streq(argv[argi], "--headerless-csv-output")) {
1845 		pwriter_opts->headerless_csv_output = TRUE;
1846 		argi += 1;
1847 
1848 	} else if (streq(argv[argi], "--ops")) {
1849 		check_arg_count(argv, argi, argc, 2);
1850 		pwriter_opts->ops = cli_sep_from_arg(argv[argi+1]);
1851 		argi += 2;
1852 
1853 	} else if (streq(argv[argi], "--xvright")) {
1854 		pwriter_opts->right_justify_xtab_value = TRUE;
1855 		argi += 1;
1856 
1857 	} else if (streq(argv[argi], "--jvstack")) {
1858 		pwriter_opts->stack_json_output_vertically = TRUE;
1859 		argi += 1;
1860 
1861 	} else if (streq(argv[argi], "--jlistwrap")) {
1862 		pwriter_opts->wrap_json_output_in_outer_list = TRUE;
1863 		argi += 1;
1864 
1865 	} else if (streq(argv[argi], "--jknquoteint")) {
1866 		pwriter_opts->json_quote_int_keys = FALSE;
1867 		argi += 1;
1868 	} else if (streq(argv[argi], "--jquoteall")) {
1869 		pwriter_opts->json_quote_non_string_values = TRUE;
1870 		argi += 1;
1871 	} else if (streq(argv[argi], "--jvquoteall")) {
1872 		pwriter_opts->json_quote_non_string_values = TRUE;
1873 		argi += 1;
1874 
1875 	} else if (streq(argv[argi], "--vflatsep")) {
1876 		check_arg_count(argv, argi, argc, 2);
1877 		pwriter_opts->oosvar_flatten_separator = cli_sep_from_arg(argv[argi+1]);
1878 		argi += 2;
1879 
1880 	} else if (streq(argv[argi], "-o")) {
1881 		check_arg_count(argv, argi, argc, 2);
1882 		if (!lhmss_has_key(get_default_rses(), argv[argi+1])) {
1883 			fprintf(stderr, "%s: unrecognized output format \"%s\".\n",
1884 				MLR_GLOBALS.bargv0, argv[argi+1]);
1885 			exit(1);
1886 		}
1887 		pwriter_opts->ofile_fmt = argv[argi+1];
1888 		argi += 2;
1889 
1890 	} else if (streq(argv[argi], "--ocsv")) {
1891 		pwriter_opts->ofile_fmt = "csv";
1892 		argi += 1;
1893 
1894 	} else if (streq(argv[argi], "--ocsvlite")) {
1895 		pwriter_opts->ofile_fmt = "csvlite";
1896 		argi += 1;
1897 
1898 	} else if (streq(argv[argi], "--otsv")) {
1899 		pwriter_opts->ofile_fmt = "csv";
1900 		pwriter_opts->ofs = "\t";
1901 		argi += 1;
1902 
1903 	} else if (streq(argv[argi], "--otsvlite")) {
1904 		pwriter_opts->ofile_fmt = "csvlite";
1905 		pwriter_opts->ofs = "\t";
1906 		argi += 1;
1907 
1908 	} else if (streq(argv[argi], "--oasv")) {
1909 		pwriter_opts->ofile_fmt = "csv";
1910 		pwriter_opts->ofs = ASV_FS;
1911 		pwriter_opts->ors = ASV_RS;
1912 		argi += 1;
1913 
1914 	} else if (streq(argv[argi], "--oasvlite")) {
1915 		pwriter_opts->ofile_fmt = "csvlite";
1916 		pwriter_opts->ofs = ASV_FS;
1917 		pwriter_opts->ors = ASV_RS;
1918 		argi += 1;
1919 
1920 	} else if (streq(argv[argi], "--ousv")) {
1921 		pwriter_opts->ofile_fmt = "csv";
1922 		pwriter_opts->ofs = USV_FS;
1923 		pwriter_opts->ors = USV_RS;
1924 		argi += 1;
1925 
1926 	} else if (streq(argv[argi], "--ousvlite")) {
1927 		pwriter_opts->ofile_fmt = "csvlite";
1928 		pwriter_opts->ofs = USV_FS;
1929 		pwriter_opts->ors = USV_RS;
1930 		argi += 1;
1931 
1932 	} else if (streq(argv[argi], "--omd")) {
1933 		pwriter_opts->ofile_fmt = "markdown";
1934 		argi += 1;
1935 
1936 	} else if (streq(argv[argi], "--odkvp")) {
1937 		pwriter_opts->ofile_fmt = "dkvp";
1938 		argi += 1;
1939 
1940 	} else if (streq(argv[argi], "--ojson")) {
1941 		pwriter_opts->ofile_fmt = "json";
1942 		argi += 1;
1943 	} else if (streq(argv[argi], "--ojsonx")) {
1944 		pwriter_opts->ofile_fmt = "json";
1945 		pwriter_opts->stack_json_output_vertically = TRUE;
1946 		argi += 1;
1947 
1948 	} else if (streq(argv[argi], "--onidx")) {
1949 		pwriter_opts->ofile_fmt = "nidx";
1950 		argi += 1;
1951 
1952 	} else if (streq(argv[argi], "--oxtab")) {
1953 		pwriter_opts->ofile_fmt = "xtab";
1954 		argi += 1;
1955 
1956 	} else if (streq(argv[argi], "--opprint")) {
1957 		pwriter_opts->ofile_fmt = "pprint";
1958 		argi += 1;
1959 
1960 	} else if (streq(argv[argi], "--right")) {
1961 		pwriter_opts->right_align_pprint = TRUE;
1962 		argi += 1;
1963 
1964 	} else if (streq(argv[argi], "--barred")) {
1965 		pwriter_opts->pprint_barred = TRUE;
1966 		argi += 1;
1967 
1968 	} else if (streq(argv[argi], "--quote-all")) {
1969 		pwriter_opts->oquoting = QUOTE_ALL;
1970 		argi += 1;
1971 
1972 	} else if (streq(argv[argi], "--quote-none")) {
1973 		pwriter_opts->oquoting = QUOTE_NONE;
1974 		argi += 1;
1975 
1976 	} else if (streq(argv[argi], "--quote-minimal")) {
1977 		pwriter_opts->oquoting = QUOTE_MINIMAL;
1978 		argi += 1;
1979 
1980 	} else if (streq(argv[argi], "--quote-numeric")) {
1981 		pwriter_opts->oquoting = QUOTE_NUMERIC;
1982 		argi += 1;
1983 
1984 	} else if (streq(argv[argi], "--quote-original")) {
1985 		pwriter_opts->oquoting = QUOTE_ORIGINAL;
1986 		argi += 1;
1987 
1988 	}
1989 	*pargi = argi;
1990 	return argi != oargi;
1991 }
1992 
1993 // Returns TRUE if the current flag was handled.
cli_handle_reader_writer_options(char ** argv,int argc,int * pargi,cli_reader_opts_t * preader_opts,cli_writer_opts_t * pwriter_opts)1994 int cli_handle_reader_writer_options(char** argv, int argc, int *pargi,
1995 	cli_reader_opts_t* preader_opts, cli_writer_opts_t* pwriter_opts)
1996 {
1997 	int argi = *pargi;
1998 	int oargi = argi;
1999 
2000 	if (streq(argv[argi], "--rs")) {
2001 		check_arg_count(argv, argi, argc, 2);
2002 		preader_opts->irs = cli_sep_from_arg(argv[argi+1]);
2003 		pwriter_opts->ors = cli_sep_from_arg(argv[argi+1]);
2004 		argi += 2;
2005 
2006 	} else if (streq(argv[argi], "--fs")) {
2007 		check_arg_count(argv, argi, argc, 2);
2008 		preader_opts->ifs = cli_sep_from_arg(argv[argi+1]);
2009 		pwriter_opts->ofs = cli_sep_from_arg(argv[argi+1]);
2010 		argi += 2;
2011 
2012 	} else if (streq(argv[argi], "-p")) {
2013 		preader_opts->ifile_fmt = "nidx";
2014 		pwriter_opts->ofile_fmt = "nidx";
2015 		preader_opts->ifs = " ";
2016 		pwriter_opts->ofs = " ";
2017 		preader_opts->allow_repeat_ifs = TRUE;
2018 		argi += 1;
2019 
2020 	} else if (streq(argv[argi], "--ps")) {
2021 		check_arg_count(argv, argi, argc, 2);
2022 		preader_opts->ips = cli_sep_from_arg(argv[argi+1]);
2023 		pwriter_opts->ops = cli_sep_from_arg(argv[argi+1]);
2024 		argi += 2;
2025 
2026 	} else if (streq(argv[argi], "--jflatsep")) {
2027 		check_arg_count(argv, argi, argc, 2);
2028 		preader_opts->input_json_flatten_separator  = cli_sep_from_arg(argv[argi+1]);
2029 		pwriter_opts->output_json_flatten_separator = cli_sep_from_arg(argv[argi+1]);
2030 		argi += 2;
2031 
2032 	} else if (streq(argv[argi], "--io")) {
2033 		check_arg_count(argv, argi, argc, 2);
2034 		if (!lhmss_has_key(get_default_rses(), argv[argi+1])) {
2035 			fprintf(stderr, "%s: unrecognized I/O format \"%s\".\n",
2036 				MLR_GLOBALS.bargv0, argv[argi+1]);
2037 			exit(1);
2038 		}
2039 		preader_opts->ifile_fmt = argv[argi+1];
2040 		pwriter_opts->ofile_fmt = argv[argi+1];
2041 		argi += 2;
2042 
2043 	} else if (streq(argv[argi], "--csv")) {
2044 		preader_opts->ifile_fmt = "csv";
2045 		pwriter_opts->ofile_fmt = "csv";
2046 		argi += 1;
2047 
2048 	} else if (streq(argv[argi], "--csvlite")) {
2049 		preader_opts->ifile_fmt = "csvlite";
2050 		pwriter_opts->ofile_fmt = "csvlite";
2051 		argi += 1;
2052 
2053 	} else if (streq(argv[argi], "--tsv")) {
2054 		preader_opts->ifile_fmt = pwriter_opts->ofile_fmt = "csv";
2055 		preader_opts->ifs = "\t";
2056 		pwriter_opts->ofs = "\t";
2057 		argi += 1;
2058 
2059 	} else if (streq(argv[argi], "--tsvlite") || streq(argv[argi], "-t")) {
2060 		preader_opts->ifile_fmt = pwriter_opts->ofile_fmt = "csvlite";
2061 		preader_opts->ifs = "\t";
2062 		pwriter_opts->ofs = "\t";
2063 		argi += 1;
2064 
2065 	} else if (streq(argv[argi], "--asv")) {
2066 		preader_opts->ifile_fmt = pwriter_opts->ofile_fmt = "csv";
2067 		preader_opts->ifs = ASV_FS;
2068 		pwriter_opts->ofs = ASV_FS;
2069 		preader_opts->irs = ASV_RS;
2070 		pwriter_opts->ors = ASV_RS;
2071 		argi += 1;
2072 
2073 	} else if (streq(argv[argi], "--asvlite")) {
2074 		preader_opts->ifile_fmt = pwriter_opts->ofile_fmt = "csvlite";
2075 		preader_opts->ifs = ASV_FS;
2076 		pwriter_opts->ofs = ASV_FS;
2077 		preader_opts->irs = ASV_RS;
2078 		pwriter_opts->ors = ASV_RS;
2079 		argi += 1;
2080 
2081 	} else if (streq(argv[argi], "--usv")) {
2082 		preader_opts->ifile_fmt = pwriter_opts->ofile_fmt = "csv";
2083 		preader_opts->ifs = USV_FS;
2084 		pwriter_opts->ofs = USV_FS;
2085 		preader_opts->irs = USV_RS;
2086 		pwriter_opts->ors = USV_RS;
2087 		argi += 1;
2088 
2089 	} else if (streq(argv[argi], "--usvlite")) {
2090 		preader_opts->ifile_fmt = pwriter_opts->ofile_fmt = "csvlite";
2091 		preader_opts->ifs = USV_FS;
2092 		pwriter_opts->ofs = USV_FS;
2093 		preader_opts->irs = USV_RS;
2094 		pwriter_opts->ors = USV_RS;
2095 		argi += 1;
2096 
2097 	} else if (streq(argv[argi], "--dkvp")) {
2098 		preader_opts->ifile_fmt = "dkvp";
2099 		pwriter_opts->ofile_fmt = "dkvp";
2100 		argi += 1;
2101 
2102 	} else if (streq(argv[argi], "--json")) {
2103 		preader_opts->ifile_fmt = "json";
2104 		pwriter_opts->ofile_fmt = "json";
2105 		argi += 1;
2106 	} else if (streq(argv[argi], "--jsonx")) {
2107 		preader_opts->ifile_fmt = "json";
2108 		pwriter_opts->ofile_fmt = "json";
2109 		pwriter_opts->stack_json_output_vertically = TRUE;
2110 		argi += 1;
2111 
2112 	} else if (streq(argv[argi], "--nidx")) {
2113 		preader_opts->ifile_fmt = "nidx";
2114 		pwriter_opts->ofile_fmt = "nidx";
2115 		argi += 1;
2116 
2117 	} else if (streq(argv[argi], "-T")) {
2118 		preader_opts->ifile_fmt = "nidx";
2119 		pwriter_opts->ofile_fmt = "nidx";
2120 		preader_opts->ifs = "\t";
2121 		pwriter_opts->ofs = "\t";
2122 		argi += 1;
2123 
2124 	} else if (streq(argv[argi], "--xtab")) {
2125 		preader_opts->ifile_fmt = "xtab";
2126 		pwriter_opts->ofile_fmt = "xtab";
2127 		argi += 1;
2128 
2129 	} else if (streq(argv[argi], "--pprint")) {
2130 		preader_opts->ifile_fmt        = "csvlite";
2131 		preader_opts->ifs              = " ";
2132 		preader_opts->allow_repeat_ifs = TRUE;
2133 		pwriter_opts->ofile_fmt        = "pprint";
2134 		argi += 1;
2135 
2136 	} else if (streq(argv[argi], "--c2t")) {
2137 		preader_opts->ifile_fmt = "csv";
2138 		preader_opts->irs       = "auto";
2139 		pwriter_opts->ofile_fmt = "csv";
2140 		pwriter_opts->ors       = "auto";
2141 		pwriter_opts->ofs       = "\t";
2142 		argi += 1;
2143 	} else if (streq(argv[argi], "--c2d")) {
2144 		preader_opts->ifile_fmt = "csv";
2145 		preader_opts->irs       = "auto";
2146 		pwriter_opts->ofile_fmt = "dkvp";
2147 		argi += 1;
2148 	} else if (streq(argv[argi], "--c2n")) {
2149 		preader_opts->ifile_fmt = "csv";
2150 		preader_opts->irs       = "auto";
2151 		pwriter_opts->ofile_fmt = "nidx";
2152 		argi += 1;
2153 	} else if (streq(argv[argi], "--c2j")) {
2154 		preader_opts->ifile_fmt = "csv";
2155 		preader_opts->irs       = "auto";
2156 		pwriter_opts->ofile_fmt = "json";
2157 		argi += 1;
2158 	} else if (streq(argv[argi], "--c2p")) {
2159 		preader_opts->ifile_fmt = "csv";
2160 		preader_opts->irs       = "auto";
2161 		pwriter_opts->ofile_fmt = "pprint";
2162 		argi += 1;
2163 	} else if (streq(argv[argi], "--c2x")) {
2164 		preader_opts->ifile_fmt = "csv";
2165 		preader_opts->irs       = "auto";
2166 		pwriter_opts->ofile_fmt = "xtab";
2167 		argi += 1;
2168 	} else if (streq(argv[argi], "--c2m")) {
2169 		preader_opts->ifile_fmt = "csv";
2170 		preader_opts->irs       = "auto";
2171 		pwriter_opts->ofile_fmt = "markdown";
2172 		argi += 1;
2173 
2174 	} else if (streq(argv[argi], "--t2c")) {
2175 		preader_opts->ifile_fmt = "csv";
2176 		preader_opts->ifs       = "\t";
2177 		preader_opts->irs       = "auto";
2178 		pwriter_opts->ofile_fmt = "csv";
2179 		pwriter_opts->ors       = "auto";
2180 		argi += 1;
2181 	} else if (streq(argv[argi], "--t2d")) {
2182 		preader_opts->ifile_fmt = "csv";
2183 		preader_opts->ifs       = "\t";
2184 		preader_opts->irs       = "auto";
2185 		pwriter_opts->ofile_fmt = "dkvp";
2186 		argi += 1;
2187 	} else if (streq(argv[argi], "--t2n")) {
2188 		preader_opts->ifile_fmt = "csv";
2189 		preader_opts->ifs       = "\t";
2190 		preader_opts->irs       = "auto";
2191 		pwriter_opts->ofile_fmt = "nidx";
2192 		argi += 1;
2193 	} else if (streq(argv[argi], "--t2j")) {
2194 		preader_opts->ifile_fmt = "csv";
2195 		preader_opts->ifs       = "\t";
2196 		preader_opts->irs       = "auto";
2197 		pwriter_opts->ofile_fmt = "json";
2198 		argi += 1;
2199 	} else if (streq(argv[argi], "--t2p")) {
2200 		preader_opts->ifile_fmt = "csv";
2201 		preader_opts->ifs       = "\t";
2202 		preader_opts->irs       = "auto";
2203 		pwriter_opts->ofile_fmt = "pprint";
2204 		argi += 1;
2205 	} else if (streq(argv[argi], "--t2x")) {
2206 		preader_opts->ifile_fmt = "csv";
2207 		preader_opts->ifs       = "\t";
2208 		preader_opts->irs       = "auto";
2209 		pwriter_opts->ofile_fmt = "xtab";
2210 		argi += 1;
2211 	} else if (streq(argv[argi], "--t2m")) {
2212 		preader_opts->ifile_fmt = "csv";
2213 		preader_opts->ifs       = "\t";
2214 		preader_opts->irs       = "auto";
2215 		pwriter_opts->ofile_fmt = "markdown";
2216 		argi += 1;
2217 
2218 	} else if (streq(argv[argi], "--d2c")) {
2219 		preader_opts->ifile_fmt = "dkvp";
2220 		pwriter_opts->ofile_fmt = "csv";
2221 		pwriter_opts->ors       = "auto";
2222 		argi += 1;
2223 	} else if (streq(argv[argi], "--d2t")) {
2224 		preader_opts->ifile_fmt = "dkvp";
2225 		pwriter_opts->ofile_fmt = "csv";
2226 		pwriter_opts->ors       = "auto";
2227 		pwriter_opts->ofs       = "\t";
2228 		argi += 1;
2229 	} else if (streq(argv[argi], "--d2n")) {
2230 		preader_opts->ifile_fmt = "dkvp";
2231 		pwriter_opts->ofile_fmt = "nidx";
2232 		argi += 1;
2233 	} else if (streq(argv[argi], "--d2j")) {
2234 		preader_opts->ifile_fmt = "dkvp";
2235 		pwriter_opts->ofile_fmt = "json";
2236 		argi += 1;
2237 	} else if (streq(argv[argi], "--d2p")) {
2238 		preader_opts->ifile_fmt = "dkvp";
2239 		pwriter_opts->ofile_fmt = "pprint";
2240 		argi += 1;
2241 	} else if (streq(argv[argi], "--d2x")) {
2242 		preader_opts->ifile_fmt = "dkvp";
2243 		pwriter_opts->ofile_fmt = "xtab";
2244 		argi += 1;
2245 	} else if (streq(argv[argi], "--d2m")) {
2246 		preader_opts->ifile_fmt = "dkvp";
2247 		pwriter_opts->ofile_fmt = "markdown";
2248 		argi += 1;
2249 
2250 	} else if (streq(argv[argi], "--n2c")) {
2251 		preader_opts->ifile_fmt = "nidx";
2252 		pwriter_opts->ofile_fmt = "csv";
2253 		pwriter_opts->ors       = "auto";
2254 		argi += 1;
2255 	} else if (streq(argv[argi], "--n2t")) {
2256 		preader_opts->ifile_fmt = "nidx";
2257 		pwriter_opts->ofile_fmt = "csv";
2258 		pwriter_opts->ors       = "auto";
2259 		pwriter_opts->ofs       = "\t";
2260 		argi += 1;
2261 	} else if (streq(argv[argi], "--n2d")) {
2262 		preader_opts->ifile_fmt = "nidx";
2263 		pwriter_opts->ofile_fmt = "dkvp";
2264 		argi += 1;
2265 	} else if (streq(argv[argi], "--n2j")) {
2266 		preader_opts->ifile_fmt = "nidx";
2267 		pwriter_opts->ofile_fmt = "json";
2268 		argi += 1;
2269 	} else if (streq(argv[argi], "--n2p")) {
2270 		preader_opts->ifile_fmt = "nidx";
2271 		pwriter_opts->ofile_fmt = "pprint";
2272 		argi += 1;
2273 	} else if (streq(argv[argi], "--n2x")) {
2274 		preader_opts->ifile_fmt = "nidx";
2275 		pwriter_opts->ofile_fmt = "xtab";
2276 		argi += 1;
2277 	} else if (streq(argv[argi], "--n2m")) {
2278 		preader_opts->ifile_fmt = "nidx";
2279 		pwriter_opts->ofile_fmt = "markdown";
2280 		argi += 1;
2281 
2282 	} else if (streq(argv[argi], "--j2c")) {
2283 		preader_opts->ifile_fmt = "json";
2284 		pwriter_opts->ofile_fmt = "csv";
2285 		pwriter_opts->ors       = "auto";
2286 		argi += 1;
2287 	} else if (streq(argv[argi], "--j2t")) {
2288 		preader_opts->ifile_fmt = "json";
2289 		pwriter_opts->ofile_fmt = "csv";
2290 		pwriter_opts->ors       = "auto";
2291 		pwriter_opts->ofs       = "\t";
2292 		argi += 1;
2293 	} else if (streq(argv[argi], "--j2d")) {
2294 		preader_opts->ifile_fmt = "json";
2295 		pwriter_opts->ofile_fmt = "dkvp";
2296 		argi += 1;
2297 	} else if (streq(argv[argi], "--j2n")) {
2298 		preader_opts->ifile_fmt = "json";
2299 		pwriter_opts->ofile_fmt = "nidx";
2300 		argi += 1;
2301 	} else if (streq(argv[argi], "--j2p")) {
2302 		preader_opts->ifile_fmt = "json";
2303 		pwriter_opts->ofile_fmt = "pprint";
2304 		argi += 1;
2305 	} else if (streq(argv[argi], "--j2x")) {
2306 		preader_opts->ifile_fmt = "json";
2307 		pwriter_opts->ofile_fmt = "xtab";
2308 		argi += 1;
2309 	} else if (streq(argv[argi], "--j2m")) {
2310 		preader_opts->ifile_fmt = "json";
2311 		pwriter_opts->ofile_fmt = "markdown";
2312 		argi += 1;
2313 
2314 	} else if (streq(argv[argi], "--p2c")) {
2315 		preader_opts->ifile_fmt        = "csvlite";
2316 		preader_opts->ifs              = " ";
2317 		preader_opts->allow_repeat_ifs = TRUE;
2318 		pwriter_opts->ofile_fmt        = "csv";
2319 		pwriter_opts->ors              = "auto";
2320 		argi += 1;
2321 	} else if (streq(argv[argi], "--p2t")) {
2322 		preader_opts->ifile_fmt        = "csvlite";
2323 		preader_opts->ifs              = " ";
2324 		preader_opts->allow_repeat_ifs = TRUE;
2325 		pwriter_opts->ofile_fmt        = "csv";
2326 		pwriter_opts->ors              = "auto";
2327 		pwriter_opts->ofs              = "\t";
2328 		argi += 1;
2329 	} else if (streq(argv[argi], "--p2d")) {
2330 		preader_opts->ifile_fmt        = "csvlite";
2331 		preader_opts->ifs              = " ";
2332 		preader_opts->allow_repeat_ifs = TRUE;
2333 		pwriter_opts->ofile_fmt        = "dkvp";
2334 		argi += 1;
2335 	} else if (streq(argv[argi], "--p2n")) {
2336 		preader_opts->ifile_fmt        = "csvlite";
2337 		preader_opts->ifs              = " ";
2338 		preader_opts->allow_repeat_ifs = TRUE;
2339 		pwriter_opts->ofile_fmt        = "nidx";
2340 		argi += 1;
2341 	} else if (streq(argv[argi], "--p2j")) {
2342 		preader_opts->ifile_fmt        = "csvlite";
2343 		preader_opts->ifs              = " ";
2344 		preader_opts->allow_repeat_ifs = TRUE;
2345 		pwriter_opts->ofile_fmt = "json";
2346 		argi += 1;
2347 	} else if (streq(argv[argi], "--p2x")) {
2348 		preader_opts->ifile_fmt        = "csvlite";
2349 		preader_opts->ifs              = " ";
2350 		preader_opts->allow_repeat_ifs = TRUE;
2351 		pwriter_opts->ofile_fmt        = "xtab";
2352 		argi += 1;
2353 	} else if (streq(argv[argi], "--p2m")) {
2354 		preader_opts->ifile_fmt        = "csvlite";
2355 		preader_opts->ifs              = " ";
2356 		preader_opts->allow_repeat_ifs = TRUE;
2357 		pwriter_opts->ofile_fmt        = "markdown";
2358 		argi += 1;
2359 
2360 	} else if (streq(argv[argi], "--x2c")) {
2361 		preader_opts->ifile_fmt = "xtab";
2362 		pwriter_opts->ofile_fmt = "csv";
2363 		pwriter_opts->ors       = "auto";
2364 		argi += 1;
2365 	} else if (streq(argv[argi], "--x2t")) {
2366 		preader_opts->ifile_fmt = "xtab";
2367 		pwriter_opts->ofile_fmt = "csv";
2368 		pwriter_opts->ors       = "auto";
2369 		pwriter_opts->ofs       = "\t";
2370 		argi += 1;
2371 	} else if (streq(argv[argi], "--x2d")) {
2372 		preader_opts->ifile_fmt = "xtab";
2373 		pwriter_opts->ofile_fmt = "dkvp";
2374 		argi += 1;
2375 	} else if (streq(argv[argi], "--x2n")) {
2376 		preader_opts->ifile_fmt = "xtab";
2377 		pwriter_opts->ofile_fmt = "nidx";
2378 		argi += 1;
2379 	} else if (streq(argv[argi], "--x2j")) {
2380 		preader_opts->ifile_fmt = "xtab";
2381 		pwriter_opts->ofile_fmt = "json";
2382 		argi += 1;
2383 	} else if (streq(argv[argi], "--x2p")) {
2384 		preader_opts->ifile_fmt = "xtab";
2385 		pwriter_opts->ofile_fmt = "pprint";
2386 		argi += 1;
2387 	} else if (streq(argv[argi], "--x2m")) {
2388 		preader_opts->ifile_fmt = "xtab";
2389 		pwriter_opts->ofile_fmt = "markdown";
2390 		argi += 1;
2391 
2392 	} else if (streq(argv[argi], "-N")) {
2393 		preader_opts->use_implicit_csv_header = TRUE;
2394 		pwriter_opts->headerless_csv_output = TRUE;
2395 		argi += 1;
2396 	}
2397 	*pargi = argi;
2398 	return argi != oargi;
2399 }
2400 
2401 // Returns TRUE if the current flag was handled.
cli_handle_misc_options(char ** argv,int argc,int * pargi,cli_opts_t * popts)2402 static int cli_handle_misc_options(char** argv, int argc, int *pargi, cli_opts_t* popts) {
2403 	int argi = *pargi;
2404 	int oargi = argi;
2405 
2406 	if (streq(argv[argi], "-I")) {
2407 		popts->do_in_place = TRUE;
2408 		argi += 1;
2409 
2410 	} else if (streq(argv[argi], "-n")) {
2411 		popts->no_input = TRUE;
2412 		argi += 1;
2413 
2414 	} else if (streq(argv[argi], "--from")) {
2415 		check_arg_count(argv, argi, argc, 2);
2416 		slls_append(popts->filenames, argv[argi+1], NO_FREE);
2417 		argi += 2;
2418 
2419 	} else if (streq(argv[argi], "--ofmt")) {
2420 		check_arg_count(argv, argi, argc, 2);
2421 		popts->ofmt = argv[argi+1];
2422 		argi += 2;
2423 
2424 	} else if (streq(argv[argi], "--nr-progress-mod")) {
2425 		check_arg_count(argv, argi, argc, 2);
2426 		if (sscanf(argv[argi+1], "%lld", &popts->nr_progress_mod) != 1) {
2427 			fprintf(stderr,
2428 				"%s: --nr-progress-mod argument must be a positive integer; got \"%s\".\n",
2429 				MLR_GLOBALS.bargv0, argv[argi+1]);
2430 			main_usage_short(stderr, MLR_GLOBALS.bargv0);
2431 			exit(1);
2432 		}
2433 		if (popts->nr_progress_mod <= 0) {
2434 			fprintf(stderr,
2435 				"%s: --nr-progress-mod argument must be a positive integer; got \"%s\".\n",
2436 				MLR_GLOBALS.bargv0, argv[argi+1]);
2437 			main_usage_short(stderr, MLR_GLOBALS.bargv0);
2438 			exit(1);
2439 		}
2440 		argi += 2;
2441 
2442 	} else if (streq(argv[argi], "--seed")) {
2443 		check_arg_count(argv, argi, argc, 2);
2444 		if (sscanf(argv[argi+1], "0x%x", &popts->rand_seed) == 1) {
2445 			popts->have_rand_seed = TRUE;
2446 		} else if (sscanf(argv[argi+1], "%u", &popts->rand_seed) == 1) {
2447 			popts->have_rand_seed = TRUE;
2448 		} else {
2449 			fprintf(stderr,
2450 				"%s: --seed argument must be a decimal or hexadecimal integer; got \"%s\".\n",
2451 				MLR_GLOBALS.bargv0, argv[argi+1]);
2452 			main_usage_short(stderr, MLR_GLOBALS.bargv0);
2453 			exit(1);
2454 		}
2455 		argi += 2;
2456 
2457 	}
2458 	*pargi = argi;
2459 	return argi != oargi;
2460 }
2461 
2462 // ----------------------------------------------------------------
// Looks up key in pmap via lhmss_get and returns the result. A NULL result
// is treated as an internal coding error rather than being handed back to
// the caller.
static char* lhmss_get_or_die(lhmss_t* pmap, char* key) {
	char* found = lhmss_get(pmap, key);
	MLR_INTERNAL_CODING_ERROR_IF(found == NULL);
	return found;
}
2468 
2469 // ----------------------------------------------------------------
// Returns the value stored under key in pmap. The key's presence is checked
// first with lhmsll_has_key; a missing key is treated as an internal coding
// error.
static int lhmsll_get_or_die(lhmsll_t* pmap, char* key) {
	int key_is_present = lhmsll_has_key(pmap, key);
	MLR_INTERNAL_CODING_ERROR_UNLESS(key_is_present);
	return lhmsll_get(pmap, key);
}
2474