1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4
5 #include "input/line_readers.h"
6
7 #include "lib/mlr_arch.h"
8 #include "lib/mlrutil.h"
9 #include "lib/mlr_globals.h"
10 #include "lib/mtrand.h"
11 #include "containers/slls.h"
12 #include "containers/lhmss.h"
13 #include "containers/lhmsll.h"
14 #include "input/lrec_readers.h"
15 #include "dsl/function_manager.h"
16 #include "dsl/mlr_dsl_cst.h"
17 #include "mapping/mappers.h"
18 #include "output/lrec_writers.h"
19 #include "cli/mlrcli.h"
20 #include "cli/quoting.h"
21 #include "cli/argparse.h"
22 #include "auxents/aux_entries.h"
23
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #define VERSION_STRING PACKAGE_VERSION
27 #else
28 #include "mlrvers.h"
29 #define VERSION_STRING MLR_VERSION
30 #endif
31
32 // ----------------------------------------------------------------
// Fallback values for command-line options.
// NOTE(review): inferred from the DEFAULT_ prefix; the code that applies them is not in view here.
#define DEFAULT_OFMT "%lf"
#define DEFAULT_OQUOTING QUOTE_MINIMAL
#define DEFAULT_JSON_FLATTEN_SEPARATOR ":"
#define DEFAULT_OOSVAR_FLATTEN_SEPARATOR ":"
#define DEFAULT_COMMENT_STRING "#"

// ASV field and record separators: the single bytes 0x1f and 0x1e.
#define ASV_FS "\x1f"
#define ASV_RS "\x1e"

// Printable spellings of the ASV separators, for use in on-line help text.
#define ASV_FS_FOR_HELP "0x1f"
#define ASV_RS_FOR_HELP "0x1e"

// USV field and record separators: Unicode code points U+241F and U+241E, encoded as UTF-8.
#define USV_FS "\xe2\x90\x9f"
#define USV_RS "\xe2\x90\x9e"

// Printable spellings of the USV separators, for use in on-line help text.
#define USV_FS_FOR_HELP "U+241F (UTF-8 0xe2909f)"
#define USV_RS_FOR_HELP "U+241E (UTF-8 0xe2909e)"
52
53 // ----------------------------------------------------------------
// Table of all verbs known to the command-line parser, one setup descriptor per verb.
// The verb-listing functions in this file walk it in this order, so the order here is
// the order in which verbs are shown in the on-line help.
static mapper_setup_t* mapper_lookup_table[] = {

	&mapper_altkv_setup,
	&mapper_bar_setup,
	&mapper_bootstrap_setup,
	&mapper_cat_setup,
	&mapper_check_setup,
	&mapper_clean_whitespace_setup,
	&mapper_count_setup,
	&mapper_count_distinct_setup,
	&mapper_count_similar_setup,
	&mapper_cut_setup,
	&mapper_decimate_setup,
	&mapper_fill_down_setup,
	&mapper_filter_setup,
	&mapper_format_values_setup,
	&mapper_fraction_setup,
	&mapper_grep_setup,
	&mapper_group_by_setup,
	&mapper_group_like_setup,
	&mapper_having_fields_setup,
	&mapper_head_setup,
	&mapper_histogram_setup,
	&mapper_join_setup,
	&mapper_label_setup,
	&mapper_least_frequent_setup,
	&mapper_merge_fields_setup,
	&mapper_most_frequent_setup,
	&mapper_nest_setup,
	&mapper_nothing_setup,
	&mapper_put_setup,
	&mapper_regularize_setup,
	&mapper_remove_empty_columns_setup,
	&mapper_rename_setup,
	&mapper_reorder_setup,
	&mapper_repeat_setup,
	&mapper_reshape_setup,
	&mapper_sample_setup,
	&mapper_sec2gmt_setup,
	&mapper_sec2gmtdate_setup,
	&mapper_seqgen_setup,
	&mapper_shuffle_setup,
	&mapper_skip_trivial_records_setup,
	&mapper_sort_setup,
	&mapper_sort_within_records_setup,
	&mapper_stats1_setup,
	&mapper_stats2_setup,
	&mapper_step_setup,
	&mapper_tac_setup,
	&mapper_tail_setup,
	&mapper_tee_setup,
	&mapper_top_setup,
	&mapper_uniq_setup,
	&mapper_unsparsify_setup,

};
// Number of entries in mapper_lookup_table, computed at compile time.
static int mapper_lookup_table_length = sizeof(mapper_lookup_table) / sizeof(mapper_lookup_table[0]);
111
112 // ----------------------------------------------------------------
// Forward declarations for file-local helpers.

// .mlrrc loading
static void cli_load_mlrrc(cli_opts_t* popts);
static void cli_try_load_mlrrc(cli_opts_t* popts, char* path);
static int handle_mlrrc_line_1(cli_opts_t* popts, char* line);
static int handle_mlrrc_line_2(cli_opts_t* popts, char* line);
static int handle_mlrrc_line_3(cli_opts_t* popts, char* line);
static int handle_mlrrc_line_4(cli_opts_t* popts, char** argv, int argc);

// Main-flag handling not covered by the reader/writer option handlers
static int cli_handle_misc_options(char** argv, int argc, int *pargi, cli_opts_t* popts);

// Lazily-built lookup tables (separator aliases and per-format defaults) and their cleanup
static lhmss_t* get_desc_to_chars_map();
static lhmsll_t* get_default_repeat_ifses();
static lhmsll_t* get_default_repeat_ipses();
static lhmss_t* get_default_fses();
static lhmss_t* get_default_pses();
static lhmss_t* get_default_rses();
static void free_opt_singletons();
static char* rebackslash(char* sep);

// On-line help
static void main_usage_long(FILE* o, char* argv0);
static void main_usage_short(FILE* o, char* argv0);
static void main_usage_synopsis(FILE* o, char* argv0);
static void main_usage_examples(FILE* o, char* argv0, char* leader);
static void list_all_verbs_raw(FILE* o);
static void list_all_verbs(FILE* o, char* leader);
static void main_usage_help_options(FILE* o, char* argv0);
static void main_usage_mlrrc(FILE* o, char* argv0);
static void main_usage_functions(FILE* o, char* argv0, char* leader);
static void main_usage_data_format_examples(FILE* o, char* argv0);
static void main_usage_data_format_options(FILE* o, char* argv0);
static void main_usage_comments_in_data(FILE* o, char* argv0);
static void main_usage_format_conversion_keystroke_saver_options(FILE* o, char* argv0);
static void main_usage_compressed_data_options(FILE* o, char* argv0);
static void main_usage_separator_options(FILE* o, char* argv0);
static void main_usage_csv_options(FILE* o, char* argv0);
static void main_usage_double_quoting(FILE* o, char* argv0);
static void main_usage_numerical_formatting(FILE* o, char* argv0);
static void main_usage_other_options(FILE* o, char* argv0);
static void main_usage_then_chaining(FILE* o, char* argv0);
static void main_usage_auxents(FILE* o, char* argv0);
static void main_usage_see_also(FILE* o, char* argv0);
static void print_type_arithmetic_info(FILE* o, char* argv0);
static void usage_all_verbs(char* argv0);
static void usage_unrecognized_verb(char* argv0, char* arg);

// Verb-chain parsing support
static void check_arg_count(char** argv, int argi, int argc, int n);
static mapper_setup_t* look_up_mapper_setup(char* verb);

// Help-style main-flags which print something; parse_command_line() exits when this returns true
static int handle_terminal_usage(char** argv, int argc, int argi);

// Map lookups used by parse_command_line() to fill in per-format defaults
static char* lhmss_get_or_die(lhmss_t* pmap, char* key);
static int lhmsll_get_or_die(lhmsll_t* pmap, char* key);
164
165 // ----------------------------------------------------------------
parse_command_line(int argc,char ** argv,sllv_t ** ppmapper_list)166 cli_opts_t* parse_command_line(int argc, char** argv, sllv_t** ppmapper_list) {
167 cli_opts_t* popts = mlr_malloc_or_die(sizeof(cli_opts_t));
168
169 int argi = 1;
170
171 // Set defaults for options
172 cli_opts_init(popts);
173
174 // Try .mlrrc overrides (then command-line on top of that).
175 // A --norc flag (if provided) must come before all other options.
176 // Or, they can set the environment variable MLRRC="__none__".
177 if (argc >= 2 && streq(argv[1], "--norc")) {
178 argi++;
179 } else {
180 cli_load_mlrrc(popts);
181 }
182
183 for (; argi < argc; /* variable increment: 1 or 2 depending on flag */) {
184
185 if (argv[argi][0] != '-') {
186 break; // No more flag options to process
187 } else if (handle_terminal_usage(argv, argc, argi)) {
188 exit(0);
189 } else if (cli_handle_reader_options(argv, argc, &argi, &popts->reader_opts)) {
190 // handled
191 } else if (cli_handle_writer_options(argv, argc, &argi, &popts->writer_opts)) {
192 // handled
193 } else if (cli_handle_reader_writer_options(argv, argc, &argi, &popts->reader_opts, &popts->writer_opts)) {
194 // handled
195 } else if (cli_handle_misc_options(argv, argc, &argi, popts)) {
196 // handled
197 } else {
198 // unhandled
199 usage_unrecognized_verb(MLR_GLOBALS.bargv0, argv[argi]);
200 }
201 }
202
203 cli_apply_defaults(popts);
204
205 lhmss_t* default_rses = get_default_rses();
206 lhmss_t* default_fses = get_default_fses();
207 lhmss_t* default_pses = get_default_pses();
208 lhmsll_t* default_repeat_ifses = get_default_repeat_ifses();
209 lhmsll_t* default_repeat_ipses = get_default_repeat_ipses();
210
211 if (popts->reader_opts.irs == NULL)
212 popts->reader_opts.irs = lhmss_get_or_die(default_rses, popts->reader_opts.ifile_fmt);
213 if (popts->reader_opts.ifs == NULL)
214 popts->reader_opts.ifs = lhmss_get_or_die(default_fses, popts->reader_opts.ifile_fmt);
215 if (popts->reader_opts.ips == NULL)
216 popts->reader_opts.ips = lhmss_get_or_die(default_pses, popts->reader_opts.ifile_fmt);
217
218 if (popts->reader_opts.allow_repeat_ifs == NEITHER_TRUE_NOR_FALSE)
219 popts->reader_opts.allow_repeat_ifs = lhmsll_get_or_die(default_repeat_ifses, popts->reader_opts.ifile_fmt);
220 if (popts->reader_opts.allow_repeat_ips == NEITHER_TRUE_NOR_FALSE)
221 popts->reader_opts.allow_repeat_ips = lhmsll_get_or_die(default_repeat_ipses, popts->reader_opts.ifile_fmt);
222
223 if (popts->writer_opts.ors == NULL)
224 popts->writer_opts.ors = lhmss_get_or_die(default_rses, popts->writer_opts.ofile_fmt);
225 if (popts->writer_opts.ofs == NULL)
226 popts->writer_opts.ofs = lhmss_get_or_die(default_fses, popts->writer_opts.ofile_fmt);
227 if (popts->writer_opts.ops == NULL)
228 popts->writer_opts.ops = lhmss_get_or_die(default_pses, popts->writer_opts.ofile_fmt);
229
230 if (streq(popts->writer_opts.ofile_fmt, "pprint") && strlen(popts->writer_opts.ofs) != 1) {
231 fprintf(stderr, "%s: OFS for PPRINT format must be single-character; got \"%s\".\n",
232 MLR_GLOBALS.bargv0, popts->writer_opts.ofs);
233 return NULL;
234 }
235
236 // Construct the mapper list for single use, e.g. the normal streaming case wherein the
237 // mappers operate on all input files. Also retain information needed to construct them
238 // for each input file, for in-place mode.
239 popts->mapper_argb = argi;
240 popts->original_argv = argv;
241 popts->non_in_place_argv = copy_argv(argv);
242 popts->argc = argc;
243 *ppmapper_list = cli_parse_mappers(popts->non_in_place_argv, &argi, argc, popts);
244
245 for ( ; argi < argc; argi++) {
246 slls_append(popts->filenames, argv[argi], NO_FREE);
247 }
248
249 if (popts->no_input) {
250 slls_free(popts->filenames);
251 popts->filenames = NULL;
252 }
253
254 if (popts->do_in_place && (popts->filenames == NULL || popts->filenames->length == 0)) {
255 fprintf(stderr, "%s: -I option (in-place operation) requires input files.\n", MLR_GLOBALS.bargv0);
256 exit(1);
257 }
258
259 if (popts->have_rand_seed) {
260 mtrand_init(popts->rand_seed);
261 } else {
262 mtrand_init_default();
263 }
264
265 return popts;
266 }
267
268 // ----------------------------------------------------------------
269 // Returns a list of mappers, from the starting point in argv given by *pargi. Bumps *pargi to
270 // point to remaining post-mapper-setup args, i.e. filenames.
cli_parse_mappers(char ** argv,int * pargi,int argc,cli_opts_t * popts)271 sllv_t* cli_parse_mappers(char** argv, int* pargi, int argc, cli_opts_t* popts) {
272 sllv_t* pmapper_list = sllv_alloc();
273 int argi = *pargi;
274
275 // Allow then-chains to start with an initial 'then': 'mlr verb1 then verb2 then verb3' or
276 // 'mlr then verb1 then verb2 then verb3'. Particuarly useful in backslashy scripting contexts.
277 if ((argc - argi) >= 1 && streq(argv[argi], "then")) {
278 argi++;
279 }
280
281 if ((argc - argi) < 1) {
282 fprintf(stderr, "%s: no verb supplied.\n", MLR_GLOBALS.bargv0);
283 main_usage_short(stderr, MLR_GLOBALS.bargv0);
284 exit(1);
285 }
286
287 // Note that the command-line parsers can operate destructively on argv, e.g. verbs
288 // which take comma-delimited field names splitting on commas. For this reason we
289 // need to duplicate argv on each in-place run within the streamer module. But before
290 // that ever happens, here we run through the verb-parsers once to find out where it
291 // is on the command line that the verbs and their arguments end and the filenames
292 // begin.
293
294 while (TRUE) {
295 check_arg_count(argv, argi, argc, 1);
296 char* verb = argv[argi];
297
298 mapper_setup_t* pmapper_setup = look_up_mapper_setup(verb);
299 if (pmapper_setup == NULL) {
300 fprintf(stderr, "%s: verb \"%s\" not found. Please use \"%s --help\" for a list.\n",
301 MLR_GLOBALS.bargv0, verb, MLR_GLOBALS.bargv0);
302 exit(1);
303 }
304
305 if ((argc - argi) >= 2) {
306 if (streq(argv[argi+1], "-h") || streq(argv[argi+1], "--help")) {
307 pmapper_setup->pusage_func(stdout, MLR_GLOBALS.bargv0, verb);
308 exit(0);
309 }
310 }
311
312 // It's up to the parse func to print its usage on CLI-parse failure.
313 // Also note: this assumes main reader/writer opts are all parsed
314 // *before* mapper parse-CLI methods are invoked.
315 mapper_t* pmapper = pmapper_setup->pparse_func(&argi, argc, argv,
316 &popts->reader_opts, &popts->writer_opts);
317 if (pmapper == NULL) {
318 exit(1);
319 }
320
321 if (pmapper_setup->ignores_input && pmapper_list->length == 0) {
322 // e.g. then-chain starts with seqgen
323 popts->no_input = TRUE;
324 }
325
326 sllv_append(pmapper_list, pmapper);
327
328 if (argi >= argc || !streq(argv[argi], "then"))
329 break;
330 argi++;
331 }
332
333 *pargi = argi;
334 return pmapper_list;
335 }
336
337 // ----------------------------------------------------------------
// Releases an options object along with the file-name list and argv copy it holds, and
// then frees this file's lazily-built lookup tables. A NULL argument is a no-op (in which
// case the lookup tables are left alone as well).
void cli_opts_free(cli_opts_t* popts) {
	if (popts != NULL) {
		slls_free(popts->filenames);
		free_argv_copy(popts->non_in_place_argv);
		free(popts);
		free_opt_singletons();
	}
}
347
348 // ----------------------------------------------------------------
349 static lhmss_t* singleton_pdesc_to_chars_map = NULL;
get_desc_to_chars_map()350 static lhmss_t* get_desc_to_chars_map() {
351 if (singleton_pdesc_to_chars_map == NULL) {
352 singleton_pdesc_to_chars_map = lhmss_alloc();
353 lhmss_put(singleton_pdesc_to_chars_map, "cr", "\r", NO_FREE);
354 lhmss_put(singleton_pdesc_to_chars_map, "crcr", "\r\r", NO_FREE);
355 lhmss_put(singleton_pdesc_to_chars_map, "newline", "\n", NO_FREE);
356 lhmss_put(singleton_pdesc_to_chars_map, "lf", "\n", NO_FREE);
357 lhmss_put(singleton_pdesc_to_chars_map, "lflf", "\n\n", NO_FREE);
358 lhmss_put(singleton_pdesc_to_chars_map, "crlf", "\r\n", NO_FREE);
359 lhmss_put(singleton_pdesc_to_chars_map, "crlfcrlf", "\r\n\r\n", NO_FREE);
360 lhmss_put(singleton_pdesc_to_chars_map, "tab", "\t", NO_FREE);
361 lhmss_put(singleton_pdesc_to_chars_map, "space", " ", NO_FREE);
362 lhmss_put(singleton_pdesc_to_chars_map, "comma", ",", NO_FREE);
363 lhmss_put(singleton_pdesc_to_chars_map, "newline", "\n", NO_FREE);
364 lhmss_put(singleton_pdesc_to_chars_map, "pipe", "|", NO_FREE);
365 lhmss_put(singleton_pdesc_to_chars_map, "slash", "/", NO_FREE);
366 lhmss_put(singleton_pdesc_to_chars_map, "colon", ":", NO_FREE);
367 lhmss_put(singleton_pdesc_to_chars_map, "semicolon", ";", NO_FREE);
368 lhmss_put(singleton_pdesc_to_chars_map, "equals", "=", NO_FREE);
369 }
370 return singleton_pdesc_to_chars_map;
371 }
// Converts a separator given on the command line into the separator string itself.
// The result is always freshly allocated, so the caller can free it unconditionally.
char* cli_sep_from_arg(char* arg) {
	char* aliased = lhmss_get(get_desc_to_chars_map(), arg);
	if (aliased == NULL) {
		// Not one of the aliases: handle backslash sequences, e.g. '\r\n'
		return mlr_alloc_unbackslash(arg);
	}
	// One of the aliases, e.g. crlf
	return mlr_strdup_or_die(aliased);
}
380
381 // ----------------------------------------------------------------
// Per-file-format default tables. Each stays NULL until its get_default_...() accessor
// builds it on first use; free_opt_singletons() releases all of them.
static lhmss_t* singleton_default_rses = NULL; // format name -> record separator
static lhmss_t* singleton_default_fses = NULL; // format name -> field separator
static lhmss_t* singleton_default_pses = NULL; // format name -> pair separator
static lhmsll_t* singleton_default_repeat_ifses = NULL; // format name -> allow repeated IFS?
static lhmsll_t* singleton_default_repeat_ipses = NULL; // format name -> allow repeated IPS?
387
get_default_rses()388 static lhmss_t* get_default_rses() {
389 if (singleton_default_rses == NULL) {
390 singleton_default_rses = lhmss_alloc();
391
392 lhmss_put(singleton_default_rses, "gen", "N/A", NO_FREE);
393 lhmss_put(singleton_default_rses, "dkvp", "auto", NO_FREE);
394 lhmss_put(singleton_default_rses, "json", "auto", NO_FREE);
395 lhmss_put(singleton_default_rses, "nidx", "auto", NO_FREE);
396 lhmss_put(singleton_default_rses, "csv", "auto", NO_FREE);
397 lhmss_put(singleton_default_rses, "csvlite", "auto", NO_FREE);
398 lhmss_put(singleton_default_rses, "markdown", "auto", NO_FREE);
399 lhmss_put(singleton_default_rses, "pprint", "auto", NO_FREE);
400 lhmss_put(singleton_default_rses, "xtab", "(N/A)", NO_FREE);
401 }
402 return singleton_default_rses;
403 }
404
get_default_fses()405 static lhmss_t* get_default_fses() {
406 if (singleton_default_fses == NULL) {
407 singleton_default_fses = lhmss_alloc();
408 lhmss_put(singleton_default_fses, "gen", "(N/A)", NO_FREE);
409 lhmss_put(singleton_default_fses, "dkvp", ",", NO_FREE);
410 lhmss_put(singleton_default_fses, "json", "(N/A)", NO_FREE);
411 lhmss_put(singleton_default_fses, "nidx", " ", NO_FREE);
412 lhmss_put(singleton_default_fses, "csv", ",", NO_FREE);
413 lhmss_put(singleton_default_fses, "csvlite", ",", NO_FREE);
414 lhmss_put(singleton_default_fses, "markdown", "(N/A)", NO_FREE);
415 lhmss_put(singleton_default_fses, "pprint", " ", NO_FREE);
416 lhmss_put(singleton_default_fses, "xtab", "auto", NO_FREE);
417 }
418 return singleton_default_fses;
419 }
420
get_default_pses()421 static lhmss_t* get_default_pses() {
422 if (singleton_default_pses == NULL) {
423 singleton_default_pses = lhmss_alloc();
424 lhmss_put(singleton_default_pses, "gen", "(N/A)", NO_FREE);
425 lhmss_put(singleton_default_pses, "dkvp", "=", NO_FREE);
426 lhmss_put(singleton_default_pses, "json", "(N/A)", NO_FREE);
427 lhmss_put(singleton_default_pses, "nidx", "(N/A)", NO_FREE);
428 lhmss_put(singleton_default_pses, "csv", "(N/A)", NO_FREE);
429 lhmss_put(singleton_default_pses, "csvlite", "(N/A)", NO_FREE);
430 lhmss_put(singleton_default_pses, "markdown", "(N/A)", NO_FREE);
431 lhmss_put(singleton_default_pses, "pprint", "(N/A)", NO_FREE);
432 lhmss_put(singleton_default_pses, "xtab", " ", NO_FREE);
433 }
434 return singleton_default_pses;
435 }
436
get_default_repeat_ifses()437 static lhmsll_t* get_default_repeat_ifses() {
438 if (singleton_default_repeat_ifses == NULL) {
439 singleton_default_repeat_ifses = lhmsll_alloc();
440 lhmsll_put(singleton_default_repeat_ifses, "gen", FALSE, NO_FREE);
441 lhmsll_put(singleton_default_repeat_ifses, "dkvp", FALSE, NO_FREE);
442 lhmsll_put(singleton_default_repeat_ifses, "json", FALSE, NO_FREE);
443 lhmsll_put(singleton_default_repeat_ifses, "csv", FALSE, NO_FREE);
444 lhmsll_put(singleton_default_repeat_ifses, "csvlite", FALSE, NO_FREE);
445 lhmsll_put(singleton_default_repeat_ifses, "markdown", FALSE, NO_FREE);
446 lhmsll_put(singleton_default_repeat_ifses, "nidx", FALSE, NO_FREE);
447 lhmsll_put(singleton_default_repeat_ifses, "xtab", FALSE, NO_FREE);
448 lhmsll_put(singleton_default_repeat_ifses, "pprint", TRUE, NO_FREE);
449 }
450 return singleton_default_repeat_ifses;
451 }
452
get_default_repeat_ipses()453 static lhmsll_t* get_default_repeat_ipses() {
454 if (singleton_default_repeat_ipses == NULL) {
455 singleton_default_repeat_ipses = lhmsll_alloc();
456 lhmsll_put(singleton_default_repeat_ipses, "gen", FALSE, NO_FREE);
457 lhmsll_put(singleton_default_repeat_ipses, "dkvp", FALSE, NO_FREE);
458 lhmsll_put(singleton_default_repeat_ipses, "json", FALSE, NO_FREE);
459 lhmsll_put(singleton_default_repeat_ipses, "csv", FALSE, NO_FREE);
460 lhmsll_put(singleton_default_repeat_ipses, "csvlite", FALSE, NO_FREE);
461 lhmsll_put(singleton_default_repeat_ipses, "markdown", FALSE, NO_FREE);
462 lhmsll_put(singleton_default_repeat_ipses, "nidx", FALSE, NO_FREE);
463 lhmsll_put(singleton_default_repeat_ipses, "xtab", TRUE, NO_FREE);
464 lhmsll_put(singleton_default_repeat_ipses, "pprint", FALSE, NO_FREE);
465 }
466 return singleton_default_repeat_ipses;
467 }
468
free_opt_singletons()469 static void free_opt_singletons() {
470 lhmss_free(singleton_pdesc_to_chars_map);
471 lhmss_free(singleton_default_rses);
472 lhmss_free(singleton_default_fses);
473 lhmss_free(singleton_default_pses);
474 lhmsll_free(singleton_default_repeat_ifses);
475 lhmsll_free(singleton_default_repeat_ipses);
476 }
477
// Gives a printable spelling of a separator, for showing default separators in the
// on-line help: CR, LF, CRLF, and tab come back as backslash sequences, a single space
// comes back as the word "space", and anything else is returned unchanged (the very
// same pointer). The result must not be freed or modified by the caller.
static char* rebackslash(char* sep) {
	static char* const spellings[][2] = {
		{"\r",   "\\r"},
		{"\n",   "\\n"},
		{"\r\n", "\\r\\n"},
		{"\t",   "\\t"},
		{" ",    "space"},
	};
	int num_spellings = sizeof(spellings) / sizeof(spellings[0]);

	for (int i = 0; i < num_spellings; i++) {
		if (strcmp(sep, spellings[i][0]) == 0)
			return spellings[i][1];
	}
	return sep;
}
493
494 // ----------------------------------------------------------------
// Prints a one-line pointer to "--help" on the given stream and then terminates the
// process with exit status 1; it never returns.
// * fp: stream to print to. (This used to be ignored in favor of a hard-wired stderr.)
// * argv0: program name to show in the message.
static void main_usage_short(FILE* fp, char* argv0) {
	fprintf(fp, "Please run \"%s --help\" for detailed usage information.\n", argv0);
	exit(1);
}
499
500 // ----------------------------------------------------------------
// Prints the full on-line help to the given stream: the synopsis, then each titled
// section separated from the previous one by a blank line, then the see-also text.
// Each section body lives in its own subroutine in support of the manpage autogenerator.
static void main_usage_long(FILE* o, char* argv0) {
	main_usage_synopsis(o, argv0);

	// From here on, each title string begins with the newline which closes the
	// preceding section.
	fputs("\nCommand-line-syntax examples:\n", o);
	main_usage_examples(o, argv0, " ");

	fputs("\nData-format examples:\n", o);
	main_usage_data_format_examples(o, argv0);

	fputs("\nHelp options:\n", o);
	main_usage_help_options(o, argv0);

	fputs("\nCustomization via .mlrrc:\n", o);
	main_usage_mlrrc(o, argv0);

	fputs("\nVerbs:\n", o);
	list_all_verbs(o, " ");

	fputs("\nFunctions for the filter and put verbs:\n", o);
	main_usage_functions(o, argv0, " ");

	fputs("\nData-format options, for input, output, or both:\n", o);
	main_usage_data_format_options(o, argv0);

	fputs("\nComments in data:\n", o);
	main_usage_comments_in_data(o, argv0);

	fputs("\nFormat-conversion keystroke-saver options, for input, output, or both:\n", o);
	main_usage_format_conversion_keystroke_saver_options(o, argv0);

	fputs("\nCompressed-data options:\n", o);
	main_usage_compressed_data_options(o, argv0);

	fputs("\nSeparator options, for input, output, or both:\n", o);
	main_usage_separator_options(o, argv0);

	fputs("\nRelevant to CSV/CSV-lite input only:\n", o);
	main_usage_csv_options(o, argv0);

	fputs("\nDouble-quoting for CSV output:\n", o);
	main_usage_double_quoting(o, argv0);

	fputs("\nNumerical formatting:\n", o);
	main_usage_numerical_formatting(o, argv0);

	fputs("\nOther options:\n", o);
	main_usage_other_options(o, argv0);

	fputs("\nThen-chaining:\n", o);
	main_usage_then_chaining(o, argv0);

	fputs("\nAuxiliary commands:\n", o);
	main_usage_auxents(o, argv0);

	fputs("\n", o);
	main_usage_see_also(o, argv0);
}
578
// Prints the one-line usage synopsis, with argv0 as the program name, to the given stream.
static void main_usage_synopsis(FILE* o, char* argv0) {
	fprintf(o,
		"Usage: %s [I/O options] {verb} [verb-dependent options ...]"
		" {zero or more file names}\n",
		argv0);
}
582
// Prints sample command lines to the given stream.
// * argv0: program name substituted into each example.
// * leader: text printed at the start of each example's first line. The continuation
//   lines of the one multi-line example are printed without it.
static void main_usage_examples(FILE* o, char* argv0, char* leader) {
	fprintf(o, "%s%s --csv cut -f hostname,uptime mydata.csv\n",
		leader, argv0);
	fprintf(o, "%s%s --tsv --rs lf filter '$status != \"down\" && $upsec >= 10000' *.tsv\n",
		leader, argv0);
	fprintf(o, "%s%s --nidx put '$sum = $7 < 0.0 ? 3.5 : $7 + 2.1*$8' *.dat\n",
		leader, argv0);
	fprintf(o, "%sgrep -v '^#' /etc/group | %s --ifs : --nidx --opprint label group,pass,gid,member then sort -f group\n",
		leader, argv0);
	fprintf(o, "%s%s join -j account_id -f accounts.dat then group-by account_name balances.dat\n",
		leader, argv0);
	fprintf(o, "%s%s --json put '$attr = sub($attr, \"([0-9]+)_([0-9]+)_.*\", \"\\1:\\2\")' data/*.json\n",
		leader, argv0);
	fprintf(o, "%s%s stats1 -a min,mean,max,p10,p50,p90 -f flag,u,v data/*\n",
		leader, argv0);
	fprintf(o, "%s%s stats2 -a linreg-pca -f u,v -g shape data/*\n",
		leader, argv0);
	fprintf(o, "%s%s put -q '@sum[$a][$b] += $x; end {emit @sum, \"a\", \"b\"}' data/*\n",
		leader, argv0);

	// One example spanning several output lines: only the first takes leader and argv0.
	fprintf(o, "%s%s --from estimates.tbl put '\n",
		leader, argv0);
	fputs(
		" for (k,v in $*) {\n"
		" if (is_numeric(v) && k =~ \"^[t-z].*$\") {\n"
		" $sum += v; $count += 1\n"
		" }\n"
		" }\n"
		" $mean = $sum / $count # no assignment if count unset'\n",
		o);

	fprintf(o, "%s%s --from infile.dat put -f analyze.mlr\n",
		leader, argv0);
	fprintf(o, "%s%s --from infile.dat put 'tee > \"./taps/data-\".$a.\"-\".$b, $*'\n",
		leader, argv0);
	fprintf(o, "%s%s --from infile.dat put 'tee | \"gzip > ./taps/data-\".$a.\"-\".$b.\".gz\", $*'\n",
		leader, argv0);
	fprintf(o, "%s%s --from infile.dat put -q '@v=$*; dump | \"jq .[]\"'\n",
		leader, argv0);
	// The doubled percent sign comes out as a single one.
	fprintf(o, "%s%s --from infile.dat put '(NR %% 1000 == 0) { print > stderr, \"Checkpoint \".NR}'\n",
		leader, argv0);
}
608
list_all_verbs_raw(FILE * o)609 static void list_all_verbs_raw(FILE* o) {
610 for (int i = 0; i < mapper_lookup_table_length; i++) {
611 fprintf(o, "%s\n", mapper_lookup_table[i]->verb);
612 }
613 }
614
list_all_verbs(FILE * o,char * leader)615 static void list_all_verbs(FILE* o, char* leader) {
616 char* separator = " ";
617 int leaderlen = strlen(leader);
618 int separatorlen = strlen(separator);
619 int linelen = leaderlen;
620 int j = 0;
621 for (int i = 0; i < mapper_lookup_table_length; i++) {
622 char* verb = mapper_lookup_table[i]->verb;
623 int verblen = strlen(verb);
624 linelen += separatorlen + verblen;
625 if (linelen >= 80) {
626 fprintf(o, "\n");
627 linelen = leaderlen + separatorlen + verblen;
628 j = 0;
629 }
630 if (j == 0)
631 fprintf(o, "%s", leader);
632 fprintf(o, "%s%s", separator, verb);
633 j++;
634 }
635 fprintf(o, "\n");
636 }
637
// Prints the list of help-related main-flags to the given stream. argv0 is not used.
static void main_usage_help_options(FILE* o, char* argv0) {
	static const char* const lines[] = {
		" -h or --help Show this message.\n",
		" --version Show the software version.\n",
		" {verb name} --help Show verb-specific help.\n",
		" --help-all-verbs Show help on all verbs.\n",
		" -l or --list-all-verbs List only verb names.\n",
		" -L List only verb names, one per line.\n",
		" -f or --help-all-functions Show help on all built-in functions.\n",
		" -F Show a bare listing of built-in functions by name.\n",
		" -k or --help-all-keywords Show help on all keywords.\n",
		" -K Show a bare listing of keywords by name.\n",
	};
	int num_lines = sizeof(lines) / sizeof(lines[0]);

	for (int i = 0; i < num_lines; i++) {
		fputs(lines[i], o);
	}
}
650
// Prints the help section describing .mlrrc customization to the given stream: what
// the file is for, its format, a sample, and how its location is determined.
// argv0 is not used.
static void main_usage_mlrrc(FILE* o, char* argv0) {
	static const char* const lines[] = {
		"You can set up personal defaults via a $HOME/.mlrrc and/or ./.mlrrc.\n",
		"For example, if you usually process CSV, then you can put \"--csv\" in your .mlrrc file\n",
		"and that will be the default input/output format unless otherwise specified on the command line.\n",
		"\n",
		"The .mlrrc file format is one \"--flag\" or \"--option value\" per line, with the leading \"--\" optional.\n",
		"Hash-style comments and blank lines are ignored.\n",
		"\n",
		"Sample .mlrrc:\n",
		"# Input and output formats are CSV by default (unless otherwise specified\n",
		"# on the mlr command line):\n",
		"csv\n",
		"# These are no-ops for CSV, but when I do use JSON output, I want these\n",
		"# pretty-printing options to be used:\n",
		"jvstack\n",
		"jlistwrap\n",
		"\n",
		"How to specify location of .mlrrc:\n",
		"* If $MLRRC is set:\n",
		" o If its value is \"__none__\" then no .mlrrc files are processed.\n",
		" o Otherwise, its value (as a filename) is loaded and processed. If there are syntax\n",
		" errors, they abort mlr with a usage message (as if you had mistyped something on the\n",
		" command line). If the file can't be loaded at all, though, it is silently skipped.\n",
		" o Any .mlrrc in your home directory or current directory is ignored whenever $MLRRC is\n",
		" set in the environment.\n",
		"* Otherwise:\n",
		" o If $HOME/.mlrrc exists, it's then processed as above.\n",
		" o If ./.mlrrc exists, it's then also processed as above.\n",
		" (I.e. current-directory .mlrrc defaults are stacked over home-directory .mlrrc defaults.)\n",
		"\n",
		"See also:\n",
		"https://johnkerl.org/miller/doc/customization.html\n",
	};
	int num_lines = sizeof(lines) / sizeof(lines[0]);

	for (int i = 0; i < num_lines; i++) {
		fputs(lines[i], o);
	}
}
684
main_usage_functions(FILE * o,char * argv0,char * leader)685 static void main_usage_functions(FILE* o, char* argv0, char* leader) {
686 fmgr_t* pfmgr = fmgr_alloc();
687 fmgr_list_functions(pfmgr, o, leader);
688 fmgr_free(pfmgr, NULL);
689 fprintf(o, "\n");
690 fprintf(o, "Please use \"%s --help-function {function name}\" for function-specific help.\n", argv0);
691 }
692
// Prints, for each supported file format, a small sample of data alongside the records
// it represents. argv0 is not used.
//
// The CSV, PPRINT, and Markdown "Record 1" annotations used to read "apple => with the
// closing double quote missing after the key; they now match the other annotations.
static void main_usage_data_format_examples(FILE* o, char* argv0) {
	fprintf(o,
		" DKVP: delimited key-value pairs (Miller default format)\n"
		" +---------------------+\n"
		" | apple=1,bat=2,cog=3 | Record 1: \"apple\" => \"1\", \"bat\" => \"2\", \"cog\" => \"3\"\n"
		" | dish=7,egg=8,flint | Record 2: \"dish\" => \"7\", \"egg\" => \"8\", \"3\" => \"flint\"\n"
		" +---------------------+\n"
		"\n"
		" NIDX: implicitly numerically indexed (Unix-toolkit style)\n"
		" +---------------------+\n"
		" | the quick brown | Record 1: \"1\" => \"the\", \"2\" => \"quick\", \"3\" => \"brown\"\n"
		" | fox jumped | Record 2: \"1\" => \"fox\", \"2\" => \"jumped\"\n"
		" +---------------------+\n"
		"\n"
		" CSV/CSV-lite: comma-separated values with separate header line\n"
		" +---------------------+\n"
		" | apple,bat,cog |\n"
		" | 1,2,3 | Record 1: \"apple\" => \"1\", \"bat\" => \"2\", \"cog\" => \"3\"\n"
		" | 4,5,6 | Record 2: \"apple\" => \"4\", \"bat\" => \"5\", \"cog\" => \"6\"\n"
		" +---------------------+\n"
		"\n"
		" Tabular JSON: nested objects are supported, although arrays within them are not:\n"
		" +---------------------+\n"
		" | { |\n"
		" | \"apple\": 1, | Record 1: \"apple\" => \"1\", \"bat\" => \"2\", \"cog\" => \"3\"\n"
		" | \"bat\": 2, |\n"
		" | \"cog\": 3 |\n"
		" | } |\n"
		" | { |\n"
		" | \"dish\": { | Record 2: \"dish:egg\" => \"7\", \"dish:flint\" => \"8\", \"garlic\" => \"\"\n"
		" | \"egg\": 7, |\n"
		" | \"flint\": 8 |\n"
		" | }, |\n"
		" | \"garlic\": \"\" |\n"
		" | } |\n"
		" +---------------------+\n"
		"\n"
		" PPRINT: pretty-printed tabular\n"
		" +---------------------+\n"
		" | apple bat cog |\n"
		" | 1 2 3 | Record 1: \"apple\" => \"1\", \"bat\" => \"2\", \"cog\" => \"3\"\n"
		" | 4 5 6 | Record 2: \"apple\" => \"4\", \"bat\" => \"5\", \"cog\" => \"6\"\n"
		" +---------------------+\n"
		"\n"
		" XTAB: pretty-printed transposed tabular\n"
		" +---------------------+\n"
		" | apple 1 | Record 1: \"apple\" => \"1\", \"bat\" => \"2\", \"cog\" => \"3\"\n"
		" | bat 2 |\n"
		" | cog 3 |\n"
		" | |\n"
		" | dish 7 | Record 2: \"dish\" => \"7\", \"egg\" => \"8\"\n"
		" | egg 8 |\n"
		" +---------------------+\n"
		"\n"
		" Markdown tabular (supported for output only):\n"
		" +-----------------------+\n"
		" | | apple | bat | cog | |\n"
		" | | --- | --- | --- | |\n"
		" | | 1 | 2 | 3 | | Record 1: \"apple\" => \"1\", \"bat\" => \"2\", \"cog\" => \"3\"\n"
		" | | 4 | 5 | 6 | | Record 2: \"apple\" => \"4\", \"bat\" => \"5\", \"cog\" => \"6\"\n"
		" +-----------------------+\n");
}
755
main_usage_data_format_options(FILE * o,char * argv0)756 static void main_usage_data_format_options(FILE* o, char* argv0) {
757 fprintf(o, " --idkvp --odkvp --dkvp Delimited key-value pairs, e.g \"a=1,b=2\"\n");
758 fprintf(o, " (this is Miller's default format).\n");
759 fprintf(o, "\n");
760 fprintf(o, " --inidx --onidx --nidx Implicitly-integer-indexed fields\n");
761 fprintf(o, " (Unix-toolkit style).\n");
762 fprintf(o, " -T Synonymous with \"--nidx --fs tab\".\n");
763 fprintf(o, "\n");
764 fprintf(o, " --icsv --ocsv --csv Comma-separated value (or tab-separated\n");
765 fprintf(o, " with --fs tab, etc.)\n");
766 fprintf(o, "\n");
767 fprintf(o, " --itsv --otsv --tsv Keystroke-savers for \"--icsv --ifs tab\",\n");
768 fprintf(o, " \"--ocsv --ofs tab\", \"--csv --fs tab\".\n");
769 fprintf(o, " --iasv --oasv --asv Similar but using ASCII FS %s and RS %s\n",
770 ASV_FS_FOR_HELP, ASV_RS_FOR_HELP);
771 fprintf(o, " --iusv --ousv --usv Similar but using Unicode FS %s\n",
772 USV_FS_FOR_HELP);
773 fprintf(o, " and RS %s\n",
774 USV_RS_FOR_HELP);
775 fprintf(o, "\n");
776 fprintf(o, " --icsvlite --ocsvlite --csvlite Comma-separated value (or tab-separated\n");
777 fprintf(o, " with --fs tab, etc.). The 'lite' CSV does not handle\n");
778 fprintf(o, " RFC-CSV double-quoting rules; is slightly faster;\n");
779 fprintf(o, " and handles heterogeneity in the input stream via\n");
780 fprintf(o, " empty newline followed by new header line. See also\n");
781 fprintf(o, " http://johnkerl.org/miller/doc/file-formats.html#CSV/TSV/etc.\n");
782 fprintf(o, "\n");
783 fprintf(o, " --itsvlite --otsvlite --tsvlite Keystroke-savers for \"--icsvlite --ifs tab\",\n");
784 fprintf(o, " \"--ocsvlite --ofs tab\", \"--csvlite --fs tab\".\n");
785 fprintf(o, " -t Synonymous with --tsvlite.\n");
786 fprintf(o, " --iasvlite --oasvlite --asvlite Similar to --itsvlite et al. but using ASCII FS %s and RS %s\n",
787 ASV_FS_FOR_HELP, ASV_RS_FOR_HELP);
788 fprintf(o, " --iusvlite --ousvlite --usvlite Similar to --itsvlite et al. but using Unicode FS %s\n",
789 USV_FS_FOR_HELP);
790 fprintf(o, " and RS %s\n",
791 USV_RS_FOR_HELP);
792 fprintf(o, "\n");
793 fprintf(o, " --ipprint --opprint --pprint Pretty-printed tabular (produces no\n");
794 fprintf(o, " output until all input is in).\n");
795 fprintf(o, " --right Right-justifies all fields for PPRINT output.\n");
796 fprintf(o, " --barred Prints a border around PPRINT output\n");
797 fprintf(o, " (only available for output).\n");
798 fprintf(o, "\n");
799 fprintf(o, " --omd Markdown-tabular (only available for output).\n");
800 fprintf(o, "\n");
801 fprintf(o, " --ixtab --oxtab --xtab Pretty-printed vertical-tabular.\n");
802 fprintf(o, " --xvright Right-justifies values for XTAB format.\n");
803 fprintf(o, "\n");
804 fprintf(o, " --ijson --ojson --json JSON tabular: sequence or list of one-level\n");
805 fprintf(o, " maps: {...}{...} or [{...},{...}].\n");
806 fprintf(o, " --json-map-arrays-on-input JSON arrays are unmillerable. --json-map-arrays-on-input\n");
807 fprintf(o, " --json-skip-arrays-on-input is the default: arrays are converted to integer-indexed\n");
808 fprintf(o, " --json-fatal-arrays-on-input maps. The other two options cause them to be skipped, or\n");
809 fprintf(o, " to be treated as errors. Please use the jq tool for full\n");
810 fprintf(o, " JSON (pre)processing.\n");
811 fprintf(o, " --jvstack Put one key-value pair per line for JSON\n");
812 fprintf(o, " output.\n");
813 fprintf(o, " --jsonx --ojsonx Keystroke-savers for --json --jvstack\n");
814 fprintf(o, " --jsonx --ojsonx and --ojson --jvstack, respectively.\n");
815 fprintf(o, " --jlistwrap Wrap JSON output in outermost [ ].\n");
816 fprintf(o, " --jknquoteint Do not quote non-string map keys in JSON output.\n");
817 fprintf(o, " --jvquoteall Quote map values in JSON output, even if they're\n");
818 fprintf(o, " numeric.\n");
819 fprintf(o, " --jflatsep {string} Separator for flattening multi-level JSON keys,\n");
820 fprintf(o, " e.g. '{\"a\":{\"b\":3}}' becomes a:b => 3 for\n");
821 fprintf(o, " non-JSON formats. Defaults to %s.\n",
822 DEFAULT_JSON_FLATTEN_SEPARATOR);
823 fprintf(o, "\n");
824 fprintf(o, " -p is a keystroke-saver for --nidx --fs space --repifs\n");
825 fprintf(o, "\n");
826 fprintf(o, " Examples: --csv for CSV-formatted input and output; --idkvp --opprint for\n");
827 fprintf(o, " DKVP-formatted input and pretty-printed output.\n");
828 fprintf(o, "\n");
829 fprintf(o, " Please use --iformat1 --oformat2 rather than --format1 --oformat2.\n");
830 fprintf(o, " The latter sets up input and output flags for format1, not all of which\n");
831 fprintf(o, " are overridden in all cases by setting output format to format2.\n");
832 }
833
main_usage_comments_in_data(FILE * o,char * argv0)834 static void main_usage_comments_in_data(FILE* o, char* argv0) {
835 fprintf(o, " --skip-comments Ignore commented lines (prefixed by \"%s\")\n",
836 DEFAULT_COMMENT_STRING);
837 fprintf(o, " within the input.\n");
838 fprintf(o, " --skip-comments-with {string} Ignore commented lines within input, with\n");
839 fprintf(o, " specified prefix.\n");
840 fprintf(o, " --pass-comments Immediately print commented lines (prefixed by \"%s\")\n",
841 DEFAULT_COMMENT_STRING);
842 fprintf(o, " within the input.\n");
843 fprintf(o, " --pass-comments-with {string} Immediately print commented lines within input, with\n");
844 fprintf(o, " specified prefix.\n");
845 fprintf(o, "Notes:\n");
846 fprintf(o, "* Comments are only honored at the start of a line.\n");
847 fprintf(o, "* In the absence of any of the above four options, comments are data like\n");
848 fprintf(o, " any other text.\n");
849 fprintf(o, "* When pass-comments is used, comment lines are written to standard output\n");
850 fprintf(o, " immediately upon being read; they are not part of the record stream.\n");
851 fprintf(o, " Results may be counterintuitive. A suggestion is to place comments at the\n");
852 fprintf(o, " start of data files.\n");
853 }
854
// Writes the list of two-letter format-conversion shorthand flags (--c2t,
// --j2p, ...) and the legend for their letters to the stream o. The argv0
// parameter is not used.
static void main_usage_format_conversion_keystroke_saver_options(FILE* o, char* argv0) {
	// The text is constant, so it is emitted as one verbatim string.
	fputs(
		"As keystroke-savers for format-conversion you may use the following:\n"
		" --c2t --c2d --c2n --c2j --c2x --c2p --c2m\n"
		" --t2c --t2d --t2n --t2j --t2x --t2p --t2m\n"
		" --d2c --d2t --d2n --d2j --d2x --d2p --d2m\n"
		" --n2c --n2t --n2d --n2j --n2x --n2p --n2m\n"
		" --j2c --j2t --j2d --j2n --j2x --j2p --j2m\n"
		" --x2c --x2t --x2d --x2n --x2j --x2p --x2m\n"
		" --p2c --p2t --p2d --p2n --p2j --p2x --p2m\n"
		"The letters c t d n j x p m refer to formats CSV, TSV, DKVP, NIDX, JSON, XTAB,\n"
		"PPRINT, and markdown, respectively. Note that markdown format is available for\n"
		"output only.\n",
		o);
}
868
// Writes the help text for --prepipe and its shorthands to the stream o.
//
// o:     stream the help text is written to.
// argv0: program name substituted into the example command lines.
static void main_usage_compressed_data_options(FILE* o, char* argv0) {
	// Flag arguments shown in the "Examples:" list, in display order.
	static const char* const example_flags[] = {
		"--prepipe 'gunzip'",
		"--prepipe 'zcat -cf'",
		"--prepipe 'xz -cd'",
		"--prepipe cat",
		"--prepipe-gunzip",
		"--prepipe-zcat",
	};
	const size_t num_examples = sizeof(example_flags) / sizeof(example_flags[0]);

	fprintf(o,
		" --prepipe {command} This allows Miller to handle compressed inputs. You can do\n"
		" without this for single input files, e.g. \"gunzip < myfile.csv.gz | %s ...\".\n"
		"\n"
		" However, when multiple input files are present, between-file separations are\n"
		" lost; also, the FILENAME variable doesn't iterate. Using --prepipe you can\n"
		" specify an action to be taken on each input file. This pre-pipe command must\n"
		" be able to read from standard input; it will be invoked with\n"
		" {command} < {filename}.\n"
		" Examples:\n",
		argv0);

	for (size_t i = 0; i < num_examples; i++) {
		fprintf(o, " %s %s\n", argv0, example_flags[i]);
	}

	fprintf(o,
		" Note that this feature is quite general and is not limited to decompression\n"
		" utilities. You can use it to apply per-file filters of your choice.\n"
		" For output compression (or other) utilities, simply pipe the output:\n"
		" %s ... | {your compression command}\n"
		"\n"
		" There are shorthands --prepipe-zcat and --prepipe-gunzip which are\n"
		" valid in .mlrrc files. The --prepipe flag is not valid in .mlrrc\n"
		" files since that would put execution of the prepipe command under \n"
		" control of the .mlrrc file.\n",
		argv0);
}
896
main_usage_separator_options(FILE * o,char * argv0)897 static void main_usage_separator_options(FILE* o, char* argv0) {
898 fprintf(o, " --rs --irs --ors Record separators, e.g. 'lf' or '\\r\\n'\n");
899 fprintf(o, " --fs --ifs --ofs --repifs Field separators, e.g. comma\n");
900 fprintf(o, " --ps --ips --ops Pair separators, e.g. equals sign\n");
901 fprintf(o, "\n");
902 fprintf(o, " Notes about line endings:\n");
903 fprintf(o, " * Default line endings (--irs and --ors) are \"auto\" which means autodetect from\n");
904 fprintf(o, " the input file format, as long as the input file(s) have lines ending in either\n");
905 fprintf(o, " LF (also known as linefeed, '\\n', 0x0a, Unix-style) or CRLF (also known as\n");
906 fprintf(o, " carriage-return/linefeed pairs, '\\r\\n', 0x0d 0x0a, Windows style).\n");
907 fprintf(o, " * If both irs and ors are auto (which is the default) then LF input will lead to LF\n");
908 fprintf(o, " output and CRLF input will lead to CRLF output, regardless of the platform you're\n");
909 fprintf(o, " running on.\n");
910 fprintf(o, " * The line-ending autodetector triggers on the first line ending detected in the input\n");
911 fprintf(o, " stream. E.g. if you specify a CRLF-terminated file on the command line followed by an\n");
912 fprintf(o, " LF-terminated file then autodetected line endings will be CRLF.\n");
913 fprintf(o, " * If you use --ors {something else} with (default or explicitly specified) --irs auto\n");
914 fprintf(o, " then line endings are autodetected on input and set to what you specify on output.\n");
915 fprintf(o, " * If you use --irs {something else} with (default or explicitly specified) --ors auto\n");
916 fprintf(o, " then the output line endings used are LF on Unix/Linux/BSD/MacOSX, and CRLF on Windows.\n");
917 fprintf(o, "\n");
918 fprintf(o, " Notes about all other separators:\n");
919 fprintf(o, " * IPS/OPS are only used for DKVP and XTAB formats, since only in these formats\n");
920 fprintf(o, " do key-value pairs appear juxtaposed.\n");
921 fprintf(o, " * IRS/ORS are ignored for XTAB format. Nominally IFS and OFS are newlines;\n");
922 fprintf(o, " XTAB records are separated by two or more consecutive IFS/OFS -- i.e.\n");
923 fprintf(o, " a blank line. Everything above about --irs/--ors/--rs auto becomes --ifs/--ofs/--fs\n");
924 fprintf(o, " auto for XTAB format. (XTAB's default IFS/OFS are \"auto\".)\n");
925 fprintf(o, " * OFS must be single-character for PPRINT format. This is because it is used\n");
926 fprintf(o, " with repetition for alignment; multi-character separators would make\n");
927 fprintf(o, " alignment impossible.\n");
928 fprintf(o, " * OPS may be multi-character for XTAB format, in which case alignment is\n");
929 fprintf(o, " disabled.\n");
930 fprintf(o, " * TSV is simply CSV using tab as field separator (\"--fs tab\").\n");
931 fprintf(o, " * FS/PS are ignored for markdown format; RS is used.\n");
932 fprintf(o, " * All FS and PS options are ignored for JSON format, since they are not relevant\n");
933 fprintf(o, " to the JSON format.\n");
934 fprintf(o, " * You can specify separators in any of the following ways, shown by example:\n");
935 fprintf(o, " - Type them out, quoting as necessary for shell escapes, e.g.\n");
936 fprintf(o, " \"--fs '|' --ips :\"\n");
937 fprintf(o, " - C-style escape sequences, e.g. \"--rs '\\r\\n' --fs '\\t'\".\n");
938 fprintf(o, " - To avoid backslashing, you can use any of the following names:\n");
939 fprintf(o, " ");
940 lhmss_t* pmap = get_desc_to_chars_map();
941 for (lhmsse_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
942 fprintf(o, " %s", pe->key);
943 }
944 fprintf(o, "\n");
945 fprintf(o, " * Default separators by format:\n");
946 fprintf(o, " %-12s %-8s %-8s %s\n", "File format", "RS", "FS", "PS");
947 lhmss_t* default_rses = get_default_rses();
948 lhmss_t* default_fses = get_default_fses();
949 lhmss_t* default_pses = get_default_pses();
950 for (lhmsse_t* pe = default_rses->phead; pe != NULL; pe = pe->pnext) {
951 char* filefmt = pe->key;
952 char* rs = pe->value;
953 char* fs = lhmss_get(default_fses, filefmt);
954 char* ps = lhmss_get(default_pses, filefmt);
955 fprintf(o, " %-12s %-8s %-8s %s\n", filefmt, rebackslash(rs), rebackslash(fs), rebackslash(ps));
956 }
957 }
958
// Writes the help text for the CSV-only flags (implicit header, ragged
// input, headerless output, -N) to the stream o. The argv0 parameter is not
// used.
static void main_usage_csv_options(FILE* o, char* argv0) {
	// Constant text, emitted verbatim in a single write.
	fputs(
		" --implicit-csv-header Use 1,2,3,... as field labels, rather than from line 1\n"
		" of input files. Tip: combine with \"label\" to recreate\n"
		" missing headers.\n"
		" --allow-ragged-csv-input|--ragged If a data line has fewer fields than the header line,\n"
		" fill remaining keys with empty string. If a data line has more\n"
		" fields than the header line, use integer field labels as in\n"
		" the implicit-header case.\n"
		" --headerless-csv-output Print only CSV data lines.\n"
		" -N Keystroke-saver for --implicit-csv-header --headerless-csv-output.\n",
		o);
}
970
// Writes the help text for the output-quoting flags (--quote-all,
// --quote-none, --quote-minimal, --quote-numeric, --quote-original) to the
// stream o. The argv0 parameter is not used.
static void main_usage_double_quoting(FILE* o, char* argv0) {
	// Constant text, emitted verbatim in a single write.
	fputs(
		" --quote-all Wrap all fields in double quotes\n"
		" --quote-none Do not wrap any fields in double quotes, even if they have\n"
		" OFS or ORS in them\n"
		" --quote-minimal Wrap fields in double quotes only if they have OFS or ORS\n"
		" in them (default)\n"
		" --quote-numeric Wrap fields in double quotes only if they have numbers\n"
		" in them\n"
		" --quote-original Wrap fields in double quotes if and only if they were\n"
		" quoted on input. This isn't sticky for computed fields:\n"
		" e.g. if fields a and b were quoted on input and you do\n"
		" \"put '$c = $a . $b'\" then field c won't inherit a or b's\n"
		" was-quoted-on-input flag.\n",
		o);
}
985
main_usage_numerical_formatting(FILE * o,char * argv0)986 static void main_usage_numerical_formatting(FILE* o, char* argv0) {
987 fprintf(o, " --ofmt {format} E.g. %%.18lf, %%.0lf. Please use sprintf-style codes for\n");
988 fprintf(o, " double-precision. Applies to verbs which compute new\n");
989 fprintf(o, " values, e.g. put, stats1, stats2. See also the fmtnum\n");
990 fprintf(o, " function within mlr put (mlr --help-all-functions).\n");
991 fprintf(o, " Defaults to %s.\n", DEFAULT_OFMT);
992 }
993
// Writes the help text for the miscellaneous flags (--seed,
// --nr-progress-mod, --from, -n, -I) to the stream o.
//
// o:     stream the help text is written to.
// argv0: program name substituted into the example command lines.
static void main_usage_other_options(FILE* o, char* argv0) {
	fputs(
		" --seed {n} with n of the form 12345678 or 0xcafefeed. For put/filter\n"
		" urand()/urandint()/urand32().\n"
		" --nr-progress-mod {m}, with m a positive integer: print filename and record\n"
		" count to stderr every m input records.\n"
		" --from {filename} Use this to specify an input file before the verb(s),\n"
		" rather than after. May be used more than once. Example:\n",
		o);

	// The four %s slots below all take the program name.
	fprintf(o,
		" \"%s --from a.dat --from b.dat cat\" is the same as\n"
		" \"%s cat a.dat b.dat\".\n"
		" -n Process no input files, nor standard input either. Useful\n"
		" for %s put with begin/end statements only. (Same as --from\n"
		" /dev/null.) Also useful in \"%s -n put -v '...'\" for\n"
		" analyzing abstract syntax trees (if that's your thing).\n",
		argv0, argv0, argv0, argv0);

	fputs(
		" -I Process files in-place. For each file name on the command\n"
		" line, output is written to a temp file in the same\n"
		" directory, which is then renamed over the original. Each\n"
		" file is processed in isolation: if the output format is\n"
		" CSV, CSV headers will be present in each output file;\n"
		" statistics are only over each file's own records; and so on.\n",
		o);
}
1014
// Writes the one-paragraph explanation of verb chaining with "then" to the
// stream o; argv0 is the program name used in the example command line.
static void main_usage_then_chaining(FILE* o, char* argv0) {
	fprintf(o,
		"Output of one verb may be chained as input to another using \"then\", e.g.\n"
		" %s stats1 -a min,mean,max -f flag,u,v -g color then sort -f color\n",
		argv0);
}
1019
// Writes a short introduction to the auxiliary executables to the stream o,
// then lets show_aux_entries() print its output to the same stream. The
// argv0 parameter is not used.
static void main_usage_auxents(FILE* o, char* argv0) {
	fputs(
		"Miller has a few otherwise-standalone executables packaged within it.\n"
		"They do not participate in any other parts of Miller.\n",
		o);
	show_aux_entries(o);
}
1025
main_usage_see_also(FILE * o,char * argv0)1026 static void main_usage_see_also(FILE* o, char* argv0) {
1027 fprintf(o, "For more information please see http://johnkerl.org/miller/doc and/or\n");
1028 fprintf(o, "http://github.com/johnkerl/miller.");
1029 fprintf(o, " This is Miller version %s.\n", VERSION_STRING);
1030 }
1031
print_type_arithmetic_info(FILE * o,char * argv0)1032 static void print_type_arithmetic_info(FILE* o, char* argv0) {
1033 for (int i = -2; i < MT_DIM; i++) {
1034 mv_t a = (mv_t) {.type = i, .free_flags = NO_FREE, .u.intv = 0};
1035 if (i == -2)
1036 printf("%-6s |", "(+)");
1037 else if (i == -1)
1038 printf("%-6s +", "------");
1039 else
1040 printf("%-6s |", mt_describe_type_simple(a.type));
1041
1042 for (int j = 0; j < MT_DIM; j++) {
1043 mv_t b = (mv_t) {.type = j, .free_flags = NO_FREE, .u.intv = 0};
1044 if (i == -2) {
1045 printf(" %-6s", mt_describe_type_simple(b.type));
1046 } else if (i == -1) {
1047 printf(" %-6s", "------");
1048 } else {
1049 mv_t c = x_xx_plus_func(&a, &b);
1050 printf(" %-6s", mt_describe_type_simple(c.type));
1051 }
1052 }
1053
1054 fprintf(o, "\n");
1055 }
1056 }
1057
1058 // ----------------------------------------------------------------
usage_all_verbs(char * argv0)1059 static void usage_all_verbs(char* argv0) {
1060 char* separator = "================================================================";
1061
1062 for (int i = 0; i < mapper_lookup_table_length; i++) {
1063 fprintf(stdout, "%s\n", separator);
1064 mapper_lookup_table[i]->pusage_func(stdout, argv0, mapper_lookup_table[i]->verb);
1065 fprintf(stdout, "\n");
1066 }
1067 fprintf(stdout, "%s\n", separator);
1068 exit(0);
1069 }
1070
// Reports on stderr that arg was not recognized, points the user at
// "--help", and exits with status 1. Does not return.
static void usage_unrecognized_verb(char* argv0, char* arg) {
	fprintf(stderr,
		"%s: option \"%s\" not recognized.\n"
		"Please run \"%s --help\" for usage information.\n",
		argv0, arg, argv0);
	exit(1);
}
1076
check_arg_count(char ** argv,int argi,int argc,int n)1077 static void check_arg_count(char** argv, int argi, int argc, int n) {
1078 if ((argc - argi) < n) {
1079 fprintf(stderr, "%s: option \"%s\" missing argument(s).\n", MLR_GLOBALS.bargv0, argv[argi]);
1080 main_usage_short(stderr, MLR_GLOBALS.bargv0);
1081 exit(1);
1082 }
1083 }
1084
look_up_mapper_setup(char * verb)1085 static mapper_setup_t* look_up_mapper_setup(char* verb) {
1086 mapper_setup_t* pmapper_setup = NULL;
1087 for (int i = 0; i < mapper_lookup_table_length; i++) {
1088 if (streq(mapper_lookup_table[i]->verb, verb))
1089 return mapper_lookup_table[i];
1090 }
1091
1092 return pmapper_setup;
1093 }
1094
1095 // ----------------------------------------------------------------
// Puts *popts into its initial state: everything zeroed, reader and writer
// sub-options initialized by their own init functions, and an empty,
// freshly allocated filename list.
void cli_opts_init(cli_opts_t* popts) {
	// Zero first so any field not named below starts out as all-zero bytes.
	memset(popts, 0, sizeof(*popts));

	// The nested option structs set their own initial markers.
	cli_reader_opts_init(&popts->reader_opts);
	cli_writer_opts_init(&popts->writer_opts);

	// Boolean switches, all off.
	popts->do_in_place = FALSE;
	popts->no_input = FALSE;
	popts->have_rand_seed = FALSE;

	// Numeric settings.
	popts->rand_seed = 0;
	popts->nr_progress_mod = 0LL;
	popts->mapper_argb = 0;

	// No output format chosen yet; cli_apply_defaults fills it in if still NULL.
	popts->ofmt = NULL;

	popts->filenames = slls_alloc();
}
1114
1115 // ----------------------------------------------------------------
1116 // * If $MLRRC is set, use it and only it.
1117 // * Otherwise try first $HOME/.mlrrc and then ./.mlrrc but let them
1118 // stack: e.g. $HOME/.mlrrc is lots of settings and maybe in one
1119 // subdir you want to override just a setting or two.
cli_load_mlrrc(cli_opts_t * popts)1120 static void cli_load_mlrrc(cli_opts_t* popts) {
1121 char* env_mlrrc = getenv("MLRRC");
1122 if (env_mlrrc != NULL) {
1123 if (streq(env_mlrrc, "__none__")) {
1124 return;
1125 }
1126 cli_try_load_mlrrc(popts, env_mlrrc);
1127 return;
1128 }
1129
1130 char* env_home = getenv("HOME");
1131 if (env_home != NULL) {
1132 char* path = mlr_paste_2_strings(env_home, "/.mlrrc");
1133 cli_try_load_mlrrc(popts, path);
1134 free(path);
1135 }
1136
1137 cli_try_load_mlrrc(popts, "./.mlrrc");
1138 }
1139
// Reads the file at path line by line and applies each line to *popts via
// handle_mlrrc_line_1.
//
// * If the file cannot be opened, returns silently without touching *popts.
// * If any line is rejected, prints the path, line number and line text to
//   stderr and exits with status 1.
// * If a scratch copy of a line cannot be allocated, prints a message to
//   stderr and exits with status 1.
static void cli_try_load_mlrrc(cli_opts_t* popts, char* path) {
	FILE* fp = fopen(path, "r");
	if (fp == NULL) {
		return;
	}

	char* line = NULL;
	size_t linecap = 0;
	int lineno = 0;

	// getline's return value is only compared against -1 (end of file or
	// error), so it is not stored.
	while (getline(&line, &linecap, fp) != -1) {
		lineno++;

		// handle_mlrrc_line_1 modifies the string it is given, so it gets a
		// scratch copy and the untouched line stays available for the error
		// message.
		char* line_to_destroy = strdup(line);
		if (line_to_destroy == NULL) {
			fprintf(stderr, "Out of memory while reading file \"%s\" line %d.\n",
				path, lineno);
			exit(1);
		}

		if (!handle_mlrrc_line_1(popts, line_to_destroy)) {
			fprintf(stderr, "Parse error at file \"%s\" line %d: %s\n",
				path, lineno, line);
			exit(1);
		}
		free(line_to_destroy);
	}

	fclose(fp);
	free(line); // free(NULL) is a no-op, so no guard is needed
}
1167
1168 // Chomps trailing CR, LF, or CR/LF; comment-strips; left-right trims.
handle_mlrrc_line_1(cli_opts_t * popts,char * line)1169 static int handle_mlrrc_line_1(cli_opts_t* popts, char* line) {
1170 // chomp
1171 size_t len = strlen(line);
1172 if (len >= 2 && line[len-2] == '\r' && line[len-1] == '\n') {
1173 line[len-2] = 0;
1174 } else if (len >= 1 && (line[len-1] == '\r' || line[len-1] == '\n')) {
1175 line[len-1] = 0;
1176 }
1177
1178 // comment-strip
1179 char* pbang = strstr(line, "#");
1180 if (pbang != NULL) {
1181 *pbang = 0;
1182 }
1183
1184 // Left-trim
1185 char* start = line;
1186 while (*start == ' ' || *start == '\t') {
1187 start++;
1188 }
1189
1190 // Right-trim
1191 len = strlen(start);
1192 char* end = &start[len-1];
1193 while (end > start && (*end == ' ' || *end == '\t')) {
1194 *end = 0;
1195 end--;
1196 }
1197 if (end < start) { // line was whitespace-only
1198 return TRUE;
1199 } else {
1200 return handle_mlrrc_line_2(popts, start);
1201 }
1202 }
1203
1204 // Prepends initial "--" if it's not already there
handle_mlrrc_line_2(cli_opts_t * popts,char * line)1205 static int handle_mlrrc_line_2(cli_opts_t* popts, char* line) {
1206 size_t len = strlen(line);
1207
1208 char* dashed_line = NULL;
1209 if (len >= 2 && line[0] != '-' && line[1] != '-') {
1210 dashed_line = mlr_paste_2_strings("--", line);
1211 } else {
1212 dashed_line = strdup(line);
1213 }
1214
1215 int rc = handle_mlrrc_line_3(popts, dashed_line);
1216
1217 // Do not free these. The command-line parsers can retain pointers into argv strings (rather
1218 // than copying), resulting in freed-memory reads later in the data-processing verbs.
1219 //
1220 // It would be possible to be diligent about making sure all current command-line-parsing
1221 // callsites copy strings rather than pointing to them -- but it would be easy to miss some, and
1222 // also any future codemods might make the same mistake as well.
1223 //
1224 // It's safer (and no big leak) to simply leave these parsed mlrrc lines unfreed.
1225 //
1226 // free(dashed_line);
1227 return rc;
1228 }
1229
1230 // Splits line into argv array
handle_mlrrc_line_3(cli_opts_t * popts,char * line)1231 static int handle_mlrrc_line_3(cli_opts_t* popts, char* line) {
1232 char* argv[3];
1233 int argc = 0;
1234 char* split = strpbrk(line, " \t");
1235 if (split == NULL) {
1236 argv[0] = line;
1237 argv[1] = NULL;
1238 argc = 1;
1239 } else {
1240 *split = 0;
1241 char* p = split + 1;
1242 while (*p == ' ' || *p == '\t') {
1243 p++;
1244 }
1245 argv[0] = line;
1246 argv[1] = p;
1247 argv[2] = NULL;
1248 argc = 2;
1249 }
1250 return handle_mlrrc_line_4(popts, argv, argc);
1251 }
1252
// Final stage of .mlrrc line handling: offers the argv-style option to the
// reader, writer, reader-writer, and misc option handlers, in that order,
// stopping at the first one that accepts it.
//
// Returns TRUE if some handler accepted the option. Returns FALSE if none
// did, or if the option is "--prepipe", which is refused outright.
static int handle_mlrrc_line_4(cli_opts_t* popts, char** argv, int argc) {
	// Don't allow code execution via .mlrrc
	if (streq(argv[0], "--prepipe")) {
		return FALSE;
	}

	int argi = 0;
	// || short-circuits, so later handlers run only if earlier ones declined.
	int handled =
		cli_handle_reader_options(argv, argc, &argi, &popts->reader_opts)
		|| cli_handle_writer_options(argv, argc, &argi, &popts->writer_opts)
		|| cli_handle_reader_writer_options(argv, argc, &argi, &popts->reader_opts, &popts->writer_opts)
		|| cli_handle_misc_options(argv, argc, &argi, popts);

	return handled ? TRUE : FALSE;
}
1274
1275 // ----------------------------------------------------------------
// Sets the reader options to their "not specified" state: string settings
// are NULL, tri-state flags are NEITHER_TRUE_NOR_FALSE, JSON array ingest is
// JSON_ARRAY_INGEST_UNSPECIFIED, and comments are treated as data. The
// generator options get concrete initial values (field "i", 0 to 100, step 1).
void cli_reader_opts_init(cli_reader_opts_t* preader_opts) {
	// Format and separators
	preader_opts->ifile_fmt = NULL;
	preader_opts->irs = NULL;
	preader_opts->ifs = NULL;
	preader_opts->ips = NULL;
	preader_opts->allow_repeat_ifs = NEITHER_TRUE_NOR_FALSE;
	preader_opts->allow_repeat_ips = NEITHER_TRUE_NOR_FALSE;

	// JSON input
	preader_opts->input_json_flatten_separator = NULL;
	preader_opts->json_array_ingest = JSON_ARRAY_INGEST_UNSPECIFIED;

	// CSV input
	preader_opts->use_implicit_csv_header = NEITHER_TRUE_NOR_FALSE;
	preader_opts->allow_ragged_csv_input = NEITHER_TRUE_NOR_FALSE;

	// Pre-pipe command and comment handling
	preader_opts->prepipe = NULL;
	preader_opts->comment_handling = COMMENTS_ARE_DATA;
	preader_opts->comment_string = NULL;

	// Generator settings
	preader_opts->generator_opts.field_name = "i";
	preader_opts->generator_opts.start = 0LL;
	preader_opts->generator_opts.stop = 100LL;
	preader_opts->generator_opts.step = 1LL;
}
1298
// Sets the writer options to their "not specified" state: string settings
// are NULL, tri-state flags are NEITHER_TRUE_NOR_FALSE, and output quoting
// is QUOTE_UNSPECIFIED.
void cli_writer_opts_init(cli_writer_opts_t* pwriter_opts) {
	// Format and separators
	pwriter_opts->ofile_fmt = NULL;
	pwriter_opts->ors = NULL;
	pwriter_opts->ofs = NULL;
	pwriter_opts->ops = NULL;

	// Flatten separators
	pwriter_opts->output_json_flatten_separator = NULL;
	pwriter_opts->oosvar_flatten_separator = NULL;

	// Quoting
	pwriter_opts->oquoting = QUOTE_UNSPECIFIED;

	// CSV, XTAB and PPRINT flags
	pwriter_opts->headerless_csv_output = NEITHER_TRUE_NOR_FALSE;
	pwriter_opts->right_justify_xtab_value = NEITHER_TRUE_NOR_FALSE;
	pwriter_opts->right_align_pprint = NEITHER_TRUE_NOR_FALSE;
	pwriter_opts->pprint_barred = NEITHER_TRUE_NOR_FALSE;

	// JSON flags
	pwriter_opts->stack_json_output_vertically = NEITHER_TRUE_NOR_FALSE;
	pwriter_opts->wrap_json_output_in_outer_list = NEITHER_TRUE_NOR_FALSE;
	pwriter_opts->json_quote_int_keys = NEITHER_TRUE_NOR_FALSE;
	pwriter_opts->json_quote_non_string_values = NEITHER_TRUE_NOR_FALSE;
}
1319
// Fills in defaults for everything in *popts that is still unspecified: the
// reader options, the writer options, and the numeric output format.
void cli_apply_defaults(cli_opts_t* popts) {
	cli_apply_reader_defaults(&popts->reader_opts);
	cli_apply_writer_defaults(&popts->writer_opts);

	// An explicitly chosen output format is left alone.
	if (popts->ofmt != NULL) {
		return;
	}
	popts->ofmt = DEFAULT_OFMT;
}
1329
// Replaces each still-unspecified reader option covered here with its
// default: input format "dkvp", JSON arrays ingested as maps, both CSV flags
// off, and the default JSON flatten separator. Options that already have a
// value are left alone.
void cli_apply_reader_defaults(cli_reader_opts_t* preader_opts) {
	// Input format
	if (preader_opts->ifile_fmt == NULL) {
		preader_opts->ifile_fmt = "dkvp";
	}

	// JSON input
	if (preader_opts->json_array_ingest == JSON_ARRAY_INGEST_UNSPECIFIED) {
		preader_opts->json_array_ingest = JSON_ARRAY_INGEST_AS_MAP;
	}
	if (preader_opts->input_json_flatten_separator == NULL) {
		preader_opts->input_json_flatten_separator = DEFAULT_JSON_FLATTEN_SEPARATOR;
	}

	// CSV input
	if (preader_opts->use_implicit_csv_header == NEITHER_TRUE_NOR_FALSE) {
		preader_opts->use_implicit_csv_header = FALSE;
	}
	if (preader_opts->allow_ragged_csv_input == NEITHER_TRUE_NOR_FALSE) {
		preader_opts->allow_ragged_csv_input = FALSE;
	}
}
1346
// Replaces each still-unspecified writer option covered here with its
// default. Every tri-state flag defaults to FALSE except
// json_quote_int_keys, which defaults to TRUE. Options that already have a
// value are left alone.
void cli_apply_writer_defaults(cli_writer_opts_t* pwriter_opts) {
	// Output format
	if (pwriter_opts->ofile_fmt == NULL) {
		pwriter_opts->ofile_fmt = "dkvp";
	}

	// Quoting
	if (pwriter_opts->oquoting == QUOTE_UNSPECIFIED) {
		pwriter_opts->oquoting = DEFAULT_OQUOTING;
	}

	// Flatten separators
	if (pwriter_opts->output_json_flatten_separator == NULL) {
		pwriter_opts->output_json_flatten_separator = DEFAULT_JSON_FLATTEN_SEPARATOR;
	}
	if (pwriter_opts->oosvar_flatten_separator == NULL) {
		pwriter_opts->oosvar_flatten_separator = DEFAULT_OOSVAR_FLATTEN_SEPARATOR;
	}

	// CSV, XTAB and PPRINT flags
	if (pwriter_opts->headerless_csv_output == NEITHER_TRUE_NOR_FALSE) {
		pwriter_opts->headerless_csv_output = FALSE;
	}
	if (pwriter_opts->right_justify_xtab_value == NEITHER_TRUE_NOR_FALSE) {
		pwriter_opts->right_justify_xtab_value = FALSE;
	}
	if (pwriter_opts->right_align_pprint == NEITHER_TRUE_NOR_FALSE) {
		pwriter_opts->right_align_pprint = FALSE;
	}
	if (pwriter_opts->pprint_barred == NEITHER_TRUE_NOR_FALSE) {
		pwriter_opts->pprint_barred = FALSE;
	}

	// JSON flags
	if (pwriter_opts->stack_json_output_vertically == NEITHER_TRUE_NOR_FALSE) {
		pwriter_opts->stack_json_output_vertically = FALSE;
	}
	if (pwriter_opts->wrap_json_output_in_outer_list == NEITHER_TRUE_NOR_FALSE) {
		pwriter_opts->wrap_json_output_in_outer_list = FALSE;
	}
	if (pwriter_opts->json_quote_int_keys == NEITHER_TRUE_NOR_FALSE) {
		pwriter_opts->json_quote_int_keys = TRUE; // the one flag that defaults to on
	}
	if (pwriter_opts->json_quote_non_string_values == NEITHER_TRUE_NOR_FALSE) {
		pwriter_opts->json_quote_non_string_values = FALSE;
	}
}
1384
1385 // ----------------------------------------------------------------
1386 // For mapper join which has its own input-format overrides.
1387 //
1388 // Mainly this just takes the main-opts flag whenever the join-opts flag was not
1389 // specified by the user. But it's a bit more complex when main and join input
1390 // formats are different. Example: main input format is CSV, for which IPS is
1391 // "(N/A)", and join input format is DKVP. Then we should not use "(N/A)"
1392 // for DKVP IPS. However if main input format were DKVP with IPS set to ":",
1393 // then we should take that.
1394 //
1395 // The logic is:
1396 //
1397 // * If the join input format was unspecified, take all unspecified values from
1398 // main opts.
1399 //
1400 // * If the join input format was specified and is the same as main input
1401 // format, take unspecified values from main opts.
1402 //
1403 // * If the join input format was specified and is not the same as main input
1404 // format, take unspecified values from defaults for the join input format.
1405
void cli_merge_reader_opts(cli_reader_opts_t* pfunc_opts, cli_reader_opts_t* pmain_opts) {

	// A verb-level format which was never specified means "same as main".
	if (pfunc_opts->ifile_fmt == NULL)
		pfunc_opts->ifile_fmt = pmain_opts->ifile_fmt;

	// Separator-type settings depend on the format: when the verb-level and
	// main formats agree, unspecified values are copied from main; when they
	// differ, they are looked up in the per-format default tables instead.
	int formats_match = streq(pfunc_opts->ifile_fmt, pmain_opts->ifile_fmt);

	if (pfunc_opts->irs == NULL) {
		if (formats_match)
			pfunc_opts->irs = pmain_opts->irs;
		else
			pfunc_opts->irs = lhmss_get_or_die(get_default_rses(), pfunc_opts->ifile_fmt);
	}

	if (pfunc_opts->ifs == NULL) {
		if (formats_match)
			pfunc_opts->ifs = pmain_opts->ifs;
		else
			pfunc_opts->ifs = lhmss_get_or_die(get_default_fses(), pfunc_opts->ifile_fmt);
	}

	if (pfunc_opts->ips == NULL) {
		if (formats_match)
			pfunc_opts->ips = pmain_opts->ips;
		else
			pfunc_opts->ips = lhmss_get_or_die(get_default_pses(), pfunc_opts->ifile_fmt);
	}

	if (pfunc_opts->allow_repeat_ifs == NEITHER_TRUE_NOR_FALSE) {
		if (formats_match)
			pfunc_opts->allow_repeat_ifs = pmain_opts->allow_repeat_ifs;
		else
			pfunc_opts->allow_repeat_ifs = lhmsll_get_or_die(get_default_repeat_ifses(), pfunc_opts->ifile_fmt);
	}

	if (pfunc_opts->allow_repeat_ips == NEITHER_TRUE_NOR_FALSE) {
		if (formats_match)
			pfunc_opts->allow_repeat_ips = pmain_opts->allow_repeat_ips;
		else
			pfunc_opts->allow_repeat_ips = lhmsll_get_or_die(get_default_repeat_ipses(), pfunc_opts->ifile_fmt);
	}

	// The remaining settings are copied from main whenever they were left
	// unspecified, regardless of whether the formats agree.
	if (pfunc_opts->json_array_ingest == JSON_ARRAY_INGEST_UNSPECIFIED) {
		pfunc_opts->json_array_ingest = pmain_opts->json_array_ingest;
	}
	if (pfunc_opts->use_implicit_csv_header == NEITHER_TRUE_NOR_FALSE) {
		pfunc_opts->use_implicit_csv_header = pmain_opts->use_implicit_csv_header;
	}
	if (pfunc_opts->allow_ragged_csv_input == NEITHER_TRUE_NOR_FALSE) {
		pfunc_opts->allow_ragged_csv_input = pmain_opts->allow_ragged_csv_input;
	}
	if (pfunc_opts->input_json_flatten_separator == NULL) {
		pfunc_opts->input_json_flatten_separator = pmain_opts->input_json_flatten_separator;
	}
}
1452
1453 // Similar to cli_merge_reader_opts but for mapper tee & mapper put which have their
1454 // own output-format overrides.
void cli_merge_writer_opts(cli_writer_opts_t* pfunc_opts, cli_writer_opts_t* pmain_opts) {

	// A verb-level format which was never specified means "same as main".
	if (pfunc_opts->ofile_fmt == NULL)
		pfunc_opts->ofile_fmt = pmain_opts->ofile_fmt;

	// Separators depend on the format: copy from main when the verb-level and
	// main formats agree, else use the per-format default tables.
	int formats_match = streq(pfunc_opts->ofile_fmt, pmain_opts->ofile_fmt);

	if (pfunc_opts->ors == NULL) {
		if (formats_match)
			pfunc_opts->ors = pmain_opts->ors;
		else
			pfunc_opts->ors = lhmss_get_or_die(get_default_rses(), pfunc_opts->ofile_fmt);
	}

	if (pfunc_opts->ofs == NULL) {
		if (formats_match)
			pfunc_opts->ofs = pmain_opts->ofs;
		else
			pfunc_opts->ofs = lhmss_get_or_die(get_default_fses(), pfunc_opts->ofile_fmt);
	}

	if (pfunc_opts->ops == NULL) {
		if (formats_match)
			pfunc_opts->ops = pmain_opts->ops;
		else
			pfunc_opts->ops = lhmss_get_or_die(get_default_pses(), pfunc_opts->ofile_fmt);
	}

	// Everything below is copied from main whenever it was left unspecified,
	// regardless of whether the formats agree.
	if (pfunc_opts->headerless_csv_output == NEITHER_TRUE_NOR_FALSE) {
		pfunc_opts->headerless_csv_output = pmain_opts->headerless_csv_output;
	}
	if (pfunc_opts->right_justify_xtab_value == NEITHER_TRUE_NOR_FALSE) {
		pfunc_opts->right_justify_xtab_value = pmain_opts->right_justify_xtab_value;
	}
	if (pfunc_opts->right_align_pprint == NEITHER_TRUE_NOR_FALSE) {
		pfunc_opts->right_align_pprint = pmain_opts->right_align_pprint;
	}
	if (pfunc_opts->pprint_barred == NEITHER_TRUE_NOR_FALSE) {
		pfunc_opts->pprint_barred = pmain_opts->pprint_barred;
	}
	if (pfunc_opts->stack_json_output_vertically == NEITHER_TRUE_NOR_FALSE) {
		pfunc_opts->stack_json_output_vertically = pmain_opts->stack_json_output_vertically;
	}
	if (pfunc_opts->wrap_json_output_in_outer_list == NEITHER_TRUE_NOR_FALSE) {
		pfunc_opts->wrap_json_output_in_outer_list = pmain_opts->wrap_json_output_in_outer_list;
	}
	if (pfunc_opts->json_quote_int_keys == NEITHER_TRUE_NOR_FALSE) {
		pfunc_opts->json_quote_int_keys = pmain_opts->json_quote_int_keys;
	}
	if (pfunc_opts->json_quote_non_string_values == NEITHER_TRUE_NOR_FALSE) {
		pfunc_opts->json_quote_non_string_values = pmain_opts->json_quote_non_string_values;
	}
	if (pfunc_opts->output_json_flatten_separator == NULL) {
		pfunc_opts->output_json_flatten_separator = pmain_opts->output_json_flatten_separator;
	}
	if (pfunc_opts->oosvar_flatten_separator == NULL) {
		pfunc_opts->oosvar_flatten_separator = pmain_opts->oosvar_flatten_separator;
	}
	if (pfunc_opts->oquoting == QUOTE_UNSPECIFIED) {
		pfunc_opts->oquoting = pmain_opts->oquoting;
	}
}
1510
1511 // ----------------------------------------------------------------
handle_terminal_usage(char ** argv,int argc,int argi)1512 static int handle_terminal_usage(char** argv, int argc, int argi) {
1513 if (streq(argv[argi], "--version")) {
1514 printf("Miller %s\n", VERSION_STRING);
1515 return TRUE;
1516 } else if (streq(argv[argi], "-h")) {
1517 main_usage_long(stdout, MLR_GLOBALS.bargv0);
1518 return TRUE;
1519 } else if (streq(argv[argi], "--help")) {
1520 main_usage_long(stdout, MLR_GLOBALS.bargv0);
1521 return TRUE;
1522 } else if (streq(argv[argi], "--print-type-arithmetic-info")) {
1523 print_type_arithmetic_info(stdout, MLR_GLOBALS.bargv0);
1524 return TRUE;
1525
1526 } else if (streq(argv[argi], "--help-all-verbs")) {
1527 usage_all_verbs(MLR_GLOBALS.bargv0);
1528 } else if (streq(argv[argi], "--list-all-verbs") || streq(argv[argi], "-l")) {
1529 list_all_verbs(stdout, "");
1530 return TRUE;
1531 } else if (streq(argv[argi], "--list-all-verbs-raw") || streq(argv[argi], "-L")) {
1532 list_all_verbs_raw(stdout);
1533 return TRUE;
1534
1535 } else if (streq(argv[argi], "--list-all-functions-raw") || streq(argv[argi], "-F")) {
1536 fmgr_t* pfmgr = fmgr_alloc();
1537 fmgr_list_all_functions_raw(pfmgr, stdout);
1538 fmgr_free(pfmgr, NULL);
1539 return TRUE;
1540 } else if (streq(argv[argi], "--list-all-functions-as-table")) {
1541 fmgr_t* pfmgr = fmgr_alloc();
1542 fmgr_list_all_functions_as_table(pfmgr, stdout);
1543 fmgr_free(pfmgr, NULL);
1544 return TRUE;
1545 } else if (streq(argv[argi], "--help-all-functions") || streq(argv[argi], "-f")) {
1546 fmgr_t* pfmgr = fmgr_alloc();
1547 fmgr_function_usage(pfmgr, stdout, NULL);
1548 fmgr_free(pfmgr, NULL);
1549 return TRUE;
1550 } else if (streq(argv[argi], "--help-function") || streq(argv[argi], "--hf")) {
1551 check_arg_count(argv, argi, argc, 2);
1552 fmgr_t* pfmgr = fmgr_alloc();
1553 fmgr_function_usage(pfmgr, stdout, argv[argi+1]);
1554 fmgr_free(pfmgr, NULL);
1555 return TRUE;
1556
1557 } else if (streq(argv[argi], "--list-all-keywords-raw") || streq(argv[argi], "-K")) {
1558 mlr_dsl_list_all_keywords_raw(stdout);
1559 return TRUE;
1560 } else if (streq(argv[argi], "--help-all-keywords") || streq(argv[argi], "-k")) {
1561 mlr_dsl_keyword_usage(stdout, NULL);
1562 return TRUE;
1563 } else if (streq(argv[argi], "--help-keyword") || streq(argv[argi], "--hk")) {
1564 check_arg_count(argv, argi, argc, 2);
1565 mlr_dsl_keyword_usage(stdout, argv[argi+1]);
1566 return TRUE;
1567
1568 // main-usage subsections, individually accessible for the benefit of
1569 // the manpage-autogenerator
1570 } else if (streq(argv[argi], "--usage-synopsis")) {
1571 main_usage_synopsis(stdout, MLR_GLOBALS.bargv0);
1572 return TRUE;
1573 } else if (streq(argv[argi], "--usage-examples")) {
1574 main_usage_examples(stdout, MLR_GLOBALS.bargv0, "");
1575 return TRUE;
1576 } else if (streq(argv[argi], "--usage-list-all-verbs")) {
1577 list_all_verbs(stdout, "");
1578 return TRUE;
1579 } else if (streq(argv[argi], "--usage-help-options")) {
1580 main_usage_help_options(stdout, MLR_GLOBALS.bargv0);
1581 return TRUE;
1582 } else if (streq(argv[argi], "--usage-mlrrc")) {
1583 main_usage_mlrrc(stdout, MLR_GLOBALS.bargv0);
1584 return TRUE;
1585 } else if (streq(argv[argi], "--usage-functions")) {
1586 main_usage_functions(stdout, MLR_GLOBALS.bargv0, "");
1587 return TRUE;
1588 } else if (streq(argv[argi], "--usage-data-format-examples")) {
1589 main_usage_data_format_examples(stdout, MLR_GLOBALS.bargv0);
1590 return TRUE;
1591 } else if (streq(argv[argi], "--usage-data-format-options")) {
1592 main_usage_data_format_options(stdout, MLR_GLOBALS.bargv0);
1593 return TRUE;
1594 } else if (streq(argv[argi], "--usage-comments-in-data")) {
1595 main_usage_comments_in_data(stdout, MLR_GLOBALS.bargv0);
1596 return TRUE;
1597 } else if (streq(argv[argi], "--usage-format-conversion-keystroke-saver-options")) {
1598 main_usage_format_conversion_keystroke_saver_options(stdout, MLR_GLOBALS.bargv0);
1599 return TRUE;
1600 } else if (streq(argv[argi], "--usage-compressed-data-options")) {
1601 main_usage_compressed_data_options(stdout, MLR_GLOBALS.bargv0);
1602 return TRUE;
1603 } else if (streq(argv[argi], "--usage-separator-options")) {
1604 main_usage_separator_options(stdout, MLR_GLOBALS.bargv0);
1605 return TRUE;
1606 } else if (streq(argv[argi], "--usage-csv-options")) {
1607 main_usage_csv_options(stdout, MLR_GLOBALS.bargv0);
1608 return TRUE;
1609 } else if (streq(argv[argi], "--usage-double-quoting")) {
1610 main_usage_double_quoting(stdout, MLR_GLOBALS.bargv0);
1611 return TRUE;
1612 } else if (streq(argv[argi], "--usage-numerical-formatting")) {
1613 main_usage_numerical_formatting(stdout, MLR_GLOBALS.bargv0);
1614 return TRUE;
1615 } else if (streq(argv[argi], "--usage-other-options")) {
1616 main_usage_other_options(stdout, MLR_GLOBALS.bargv0);
1617 return TRUE;
1618 } else if (streq(argv[argi], "--usage-then-chaining")) {
1619 main_usage_then_chaining(stdout, MLR_GLOBALS.bargv0);
1620 return TRUE;
1621 } else if (streq(argv[argi], "--usage-auxents")) {
1622 main_usage_auxents(stdout, MLR_GLOBALS.bargv0);
1623 return TRUE;
1624 } else if (streq(argv[argi], "--usage-see-also")) {
1625 main_usage_see_also(stdout, MLR_GLOBALS.bargv0);
1626 return TRUE;
1627 }
1628 return FALSE;
1629 }
1630
1631 // Returns TRUE if the current flag was handled.
cli_handle_reader_options(char ** argv,int argc,int * pargi,cli_reader_opts_t * preader_opts)1632 int cli_handle_reader_options(char** argv, int argc, int *pargi, cli_reader_opts_t* preader_opts) {
1633 int argi = *pargi;
1634 int oargi = argi;
1635
1636 if (streq(argv[argi], "--irs")) {
1637 check_arg_count(argv, argi, argc, 2);
1638 preader_opts->irs = cli_sep_from_arg(argv[argi+1]);
1639 argi += 2;
1640
1641 } else if (streq(argv[argi], "--ifs")) {
1642 check_arg_count(argv, argi, argc, 2);
1643 preader_opts->ifs = cli_sep_from_arg(argv[argi+1]);
1644 argi += 2;
1645
1646 } else if (streq(argv[argi], "--repifs")) {
1647 preader_opts->allow_repeat_ifs = TRUE;
1648 argi += 1;
1649
1650 } else if (streq(argv[argi], "--json-fatal-arrays-on-input")) {
1651 preader_opts->json_array_ingest = JSON_ARRAY_INGEST_FATAL;
1652 argi += 1;
1653 } else if (streq(argv[argi], "--json-skip-arrays-on-input")) {
1654 preader_opts->json_array_ingest = JSON_ARRAY_INGEST_SKIP;
1655 argi += 1;
1656 } else if (streq(argv[argi], "--json-map-arrays-on-input")) {
1657 preader_opts->json_array_ingest = JSON_ARRAY_INGEST_AS_MAP;
1658 argi += 1;
1659
1660 } else if (streq(argv[argi], "--implicit-csv-header")) {
1661 preader_opts->use_implicit_csv_header = TRUE;
1662 argi += 1;
1663
1664 } else if (streq(argv[argi], "--no-implicit-csv-header")) {
1665 preader_opts->use_implicit_csv_header = FALSE;
1666 argi += 1;
1667
1668 } else if (streq(argv[argi], "--allow-ragged-csv-input") || streq(argv[argi], "--ragged")) {
1669 preader_opts->allow_ragged_csv_input = TRUE;
1670 argi += 1;
1671
1672 } else if (streq(argv[argi], "--ips")) {
1673 check_arg_count(argv, argi, argc, 2);
1674 preader_opts->ips = cli_sep_from_arg(argv[argi+1]);
1675 argi += 2;
1676
1677 } else if (streq(argv[argi], "-i")) {
1678 check_arg_count(argv, argi, argc, 2);
1679 if (!lhmss_has_key(get_default_rses(), argv[argi+1])) {
1680 fprintf(stderr, "%s: unrecognized input format \"%s\".\n",
1681 MLR_GLOBALS.bargv0, argv[argi+1]);
1682 exit(1);
1683 }
1684 preader_opts->ifile_fmt = argv[argi+1];
1685 argi += 2;
1686
1687 } else if (streq(argv[argi], "--igen")) {
1688 preader_opts->ifile_fmt = "gen";
1689 argi += 1;
1690 } else if (streq(argv[argi], "--gen-start")) {
1691 preader_opts->ifile_fmt = "gen";
1692 check_arg_count(argv, argi, argc, 2);
1693 if (sscanf(argv[argi+1], "%lld", &preader_opts->generator_opts.start) != 1) {
1694 fprintf(stderr, "%s: could not scan \"%s\".\n",
1695 MLR_GLOBALS.bargv0, argv[argi+1]);
1696 }
1697 argi += 2;
1698 } else if (streq(argv[argi], "--gen-stop")) {
1699 preader_opts->ifile_fmt = "gen";
1700 check_arg_count(argv, argi, argc, 2);
1701 if (sscanf(argv[argi+1], "%lld", &preader_opts->generator_opts.stop) != 1) {
1702 fprintf(stderr, "%s: could not scan \"%s\".\n",
1703 MLR_GLOBALS.bargv0, argv[argi+1]);
1704 }
1705 argi += 2;
1706 } else if (streq(argv[argi], "--gen-step")) {
1707 preader_opts->ifile_fmt = "gen";
1708 check_arg_count(argv, argi, argc, 2);
1709 if (sscanf(argv[argi+1], "%lld", &preader_opts->generator_opts.step) != 1) {
1710 fprintf(stderr, "%s: could not scan \"%s\".\n",
1711 MLR_GLOBALS.bargv0, argv[argi+1]);
1712 }
1713 argi += 2;
1714
1715 } else if (streq(argv[argi], "--icsv")) {
1716 preader_opts->ifile_fmt = "csv";
1717 argi += 1;
1718
1719 } else if (streq(argv[argi], "--icsvlite")) {
1720 preader_opts->ifile_fmt = "csvlite";
1721 argi += 1;
1722
1723 } else if (streq(argv[argi], "--itsv")) {
1724 preader_opts->ifile_fmt = "csv";
1725 preader_opts->ifs = "\t";
1726 argi += 1;
1727
1728 } else if (streq(argv[argi], "--itsvlite")) {
1729 preader_opts->ifile_fmt = "csvlite";
1730 preader_opts->ifs = "\t";
1731 argi += 1;
1732
1733 } else if (streq(argv[argi], "--iasv")) {
1734 preader_opts->ifile_fmt = "csv";
1735 preader_opts->ifs = ASV_FS;
1736 preader_opts->irs = ASV_RS;
1737 argi += 1;
1738
1739 } else if (streq(argv[argi], "--iasvlite")) {
1740 preader_opts->ifile_fmt = "csvlite";
1741 preader_opts->ifs = ASV_FS;
1742 preader_opts->irs = ASV_RS;
1743 argi += 1;
1744
1745 } else if (streq(argv[argi], "--iusv")) {
1746 preader_opts->ifile_fmt = "csv";
1747 preader_opts->ifs = USV_FS;
1748 preader_opts->irs = USV_RS;
1749 argi += 1;
1750
1751 } else if (streq(argv[argi], "--iusvlite")) {
1752 preader_opts->ifile_fmt = "csvlite";
1753 preader_opts->ifs = USV_FS;
1754 preader_opts->irs = USV_RS;
1755 argi += 1;
1756
1757 } else if (streq(argv[argi], "--idkvp")) {
1758 preader_opts->ifile_fmt = "dkvp";
1759 argi += 1;
1760
1761 } else if (streq(argv[argi], "--ijson")) {
1762 preader_opts->ifile_fmt = "json";
1763 argi += 1;
1764
1765 } else if (streq(argv[argi], "--inidx")) {
1766 preader_opts->ifile_fmt = "nidx";
1767 argi += 1;
1768
1769 } else if (streq(argv[argi], "--ixtab")) {
1770 preader_opts->ifile_fmt = "xtab";
1771 argi += 1;
1772
1773 } else if (streq(argv[argi], "--ipprint")) {
1774 preader_opts->ifile_fmt = "csvlite";
1775 preader_opts->ifs = " ";
1776 preader_opts->allow_repeat_ifs = TRUE;
1777 argi += 1;
1778
1779 } else if (streq(argv[argi], "--mmap")) {
1780 // No-op as of 5.6.3 (mmap is being abandoned) but don't break
1781 // the command-line user experience.
1782 argi += 1;
1783
1784 } else if (streq(argv[argi], "--no-mmap")) {
1785 // No-op as of 5.6.3 (mmap is being abandoned) but don't break
1786 // the command-line user experience.
1787 argi += 1;
1788
1789 } else if (streq(argv[argi], "--prepipe")) {
1790 check_arg_count(argv, argi, argc, 2);
1791 preader_opts->prepipe = argv[argi+1];
1792 argi += 2;
1793
1794 } else if (streq(argv[argi], "--prepipe-gunzip")) {
1795 preader_opts->prepipe = "gunzip";
1796 argi += 1;
1797
1798 } else if (streq(argv[argi], "--prepipe-zcat")) {
1799 preader_opts->prepipe = "zcat";
1800 argi += 1;
1801
1802 } else if (streq(argv[argi], "--skip-comments")) {
1803 preader_opts->comment_string = DEFAULT_COMMENT_STRING;
1804 preader_opts->comment_handling = SKIP_COMMENTS;
1805 argi += 1;
1806
1807 } else if (streq(argv[argi], "--skip-comments-with")) {
1808 check_arg_count(argv, argi, argc, 2);
1809 preader_opts->comment_string = argv[argi+1];
1810 preader_opts->comment_handling = SKIP_COMMENTS;
1811 argi += 2;
1812
1813 } else if (streq(argv[argi], "--pass-comments")) {
1814 preader_opts->comment_string = DEFAULT_COMMENT_STRING;
1815 preader_opts->comment_handling = PASS_COMMENTS;
1816 argi += 1;
1817
1818 } else if (streq(argv[argi], "--pass-comments-with")) {
1819 check_arg_count(argv, argi, argc, 2);
1820 preader_opts->comment_string = argv[argi+1];
1821 preader_opts->comment_handling = PASS_COMMENTS;
1822 argi += 2;
1823
1824 }
1825 *pargi = argi;
1826 return argi != oargi;
1827 }
1828
1829 // Returns TRUE if the current flag was handled.
cli_handle_writer_options(char ** argv,int argc,int * pargi,cli_writer_opts_t * pwriter_opts)1830 int cli_handle_writer_options(char** argv, int argc, int *pargi, cli_writer_opts_t* pwriter_opts) {
1831 int argi = *pargi;
1832 int oargi = argi;
1833
1834 if (streq(argv[argi], "--ors")) {
1835 check_arg_count(argv, argi, argc, 2);
1836 pwriter_opts->ors = cli_sep_from_arg(argv[argi+1]);
1837 argi += 2;
1838
1839 } else if (streq(argv[argi], "--ofs")) {
1840 check_arg_count(argv, argi, argc, 2);
1841 pwriter_opts->ofs = cli_sep_from_arg(argv[argi+1]);
1842 argi += 2;
1843
1844 } else if (streq(argv[argi], "--headerless-csv-output")) {
1845 pwriter_opts->headerless_csv_output = TRUE;
1846 argi += 1;
1847
1848 } else if (streq(argv[argi], "--ops")) {
1849 check_arg_count(argv, argi, argc, 2);
1850 pwriter_opts->ops = cli_sep_from_arg(argv[argi+1]);
1851 argi += 2;
1852
1853 } else if (streq(argv[argi], "--xvright")) {
1854 pwriter_opts->right_justify_xtab_value = TRUE;
1855 argi += 1;
1856
1857 } else if (streq(argv[argi], "--jvstack")) {
1858 pwriter_opts->stack_json_output_vertically = TRUE;
1859 argi += 1;
1860
1861 } else if (streq(argv[argi], "--jlistwrap")) {
1862 pwriter_opts->wrap_json_output_in_outer_list = TRUE;
1863 argi += 1;
1864
1865 } else if (streq(argv[argi], "--jknquoteint")) {
1866 pwriter_opts->json_quote_int_keys = FALSE;
1867 argi += 1;
1868 } else if (streq(argv[argi], "--jquoteall")) {
1869 pwriter_opts->json_quote_non_string_values = TRUE;
1870 argi += 1;
1871 } else if (streq(argv[argi], "--jvquoteall")) {
1872 pwriter_opts->json_quote_non_string_values = TRUE;
1873 argi += 1;
1874
1875 } else if (streq(argv[argi], "--vflatsep")) {
1876 check_arg_count(argv, argi, argc, 2);
1877 pwriter_opts->oosvar_flatten_separator = cli_sep_from_arg(argv[argi+1]);
1878 argi += 2;
1879
1880 } else if (streq(argv[argi], "-o")) {
1881 check_arg_count(argv, argi, argc, 2);
1882 if (!lhmss_has_key(get_default_rses(), argv[argi+1])) {
1883 fprintf(stderr, "%s: unrecognized output format \"%s\".\n",
1884 MLR_GLOBALS.bargv0, argv[argi+1]);
1885 exit(1);
1886 }
1887 pwriter_opts->ofile_fmt = argv[argi+1];
1888 argi += 2;
1889
1890 } else if (streq(argv[argi], "--ocsv")) {
1891 pwriter_opts->ofile_fmt = "csv";
1892 argi += 1;
1893
1894 } else if (streq(argv[argi], "--ocsvlite")) {
1895 pwriter_opts->ofile_fmt = "csvlite";
1896 argi += 1;
1897
1898 } else if (streq(argv[argi], "--otsv")) {
1899 pwriter_opts->ofile_fmt = "csv";
1900 pwriter_opts->ofs = "\t";
1901 argi += 1;
1902
1903 } else if (streq(argv[argi], "--otsvlite")) {
1904 pwriter_opts->ofile_fmt = "csvlite";
1905 pwriter_opts->ofs = "\t";
1906 argi += 1;
1907
1908 } else if (streq(argv[argi], "--oasv")) {
1909 pwriter_opts->ofile_fmt = "csv";
1910 pwriter_opts->ofs = ASV_FS;
1911 pwriter_opts->ors = ASV_RS;
1912 argi += 1;
1913
1914 } else if (streq(argv[argi], "--oasvlite")) {
1915 pwriter_opts->ofile_fmt = "csvlite";
1916 pwriter_opts->ofs = ASV_FS;
1917 pwriter_opts->ors = ASV_RS;
1918 argi += 1;
1919
1920 } else if (streq(argv[argi], "--ousv")) {
1921 pwriter_opts->ofile_fmt = "csv";
1922 pwriter_opts->ofs = USV_FS;
1923 pwriter_opts->ors = USV_RS;
1924 argi += 1;
1925
1926 } else if (streq(argv[argi], "--ousvlite")) {
1927 pwriter_opts->ofile_fmt = "csvlite";
1928 pwriter_opts->ofs = USV_FS;
1929 pwriter_opts->ors = USV_RS;
1930 argi += 1;
1931
1932 } else if (streq(argv[argi], "--omd")) {
1933 pwriter_opts->ofile_fmt = "markdown";
1934 argi += 1;
1935
1936 } else if (streq(argv[argi], "--odkvp")) {
1937 pwriter_opts->ofile_fmt = "dkvp";
1938 argi += 1;
1939
1940 } else if (streq(argv[argi], "--ojson")) {
1941 pwriter_opts->ofile_fmt = "json";
1942 argi += 1;
1943 } else if (streq(argv[argi], "--ojsonx")) {
1944 pwriter_opts->ofile_fmt = "json";
1945 pwriter_opts->stack_json_output_vertically = TRUE;
1946 argi += 1;
1947
1948 } else if (streq(argv[argi], "--onidx")) {
1949 pwriter_opts->ofile_fmt = "nidx";
1950 argi += 1;
1951
1952 } else if (streq(argv[argi], "--oxtab")) {
1953 pwriter_opts->ofile_fmt = "xtab";
1954 argi += 1;
1955
1956 } else if (streq(argv[argi], "--opprint")) {
1957 pwriter_opts->ofile_fmt = "pprint";
1958 argi += 1;
1959
1960 } else if (streq(argv[argi], "--right")) {
1961 pwriter_opts->right_align_pprint = TRUE;
1962 argi += 1;
1963
1964 } else if (streq(argv[argi], "--barred")) {
1965 pwriter_opts->pprint_barred = TRUE;
1966 argi += 1;
1967
1968 } else if (streq(argv[argi], "--quote-all")) {
1969 pwriter_opts->oquoting = QUOTE_ALL;
1970 argi += 1;
1971
1972 } else if (streq(argv[argi], "--quote-none")) {
1973 pwriter_opts->oquoting = QUOTE_NONE;
1974 argi += 1;
1975
1976 } else if (streq(argv[argi], "--quote-minimal")) {
1977 pwriter_opts->oquoting = QUOTE_MINIMAL;
1978 argi += 1;
1979
1980 } else if (streq(argv[argi], "--quote-numeric")) {
1981 pwriter_opts->oquoting = QUOTE_NUMERIC;
1982 argi += 1;
1983
1984 } else if (streq(argv[argi], "--quote-original")) {
1985 pwriter_opts->oquoting = QUOTE_ORIGINAL;
1986 argi += 1;
1987
1988 }
1989 *pargi = argi;
1990 return argi != oargi;
1991 }
1992
1993 // Returns TRUE if the current flag was handled.
cli_handle_reader_writer_options(char ** argv,int argc,int * pargi,cli_reader_opts_t * preader_opts,cli_writer_opts_t * pwriter_opts)1994 int cli_handle_reader_writer_options(char** argv, int argc, int *pargi,
1995 cli_reader_opts_t* preader_opts, cli_writer_opts_t* pwriter_opts)
1996 {
1997 int argi = *pargi;
1998 int oargi = argi;
1999
2000 if (streq(argv[argi], "--rs")) {
2001 check_arg_count(argv, argi, argc, 2);
2002 preader_opts->irs = cli_sep_from_arg(argv[argi+1]);
2003 pwriter_opts->ors = cli_sep_from_arg(argv[argi+1]);
2004 argi += 2;
2005
2006 } else if (streq(argv[argi], "--fs")) {
2007 check_arg_count(argv, argi, argc, 2);
2008 preader_opts->ifs = cli_sep_from_arg(argv[argi+1]);
2009 pwriter_opts->ofs = cli_sep_from_arg(argv[argi+1]);
2010 argi += 2;
2011
2012 } else if (streq(argv[argi], "-p")) {
2013 preader_opts->ifile_fmt = "nidx";
2014 pwriter_opts->ofile_fmt = "nidx";
2015 preader_opts->ifs = " ";
2016 pwriter_opts->ofs = " ";
2017 preader_opts->allow_repeat_ifs = TRUE;
2018 argi += 1;
2019
2020 } else if (streq(argv[argi], "--ps")) {
2021 check_arg_count(argv, argi, argc, 2);
2022 preader_opts->ips = cli_sep_from_arg(argv[argi+1]);
2023 pwriter_opts->ops = cli_sep_from_arg(argv[argi+1]);
2024 argi += 2;
2025
2026 } else if (streq(argv[argi], "--jflatsep")) {
2027 check_arg_count(argv, argi, argc, 2);
2028 preader_opts->input_json_flatten_separator = cli_sep_from_arg(argv[argi+1]);
2029 pwriter_opts->output_json_flatten_separator = cli_sep_from_arg(argv[argi+1]);
2030 argi += 2;
2031
2032 } else if (streq(argv[argi], "--io")) {
2033 check_arg_count(argv, argi, argc, 2);
2034 if (!lhmss_has_key(get_default_rses(), argv[argi+1])) {
2035 fprintf(stderr, "%s: unrecognized I/O format \"%s\".\n",
2036 MLR_GLOBALS.bargv0, argv[argi+1]);
2037 exit(1);
2038 }
2039 preader_opts->ifile_fmt = argv[argi+1];
2040 pwriter_opts->ofile_fmt = argv[argi+1];
2041 argi += 2;
2042
2043 } else if (streq(argv[argi], "--csv")) {
2044 preader_opts->ifile_fmt = "csv";
2045 pwriter_opts->ofile_fmt = "csv";
2046 argi += 1;
2047
2048 } else if (streq(argv[argi], "--csvlite")) {
2049 preader_opts->ifile_fmt = "csvlite";
2050 pwriter_opts->ofile_fmt = "csvlite";
2051 argi += 1;
2052
2053 } else if (streq(argv[argi], "--tsv")) {
2054 preader_opts->ifile_fmt = pwriter_opts->ofile_fmt = "csv";
2055 preader_opts->ifs = "\t";
2056 pwriter_opts->ofs = "\t";
2057 argi += 1;
2058
2059 } else if (streq(argv[argi], "--tsvlite") || streq(argv[argi], "-t")) {
2060 preader_opts->ifile_fmt = pwriter_opts->ofile_fmt = "csvlite";
2061 preader_opts->ifs = "\t";
2062 pwriter_opts->ofs = "\t";
2063 argi += 1;
2064
2065 } else if (streq(argv[argi], "--asv")) {
2066 preader_opts->ifile_fmt = pwriter_opts->ofile_fmt = "csv";
2067 preader_opts->ifs = ASV_FS;
2068 pwriter_opts->ofs = ASV_FS;
2069 preader_opts->irs = ASV_RS;
2070 pwriter_opts->ors = ASV_RS;
2071 argi += 1;
2072
2073 } else if (streq(argv[argi], "--asvlite")) {
2074 preader_opts->ifile_fmt = pwriter_opts->ofile_fmt = "csvlite";
2075 preader_opts->ifs = ASV_FS;
2076 pwriter_opts->ofs = ASV_FS;
2077 preader_opts->irs = ASV_RS;
2078 pwriter_opts->ors = ASV_RS;
2079 argi += 1;
2080
2081 } else if (streq(argv[argi], "--usv")) {
2082 preader_opts->ifile_fmt = pwriter_opts->ofile_fmt = "csv";
2083 preader_opts->ifs = USV_FS;
2084 pwriter_opts->ofs = USV_FS;
2085 preader_opts->irs = USV_RS;
2086 pwriter_opts->ors = USV_RS;
2087 argi += 1;
2088
2089 } else if (streq(argv[argi], "--usvlite")) {
2090 preader_opts->ifile_fmt = pwriter_opts->ofile_fmt = "csvlite";
2091 preader_opts->ifs = USV_FS;
2092 pwriter_opts->ofs = USV_FS;
2093 preader_opts->irs = USV_RS;
2094 pwriter_opts->ors = USV_RS;
2095 argi += 1;
2096
2097 } else if (streq(argv[argi], "--dkvp")) {
2098 preader_opts->ifile_fmt = "dkvp";
2099 pwriter_opts->ofile_fmt = "dkvp";
2100 argi += 1;
2101
2102 } else if (streq(argv[argi], "--json")) {
2103 preader_opts->ifile_fmt = "json";
2104 pwriter_opts->ofile_fmt = "json";
2105 argi += 1;
2106 } else if (streq(argv[argi], "--jsonx")) {
2107 preader_opts->ifile_fmt = "json";
2108 pwriter_opts->ofile_fmt = "json";
2109 pwriter_opts->stack_json_output_vertically = TRUE;
2110 argi += 1;
2111
2112 } else if (streq(argv[argi], "--nidx")) {
2113 preader_opts->ifile_fmt = "nidx";
2114 pwriter_opts->ofile_fmt = "nidx";
2115 argi += 1;
2116
2117 } else if (streq(argv[argi], "-T")) {
2118 preader_opts->ifile_fmt = "nidx";
2119 pwriter_opts->ofile_fmt = "nidx";
2120 preader_opts->ifs = "\t";
2121 pwriter_opts->ofs = "\t";
2122 argi += 1;
2123
2124 } else if (streq(argv[argi], "--xtab")) {
2125 preader_opts->ifile_fmt = "xtab";
2126 pwriter_opts->ofile_fmt = "xtab";
2127 argi += 1;
2128
2129 } else if (streq(argv[argi], "--pprint")) {
2130 preader_opts->ifile_fmt = "csvlite";
2131 preader_opts->ifs = " ";
2132 preader_opts->allow_repeat_ifs = TRUE;
2133 pwriter_opts->ofile_fmt = "pprint";
2134 argi += 1;
2135
2136 } else if (streq(argv[argi], "--c2t")) {
2137 preader_opts->ifile_fmt = "csv";
2138 preader_opts->irs = "auto";
2139 pwriter_opts->ofile_fmt = "csv";
2140 pwriter_opts->ors = "auto";
2141 pwriter_opts->ofs = "\t";
2142 argi += 1;
2143 } else if (streq(argv[argi], "--c2d")) {
2144 preader_opts->ifile_fmt = "csv";
2145 preader_opts->irs = "auto";
2146 pwriter_opts->ofile_fmt = "dkvp";
2147 argi += 1;
2148 } else if (streq(argv[argi], "--c2n")) {
2149 preader_opts->ifile_fmt = "csv";
2150 preader_opts->irs = "auto";
2151 pwriter_opts->ofile_fmt = "nidx";
2152 argi += 1;
2153 } else if (streq(argv[argi], "--c2j")) {
2154 preader_opts->ifile_fmt = "csv";
2155 preader_opts->irs = "auto";
2156 pwriter_opts->ofile_fmt = "json";
2157 argi += 1;
2158 } else if (streq(argv[argi], "--c2p")) {
2159 preader_opts->ifile_fmt = "csv";
2160 preader_opts->irs = "auto";
2161 pwriter_opts->ofile_fmt = "pprint";
2162 argi += 1;
2163 } else if (streq(argv[argi], "--c2x")) {
2164 preader_opts->ifile_fmt = "csv";
2165 preader_opts->irs = "auto";
2166 pwriter_opts->ofile_fmt = "xtab";
2167 argi += 1;
2168 } else if (streq(argv[argi], "--c2m")) {
2169 preader_opts->ifile_fmt = "csv";
2170 preader_opts->irs = "auto";
2171 pwriter_opts->ofile_fmt = "markdown";
2172 argi += 1;
2173
2174 } else if (streq(argv[argi], "--t2c")) {
2175 preader_opts->ifile_fmt = "csv";
2176 preader_opts->ifs = "\t";
2177 preader_opts->irs = "auto";
2178 pwriter_opts->ofile_fmt = "csv";
2179 pwriter_opts->ors = "auto";
2180 argi += 1;
2181 } else if (streq(argv[argi], "--t2d")) {
2182 preader_opts->ifile_fmt = "csv";
2183 preader_opts->ifs = "\t";
2184 preader_opts->irs = "auto";
2185 pwriter_opts->ofile_fmt = "dkvp";
2186 argi += 1;
2187 } else if (streq(argv[argi], "--t2n")) {
2188 preader_opts->ifile_fmt = "csv";
2189 preader_opts->ifs = "\t";
2190 preader_opts->irs = "auto";
2191 pwriter_opts->ofile_fmt = "nidx";
2192 argi += 1;
2193 } else if (streq(argv[argi], "--t2j")) {
2194 preader_opts->ifile_fmt = "csv";
2195 preader_opts->ifs = "\t";
2196 preader_opts->irs = "auto";
2197 pwriter_opts->ofile_fmt = "json";
2198 argi += 1;
2199 } else if (streq(argv[argi], "--t2p")) {
2200 preader_opts->ifile_fmt = "csv";
2201 preader_opts->ifs = "\t";
2202 preader_opts->irs = "auto";
2203 pwriter_opts->ofile_fmt = "pprint";
2204 argi += 1;
2205 } else if (streq(argv[argi], "--t2x")) {
2206 preader_opts->ifile_fmt = "csv";
2207 preader_opts->ifs = "\t";
2208 preader_opts->irs = "auto";
2209 pwriter_opts->ofile_fmt = "xtab";
2210 argi += 1;
2211 } else if (streq(argv[argi], "--t2m")) {
2212 preader_opts->ifile_fmt = "csv";
2213 preader_opts->ifs = "\t";
2214 preader_opts->irs = "auto";
2215 pwriter_opts->ofile_fmt = "markdown";
2216 argi += 1;
2217
2218 } else if (streq(argv[argi], "--d2c")) {
2219 preader_opts->ifile_fmt = "dkvp";
2220 pwriter_opts->ofile_fmt = "csv";
2221 pwriter_opts->ors = "auto";
2222 argi += 1;
2223 } else if (streq(argv[argi], "--d2t")) {
2224 preader_opts->ifile_fmt = "dkvp";
2225 pwriter_opts->ofile_fmt = "csv";
2226 pwriter_opts->ors = "auto";
2227 pwriter_opts->ofs = "\t";
2228 argi += 1;
2229 } else if (streq(argv[argi], "--d2n")) {
2230 preader_opts->ifile_fmt = "dkvp";
2231 pwriter_opts->ofile_fmt = "nidx";
2232 argi += 1;
2233 } else if (streq(argv[argi], "--d2j")) {
2234 preader_opts->ifile_fmt = "dkvp";
2235 pwriter_opts->ofile_fmt = "json";
2236 argi += 1;
2237 } else if (streq(argv[argi], "--d2p")) {
2238 preader_opts->ifile_fmt = "dkvp";
2239 pwriter_opts->ofile_fmt = "pprint";
2240 argi += 1;
2241 } else if (streq(argv[argi], "--d2x")) {
2242 preader_opts->ifile_fmt = "dkvp";
2243 pwriter_opts->ofile_fmt = "xtab";
2244 argi += 1;
2245 } else if (streq(argv[argi], "--d2m")) {
2246 preader_opts->ifile_fmt = "dkvp";
2247 pwriter_opts->ofile_fmt = "markdown";
2248 argi += 1;
2249
2250 } else if (streq(argv[argi], "--n2c")) {
2251 preader_opts->ifile_fmt = "nidx";
2252 pwriter_opts->ofile_fmt = "csv";
2253 pwriter_opts->ors = "auto";
2254 argi += 1;
2255 } else if (streq(argv[argi], "--n2t")) {
2256 preader_opts->ifile_fmt = "nidx";
2257 pwriter_opts->ofile_fmt = "csv";
2258 pwriter_opts->ors = "auto";
2259 pwriter_opts->ofs = "\t";
2260 argi += 1;
2261 } else if (streq(argv[argi], "--n2d")) {
2262 preader_opts->ifile_fmt = "nidx";
2263 pwriter_opts->ofile_fmt = "dkvp";
2264 argi += 1;
2265 } else if (streq(argv[argi], "--n2j")) {
2266 preader_opts->ifile_fmt = "nidx";
2267 pwriter_opts->ofile_fmt = "json";
2268 argi += 1;
2269 } else if (streq(argv[argi], "--n2p")) {
2270 preader_opts->ifile_fmt = "nidx";
2271 pwriter_opts->ofile_fmt = "pprint";
2272 argi += 1;
2273 } else if (streq(argv[argi], "--n2x")) {
2274 preader_opts->ifile_fmt = "nidx";
2275 pwriter_opts->ofile_fmt = "xtab";
2276 argi += 1;
2277 } else if (streq(argv[argi], "--n2m")) {
2278 preader_opts->ifile_fmt = "nidx";
2279 pwriter_opts->ofile_fmt = "markdown";
2280 argi += 1;
2281
2282 } else if (streq(argv[argi], "--j2c")) {
2283 preader_opts->ifile_fmt = "json";
2284 pwriter_opts->ofile_fmt = "csv";
2285 pwriter_opts->ors = "auto";
2286 argi += 1;
2287 } else if (streq(argv[argi], "--j2t")) {
2288 preader_opts->ifile_fmt = "json";
2289 pwriter_opts->ofile_fmt = "csv";
2290 pwriter_opts->ors = "auto";
2291 pwriter_opts->ofs = "\t";
2292 argi += 1;
2293 } else if (streq(argv[argi], "--j2d")) {
2294 preader_opts->ifile_fmt = "json";
2295 pwriter_opts->ofile_fmt = "dkvp";
2296 argi += 1;
2297 } else if (streq(argv[argi], "--j2n")) {
2298 preader_opts->ifile_fmt = "json";
2299 pwriter_opts->ofile_fmt = "nidx";
2300 argi += 1;
2301 } else if (streq(argv[argi], "--j2p")) {
2302 preader_opts->ifile_fmt = "json";
2303 pwriter_opts->ofile_fmt = "pprint";
2304 argi += 1;
2305 } else if (streq(argv[argi], "--j2x")) {
2306 preader_opts->ifile_fmt = "json";
2307 pwriter_opts->ofile_fmt = "xtab";
2308 argi += 1;
2309 } else if (streq(argv[argi], "--j2m")) {
2310 preader_opts->ifile_fmt = "json";
2311 pwriter_opts->ofile_fmt = "markdown";
2312 argi += 1;
2313
2314 } else if (streq(argv[argi], "--p2c")) {
2315 preader_opts->ifile_fmt = "csvlite";
2316 preader_opts->ifs = " ";
2317 preader_opts->allow_repeat_ifs = TRUE;
2318 pwriter_opts->ofile_fmt = "csv";
2319 pwriter_opts->ors = "auto";
2320 argi += 1;
2321 } else if (streq(argv[argi], "--p2t")) {
2322 preader_opts->ifile_fmt = "csvlite";
2323 preader_opts->ifs = " ";
2324 preader_opts->allow_repeat_ifs = TRUE;
2325 pwriter_opts->ofile_fmt = "csv";
2326 pwriter_opts->ors = "auto";
2327 pwriter_opts->ofs = "\t";
2328 argi += 1;
2329 } else if (streq(argv[argi], "--p2d")) {
2330 preader_opts->ifile_fmt = "csvlite";
2331 preader_opts->ifs = " ";
2332 preader_opts->allow_repeat_ifs = TRUE;
2333 pwriter_opts->ofile_fmt = "dkvp";
2334 argi += 1;
2335 } else if (streq(argv[argi], "--p2n")) {
2336 preader_opts->ifile_fmt = "csvlite";
2337 preader_opts->ifs = " ";
2338 preader_opts->allow_repeat_ifs = TRUE;
2339 pwriter_opts->ofile_fmt = "nidx";
2340 argi += 1;
2341 } else if (streq(argv[argi], "--p2j")) {
2342 preader_opts->ifile_fmt = "csvlite";
2343 preader_opts->ifs = " ";
2344 preader_opts->allow_repeat_ifs = TRUE;
2345 pwriter_opts->ofile_fmt = "json";
2346 argi += 1;
2347 } else if (streq(argv[argi], "--p2x")) {
2348 preader_opts->ifile_fmt = "csvlite";
2349 preader_opts->ifs = " ";
2350 preader_opts->allow_repeat_ifs = TRUE;
2351 pwriter_opts->ofile_fmt = "xtab";
2352 argi += 1;
2353 } else if (streq(argv[argi], "--p2m")) {
2354 preader_opts->ifile_fmt = "csvlite";
2355 preader_opts->ifs = " ";
2356 preader_opts->allow_repeat_ifs = TRUE;
2357 pwriter_opts->ofile_fmt = "markdown";
2358 argi += 1;
2359
2360 } else if (streq(argv[argi], "--x2c")) {
2361 preader_opts->ifile_fmt = "xtab";
2362 pwriter_opts->ofile_fmt = "csv";
2363 pwriter_opts->ors = "auto";
2364 argi += 1;
2365 } else if (streq(argv[argi], "--x2t")) {
2366 preader_opts->ifile_fmt = "xtab";
2367 pwriter_opts->ofile_fmt = "csv";
2368 pwriter_opts->ors = "auto";
2369 pwriter_opts->ofs = "\t";
2370 argi += 1;
2371 } else if (streq(argv[argi], "--x2d")) {
2372 preader_opts->ifile_fmt = "xtab";
2373 pwriter_opts->ofile_fmt = "dkvp";
2374 argi += 1;
2375 } else if (streq(argv[argi], "--x2n")) {
2376 preader_opts->ifile_fmt = "xtab";
2377 pwriter_opts->ofile_fmt = "nidx";
2378 argi += 1;
2379 } else if (streq(argv[argi], "--x2j")) {
2380 preader_opts->ifile_fmt = "xtab";
2381 pwriter_opts->ofile_fmt = "json";
2382 argi += 1;
2383 } else if (streq(argv[argi], "--x2p")) {
2384 preader_opts->ifile_fmt = "xtab";
2385 pwriter_opts->ofile_fmt = "pprint";
2386 argi += 1;
2387 } else if (streq(argv[argi], "--x2m")) {
2388 preader_opts->ifile_fmt = "xtab";
2389 pwriter_opts->ofile_fmt = "markdown";
2390 argi += 1;
2391
2392 } else if (streq(argv[argi], "-N")) {
2393 preader_opts->use_implicit_csv_header = TRUE;
2394 pwriter_opts->headerless_csv_output = TRUE;
2395 argi += 1;
2396 }
2397 *pargi = argi;
2398 return argi != oargi;
2399 }
2400
2401 // Returns TRUE if the current flag was handled.
cli_handle_misc_options(char ** argv,int argc,int * pargi,cli_opts_t * popts)2402 static int cli_handle_misc_options(char** argv, int argc, int *pargi, cli_opts_t* popts) {
2403 int argi = *pargi;
2404 int oargi = argi;
2405
2406 if (streq(argv[argi], "-I")) {
2407 popts->do_in_place = TRUE;
2408 argi += 1;
2409
2410 } else if (streq(argv[argi], "-n")) {
2411 popts->no_input = TRUE;
2412 argi += 1;
2413
2414 } else if (streq(argv[argi], "--from")) {
2415 check_arg_count(argv, argi, argc, 2);
2416 slls_append(popts->filenames, argv[argi+1], NO_FREE);
2417 argi += 2;
2418
2419 } else if (streq(argv[argi], "--ofmt")) {
2420 check_arg_count(argv, argi, argc, 2);
2421 popts->ofmt = argv[argi+1];
2422 argi += 2;
2423
2424 } else if (streq(argv[argi], "--nr-progress-mod")) {
2425 check_arg_count(argv, argi, argc, 2);
2426 if (sscanf(argv[argi+1], "%lld", &popts->nr_progress_mod) != 1) {
2427 fprintf(stderr,
2428 "%s: --nr-progress-mod argument must be a positive integer; got \"%s\".\n",
2429 MLR_GLOBALS.bargv0, argv[argi+1]);
2430 main_usage_short(stderr, MLR_GLOBALS.bargv0);
2431 exit(1);
2432 }
2433 if (popts->nr_progress_mod <= 0) {
2434 fprintf(stderr,
2435 "%s: --nr-progress-mod argument must be a positive integer; got \"%s\".\n",
2436 MLR_GLOBALS.bargv0, argv[argi+1]);
2437 main_usage_short(stderr, MLR_GLOBALS.bargv0);
2438 exit(1);
2439 }
2440 argi += 2;
2441
2442 } else if (streq(argv[argi], "--seed")) {
2443 check_arg_count(argv, argi, argc, 2);
2444 if (sscanf(argv[argi+1], "0x%x", &popts->rand_seed) == 1) {
2445 popts->have_rand_seed = TRUE;
2446 } else if (sscanf(argv[argi+1], "%u", &popts->rand_seed) == 1) {
2447 popts->have_rand_seed = TRUE;
2448 } else {
2449 fprintf(stderr,
2450 "%s: --seed argument must be a decimal or hexadecimal integer; got \"%s\".\n",
2451 MLR_GLOBALS.bargv0, argv[argi+1]);
2452 main_usage_short(stderr, MLR_GLOBALS.bargv0);
2453 exit(1);
2454 }
2455 argi += 2;
2456
2457 }
2458 *pargi = argi;
2459 return argi != oargi;
2460 }
2461
2462 // ----------------------------------------------------------------
// Looks up key in pmap via lhmss_get and returns the result. A NULL result
// is flagged through MLR_INTERNAL_CODING_ERROR_IF, since callers use this
// only for keys which are required to be present.
static char* lhmss_get_or_die(lhmss_t* pmap, char* key) {
	char* found = lhmss_get(pmap, key);
	MLR_INTERNAL_CODING_ERROR_IF(NULL == found);
	return found;
}
2468
2469 // ----------------------------------------------------------------
// Returns the value stored under key in pmap, converted to int. The key's
// presence is verified first; its absence is flagged through
// MLR_INTERNAL_CODING_ERROR_UNLESS.
// NOTE(review): the map's name suggests long-long values, in which case the
// int return type narrows them -- confirm all stored values fit in an int.
static int lhmsll_get_or_die(lhmsll_t* pmap, char* key) {
	MLR_INTERNAL_CODING_ERROR_UNLESS(lhmsll_has_key(pmap, key));
	int value = lhmsll_get(pmap, key);
	return value;
}
2474