1--[=[--
2 Parse and process command line options.
3
4 Prototype Chain
5 ---------------
6
7      table
8       `-> Object
9            `-> OptionParser
10
11 @classmod std.optparse
12]=]
13
14
15local base = require "std.base"
16
17local Object = require "std.object" {}
18
19local ipairs, pairs = base.ipairs, base.pairs
20local insert, last, len = base.insert, base.last, base.len
21
22
23
24--[[ ================= ]]--
25--[[ Helper Functions. ]]--
26--[[ ================= ]]--
27
28
29local optional, required
30
31
--- Normalise an argument list.
-- Separate combined short options, and remove `=` separators from
-- `--long-option=optarg` etc.  Only options that already have a handler
-- registered on the parser are split; anything else is passed through
-- unchanged.  Entries `arglist[-1]` and `arglist[0]` are copied across
-- untouched.
-- @local
-- @function normalise
-- @tparam table arglist list of arguments to normalise
-- @treturn table normalised argument list
local function normalise (self, arglist)
  local normal = {}

  for i = 1, len (arglist) do
    local opt = arglist[i]

    if opt:sub (1, 2) == "--" then
      -- Split '--long-option=option-argument', but only when the text
      -- before the '=' is a recognised long option.
      local eq = opt:find ("=", 3, true)
      local optname = eq and opt:sub (1, eq - 1)

      if optname and self[optname] then
        insert (normal, optname)
        insert (normal, opt:sub (eq + 1))
      else
        -- No '=', or substring before '=' is not a known option name.
        insert (normal, opt)
      end

    elseif opt:sub (1, 1) == "-" and string.len (opt) > 2 then
      -- `rest` holds the letters of a combined short option that have
      -- not been examined yet.
      local split, rest = {}, opt:sub (2)

      while rest ~= "" do
        local short = "-" .. rest:sub (1, 1)
        local known = self[short]
        rest = rest:sub (2)

        -- If there's no handler, the option was a typo, or not supposed
        -- to be an option at all: keep the original argument intact.
        if known == nil then
          split = { opt }
          break
        end

        split[#split + 1] = short

        -- Split '-xshortargument' into '-x shortargument'.  When
        -- nothing follows the option letter (e.g. the 'o' of '-vo'),
        -- emit no argument at all, so that the handler picks up the
        -- next command line argument instead of an empty string.
        if known.handler == optional or known.handler == required then
          if rest ~= "" then split[#split + 1] = rest end
          break
        end

        -- Otherwise split '-xyz' into '-x -yz', and reiterate for '-yz'.
      end

      -- Append split options to normalised list
      for _, v in ipairs (split) do insert (normal, v) end
    else
      insert (normal, opt)
    end
  end

  normal[-1], normal[0] = arglist[-1], arglist[0]
  return normal
end
104
105
--- Store `value` with `opt`.
-- The first value recorded for an option is stored as is; a second value
-- turns the entry into a list of both, and further values are appended
-- to that list.
-- @local
-- @function set
-- @string opt option name
-- @param value option argument value
local function set (self, opt, value)
  local key = self[opt].key
  local current = self.opts[key]

  if current == nil then
    -- First occurrence.
    self.opts[key] = value
  elseif type (current) == "table" then
    -- Third and later occurrences: extend the existing list.
    insert (current, value)
  else
    -- Second occurrence: promote the single value to a list.
    self.opts[key] = { current, value }
  end
end
123
124
125
126--[[ ============= ]]--
127--[[ Option Types. ]]--
128--[[ ============= ]]--
129
130
--- Option at `arglist[i]` can take an argument.
-- Argument is accepted only if there is a following entry that does not
-- begin with a '-'.
--
-- This is the handler automatically assigned to options that have
-- `--opt=[ARG]` style specifications in the @{OptionParser} spec
-- argument.  You can also pass it as the `handler` argument to @{on} for
-- options you want to add manually without putting them in the
-- @{OptionParser} spec.
--
-- Like @{required}, this handler will store multiple occurrences of a
-- command-line option.
-- @static
-- @tparam table arglist list of arguments
-- @int i index of last processed element of *arglist*
-- @param[opt=true] value either a function to process the option
--   argument, or a default value if encountered without an optarg
-- @treturn int index of next element of *arglist* to process
-- @usage
-- parser:on ("--enable-nls", parser.optional, parser.boolean)
function optional (self, arglist, i, value)
  local opt = arglist[i]

  -- An argument is available: behave exactly like a required option.
  if i + 1 <= len (arglist) and arglist[i + 1]:sub (1, 1) ~= "-" then
    return self:required (arglist, i, value)
  end

  if type (value) == "function" then
    -- No optarg was given; the value function still receives the name
    -- of the option it is being called for.
    value = value (self, opt, nil)
  elseif value == nil then
    value = true
  end

  set (self, opt, value)
  return i + 1
end
165
166
--- Option at `arglist[i]` requires an argument.
--
-- This is the handler automatically assigned to options that have
-- `--opt=ARG` style specifications in the @{OptionParser} spec argument.
-- You can also pass it as the `handler` argument to @{on} for options
-- you want to add manually without putting them in the @{OptionParser}
-- spec.
--
-- Normally the value stored in the `opt` table by this handler will be
-- the string given as the argument to that option on the command line.
-- However, if the option is given on the command-line multiple times,
-- `opt["name"]` will end up with all those arguments stored in the
-- array part of a table:
--
--     $ cat ./prog
--     ...
--     parser:on ({"-e", "--exec"}, required)
--     _G.arg, _G.opt = parser:parse (_G.arg)
--     print (std.string.tostring (_G.opt.exec))
--     ...
--     $ ./prog -e '(foo bar)' -e '(foo baz)' -- qux
--     {1=(foo bar),2=(foo baz)}
-- @static
-- @tparam table arglist list of arguments
-- @int i index of last processed element of *arglist*
-- @param[opt] value either a function to process the option argument,
--   or a forced value to replace the user's option argument.
-- @treturn int index of next element of *arglist* to process
-- @usage
-- parser:on ({"-o", "--output"}, parser.required)
function required (self, arglist, i, value)
  local name = arglist[i]
  local argidx = i + 1

  -- Nothing follows the option: report it and step past the option.
  if argidx > len (arglist) then
    self:opterr ("option '" .. name .. "' requires an argument")
    return argidx
  end

  local optarg = arglist[argidx]
  local result = value
  if type (value) == "function" then
    result = value (self, name, optarg)
  elseif value == nil then
    result = optarg
  end

  set (self, name, result)

  -- Both the option and its argument have been consumed.
  return i + 2
end
213
214
--- Finish option processing
--
-- This is the handler automatically assigned to the option written as
-- `--` in the @{OptionParser} spec argument.  You can also pass it as
-- the `handler` argument to @{on} if you want to manually add an end
-- of options marker without writing it in the @{OptionParser} spec.
--
-- This handler tells the parser to stop processing arguments, so that
-- anything after it will be an argument even if it otherwise looks
-- like an option.
-- @static
-- @tparam table arglist list of arguments
-- @int i index of last processed element of `arglist`
-- @treturn int index of next element of `arglist` to process
-- @usage
-- parser:on ("--", parser.finished)
local function finished (self, arglist, i)
  local count = len (arglist)

  -- Everything after the marker is an unrecognised (non-option) argument.
  for idx = i + 1, count do
    insert (self.unrecognised, arglist[idx])
  end

  -- One past the end of the list, so the caller has nothing left to do.
  return count + 1
end
237
238
--- Option at `arglist[i]` is a boolean switch.
--
-- This is the handler automatically assigned to options that have
-- `--long-opt` or `-x` style specifications in the @{OptionParser} spec
-- argument. You can also pass it as the `handler` argument to @{on} for
-- options you want to add manually without putting them in the
-- @{OptionParser} spec.
--
-- Beware that, _unlike_ @{required}, this handler will store multiple
-- occurrences of a command-line option as a table **only** when given a
-- `value` function.  Automatically assigned handlers do not do this, so
-- the option will simply be `true` if the option was given one or more
-- times on the command-line.  A non-function `value` is likewise stored
-- directly, however many times the flag is given.
-- @static
-- @tparam table arglist list of arguments
-- @int i index of last processed element of *arglist*
-- @param[opt] value either a function to process the option argument,
--   or a value to store when this flag is encountered
-- @treturn int index of next element of *arglist* to process
-- @usage
-- parser:on ({"--long-opt", "-x"}, parser.flag)
local function flag (self, arglist, i, value)
  local opt = arglist[i]

  if type (value) == "function" then
    set (self, opt, value (self, opt, true))
  else
    -- Store the registered value, defaulting to `true`.  Assign directly
    -- rather than through `set` so repeated flags do not build a list.
    if value == nil then value = true end
    self.opts[self[opt].key] = value
  end

  return i + 1
end
271
272
--- Option should display help text, then exit.
--
-- This is the handler automatically assigned to options that have
-- `--help` in the specification, e.g. `-h, -?, --help`.
-- @static
-- @function help
-- @usage
-- parser:on ("-?", parser.help)
local function help (self)
  local text = self.helptext
  print (text)
  -- Exit status 0.
  os.exit (0)
end
285
286
--- Option should display version text, then exit.
--
-- This is the handler automatically assigned to options that have
-- `--version` in the specification, e.g. `-V, --version`.
-- @static
-- @function version
-- @usage
-- parser:on ("-V", parser.version)
local function version (self)
  local text = self.versiontext
  print (text)
  -- Exit status 0.
  os.exit (0)
end
299
300
301
302--[[ =============== ]]--
303--[[ Argument Types. ]]--
304--[[ =============== ]]--
305
306
--- Map various option strings to equivalent Lua boolean values.
-- Keys are matched exactly; @{boolean} lower-cases the option argument
-- before looking it up here.
-- @table boolvals
-- @field false false
-- @field 0 false
-- @field no false
-- @field n false
-- @field true true
-- @field 1 true
-- @field yes true
-- @field y true
local boolvals = {
  -- Falsey spellings.
  ["false"] = false,
  ["0"]     = false,
  ["no"]    = false,
  ["n"]     = false,

  -- Truthy spellings.
  ["true"]  = true,
  ["1"]     = true,
  ["yes"]   = true,
  ["y"]     = true,
}
323
324
--- Return a Lua boolean equivalent of various *optarg* strings.
-- Report an option parse error if *optarg* is not recognised.
--
-- Pass this as the `value` function to @{on} when you want various
-- "truthy" or "falsey" option arguments to be coerced to a Lua `true`
-- or `false` respectively in the options table.
-- @static
-- @string opt option name
-- @string[opt="1"] optarg option argument, must be a key in @{boolvals}
-- @treturn bool `true` or `false`
-- @usage
-- parser:on ("--enable-nls", parser.optional, parser.boolean)
local function boolean (self, opt, optarg)
  if optarg == nil then optarg = "1" end -- default to truthy

  -- Lookup is case-insensitive.
  local text = tostring (optarg)
  local b = boolvals[text:lower ()]

  if b == nil then
    -- `opt` is the option name itself (a string), so report it whole;
    -- indexing it with `[1]` yields nil and breaks the concatenation.
    return self:opterr (text .. ": Not a valid argument to " ..
                        tostring (opt) .. ".")
  end
  return b
end
345
346
--- Report an option parse error unless *optarg* names an
-- existing file.
--
-- Pass this as the `value` function to @{on} when you want to accept
-- only option arguments that name an existing file.
-- @fixme this only checks whether the file has read permissions
-- @static
-- @string opt option name
-- @string optarg option argument, must be an existing file
-- @treturn string *optarg*
-- @usage
-- parser:on ("--config-file", parser.required, parser.file)
local function file (self, opt, optarg)
  local h, errmsg = io.open (optarg, "r")
  if h == nil then
    -- The message from `io.open` normally starts with the file name
    -- already; only add it when missing, so it is not reported twice.
    local prefix = optarg .. ": "
    errmsg = tostring (errmsg)
    if errmsg:sub (1, #prefix) ~= prefix then
      errmsg = prefix .. errmsg
    end
    return self:opterr (errmsg)
  end

  -- The handle was only needed to prove the file can be opened.
  h:close ()
  return optarg
end
367
368
369
370--[[ =============== ]]--
371--[[ Option Parsing. ]]--
372--[[ =============== ]]--
373
374
--- Report an option parse error, then exit with status 2.
--
-- Use this in your custom option handlers for consistency with the
-- error output from built-in @{std.optparse} error messages.  The
-- message, and a hint to try `--help`, are written to standard error,
-- each prefixed with the program name.
-- @static
-- @string msg error message
local function opterr (self, msg)
  local prog = self.program

  -- Ensure final period.
  if msg:sub (-1) ~= "." then msg = msg .. "." end

  io.stderr:write (prog .. ": error: " .. msg .. "\n",
                   prog .. ": Try '" .. prog .. " --help' for help.\n")
  os.exit (2)
end
389
390
391------
392-- Function signature of an option handler for @{on}.
393-- @function on_handler
394-- @tparam table arglist list of arguments
395-- @int i index of last processed element of *arglist*
396-- @param[opt=nil] value additional `value` registered with @{on}
397-- @treturn int index of next element of *arglist* to process
398
399
--- Add an option handler.
--
-- When the automatically assigned option handlers don't do everything
-- you require, or when you don't want to put an option into the
-- @{OptionParser} `spec` argument, use this function to specify custom
-- behaviour.  If you write the option into the `spec` argument anyway,
-- calling this function will replace the automatically assigned handler
-- with your own.
--
-- When writing your own handlers for @{std.optparse:on}, you only need
-- to deal with normalised arguments, because combined short arguments
-- (`-xyz`), equals separators to long options (`--long=ARG`) are fully
-- expanded before any handler is called.
--
-- Every name in *opts* shares one key in the parsed options table; that
-- key is derived from the last name given.
-- @function on
-- @tparam string|table opts name of the option, or list of option names
-- @tparam on_handler handler function to call when any of *opts* is
--   encountered
-- @param value additional value passed to @{on_handler}
-- @usage
-- -- Don't process any arguments after `--`
-- parser:on ('--', parser.finished)
local function on (self, opts, handler, value)
  if type (opts) == "string" then opts = { opts } end
  if not handler then handler = flag end -- unspecified options behave as flags

  -- Expand every whitespace separated name into its canonical spelling.
  local names = {}
  for _, optspec in ipairs (opts) do
    for name in optspec:gmatch ("%S+") do
      if #name == 1 then
        -- 'x' => '-x'
        name = "-" .. name
      elseif name:sub (1, 1) ~= "-" then
        -- 'option-name' => '--option-name'
        name = "--" .. name
      end

      if name:find ("^%-[^%-]") then
        -- '-xyz' => '-x -y -z'
        for pos = 2, #name do
          insert (names, "-" .. name:sub (pos, pos))
        end
      else
        insert (names, name)
      end
    end
  end

  -- strip leading '-', and convert non-alphanums to '_'
  local bare = last (names):match ("^%-*(.*)$")
  local key = bare:gsub ("%W", "_")

  -- Each spelling gets its own record, all sharing the same key.
  for _, name in ipairs (names) do
    self[name] = { key = key, handler = handler, value = value }
  end
end
456
457
458------
-- Parsed options table, with a key for each encountered option, each
-- with value set by that option's @{on_handler}.  The key is derived
-- from the last name registered for the option (the last spelling on
-- its line of the spec, or the last entry of the *opts* argument to
-- @{on}), with leading hyphens stripped and non-alphanumeric
-- characters replaced with underscores:
--
--     {"-e", "--eval-file"} => opts.eval_file
--     {"-n", "--dryrun", "--dry-run"} => opts.dry_run
--     {"-t", "-T"} => opts.T
--
-- Generally there will be one key for each previously specified
-- option (either automatically assigned by @{OptionParser} or
-- added manually with @{on}) containing the value(s) assigned by the
-- associated @{on_handler}.  For automatically assigned handlers,
-- that means `true` for straight-forward flags and
-- optional-argument options for which no argument was given; or else
-- the string value of the argument passed with an option given only
-- once; or a table of string values of the same for arguments given
-- multiple times.
--
--     ./prog -x -n -x => opts = { x = true, dry_run = true }
--     ./prog -e '(foo bar)' -e '(foo baz)'
--         => opts = {eval_file = {"(foo bar)", "(foo baz)"} }
484--
485-- If you write your own handlers, or otherwise specify custom
486-- handling of options with @{on}, then whatever value those handlers
487-- return will be assigned to the respective keys in `opts`.
488-- @table opts
489
490
--- Parse an argument list.
-- Options with a registered handler are passed to that handler; every
-- other argument, along with an immediately following argument that
-- does not begin with '-', is collected as unrecognised.
-- @tparam table arglist list of arguments
-- @tparam[opt] table defaults table of default option values
-- @treturn table a list of unrecognised *arglist* elements
-- @treturn opts parsing results
local function parse (self, arglist, defaults)
  self.unrecognised, self.opts = {}, {}

  arglist = normalise (self, arglist)

  local i = 1
  while i > 0 and i <= len (arglist) do
    local opt = arglist[i]

    -- Option names registered with `on` always begin with '-'.  Only
    -- look those up, so that a plain argument such as "file" or "help"
    -- can never be mistaken for a parser method or field of that name.
    local entry
    if opt:sub (1, 1) == "-" then entry = self[opt] end

    if entry == nil then
      insert (self.unrecognised, opt)
      i = i + 1

      -- Following non-'-' prefixed argument is an optarg.
      if i <= len (arglist) and arglist[i]:match "^[^%-]" then
        insert (self.unrecognised, arglist[i])
        i = i + 1
      end

    -- Run option handler functions.
    else
      assert (type (entry.handler) == "function")

      -- The handler returns the index of the next argument to process.
      i = entry.handler (self, arglist, i, entry.value)
    end
  end

  -- Merge defaults into user options.
  for k, v in pairs (defaults or {}) do
    if self.opts[k] == nil then self.opts[k] = v end
  end

  -- metatable allows `io.warn` to find `parser.program` when assigned
  -- back to _G.opts.
  return self.unrecognised, setmetatable (self.opts, {__index = self})
end
532
533
--- Take care not to register duplicate handlers.
-- Raises "only one handler per option" when a handler has already been
-- chosen.
-- @param current current handler value
-- @param new new handler value
-- @return `new` if `current` is nil
local function set_handler (current, new)
  if current ~= nil then
    -- Level 0 keeps the message free of position information.
    error ("only one handler per option", 0)
  end
  return new
end
542
543
--- Build the parser fields and option handlers described by *spec*.
-- The first line of *spec* supplies the version (its last word), and
-- everything from "Usage:" onwards is the help text.  Each help text
-- line that begins with two or more spaces followed by a '-' is read as
-- an option specification, and a handler is registered for it with
-- `on`.  Lines that start that way but cannot be read as options (for
-- example a "- bullet" in the description) are ignored.
-- @local
-- @param _ unused
-- @string spec option parsing specification
-- @treturn table table of parser fields and registered option handlers
local function _init (_, spec)
  local parser = {}

  parser.versiontext, parser.version, parser.helptext, parser.program =
    spec:match ("^([^\n]-(%S+)\n.-)%s*([Uu]sage: (%S+).-)%s*$")

  if parser.versiontext == nil then
    error ("OptionParser spec argument must match '<version>\\n" ..
           "...Usage: <program>...'")
  end

  -- Collect helptext lines that begin with two or more spaces followed
  -- by a '-'.
  local specs = {}
  parser.helptext:gsub ("\n  %s*(%-[^\n]+)",
                        function (line) insert (specs, line) end)

  -- Register option handlers according to the help text.
  for _, line in ipairs (specs) do
    local options, handler = {}

    -- Append a trailing separator, so that the patterns below (which all
    -- expect whitespace after an option) also match an option that is
    -- the very last thing on its line.
    local optspec = line .. " "

    -- Loop around each '-' prefixed option on this line.
    while optspec:sub (1, 1) == "-" do

      -- Capture end of options processing marker.
      if optspec:match "^%-%-,?%s" then
        handler = set_handler (handler, finished)

      -- Capture optional argument in the option string.
      elseif optspec:match "^%-[%-%w]+=%[.+%],?%s" then
        handler = set_handler (handler, optional)

      -- Capture required argument in the option string.
      elseif optspec:match "^%-[%-%w]+=%S+,?%s" then
        handler = set_handler (handler, required)

      -- Capture any specially handled arguments.
      elseif optspec:match "^%-%-help,?%s" then
        handler = set_handler (handler, help)

      elseif optspec:match "^%-%-version,?%s" then
        handler = set_handler (handler, version)
      end

      -- Consume argument spec, now that it was processed above.
      optspec = optspec:gsub ("^(%-[%-%w]+)=%S+%s", "%1 ")

      -- Consume exactly one option per iteration, otherwise we might
      -- miss a handler test at the next loop: try a short option first,
      -- then a long one.
      local name, rest = optspec:match ("^%-([-%w]),?%s+(.*)$")
      if name == "-" then
        name = "--"
      elseif name == nil then
        name, rest = optspec:match ("^%-%-([%-%w]+),?%s+(.*)$")
      end

      -- Not an option after all: stop here, because nothing was
      -- consumed and looping again would never terminate.
      if name == nil then break end

      insert (options, name)
      optspec = rest
    end

    -- Unless specified otherwise, treat each option as a flag.  Lines
    -- that yielded no option names have nothing to register.
    if options[1] ~= nil then
      on (parser, options, handler or flag)
    end
  end

  return parser
end
617
618
619--- Signature for initialising a custom OptionParser.
620--
621-- Read the documented options from *spec* and return custom parser that
622-- can be used for parsing the options described in *spec* from a run-time
623-- argument list.  Options in *spec* are recognised as lines that begin
624-- with at least two spaces, followed by a hyphen.
625-- @static
626-- @function OptionParser_Init
627-- @string spec option parsing specification
628-- @treturn OptionParser a parser for options described by *spec*
629-- @usage
630-- customparser = std.optparse (optparse_spec)
631
632
633--- OptionParser prototype object.
634--
635-- Most often, after instantiating an @{OptionParser}, everything else
636-- is handled automatically.
637--
638-- Then, calling `parser:parse` as shown below saves unparsed arguments
639-- into `_G.arg` (usually filenames or similar), and `_G.opts` will be a
640-- table of successfully parsed option values. The keys into this table
641-- are the long-options with leading hyphens stripped, and non-word
642-- characters turned to `_`.  For example if `--another-long` had been
643-- found in the initial `_G.arg`, then `_G.opts` will have a key named
644-- `another_long`, with an appropriate value.  If there is no long
645-- option name, then the short option is used, i.e. `_G.opts.b` will be
646-- set.
647--
648-- The values saved against those keys are controlled by the option
649-- handler, usually just `true` or the option argument string as
650-- appropriate.
651-- @object OptionParser
652-- @tparam OptionParser_Init _init initialisation function
653-- @string program the first word following "Usage:" from *spec*
654-- @string version the last white-space delimited word on the first line
655--   of text from *spec*
656-- @string versiontext everything preceding "Usage:" from *spec*, and
657--   which will be displayed by the @{version} @{on_handler}
658-- @string helptext everything including and following "Usage:" from
659--   *spec* string and which will be displayed by the @{help}
660--   @{on_handler}
661-- @usage
662-- local std = require "std"
663--
664-- local optparser = std.optparse [[
665-- any text VERSION
666-- Additional lines of text to show when the --version
667-- option is passed.
668--
669-- Several lines or paragraphs are permitted.
670--
671-- Usage: PROGNAME
672--
673-- Banner text.
674--
675-- Optional long description text to show when the --help
676-- option is passed.
677--
678-- Several lines or paragraphs of long description are permitted.
679--
680-- Options:
681--
682--   -b                       a short option with no long option
683--       --long               a long option with no short option
--       --another-long       a long option with internal hyphen
685--   -v, --verbose            a combined short and long option
686--   -n, --dryrun, --dry-run  several spellings of the same option
687--   -u, --name=USER          require an argument
688--   -o, --output=[FILE]      accept an optional argument
689--       --version            display version information, then exit
690--       --help               display this help, then exit
691--
692-- Footer text.  Several lines or paragraphs are permitted.
693--
694-- Please report bugs at bug-list@yourhost.com
695-- ]]
696--
697-- -- Note that @{std.io.die} and @{std.io.warn} will only prefix messages
698-- -- with `parser.program` if the parser options are assigned back to
699-- -- `_G.opts`:
700-- _G.arg, _G.opts = optparser:parse (_G.arg)
return Object {
  _type = "OptionParser",

  -- Builds the parser fields and option handlers from the spec string.
  _init = _init,

  -- Prototype initial values.
  -- NOTE(review): the `version` field below shares its name with the
  -- `version` handler exported through `__index`; confirm which of the
  -- two `parser.version` is expected to resolve to.
  helptext    = "",
  opts        = {},
  program     = "",
  version     = 0,
  versiontext = "",

  --- @export
  __index = {
    -- Option handlers, suitable as the `handler` argument to `on`.
    finished = finished,
    flag     = flag,
    help     = help,
    optional = optional,
    required = required,
    version  = version,

    -- Option argument processors, suitable as the `value` argument.
    boolean  = boolean,
    file     = file,

    -- Parser methods.
    on     = on,
    opterr = opterr,
    parse  = parse,
  },
}
729