1#
2#
3#            Nim's Runtime Library
4#        (c) Copyright 2015 Andreas Rumpf
5#
6#    See the file "copying.txt", included in this
7#    distribution, for details about the copyright.
8#
9
10## This module provides the standard Nim command line parser.
11## It supports one convenience iterator over all command line options and some
12## lower-level features.
13##
14## Supported Syntax
15## ================
16##
17## The following syntax is supported when arguments for the `shortNoVal` and
18## `longNoVal` parameters, which are
19## `described later<#shortnoval-and-longnoval>`_, are not provided:
20##
21## 1. Short options: `-abcd`, `-e:5`, `-e=5`
22## 2. Long options: `--foo:bar`, `--foo=bar`, `--foo`
23## 3. Arguments: everything that does not start with a `-`
24##
25## These three kinds of tokens are enumerated in the
26## `CmdLineKind enum<#CmdLineKind>`_.
27##
28## When option values begin with ':' or '=', they need to be doubled up (as in
29## `--delim::`) or alternated (as in `--delim=:`).
30##
31## The `--` option, commonly used to denote that every token that follows is
32## an argument, is interpreted as a long option, and its name is the empty
33## string.
34##
35## Parsing
36## =======
37##
38## Use an `OptParser<#OptParser>`_ to parse command line options. It can be
39## created with `initOptParser<#initOptParser,string,set[char],seq[string]>`_,
40## and `next<#next,OptParser>`_ advances the parser by one token.
41##
## For each token, the parser's `kind`, `key`, and `val` fields give
## information about that token. If the token is a long or short option, `key`
## is the option's name, and `val` is either the option's value, if provided,
## or the empty string. For arguments, the `key` field contains the argument
## itself, and `val` is the empty string. To check if the end of the command
## line has been reached, check if `kind` is equal to `cmdEnd`.
48##
49## Here is an example:
50##
51## .. code-block::
52##   import std/parseopt
53##
54##   var p = initOptParser("-ab -e:5 --foo --bar=20 file.txt")
55##   while true:
56##     p.next()
57##     case p.kind
58##     of cmdEnd: break
59##     of cmdShortOption, cmdLongOption:
60##       if p.val == "":
61##         echo "Option: ", p.key
62##       else:
63##         echo "Option and value: ", p.key, ", ", p.val
64##     of cmdArgument:
65##       echo "Argument: ", p.key
66##
67##   # Output:
68##   # Option: a
69##   # Option: b
70##   # Option and value: e, 5
71##   # Option: foo
72##   # Option and value: bar, 20
73##   # Argument: file.txt
74##
75## The `getopt iterator<#getopt.i,OptParser>`_, which is provided for
76## convenience, can be used to iterate through all command line options as well.
77##
78## `shortNoVal` and `longNoVal`
79## ============================
80##
81## The optional `shortNoVal` and `longNoVal` parameters present in
82## `initOptParser<#initOptParser,string,set[char],seq[string]>`_ are for
83## specifying which short and long options do not accept values.
84##
85## When `shortNoVal` is non-empty, users are not required to separate short
86## options and their values with a ':' or '=' since the parser knows which
87## options accept values and which ones do not. This behavior also applies for
88## long options if `longNoVal` is non-empty. For short options, `-j4`
89## becomes supported syntax, and for long options, `--foo bar` becomes
90## supported. This is in addition to the `previously mentioned
91## syntax<#supported-syntax>`_. Users can still separate options and their
92## values with ':' or '=', but that becomes optional.
93##
94## As more options which do not accept values are added to your program,
95## remember to amend `shortNoVal` and `longNoVal` accordingly.
96##
97## The following example illustrates the difference between having an empty
98## `shortNoVal` and `longNoVal`, which is the default, and providing
99## arguments for those two parameters:
100##
101## .. code-block::
102##   import std/parseopt
103##
104##   proc printToken(kind: CmdLineKind, key: string, val: string) =
105##     case kind
106##     of cmdEnd: doAssert(false)  # Doesn't happen with getopt()
107##     of cmdShortOption, cmdLongOption:
108##       if val == "":
109##         echo "Option: ", key
110##       else:
111##         echo "Option and value: ", key, ", ", val
112##     of cmdArgument:
113##       echo "Argument: ", key
114##
115##   let cmdLine = "-j4 --first bar"
116##
117##   var emptyNoVal = initOptParser(cmdLine)
118##   for kind, key, val in emptyNoVal.getopt():
119##     printToken(kind, key, val)
120##
121##   # Output:
122##   # Option: j
123##   # Option: 4
124##   # Option: first
125##   # Argument: bar
126##
127##   var withNoVal = initOptParser(cmdLine, shortNoVal = {'c'},
128##                                 longNoVal = @["second"])
129##   for kind, key, val in withNoVal.getopt():
130##     printToken(kind, key, val)
131##
132##   # Output:
133##   # Option and value: j, 4
134##   # Option and value: first, bar
135##
136## See also
137## ========
138##
139## * `os module<os.html>`_ for lower-level command line parsing procs
140## * `parseutils module<parseutils.html>`_ for helpers that parse tokens,
141##   numbers, identifiers, etc.
142## * `strutils module<strutils.html>`_ for common string handling operations
143## * `json module<json.html>`_ for a JSON parser
144## * `parsecfg module<parsecfg.html>`_ for a configuration file parser
145## * `parsecsv module<parsecsv.html>`_ for a simple CSV (comma separated value)
146##   parser
## * `parsexml module<parsexml.html>`_ for an XML / HTML parser
148## * `other parsers<lib.html#pure-libraries-parsers>`_ for more parsers
149
150{.push debugger: off.}
151
152include "system/inclrtl"
153
154import os
155
type
  CmdLineKind* = enum ## Kind of token found on the command line.
    cmdEnd,           ## No tokens are left.
    cmdArgument,      ## A plain argument, e.g. a filename.
    cmdLongOption,    ## A long option, e.g. `--option`.
    cmdShortOption    ## A short option, e.g. `-c`.

  OptParser* = object of RootObj
    ## State of the command line parser.
    ##
    ## Create an instance with the
    ## `initOptParser proc<#initOptParser,string,set[char],seq[string]>`_.
    pos*: int           ## Offset inside the current token at which
                        ## parsing resumes.
    inShortState: bool  ## True while a group of short options such as
                        ## `-abc` has only been consumed in part.
    allowWhitespaceAfterColon: bool ## Whether a long option ending in ':'
                        ## or '=' may take the following token as value.
    shortNoVal: set[char]   ## Short options that do not take a value.
    longNoVal: seq[string]  ## Long options that do not take a value.
    cmds: seq[string]   ## The command line, split into tokens.
    idx: int            ## Index into `cmds` of the current token.
    kind*: CmdLineKind  ## Kind of the most recently parsed token.
    key*: string        ## Option name, or the argument itself.
    val*: string        ## Option value; "" if the option was given none.
178
proc parseWord(s: string, i: int, w: var string,
               delim: set[char] = {'\t', ' '}): int =
  ## Appends the word of `s` that starts at index `i` to `w` and returns the
  ## index of the first character that follows the word.
  ##
  ## A word that opens with `"` runs up to the closing quote, or to the end
  ## of `s` if there is none; neither quote is copied and `delim` is not
  ## consulted. Any other word stops at the first character found in `delim`
  ## or at the end of `s`.
  result = i
  let quoted = result < s.len and s[result] == '"'
  if not quoted:
    while result < s.len and s[result] notin delim:
      w.add s[result]
      inc result
    return
  inc result # step over the opening quote
  while result < s.len:
    let c = s[result]
    inc result
    if c == '"': break # closing quote is consumed but not stored
    w.add c
194
proc initOptParser*(cmdline = "", shortNoVal: set[char] = {},
                    longNoVal: seq[string] = @[];
                    allowWhitespaceAfterColon = true): OptParser =
  ## Initializes the command line parser.
  ##
  ## A non-empty `cmdline` is split into tokens with `os.parseCmdLine`.
  ## If `cmdline == ""`, the real command line as provided by the
  ## `os` module is retrieved instead if it is available. If the
  ## command line is not available, the call fails with an assertion
  ## error (`doAssert`); it does not raise a `ValueError`.
  ##
  ## `shortNoVal` and `longNoVal` are used to specify which options
  ## do not take values. See the `documentation about these
  ## parameters<#shortnoval-and-longnoval>`_ for more information on
  ## how this affects parsing.
  ##
  ## If `allowWhitespaceAfterColon` is true, a long option that ends in
  ## ':' or '=' takes the token that follows it as its value, so that
  ## `--foo: bar` is read like `--foo:bar`.
  ##
  ## See also:
  ## * `getopt iterator<#getopt.i,OptParser>`_
  runnableExamples:
    var p = initOptParser()
    p = initOptParser("--left --debug:3 -l -r:2")
    p = initOptParser("--left --debug:3 -l -r:2",
                      shortNoVal = {'l'}, longNoVal = @["left"])

  result.pos = 0
  result.idx = 0
  result.inShortState = false
  result.shortNoVal = shortNoVal
  result.longNoVal = longNoVal
  result.allowWhitespaceAfterColon = allowWhitespaceAfterColon
  if cmdline != "":
    result.cmds = parseCmdLine(cmdline)
  else:
    when declared(paramCount):
      # paramStr(0) is the program name, so the parameters are 1..argc.
      let argc = paramCount()
      result.cmds = newSeq[string](argc)
      for i in countup(1, argc):
        result.cmds[i-1] = paramStr(i)
    else:
      # we cannot provide this for NimRtl creation on Posix, because we can't
      # access the command line arguments then!
      doAssert false, "empty command line given but" &
        " real command line is not accessible"

  result.kind = cmdEnd
  result.key = ""
  result.val = ""
239
proc initOptParser*(cmdline: seq[string], shortNoVal: set[char] = {},
                    longNoVal: seq[string] = @[];
                    allowWhitespaceAfterColon = true): OptParser =
  ## Initializes the command line parser.
  ##
  ## Each element of `cmdline` is used as one token, without any further
  ## splitting. If `cmdline.len == 0`, the real command line as provided by
  ## the `os` module is retrieved instead if it is available. If the
  ## command line is not available, the call fails with an assertion
  ## error (`doAssert`); it does not raise a `ValueError`.
  ## Behavior of the other parameters remains the same as in
  ## `initOptParser(string, ...)
  ## <#initOptParser,string,set[char],seq[string]>`_.
  ##
  ## See also:
  ## * `getopt iterator<#getopt.i,seq[string],set[char],seq[string]>`_
  runnableExamples:
    var p = initOptParser()
    p = initOptParser(@["--left", "--debug:3", "-l", "-r:2"])
    p = initOptParser(@["--left", "--debug:3", "-l", "-r:2"],
                      shortNoVal = {'l'}, longNoVal = @["left"])

  result.pos = 0
  result.idx = 0
  result.inShortState = false
  result.shortNoVal = shortNoVal
  result.longNoVal = longNoVal
  result.allowWhitespaceAfterColon = allowWhitespaceAfterColon
  if cmdline.len != 0:
    # seq assignment copies, so the parser owns its own token list.
    result.cmds = cmdline
  else:
    when declared(paramCount):
      # paramStr(0) is the program name, so the parameters are 1..argc.
      let argc = paramCount()
      result.cmds = newSeq[string](argc)
      for i in countup(1, argc):
        result.cmds[i-1] = paramStr(i)
    else:
      # we cannot provide this for NimRtl creation on Posix, because we can't
      # access the command line arguments then!
      doAssert false, "empty command line given but" &
        " real command line is not accessible"
  result.kind = cmdEnd
  result.key = ""
  result.val = ""
283
proc handleShortOption(p: var OptParser; cmd: string) =
  ## Parses one short option out of the token `cmd`, starting at `p.pos`.
  ##
  ## The character at `p.pos` becomes `p.key`. If it is followed by ':' or
  ## '=', or if `p.shortNoVal` is non-empty and does not contain the option,
  ## the remainder of `cmd` becomes `p.val` and the parser moves on to the
  ## next token. Otherwise `p.val` is left untouched and, as long as `cmd`
  ## has characters left, `p.inShortState` stays set so that the next call
  ## continues inside the same group (`-abc`).
  ##
  ## The token index is advanced exactly once when `cmd` is finished.
  ## Previously `-e:` with an empty value advanced it twice, which dropped
  ## the following token and could move `p.idx` past `p.cmds.len`.
  var i = p.pos
  p.kind = cmdShortOption
  if i < cmd.len:
    add(p.key, cmd[i])
    inc(i)
  p.inShortState = true
  while i < cmd.len and cmd[i] in {'\t', ' '}:
    inc(i)
    p.inShortState = false
  if i < cmd.len and (cmd[i] in {':', '='} or
      card(p.shortNoVal) > 0 and p.key[0] notin p.shortNoVal):
    # The option carries a value: everything after the optional separator.
    if cmd[i] in {':', '='}:
      inc(i)
    p.inShortState = false
    while i < cmd.len and cmd[i] in {'\t', ' '}: inc(i)
    p.val = substr(cmd, i)
    p.pos = 0
    inc p.idx
  elif i >= cmd.len:
    # Last option of the token: continue with the next token.
    p.inShortState = false
    p.pos = 0
    inc p.idx
  else:
    # More short options follow in this token; remember where to resume.
    p.pos = i
309
proc next*(p: var OptParser) {.rtl, extern: "npo$1".} =
  ## Parses the next token.
  ##
  ## `p.kind` describes what kind of token has been parsed. `p.key` and
  ## `p.val` are set accordingly. Once no tokens are left, `p.kind` is set
  ## to `cmdEnd`.
  ##
  ## A long option that ends in ':' or '=' takes the following token as its
  ## value if the parser was created with `allowWhitespaceAfterColon` and
  ## such a token exists. If `longNoVal` is non-empty, a long option that is
  ## not listed in it and has no ':' or '=' also takes the following token
  ## as its value.
  runnableExamples:
    var p = initOptParser("--left -r:2 file.txt")
    p.next()
    doAssert p.kind == cmdLongOption and p.key == "left"
    p.next()
    doAssert p.kind == cmdShortOption and p.key == "r" and p.val == "2"
    p.next()
    doAssert p.kind == cmdArgument and p.key == "file.txt"
    p.next()
    doAssert p.kind == cmdEnd

  if p.idx >= p.cmds.len:
    p.kind = cmdEnd
    return

  var i = p.pos
  while i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {'\t', ' '}: inc(i)
  p.pos = i
  setLen(p.key, 0)
  setLen(p.val, 0)
  if p.inShortState:
    # We are in the middle of a group of short options such as `-abc`.
    p.inShortState = false
    if i >= p.cmds[p.idx].len:
      # The group is exhausted. Start over on the following token so that
      # the character index of the old token is not applied to the new one.
      inc(p.idx)
      p.pos = 0
      next(p)
    else:
      handleShortOption(p, p.cmds[p.idx])
    return

  if i < p.cmds[p.idx].len and p.cmds[p.idx][i] == '-':
    inc(i)
    if i < p.cmds[p.idx].len and p.cmds[p.idx][i] == '-':
      p.kind = cmdLongOption
      inc(i)
      i = parseWord(p.cmds[p.idx], i, p.key, {' ', '\t', ':', '='})
      while i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {'\t', ' '}: inc(i)
      if i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {':', '='}:
        inc(i)
        while i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {'\t', ' '}: inc(i)
        # if we're at the end, use the next command line option, but only
        # if there is one: stepping past the last token would leave `p.idx`
        # at `p.cmds.len + 1` after the increment below.
        if i >= p.cmds[p.idx].len and p.idx + 1 < p.cmds.len and
            p.allowWhitespaceAfterColon:
          inc p.idx
          i = 0
        # `p.idx` is a valid index here; `substr` yields "" if `i` is at
        # the end of the token.
        p.val = p.cmds[p.idx].substr(i)
      elif len(p.longNoVal) > 0 and p.key notin p.longNoVal and
          p.idx+1 < p.cmds.len:
        # The option expects a value and has no separator: take the
        # following token.
        p.val = p.cmds[p.idx+1]
        inc p.idx
      else:
        p.val = ""
      inc p.idx
      p.pos = 0
    else:
      p.pos = i
      handleShortOption(p, p.cmds[p.idx])
  else:
    p.kind = cmdArgument
    p.key = p.cmds[p.idx]
    inc p.idx
    p.pos = 0
379
when declared(quoteShellCommand):
  proc cmdLineRest*(p: OptParser): string {.rtl, extern: "npo$1".} =
    ## Retrieves the rest of the command line that has not been parsed yet.
    ##
    ## The remaining tokens, starting with the parser's current token, are
    ## joined with `quoteShellCommand`. A group of short options (`-abc`)
    ## that has only been consumed in part is included as a whole. The
    ## result is `""` if no tokens are left.
    ##
    ## See also:
    ## * `remainingArgs proc<#remainingArgs,OptParser>`_
    ##
    ## **Examples:**
    ##
    ## .. code-block::
    ##   var p = initOptParser("--left -r:2 -- foo.txt bar.txt")
    ##   while true:
    ##     p.next()
    ##     if p.kind == cmdLongOption and p.key == "":  # Look for "--"
    ##       break
    ##     else: continue
    ##   doAssert p.cmdLineRest == "foo.txt bar.txt"
    if p.idx < p.cmds.len:
      result = p.cmds[p.idx .. ^1].quoteShellCommand
    else:
      # The token index can end up past the last token; slicing from there
      # would ask for a negative length.
      result = ""
398
proc remainingArgs*(p: OptParser): seq[string] {.rtl, extern: "npo$1".} =
  ## Returns the tokens that the parser has not consumed yet, in their
  ## original order. The result is empty if no tokens are left.
  ##
  ## See also:
  ## * `cmdLineRest proc<#cmdLineRest,OptParser>`_
  ##
  ## **Examples:**
  ##
  ## .. code-block::
  ##   var p = initOptParser("--left -r:2 -- foo.txt bar.txt")
  ##   while true:
  ##     p.next()
  ##     if p.kind == cmdLongOption and p.key == "":  # Look for "--"
  ##       break
  ##     else: continue
  ##   doAssert p.remainingArgs == @["foo.txt", "bar.txt"]
  result = @[]
  var tokenIdx = p.idx
  while tokenIdx < p.cmds.len:
    result.add p.cmds[tokenIdx]
    inc tokenIdx
417
iterator getopt*(p: var OptParser): tuple[kind: CmdLineKind, key,
    val: string] =
  ## Convenience iterator for iterating over the given
  ## `OptParser<#OptParser>`_.
  ##
  ## The parser is rewound first, so iteration always starts with the first
  ## token, even if `p` was used before. Rewinding also clears the state
  ## left behind when a group of short options (`-abc`) was only partly
  ## read.
  ##
  ## There is no need to check for `cmdEnd` while iterating.
  ##
  ## See also:
  ## * `initOptParser proc<#initOptParser,string,set[char],seq[string]>`_
  ##
  ## **Examples:**
  ##
  ## .. code-block::
  ##   # these are placeholders, of course
  ##   proc writeHelp() = discard
  ##   proc writeVersion() = discard
  ##
  ##   var filename: string
  ##   var p = initOptParser("--left --debug:3 -l -r:2")
  ##
  ##   for kind, key, val in p.getopt():
  ##     case kind
  ##     of cmdArgument:
  ##       filename = key
  ##     of cmdLongOption, cmdShortOption:
  ##       case key
  ##       of "help", "h": writeHelp()
  ##       of "version", "v": writeVersion()
  ##       else: discard
  ##     of cmdEnd: assert(false) # cannot happen
  ##   if filename == "":
  ##     # no filename has been given, so we show the help
  ##     writeHelp()
  p.pos = 0
  p.idx = 0
  # Without this, a rewind in the middle of `-abc` would make `next` treat
  # the leading '-' of the first token as a short option name.
  p.inShortState = false
  while true:
    next(p)
    if p.kind == cmdEnd: break
    yield (p.kind, p.key, p.val)
456
iterator getopt*(cmdline: seq[string] = @[],
                  shortNoVal: set[char] = {}, longNoVal: seq[string] = @[]):
            tuple[kind: CmdLineKind, key, val: string] =
  ## Convenience iterator that parses `cmdline` with a freshly created
  ## `OptParser<#OptParser>`_ and yields every token.
  ##
  ## If `cmdline` is empty, the real command line as provided by the
  ## `os` module is used instead.
  ##
  ## `shortNoVal` and `longNoVal` name the options that do not take
  ## values. See the `documentation about these
  ## parameters<#shortnoval-and-longnoval>`_ for more information on
  ## how this affects parsing.
  ##
  ## `cmdEnd` is never yielded, so there is no need to check for it.
  ##
  ## See also:
  ## * `initOptParser proc<#initOptParser,seq[string],set[char],seq[string]>`_
  ##
  ## **Examples:**
  ##
  ## .. code-block::
  ##
  ##   # these are placeholders, of course
  ##   proc writeHelp() = discard
  ##   proc writeVersion() = discard
  ##
  ##   var filename: string
  ##   let params = @["--left", "--debug:3", "-l", "-r:2"]
  ##
  ##   for kind, key, val in getopt(params):
  ##     case kind
  ##     of cmdArgument:
  ##       filename = key
  ##     of cmdLongOption, cmdShortOption:
  ##       case key
  ##       of "help", "h": writeHelp()
  ##       of "version", "v": writeVersion()
  ##       else: discard
  ##     of cmdEnd: assert(false) # cannot happen
  ##   if filename == "":
  ##     # no filename has been given, so we show the help
  ##     writeHelp()
  var parser = initOptParser(cmdline, shortNoVal = shortNoVal,
                             longNoVal = longNoVal)
  parser.next()
  while parser.kind != cmdEnd:
    yield (parser.kind, parser.key, parser.val)
    parser.next()
505
506{.pop.}
507