1#
2#
3#            Nim's Runtime Library
4#        (c) Copyright 2012 Andreas Rumpf
5#
6#    See the file "copying.txt", included in this
7#    distribution, for details about the copyright.
8#
9
10## The system module defines several common functions for working with strings,
11## such as:
12## * `$` for converting other data-types to strings
13## * `&` for string concatenation
14## * `add` for adding a new character or a string to the existing one
15## * `in` (alias for `contains`) and `notin` for checking if a character
16##   is in a string
17##
18## This module builds upon that, providing additional functionality in form of
19## procedures, iterators and templates for strings.
20
runnableExamples:
  # `join` glues the stringified items together with the separator.
  let numbers = @[867, 5309]
  let jenny = numbers.join("-")
  assert jenny == "867-5309"

  # One three-line string feeds the remaining examples.
  let multiLineString = "first line\nsecond line\nthird line"

  assert multiLineString.splitLines() ==
         @["first line", "second line", "third line"]
  # Called without a separator, `split` uses the `Whitespace` set.
  assert multiLineString.split() ==
         @["first", "line", "second", "line", "third", "line"]
  assert multiLineString.indent(4) ==
         "    first line\n    second line\n    third line"
  assert repeat('z', 5) == "zzzzz"
36
37## The chaining of functions is possible thanks to the
38## `method call syntax<manual.html#procedures-method-call-syntax>`_:
39
runnableExamples:
  from std/sequtils import map

  # Split first, then convert every piece, in one left-to-right chain.
  let jenny = "867-5309"
  let parts = jenny.split('-').map(parseInt)
  assert parts == @[867, 5309]

  # Each call takes the previous result as its first argument.
  let chained = "Beetlejuice".indent(1).repeat(3).strip
  assert chained == "Beetlejuice Beetlejuice Beetlejuice"
48
49## This module is available for the `JavaScript target
50## <backends.html#backends-the-javascript-target>`_.
51##
52## ----
53##
54## **See also:**
55## * `strformat module<strformat.html>`_ for string interpolation and formatting
56## * `unicode module<unicode.html>`_ for Unicode UTF-8 handling
57## * `sequtils module<sequtils.html>`_ for operations on container
58##   types (including strings)
59## * `parsecsv module<parsecsv.html>`_ for a high-performance CSV parser
60## * `parseutils module<parseutils.html>`_ for lower-level parsing of tokens,
61##   numbers, identifiers, etc.
62## * `parseopt module<parseopt.html>`_ for command-line parsing
63## * `pegs module<pegs.html>`_ for PEG (Parsing Expression Grammar) support
64## * `strtabs module<strtabs.html>`_ for efficient hash tables
65##   (dictionaries, in some programming languages) mapping from strings to strings
66## * `ropes module<ropes.html>`_ for rope data type, which can represent very
67##   long strings efficiently
68## * `re module<re.html>`_ for regular expression (regex) support
69## * `strscans<strscans.html>`_ for `scanf` and `scanp` macros, which offer
70##   easier substring extraction than regular expressions
71
72
73import parseutils
74from math import pow, floor, log10
75from algorithm import fill, reverse
76import std/enumutils
77
78from unicode import toLower, toUpper
79export toLower, toUpper
80
81include "system/inclrtl"
82import std/private/since
83from std/private/strimpl import cmpIgnoreStyleImpl, cmpIgnoreCaseImpl, startsWithImpl, endsWithImpl
84
85
const
  Whitespace* = {' ', '\t', '\v', '\c', '\l', '\f'}
    ## The characters treated as whitespace: space, tab, vertical tab,
    ## carriage return, line feed and form feed.

  Letters* = {'a'..'z', 'A'..'Z'}
    ## The ASCII letters, lower and upper case.

  Digits* = {'0'..'9'}
    ## The decimal digits.

  HexDigits* = Digits + {'a'..'f', 'A'..'F'}
    ## The hexadecimal digits, in both letter cases.

  IdentChars* = Letters + Digits + {'_'}
    ## The characters an identifier can consist of.

  IdentStartChars* = Letters + {'_'}
    ## The characters an identifier can start with.

  Newlines* = {'\c', '\l'}
    ## The characters a newline terminator can start with (carriage
    ## return, line feed).

  AllChars* = {low(char)..high(char)}
    ## Every possible `char` value.
    ##
    ## Mostly useful as the base for building *inverted* sets, for instance
    ## to let the `find func<#find,string,set[char],Natural,int>`_
    ## locate **invalid** characters in a string:
    ##
    ## .. code-block:: nim
    ##   let invalid = AllChars - Digits
    ##   doAssert "01234".find(invalid) == -1
    ##   doAssert "01A34".find(invalid) == 2
121
func isAlphaAscii*(c: char): bool {.rtl, extern: "nsuIsAlphaAsciiChar".} =
  ## Returns `true` if `c` is a member of `Letters`, i.e. an ASCII letter
  ## (`a-z` or `A-Z`).
  ##
  ## Only ASCII is examined.
  ## Use `Unicode module<unicode.html>`_ for UTF-8 support.
  runnableExamples:
    doAssert isAlphaAscii('e')
    doAssert isAlphaAscii('E')
    doAssert not isAlphaAscii('8')
  c in Letters
132
func isAlphaNumeric*(c: char): bool {.rtl, extern: "nsuIsAlphaNumericChar".} =
  ## Returns `true` if `c` is an ASCII letter or a decimal digit.
  ##
  ## Only the ASCII ranges `a-z`, `A-Z` and `0-9` are examined.
  runnableExamples:
    doAssert isAlphaNumeric('n')
    doAssert isAlphaNumeric('8')
    doAssert not isAlphaNumeric(' ')
  c in Letters or c in Digits
142
func isDigit*(c: char): bool {.rtl, extern: "nsuIsDigitChar".} =
  ## Returns `true` if `c` is a decimal digit.
  ##
  ## Only the ASCII characters `0-9` are examined.
  runnableExamples:
    doAssert not isDigit('n')
    doAssert isDigit('8')
  c in Digits
151
func isSpaceAscii*(c: char): bool {.rtl, extern: "nsuIsSpaceAsciiChar".} =
  ## Returns `true` if `c` is one of the characters in `Whitespace`.
  runnableExamples:
    doAssert not isSpaceAscii('n')
    doAssert isSpaceAscii(' ')
    doAssert isSpaceAscii('\t')
  c in Whitespace
159
func isLowerAscii*(c: char): bool {.rtl, extern: "nsuIsLowerAsciiChar".} =
  ## Returns `true` if `c` is a lower case ASCII letter (`a-z`).
  ##
  ## Only ASCII is examined.
  ## Use `Unicode module<unicode.html>`_ for UTF-8 support.
  ##
  ## See also:
  ## * `toLowerAscii func<#toLowerAscii,char>`_
  runnableExamples:
    doAssert isLowerAscii('e')
    doAssert not isLowerAscii('E')
    doAssert not isLowerAscii('7')
  case c
  of 'a'..'z': true
  else: false
173
func isUpperAscii*(c: char): bool {.rtl, extern: "nsuIsUpperAsciiChar".} =
  ## Returns `true` if `c` is an upper case ASCII letter (`A-Z`).
  ##
  ## Only ASCII is examined.
  ## Use `Unicode module<unicode.html>`_ for UTF-8 support.
  ##
  ## See also:
  ## * `toUpperAscii func<#toUpperAscii,char>`_
  runnableExamples:
    doAssert not isUpperAscii('e')
    doAssert isUpperAscii('E')
    doAssert not isUpperAscii('7')
  case c
  of 'A'..'Z': true
  else: false
187
188
func toLowerAscii*(c: char): char {.rtl, extern: "nsuToLowerAsciiChar".} =
  ## Returns the lower case counterpart of `c`.
  ##
  ## Only the letters `A-Z` are converted; every other character is returned
  ## unchanged. See `unicode.toLower <unicode.html#toLower,Rune>`_ for a
  ## version that works for any Unicode character.
  ##
  ## See also:
  ## * `isLowerAscii func<#isLowerAscii,char>`_
  ## * `toLowerAscii func<#toLowerAscii,string>`_ for converting a string
  runnableExamples:
    doAssert toLowerAscii('A') == 'a'
    doAssert toLowerAscii('e') == 'e'
  case c
  of 'A'..'Z':
    # upper and lower case ASCII letters sit a fixed distance apart
    char(ord(c) + (ord('a') - ord('A')))
  else:
    c
206
template toImpl(call) =
  ## Fills `result` with `call` applied to each character of `s`.
  ## Both `result` and `s` are taken from the routine that uses this
  ## template.
  result = newString(len(s))
  for idx in 0 ..< len(s):
    result[idx] = call(s[idx])
211
func toLowerAscii*(s: string): string {.rtl, extern: "nsuToLowerAsciiStr".} =
  ## Returns a copy of `s` with every character converted to lower case.
  ##
  ## Only the letters `A-Z` are converted. See `unicode.toLower
  ## <unicode.html#toLower,string>`_ for a version that works for any
  ## Unicode character.
  ##
  ## See also:
  ## * `normalize func<#normalize,string>`_
  runnableExamples:
    doAssert toLowerAscii("FooBar!") == "foobar!"
  # apply the `char` overload to each character
  toImpl(toLowerAscii)
224
func toUpperAscii*(c: char): char {.rtl, extern: "nsuToUpperAsciiChar".} =
  ## Returns the upper case counterpart of `c`.
  ##
  ## Only the letters `a-z` are converted; every other character is returned
  ## unchanged. See `unicode.toUpper <unicode.html#toUpper,Rune>`_ for a
  ## version that works for any Unicode character.
  ##
  ## See also:
  ## * `isUpperAscii func<#isUpperAscii,char>`_
  ## * `toUpperAscii func<#toUpperAscii,string>`_ for converting a string
  ## * `capitalizeAscii func<#capitalizeAscii,string>`_
  runnableExamples:
    doAssert toUpperAscii('a') == 'A'
    doAssert toUpperAscii('E') == 'E'
  case c
  of 'a'..'z':
    # upper and lower case ASCII letters sit a fixed distance apart
    char(ord(c) - (ord('a') - ord('A')))
  else:
    c
243
func toUpperAscii*(s: string): string {.rtl, extern: "nsuToUpperAsciiStr".} =
  ## Returns a copy of `s` with every character converted to upper case.
  ##
  ## Only the letters `a-z` are converted. See `unicode.toUpper
  ## <unicode.html#toUpper,string>`_ for a version that works for any
  ## Unicode character.
  ##
  ## See also:
  ## * `capitalizeAscii func<#capitalizeAscii,string>`_
  runnableExamples:
    doAssert toUpperAscii("FooBar!") == "FOOBAR!"
  # apply the `char` overload to each character
  toImpl(toUpperAscii)
256
func capitalizeAscii*(s: string): string {.rtl, extern: "nsuCapitalizeAscii".} =
  ## Returns a copy of `s` whose first character is converted to upper
  ## case. An empty string yields an empty string.
  ##
  ## Only the letters `a-z` are converted.
  ## Use `Unicode module<unicode.html>`_ for UTF-8 support.
  ##
  ## See also:
  ## * `toUpperAscii func<#toUpperAscii,char>`_
  runnableExamples:
    doAssert capitalizeAscii("foo") == "Foo"
    doAssert capitalizeAscii("-bar") == "-bar"
  result = newStringOfCap(s.len)
  if s.len > 0:
    result.add toUpperAscii(s[0])
    # everything after the first character is copied as is
    result.add substr(s, 1)
270
func nimIdentNormalize*(s: string): string =
  ## Normalizes the string `s` as a Nim identifier.
  ##
  ## The first character is kept exactly as it is. Every following
  ## character is converted to lower case (ASCII `A-Z` only) and every
  ## following `'_'` is dropped. An empty `s` yields an empty string.
  runnableExamples:
    doAssert nimIdentNormalize("Foo_bar") == "Foobar"
    doAssert nimIdentNormalize("") == ""
  result = newString(s.len)
  if s.len == 0:
    # Nothing to copy. Returning here also keeps the trailing `setLen`
    # from growing the empty result to one (zero) character.
    return
  result[0] = s[0]
  # `j` is the write position in `result`; it lags behind `i` whenever
  # an underscore has been skipped.
  var j = 1
  for i in 1..len(s) - 1:
    if s[i] in {'A'..'Z'}:
      result[j] = chr(ord(s[i]) + (ord('a') - ord('A')))
      inc j
    elif s[i] != '_':
      result[j] = s[i]
      inc j
  # trim the slots left unused by dropped underscores
  if j != s.len: setLen(result, j)
290
func normalize*(s: string): string {.rtl, extern: "nsuNormalize".} =
  ## Returns a normalized copy of `s`: the letters `A-Z` are converted to
  ## lower case and every `'_'` is removed.
  ##
  ## This should NOT be used to normalize Nim identifier names.
  ##
  ## See also:
  ## * `toLowerAscii func<#toLowerAscii,string>`_
  runnableExamples:
    doAssert normalize("Foo_bar") == "foobar"
    doAssert normalize("Foo Bar") == "foo bar"
  result = newStringOfCap(s.len)
  for c in s:
    case c
    of 'A'..'Z':
      result.add chr(ord(c) + (ord('a') - ord('A')))
    of '_':
      discard # underscores are dropped
    else:
      result.add c
312
func cmpIgnoreCase*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreCase".} =
  ## Compares `a` and `b` without regard to letter case. Returns:
  ##
  ## | 0 if a == b
  ## | < 0 if a < b
  ## | > 0 if a > b
  runnableExamples:
    doAssert cmpIgnoreCase("FooBar", "foobar") == 0
    doAssert cmpIgnoreCase("bar", "Foo") < 0
    doAssert cmpIgnoreCase("Foo5", "foo4") > 0
  # the comparison itself lives in `std/private/strimpl`
  cmpIgnoreCaseImpl(a, b)
324
# This routine is a hot spot in the compiler, so runtime checks and line
# tracing are switched off for it.
{.push checks: off, line_trace: off.}

func cmpIgnoreStyle*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreStyle".} =
  ## Compares `a` and `b` with the same meaning as
  ## `cmp(normalize(a), normalize(b))`, but optimized to not allocate
  ## temporary strings.
  ##
  ## This should NOT be used to compare Nim identifier names.
  ## Use `macros.eqIdent<macros.html#eqIdent,string,string>`_ for that.
  ##
  ## Returns:
  ##
  ## | 0 if a == b
  ## | < 0 if a < b
  ## | > 0 if a > b
  runnableExamples:
    doAssert cmpIgnoreStyle("foo_bar", "FooBar") == 0
    doAssert cmpIgnoreStyle("foo_bar_5", "FooBar4") > 0
  # the comparison itself lives in `std/private/strimpl`
  cmpIgnoreStyleImpl(a, b)

{.pop.}
344
345# --------- Private templates for different split separators -----------
346
func substrEq(s: string, pos: int, substr: string): bool =
  ## Returns `true` if `substr` occurs in `s` starting at index `pos`.
  ## An empty `substr` matches at any position.
  result = true
  for offset, expected in substr:
    let at = pos + offset
    # running past the end of `s` counts as a mismatch
    if at >= s.len or s[at] != expected:
      return false
353
template stringHasSep(s: string, index: int, seps: set[char]): bool =
  ## `true` if the character of `s` at `index` is a member of `seps`.
  contains(seps, s[index])

template stringHasSep(s: string, index: int, sep: char): bool =
  ## `true` if the character of `s` at `index` equals `sep`.
  sep == s[index]

template stringHasSep(s: string, index: int, sep: string): bool =
  ## `true` if the string `sep` occurs in `s` starting at `index`.
  substrEq(s, index, sep)
362
template splitCommon(s, sep, maxsplit, sepLen) =
  ## Shared body of the `split` iterators: walks `s` from left to right
  ## and yields the pieces found between occurrences of `sep`.
  ## `sepLen` is the number of characters skipped after each separator.
  var
    cursor = 0
    budget = maxsplit

  while cursor <= len(s):
    let pieceStart = cursor
    let finalPiece = budget == 0
    if finalPiece:
      # no splits left: the remainder of `s` is yielded in one piece
      cursor = len(s)
    else:
      while cursor < len(s) and not stringHasSep(s, cursor, sep):
        inc cursor
    yield substr(s, pieceStart, cursor - 1)
    if finalPiece: break
    dec budget
    inc cursor, sepLen
377
template oldSplit(s, seps, maxsplit) =
  ## Yields the non-empty runs of characters of `s` that are not in
  ## `seps`. Once `maxsplit` pieces have been yielded, the next piece
  ## extends to the end of `s`.
  var
    pos = 0
    budget = maxsplit
  assert(not ('\0' in seps))
  while pos < len(s):
    # skip the separators in front of the next piece
    while pos < len(s) and s[pos] in seps: inc pos
    let pieceStart = pos
    while pos < len(s) and s[pos] notin seps: inc pos
    if pos > pieceStart:
      if budget == 0: pos = len(s)
      yield substr(s, pieceStart, pos - 1)
      if budget == 0: break
      dec budget
391
template accResult(iter: untyped) =
  ## Collects every item produced by `iter` into `result`, which is taken
  ## from the routine that uses this template.
  result = @[]
  for item in iter:
    result.add item
395
396
iterator split*(s: string, sep: char, maxsplit: int = -1): string =
  ## Splits the string `s` into substrings at every occurrence of the
  ## character `sep`.
  ##
  ## Adjacent separators, as well as a separator at the very start or end
  ## of `s`, produce empty substrings. For instance:
  ##
  ## .. code-block:: nim
  ##   for word in split(";;this;is;an;;example;;;", ';'):
  ##     writeLine(stdout, word)
  ##
  ## Results in:
  ##
  ## .. code-block::
  ##   ""
  ##   ""
  ##   "this"
  ##   "is"
  ##   "an"
  ##   ""
  ##   "example"
  ##   ""
  ##   ""
  ##   ""
  ##
  ## See also:
  ## * `rsplit iterator<#rsplit.i,string,char,int>`_
  ## * `splitLines iterator<#splitLines.i,string>`_
  ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
  ## * `split func<#split,string,char,int>`_
  # a `char` separator always occupies exactly one position
  splitCommon(s, sep, maxsplit, 1)
427
iterator split*(s: string, seps: set[char] = Whitespace,
                maxsplit: int = -1): string =
  ## Splits the string `s` into substrings using a group of separators.
  ##
  ## Every character that is a member of `seps` ends the current
  ## substring; adjacent separators therefore produce empty substrings.
  ##
  ## .. code-block:: nim
  ##   for word in split("this\lis an\texample"):
  ##     writeLine(stdout, word)
  ##
  ## ...generates this output:
  ##
  ## .. code-block::
  ##   "this"
  ##   "is"
  ##   "an"
  ##   "example"
  ##
  ## And the following code:
  ##
  ## .. code-block:: nim
  ##   for word in split("this:is;an$example", {';', ':', '$'}):
  ##     writeLine(stdout, word)
  ##
  ## ...produces the same output as the first example. The code:
  ##
  ## .. code-block:: nim
  ##   let date = "2012-11-20T22:08:08.398990"
  ##   let separators = {' ', '-', ':', 'T'}
  ##   for number in split(date, separators):
  ##     writeLine(stdout, number)
  ##
  ## ...results in:
  ##
  ## .. code-block::
  ##   "2012"
  ##   "11"
  ##   "20"
  ##   "22"
  ##   "08"
  ##   "08.398990"
  ##
  ## See also:
  ## * `rsplit iterator<#rsplit.i,string,set[char],int>`_
  ## * `splitLines iterator<#splitLines.i,string>`_
  ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
  ## * `split func<#split,string,set[char],int>`_
  # each separator character occupies exactly one position
  splitCommon(s, seps, maxsplit, 1)
476
iterator split*(s: string, sep: string, maxsplit: int = -1): string =
  ## Splits the string `s` into substrings using a string separator.
  ##
  ## Substrings are separated by the string `sep`. If `maxsplit` is
  ## non-negative, at most `maxsplit` splits are made and the remainder of
  ## `s` is yielded as the last item.
  ##
  ## An empty `sep` splits nothing: `s` is yielded once, unchanged.
  ##
  ## The code:
  ##
  ## .. code-block:: nim
  ##   for word in split("thisDATAisDATAcorrupted", "DATA"):
  ##     writeLine(stdout, word)
  ##
  ## Results in:
  ##
  ## .. code-block::
  ##   "this"
  ##   "is"
  ##   "corrupted"
  ##
  ## See also:
  ## * `rsplit iterator<#rsplit.i,string,string,int,bool>`_
  ## * `splitLines iterator<#splitLines.i,string>`_
  ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
  ## * `split func<#split,string,string,int>`_
  if sep.len == 0:
    # An empty separator compares equal at every index and consumes no
    # characters, so the shared scanning loop would never advance and
    # would keep yielding "" without end.
    yield s
  else:
    splitCommon(s, sep, maxsplit, sep.len)
500
501
template rsplitCommon(s, sep, maxsplit, sepLen) =
  ## Shared body of the `rsplit` iterators: scans `s` from its end towards
  ## its start and yields the pieces found between occurrences of `sep`.
  var
    pieceEnd = s.len - 1
    scan = pieceEnd
    budget = maxsplit
  # `scan` may reach -1 so that the piece in front of a separator located
  # at the very beginning of `s` is yielded too
  while scan >= -1:
    while scan >= 0 and not stringHasSep(s, scan, sep):
      dec scan
    if budget == 0:
      # no splits left: everything up to the start forms the last piece
      scan = -1
    let pieceStart =
      if scan == -1: 0
      else: scan + sepLen
    yield substr(s, pieceStart, pieceEnd)
    if budget == 0: break
    dec budget
    dec scan
    pieceEnd = scan
525
iterator rsplit*(s: string, sep: char,
                 maxsplit: int = -1): string =
  ## Splits the string `s` into substrings from the right, using the
  ## character `sep` as separator. Works exactly the same as `split
  ## iterator <#split.i,string,char,int>`_ except in reverse order.
  ##
  ## .. code-block:: nim
  ##   for piece in "foo:bar".rsplit(':'):
  ##     echo piece
  ##
  ## Results in:
  ##
  ## .. code-block:: nim
  ##   "bar"
  ##   "foo"
  ##
  ## See also:
  ## * `split iterator<#split.i,string,char,int>`_
  ## * `splitLines iterator<#splitLines.i,string>`_
  ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
  ## * `rsplit func<#rsplit,string,char,int>`_
  # a `char` separator always occupies exactly one position
  rsplitCommon(s, sep, maxsplit, 1)
550
iterator rsplit*(s: string, seps: set[char] = Whitespace,
                 maxsplit: int = -1): string =
  ## Splits the string `s` into substrings from the right, treating every
  ## character in the set `seps` as a separator. Works exactly the same as
  ## `split iterator <#split.i,string,char,int>`_ except in reverse order.
  ##
  ## .. code-block:: nim
  ##   for piece in "foo bar".rsplit(WhiteSpace):
  ##     echo piece
  ##
  ## Results in:
  ##
  ## .. code-block:: nim
  ##   "bar"
  ##   "foo"
  ##
  ## See also:
  ## * `split iterator<#split.i,string,set[char],int>`_
  ## * `splitLines iterator<#splitLines.i,string>`_
  ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
  ## * `rsplit func<#rsplit,string,set[char],int>`_
  # each separator character occupies exactly one position
  rsplitCommon(s, seps, maxsplit, 1)
575
iterator rsplit*(s: string, sep: string, maxsplit: int = -1,
                 keepSeparators: bool = false): string =
  ## Splits the string `s` into substrings from the right, using the
  ## string `sep` as separator. Works exactly the same as `split iterator
  ## <#split.i,string,string,int>`_ except in reverse order.
  ##
  ## `keepSeparators` is accepted but not consulted by this
  ## implementation.
  ##
  ## .. code-block:: nim
  ##   for piece in "foothebar".rsplit("the"):
  ##     echo piece
  ##
  ## Results in:
  ##
  ## .. code-block:: nim
  ##   "bar"
  ##   "foo"
  ##
  ## See also:
  ## * `split iterator<#split.i,string,string,int>`_
  ## * `splitLines iterator<#splitLines.i,string>`_
  ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
  ## * `rsplit func<#rsplit,string,string,int>`_
  # the separator occupies `sep.len` positions
  rsplitCommon(s, sep, maxsplit, sep.len)
600
iterator splitLines*(s: string, keepEol = false): string =
  ## Splits the string `s` into the lines it contains.
  ##
  ## All three newline conventions (CR, LF, CR-LF) are recognized as
  ## written with `character literals
  ## <manual.html#lexical-analysis-character-literals>`_. The yielded
  ## lines carry no trailing end of line characters unless `keepEol` is
  ## `true`.
  ##
  ## Example:
  ##
  ## .. code-block:: nim
  ##   for line in splitLines("\nthis\nis\nan\n\nexample\n"):
  ##     writeLine(stdout, line)
  ##
  ## Results in:
  ##
  ## .. code-block:: nim
  ##   ""
  ##   "this"
  ##   "is"
  ##   "an"
  ##   ""
  ##   "example"
  ##   ""
  ##
  ## See also:
  ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
  ## * `splitLines func<#splitLines,string>`_
  var
    lineStart = 0
    pos = 0
  while true:
    # advance to the next end of line character, or to the end of `s`
    while pos < s.len and s[pos] notin {'\c', '\l'}: inc pos

    let eolStart = pos
    if pos < s.len:
      case s[pos]
      of '\l':
        inc pos
      of '\c':
        inc pos
        # CR directly followed by LF counts as one terminator
        if pos < s.len and s[pos] == '\l': inc pos
      else:
        discard

    let lineEnd =
      if keepEol: pos - 1
      else: eolStart - 1
    yield substr(s, lineStart, lineEnd)

    # no terminator consumed: the end of the string has been reached
    if pos == eolStart:
      break

    lineStart = pos
649
iterator splitWhitespace*(s: string, maxsplit: int = -1): string =
  ## Splits the string `s` at runs of whitespace; leading and trailing
  ## whitespace is not part of any yielded item. If `maxsplit` is
  ## specified and is positive, no more than `maxsplit` splits are made.
  ##
  ## The following code:
  ##
  ## .. code-block:: nim
  ##   let s = "  foo \t bar  baz  "
  ##   for ms in [-1, 1, 2, 3]:
  ##     echo "------ maxsplit = ", ms, ":"
  ##     for item in s.splitWhitespace(maxsplit=ms):
  ##       echo '"', item, '"'
  ##
  ## ...results in:
  ##
  ## .. code-block::
  ##   ------ maxsplit = -1:
  ##   "foo"
  ##   "bar"
  ##   "baz"
  ##   ------ maxsplit = 1:
  ##   "foo"
  ##   "bar  baz  "
  ##   ------ maxsplit = 2:
  ##   "foo"
  ##   "bar"
  ##   "baz  "
  ##   ------ maxsplit = 3:
  ##   "foo"
  ##   "bar"
  ##   "baz"
  ##
  ## See also:
  ## * `splitLines iterator<#splitLines.i,string>`_
  ## * `splitWhitespace func<#splitWhitespace,string,int>`_
  # `oldSplit` never yields the empty pieces between separators
  oldSplit(s, Whitespace, maxsplit)
687
688
689
func split*(s: string, sep: char, maxsplit: int = -1): seq[string] {.rtl,
    extern: "nsuSplitChar".} =
  ## Returns the substrings produced by the `split iterator
  ## <#split.i,string,char,int>`_ (see its documentation) as a sequence.
  ##
  ## See also:
  ## * `split iterator <#split.i,string,char,int>`_
  ## * `rsplit func<#rsplit,string,char,int>`_
  ## * `splitLines func<#splitLines,string>`_
  ## * `splitWhitespace func<#splitWhitespace,string,int>`_
  runnableExamples:
    doAssert "a,b,c".split(',') == @["a", "b", "c"]
    doAssert "".split(' ') == @[""]
  # inside the collecting `for` loop this resolves to the iterator
  accResult(s.split(sep, maxsplit))
704
func split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[
    string] {.rtl, extern: "nsuSplitCharSet".} =
  ## Returns the substrings produced by the `split iterator
  ## <#split.i,string,set[char],int>`_ (see its documentation) as a
  ## sequence.
  ##
  ## See also:
  ## * `split iterator <#split.i,string,set[char],int>`_
  ## * `rsplit func<#rsplit,string,set[char],int>`_
  ## * `splitLines func<#splitLines,string>`_
  ## * `splitWhitespace func<#splitWhitespace,string,int>`_
  runnableExamples:
    doAssert "a,b;c".split({',', ';'}) == @["a", "b", "c"]
    doAssert "".split({' '}) == @[""]
  # inside the collecting `for` loop this resolves to the iterator
  accResult(s.split(seps, maxsplit))
719
func split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
    extern: "nsuSplitString".} =
  ## Returns the substrings of `s` that are separated by the string `sep`,
  ## as a sequence.
  ##
  ## This is a wrapper around the `split iterator
  ## <#split.i,string,string,int>`_. `sep` must not be empty; this is
  ## enforced with `doAssert`.
  ##
  ## See also:
  ## * `split iterator <#split.i,string,string,int>`_
  ## * `rsplit func<#rsplit,string,string,int>`_
  ## * `splitLines func<#splitLines,string>`_
  ## * `splitWhitespace func<#splitWhitespace,string,int>`_
  runnableExamples:
    doAssert "a,b,c".split(",") == @["a", "b", "c"]
    doAssert "a man a plan a canal panama".split("a ") == @["", "man ", "plan ", "canal panama"]
    doAssert "".split("Elon Musk") == @[""]
    doAssert "a  largely    spaced sentence".split(" ") == @["a", "", "largely",
        "", "", "", "spaced", "sentence"]
    doAssert "a  largely    spaced sentence".split(" ", maxsplit = 1) == @["a", " largely    spaced sentence"]
  doAssert(sep.len > 0)

  # inside the collecting `for` loop this resolves to the iterator
  accResult(s.split(sep, maxsplit))
742
func rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string] {.rtl,
    extern: "nsuRSplitChar".} =
  ## Returns the substrings produced by the `rsplit iterator
  ## <#rsplit.i,string,char,int>`_ as a sequence, arranged in the order
  ## in which they appear in `s`.
  ##
  ## A possible common use case for `rsplit` is path manipulation,
  ## particularly on systems that don't use a common delimiter.
  ##
  ## For example, if a system had `#` as a delimiter, you could
  ## do the following to get the tail of the path:
  ##
  ## .. code-block:: nim
  ##   var tailSplit = rsplit("Root#Object#Method#Index", '#', maxsplit=1)
  ##
  ## Results in `tailSplit` containing:
  ##
  ## .. code-block:: nim
  ##   @["Root#Object#Method", "Index"]
  ##
  ## See also:
  ## * `rsplit iterator <#rsplit.i,string,char,int>`_
  ## * `split func<#split,string,char,int>`_
  ## * `splitLines func<#splitLines,string>`_
  ## * `splitWhitespace func<#splitWhitespace,string,int>`_
  accResult(s.rsplit(sep, maxsplit))
  # the iterator yields the rightmost piece first
  reverse(result)
769
func rsplit*(s: string, seps: set[char] = Whitespace,
             maxsplit: int = -1): seq[string]
             {.rtl, extern: "nsuRSplitCharSet".} =
  ## Returns the substrings produced by the `rsplit iterator
  ## <#rsplit.i,string,set[char],int>`_ as a sequence, arranged in the
  ## order in which they appear in `s`.
  ##
  ## A possible common use case for `rsplit` is path manipulation,
  ## particularly on systems that don't use a common delimiter.
  ##
  ## For example, if a system had `#` as a delimiter, you could
  ## do the following to get the tail of the path:
  ##
  ## .. code-block:: nim
  ##   var tailSplit = rsplit("Root#Object#Method#Index", {'#'}, maxsplit=1)
  ##
  ## Results in `tailSplit` containing:
  ##
  ## .. code-block:: nim
  ##   @["Root#Object#Method", "Index"]
  ##
  ## See also:
  ## * `rsplit iterator <#rsplit.i,string,set[char],int>`_
  ## * `split func<#split,string,set[char],int>`_
  ## * `splitLines func<#splitLines,string>`_
  ## * `splitWhitespace func<#splitWhitespace,string,int>`_
  accResult(s.rsplit(seps, maxsplit))
  # the iterator yields the rightmost piece first
  reverse(result)
797
func rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
    extern: "nsuRSplitString".} =
  ## Returns the substrings produced by the `rsplit iterator
  ## <#rsplit.i,string,string,int,bool>`_ as a sequence, arranged in the
  ## order in which they appear in `s`.
  ##
  ## A possible common use case for `rsplit` is path manipulation,
  ## particularly on systems that don't use a common delimiter.
  ##
  ## For example, if a system had `#` as a delimiter, you could
  ## do the following to get the tail of the path:
  ##
  ## .. code-block:: nim
  ##   var tailSplit = rsplit("Root#Object#Method#Index", "#", maxsplit=1)
  ##
  ## Results in `tailSplit` containing:
  ##
  ## .. code-block:: nim
  ##   @["Root#Object#Method", "Index"]
  ##
  ## See also:
  ## * `rsplit iterator <#rsplit.i,string,string,int,bool>`_
  ## * `split func<#split,string,string,int>`_
  ## * `splitLines func<#splitLines,string>`_
  ## * `splitWhitespace func<#splitWhitespace,string,int>`_
  runnableExamples:
    doAssert "a  largely    spaced sentence".rsplit(" ", maxsplit = 1) == @[
        "a  largely    spaced", "sentence"]
    doAssert "a,b,c".rsplit(",") == @["a", "b", "c"]
    doAssert "a man a plan a canal panama".rsplit("a ") == @["", "man ",
        "plan ", "canal panama"]
    doAssert "".rsplit("Elon Musk") == @[""]
    doAssert "a  largely    spaced sentence".rsplit(" ") == @["a", "",
        "largely", "", "", "", "spaced", "sentence"]
  accResult(s.rsplit(sep, maxsplit))
  # the iterator yields the rightmost piece first
  reverse(result)
833
func splitLines*(s: string, keepEol = false): seq[string] {.rtl,
    extern: "nsuSplitLines".} =
  ## Returns the lines produced by the `splitLines
  ## iterator<#splitLines.i,string>`_ (see its documentation) as a
  ## sequence.
  ##
  ## See also:
  ## * `splitLines iterator<#splitLines.i,string>`_
  ## * `splitWhitespace func<#splitWhitespace,string,int>`_
  ## * `countLines func<#countLines,string>`_
  # inside the collecting `for` loop this resolves to the iterator
  accResult(s.splitLines(keepEol = keepEol))
844
func splitWhitespace*(s: string, maxsplit: int = -1): seq[string] {.rtl,
    extern: "nsuSplitWhitespace".} =
  ## Returns the items produced by the `splitWhitespace iterator
  ## <#splitWhitespace.i,string,int>`_ (see its documentation) as a
  ## sequence.
  ##
  ## See also:
  ## * `splitWhitespace iterator <#splitWhitespace.i,string,int>`_
  ## * `splitLines func<#splitLines,string>`_
  # inside the collecting `for` loop this resolves to the iterator
  accResult(s.splitWhitespace(maxsplit))
854
func toBin*(x: BiggestInt, len: Positive): string {.rtl, extern: "nsuToBin".} =
  ## Returns the binary representation of `x`.
  ##
  ## The result is always exactly `len` characters long: higher bits that
  ## do not fit are dropped, unused leading positions are filled with
  ## `'0'`. No leading `0b` prefix is generated.
  runnableExamples:
    let
      a = 29
      b = 257
    doAssert a.toBin(8) == "00011101"
    doAssert b.toBin(8) == "00000001"
    doAssert b.toBin(9) == "100000001"
  assert(len > 0)
  let bits = BiggestUInt(x)
  var
    probe = BiggestUInt 1 # selects the bit written in this round
    shift = BiggestUInt 0 # position of that bit
  result = newString(len)
  # the least significant bit goes into the rightmost character
  var pos = len - 1
  while pos >= 0:
    let digit = (bits and probe) shr shift
    result[pos] = chr(int(digit) + ord('0'))
    probe = probe shl BiggestUInt(1)
    inc shift
    dec pos
876
func toOct*(x: BiggestInt, len: Positive): string {.rtl, extern: "nsuToOct".} =
  ## Returns the octal representation of `x`.
  ##
  ## The result is always exactly `len` characters long: higher digits
  ## that do not fit are dropped, unused leading positions are filled with
  ## `'0'`. No leading `0o` prefix is generated.
  ##
  ## Do not confuse it with `toOctal func<#toOctal,char>`_.
  runnableExamples:
    let
      a = 62
      b = 513
    doAssert a.toOct(3) == "076"
    doAssert b.toOct(3) == "001"
    doAssert b.toOct(5) == "01001"
  assert(len > 0)
  let bits = BiggestUInt(x)
  var
    probe = BiggestUInt 7 # selects the three bits of the current digit
    shift = BiggestUInt 0 # position of the lowest of those bits
  result = newString(len)
  # the least significant digit goes into the rightmost character
  var pos = len - 1
  while pos >= 0:
    let digit = (bits and probe) shr shift
    result[pos] = chr(int(digit) + ord('0'))
    probe = probe shl BiggestUInt(3)
    inc shift, 3
    dec pos
900
func toHexImpl(x: BiggestUInt, len: Positive, handleNegative: bool): string =
  ## Writes the `len` lowest hexadecimal digits (upper case) of `x` into a
  ## new string, least significant digit last.
  ##
  ## When `handleNegative` is true, the remaining value is replaced by
  ## all-ones as soon as it becomes zero, so every further digit to the left
  ## is `F`.
  const digits = "0123456789ABCDEF"
  result = newString(len)
  var
    rest = x
    pos = len
  while pos > 0:
    dec pos
    result[pos] = digits[int(rest and 0xF)]
    rest = rest shr 4
    # handle negative overflow
    if handleNegative and rest == 0:
      rest = not(BiggestUInt 0)
911
func toHex*[T: SomeInteger](x: T, len: Positive): string =
  ## Converts `x` to its hexadecimal representation.
  ##
  ## The resulting string will be exactly `len` characters long. No prefix like
  ## `0x` is generated. `x` is treated as an unsigned value.
  ##
  ## The bits of `x` are reinterpreted (via `cast`) as `BiggestUInt` and handed
  ## to `toHexImpl`, together with a flag telling it whether `x` is negative.
  runnableExamples:
    let
      a = 62'u64
      b = 4097'u64
    doAssert a.toHex(3) == "03E"
    doAssert b.toHex(3) == "001"
    doAssert b.toHex(4) == "1001"
    doAssert toHex(62, 3) == "03E"
    doAssert toHex(-8, 6) == "FFFFF8"
  toHexImpl(cast[BiggestUInt](x), len, x < 0)
927
func toHex*[T: SomeInteger](x: T): string =
  ## Shortcut for `toHex(x, T.sizeof * 2)`
  ##
  ## The result therefore has two hexadecimal digits per byte of `T`.
  runnableExamples:
    doAssert toHex(1984'i64) == "00000000000007C0"
    doAssert toHex(1984'i16) == "07C0"
  toHexImpl(cast[BiggestUInt](x), 2*sizeof(T), x < 0)
934
func toHex*(s: string): string {.rtl.} =
  ## Hex-encodes the bytes of `s`.
  ##
  ## Every input character becomes two upper-case hexadecimal digits (high
  ## nibble first), so the output is twice as long as the input. No prefix
  ## like `0x` is generated.
  ##
  ## See also:
  ## * `parseHexStr func<#parseHexStr,string>`_ for the reverse operation
  runnableExamples:
    let
      a = "1"
      b = "A"
      c = "\0\255"
    doAssert a.toHex() == "31"
    doAssert b.toHex() == "41"
    doAssert c.toHex() == "00FF"

  const digits = "0123456789ABCDEF"
  result = newString(2 * s.len)
  for idx, ch in s:
    let code = ord(ch)
    result[2 * idx] = digits[code shr 4]
    result[2 * idx + 1] = digits[code and 0xF]
959
func toOctal*(c: char): string {.rtl, extern: "nsuToOctal".} =
  ## Converts the character `c` to the octal representation of its ordinal
  ## value.
  ##
  ## The result is always exactly 3 digits long; small values are padded with
  ## `'0'` on the left.
  ##
  ## Do not confuse it with `toOct func<#toOct,BiggestInt,Positive>`_.
  runnableExamples:
    doAssert toOctal('1') == "061"
    doAssert toOctal('A') == "101"
    doAssert toOctal('a') == "141"
    doAssert toOctal('!') == "041"

  let code = ord(c)
  result = newString(3)
  # One digit per group of 3 bits, most significant group first.
  result[0] = chr(((code shr 6) and 7) + ord('0'))
  result[1] = chr(((code shr 3) and 7) + ord('0'))
  result[2] = chr((code and 7) + ord('0'))
978
func fromBin*[T: SomeInteger](s: string): T =
  ## Reads the whole of `s` as a binary integer and returns it as a `T`.
  ##
  ## `s` may start with one of the optional prefixes `0b` or `0B`, and
  ## underscores within `s` are ignored. `ValueError` is raised when `s` is
  ## not a valid binary integer.
  ##
  ## No overflow check is made: if the value represented by `s` is too big
  ## for `T`, only the rightmost binary digits of `s` contribute to the
  ## result and no error is produced.
  runnableExamples:
    let s = "0b_0100_1000_1000_1000_1110_1110_1001_1001"
    doAssert fromBin[int](s) == 1216933529
    doAssert fromBin[int8](s) == 0b1001_1001'i8
    doAssert fromBin[int8](s) == -103'i8
    doAssert fromBin[uint8](s) == 153
    doAssert s.fromBin[:int16] == 0b1110_1110_1001_1001'i16
    doAssert s.fromBin[:uint64] == 1216933529'u64

  # The parser reports how many characters it consumed; anything short of
  # the full string (or nothing at all) means `s` is malformed.
  let consumed = parseutils.parseBin(s, result)
  if consumed == 0 or consumed != s.len:
    raise newException(ValueError, "invalid binary integer: " & s)
1001
func fromOct*[T: SomeInteger](s: string): T =
  ## Reads the whole of `s` as an octal integer and returns it as a `T`.
  ##
  ## `s` may start with one of the optional prefixes `0o` or `0O`, and
  ## underscores within `s` are ignored. `ValueError` is raised when `s` is
  ## not a valid octal integer.
  ##
  ## No overflow check is made: if the value represented by `s` is too big
  ## for `T`, only the rightmost octal digits of `s` contribute to the
  ## result and no error is produced.
  runnableExamples:
    let s = "0o_123_456_777"
    doAssert fromOct[int](s) == 21913087
    doAssert fromOct[int8](s) == 0o377'i8
    doAssert fromOct[int8](s) == -1'i8
    doAssert fromOct[uint8](s) == 255'u8
    doAssert s.fromOct[:int16] == 24063'i16
    doAssert s.fromOct[:uint64] == 21913087'u64

  # The parser reports how many characters it consumed; anything short of
  # the full string (or nothing at all) means `s` is malformed.
  let consumed = parseutils.parseOct(s, result)
  if consumed == 0 or consumed != s.len:
    raise newException(ValueError, "invalid oct integer: " & s)
1024
func fromHex*[T: SomeInteger](s: string): T =
  ## Reads the whole of `s` as a hexadecimal integer and returns it as a `T`.
  ##
  ## `s` may start with one of the optional prefixes `0x`, `0X` or `#`, and
  ## underscores within `s` are ignored. `ValueError` is raised when `s` is
  ## not a valid hex integer.
  ##
  ## No overflow check is made: if the value represented by `s` is too big
  ## for `T`, only the rightmost hex digits of `s` contribute to the result
  ## and no error is produced.
  runnableExamples:
    let s = "0x_1235_8df6"
    doAssert fromHex[int](s) == 305499638
    doAssert fromHex[int8](s) == 0xf6'i8
    doAssert fromHex[int8](s) == -10'i8
    doAssert fromHex[uint8](s) == 246'u8
    doAssert s.fromHex[:int16] == -29194'i16
    doAssert s.fromHex[:uint64] == 305499638'u64

  # The parser reports how many characters it consumed; anything short of
  # the full string (or nothing at all) means `s` is malformed.
  let consumed = parseutils.parseHex(s, result)
  if consumed == 0 or consumed != s.len:
    raise newException(ValueError, "invalid hex integer: " & s)
1047
func intToStr*(x: int, minchars: Positive = 1): string {.rtl,
    extern: "nsuIntToStr".} =
  ## Converts `x` to its decimal representation.
  ##
  ## The digits are padded with leading zeros until there are at least
  ## `minchars` of them. For negative `x` a `-` is put in front of the padded
  ## digits; it does not count towards `minchars`. `low(int)` is supported.
  runnableExamples:
    doAssert intToStr(1984) == "1984"
    doAssert intToStr(1984, 6) == "001984"
    doAssert intToStr(-42, 4) == "-0042"
  # `$x` is used rather than `$abs(x)`: `abs(low(int))` is not representable
  # and would overflow. The sign, if any, is skipped when copying the digits.
  let text = $x
  let signLen = if x < 0: 1 else: 0
  let digitCount = text.len - signLen
  # Build the result front to back in one buffer instead of repeatedly
  # prepending, which would copy the string once per padding character.
  result = newStringOfCap(signLen + max(int(minchars), digitCount))
  if x < 0:
    result.add('-')
  for _ in 1 .. minchars - digitCount:
    result.add('0')
  for idx in signLen ..< text.len:
    result.add(text[idx])
1062
func parseInt*(s: string): int {.rtl, extern: "nsuParseInt".} =
  ## Interprets the whole of `s` as a decimal integer.
  ##
  ## Raises `ValueError` if `s` is not a valid integer, i.e. when the
  ## underlying parser consumes nothing or stops before the end of `s`.
  runnableExamples:
    doAssert parseInt("-0042") == -42
  var value = 0
  let consumed = parseutils.parseInt(s, value, 0)
  if consumed == 0 or consumed != s.len:
    raise newException(ValueError, "invalid integer: " & s)
  result = value
1073
func parseBiggestInt*(s: string): BiggestInt {.rtl,
    extern: "nsuParseBiggestInt".} =
  ## Interprets the whole of `s` as a decimal integer of type `BiggestInt`.
  ##
  ## Raises `ValueError` if `s` is not a valid integer, i.e. when the
  ## underlying parser consumes nothing or stops before the end of `s`.
  var value = BiggestInt(0)
  let consumed = parseutils.parseBiggestInt(s, value, 0)
  if consumed == 0 or consumed != s.len:
    raise newException(ValueError, "invalid integer: " & s)
  result = value
1083
func parseUInt*(s: string): uint {.rtl, extern: "nsuParseUInt".} =
  ## Interprets the whole of `s` as a decimal unsigned integer.
  ##
  ## Raises `ValueError` if `s` is not a valid unsigned integer, i.e. when
  ## the underlying parser consumes nothing or stops before the end of `s`.
  var value = uint(0)
  let consumed = parseutils.parseUInt(s, value, 0)
  if consumed == 0 or consumed != s.len:
    raise newException(ValueError, "invalid unsigned integer: " & s)
  result = value
1092
func parseBiggestUInt*(s: string): BiggestUInt {.rtl,
    extern: "nsuParseBiggestUInt".} =
  ## Interprets the whole of `s` as a decimal unsigned integer of type
  ## `BiggestUInt`.
  ##
  ## Raises `ValueError` if `s` is not a valid unsigned integer, i.e. when
  ## the underlying parser consumes nothing or stops before the end of `s`.
  var value = BiggestUInt(0)
  let consumed = parseutils.parseBiggestUInt(s, value, 0)
  if consumed == 0 or consumed != s.len:
    raise newException(ValueError, "invalid unsigned integer: " & s)
  result = value
1102
func parseFloat*(s: string): float {.rtl, extern: "nsuParseFloat".} =
  ## Interprets the whole of `s` as a decimal floating point value.
  ##
  ## Raises `ValueError` if `s` is not a valid floating point number.
  ## `NAN`, `INF`, `-INF` are also supported (case insensitive comparison).
  runnableExamples:
    doAssert parseFloat("3.14") == 3.14
    doAssert parseFloat("inf") == 1.0/0
  var value = 0.0
  let consumed = parseutils.parseFloat(s, value, 0)
  if consumed == 0 or consumed != s.len:
    raise newException(ValueError, "invalid float: " & s)
  result = value
1115
func parseBinInt*(s: string): int {.rtl, extern: "nsuParseBinInt".} =
  ## Interprets the whole of `s` as a binary integer.
  ##
  ## `s` may start with one of the optional prefixes `0b` or `0B`, and
  ## underscores within `s` are ignored. Raises `ValueError` if `s` is not a
  ## valid binary integer.
  runnableExamples:
    let
      a = "0b11_0101"
      b = "111"
    doAssert a.parseBinInt() == 53
    doAssert b.parseBinInt() == 7

  var value = 0
  let consumed = parseutils.parseBin(s, value, 0)
  if consumed == 0 or consumed != s.len:
    raise newException(ValueError, "invalid binary integer: " & s)
  result = value
1133
func parseOctInt*(s: string): int {.rtl, extern: "nsuParseOctInt".} =
  ## Interprets the whole of `s` as an octal integer.
  ##
  ## `s` may start with one of the optional prefixes `0o` or `0O`, and
  ## underscores within `s` are ignored. Raises `ValueError` if `s` is not a
  ## valid oct integer.
  var value = 0
  let consumed = parseutils.parseOct(s, value, 0)
  if consumed == 0 or consumed != s.len:
    raise newException(ValueError, "invalid oct integer: " & s)
  result = value
1144
func parseHexInt*(s: string): int {.rtl, extern: "nsuParseHexInt".} =
  ## Interprets the whole of `s` as a hexadecimal integer.
  ##
  ## `s` may start with one of the optional prefixes `0x`, `0X` or `#`, and
  ## underscores within `s` are ignored. Raises `ValueError` if `s` is not a
  ## valid hex integer.
  var value = 0
  let consumed = parseutils.parseHex(s, value, 0)
  if consumed == 0 or consumed != s.len:
    raise newException(ValueError, "invalid hex integer: " & s)
  result = value
1155
func generateHexCharToValueMap(): string =
  ## Builds a 256-character lookup table indexed by character code.
  ##
  ## The entry for a hex digit (`0-9`, `a-f`, `A-F`) is the character whose
  ## ordinal is the digit's value (0..15); every other entry has ordinal 17.
  result = newString(256)
  for ch in char.low .. char.high:
    let code = ord(ch)
    let value =
      case ch
      of '0'..'9': code - ord('0')
      of 'a'..'f': code - ord('a') + 10
      of 'A'..'F': code - ord('A') + 10
      else: 17 # indicates an invalid hex char
    result[code] = chr(value)

# Evaluated once at compile time; indexed with `ord(c)`.
const hexCharToValueMap = generateHexCharToValueMap()
1170
func parseHexStr*(s: string): string {.rtl, extern: "nsuParseHexStr".} =
  ## Decodes a hex-encoded string into the byte string it represents.
  ##
  ## Each pair of hex digits in `s` yields one output character. Upper and
  ## lower case digits are both accepted. Raises `ValueError` if `s` has odd
  ## length or contains a character that is not a hex digit.
  ##
  ## See also:
  ## * `toHex func<#toHex,string>`_ for the reverse operation
  runnableExamples:
    let
      a = "41"
      b = "3161"
      c = "00ff"
    doAssert parseHexStr(a) == "A"
    doAssert parseHexStr(b) == "1a"
    doAssert parseHexStr(c) == "\0\255"

  if (s.len and 1) != 0:
    raise newException(ValueError, "Incorrect hex string len")
  result = newString(s.len div 2)
  # Value of the first digit of the current pair, already moved into the
  # high nibble.
  var upper = 0
  for idx, ch in s:
    let nibble = ord(hexCharToValueMap[ord(ch)])
    if nibble == 17: # the table's marker for "not a hex digit"
      raise newException(ValueError, "Invalid hex char `" &
                         ch & "` (ord " & $ch.ord & ")")
    if (idx and 1) == 0:
      upper = nibble shl 4
    else:
      result[idx shr 1] = chr(upper + nibble)
1201
func parseBool*(s: string): bool =
  ## Interprets `s` as a boolean.
  ##
  ## `s` is first passed through `normalize`. The values `y, yes, true, 1,
  ## on` then give `true` and the values `n, no, false, 0, off` give `false`.
  ## Anything else raises a `ValueError` exception.
  runnableExamples:
    let a = "n"
    doAssert parseBool(a) == false

  let key = normalize(s)
  if key in ["y", "yes", "true", "1", "on"]:
    result = true
  elif key in ["n", "no", "false", "0", "off"]:
    result = false
  else:
    raise newException(ValueError, "cannot interpret as a bool: " & s)
1217
func parseEnum*[T: enum](s: string): T =
  ## Parses an enum `T`. This errors at compile time, if the given enum
  ## type contains multiple fields with the same string value.
  ##
  ## Raises `ValueError` for an invalid value in `s`. The comparison is
  ## done in a style insensitive way.
  ##
  ## The lookup itself is produced by `genEnumCaseStmt` over the ordinals
  ## `ord(low(T)) .. ord(high(T))`, with `nimIdentNormalize` as the
  ## normalizer.
  runnableExamples:
    type
      MyEnum = enum
        first = "1st",
        second,
        third = "3rd"

    doAssert parseEnum[MyEnum]("1_st") == first
    doAssert parseEnum[MyEnum]("second") == second
    doAssertRaises(ValueError):
      echo parseEnum[MyEnum]("third")

  # `default = nil`: no fallback value is supplied for unmatched input.
  genEnumCaseStmt(T, s, default = nil, ord(low(T)), ord(high(T)), nimIdentNormalize)
1237
func parseEnum*[T: enum](s: string, default: T): T =
  ## Parses an enum `T`. This errors at compile time, if the given enum
  ## type contains multiple fields with the same string value.
  ##
  ## Uses `default` for an invalid value in `s`. The comparison is done in a
  ## style insensitive way.
  ##
  ## The lookup itself is produced by `genEnumCaseStmt` over the ordinals
  ## `ord(low(T)) .. ord(high(T))`, with `nimIdentNormalize` as the
  ## normalizer and `default` as the fallback.
  runnableExamples:
    type
      MyEnum = enum
        first = "1st",
        second,
        third = "3rd"

    doAssert parseEnum[MyEnum]("1_st") == first
    doAssert parseEnum[MyEnum]("second") == second
    doAssert parseEnum[MyEnum]("last", third) == third

  genEnumCaseStmt(T, s, default, ord(low(T)), ord(high(T)), nimIdentNormalize)
1256
func repeat*(c: char, count: Natural): string {.rtl, extern: "nsuRepeatChar".} =
  ## Builds a string made of `count` copies of the character `c`.
  ##
  ## A `count` of 0 gives the empty string.
  runnableExamples:
    let a = 'z'
    doAssert a.repeat(5) == "zzzzz"
  result = newString(count)
  for idx in 0 ..< count:
    result[idx] = c
1265
func repeat*(s: string, n: Natural): string {.rtl, extern: "nsuRepeatStr".} =
  ## Builds a string made of `n` back-to-back copies of `s`.
  ##
  ## An `n` of 0 gives the empty string.
  runnableExamples:
    doAssert "+ foo +".repeat(3) == "+ foo ++ foo ++ foo +"

  # Reserve the final size up front so appending never has to grow the buffer.
  result = newStringOfCap(n * s.len)
  for _ in 0 ..< n:
    result.add(s)
1273
func spaces*(n: Natural): string {.inline.} =
  ## Returns a string with `n` space characters. You can use this func
  ## to left align strings.
  ##
  ## This is a shorthand for `repeat(' ', n)`.
  ##
  ## See also:
  ## * `align func<#align,string,Natural,char>`_
  ## * `alignLeft func<#alignLeft,string,Natural,char>`_
  ## * `indent func<#indent,string,Natural,string>`_
  ## * `center func<#center,string,int,char>`_
  runnableExamples:
    let
      width = 15
      text1 = "Hello user!"
      text2 = "This is a very long string"
    doAssert text1 & spaces(max(0, width - text1.len)) & "|" ==
             "Hello user!    |"
    doAssert text2 & spaces(max(0, width - text2.len)) & "|" ==
             "This is a very long string|"
  repeat(' ', n)
1293
func align*(s: string, count: Natural, padding = ' '): string {.rtl,
    extern: "nsuAlignString".} =
  ## Right-aligns `s` in a field of `count` characters.
  ##
  ## Enough `padding` characters (spaces by default) are put in front of `s`
  ## to make the result `count` long. If `s.len >= count`, `s` is returned
  ## unchanged. To pad on the other side use the `alignLeft
  ## func<#alignLeft,string,Natural,char>`_.
  ##
  ## See also:
  ## * `alignLeft func<#alignLeft,string,Natural,char>`_
  ## * `spaces func<#spaces,Natural>`_
  ## * `indent func<#indent,string,Natural,string>`_
  ## * `center func<#center,string,int,char>`_
  runnableExamples:
    assert align("abc", 4) == " abc"
    assert align("a", 0) == "a"
    assert align("1232", 6) == "  1232"
    assert align("1232", 6, '#') == "##1232"
  if s.len >= count:
    return s
  result = newStringOfCap(count)
  for _ in 0 ..< count - s.len:
    result.add(padding)
  result.add(s)
1320
func alignLeft*(s: string, count: Natural, padding = ' '): string =
  ## Left-aligns `s` in a field of `count` characters.
  ##
  ## Enough `padding` characters (spaces by default) are appended to `s` to
  ## make the result `count` long. If `s.len >= count`, `s` is returned
  ## unchanged. To pad on the other side use the `align
  ## func<#align,string,Natural,char>`_.
  ##
  ## See also:
  ## * `align func<#align,string,Natural,char>`_
  ## * `spaces func<#spaces,Natural>`_
  ## * `indent func<#indent,string,Natural,string>`_
  ## * `center func<#center,string,int,char>`_
  runnableExamples:
    assert alignLeft("abc", 4) == "abc "
    assert alignLeft("a", 0) == "a"
    assert alignLeft("1232", 6) == "1232  "
    assert alignLeft("1232", 6, '#') == "1232##"
  result = s
  if s.len < count:
    # Grow to the target width, then overwrite the new tail with `padding`.
    result.setLen(count)
    for idx in s.len ..< count:
      result[idx] = padding
1347
func center*(s: string, width: int, fillChar: char = ' '): string {.rtl,
    extern: "nsuCenterString".} =
  ## Centers `s` in a string `width` characters long, padding both sides
  ## with `fillChar` (default: space).
  ##
  ## When the padding cannot be split evenly, the extra `fillChar` goes to
  ## the right. The original string is returned if `width` is less than or
  ## equal to `s.len`.
  ##
  ## See also:
  ## * `align func<#align,string,Natural,char>`_
  ## * `alignLeft func<#alignLeft,string,Natural,char>`_
  ## * `spaces func<#spaces,Natural>`_
  ## * `indent func<#indent,string,Natural,string>`_
  runnableExamples:
    let a = "foo"
    doAssert a.center(2) == "foo"
    doAssert a.center(5) == " foo "
    doAssert a.center(6) == " foo  "
  if width <= s.len: return s
  let
    total = width - s.len
    before = total div 2 # rounds down, so the left side is never the larger
    after = total - before
  result = newStringOfCap(width)
  for _ in 0 ..< before:
    result.add(fillChar)
  result.add(s)
  for _ in 0 ..< after:
    result.add(fillChar)
1382
func indent*(s: string, count: Natural, padding: string = " "): string {.rtl,
    extern: "nsuIndent".} =
  ## Prefixes every line of `s` with `count` copies of `padding`.
  ##
  ## **Note:** This does not preserve the new line characters used in `s`;
  ## the lines of the result are always joined with `"\n"`.
  ##
  ## See also:
  ## * `align func<#align,string,Natural,char>`_
  ## * `alignLeft func<#alignLeft,string,Natural,char>`_
  ## * `spaces func<#spaces,Natural>`_
  ## * `unindent func<#unindent,string,Natural,string>`_
  ## * `dedent func<#dedent,string,Natural>`_
  runnableExamples:
    doAssert indent("First line\c\l and second line.", 2) ==
             "  First line\l   and second line."
  # The prefix is the same for every line, so build it only once.
  let prefix = repeat(padding, count)
  result = ""
  var firstLine = true
  for line in s.splitLines():
    if firstLine:
      firstLine = false
    else:
      result.add("\n")
    result.add(prefix)
    result.add(line)
1407
func unindent*(s: string, count: Natural = int.high,
               padding: string = " "): string {.rtl, extern: "nsuUnindent".} =
  ## Removes up to `count` leading occurrences of `padding` from each line
  ## in `s`.
  ##
  ## A line that starts with fewer than `count` copies of `padding` only
  ## loses the copies it has. An empty `padding` removes nothing.
  ##
  ## **Note:** This does not preserve the new line characters used in `s`;
  ## the lines of the result are always joined with `"\n"`.
  ##
  ## See also:
  ## * `dedent func<#dedent,string,Natural>`_
  ## * `align func<#align,string,Natural,char>`_
  ## * `alignLeft func<#alignLeft,string,Natural,char>`_
  ## * `spaces func<#spaces,Natural>`_
  ## * `indent func<#indent,string,Natural,string>`_
  runnableExamples:
    let x = """
      Hello
        There
    """.unindent()

    doAssert x == "Hello\nThere\n"
    doAssert unindent("ababX", padding = "ab") == "X"
  result = ""
  let padLen = padding.len
  var firstLine = true
  for line in s.splitLines():
    if firstLine:
      firstLine = false
    else:
      result.add("\n")
    # `cut` is the offset of the first character that is kept. It advances
    # by a whole `padding` at a time, so multi-character paddings are
    # matched at the right positions and `cut` never passes the line end.
    var cut = 0
    if padLen > 0: # an empty `padding` can never remove anything
      var removed = 0
      while removed < count and cut + padLen <= line.len and
          line[cut ..< cut + padLen] == padding:
        inc removed
        inc cut, padLen
    result.add(line[cut .. ^1])
1440
func indentation*(s: string): Natural {.since: (1, 3).} =
  ## Returns the number of leading spaces that all lines of `s` have in
  ## common. Lines that are empty or contain nothing but spaces are ignored.
  ##
  ## Returns 0 if no line contains a character other than a space.
  var common = int.high
  for line in s.splitLines:
    for col, ch in line:
      # Stop at the first non-space character, or as soon as this line is
      # known to be indented at least as much as the current minimum.
      if col >= common or ch != ' ':
        common = min(common, col)
        break
  result = if common == int.high: 0 else: common
1453
func dedent*(s: string, count: Natural = indentation(s)): string {.rtl,
    extern: "nsuDedent", since: (1, 3).} =
  ## Unindents each line in `s` by `count` spaces.
  ## The only difference between this and the
  ## `unindent func<#unindent,string,Natural,string>`_ is that this by default
  ## only cuts off the amount of indentation that all lines of `s` share as
  ## opposed to all indentation. It only supports spaces as padding.
  ##
  ## `count` defaults to `indentation(s)`; the work is done by
  ## `unindent(s, count, " ")`.
  ##
  ## **Note:** This does not preserve the new line characters used in `s`.
  ##
  ## See also:
  ## * `unindent func<#unindent,string,Natural,string>`_
  ## * `align func<#align,string,Natural,char>`_
  ## * `alignLeft func<#alignLeft,string,Natural,char>`_
  ## * `spaces func<#spaces,Natural>`_
  ## * `indent func<#indent,string,Natural,string>`_
  runnableExamples:
    let x = """
      Hello
        There
    """.dedent()

    doAssert x == "Hello\n  There\n"
  unindent(s, count, " ")
1478
func delete*(s: var string, slice: Slice[int]) =
  ## Removes the characters `s[slice]` from `s` in place.
  ##
  ## When bound checks are enabled, `IndexDefect` is raised if `slice.a` is
  ## not a valid index of `s` or `slice.b` lies beyond the last index. An
  ## empty slice (`slice.b < slice.a`) leaves `s` untouched.
  ##
  ## This operation moves all elements after `s[slice]` in linear time, and
  ## is the string analog to `sequtils.delete`.
  runnableExamples:
    var a = "abcde"
    doAssertRaises(IndexDefect): a.delete(4..5)
    assert a == "abcde"
    a.delete(4..4)
    assert a == "abcd"
    a.delete(1..2)
    assert a == "ad"
    a.delete(1..<1) # empty slice
    assert a == "ad"
  when compileOption("boundChecks"):
    if slice.a < 0 or slice.a >= s.len or slice.b >= s.len:
      raise newException(IndexDefect, $(slice: slice, len: s.len))
  if slice.b < slice.a:
    return # empty slice: nothing to remove
  let removed = slice.b - slice.a + 1
  let newLen = s.len - removed
  # Shift the tail left over the gap (a `moveMem` variant is pending
  # benchmark), then drop the now unused characters at the end.
  for dst in slice.a ..< newLen:
    s[dst] = s[dst + removed]
  setLen(s, newLen)
1508
func delete*(s: var string, first, last: int) {.rtl, extern: "nsuDelete", deprecated: "use `delete(s, first..last)`".} =
  ## Deletes in `s` the characters at positions `first .. last` (both ends
  ## included). A `last` beyond the end of `s` is clamped to the last
  ## character.
  runnableExamples("--warning:deprecated:off"):
    var a = "abracadabra"

    a.delete(4, 5)
    doAssert a == "abradabra"

    a.delete(1, 6)
    doAssert a == "ara"

    a.delete(2, 999)
    doAssert a == "ar"

  # Index of the first character after the deleted range, clamped to `len(s)`.
  let tailStart = min(len(s), last+1)
  let newLen = len(s) - tailStart + first
  let gap = tailStart - first
  # Shift the tail left over the gap, then cut the string to its new length.
  for dst in first ..< newLen:
    s[dst] = s[dst + gap]
  setLen(s, newLen)
1531
func startsWith*(s: string, prefix: char): bool {.inline.} =
  ## Tells whether the first character of `s` is `prefix`.
  ##
  ## An empty `s` never starts with any character.
  ##
  ## See also:
  ## * `endsWith func<#endsWith,string,char>`_
  ## * `continuesWith func<#continuesWith,string,string,Natural>`_
  ## * `removePrefix func<#removePrefix,string,char>`_
  runnableExamples:
    let a = "abracadabra"
    doAssert a.startsWith('a') == true
    doAssert a.startsWith('b') == false
  if s.len == 0: false
  else: s[0] == prefix
1544
func startsWith*(s, prefix: string): bool {.rtl, extern: "nsuStartsWith".} =
  ## Returns true if `s` starts with string `prefix`.
  ##
  ## If `prefix == ""` true is returned.
  ##
  ## The comparison itself is delegated to `startsWithImpl`.
  ##
  ## See also:
  ## * `endsWith func<#endsWith,string,string>`_
  ## * `continuesWith func<#continuesWith,string,string,Natural>`_
  ## * `removePrefix func<#removePrefix,string,string>`_
  runnableExamples:
    let a = "abracadabra"
    doAssert a.startsWith("abra") == true
    doAssert a.startsWith("bra") == false
  startsWithImpl(s, prefix)
1559
func endsWith*(s: string, suffix: char): bool {.inline.} =
  ## Tells whether the last character of `s` is `suffix`.
  ##
  ## An empty `s` never ends with any character.
  ##
  ## See also:
  ## * `startsWith func<#startsWith,string,char>`_
  ## * `continuesWith func<#continuesWith,string,string,Natural>`_
  ## * `removeSuffix func<#removeSuffix,string,char>`_
  runnableExamples:
    let a = "abracadabra"
    doAssert a.endsWith('a') == true
    doAssert a.endsWith('b') == false
  if s.len == 0: false
  else: s[^1] == suffix
1572
func endsWith*(s, suffix: string): bool {.rtl, extern: "nsuEndsWith".} =
  ## Returns true if `s` ends with `suffix`.
  ##
  ## If `suffix == ""` true is returned.
  ##
  ## The comparison itself is delegated to `endsWithImpl`.
  ##
  ## See also:
  ## * `startsWith func<#startsWith,string,string>`_
  ## * `continuesWith func<#continuesWith,string,string,Natural>`_
  ## * `removeSuffix func<#removeSuffix,string,string>`_
  runnableExamples:
    let a = "abracadabra"
    doAssert a.endsWith("abra") == true
    doAssert a.endsWith("dab") == false
  endsWithImpl(s, suffix)
1587
func continuesWith*(s, substr: string, start: Natural): bool {.rtl,
    extern: "nsuContinuesWith".} =
  ## Tells whether `substr` occurs in `s` beginning exactly at index `start`.
  ##
  ## If `substr == ""` true is returned, whatever `start` is. If `substr`
  ## would extend past the end of `s`, false is returned.
  ##
  ## See also:
  ## * `startsWith func<#startsWith,string,string>`_
  ## * `endsWith func<#endsWith,string,string>`_
  runnableExamples:
    let a = "abracadabra"
    doAssert a.continuesWith("ca", 4) == true
    doAssert a.continuesWith("ca", 5) == false
    doAssert a.continuesWith("dab", 6) == true
  result = true
  for offset, expected in substr:
    let at = start + offset
    if at >= s.len or s[at] != expected:
      return false
1607
1608
func removePrefix*(s: var string, chars: set[char] = Newlines) {.rtl,
    extern: "nsuRemovePrefixCharSet".} =
  ## Removes all characters from `chars` from the start of the string `s`
  ## (in-place).
  ##
  ## Removal stops at the first character of `s` that is not in `chars`.
  ##
  ## See also:
  ## * `removeSuffix func<#removeSuffix,string,set[char]>`_
  runnableExamples:
    var userInput = "\r\n*~Hello World!"
    userInput.removePrefix
    doAssert userInput == "*~Hello World!"
    userInput.removePrefix({'~', '*'})
    doAssert userInput == "Hello World!"

    var otherInput = "?!?Hello!?!"
    otherInput.removePrefix({'!', '?'})
    doAssert otherInput == "Hello!?!"

  # Count how many leading characters belong to `chars`.
  var start = 0
  while start < s.len and s[start] in chars: inc start
  # Use the slice overload of `delete`; the `(first, last)` overload is
  # deprecated. `start > 0` guarantees the slice lies within `s`.
  if start > 0: s.delete(0 .. start - 1)
1630
func removePrefix*(s: var string, c: char) {.rtl,
    extern: "nsuRemovePrefixChar".} =
  ## Removes all occurrences of a single character (in-place) from the start
  ## of a string.
  ##
  ## Forwards to the character-set overload with the one-element set `{c}`.
  ##
  ## See also:
  ## * `removeSuffix func<#removeSuffix,string,char>`_
  ## * `startsWith func<#startsWith,string,char>`_
  runnableExamples:
    var ident = "pControl"
    ident.removePrefix('p')
    doAssert ident == "Control"
  removePrefix(s, chars = {c})
1644
func removePrefix*(s: var string, prefix: string) {.rtl,
    extern: "nsuRemovePrefixString".} =
  ## Removes `prefix` from the start of `s` (in-place) if `s` starts with it.
  ##
  ## Only one occurrence is removed, even if `s` starts with several copies
  ## of `prefix`. An empty `prefix` leaves `s` unchanged.
  ##
  ## See also:
  ## * `removeSuffix func<#removeSuffix,string,string>`_
  ## * `startsWith func<#startsWith,string,string>`_
  runnableExamples:
    var answers = "yesyes"
    answers.removePrefix("yes")
    doAssert answers == "yes"
  # Use the slice overload of `delete`; the `(first, last)` overload is
  # deprecated. The length check keeps the empty slice `0 .. -1` away from
  # it, which the slice overload rejects for an empty `s`.
  if prefix.len > 0 and s.startsWith(prefix):
    s.delete(0 .. prefix.len - 1)
1658
func removeSuffix*(s: var string, chars: set[char] = Newlines) {.rtl,
    extern: "nsuRemoveSuffixCharSet".} =
  ## Strips every trailing character of `s` that is a member of `chars`
  ## (in-place).
  ##
  ## Removal stops at the last character of `s` that is not in `chars`.
  ##
  ## See also:
  ## * `removePrefix func<#removePrefix,string,set[char]>`_
  runnableExamples:
    var userInput = "Hello World!*~\r\n"
    userInput.removeSuffix
    doAssert userInput == "Hello World!*~"
    userInput.removeSuffix({'~', '*'})
    doAssert userInput == "Hello World!"

    var otherInput = "Hello!?!"
    otherInput.removeSuffix({'!', '?'})
    doAssert otherInput == "Hello"

  # `keep` is the number of leading characters that survive.
  var keep = s.len
  while keep > 0 and s[keep - 1] in chars:
    dec keep
  s.setLen(keep)
1681
func removeSuffix*(s: var string, c: char) {.rtl,
    extern: "nsuRemoveSuffixChar".} =
  ## Removes all occurrences of a single character (in-place) from the end
  ## of a string.
  ##
  ## Forwards to the character-set overload with the one-element set `{c}`.
  ##
  ## See also:
  ## * `removePrefix func<#removePrefix,string,char>`_
  ## * `endsWith func<#endsWith,string,char>`_
  runnableExamples:
    var table = "users"
    table.removeSuffix('s')
    doAssert table == "user"

    var dots = "Trailing dots......."
    dots.removeSuffix('.')
    doAssert dots == "Trailing dots"

  removeSuffix(s, chars = {c})
1700
func removeSuffix*(s: var string, suffix: string) {.rtl,
    extern: "nsuRemoveSuffixString".} =
  ## Cuts `suffix` off the end of `s` (in-place) if `s` ends with it.
  ##
  ## Only one occurrence is removed, even if `s` ends with several copies
  ## of `suffix`.
  ##
  ## See also:
  ## * `removePrefix func<#removePrefix,string,string>`_
  ## * `endsWith func<#endsWith,string,string>`_
  runnableExamples:
    var answers = "yeses"
    answers.removeSuffix("es")
    doAssert answers == "yes"
  if s.endsWith(suffix):
    s.setLen(s.len - suffix.len)
1716
1717
func addSep*(dest: var string, sep = ", ", startLen: Natural = 0) {.inline.} =
  ## Appends `sep` to `dest`, but only when `dest` is longer than `startLen`.
  ##
  ## A shorthand for:
  ##
  ## .. code-block:: nim
  ##   if dest.len > startLen: add(dest, sep)
  ##
  ## Call it before adding each item when building a list whose items need
  ## to be *separated* by `sep`: nothing is added in front of the first item
  ## because `dest` is not yet longer than `startLen`. The following example
  ## creates a string describing an array of integers.
  runnableExamples:
    var arr = "["
    for x in items([2, 3, 5, 7, 11]):
      addSep(arr, startLen = len("["))
      add(arr, $x)
    add(arr, "]")
    doAssert arr == "[2, 3, 5, 7, 11]"

  if startLen < dest.len:
    dest.add(sep)
1739
func allCharsInSet*(s: string, theSet: set[char]): bool =
  ## Tells whether `s` consists solely of characters contained in `theSet`.
  ##
  ## The empty string trivially satisfies this.
  runnableExamples:
    doAssert allCharsInSet("aeea", {'a', 'e'}) == true
    doAssert allCharsInSet("", {'a', 'e'}) == true

  result = true
  for ch in s:
    if ch notin theSet:
      return false
1749
func abbrev*(s: string, possibilities: openArray[string]): int =
  ## Returns the index of the first item in `possibilities` which starts
  ## with `s`, if not ambiguous.
  ##
  ## An item that is exactly equal to `s` always wins: its index is returned
  ## even if other items merely start with `s`, regardless of where those
  ## items are located in `possibilities`.
  ##
  ## Returns -1 if no item has been found and -2 if multiple items match
  ## (and none of them is an exact match).
  runnableExamples:
    doAssert abbrev("fac", ["college", "faculty", "industry"]) == 1
    doAssert abbrev("foo", ["college", "faculty", "industry"]) == -1 # Not found
    doAssert abbrev("fac", ["college", "faculty", "faculties"]) == -2 # Ambiguous
    doAssert abbrev("college", ["college", "colleges", "industry"]) == 0
    doAssert abbrev("college", ["colleges", "college1", "college"]) == 2

  result = -1 # none found
  for i, candidate in possibilities:
    if candidate.startsWith(s):
      if candidate.len == s.len:
        # special case: exact match shouldn't be ambiguous
        return i
      # Remember the first prefix match; any further one makes the result
      # ambiguous. Keep scanning, an exact match may still follow.
      result = if result == -1: i else: -2
1769
1770# ---------------------------------------------------------------------------
1771
func join*(a: openArray[string], sep: string = ""): string {.rtl,
    extern: "nsuJoinSep".} =
  ## Builds one string out of all strings in `a`, inserting `sep` between
  ## neighbouring items. An empty `a` yields `""`.
  runnableExamples:
    doAssert join(["A", "B", "Conclusion"], " -> ") == "A -> B -> Conclusion"

  if a.len == 0:
    return ""

  # Compute the final length up front so the buffer is allocated only once.
  var total = sep.len * (a.len - 1)
  for item in a:
    inc(total, item.len)

  result = newStringOfCap(total)
  for i, item in a:
    if i > 0:
      result.add(sep)
    result.add(item)
1788
func join*[T: not string](a: openArray[T], sep: string = ""): string =
  ## Stringifies every element of `a` with `$` and glues the pieces
  ## together, inserting `sep` between neighbouring items.
  runnableExamples:
    doAssert join([1, 2, 3], " -> ") == "1 -> 2 -> 3"

  result = ""
  var isFirst = true
  for x in a:
    if isFirst:
      isFirst = false
    else:
      result.add(sep)
    result.add($x)
1800
type
  SkipTable* = array[char, int] ## Per-character skip distances for the
                                ## Boyer-Moore-Horspool search in `find`;
                                ## filled in by `initSkipTable`.
1803
func initSkipTable*(a: var SkipTable, sub: string) {.rtl,
    extern: "nsuInitSkipTable".} =
  ## Fills the table `a` with the skip distances for the needle `sub`.
  ##
  ## Every entry starts out as `sub.len`; then, for each character of `sub`
  ## except the last one, the entry becomes its distance to the end of
  ## `sub` (later occurrences of a character overwrite earlier ones).
  let subLen = len(sub)
  a.fill(subLen)

  for idx in countup(0, subLen - 2):
    a[sub[idx]] = subLen - 1 - idx
1812
func find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int {.
    rtl, extern: "nsuFindStrA".} =
  ## Looks for `sub` in `s` within the range `start..last`, driven by the
  ## preprocessed table `a`. A `last` of 0 means "unspecified" and is
  ## treated as `s.high` (the last element).
  ##
  ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  ## An empty `sub` is reported as found at `start`.
  let
    stop = if last == 0: s.high else: last
    needleHigh = sub.len - 1

  if needleHigh < 0:
    # this was an empty needle string,
    # we count this as match in the first possible position:
    return start

  # This is an implementation of the Boyer-Moore Horspool algorithms
  # https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm
  var pos = start

  while stop - pos >= needleHigh:
    # Compare the current window against the needle from right to left.
    var k = needleHigh
    while k >= 0 and s[pos + k] == sub[k]:
      dec k
    if k < 0:
      return pos
    # Mismatch: shift by the table entry of the window's last character.
    inc pos, a[s[pos + needleHigh]]
  result = -1
1841
# The C library search routines are only bound when not compiling for the
# JS backend, for documentation generation or for NimScript.
when not (defined(js) or defined(nimdoc) or defined(nimscript)):
  # Binding to C's `memchr` from `<string.h>`.
  func c_memchr(cstr: pointer, c: char, n: csize_t): pointer {.
                importc: "memchr", header: "<string.h>".}
  # Binding to C's `strstr` from `<string.h>`.
  func c_strstr(haystack, needle: cstring): cstring {.
    importc: "strstr", header: "<string.h>".}

  # Compile-time flag consulted by the `find` overloads below to decide
  # whether the bindings above may be used.
  const hasCStringBuiltin = true
else:
  const hasCStringBuiltin = false
1851
func find*(s: string, sub: char, start: Natural = 0, last = 0): int {.rtl,
    extern: "nsuFindChar".} =
  ## Returns the index of the first occurrence of the character `sub` in `s`
  ## inside range `start..last` (both ends included).
  ## A `last` of 0 means "unspecified" and is treated as `s.high` (the last
  ## element).
  ##
  ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  ## Otherwise the index returned is relative to `s[0]`, not `start`.
  ## Use `s[start..last].find` for a `start`-origin index.
  ##
  ## See also:
  ## * `rfind func<#rfind,string,char,Natural>`_
  ## * `replace func<#replace,string,char,char>`_
  let stop = if last == 0: s.high else: last

  template scanCharByChar() =
    # Plain linear scan, used in the VM and where `memchr` is not bound.
    for idx in int(start)..stop:
      if s[idx] == sub: return idx

  when nimvm:
    scanCharByChar()
  else:
    when hasCStringBuiltin:
      let span = stop - start + 1
      if span > 0:
        let hit = c_memchr(s[start].unsafeAddr, sub, cast[csize_t](span))
        if not hit.isNil:
          # Turn the returned pointer back into an index relative to `s[0]`.
          return cast[ByteAddress](hit) -% cast[ByteAddress](s.cstring)
    else:
      scanCharByChar()
  return -1
1879
func find*(s: string, chars: set[char], start: Natural = 0, last = 0): int {.
    rtl, extern: "nsuFindCharSet".} =
  ## Returns the index of the first character of `s` inside range
  ## `start..last` (both ends included) that is a member of `chars`.
  ## A `last` of 0 means "unspecified" and is treated as `s.high` (the last
  ## element).
  ##
  ## If `s` contains none of the characters in `chars`, -1 is returned.
  ## Otherwise the index returned is relative to `s[0]`, not `start`.
  ## Use `s[start..last].find` for a `start`-origin index.
  ##
  ## See also:
  ## * `rfind func<#rfind,string,set[char],Natural>`_
  ## * `multiReplace func<#multiReplace,string,varargs[]>`_
  let stop = if last == 0: s.high else: last
  result = -1
  for idx in int(start)..stop:
    if s[idx] in chars:
      return idx
1896
func find*(s, sub: string, start: Natural = 0, last = 0): int {.rtl,
    extern: "nsuFindStr".} =
  ## Returns the index of the first occurrence of `sub` in `s` inside range
  ## `start..last` (both ends included).
  ## A `last` of 0 means "unspecified" and is treated as `s.high` (the last
  ## element).
  ##
  ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  ## Otherwise the index returned is relative to `s[0]`, not `start`.
  ## Use `s[start..last].find` for a `start`-origin index.
  ##
  ## See also:
  ## * `rfind func<#rfind,string,string,Natural>`_
  ## * `replace func<#replace,string,string,string>`_
  if sub.len > s.len - start: return -1
  # A one-character needle is delegated to the `char` overload.
  if sub.len == 1: return find(s, sub[0], start, last)

  template viaSkipTable {.dirty.} =
    var table {.noinit.}: SkipTable
    initSkipTable(table, sub)
    result = find(table, s, sub, start, last)

  when hasCStringBuiltin:
    when nimvm:
      viaSkipTable()
    else:
      if last == 0 and s.len > start:
        # No explicit upper bound was given: hand the rest of `s` to C's
        # `strstr`.
        # NOTE(review): `strstr` works on NUL-terminated strings, so input
        # containing '\0' presumably is not searched past that byte -- confirm.
        let hit = c_strstr(s[start].unsafeAddr, sub)
        result =
          if hit.isNil: -1
          else: cast[ByteAddress](hit) -% cast[ByteAddress](s.cstring)
      else:
        viaSkipTable()
  else:
    viaSkipTable()
1934
func rfind*(s: string, sub: char, start: Natural = 0, last = -1): int {.rtl,
    extern: "nsuRFindChar".} =
  ## Returns the index of the last occurrence of the character `sub` in `s`
  ## inside range `start..last` (both ends included). The scan runs in
  ## reverse, from high indexes down to `start`. A `last` of -1 means
  ## "unspecified" and is treated as `s.high` (the last element).
  ##
  ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  ## Otherwise the index returned is relative to `s[0]`, not `start`.
  ## Use `s[start..last].rfind` for a `start`-origin index.
  ##
  ## See also:
  ## * `find func<#find,string,char,Natural,int>`_
  var idx = if last == -1: s.high else: last
  while idx >= start:
    if s[idx] == sub:
      return idx
    dec idx
  result = -1
1952
func rfind*(s: string, chars: set[char], start: Natural = 0, last = -1): int {.
    rtl, extern: "nsuRFindCharSet".} =
  ## Returns the index of the last character of `s` inside range
  ## `start..last` (both ends included) that is a member of `chars`. The scan
  ## runs in reverse, from high indexes down to `start`. A `last` of -1 means
  ## "unspecified" and is treated as `s.high` (the last element).
  ##
  ## If `s` contains none of the characters in `chars`, -1 is returned.
  ## Otherwise the index returned is relative to `s[0]`, not `start`.
  ## Use `s[start..last].rfind` for a `start`-origin index.
  ##
  ## See also:
  ## * `find func<#find,string,set[char],Natural,int>`_
  var idx = if last == -1: s.high else: last
  while idx >= start:
    if s[idx] in chars:
      return idx
    dec idx
  result = -1
1970
func rfind*(s, sub: string, start: Natural = 0, last = -1): int {.rtl,
    extern: "nsuRFindStr".} =
  ## Returns the index of the last occurrence of `sub` in `s` inside range
  ## `start..last` (both ends included). The scan runs in reverse, from high
  ## indexes down to `start`. A `last` of -1 means "unspecified" and is
  ## treated as `s.high` (the last element).
  ##
  ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned;
  ## an empty `sub` is never found.
  ## Otherwise the index returned is relative to `s[0]`, not `start`.
  ## Use `s[start..last].rfind` for a `start`-origin index.
  ##
  ## See also:
  ## * `find func<#find,string,string,Natural,int>`_
  if sub.len == 0 or sub.len > s.len - start:
    return -1
  let stop = if last == -1: s.high else: last
  # Try every candidate position, rightmost first.
  for i in countdown(stop - sub.len + 1, start):
    var matched = true
    for j in 0 ..< sub.len:
      if sub[j] != s[i+j]:
        matched = false
        break
    if matched:
      return i
  result = -1
1998
1999
func count*(s: string, sub: char): int {.rtl, extern: "nsuCountChar".} =
  ## Returns how many times the character `sub` appears in the string `s`.
  ##
  ## See also:
  ## * `countLines func<#countLines,string>`_
  result = 0
  for idx in 0 ..< s.len:
    if s[idx] == sub:
      inc(result)
2008
func count*(s: string, subs: set[char]): int {.rtl,
    extern: "nsuCountCharSet".} =
  ## Returns how many characters of the string `s` are members of the
  ## character set `subs`. `subs` must not be empty.
  ##
  ## See also:
  ## * `countLines func<#countLines,string>`_
  doAssert card(subs) > 0
  result = 0
  for idx in 0 ..< s.len:
    if s[idx] in subs:
      inc(result)
2019
func count*(s: string, sub: string, overlapping: bool = false): int {.rtl,
    extern: "nsuCountString".} =
  ## Returns how many times the substring `sub` appears in the string `s`.
  ## Overlapping occurrences of `sub` only count when `overlapping`
  ## is set to true (default: false). `sub` must not be empty.
  ##
  ## See also:
  ## * `countLines func<#countLines,string>`_
  doAssert sub.len > 0
  result = 0
  # After a hit, resume right behind its first character when overlaps are
  # allowed, otherwise behind the whole occurrence.
  let step = if overlapping: 1 else: sub.len
  var pos = s.find(sub, 0)
  while pos >= 0:
    inc(result)
    pos = s.find(sub, pos + step)
2037
func countLines*(s: string): int {.rtl, extern: "nsuCountLines".} =
  ## Returns the number of lines in the string `s`.
  ##
  ## This is the same as `len(splitLines(s))`, but much more efficient
  ## because no temporary strings are created. Every
  ## `character literal <manual.html#lexical-analysis-character-literals>`_
  ## newline combination (CR, LF, CR-LF) is supported.
  ##
  ## In this context, a line is any string separated by a newline combination.
  ## A line can be an empty string, so the result is always at least 1.
  ##
  ## See also:
  ## * `splitLines func<#splitLines,string>`_
  runnableExamples:
    doAssert countLines("First line\l and second line.") == 2
  result = 1
  var pos = 0
  while pos < s.len:
    let ch = s[pos]
    if ch == '\l':
      inc result
    elif ch == '\c':
      inc result
      # CR-LF counts as a single line break: step over the LF as well.
      if pos+1 < s.len and s[pos+1] == '\l':
        inc pos
    inc pos
2063
2064
func contains*(s, sub: string): bool =
  ## Tests whether `sub` occurs in `s`; shorthand for `find(s, sub) >= 0`.
  ##
  ## See also:
  ## * `find func<#find,string,string,Natural,int>`_
  result = s.find(sub) >= 0
2071
func contains*(s: string, chars: set[char]): bool =
  ## Tests whether `s` holds at least one character of `chars`; shorthand
  ## for `find(s, chars) >= 0`.
  ##
  ## See also:
  ## * `find func<#find,string,set[char],Natural,int>`_
  result = s.find(chars) >= 0
2078
func replace*(s, sub: string, by = ""): string {.rtl,
    extern: "nsuReplaceStr".} =
  ## Returns a copy of `s` in which every occurrence of the string `sub` has
  ## been replaced with the string `by`. An empty `sub` leaves `s` unchanged.
  ##
  ## See also:
  ## * `find func<#find,string,string,Natural,int>`_
  ## * `replace func<#replace,string,char,char>`_ for replacing
  ##   single characters
  ## * `replaceWord func<#replaceWord,string,string,string>`_
  ## * `multiReplace func<#multiReplace,string,varargs[]>`_
  result = ""
  let needleLen = sub.len
  if needleLen == 0:
    return s

  let stop = s.high
  var cursor = 0
  if needleLen == 1:
    # when the pattern is a single char, we use a faster
    # char-based search that doesn't need a skip table:
    let needle = sub[0]
    var hit = find(s, needle, cursor, stop)
    while hit >= 0:
      result.add substr(s, cursor, hit - 1)
      result.add by
      cursor = hit + needleLen
      hit = find(s, needle, cursor, stop)
  else:
    var table {.noinit.}: SkipTable
    initSkipTable(table, sub)
    var hit = find(table, s, sub, cursor, stop)
    while hit >= 0:
      result.add substr(s, cursor, hit - 1)
      result.add by
      cursor = hit + needleLen
      hit = find(table, s, sub, cursor, stop)
  # copy the rest:
  result.add substr(s, cursor)
2120
func replace*(s: string, sub, by: char): string {.rtl,
    extern: "nsuReplaceChar".} =
  ## Returns a copy of `s` in which every occurrence of the character `sub`
  ## has been replaced with the character `by`.
  ##
  ## Optimized version of `replace <#replace,string,string,string>`_ for
  ## characters.
  ##
  ## See also:
  ## * `find func<#find,string,char,Natural,int>`_
  ## * `replaceWord func<#replaceWord,string,string,string>`_
  ## * `multiReplace func<#multiReplace,string,varargs[]>`_
  result = newString(s.len)
  for idx, ch in s:
    result[idx] = if ch == sub: by else: ch
2139
func replaceWord*(s, sub: string, by = ""): string {.rtl,
    extern: "nsuReplaceWord".} =
  ## Returns a copy of `s` in which every whole-word occurrence of the string
  ## `sub` has been replaced with the string `by`.
  ##
  ## Each occurrence of `sub` has to be surrounded by word boundaries
  ## (comparable to `\b` in regular expressions), otherwise it is not
  ## replaced. An empty `sub` leaves `s` unchanged.
  if sub.len == 0: return s
  const wordChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\128'..'\255'}
  var table {.noinit.}: SkipTable
  initSkipTable(table, sub)
  result = ""
  let
    stop = s.high
    needleLen = sub.len
  var cursor = 0
  while true:
    let hit = find(table, s, sub, cursor, stop)
    if hit < 0: break
    # word boundary?
    let
      boundedLeft = hit == 0 or s[hit-1] notin wordChars
      boundedRight = hit+needleLen >= s.len or s[hit+needleLen] notin wordChars
    if boundedLeft and boundedRight:
      result.add substr(s, cursor, hit - 1)
      result.add by
      cursor = hit + needleLen
    else:
      # Not a whole word: keep the text up to and including the first
      # matched character and resume the search right after it.
      result.add substr(s, cursor, hit)
      cursor = hit + 1
  # copy the rest:
  result.add substr(s, cursor)
2170
func multiReplace*(s: string, replacements: varargs[(string, string)]): string =
  ## Same as replace, but specialized for doing multiple replacements in a single
  ## pass through the input string.
  ##
  ## Because all replacements happen in one pass, already replaced text is
  ## never examined again; this means `multiReplace` can be used to swap the
  ## occurrences of "a" and "b", for instance.
  ##
  ## If the resulting string is not longer than the original input string,
  ## only a single memory allocation is required.
  ##
  ## The order of the replacements does matter. Earlier replacements are
  ## preferred over later replacements in the argument list. Pairs whose
  ## search string is empty are ignored.
  result = newStringOfCap(s.len)
  # First characters of all non-empty search strings: a cheap filter, as
  # most characters of `s` are assumed not to start any replacement.
  var firstChars: set[char] = {}
  for pattern, substitute in replacements.items:
    if pattern.len > 0:
      firstChars.incl pattern[0]
  var pos = 0
  while pos < s.len:
    var replaced = false
    if s[pos] in firstChars:
      for pattern, substitute in replacements.items:
        if pattern.len > 0 and s.continuesWith(pattern, pos):
          result.add substitute
          inc(pos, pattern.len)
          replaced = true
          break
    if not replaced:
      # No matching replacement found
      # copy current character from s
      result.add s[pos]
      inc(pos)
2203
2204
2205
func insertSep*(s: string, sep = '_', digits = 3): string {.rtl,
    extern: "nsuInsertSep".} =
  ## Inserts the separator `sep` after `digits` characters (default: 3)
  ## from right to left.
  ##
  ## A leading run of non-digit characters (for instance a sign) is copied
  ## unchanged and not counted when grouping. An empty `s` yields an empty
  ## string. `digits` is used as a divisor and should be positive.
  ##
  ## Even though the algorithm works with any string `s`, it is only useful
  ## if `s` contains a number.
  runnableExamples:
    doAssert insertSep("1000000") == "1_000_000"
    doAssert insertSep("-1000000") == "-1_000_000"
    doAssert insertSep("") == ""
  result = newStringOfCap(s.len)
  if s.len == 0:
    # Nothing to group; this also avoids indexing `s[s.low]` on an empty
    # string below.
    return
  let hasPrefix = not isDigit(s[s.low])
  # Index in `s` where the part that gets grouped starts.
  var idx = 0
  if hasPrefix:
    result.add s[s.low]
    for i in (s.low + 1)..s.high:
      idx = i
      if not isDigit(s[i]):
        result.add s[i]
      else:
        break
  let partsLen = s.len - idx
  # Length of the grouped part including the separators to be inserted.
  var L = (partsLen-1) div digits + partsLen
  result.setLen(L + idx)
  # `j` counts the characters written since the last separator.
  var j = 0
  dec(L)
  # Fill the result back to front so groups are formed from the right.
  for i in countdown(partsLen-1, 0):
    if j == digits:
      result[L + idx] = sep
      dec(L)
      j = 0
    result[L + idx] = s[i + idx]
    inc(j)
    dec(L)
2239
func escape*(s: string, prefix = "\"", suffix = "\""): string {.rtl,
    extern: "nsuEscape".} =
  ## Returns an escaped copy of the string `s`.
  ##
  ## .. note:: The escaping scheme is different from
  ##    `system.addEscapedChar`.
  ##
  ## * replaces `'\0'..'\31'` and `'\127'..'\255'` by `\xHH` where `HH` is its hexadecimal value
  ## * replaces ``\`` by `\\`
  ## * replaces `'` by `\'`
  ## * replaces `"` by `\"`
  ##
  ## The resulting string is prefixed with `prefix` and suffixed with `suffix`.
  ## Both may be empty strings.
  ##
  ## See also:
  ## * `addEscapedChar proc<system.html#addEscapedChar,string,char>`_
  ## * `unescape func<#unescape,string,string,string>`_ for the opposite
  ##   operation
  result = newStringOfCap(s.len + s.len shr 2)
  result.add(prefix)
  for ch in s:
    if ch in {'\0'..'\31', '\127'..'\255'}:
      # Control and non-ASCII bytes become a two-digit hex escape.
      result.add("\\x")
      result.add(toHex(ord(ch), 2))
    elif ch in {'\\', '\'', '\"'}:
      # These characters are simply preceded by a backslash.
      result.add('\\')
      result.add(ch)
    else:
      result.add(ch)
  result.add(suffix)
2271
func unescape*(s: string, prefix = "\"", suffix = "\""): string {.rtl,
    extern: "nsuUnescape".} =
  ## Returns an unescaped copy of the string `s`, with `prefix` and `suffix`
  ## stripped.
  ##
  ## This complements `escape func<#escape,string,string,string>`_
  ## as it performs the opposite operations. Escape sequences other than
  ## `\xHH`, `\\`, `\'` and `\"` are copied to the result unchanged.
  ##
  ## If `s` does not begin with `prefix` and end with `suffix` a
  ## ValueError exception will be raised.
  if not s.startsWith(prefix):
    raise newException(ValueError,
                       "String does not start with: " & prefix)
  result = newStringOfCap(s.len)
  let bodyEnd = s.len - suffix.len
  var pos = prefix.len
  while pos < bodyEnd:
    let ch = s[pos]
    if ch != '\\':
      result.add(ch)
      inc(pos)
      continue
    if pos+1 >= s.len:
      # A lone backslash at the very end of `s` is kept as is.
      result.add('\\')
      break
    let escaped = s[pos+1]
    case escaped
    of 'x':
      # `\xHH`: parse up to two hex digits following the `x`.
      var code = 0
      let consumed = parseutils.parseHex(s, code, pos + 2, maxLen = 2)
      result.add(chr(code))
      inc(pos, 2 + consumed)
    of '\\', '\'', '\"':
      result.add(escaped)
      inc(pos, 2)
    else:
      result.add("\\" & escaped)
      inc(pos, 2)
  if not s.endsWith(suffix):
    raise newException(ValueError,
                       "String does not end in: " & suffix)
2314
func validIdentifier*(s: string): bool {.rtl, extern: "nsuValidIdentifier".} =
  ## Checks whether `s` is a valid identifier.
  ##
  ## A valid identifier starts with a character of the set `IdentStartChars`
  ## and is followed by any number of characters of the set `IdentChars`.
  ## The empty string is not a valid identifier.
  runnableExamples:
    doAssert "abc_def08".validIdentifier

  if s.len == 0 or s[0] notin IdentStartChars:
    return false
  for idx in 1 ..< s.len:
    if s[idx] notin IdentChars:
      return false
  result = true
2327
2328
2329# floating point formatting:
when not defined(js):
  # Binding to C's variadic `sprintf` from `<stdio.h>`; it is not declared
  # when compiling for the JS backend.
  func c_sprintf(buf, frmt: cstring): cint {.header: "<stdio.h>",
                                     importc: "sprintf", varargs}
2333
type
  FloatFormatMode* = enum
    ## The different modes of floating point formatting, as accepted by
    ## `formatBiggestFloat` and `formatFloat`.
    ffDefault,   ## use the shorter floating point notation
    ffDecimal,   ## use decimal floating point notation
    ffScientific ## use scientific notation (using `e` character)
2340
func formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault,
                         precision: range[-1..32] = 16;
                         decimalSep = '.'): string {.rtl, extern: "nsu$1".} =
  ## Converts a floating point value `f` to a string.
  ##
  ## If `format == ffDecimal` or `format == ffScientific` then precision is
  ## the number of digits to be printed after the decimal point.
  ## If `format == ffDefault` then precision is the number of significant
  ## digits to be printed (the JS backend ignores `precision` in this mode).
  ##
  ## If `precision == -1`, it tries to format it nicely.
  ##
  ## Any `.` or `,` in the formatted number is replaced by `decimalSep`.
  runnableExamples:
    let x = 123.456
    doAssert x.formatBiggestFloat() == "123.4560000000000"
    doAssert x.formatBiggestFloat(ffDecimal, 4) == "123.4560"
    doAssert x.formatBiggestFloat(ffScientific, 2) == "1.23e+02"
  when defined(js):
    # JS backend: delegate to the JavaScript number formatting methods.
    var precision = precision
    if precision == -1:
      # use the same default precision as c_sprintf
      precision = 6
    var res: cstring
    case format
    of ffDefault:
      {.emit: "`res` = `f`.toString();".}
    of ffDecimal:
      {.emit: "`res` = `f`.toFixed(`precision`);".}
    of ffScientific:
      {.emit: "`res` = `f`.toExponential(`precision`);".}
    result = $res
    if 1.0 / f == -Inf:
      # JavaScript removes the "-" from negative Zero, add it back here
      result = "-" & $res
    for i in 0 ..< result.len:
      # Depending on the locale either dot or comma is produced,
      # but nothing else is possible:
      if result[i] in {'.', ','}: result[i] = decimalSep
  else:
    # Other backends: build a printf format string and call C's `sprintf`.
    # The conversion characters are listed in `FloatFormatMode` order.
    const floatFormatToChar: array[FloatFormatMode, char] = ['g', 'f', 'e']
    var
      frmtstr {.noinit.}: array[0..5, char]
      buf {.noinit.}: array[0..2500, char]
      L: cint
    frmtstr[0] = '%'
    if precision >= 0:
      # Format is "%#.*<c>": the precision is passed as the `*` argument.
      frmtstr[1] = '#'
      frmtstr[2] = '.'
      frmtstr[3] = '*'
      frmtstr[4] = floatFormatToChar[format]
      frmtstr[5] = '\0'
      L = c_sprintf(addr buf, addr frmtstr, precision, f)
    else:
      # Format is "%<c>": no explicit precision is given to `sprintf`.
      frmtstr[1] = floatFormatToChar[format]
      frmtstr[2] = '\0'
      L = c_sprintf(addr buf, addr frmtstr, f)
    # `L` is the value returned by `sprintf`; copy that many characters.
    result = newString(L)
    for i in 0 ..< L:
      # Depending on the locale either dot or comma is produced,
      # but nothing else is possible:
      if buf[i] in {'.', ','}: result[i] = decimalSep
      else: result[i] = buf[i]
    when defined(windows):
      # VS pre 2015 violates the C standard: "The exponent always contains at
      # least two digits, and only as many more digits as necessary to
      # represent the exponent." [C11 §7.21.6.1]
      # The following post-processing fixes this behavior.
      if result.len > 4 and result[^4] == '+' and result[^3] == '0':
        result[^3] = result[^2]
        result[^2] = result[^1]
        result.setLen(result.len - 1)
2413
func formatFloat*(f: float, format: FloatFormatMode = ffDefault,
                  precision: range[-1..32] = 16; decimalSep = '.'): string {.
                  rtl, extern: "nsu$1".} =
  ## Converts a floating point value `f` to a string by forwarding all
  ## arguments to `formatBiggestFloat`.
  ##
  ## If `format == ffDecimal` or `format == ffScientific` then precision is
  ## the number of digits to be printed after the decimal point.
  ## If `format == ffDefault` then precision is the number of significant
  ## digits to be printed.
  ##
  ## If `precision == -1`, it tries to format it nicely.
  runnableExamples:
    let x = 123.456
    doAssert x.formatFloat() == "123.4560000000000"
    doAssert x.formatFloat(ffDecimal, 4) == "123.4560"
    doAssert x.formatFloat(ffScientific, 2) == "1.23e+02"

  formatBiggestFloat(f, format, precision, decimalSep)
2434
func trimZeros*(x: var string; decimalSep = '.') =
  ## Removes the trailing zeros of the fractional part of a formatted
  ## floating point value `x` (must be declared as `var`). When the whole
  ## fractional part consists of zeros, the decimal separator is removed as
  ## well. An exponent introduced by `e` is kept.
  ##
  ## This modifies `x` itself, it does not return a copy. A string without
  ## `decimalSep` is left unchanged.
  runnableExamples:
    var x = "123.456000000"
    x.trimZeros()
    doAssert x == "123.456"

  let sepPos = find(x, decimalSep)
  if sepPos < 0:
    return
  # The mantissa ends right before the exponent marker, if there is one.
  let expPos = find(x, 'e', start = sepPos)
  let mantissaEnd = if expPos >= 0: expPos - 1 else: high(x)
  var cut = mantissaEnd
  while cut >= 0 and x[cut] == '0':
    dec(cut)
  # Keep the last significant digit; if only the separator is left, it is
  # deleted together with the zeros.
  if cut > sepPos:
    inc(cut)
  x.delete(cut, mantissaEnd)
2453
type
  BinaryPrefixMode* = enum ## The different names for binary prefixes.
    bpIEC,                 ## use the IEC/ISO standard prefixes such as kibi
    bpColloquial           ## use the colloquial kilo, mega etc
2458
func formatSize*(bytes: int64,
                 decimalSep = '.',
                 prefix = bpIEC,
                 includeSpace = false): string =
  ## Rounds and formats `bytes`.
  ##
  ## The value is scaled by the largest power of 1024 that does not exceed
  ## its magnitude and printed with at most 3 decimals (trailing zeros are
  ## trimmed), followed by the matching prefix and `B`.
  ##
  ## By default, uses the IEC/ISO standard binary prefixes, so 1024 will be
  ## formatted as 1KiB.  Set prefix to `bpColloquial` to use the colloquial
  ## names from the SI standard (e.g. k for 1000 being reused as 1024).
  ##
  ## `includeSpace` can be set to true to include the (SI preferred) space
  ## between the number and the unit (e.g. 1 KiB).
  ##
  ## See also:
  ## * `strformat module<strformat.html>`_ for string interpolation and formatting
  runnableExamples:
    doAssert formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB"
    doAssert formatSize((2.234*1024*1024).int) == "2.234MiB"
    doAssert formatSize(4096, includeSpace = true) == "4 KiB"
    doAssert formatSize(4096, prefix = bpColloquial, includeSpace = true) == "4 kB"
    doAssert formatSize(4096) == "4KiB"
    doAssert formatSize(5_378_934, prefix = bpColloquial, decimalSep = ',') == "5,13MB"
    doAssert formatSize(1'i64 shl 60) == "1EiB"

  const iecPrefixes = ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"]
  const collPrefixes = ["", "k", "M", "G", "T", "P", "E", "Z", "Y"]

  # Find the largest prefix for which the scaled integer value is non-zero.
  # Dividing step by step (instead of shifting `1'i64` by `index*10` bits)
  # keeps every intermediate value inside the 64 bit range, so sizes of
  # 1 EiB and above are handled correctly.
  var
    matchedIndex = 0
    scaled = bytes
  while matchedIndex < iecPrefixes.high and scaled div 1024 != 0:
    scaled = scaled div 1024
    inc matchedIndex

  # For an `int64` the loop stops at "Ei" at the latest, hence the shift
  # below is at most 60 bits wide.
  let fbytes = bytes.float / (1'i64 shl (matchedIndex*10)).float
  result = formatFloat(fbytes, format = ffDecimal, precision = 3,
      decimalSep = decimalSep)
  result.trimZeros(decimalSep)
  if includeSpace:
    result &= " "
  if prefix == bpColloquial:
    result &= collPrefixes[matchedIndex]
  else:
    result &= iecPrefixes[matchedIndex]
  result &= "B"
2514
2515func formatEng*(f: BiggestFloat,
2516                precision: range[0..32] = 10,
2517                trim: bool = true,
2518                siPrefix: bool = false,
2519                unit: string = "",
2520                decimalSep = '.',
2521                useUnitSpace = false): string =
2522  ## Converts a floating point value `f` to a string using engineering notation.
2523  ##
2524  ## Numbers in of the range -1000.0<f<1000.0 will be formatted without an
2525  ## exponent. Numbers outside of this range will be formatted as a
2526  ## significand in the range -1000.0<f<1000.0 and an exponent that will always
2527  ## be an integer multiple of 3, corresponding with the SI prefix scale k, M,
2528  ## G, T etc for numbers with an absolute value greater than 1 and m, μ, n, p
2529  ## etc for numbers with an absolute value less than 1.
2530  ##
2531  ## The default configuration (`trim=true` and `precision=10`) shows the
2532  ## **shortest** form that precisely (up to a maximum of 10 decimal places)
2533  ## displays the value. For example, 4.100000 will be displayed as 4.1 (which
2534  ## is mathematically identical) whereas 4.1000003 will be displayed as
2535  ## 4.1000003.
2536  ##
2537  ## If `trim` is set to true, trailing zeros will be removed; if false, the
2538  ## number of digits specified by `precision` will always be shown.
2539  ##
2540  ## `precision` can be used to set the number of digits to be shown after the
2541  ## decimal point or (if `trim` is true) the maximum number of digits to be
2542  ## shown.
2543  ##
2544  ## .. code-block:: nim
2545  ##
2546  ##    formatEng(0, 2, trim=false) == "0.00"
2547  ##    formatEng(0, 2) == "0"
2548  ##    formatEng(0.053, 0) == "53e-3"
2549  ##    formatEng(52731234, 2) == "52.73e6"
2550  ##    formatEng(-52731234, 2) == "-52.73e6"
2551  ##
2552  ## If `siPrefix` is set to true, the number will be displayed with the SI
2553  ## prefix corresponding to the exponent. For example 4100 will be displayed
2554  ## as "4.1 k" instead of "4.1e3". Note that `u` is used for micro- in place
2555  ## of the greek letter mu (μ) as per ISO 2955. Numbers with an absolute
2556  ## value outside of the range 1e-18<f<1000e18 (1a<f<1000E) will be displayed
2557  ## with an exponent rather than an SI prefix, regardless of whether
2558  ## `siPrefix` is true.
2559  ##
2560  ## If `useUnitSpace` is true, the provided unit will be appended to the string
2561  ## (with a space as required by the SI standard). This behaviour is slightly
2562  ## different to appending the unit to the result as the location of the space
2563  ## is altered depending on whether there is an exponent.
2564  ##
2565  ## .. code-block:: nim
2566  ##
2567  ##    formatEng(4100, siPrefix=true, unit="V") == "4.1 kV"
2568  ##    formatEng(4.1, siPrefix=true, unit="V") == "4.1 V"
2569  ##    formatEng(4.1, siPrefix=true) == "4.1" # Note lack of space
2570  ##    formatEng(4100, siPrefix=true) == "4.1 k"
2571  ##    formatEng(4.1, siPrefix=true, unit="") == "4.1 " # Space with unit=""
2572  ##    formatEng(4100, siPrefix=true, unit="") == "4.1 k"
2573  ##    formatEng(4100) == "4.1e3"
2574  ##    formatEng(4100, unit="V") == "4.1e3 V"
2575  ##    formatEng(4100, unit="", useUnitSpace=true) == "4.1e3 " # Space with useUnitSpace=true
2576  ##
2577  ## `decimalSep` is used as the decimal separator.
2578  ##
2579  ## See also:
2580  ## * `strformat module<strformat.html>`_ for string interpolation and formatting
2581  var
2582    absolute: BiggestFloat
2583    significand: BiggestFloat
2584    fexponent: BiggestFloat
2585    exponent: int
2586    splitResult: seq[string]
2587    suffix: string = ""
2588  func getPrefix(exp: int): char =
2589    ## Get the SI prefix for a given exponent
2590    ##
2591    ## Assumes exponent is a multiple of 3; returns ' ' if no prefix found
2592    const siPrefixes = ['a', 'f', 'p', 'n', 'u', 'm', ' ', 'k', 'M', 'G', 'T',
2593        'P', 'E']
2594    var index: int = (exp div 3) + 6
2595    result = ' '
2596    if index in low(siPrefixes)..high(siPrefixes):
2597      result = siPrefixes[index]
2598
2599  # Most of the work is done with the sign ignored, so get the absolute value
2600  absolute = abs(f)
2601  significand = f
2602
2603  if absolute == 0.0:
2604    # Simple case: just format it and force the exponent to 0
2605    exponent = 0
2606    result = significand.formatBiggestFloat(ffDecimal, precision,
2607        decimalSep = '.')
2608  else:
2609    # Find the best exponent that's a multiple of 3
2610    fexponent = floor(log10(absolute))
2611    fexponent = 3.0 * floor(fexponent / 3.0)
2612    # Adjust the significand for the new exponent
2613    significand /= pow(10.0, fexponent)
2614
2615    # Adjust the significand and check whether it has affected
2616    # the exponent
2617    absolute = abs(significand)
2618    if absolute >= 1000.0:
2619      significand *= 0.001
2620      fexponent += 3
2621    # Components of the result:
2622    result = significand.formatBiggestFloat(ffDecimal, precision,
2623        decimalSep = '.')
2624    exponent = fexponent.int()
2625
2626  splitResult = result.split('.')
2627  result = splitResult[0]
2628  # result should have at most one decimal character
2629  if splitResult.len() > 1:
2630    # If trim is set, we get rid of trailing zeros.  Don't use trimZeros here as
2631    # we can be a bit more efficient through knowledge that there will never be
2632    # an exponent in this part.
2633    if trim:
2634      while splitResult[1].endsWith("0"):
2635        # Trim last character
2636        splitResult[1].setLen(splitResult[1].len-1)
2637      if splitResult[1].len() > 0:
2638        result &= decimalSep & splitResult[1]
2639    else:
2640      result &= decimalSep & splitResult[1]
2641
2642  # Combine the results accordingly
2643  if siPrefix and exponent != 0:
2644    var p = getPrefix(exponent)
2645    if p != ' ':
2646      suffix = " " & p
2647      exponent = 0 # Exponent replaced by SI prefix
2648  if suffix == "" and useUnitSpace:
2649    suffix = " "
2650  suffix &= unit
2651  if exponent != 0:
2652    result &= "e" & $exponent
2653  result &= suffix
2654
func findNormalized(x: string, inArray: openArray[string]): int =
  ## Returns the index of the key in `inArray` that equals `x` under
  ## `cmpIgnoreStyle`, or -1 if there is none.
  ##
  ## Only even indices that are followed by another element are inspected,
  ## so `inArray` is read as a flat sequence of key/value pairs.
  result = -1
  # Step over the value slots: a value must never be matched as a key.
  for keyIdx in countup(0, high(inArray) - 1, 2):
    if cmpIgnoreStyle(x, inArray[keyIdx]) == 0:
      return keyIdx
2662
func invalidFormatString() {.noinline.} =
  ## Raises `ValueError` to report an ill-formed format string.
  const msg = "invalid format string"
  raise newException(ValueError, msg)
2665
func addf*(s: var string, formatstr: string, a: varargs[string, `$`]) {.rtl,
    extern: "nsuAddf".} =
  ## The same as `add(s, formatstr % a)`, but more efficient.
  ##
  ## Patterns recognised in `formatstr`:
  ## * `$$` appends a literal `$`.
  ## * `$#` appends the next argument, starting with the first one.
  ## * `$N` and `${N}` append the N-th argument (1-based); `$-N` and
  ##   `${-N}` count from the end of `a`.
  ## * `$name` and `${name}` look `name` up among the keys of `a` (the
  ##   entries at even indices) and append the entry following the match.
  ##
  ## A `$` that is the very last character of `formatstr` is appended
  ## verbatim.
  ##
  ## Raises `ValueError` if a pattern is malformed or refers to an argument
  ## that does not exist; this includes a `${` that ends `formatstr`.
  ## Output produced before the offending pattern has already been appended
  ## to `s` at that point.
  const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'}
  var i = 0
  var num = 0 # index of the argument consumed by the next `$#`
  while i < len(formatstr):
    if formatstr[i] == '$' and i+1 < len(formatstr):
      case formatstr[i+1]
      of '#':
        if num > a.high: invalidFormatString()
        add s, a[num]
        inc i, 2
        inc num
      of '$':
        add s, '$'
        inc(i, 2)
      of '1'..'9', '-':
        var j = 0
        inc(i) # skip $
        let negative = formatstr[i] == '-'
        if negative: inc i
        while i < formatstr.len and formatstr[i] in Digits:
          j = j * 10 + ord(formatstr[i]) - ord('0')
          inc(i)
        # A `$-` without digits leaves j == 0, so idx == a.len and is
        # rejected by the range check below.
        let idx = if not negative: j-1 else: a.len-j
        if idx < 0 or idx > a.high: invalidFormatString()
        add s, a[idx]
      of '{':
        var j = i+2
        var k = 0
        # `${` may be the last two characters of `formatstr`; reject that
        # here instead of indexing past the end of the string below.
        if j >= formatstr.len: invalidFormatString()
        let negative = formatstr[j] == '-'
        if negative: inc j
        # 0: nothing scanned yet, 1: only digits so far, -1: saw a non-digit
        var isNumber = 0
        # Scan up to the closing '}' (or the end of the string).
        while j < formatstr.len and formatstr[j] notin {'\0', '}'}:
          if formatstr[j] in Digits:
            k = k * 10 + ord(formatstr[j]) - ord('0')
            if isNumber == 0: isNumber = 1
          else:
            isNumber = -1
          inc(j)
        if isNumber == 1:
          let idx = if not negative: k-1 else: a.len-k
          if idx < 0 or idx > a.high: invalidFormatString()
          add s, a[idx]
        else:
          let x = findNormalized(substr(formatstr, i+2, j-1), a)
          if x >= 0 and x < high(a): add s, a[x+1]
          else: invalidFormatString()
        i = j+1 # continue after the '}'
      of 'a'..'z', 'A'..'Z', '\128'..'\255', '_':
        var j = i+1
        while j < formatstr.len and formatstr[j] in PatternChars: inc(j)
        let x = findNormalized(substr(formatstr, i+1, j-1), a)
        if x >= 0 and x < high(a): add s, a[x+1]
        else: invalidFormatString()
        i = j
      else:
        invalidFormatString()
    else:
      add s, formatstr[i]
      inc(i)
2728
func `%`*(formatstr: string, a: openArray[string]): string {.rtl,
    extern: "nsuFormatOpenArray".} =
  ## Interpolates a format string with the values from `a`.
  ##
  ## The `substitution`:idx: operator replaces the substitution variables in
  ## `formatstr` and returns the resulting string. This is often called
  ## `string interpolation`:idx:.
  ##
  ## An example explains it best:
  ##
  ## .. code-block:: nim
  ##   "$1 eats $2." % ["The cat", "fish"]
  ##
  ## Results in:
  ##
  ## .. code-block:: nim
  ##   "The cat eats fish."
  ##
  ## The substitution variables (the thing after the `$`) are enumerated
  ## from 1 to `a.len`.
  ## Use `$$` to produce a verbatim `$`.
  ## The notation `$#` refers to the next substitution variable:
  ##
  ## .. code-block:: nim
  ##   "$# eats $#." % ["The cat", "fish"]
  ##
  ## Substitution variables can also be words (that is
  ## `[A-Za-z_]+[A-Za-z0-9_]*`). In that case the arguments in `a` with even
  ## indices are the keys and those with odd indices are the corresponding
  ## values. An example:
  ##
  ## .. code-block:: nim
  ##   "$animal eats $food." % ["animal", "The cat", "food", "fish"]
  ##
  ## Results in:
  ##
  ## .. code-block:: nim
  ##   "The cat eats fish."
  ##
  ## The variables are compared with `cmpIgnoreStyle`. `ValueError` is
  ## raised if an ill-formed format string has been passed to the `%` operator.
  ##
  ## See also:
  ## * `strformat module<strformat.html>`_ for string interpolation and formatting
  # Reserve room for the template plus 16 bytes per argument.
  let estimatedLen = formatstr.len + a.len * 16
  result = newStringOfCap(estimatedLen)
  result.addf(formatstr, a)
2776
func `%`*(formatstr, a: string): string {.rtl,
    extern: "nsuFormatSingleElem".} =
  ## Interpolates `formatstr` with the single value `a`; equivalent to
  ## `formatstr % [a]` (see
  ## `% func<#%25,string,openArray[string]>`_).
  let capacity = formatstr.len + a.len
  result = newStringOfCap(capacity)
  result.addf(formatstr, [a])
2783
func format*(formatstr: string, a: varargs[string, `$`]): string {.rtl,
    extern: "nsuFormatVarargs".} =
  ## This is the same as `formatstr % a` (see
  ## `% func<#%25,string,openArray[string]>`_) except that it supports
  ## auto stringification.
  ##
  ## Raises `ValueError` if `formatstr` is ill-formed, exactly like `%`.
  ##
  ## See also:
  ## * `strformat module<strformat.html>`_ for string interpolation and formatting
  # `a.len` is the number of arguments, not their combined length, so
  # reserve 16 bytes per argument like the `%` overload for `openArray`
  # does; this is only a capacity hint and does not affect the output.
  result = newStringOfCap(formatstr.len + a.len shl 4)
  addf(result, formatstr, a)
2794
2795
func strip*(s: string, leading = true, trailing = true,
            chars: set[char] = Whitespace): string {.rtl, extern: "nsuStrip".} =
  ## Returns a copy of `s` with leading and/or trailing `chars` removed
  ## (default: whitespace characters).
  ##
  ## Leading `chars` are removed when `leading` is true (default) and
  ## trailing `chars` when `trailing` is true (default).
  ## With both set to false the string is returned unchanged.
  ##
  ## See also:
  ## * `strip proc<strbasics.html#strip,string,set[char]>`_ Inplace version.
  ## * `stripLineEnd func<#stripLineEnd,string>`_
  runnableExamples:
    let a = "  vhellov   "
    let b = strip(a)
    doAssert b == "vhellov"

    doAssert a.strip(leading = false) == "  vhellov"
    doAssert a.strip(trailing = false) == "vhellov   "

    doAssert b.strip(chars = {'v'}) == "hello"
    doAssert b.strip(leading = false, chars = {'v'}) == "vhello"

    let c = "blaXbla"
    doAssert c.strip(chars = {'b', 'a'}) == "laXbl"
    doAssert c.strip(chars = {'b', 'a', 'l'}) == "X"

  # `lo..hi` is the inclusive range of characters that survive.
  var lo = s.low
  var hi = s.high
  if leading:
    while lo <= hi and s[lo] in chars:
      inc lo
  if trailing:
    while hi >= lo and s[hi] in chars:
      dec hi
  result = substr(s, lo, hi)
2831
func stripLineEnd*(s: var string) =
  ## Removes one of these suffixes from `s` in-place:
  ## `\r, \n, \r\n, \f, \v` (at most one instance).
  ## This can be handy, for example, together with `osproc.execCmdEx`.
  ## aka: `chomp`:idx:
  runnableExamples:
    var s = "foo\n\n"
    s.stripLineEnd
    doAssert s == "foo\n"
    s = "foo\r\n"
    s.stripLineEnd
    doAssert s == "foo"

  if s.len == 0: return
  let last = s.high
  # Number of trailing characters to drop: 0, 1, or 2 for "\r\n".
  var drop = 0
  if s[last] == '\n':
    drop = if last > 0 and s[last-1] == '\r': 2 else: 1
  elif s[last] in {'\r', '\v', '\f'}:
    drop = 1
  if drop > 0:
    s.setLen(s.len - drop)
2856
2857
iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[
  token: string, isSep: bool] =
  ## Splits the string `s` into consecutive tokens.
  ##
  ## Each token is a maximal run of characters that are either all in
  ## `seps` (`isSep` is true) or all outside of it (`isSep` is false).
  ## Example:
  ##
  ## .. code-block:: nim
  ##   for word in tokenize("  this is an  example  "):
  ##     writeLine(stdout, word)
  ##
  ## Results in:
  ##
  ## .. code-block:: nim
  ##   ("  ", true)
  ##   ("this", false)
  ##   (" ", true)
  ##   ("is", false)
  ##   (" ", true)
  ##   ("an", false)
  ##   ("  ", true)
  ##   ("example", false)
  ##   ("  ", true)
  var start = 0
  while start < s.len:
    # The first character decides what kind of run this token is.
    let sepRun = s[start] in seps
    var stop = start + 1
    while stop < s.len and (s[stop] in seps) == sepRun:
      inc stop
    yield (substr(s, start, stop - 1), sepRun)
    start = stop
2891
func isEmptyOrWhitespace*(s: string): bool {.rtl,
    extern: "nsuIsEmptyOrWhitespace".} =
  ## Returns true if `s` is empty or every character of it is a whitespace
  ## character.
  for c in s:
    if c notin Whitespace:
      return false
  result = true
2896