1--[[--
2 Additions to the core string module.
3
4 The module table returned by `std.string` also contains all of the entries
 from the core string table.  A hygienic way to import this module, then, is
6 simply to override the core `string` locally:
7
8    local string = require "std.string"
9
10 @module std.string
11]]
12
13local base   = require "std.base"
14local debug  = require "std.debug"
15
16local StrBuf = require "std.strbuf" {}
17
18local copy          = base.copy
19local getmetamethod = base.getmetamethod
20local insert, len   = base.insert, base.len
21local pairs         = base.pairs
22local render        = base.render
23
24local M
25
26
27
-- Stringifier applied to both operands of the `..` metamethod below.
local _tostring = base.tostring

--- Concatenate two values after converting each one to a string.
-- @param s left-hand operand
-- @param o right-hand operand
-- @return the stringified *s* followed by the stringified *o*
local function __concat (s, o)
  local lhs, rhs = _tostring (s), _tostring (o)
  return lhs .. rhs
end
33
34
--- Subscript operation for strings.
-- @string s string being indexed
-- @param i numeric position, or the name of a module entry
-- @return the one-character substring at *i* when *i* is a number,
--   otherwise whatever the module table holds under *i*
local function __index (s, i)
  if type (i) ~= "number" then
    -- Anything that is not a position is looked up in the module table.
    return M[i]
  end
  return s:sub (i, i)
end
43
44
-- Core formatter, captured before this module wraps it.
local _format = string.format

--- Format a template, leaving it untouched when there is nothing to format.
-- @string f format template
-- @param[opt] arg1 first value to substitute; when `nil`, *f* is returned as is
-- @param[opt] ... remaining values to substitute
-- @treturn string formatted result, or *f* itself
local function format (f, arg1, ...)
  if arg1 == nil then
    return f
  end
  return (_format (f, arg1, ...))
end
50
51
--- Keep the first two arguments and gather the rest into a list.
-- @param from first value, passed through
-- @param to second value, passed through
-- @param ... values to collect
-- @return *from*
-- @return *to*
-- @treturn table list of the remaining arguments
local function tpack (from, to, ...)
  local rest = {...}
  return from, to, rest
end
55
--- Run `s:find (...)` and return the captures as a single table.
-- @string s target string
-- @param ... arguments forwarded to `find` (pattern, init, plain)
-- @return start of match, or `nil` when nothing matched
-- @return end of match, or `nil` when nothing matched
-- @treturn table list of captures (empty when there are none)
local function tfind (s, ...)
  local found = { s:find (...) }
  local captures = {}
  -- Slots 1 and 2 are the match bounds; captures start at slot 3.
  for k = 3, #found do
    captures[k - 2] = found[k]
  end
  return found[1], found[2], captures
end
59
60
--- Collect every successive match of a pattern in a string.
-- Each search resumes just after the previous match.  A match of the empty
-- string (e.g. from `x*`, `$` or a plain empty pattern) does not consume
-- any input, so the scan steps one position past it; without that step
-- the same empty match would be found forever.
-- @string s target string
-- @string p pattern to match in *s*
-- @int[opt=1] i start position
-- @param ... extra arguments forwarded to `tfind` (the *plain* flag)
-- @treturn table list of `{from, to; capt = {captures}}`
local function finds (s, p, i, ...)
  i = i or 1
  local l = {}
  -- Last position at which a (necessarily empty) match can still start.
  local limit = #s + 1
  local from, to, r
  repeat
    from, to, r = tfind (s, p, i, ...)
    if from ~= nil then
      insert (l, {from, to, capt = r})
      if to >= from then
        i = to + 1
      else
        -- Empty match: `to` is `from - 1`, so force progress.
        i = from + 1
      end
    end
  until from == nil or i > limit
  return l
end
74
75
--- Install this module's functions over a `string` table and hook the
-- string metatable.
-- The `string` field of *namespace* is replaced by the result of
-- `base.copy` applied to the existing table (or a new one) and the module
-- table; the shared metatable of all strings then gets this module's
-- `__concat` and `__index`.
-- @tparam[opt=_G] table namespace table whose `string` field is replaced
-- @treturn table the module table
local function monkey_patch (namespace)
  local target = namespace or _G
  target.string = base.copy (target.string or {}, M)

  -- All strings share one metatable, so this affects every string value.
  local mt = getmetatable ""
  mt.__concat = M.__concat
  mt.__index = M.__index

  return M
end
86
87
--- Upper-case the first character of every alphanumeric run.
-- @string s any string
-- @treturn string *s* with each run of `%w` characters capitalised
local function caps (s)
  local function upcase_initial (first, rest)
    return first:upper () .. rest
  end
  -- Assigning to a single local drops gsub's substitution count.
  local capitalised = s:gsub ("(%w)([%w]*)", upcase_initial)
  return capitalised
end
91
92
--- Escape a string so a POSIX shell reads it as one literal token.
-- Every whitespace character and every character with a special meaning
-- to the shell outside quotes (parentheses, brackets, braces, redirection
-- and control operators, `$`, backtick, backslash, both quote characters,
-- glob characters, `~`, `#` and `!`) is prefixed with a backslash.
-- A newline is wrapped in single quotes instead, because the shell treats
-- backslash-newline as a line continuation and would delete it.
-- @string s any string
-- @treturn string *s* with active shell characters escaped
local function escape_shell (s)
  local function quote (c)
    if c == "\n" then
      return "'\n'"
    end
    return "\\" .. c
  end
  -- Outer parentheses discard gsub's substitution count.
  return (s:gsub ("[%s%(%)%[%]{}<>\\\"'`%$&|;%*%?~#!]", quote))
end
96
97
--- Pick the English ordinal suffix ("st", "nd", "rd" or "th") for a number.
-- Only the last two digits of the absolute value matter; 11, 12 and 13
-- are the irregular cases that take "th".
-- @tparam int|string n any integer value
-- @treturn string English suffix for *n*
local function ordinal_suffix (n)
  local by_last_digit = { "st", "nd", "rd" }
  local last_two = math.abs (n) % 100
  if last_two >= 11 and last_two <= 13 then
    return "th"
  end
  return by_last_digit[last_two % 10] or "th"
end
111
112
--- Justify a string to a fixed width, truncating it when it is too long.
-- @string s string to justify
-- @int w target width; negative right-justifies (keeping the tail of the
--   padded string), otherwise left-justifies (keeping the head)
-- @string[opt=" "] p padding text
-- @treturn string *s* justified to `math.abs (w)` characters
local function pad (s, w, p)
  -- One copy of the padding per column is always enough to fill the width.
  local filler = string.rep (p or " ", math.abs (w))
  local right_justify = w < 0
  if not right_justify then
    return string.sub (s .. filler, 1, w)
  end
  return string.sub (filler .. s, w)
end
120
121
--- Wrap a string into a paragraph.
-- Lines are broken at spaces so that each fits in *w* columns including
-- its indent.  Characters are read with `s:sub`, so the function does not
-- depend on `monkey_patch` having installed integer indexing on strings.
-- NOTE(review): a word longer than the available width is still emitted
-- one character per line, as before -- confirm whether that is intended.
-- @string s a paragraph of text
-- @int[opt=78] w width to wrap to
-- @int[opt=0] ind indent
-- @int[opt=ind] ind1 indent of first line
-- @treturn string *s* wrapped to *w* columns
local function wrap (s, w, ind, ind1)
  w = w or 78
  ind = ind or 0
  ind1 = ind1 or ind
  assert (ind1 < w and ind < w,
          "the indents must be less than the line width")
  local r = StrBuf { string.rep (" ", ind1) }
  -- Line break plus the indent of every line after the first.
  local linebreak = "\n" .. string.rep (" ", ind)
  local i, lstart, lens = 1, ind1, len (s)
  while i <= lens do
    -- First column past the room left on this line; scan back to a space.
    local j = i + w - lstart
    local c = s:sub (j, j)
    while c ~= "" and c ~= " " and j > i do
      j = j - 1
      c = s:sub (j, j)
    end
    local ni = j + 1
    -- Drop the spaces that precede the break point.
    while s:sub (j, j) == " " do
      j = j - 1
    end
    r:concat (s:sub (i, j))
    i = ni
    -- `<=` rather than `<`: a single remaining character still needs its
    -- own line, otherwise it is glued onto the end of this one.
    if i <= lens then
      r:concat (linebreak)
      lstart = ind
    end
  end
  return r:tostring ()
end
148
149
-- SI prefix for each power of 1000, keyed by that power.
local SI_PREFIXES = {
  [-8] = "y", [-7] = "z", [-6] = "a", [-5] = "f",
  [-4] = "p", [-3] = "n", [-2] = "mu", [-1] = "m",
  [0] = "", [1] = "k", [2] = "M", [3] = "G",
  [4] = "T", [5] = "P", [6] = "E", [7] = "Z",
  [8] = "Y"
}

--- Write a number using SI suffixes.
-- The value is first rounded to three significant figures, scaled to the
-- nearest lower power of 1000, and the scaled mantissa is then printed
-- with no decimals.  Powers outside the prefix table are written as
-- `e<power of 1000>`.  The sign of negative numbers is preserved, and
-- values with no exponent form (infinities, NaN) are returned as the
-- platform's own rendering instead of raising an arithmetic error.
-- @tparam number|string n any numeric value
-- @treturn string *n* written with the largest available SI suffix
local function numbertosi (n)
  local t = string.format ("% #.2e", n)
  -- First capture is the sign column: "-" or the space from the " " flag.
  local sign, m, e = t:match ("(.)(.%...)e(.+)")
  if m == nil then
    return (t:gsub ("^%s+", ""))
  end
  local man, exp = tonumber (m), tonumber (e)
  local siexp = math.floor (exp / 3)
  local shift = exp - siexp * 3
  local s = SI_PREFIXES[siexp] or "e" .. tostring (siexp)
  man = man * (10 ^ shift)
  -- Skip the sign for zero so that -0.0 still prints as "0".
  if sign == "-" and man ~= 0 then
    man = -man
  end
  return string.format ("%0.f", man) .. s
end
167
168
--- Strip a pattern from both ends of a string.
-- @string s any string
-- @string[opt="%s+"] r pattern removed from the start and from the end
-- @treturn string *s* without leading and trailing *r*
local function trim (s, r)
  local pattern = r or "%s+"
  -- Each assignment keeps only gsub's first result (the new string).
  local without_head = s:gsub ("^" .. pattern, "")
  local without_both = without_head:gsub (pattern .. "$", "")
  return without_both
end
173
174
--- Pretty-print a table, or other object.
-- Drives `render` with callbacks that put every pair on its own line and
-- indent nested tables by one more *indent* per level.
-- @param x object to convert to string
-- @string[opt="\t"] indent indent between levels
-- @string[opt=""] spacing space before every line
-- @treturn string pretty string rendering of *x*
local function prettytostring (x, indent, spacing)
  indent = indent or "\t"
  spacing = spacing or ""

  -- Emit the brace at the current depth, then descend one level.
  local function open ()
    local s = spacing .. "{"
    spacing = spacing .. indent
    return s
  end

  -- Ascend one level, then emit the brace.  The indent is removed as a
  -- literal suffix: treating it as a Lua pattern would strip the wrong
  -- amount for indents that contain magic characters such as "%" or "-".
  local function close ()
    if indent ~= "" and string.sub (spacing, -#indent) == indent then
      spacing = string.sub (spacing, 1, -(#indent + 1))
    end
    return spacing .. "}"
  end

  -- Strings are quoted with %q; everything else goes through tostring.
  local function elem (e)
    if type (e) == "string" then
      return format ("%q", e)
    end
    return tostring (e)
  end

  -- Keys that are strings made only of word characters and underscores
  -- are written bare; any other key is bracketed.  Table keys and table
  -- values are placed on their own lines.
  local function pair (_, k, v, ks, vs)
    local s = spacing
    if type (k) ~= "string" or k:match "[^%w_]" then
      local brk = (type (k) == "table") and "\n" or ""
      s = s .. "[" .. brk .. ks .. brk .. "]"
    else
      s = s .. k
    end
    s = s .. " ="
    if type (v) == "table" then
      s = s .. "\n"
    else
      s = s .. " "
    end
    return s .. vs
  end

  -- A comma follows every pair that has a predecessor key.
  local function sep (_, k)
    if k then
      return ",\n"
    end
    return "\n"
  end

  return render (x, open, close, elem, pair, sep)
end
227
228
--- Convert a value to a string of Lua source.
-- Strings are quoted with `%q`; numbers, booleans and `nil` use
-- `tostring`.  Any other value is first passed through `copy`, and the
-- result must be a table, which is written as `{[key]=value,...}` with
-- keys and values pickled recursively.  Recursive tables are not
-- supported.
-- @param x object to pickle
-- @treturn string string rendering of *x*
-- @raise "cannot pickle ..." when *x* cannot be turned into a table
local function pickle (x)
  local kind = type (x)
  if kind == "string" then
    return format ("%q", x)
  elseif kind == "number" or kind == "boolean" or kind == "nil" then
    return tostring (x)
  end

  x = copy (x) or x
  if type (x) ~= "table" then
    -- Raise directly: this file never defines the `die` the original called.
    error ("cannot pickle " .. tostring (x))
  end

  -- Collect the pairs and join once, rather than growing a string.
  local parts = {}
  for i, v in pairs (x) do
    parts[#parts + 1] = "[" .. M.pickle (i) .. "]=" .. M.pickle (v)
  end
  return "{" .. table.concat (parts, ",") .. "}"
end
250
251
252
253--[[ ================= ]]--
254--[[ Public Interface. ]]--
255--[[ ================= ]]--
256
257
--- Wrap an exported function with `debug.argscheck`.
-- @string decl declaration of the function, without the module prefix
-- @func fn implementation to wrap
-- @return whatever `debug.argscheck` returns for the qualified declaration
local function X (decl, fn)
  local qualified = "std.string." .. decl
  return debug.argscheck (qualified, fn)
end
261
-- One-line helpers exported below.  They are named here so that the
-- export table reads as a plain list of registrations.

local function chomp (s)
  return (s:gsub ("\n$", ""))
end

local function ltrim (s, r)
  return (s:gsub ("^" .. (r or "%s+"), ""))
end

local function rtrim (s, r)
  return (s:gsub ((r or "%s+") .. "$", ""))
end


M = {
  --- Concatenation metamethod for strings.
  -- @string s left-hand string
  -- @param o object to stringify and append
  -- @return s .. tostring (o)
  -- @usage
  -- local string = require "std.string".monkey_patch ()
  -- concatenated = "foo" .. {"bar"}
  __concat = __concat,

  --- Subscript metamethod for strings.
  -- @string s string
  -- @tparam int|string i index or entry name
  -- @return `s:sub (i, i)` if *i* is a number, otherwise the
  --   `std.string` entry named *i* (if any).
  -- @usage
  -- getmetatable ("").__index = require "std.string".__index
  -- third = ("12345")[3]
  __index = __index,

  --- Capitalise every word of a string.
  -- @function caps
  -- @string s any string
  -- @treturn string *s* with the first letter of each word in upper case
  -- @usage userfullname = caps (input_string)
  caps = X ("caps (string)", caps),

  --- Strip one trailing newline, if present.
  -- @function chomp
  -- @string s any string
  -- @treturn string *s* without its final newline
  -- @usage line = chomp (line)
  chomp = X ("chomp (string)", chomp),

  --- Escape a string for use as a literal pattern.
  -- @function escape_pattern
  -- @string s any string
  -- @treturn string *s* with active pattern characters escaped
  -- @usage substr = inputstr:match (escape_pattern (literal))
  escape_pattern = X ("escape_pattern (string)", base.escape_pattern),

  --- Escape a string for use as a shell token.
  -- Backslash-escapes at least spaces, parentheses, square brackets,
  -- backslashes, double quotes and apostrophes.
  -- @function escape_shell
  -- @string s any string
  -- @treturn string *s* with active shell characters escaped
  -- @usage os.execute ("echo " .. escape_shell (outputstr))
  escape_shell = X ("escape_shell (string)", escape_shell),

  --- Apply `string.find` repeatedly, collecting every match.
  -- @function finds
  -- @string s target string
  -- @string pattern pattern to match in *s*
  -- @int[opt=1] init start position
  -- @bool[opt] plain inhibit magic characters
  -- @return list of `{from, to; capt = {captures}}`
  -- @see std.string.tfind
  -- @usage
  -- for t in std.elems (finds ("the target string", "%S+")) do
  --   print (tostring (t.capt))
  -- end
  finds = X ("finds (string, string, ?int, ?boolean|:plain)", finds),

  --- `string.format` that tolerates a lone argument.
  -- With nothing to substitute, the template is returned unformatted.
  -- @function format
  -- @string f format string
  -- @param[opt] ... arguments to format
  -- @return formatted string
  -- @usage print (format "100% stdlib!")
  format = X ("format (string, [any...])", format),

  --- Strip leading matter from a string.
  -- @function ltrim
  -- @string s any string
  -- @string[opt="%s+"] r leading pattern
  -- @treturn string *s* without leading *r*
  -- @usage print ("got: " .. ltrim (userinput))
  ltrim = X ("ltrim (string, ?string)", ltrim),

  --- Replace core `string` entries with the `std` enhanced versions.
  --
  -- Also makes `..` stringify its operands when applied to core strings,
  -- and enables integer subscripts on strings with `[]`.
  -- @function monkey_patch
  -- @tparam[opt=_G] table namespace where to install global functions
  -- @treturn table the module table
  -- @usage local string = require "std.string".monkey_patch ()
  monkey_patch = X ("monkey_patch (?table)", monkey_patch),

  --- Write a number with an SI suffix.
  -- The scaled mantissa is rounded to a whole number.
  -- @function numbertosi
  -- @tparam number|string n any numeric value
  -- @treturn string *n* simplified using largest available SI suffix.
  -- @usage print (numbertosi (bitspersecond) .. "bps")
  numbertosi = X ("numbertosi (number|string)", numbertosi),

  --- Give the English suffix of an ordinal number.
  -- @function ordinal_suffix
  -- @tparam int|string n any integer value
  -- @treturn string English suffix for *n*
  -- @usage
  -- local now = os.date "*t"
  -- print (format ("%d%s day of the month", now.day, ordinal_suffix (now.day)))
  ordinal_suffix = X ("ordinal_suffix (int|string)", ordinal_suffix),

  --- Justify a string to a given width.
  -- A string longer than the width is truncated, on the left or on the
  -- right according to the sign of *w*.
  -- @function pad
  -- @string s a string to justify
  -- @int w width to justify to (-ve means right-justify; +ve means
  --   left-justify)
  -- @string[opt=" "] p string to pad with
  -- @treturn string *s* justified to *w* characters wide
  -- @usage print (pad (trim (outputstr), 78) .. "\n")
  pad = X ("pad (string, int, ?string)", pad),

  --- Serialise a value to a string.
  -- The string can be passed to `functional.eval` to retrieve the value.
  -- @todo Make it work for recursive tables.
  -- @param x object to pickle
  -- @treturn string reversible string rendering of *x*
  -- @see std.eval
  -- @usage
  -- function slow_identity (x) return functional.eval (pickle (x)) end
  pickle = pickle,

  --- Render a table, or other object, in a readable multi-line layout.
  -- @function prettytostring
  -- @param x object to convert to string
  -- @string[opt="\t"] indent indent between levels
  -- @string[opt=""] spacing space before every line
  -- @treturn string pretty string rendering of *x*
  -- @usage print (prettytostring (std, "  "))
  prettytostring = X ("prettytostring (?any, ?string, ?string)", prettytostring),

  --- Render tables as strings, with recursion detection.
  -- N.B. Callers of render should not themselves recurse, or recursion
  -- detection will not work.
  -- @function render
  -- @param x object to convert to string
  -- @tparam opentablecb open open table rendering function
  -- @tparam closetablecb close close table rendering function
  -- @tparam elementcb elem element rendering function
  -- @tparam paircb pair pair rendering function
  -- @tparam separatorcb sep separator rendering function
  -- @tparam[opt] table roots accumulates table references to detect recursion
  -- @return string representation of *x*
  -- @usage
  -- function tostring (x)
  --   return render (x, lambda '="{"', lambda '="}"', tostring,
  --                  lambda '=_4.."=".._5', lambda '= _4 and "," or ""',
  --                  lambda '=","')
  -- end
  render = X ("render (?any, func, func, func, func, func, ?table)", render),

  --- Strip trailing matter from a string.
  -- @function rtrim
  -- @string s any string
  -- @string[opt="%s+"] r trailing pattern
  -- @treturn string *s* without trailing *r*
  -- @usage print ("got: " .. rtrim (userinput))
  rtrim = X ("rtrim (string, ?string)", rtrim),

  --- Split a string at a separator.
  -- The separator is a Lua pattern, so active characters,
  -- `^$()%.[]*+-?`, need a `%` prefix to match literally in *s*.
  -- @function split
  -- @string s to split
  -- @string[opt="%s+"] sep separator pattern
  -- @return list of strings
  -- @usage words = split "a very short sentence"
  split = X ("split (string, ?string)", base.split),

  --- Run `string.find`, gathering the captures into a table.
  -- @function tfind
  -- @string s target string
  -- @string pattern pattern to match in *s*
  -- @int[opt=1] init start position
  -- @bool[opt] plain inhibit magic characters
  -- @treturn int start of match
  -- @treturn int end of match
  -- @treturn table list of captured strings
  -- @see std.string.finds
  -- @usage b, e, captures = tfind ("the target string", "%s", 10)
  tfind = X ("tfind (string, string, ?int, ?boolean|:plain)", tfind),

  --- Strip leading and trailing matter from a string.
  -- @function trim
  -- @string s any string
  -- @string[opt="%s+"] r leading and trailing pattern
  -- @treturn string *s* without leading and trailing *r*
  -- @usage print ("got: " .. trim (userinput))
  trim = X ("trim (string, ?string)", trim),

  --- Wrap a string into a paragraph.
  -- @function wrap
  -- @string s a paragraph of text
  -- @int[opt=78] w width to wrap to
  -- @int[opt=0] ind indent
  -- @int[opt=ind] ind1 indent of first line
  -- @treturn string *s* wrapped to *w* columns
  -- @usage
  -- print (wrap (copyright, 72, 4))
  wrap = X ("wrap (string, ?int, ?int, ?int)", wrap),
}
473
474
475
476--[[ ============= ]]--
477--[[ Deprecations. ]]--
478--[[ ============= ]]--
479
480
-- Wrapper from std.debug used for the entries kept only for compatibility.
local DEPRECATED = debug.DEPRECATED


-- Each entry forwards to its `std.base` counterpart; the strings are the
-- version, the deprecated name and the advice handed to DEPRECATED.

M.assert = DEPRECATED (
  "41",
  "'std.string.assert'",
  "use 'std.assert' instead",
  base.assert
)


M.require_version = DEPRECATED (
  "41",
  "'std.string.require_version'",
  "use 'std.require' instead",
  base.require
)


M.tostring = DEPRECATED (
  "41",
  "'std.string.tostring'",
  "use 'std.tostring' instead",
  base.tostring
)



-- Module value: the result of merging the core `string` table with M.
-- NOTE(review): presumably existing M entries win over core ones -- confirm
-- against base.merge.
return base.merge (M, string)
498
499
500
501--- Types
502-- @section Types
503
504--- Signature of @{render} open table callback.
505-- @function opentablecb
506-- @tparam table t table about to be rendered
507-- @treturn string open table rendering
508-- @see render
509-- @usage function open (t) return "{" end
510
511
512--- Signature of @{render} close table callback.
513-- @function closetablecb
514-- @tparam table t table just rendered
515-- @treturn string close table rendering
516-- @see render
517-- @usage function close (t) return "}" end
518
519
520--- Signature of @{render} element callback.
521-- @function elementcb
522-- @param x element to render
523-- @treturn string element rendering
524-- @see render
525-- @usage function element (e) return require "std".tostring (e) end
526
527
528--- Signature of @{render} pair callback.
-- Trying to re-render *key* or *value* here will break recursion
-- detection; use the pre-rendered *keystr* and *valuestr* values instead.
531-- @function paircb
532-- @tparam table t table containing pair being rendered
-- @param key key part of pair being rendered
-- @param value value part of pair being rendered
535-- @string keystr prerendered *key*
536-- @string valuestr prerendered *value*
537-- @treturn string pair rendering
538-- @see render
539-- @usage
540-- function pair (_, _, _, key, value) return key .. "=" .. value end
541
542
543--- Signature of @{render} separator callback.
544-- @function separatorcb
545-- @tparam table t table currently being rendered
546-- @param pk *t* key preceding separator, or `nil` for first key
547-- @param pv *t* value preceding separator, or `nil` for first value
548-- @param fk *t* key following separator, or `nil` for last key
549-- @param fv *t* value following separator, or `nil` for last value
550-- @treturn string separator rendering
551-- @usage
552-- function separator (_, _, _, fk) return fk and "," or "" end
553