--[[--
 Additions to the core string module.

 The module table returned by `std.string` also contains all of the entries
 from the core string table.  A hygienic way to import this module, then, is
 simply to override the core `string` locally:

    local string = require "std.string"

 @module std.string
]]

local base  = require "std.base"
local debug = require "std.debug"

local StrBuf = require "std.strbuf" {}

local copy          = base.copy
local getmetamethod = base.getmetamethod
local insert, len   = base.insert, base.len
local pairs         = base.pairs
local render        = base.render

-- Forward declaration: the module table is populated further down, but
-- `__index`, `monkey_patch` and `pickle` need to refer to it.
local M



local _tostring = base.tostring

-- `..` metamethod: stringify both operands before concatenating.
local function __concat (s, o)
  return _tostring (s) .. _tostring (o)
end


-- `[]` metamethod: a numeric index selects a single character, anything
-- else is looked up in the module table.
local function __index (s, i)
  if type (i) == "number" then
    return s:sub (i, i)
  else
    -- Fall back to module methods
    return M[i]
  end
end


local _format = string.format

-- Only call the core `string.format` when there is something to format,
-- so that a lone string containing `%` is returned untouched.
local function format (f, arg1, ...)
  return (arg1 ~= nil) and _format (f, arg1, ...) or f
end


-- Collect all results after the first two into a table.
local function tpack (from, to, ...)
  return from, to, {...}
end

-- `string.find`, with the captures gathered into a table.
local function tfind (s, ...)
  return tpack (s:find (...))
end


-- Repeatedly `tfind` from the end of the previous match.
local function finds (s, p, i, ...)
  i = i or 1
  local l = {}
  local limit = #s + 1
  local from, to, r
  repeat
    from, to, r = tfind (s, p, i, ...)
    if from ~= nil then
      insert (l, {from, to, capt = r})
      -- An empty match has `to == from - 1`; restarting at `to + 1` would
      -- find the same empty match forever, so always move past `from`.
      i = (to >= from) and (to + 1) or (from + 1)
    end
    -- Some Lua versions clamp an out-of-range start position back to the
    -- end of the string, so stop explicitly once we have gone past it.
  until not from or i > limit
  return l
end


-- Install the module functions into `namespace.string`, and the `..` and
-- `[]` metamethods into the metatable shared by all strings.
local function monkey_patch (namespace)
  namespace = namespace or _G
  namespace.string = copy (namespace.string or {}, M)

  local string_metatable = getmetatable ""
  string_metatable.__concat = M.__concat
  string_metatable.__index = M.__index

  return M
end


-- Upper-case the first character of every alphanumeric run.
local function caps (s)
  return (s:gsub ("(%w)([%w]*)", function (l, ls) return l:upper () .. ls end))
end


-- Backslash-escape space, parentheses, backslash, square brackets, double
-- quote and apostrophe.
local function escape_shell (s)
  return (s:gsub ("([ %(%)%\\%[%]\"'])", "\\%1"))
end


-- Only the last two digits matter; 11, 12 and 13 are the irregular cases.
local function ordinal_suffix (n)
  n = math.abs (n) % 100
  local d = n % 10
  if d == 1 and n ~= 11 then
    return "st"
  elseif d == 2 and n ~= 12 then
    return "nd"
  elseif d == 3 and n ~= 13 then
    return "rd"
  else
    return "th"
  end
end


-- Build |w| characters of padding, attach it on the appropriate side, and
-- cut the result to exactly |w| characters.
local function pad (s, w, p)
  p = string.rep (p or " ", math.abs (w))
  if w < 0 then
    return string.sub (p .. s, w)
  end
  return string.sub (s .. p, 1, w)
end


-- Character at position *i* of *s*, or "" when *i* is out of range.  Used
-- instead of `s[i]`, which only works after `monkey_patch` has been called.
local function charat (s, i)
  return s:sub (i, i)
end


local function wrap (s, w, ind, ind1)
  w = w or 78
  ind = ind or 0
  ind1 = ind1 or ind
  assert (ind1 < w and ind < w,
          "the indents must be less than the line width")
  local r = StrBuf { string.rep (" ", ind1) }
  local i, lstart, lens = 1, ind1, len (s)
  while i <= lens do
    -- First candidate break position for this output line.
    local j = i + w - lstart
    -- Walk back to the nearest space, unless we are already past the end
    -- of the string or the whole line is a single unbreakable word.
    while #charat (s, j) > 0 and charat (s, j) ~= " " and j > i do
      j = j - 1
    end
    local ni = j + 1
    -- Drop trailing spaces from the line being emitted.
    while charat (s, j) == " " do
      j = j - 1
    end
    r:concat (s:sub (i, j))
    i = ni
    -- `<=` rather than `<`: when exactly one character remains it still
    -- has to start on a fresh line.
    if i <= lens then
      r:concat ("\n" .. string.rep (" ", ind))
      lstart = ind
    end
  end
  return r:tostring ()
end


-- SI prefixes, indexed by the power of 1000 they stand for.
local SIprefix = {
  [-8] = "y", [-7] = "z", [-6] = "a", [-5] = "f",
  [-4] = "p", [-3] = "n", [-2] = "mu", [-1] = "m",
  [0] = "", [1] = "k", [2] = "M", [3] = "G",
  [4] = "T", [5] = "P", [6] = "E", [7] = "Z",
  [8] = "Y"
}

local function numbertosi (n)
  -- "% #.2e" renders as <sign or space><d.dd>e<exponent>.
  local t = format ("% #.2e", n)
  local _, _, sign, m, e = t:find ("(.)(.%...)e(.+)")
  local man, exp = tonumber (m), tonumber (e)
  local siexp = math.floor (exp / 3)
  local shift = exp - siexp * 3
  -- Fall back to an explicit power of 1000 when there is no prefix.
  local s = SIprefix[siexp] or "e" .. tostring (siexp)
  man = man * (10 ^ shift)
  -- The mantissa was captured without its sign, so put it back.
  return (sign == "-" and "-" or "") .. format ("%0.f", man) .. s
end


-- Strip *r* anchored at the start, then anchored at the end.
local function trim (s, r)
  r = r or "%s+"
  return (s:gsub ("^" .. r, ""):gsub (r .. "$", ""))
end


local function prettytostring (x, indent, spacing)
  indent = indent or "\t"
  spacing = spacing or ""
  -- *indent* is literal text, so escape it before using it in a pattern.
  local indent_at_end = base.escape_pattern (indent) .. "$"
  return render (x,
                 -- open table: emit the brace, then deepen the indentation
                 function ()
                   local s = spacing .. "{"
                   spacing = spacing .. indent
                   return s
                 end,
                 -- close table: undo one level of indentation first
                 function ()
                   spacing = string.gsub (spacing, indent_at_end, "")
                   return spacing .. "}"
                 end,
                 -- element: quote strings, stringify everything else
                 function (x)
                   if type (x) == "string" then
                     return format ("%q", x)
                   else
                     return tostring (x)
                   end
                 end,
                 -- pair: bare identifiers as `k = v`, other keys as `[k] = v`
                 function (x, k, v, ks, vs)
                   local s = spacing
                   if type (k) ~= "string" or k:match "[^%w_]" then
                     s = s .. "["
                     if type (k) == "table" then
                       s = s .. "\n"
                     end
                     s = s .. ks
                     if type (k) == "table" then
                       s = s .. "\n"
                     end
                     s = s .. "]"
                   else
                     s = s .. k
                   end
                   s = s .. " ="
                   if type (v) == "table" then
                     s = s .. "\n"
                   else
                     s = s .. " "
                   end
                   s = s .. vs
                   return s
                 end,
                 -- separator: newline, preceded by a comma after any pair
                 function (_, k)
                   local s = "\n"
                   if k then
                     s = "," .. s
                   end
                   return s
                 end)
end


local function pickle (x)
  if type (x) == "string" then
    return format ("%q", x)
  elseif type (x) == "number" or type (x) == "boolean" or
    type (x) == "nil" then
    return tostring (x)
  else
    x = copy (x) or x
    if type (x) == "table" then
      local s, sep = "{", ""
      for i, v in pairs (x) do
        s = s .. sep .. "[" .. M.pickle (i) .. "]=" .. M.pickle (v)
        sep = ","
      end
      s = s .. "}"
      return s
    else
      -- No global `die` is defined by this module; raise a regular error.
      error ("cannot pickle " .. tostring (x))
    end
  end
end



--[[ ================= ]]--
--[[ Public Interface. ]]--
--[[ ================= ]]--


-- Wrap *fn* with the argument checking described by *decl*.
local function X (decl, fn)
  return debug.argscheck ("std.string." .. decl, fn)
end

M = {
  --- String concatenation operation.
  -- @string s initial string
  -- @param o object to stringify and concatenate
  -- @return s .. tostring (o)
  -- @usage
  -- local string = require "std.string".monkey_patch ()
  -- concatenated = "foo" .. {"bar"}
  __concat = __concat,

  --- String subscript operation.
  -- @string s string
  -- @tparam int|string i index or method name
  -- @return `s:sub (i, i)` if i is a number, otherwise
  --   fall back to a `std.string` method (if any).
  -- @usage
  -- getmetatable ("").__index = require "std.string".__index
  -- third = ("12345")[3]
  __index = __index,

  --- Capitalise each word in a string.
  -- @function caps
  -- @string s any string
  -- @treturn string *s* with each word capitalized
  -- @usage userfullname = caps (input_string)
  caps = X ("caps (string)", caps),

  --- Remove any final newline from a string.
  -- @function chomp
  -- @string s any string
  -- @treturn string *s* with any single trailing newline removed
  -- @usage line = chomp (line)
  chomp = X ("chomp (string)", function (s) return (s:gsub ("\n$", "")) end),

  --- Escape a string to be used as a pattern.
  -- @function escape_pattern
  -- @string s any string
  -- @treturn string *s* with active pattern characters escaped
  -- @usage substr = inputstr:match (escape_pattern (literal))
  escape_pattern = X ("escape_pattern (string)", base.escape_pattern),

  --- Escape a string to be used as a shell token.
  -- Backslash-escapes spaces, parentheses, backslashes, square brackets,
  -- double quotes and apostrophes.
  -- @function escape_shell
  -- @string s any string
  -- @treturn string *s* with active shell characters escaped
  -- @usage os.execute ("echo " .. escape_shell (outputstr))
  escape_shell = X ("escape_shell (string)", escape_shell),

  --- Repeatedly `string.find` until target string is exhausted.
  -- @function finds
  -- @string s target string
  -- @string pattern pattern to match in *s*
  -- @int[opt=1] init start position
  -- @bool[opt] plain inhibit magic characters
  -- @return list of `{from, to; capt = {captures}}`
  -- @see std.string.tfind
  -- @usage
  -- for t in std.elems (finds ("the target string", "%S+")) do
  --   print (tostring (t.capt))
  -- end
  finds = X ("finds (string, string, ?int, ?boolean|:plain)", finds),

  --- Extend to work better with one argument.
  -- If only one argument is passed, no formatting is attempted.
  -- @function format
  -- @string f format string
  -- @param[opt] ... arguments to format
  -- @return formatted string
  -- @usage print (format "100% stdlib!")
  format = X ("format (string, [any...])", format),

  --- Remove leading matter from a string.
  -- @function ltrim
  -- @string s any string
  -- @string[opt="%s+"] r leading pattern
  -- @treturn string *s* with leading *r* stripped
  -- @usage print ("got: " .. ltrim (userinput))
  ltrim = X ("ltrim (string, ?string)",
             function (s, r) return (s:gsub ("^" .. (r or "%s+"), "")) end),

  --- Overwrite core `string` methods with `std` enhanced versions.
  --
  -- Also adds auto-stringification to `..` operator on core strings, and
  -- integer indexing of strings with `[]` dereferencing.
  -- @function monkey_patch
  -- @tparam[opt=_G] table namespace where to install global functions
  -- @treturn table the module table
  -- @usage local string = require "std.string".monkey_patch ()
  monkey_patch = X ("monkey_patch (?table)", monkey_patch),

  --- Write a number using SI suffixes.
  -- The number is rounded to 3 s.f., then the scaled mantissa is written
  -- without a fractional part.  The sign of *n* is preserved.
  -- @function numbertosi
  -- @tparam number|string n any numeric value
  -- @treturn string *n* simplified using largest available SI suffix.
  -- @usage print (numbertosi (bitspersecond) .. "bps")
  numbertosi = X ("numbertosi (number|string)", numbertosi),

  --- Return the English suffix for an ordinal.
  -- @function ordinal_suffix
  -- @tparam int|string n any integer value
  -- @treturn string English suffix for *n*
  -- @usage
  -- local now = os.date "*t"
  -- print (format ("%d%s day of the month", now.day, ordinal_suffix (now.day)))
  ordinal_suffix = X ("ordinal_suffix (int|string)", ordinal_suffix),

  --- Justify a string.
  -- When the string is longer than w, it is truncated (left or right
  -- according to the sign of w).
  -- @function pad
  -- @string s a string to justify
  -- @int w width to justify to (-ve means right-justify; +ve means
  --   left-justify)
  -- @string[opt=" "] p string to pad with
  -- @treturn string *s* justified to *w* characters wide
  -- @usage print (pad (trim (outputstr), 78) .. "\n")
  pad = X ("pad (string, int, ?string)", pad),

  --- Convert a value to a string.
  -- The string can be passed to `functional.eval` to retrieve the value.
  -- Raises an error for values that cannot be pickled.
  -- @todo Make it work for recursive tables.
  -- @param x object to pickle
  -- @treturn string reversible string rendering of *x*
  -- @see std.eval
  -- @usage
  -- function slow_identity (x) return functional.eval (pickle (x)) end
  pickle = pickle,

  --- Pretty-print a table, or other object.
  -- @function prettytostring
  -- @param x object to convert to string
  -- @string[opt="\t"] indent indent between levels
  -- @string[opt=""] spacing space before every line
  -- @treturn string pretty string rendering of *x*
  -- @usage print (prettytostring (std, "  "))
  prettytostring = X ("prettytostring (?any, ?string, ?string)", prettytostring),

  --- Turn tables into strings with recursion detection.
  -- N.B. Functions calling render should not recurse, or recursion
  -- detection will not work.
  -- @function render
  -- @param x object to convert to string
  -- @tparam opentablecb open open table rendering function
  -- @tparam closetablecb close close table rendering function
  -- @tparam elementcb elem element rendering function
  -- @tparam paircb pair pair rendering function
  -- @tparam separatorcb sep separator rendering function
  -- @tparam[opt] table roots accumulates table references to detect recursion
  -- @return string representation of *x*
  -- @usage
  -- function tostring (x)
  --   return render (x, lambda '="{"', lambda '="}"', tostring,
  --                  lambda '=_4.."=".._5', lambda '= _4 and "," or ""',
  --                  lambda '=","')
  -- end
  render = X ("render (?any, func, func, func, func, func, ?table)", render),

  --- Remove trailing matter from a string.
  -- @function rtrim
  -- @string s any string
  -- @string[opt="%s+"] r trailing pattern
  -- @treturn string *s* with trailing *r* stripped
  -- @usage print ("got: " .. rtrim (userinput))
  rtrim = X ("rtrim (string, ?string)",
             function (s, r) return (s:gsub ((r or "%s+") .. "$", "")) end),

  --- Split a string at a given separator.
  -- Separator is a Lua pattern, so you have to escape active characters,
  -- `^$()%.[]*+-?` with a `%` prefix to match a literal character in *s*.
  -- @function split
  -- @string s to split
  -- @string[opt="%s+"] sep separator pattern
  -- @return list of strings
  -- @usage words = split "a very short sentence"
  split = X ("split (string, ?string)", base.split),

  --- Do `string.find`, returning a table of captures.
  -- @function tfind
  -- @string s target string
  -- @string pattern pattern to match in *s*
  -- @int[opt=1] init start position
  -- @bool[opt] plain inhibit magic characters
  -- @treturn int start of match
  -- @treturn int end of match
  -- @treturn table list of captured strings
  -- @see std.string.finds
  -- @usage b, e, captures = tfind ("the target string", "%s", 10)
  tfind = X ("tfind (string, string, ?int, ?boolean|:plain)", tfind),

  --- Remove leading and trailing matter from a string.
  -- @function trim
  -- @string s any string
  -- @string[opt="%s+"] r leading and trailing pattern
  -- @treturn string *s* with leading and trailing *r* stripped
  -- @usage print ("got: " .. trim (userinput))
  trim = X ("trim (string, ?string)", trim),

  --- Wrap a string into a paragraph.
  -- @function wrap
  -- @string s a paragraph of text
  -- @int[opt=78] w width to wrap to
  -- @int[opt=0] ind indent
  -- @int[opt=ind] ind1 indent of first line
  -- @treturn string *s* wrapped to *w* columns
  -- @usage
  -- print (wrap (copyright, 72, 4))
  wrap = X ("wrap (string, ?int, ?int, ?int)", wrap),
}



--[[ ============= ]]--
--[[ Deprecations. ]]--
--[[ ============= ]]--


local DEPRECATED = debug.DEPRECATED


M.assert = DEPRECATED ("41", "'std.string.assert'",
  "use 'std.assert' instead", base.assert)


M.require_version = DEPRECATED ("41", "'std.string.require_version'",
  "use 'std.require' instead", base.require)


M.tostring = DEPRECATED ("41", "'std.string.tostring'",
  "use 'std.tostring' instead", base.tostring)



return base.merge (M, string)



--- Types
-- @section Types

--- Signature of @{render} open table callback.
-- @function opentablecb
-- @tparam table t table about to be rendered
-- @treturn string open table rendering
-- @see render
-- @usage function open (t) return "{" end


--- Signature of @{render} close table callback.
-- @function closetablecb
-- @tparam table t table just rendered
-- @treturn string close table rendering
-- @see render
-- @usage function close (t) return "}" end


--- Signature of @{render} element callback.
-- @function elementcb
-- @param x element to render
-- @treturn string element rendering
-- @see render
-- @usage function element (e) return require "std".tostring (e) end


--- Signature of @{render} pair callback.
-- Trying to re-render *key* or *value* here will break recursion
-- detection, use *keystr* and *valuestr* pre-rendered values instead.
-- @function paircb
-- @tparam table t table containing pair being rendered
-- @param key key part of pair being rendered
-- @param value value part of pair being rendered
-- @string keystr prerendered *key*
-- @string valuestr prerendered *value*
-- @treturn string pair rendering
-- @see render
-- @usage
-- function pair (_, _, _, key, value) return key .. "=" .. value end


--- Signature of @{render} separator callback.
-- @function separatorcb
-- @tparam table t table currently being rendered
-- @param pk *t* key preceding separator, or `nil` for first key
-- @param pv *t* value preceding separator, or `nil` for first value
-- @param fk *t* key following separator, or `nil` for last key
-- @param fv *t* value following separator, or `nil` for last value
-- @treturn string separator rendering
-- @usage
-- function separator (_, _, _, fk) return fk and "," or "" end