#!/usr/bin/env lua

-- $Id: test.lua $

-- Regression tests for the LPeg library.  The script is a flat sequence
-- of assertions; it stops at the first failing 'assert'.

-- require"strict"    -- just to be pedantic

local m = require"lpeg"


-- for general use
local a, b, c, d, e, f, g, p, t


-- compatibility with Lua 5.2
local unpack = rawget(table, "unpack") or unpack
local loadstring = rawget(_G, "loadstring") or load


local any = m.P(1)
local space = m.S" \t\n"^0

-- Recursively checks that 'x' and 'y' are equal.  Non-table values must
-- be '=='; tables are compared key by key in both directions, so an
-- extra key on either side makes the check fail.  When 'p' is true,
-- every pair being compared is printed (debugging aid).
local function checkeq (x, y, p)
  if p then print(x,y) end
  if type(x) ~= "table" then assert(x == y)
  else
    for k,v in pairs(x) do checkeq(v, y[k], p) end
    for k,v in pairs(y) do checkeq(v, x[k], p) end
  end
end


-- metatable shared by patterns (gives direct access to '__add', '__mul')
local mt = getmetatable(m.P(1))


-- string with all 256 byte values, in increasing order
local allchar = {}
for i=0,255 do allchar[i + 1] = i end
allchar = string.char(unpack(allchar))
assert(#allchar == 256)

-- Returns the subject 'allchar' with every character that 'c' does not
-- match removed, i.e. a string listing the characters accepted by 'c'.
local function cs2str (c)
  return m.match(m.Cs((c + m.P(1)/"")^0), allchar)
end

-- Asserts that 'c1' and 'c2' accept exactly the same single characters.
local function eqcharset (c1, c2)
  assert(cs2str(c1) == cs2str(c2))
end


print"General tests for LPeg library"

assert(type(m.version()) == "string")
print("version " .. m.version())
assert(m.type("alo") ~= "pattern")
assert(m.type(io.input) ~= "pattern")
assert(m.type(m.P"alo") == "pattern")

-- tests for some basic optimizations
assert(m.match(m.P(false) + "a", "a") == 2)
assert(m.match(m.P(true) + "a", "a") == 1)
assert(m.match("a" + m.P(false), "b") == nil)
assert(m.match("a" + m.P(true), "b") == 1)

assert(m.match(m.P(false) * "a", "a") == nil)
assert(m.match(m.P(true) * "a", "a") == 2)
assert(m.match("a" * m.P(false), "a") == nil)
assert(m.match("a" * m.P(true), "a") == 2)

assert(m.match(#m.P(false) * "a", "a") == nil)
assert(m.match(#m.P(true) * "a", "a") == 2)
assert(m.match("a" * #m.P(false), "a") == nil)
assert(m.match("a" * #m.P(true), "a") == 2)


-- tests for locale
do
  assert(m.locale(m) == m)
  local t = {}
  assert(m.locale(t, m) == t)
  local x = m.locale()
  for n,v in pairs(x) do
    assert(type(n) == "string")
    eqcharset(v, m[n])
  end
end


assert(m.match(3, "aaaa"))
assert(m.match(4, "aaaa"))
assert(not m.match(5, "aaaa"))
assert(m.match(-3, "aa"))
assert(not m.match(-3, "aaa"))
assert(not m.match(-3, "aaaa"))
assert(not m.match(-4, "aaaa"))
assert(m.P(-5):match"aaaa")

assert(m.match("a", "alo") == 2)
assert(m.match("al", "alo") == 3)
assert(not m.match("alu", "alo"))
assert(m.match(true, "") == 1)

local digit = m.S"0123456789"
local upper = m.S"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
local lower = m.S"abcdefghijklmnopqrstuvwxyz"
local letter = m.S"" + upper + lower
local alpha = letter + digit + m.R()

eqcharset(m.S"", m.P(false))
eqcharset(upper, m.R("AZ"))
eqcharset(lower, m.R("az"))
eqcharset(upper + lower, m.R("AZ", "az"))
eqcharset(upper + lower, m.R("AZ", "cz", "aa", "bb", "90"))
eqcharset(digit, m.S"01234567" + "8" + "9")
eqcharset(upper, letter - lower)
eqcharset(m.S(""), m.R())
assert(cs2str(m.S("")) == "")

eqcharset(m.S"\0", "\0")
eqcharset(m.S"\1\0\2", m.R"\0\2")
eqcharset(m.S"\1\0\2", m.R"\1\2" + "\0")
eqcharset(m.S"\1\0\2" - "\0", m.R"\1\2")

local word = alpha^1 * (1 - alpha)^0

assert((word^0 * -1):match"alo alo")
assert(m.match(word^1 * -1, "alo alo"))
assert(m.match(word^2 * -1, "alo alo"))
assert(not m.match(word^3 * -1, "alo alo"))

assert(not m.match(word^-1 * -1, "alo alo"))
assert(m.match(word^-2 * -1, "alo alo"))
assert(m.match(word^-3 * -1, "alo alo"))

local eos = m.P(-1)

assert(m.match(digit^0 * letter * digit * eos, "1298a1"))
assert(not m.match(digit^0 * letter * eos, "1257a1"))

-- grammar (as a plain table) for balanced parentheses
b = {
  [1] = "(" * (((1 - m.S"()") + #m.P"(" * m.V(1))^0) * ")"
}

assert(m.match(b, "(al())()"))
assert(not m.match(b * eos, "(al())()"))
assert(m.match(b * eos, "((al())()(é))"))
assert(not m.match(b, "(al()()"))

assert(not m.match(letter^1 - "for", "foreach"))
assert(m.match(letter^1 - ("for" * eos), "foreach"))
assert(not m.match(letter^1 - ("for" * eos), "for"))

-- Returns a grammar that matches 'p' at the first position of the
-- subject where it succeeds, skipping one character at a time.
function basiclookfor (p)
  return m.P {
    [1] = p + (1 * m.V(1))
  }
end

-- Same as 'basiclookfor', but captures the text matched by 'p'.
function caplookfor (p)
  return basiclookfor(p:C())
end

assert(m.match(caplookfor(letter^1), " 4achou123...") == "achou")
a = {m.match(caplookfor(letter^1)^0, " two words, one more ")}
checkeq(a, {"two", "words", "one", "more"})

-- the spacing inside the subject is significant: the balanced group
-- must start at position 6, so that the position capture yields 7
assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), "  (  (a)") == 7)

a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")}
checkeq(a, {"123", "d"})

-- bug in LPeg 0.12  (nil value does not create a 'ktable')
assert(m.match(m.Cc(nil), "") == nil)

a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")}
checkeq(a, {"abcd", "l"})

a = {m.match(m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
checkeq(a, {10,20,30,2})
a = {m.match(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
checkeq(a, {1,10,20,30,2})
a = m.match(m.Ct(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
checkeq(a, {1,10,20,30,2})
a = m.match(m.Ct(m.Cp() * m.Cc(7,8) * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
checkeq(a, {1,7,8,10,20,30,2})
a = {m.match(m.Cc() * m.Cc() * m.Cc(1) * m.Cc(2,3,4) * m.Cc() * 'a', 'aaa')}
checkeq(a, {1,2,3,4})

a = {m.match(m.Cp() * letter^1 * m.Cp(), "abcd")}
checkeq(a, {1, 5})


t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")}
checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""})

-- bug in 0.12 ('hascapture' did not check for captures inside a rule)
do
  local pat = m.P{
    'S';
    S1 = m.C('abc') + 3,
    S = #m.V('S1')    -- rule has capture, but '#' must ignore it
  }
  assert(pat:match'abc' == 1)
end


-- bug: loop in 'hascaptures'
do
  local p = m.C(-m.P{m.P'x' * m.V(1) + m.P'y'})
  assert(p:match("xxx") == "")
end



-- test for small capture boundary
for i = 250,260 do
  assert(#m.match(m.C(i), string.rep('a', i)) == i)
  assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i)
end

-- tests for any*n and any*-n
for n = 1, 550, 13 do
  local x_1 = string.rep('x', n - 1)
  local x = x_1 .. 'a'
  assert(not m.P(n):match(x_1))
  assert(m.P(n):match(x) == n + 1)
  assert(n < 4 or m.match(m.P(n) + "xxx", x_1) == 4)
  assert(m.C(n):match(x) == x)
  assert(m.C(m.C(n)):match(x) == x)
  assert(m.P(-n):match(x_1) == 1)
  assert(not m.P(-n):match(x))
  assert(n < 13 or m.match(m.Cc(20) * ((n - 13) * m.P(10)) * 3, x) == 20)
  local n3 = math.floor(n/3)
  assert(m.match(n3 * m.Cp() * n3 * n3, x) == n3 + 1)
end

-- true values
assert(m.P(0):match("x") == 1)
assert(m.P(0):match("") == 1)
assert(m.C(0):match("x") == "")

assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxu") == 1)
assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxuxuxuxu") == 0)
assert(m.match(m.C(m.P(2)^1), "abcde") == "abcd")
p = m.Cc(0) * 1 + m.Cc(1) * 2 + m.Cc(2) * 3 + m.Cc(3) * 4


-- test for alternation optimization
assert(m.match(m.P"a"^1 + "ab" + m.P"x"^0, "ab") == 2)
assert(m.match((m.P"a"^1 + "ab" + m.P"x"^0 * 1)^0, "ab") == 3)
assert(m.match(m.P"ab" + "cd" + "" + "cy" + "ak", "98") == 1)
assert(m.match(m.P"ab" + "cd" + "ax" + "cy", "ax") == 3)
assert(m.match("a" * m.P"b"^0 * "c" + "cd" + "ax" + "cy", "ax") == 3)
assert(m.match((m.P"ab" + "cd" + "ax" + "cy")^0, "ax") == 3)
assert(m.match(m.P(1) * "x" + m.S"" * "xu" + "ay", "ay") == 3)
assert(m.match(m.P"abc" + "cde" + "aka", "aka") == 4)
assert(m.match(m.S"abc" * "x" + "cde" + "aka", "ax") == 3)
assert(m.match(m.S"abc" * "x" + "cde" + "aka", "aka") == 4)
assert(m.match(m.S"abc" * "x" + "cde" + "aka", "cde") == 4)
assert(m.match(m.S"abc" * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "ax") == 3)
assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "aka") == 4)
assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "cde") == 4)
assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "ax") == 3)
assert(m.match(m.P(1) * "x" + "cde" + m.S"ab" * "ka", "aka") == 4)
assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "aka") == 4)
assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "cde") == 4)
assert(m.match(m.P"eb" + "cd" + m.P"e"^0 + "x", "ee") == 3)
assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "abcd") == 3)
assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "eeex") == 4)
assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "cd") == 3)
assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "x") == 1)
assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x" + "", "zee") == 1)
assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "abcd") == 3)
assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "eeex") == 4)
assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "cd") == 3)
assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "x") == 2)
assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x" + "", "zee") == 1)
assert(not m.match(("aa" * m.P"bc"^-1 + "aab") * "e", "aabe"))

assert(m.match("alo" * (m.P"\n" + -1), "alo") == 4)


-- bug in 0.12 (rc1)
assert(m.match((m.P"\128\187\191" + m.S"abc")^0, "\128\187\191") == 4)

assert(m.match(m.S"\0\128\255\127"^0, string.rep("\0\128\255\127", 10)) ==
    4*10 + 1)

-- optimizations with optional parts
assert(m.match(("ab" * -m.P"c")^-1, "abc") == 1)
assert(m.match(("ab" * #m.P"c")^-1, "abd") == 1)
assert(m.match(("ab" * m.B"c")^-1, "ab") == 1)
assert(m.match(("ab" * m.P"cd"^0)^-1, "abcdcdc") == 7)

assert(m.match(m.P"ab"^-1 - "c", "abcd") == 3)

p = ('Aa' * ('Bb' * ('Cc' * m.P'Dd'^0)^0)^0)^-1
assert(p:match("AaBbCcDdBbCcDdDdDdBb") == 21)


-- bug in 0.12.2
-- p = { ('ab' ('c' 'ef'?)*)? }
p = m.C(('ab' * ('c' * m.P'ef'^-1)^0)^-1)
s = "abcefccefc"
assert(s == p:match(s))


pi = "3.14159 26535 89793 23846 26433 83279 50288 41971 69399 37510"
assert(m.match(m.Cs((m.P"1" / "a" + m.P"5" / "b" + m.P"9" / "c" + 1)^0), pi) ==
  m.match(m.Cs((m.P(1) / {["1"] = "a", ["5"] = "b", ["9"] = "c"})^0), pi))
print"+"


-- tests for capture optimizations
assert(m.match((m.P(3) + 4 * m.Cp()) * "a", "abca") == 5)
t = {m.match(((m.P"a" + m.Cp()) * m.P"x")^0, "axxaxx")}
checkeq(t, {3, 6})


-- tests for numbered captures
p = m.C(1)
assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 3, "abcdefgh") == "a")
assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 1, "abcdefgh") == "abcdef")
assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 4, "abcdefgh") == "bc")
assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 0, "abcdefgh") == 7)

a, b, c = m.match(p * (m.C(p * m.C(2)) * m.C(3) / 4) * p, "abcdefgh")
assert(a == "a" and b == "efg" and c == "h")

-- test for table captures
t = m.match(m.Ct(letter^1), "alo")
checkeq(t, {})

t, n = m.match(m.Ct(m.C(letter)^1) * m.Cc"t", "alo")
assert(n == "t" and table.concat(t) == "alo")

t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
assert(table.concat(t, ";") == "alo;a;l;o")

t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
assert(table.concat(t, ";") == "alo;a;l;o")

t = m.match(m.Ct(m.Ct((m.Cp() * letter * m.Cp())^1)), "alo")
assert(table.concat(t[1], ";") == "1;2;2;3;3;4")

t = m.match(m.Ct(m.C(m.C(1) * 1 * m.C(1))), "alo")
checkeq(t, {"alo", "a", "o"})


-- tests for groups
p = m.Cg(1)   -- no capture
assert(p:match('x') == 'x')
p = m.Cg(m.P(true)/function () end * 1)   -- no value
assert(p:match('x') == 'x')
p = m.Cg(m.Cg(m.Cg(m.C(1))))
assert(p:match('x') == 'x')
p = m.Cg(m.Cg(m.Cg(m.C(1))^0) * m.Cg(m.Cc(1) * m.Cc(2)))
t = {p:match'abc'}
checkeq(t, {'a', 'b', 'c', 1, 2})

p = m.Ct(m.Cg(m.Cc(10), "hi") * m.C(1)^0 * m.Cg(m.Cc(20), "ho"))
t = p:match''
checkeq(t, {hi = 10, ho = 20})
t = p:match'abc'
checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'})

-- non-string group names
p = m.Ct(m.Cg(1, print) * m.Cg(1, 23.5) * m.Cg(1, io))
t = p:match('abcdefghij')
assert(t[print] == 'a' and t[23.5] == 'b' and t[io] == 'c')


-- test for error messages

-- Calls 'f(...)' in protected mode and asserts that it raises an error
-- whose message contains 'msg' (searched for anywhere in the message).
local function checkerr (msg, f, ...)
  local st, err = pcall(f, ...)
  assert(not st and m.match({ m.P(msg) + 1 * m.V(1) }, err))
end

checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a")
checkerr("rule '1' used outside a grammar", m.match, m.V(1), "")
checkerr("rule 'hiii' used outside a grammar", m.match, m.V('hiii'), "")
checkerr("rule 'hiii' undefined in given grammar", m.match, { m.V('hiii') }, "")
checkerr("undefined in given grammar", m.match, { m.V{} }, "")

checkerr("rule 'A' is not a pattern", m.P, { m.P(1), A = {} })
checkerr("grammar has no initial rule", m.P, { [print] = {} })

-- grammar with a long call chain before left recursion
p = {'a',
  a = m.V'b' * m.V'c' * m.V'd' * m.V'a',
  b = m.V'c',
  c = m.V'd',
  d = m.V'e',
  e = m.V'f',
  f = m.V'g',
  g = m.P''
}
checkerr("rule 'a' may be left recursive", m.match, p, "a")

-- Bug in peephole optimization of LPeg 0.12 (IJmp -> ICommit)
-- the next grammar has an original sequence IJmp -> ICommit -> IJmp L1
-- that is optimized to ICommit L1

p = m.P { (m.P {m.P'abc'} + 'ayz') * m.V'y'; y = m.P'x' }
assert(p:match('abcx') == 5 and p:match('ayzx') == 5 and not p:match'abc')


do
  -- large dynamic Cc
  local lim = 2^16 - 1
  local c = 0
  -- builds a sequence of 'n' constant captures by halving 'n'
  local function seq (n)
    if n == 1 then c = c + 1; return m.Cc(c)
    else
      local m = math.floor(n / 2)   -- shadows the library only in this branch
      return seq(m) * seq(n - m)
    end
  end
  p = m.Ct(seq(lim))
  t = p:match('')
  assert(t[lim] == lim)
  checkerr("too many", function () p = p / print end)
  checkerr("too many", seq, lim + 1)
end


do
  -- nesting of captures too deep
  local p = m.C(1)
  for i = 1, 300 do
    p = m.Ct(p)
  end
  checkerr("too deep", p.match, p, "x")
end


-- tests for non-pattern as arguments to pattern functions

p = { ('a' * m.V(1))^-1 } * m.P'b' * { 'a' * m.V(2); m.V(1)^-1 }
assert(m.match(p, "aaabaac") == 7)

p = m.P'abc' * 2 * -5 * true * 'de'  -- mix of numbers and strings and booleans

assert(p:match("abc01de") == 8)
assert(p:match("abc01de3456") == nil)

p = 'abc' * (2 * (-5 * (true * m.P'de')))

assert(p:match("abc01de") == 8)
assert(p:match("abc01de3456") == nil)

p = { m.V(2), m.P"abc" } *
     (m.P{ "xx", xx = m.P"xx" } + { "x", x = m.P"a" * m.V"x" + "" })
assert(p:match("abcaaaxx") == 7)
assert(p:match("abcxx") == 6)


-- a large table capture
t = m.match(m.Ct(m.C('a')^0), string.rep("a", 10000))
assert(#t == 10000 and t[1] == 'a' and t[#t] == 'a')

print('+')


-- bug in 0.10 (rechecking a grammar, after tail-call optimization)
m.P{ m.P { (m.P(3) + "xuxu")^0 * m.V"xuxu", xuxu = m.P(1) } }

local V = m.V

local Space = m.S(" \n\t")^0
local Number = m.C(m.R("09")^1) * Space
local FactorOp = m.C(m.S("+-")) * Space
local TermOp = m.C(m.S("*/")) * Space
local Open = "(" * Space
local Close = ")" * Space


-- Fold function for additive operators: combines the accumulator 'v1'
-- with operand 'v2' according to 'op'; it must get exactly 3 arguments.
local function f_factor (v1, op, v2, d)
  assert(d == nil)
  if op == "+" then return v1 + v2
  else return v1 - v2
  end
end


-- Fold function for multiplicative operators (see 'f_factor').
local function f_term (v1, op, v2, d)
  assert(d == nil)
  if op == "*" then return v1 * v2
  else return v1 / v2
  end
end

G = m.P{ "Exp",
  Exp = m.Cf(V"Factor" * m.Cg(FactorOp * V"Factor")^0, f_factor);
  Factor = m.Cf(V"Term" * m.Cg(TermOp * V"Term")^0, f_term);
  Term = Number / tonumber  +  Open * V"Exp" * Close;
}

G = Space * G * -1

-- the grammar must agree with Lua's own evaluation of each expression
for _, s in ipairs{" 3 + 5*9 / (1+1) ", "3+4/2", "3+3-3- 9*2+3*9/1- 8"} do
  assert(m.match(G, s) == loadstring("return "..s)())
end


-- test for grammars (errors deep in calling non-terminals)
g = m.P{
  [1] = m.V(2) + "a",
  [2] = "a" * m.V(3) * "x",
  [3] = "b" * m.V(3) + "c"
}

assert(m.match(g, "abbbcx") == 7)
assert(m.match(g, "abbbbx") == 2)


-- tests for \0
assert(m.match(m.R("\0\1")^1, "\0\1\0") == 4)
assert(m.match(m.S("\0\1ab")^1, "\0\1\0a") == 5)
assert(m.match(m.P(1)^3, "\0\1\0a") == 5)
assert(not m.match(-4, "\0\1\0a"))
assert(m.match("\0\1\0a", "\0\1\0a") == 5)
assert(m.match("\0\0\0", "\0\0\0") == 4)
assert(not m.match("\0\0\0", "\0\0"))


-- tests for predicates
assert(not m.match(-m.P("a") * 2, "alo"))
assert(m.match(- -m.P("a") * 2, "alo") == 3)
assert(m.match(#m.P("a") * 2, "alo") == 3)
assert(m.match(##m.P("a") * 2, "alo") == 3)
assert(not m.match(##m.P("c") * 2, "alo"))
assert(m.match(m.Cs((##m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")


-- fixed length
do
  -- 'and' predicate using fixed length
  local p = m.C(#("a" * (m.P("bd") + "cd")) * 2)
  assert(p:match("acd") == "ac")

  p = #m.P{ "a" * m.V(2), m.P"b" } * 2
  assert(p:match("abc") == 3)

  p = #(m.P"abc" * m.B"c")
  assert(p:match("abc") == 1 and not p:match("ab"))

  p = m.P{ "a" * m.V(2), m.P"b"^1 }
  checkerr("pattern may not have fixed length", m.B, p)

  p = "abc" * (m.P"b"^1 + m.P"a"^0)
  checkerr("pattern may not have fixed length", m.B, p)
end


p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3)
assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1)

p = -m.P'a' * m.Cc(10) + #m.P'a' * m.Cc(20)
assert(p:match('a') == 20 and p:match('') == 10 and p:match('b') == 10)



-- look-behind predicate
assert(not m.match(m.B'a', 'a'))
assert(m.match(1 * m.B'a', 'a') == 2)
assert(not m.match(m.B(1), 'a'))
assert(m.match(1 * m.B(1), 'a') == 2)
assert(m.match(-m.B(1), 'a') == 1)
assert(m.match(m.B(250), string.rep('a', 250)) == nil)
assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251)

-- look-behind with an open call
checkerr("pattern may not have fixed length", m.B, m.V'S1')
checkerr("too long to look behind", m.B, 260)

-- 'B' matches at a word boundary (start or end of a run of letters);
-- 'x' collects the positions of all boundaries in the subject
B = #letter * -m.B(letter) + -letter * m.B(letter)
x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) })
-- the two spaces before the final 'c' are significant (boundaries 9, 10)
checkeq(m.match(x, 'ar cal  c'), {1,3,4,7,9,10})
checkeq(m.match(x, ' ar cal '), {2,4,5,8})
checkeq(m.match(x, ' '), {})
checkeq(m.match(x, 'aloalo'), {1,7})

assert(m.match(B, "a") == 1)
assert(m.match(1 * B, "a") == 2)
assert(not m.B(1 - letter):match(""))
assert((-m.B(letter)):match("") == 1)

assert((4 * m.B(letter, 4)):match("aaaaaaaa") == 5)
assert(not (4 * m.B(#letter * 5)):match("aaaaaaaa"))
assert((4 * -m.B(#letter * 5)):match("aaaaaaaa") == 5)

-- look-behind with grammars
assert(m.match('a' * m.B{'x', x = m.P(3)}, 'aaa') == nil)
assert(m.match('aa' * m.B{'x', x = m.P('aaa')}, 'aaaa') == nil)
assert(m.match('aaa' * m.B{'x', x = m.P('aaa')}, 'aaaaa') == 4)



-- bug in 0.9
assert(m.match(('a' * #m.P'b'), "ab") == 2)
assert(not m.match(('a' * #m.P'b'), "a"))

assert(not m.match(#m.S'567', ""))
assert(m.match(#m.S'567' * 1, "6") == 2)


-- tests for Tail Calls

p = m.P{ 'a' * m.V(1) + '' }
assert(p:match(string.rep('a', 1000)) == 1001)

-- create a grammar for a simple DFA for even number of 0s and 1s
--
--  ->1 <---0---> 2
--    ^           ^
--    |           |
--    1           1
--    |           |
--    V           V
--    3 <---0---> 4
--
-- this grammar should keep no backtracking information

p = m.P{
  [1] = '0' * m.V(2) + '1' * m.V(3) + -1,
  [2] = '0' * m.V(1) + '1' * m.V(4),
  [3] = '0' * m.V(4) + '1' * m.V(1),
  [4] = '0' * m.V(3) + '1' * m.V(2),
}

assert(p:match(string.rep("00", 10000)))
assert(p:match(string.rep("01", 10000)))
assert(p:match(string.rep("011", 10000)))
assert(not p:match(string.rep("011", 10000) .. "1"))
assert(not p:match(string.rep("011", 10001)))


-- this grammar does need backtracking info.
local lim = 10000
p = m.P{ '0' * m.V(1) + '0' }
checkerr("stack overflow", m.match, p, string.rep("0", lim))
m.setmaxstack(2*lim)
checkerr("stack overflow", m.match, p, string.rep("0", lim))
m.setmaxstack(2*lim + 4)
assert(m.match(p, string.rep("0", lim)) == lim + 1)

-- this repetition should not need stack space (only the call does)
p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' }
m.setmaxstack(200)
assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362)

m.setmaxstack(100)   -- restore low limit

-- tests for optional start position
assert(m.match("a", "abc", 1))
assert(m.match("b", "abc", 2))
assert(m.match("c", "abc", 3))
assert(not m.match(1, "abc", 4))
assert(m.match("a", "abc", -3))
assert(m.match("b", "abc", -2))
assert(m.match("c", "abc", -1))
assert(m.match("abc", "abc", -4))   -- truncate to position 1

assert(m.match("", "abc", 10))   -- empty string is everywhere!
assert(m.match("", "", 10))
assert(not m.match(1, "", 1))
assert(not m.match(1, "", -1))
assert(not m.match(1, "", 0))

print("+")


-- tests for argument captures
checkerr("invalid argument", m.Carg, 0)
checkerr("invalid argument", m.Carg, -1)
checkerr("invalid argument", m.Carg, 2^18)
checkerr("absent extra argument #1", m.match, m.Carg(1), 'a', 1)
assert(m.match(m.Carg(1), 'a', 1, print) == print)
x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)}
checkeq(x, {10, 20})

-- nested match-time captures: the inner one increments extra argument
-- #3 (through the back reference "a"); the outer one combines that
-- result with extra argument #2
assert(m.match(m.Cmt(m.Cg(m.Carg(3), "a") *
                     m.Cmt(m.Cb("a"), function (s,i,x)
                                        assert(s == "a" and i == 1);
                                        return i, x+1
                                      end) *
                     m.Carg(2), function (s,i,a,b,c)
                                  assert(s == "a" and i == 1 and c == nil);
                                  return i, 2*a + 3*b
                                end) * "a",
               "a", 1, false, 100, 1000) == 2*1001 + 3*100)


-- tests for Lua functions

-- in the next tests, the functions record in 't' each position where
-- they are called and check that they receive the whole subject 's'
t = {}
s = ""
p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i; return nil end) * false
s = "hi, this is a test"
assert(m.match(((p - m.P(-1)) + 2)^0, s) == string.len(s) + 1)
assert(#t == string.len(s)/2 and t[1] == 1 and t[2] == 3)

assert(not m.match(p, s))

-- plain functions as both operands, going directly to the metamethods
p = mt.__add(function (s, i) return i end, function (s, i) return nil end)
assert(m.match(p, "alo"))

p = mt.__mul(function (s, i) return i end, function (s, i) return nil end)
assert(not m.match(p, "alo"))


t = {}
p = function (s1, i) assert(s == s1); t[#t + 1] = i; return i end
s = "hi, this is a test"
assert(m.match((m.P(1) * p)^0, s) == string.len(s) + 1)
assert(#t == string.len(s) and t[1] == 2 and t[2] == 3)

t = {}
p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i;
                         return i <= s1:len() and i end) * 1
s = "hi, this is a test"
assert(m.match(p^0, s) == string.len(s) + 1)
assert(#t == string.len(s) + 1 and t[1] == 1 and t[2] == 2)

p = function (s1, i) return m.match(m.P"a"^1, s1, i) end
assert(m.match(p, "aaaa") == 5)
assert(m.match(p, "abaa") == 2)
assert(not m.match(p, "baaa"))

-- a function pattern must return a position between the current one
-- and the end of the subject plus one
checkerr("invalid position", m.match, function () return 2^20 end, s)
checkerr("invalid position", m.match, function () return 0 end, s)
checkerr("invalid position", m.match, function (s, i) return i - 1 end, s)
checkerr("invalid position", m.match,
             m.P(1)^0 * function (_, i) return i - 1 end, s)
assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s))
checkerr("invalid position", m.match,
             m.P(1)^0 * function (_, i) return i + 1 end, s)
assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s))
checkerr("invalid position", m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s)
assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s))
assert(m.match(m.P(1)^0 * function (_, i) return true end, s) ==
       string.len(s) + 1)
for i = 1, string.len(s) + 1 do
  assert(m.match(function (_, _) return i end, s) == i)
end

p = (m.P(function (s, i) return i%2 == 0 and i end) * 1
  +  m.P(function (s, i) return i%2 ~= 0 and i + 2 <= s:len() and i end) * 3)^0
  * -1
assert(p:match(string.rep('a', 14000)))

-- tests for Function Replacements

-- packs its arguments into a table, unless the first one is "x"
-- (in which case it returns no value)
f = function (a, ...) if a ~= "x" then return {a, ...} end end

t = m.match(m.C(1)^0/f, "abc")
checkeq(t, {"a", "b", "c"})

t = m.match(m.C(1)^0/f/f, "abc")
checkeq(t, {{"a", "b", "c"}})

t = m.match(m.P(1)^0/f/f, "abc")   -- no capture
checkeq(t, {{"abc"}})

t = m.match((m.P(1)^0/f * m.Cp())/f, "abc")
checkeq(t, {{"abc"}, 4})

t = m.match((m.C(1)^0/f * m.Cp())/f, "abc")
checkeq(t, {{"a", "b", "c"}, 4})

t = m.match((m.C(1)^0/f * m.Cp())/f, "xbc")
checkeq(t, {4})

t = m.match(m.C(m.C(1)^0)/f, "abc")
checkeq(t, {"abc", "a", "b", "c"})

-- prepends the value 1 to its arguments
g = function (...) return 1, ... end
t = {m.match(m.C(1)^0/g/g, "abc")}
checkeq(t, {1, 1, "a", "b", "c"})

t = {m.match(m.Cc(nil,nil,4) * m.Cc(nil,3) * m.Cc(nil, nil) / g / g, "")}
t1 = {1,1,nil,nil,4,nil,3,nil,nil}
-- compared index by index because both tables have holes
for i=1,10 do assert(t[i] == t1[i]) end

-- bug in 0.12.2: ktable with only nil could be eliminated when joining
-- with a pattern without ktable
assert((m.P"aaa" * m.Cc(nil)):match"aaa" == nil)

t = {m.match((m.C(1) / function (x) return x, x.."x" end)^0, "abc")}
checkeq(t, {"a", "ax", "b", "bx", "c", "cx"})

t = m.match(m.Ct((m.C(1) / function (x,y) return y, x end * m.Cc(1))^0), "abc")
checkeq(t, {nil, "a", 1, nil, "b", 1, nil, "c", 1})

-- tests for Query Replacements

assert(m.match(m.C(m.C(1)^0)/{abc = 10}, "abc") == 10)
assert(m.match(m.C(1)^0/{a = 10}, "abc") == 10)
assert(m.match(m.S("ba")^0/{ab = 40}, "abc") == 40)
t = m.match(m.Ct((m.S("ba")/{a = 40})^0), "abc")
checkeq(t, {40})

assert(m.match(m.Cs((m.C(1)/{a=".", d=".."})^0), "abcdde") == ".bc....e")
assert(m.match(m.Cs((m.C(1)/{f="."})^0), "abcdde") == "abcdde")
assert(m.match(m.Cs((m.C(1)/{d="."})^0), "abcdde") == "abc..e")
assert(m.match(m.Cs((m.C(1)/{e="."})^0), "abcdde") == "abcdd.")
assert(m.match(m.Cs((m.C(1)/{e=".", f="+"})^0), "eefef") == "..+.+")
assert(m.match(m.Cs((m.C(1))^0), "abcdde") == "abcdde")
assert(m.match(m.Cs(m.C(m.C(1)^0)), "abcdde") == "abcdde")
assert(m.match(1 * m.Cs(m.P(1)^0), "abcdde") == "bcdde")
assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "abcdde") == "abcdde")
assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "0ab0b0") == "xabxbx")
assert(m.match(m.Cs((m.C('0')/'x' + m.P(1)/{b=3})^0), "b0a0b") == "3xax3")
assert(m.match(m.P(1)/'%0%0'/{aa = -3} * 'x', 'ax') == -3)
assert(m.match(m.C(1)/'%0%1'/{aa = 'z'}/{z = -3} * 'x', 'ax') == -3)

assert(m.match(m.Cs(m.Cc(0) * (m.P(1)/"")), "4321") == "0")

assert(m.match(m.Cs((m.P(1) / "%0")^0), "abcd") == "abcd")
assert(m.match(m.Cs((m.P(1) / "%0.%0")^0), "abcd") == "a.ab.bc.cd.d")
assert(m.match(m.Cs((m.P("a") / "%0.%0" + 1)^0), "abcad") == "a.abca.ad")
assert(m.match(m.C("a") / "%1%%%0", "a") == "a%a")
assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx")
assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") ==
   "411 - abc ")

assert(m.match(m.P(1)/"%0", "abc") == "a")
checkerr("invalid capture index", m.match, m.P(1)/"%1", "abc")
checkerr("invalid capture index", m.match, m.P(1)/"%9", "abc")

-- pattern with nine simple captures
p = m.C(1)
p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1"
assert(p:match("1234567890") == "9 - 1")

assert(m.match(m.Cc(print), "") == print)

-- too many captures (just ignore extra ones)
p = m.C(1)^0 / "%2-%9-%0-%9"
assert(p:match"01234567890123456789" == "1-8-01234567890123456789-8")
s = string.rep("12345678901234567890", 20)
assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3")

-- string captures with non-string subcaptures
p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1"
assert(p:match'x' == 'alo - x - alo')

checkerr("invalid capture value (a boolean)", m.match, m.Cc(true) / "%1", "a")

-- long strings for string capture
l = 10000
s = string.rep('a', l) .. string.rep('b', l) .. string.rep('c', l)

p = (m.C(m.P'a'^1) * m.C(m.P'b'^1) * m.C(m.P'c'^1)) / '%3%2%1'

assert(p:match(s) == string.rep('c', l) ..
                     string.rep('b', l) ..
                     string.rep('a', l))

print"+"

-- accumulator capture

-- fold function that ignores the new capture and counts the steps
function f (x) return x + 1 end
assert(m.match(m.Cf(m.Cc(0) * m.C(1)^0, f), "alo alo") == 7)

-- only the first value of the first capture is folded, and with no
-- other capture the fold function ('error') is never called
t = {m.match(m.Cf(m.Cc(1,2,3), error), "")}
checkeq(t, {1})
p = m.Cf(m.Ct(true) * m.Cg(m.C(m.R"az"^1) * "=" * m.C(m.R"az"^1) * ";")^0,
         rawset)
t = p:match("a=b;c=du;xux=yuy;")
checkeq(t, {a="b", c="du", xux="yuy"})


-- errors in accumulator capture

-- no initial capture
checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa')
-- no initial capture (very long match forces fold to be a pair open-close)
checkerr("no initial value", m.match, m.Cf(m.P(500), print),
         string.rep('a', 600))

-- nested capture produces no initial value
checkerr("no initial value", m.match, m.Cf(m.P(1) / {}, print), "alo")


-- tests for loop checker

-- Asserts that repeating 'p' with '^0' is rejected with the error
-- "may accept empty string".
local function isnullable (p)
  checkerr("may accept empty string", function (p) return p^0 end, m.P(p))
end

isnullable(m.P("x")^-4)
assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3)
assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3)
isnullable("")
isnullable(m.P("x")^0)
isnullable(m.P("x")^-1)
isnullable(m.P("x") + 1 + 2 + m.P("a")^-1)
isnullable(-m.P("ab"))
isnullable(- -m.P("ab"))
isnullable(# #(m.P("ab") + "xy"))
isnullable(- #m.P("ab")^0)
isnullable(# -m.P("ab")^1)
isnullable(#m.V(3))
isnullable(m.V(3) + m.V(1) + m.P('a')^-1)
isnullable({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)})
assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc")
       == 3)
assert(m.match(m.P""^-3, "a") == 1)

-- Searches for pattern 'p' anywhere in string 's'; returns the result
-- of the match (nil when 'p' matches nowhere).
local function find (p, s)
  return m.match(basiclookfor(p), s)
end


-- Asserts that building a grammar from table 'g' raises an error; when
-- 'expected' is given, the error message must contain it.
local function badgrammar (g, expected)
  local stat, msg = pcall(m.P, g)
  assert(not stat)
  if expected then assert(find(expected, msg)) end
end

badgrammar({[1] = m.V(1)}, "rule '1'")
badgrammar({[1] = m.V(2)}, "rule '2'")    -- invalid non-terminal
badgrammar({[1] = m.V"x"}, "rule 'x'")    -- invalid non-terminal
badgrammar({[1] = m.V{}}, "rule '(a table)'")   -- invalid non-terminal
badgrammar({[1] = #m.P("a") * m.V(1)}, "rule '1'")  -- left-recursive
badgrammar({[1] = -m.P("a") * m.V(1)}, "rule '1'")  -- left-recursive
badgrammar({[1] = -1 * m.V(1)}, "rule '1'")  -- left-recursive
badgrammar({[1] = -1 + m.V(1)}, "rule '1'")  -- left-recursive
badgrammar({[1] = 1 * m.V(2), [2] = m.V(2)}, "rule '2'")  -- left-recursive
badgrammar({[1] = 1 * m.V(2)^0, [2] = m.P(0)}, "rule '1'")  -- inf. loop
badgrammar({ m.V(2), m.V(3)^0, m.P"" }, "rule '2'")  -- inf. loop
badgrammar({ m.V(2) * m.V(3)^0, m.V(3)^0, m.P"" }, "rule '1'")  -- inf. loop
badgrammar({"x", x = #(m.V(1) * 'a') }, "rule '1'")  -- inf. loop
badgrammar({ -(m.V(1) * 'a') }, "rule '1'")  -- inf. loop
badgrammar({"x", x = m.P'a'^-1 * m.V"x"}, "rule 'x'")  -- left recursive
badgrammar({"x", x = m.P'a' * m.V"y"^1, y = #m.P(1)}, "rule 'x'")

assert(m.match({'a' * -m.V(1)}, "aaa") == 2)
assert(m.match({'a' * -m.V(1)}, "aaaa") == nil)


-- good x bad grammars
-- (each plain 'm.P{...}' call must build its grammar without errors)
m.P{ ('a' * m.V(1))^-1 }
m.P{ -('a' * m.V(1)) }
m.P{ ('abc' * m.V(1))^-1 }
m.P{ -('abc' * m.V(1)) }
badgrammar{ #m.P('abc') * m.V(1) }
badgrammar{ -('a' + m.V(1)) }
m.P{ #('a' * m.V(1)) }
badgrammar{ #('a' + m.V(1)) }
m.P{ m.B{ m.P'abc' } * 'a' * m.V(1) }
badgrammar{ m.B{ m.P'abc' } * m.V(1) }
badgrammar{ ('a' + m.P'bcd')^-1 * m.V(1) }


-- simple tests for maximum sizes:
local p = m.P"a"
for i=1,14 do p = p * p end   -- doubles the pattern 14 times

p = {}
for i=1,100 do p[i] = m.P"a" end
p = m.P(p)


-- strange values for rule labels

p = m.P{ "print",
     print = m.V(print),
     [print] = m.V(_G),
     [_G] = m.P"a",
   }

assert(p:match("a"))

-- initial rule
-- chain of rules: rule 'i<k>' matches an "a" and then calls 'i<k+1>'
g = {}
for i = 1, 10 do g["i"..i] = "a" * m.V("i"..i+1) end
g.i11 = m.P""
-- (continuation of the "initial rule" test: 'g' holds the rules
-- i1..i11 built just above; g[1] selects the initial rule by name)
for i = 1, 10 do
  g[1] = "i"..i
  local p = m.P(g)
  assert(p:match("aaaaaaaaaaa") == 11 - i + 1)
end

print"+"


-- tests for back references
checkerr("back reference 'x' not found", m.match, m.Cb('x'), '')
checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a')

p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k"))
t = p:match("ab")
checkeq(t, {"a", "b"})

-- ten groups named by the numbers 1..10, one per subject character
p = m.P(true)
for i = 1, 10 do p = p * m.Cg(1, i) end
for i = 1, 10 do
  local p = p * m.Cb(i)
  assert(p:match('abcdefghij') == string.sub('abcdefghij', i, i))
end


-- 'foo' logs every argument it receives in 't' and appends an "x"
t = {}
function foo (p) t[#t + 1] = p; return p .. "x" end

p = m.Cg(m.C(2) / foo, "x") * m.Cb"x" *
    m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
    m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
    m.Cg(m.Cb('x') / foo, "x") * m.Cb"x"
x = {p:match'ab'}
checkeq(x, {'abx', 'abxx', 'abxxx', 'abxxxx'})
checkeq(t, {'ab',
            'ab', 'abx',
            'ab', 'abx', 'abxx',
            'ab', 'abx', 'abxx', 'abxxx'})



-- tests for match-time captures

p = m.P'a' * (function (s, i) return (s:sub(i, i) == 'b') and i + 1 end)
  + 'acd'

assert(p:match('abc') == 3)
assert(p:match('acd') == 4)

-- Match-time function that always succeeds (returns true) and hands
-- back the captures it received unchanged.
local function id (s, i, ...)
  return true, ...
end

assert(m.Cmt(m.Cs((m.Cmt(m.S'abc' / { a = 'x', c = 'y' }, id) +
              m.R'09'^1 / string.char +
              m.P(1))^0), id):match"acb98+68c" == "xyb\98+\68y")

p = m.P{'S',
  S = m.V'atom' * space
    + m.Cmt(m.Ct("(" * space * (m.Cmt(m.V'S'^1, id) + m.P(true)) * ")" * space), id),
  atom = m.Cmt(m.C(m.R("AZ", "az", "09")^1), id)
}
x = p:match"(a g () ((b) c) (d (e)))"
checkeq(x, {'a', 'g', {}, {{'b'}, 'c'}, {'d', {'e'}}});

x = {(m.Cmt(1, id)^0):match(string.rep('a', 500))}
assert(#x == 500)

-- New 'id' (shadows the previous one): succeeds at position 'i'
-- producing 1, 3, 7 when the capture is 'a'; otherwise fails (returns
-- nil followed by extra values).
local function id(s, i, x)
  if x == 'a' then return i, 1, 3, 7
  else return nil, 2, 4, 6, 8
  end
end

p = ((m.P(id) * 1 + m.Cmt(2, id) * 1 + m.Cmt(1, id) * 1))^0
assert(table.concat{p:match('abababab')} == string.rep('137', 4))

-- Match-time function that uses its capture 'x' as a pattern and
-- matches it against the subject starting #x characters before 'i'.
local function ref (s, i, x)
  return m.match(x, s, i - x:len())
end

assert(m.Cmt(m.P(1)^0, ref):match('alo') == 4)
assert((m.P(1) * m.Cmt(m.P(1)^0, ref)):match('alo') == 4)
assert(not (m.P(1) * m.Cmt(m.C(1)^0, ref)):match('alo'))

-- succeeds only when the capture, as a number, equals the current position
ref = function (s,i,x) return i == tonumber(x) and i, 'xuxu' end

assert(m.Cmt(1, ref):match'2')
assert(not m.Cmt(1, ref):match'1')
assert(m.Cmt(m.P(1)^0, ref):match'03')

-- succeeds when both captures are equal, producing them in upper case
function ref (s, i, a, b)
  if a == b then return i, a:upper() end
end

p = m.Cmt(m.C(m.R"az"^1) * "-" * m.C(m.R"az"^1), ref)
p = (any - p)^0 * p * any^0 * -1

assert(p:match'abbbc-bc ddaa' == 'BC')

do   -- match-time captures cannot be optimized away
  local touch = 0
  f = m.P(function () touch = touch + 1; return true end)

  -- assert that 'f' ran exactly 'n' times (default 1) since the last
  -- check, then reset the counter
  local function check(n) n = n or 1; assert(touch == n); touch = 0 end

  assert(m.match(f * false + 'b', 'a') == nil); check()
  assert(m.match(f * false + 'b', '') == nil); check()
  assert(m.match( (f * 'a')^0 * 'b', 'b') == 2); check()
  assert(m.match( (f * 'a')^0 * 'b', '') == nil); check()
  assert(m.match( (f * 'a')^-1 * 'b', 'b') == 2); check()
  assert(m.match( (f * 'a')^-1 * 'b', '') == nil); check()
  assert(m.match( ('b' + f * 'a')^-1 * 'b', '') == nil); check()
  assert(m.match( (m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil); check()
  assert(m.match( (-m.P(1) * m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil);
     check()
  assert(m.match( (f * 'a' + 'b')^-1 * 'b', '') == nil); check()
  assert(m.match(f * 'a' + f * 'b', 'b') == 2); check(2)
  assert(m.match(f * 'a' + f * 'b', 'a') == 2); check(1)
  assert(m.match(-f * 'a' + 'b', 'b') == 2); check(1)
  assert(m.match(-f * 'a' + 'b', '') == nil); check(1)
end

-- Lua-style long string: the closing bracket must carry the same
-- number of '=' as the opening one (kept in group "init")
c = '[' * m.Cg(m.P'='^0, "init") * '[' *
    { m.Cmt(']' * m.C(m.P'='^0) * ']' * m.Cb("init"), function (_, _, s1, s2)
                                               return s1 == s2 end)
       + 1 * m.V(1) } / 0

assert(c:match'[==[]]====]]]]==]===[]' == 18)
assert(c:match'[[]=]====]=]]]==]===[]' == 14)
assert(not c:match'[[]=]====]=]=]==]===[]')


-- old bug: optimization of concat with fail removed match-time capture
-- (the function stores the subject in 'p' as a visible side effect)
p = m.Cmt(0, function (s) p = s end) * m.P(false)
assert(not p:match('alo'))
assert(p == 'alo')


-- ensure that failed match-time captures are not kept on Lua stack
do
  -- weak table that is its own metatable: "__mode" is its only strong entry
  local t = {__mode = "kv"}; setmetatable(t,t)
  local c = 0

  local function foo (s,i)
    collectgarbage();
    -- values produced by previous (failed) calls must have been collected
    assert(next(t) == "__mode" and next(t, "__mode") == nil)
    local x = {}
    t[x] = true
    c = c + 1
    return i, x
  end

  local p = m.P{ m.Cmt(0, foo) * m.P(false) + m.P(1) * m.V(1) + m.P"" }
  p:match(string.rep('1', 10))
  assert(c == 11)
end


-- Return a match-time capture that returns 'n' captures
-- (the values n - 1, n - 2, ..., 0, after matching an "a")
local function manyCmt (n)
    return m.Cmt("a", function ()
             local a = {}; for i = 1, n do a[i] = n - i end
             return true, unpack(a)
           end)
end

-- bug in 1.0: failed match-time that used previous match-time results
do
  local x
  local function aux (...) x = #{...}; return false end
  local res = {m.match(m.Cmt(manyCmt(20), aux) + manyCmt(10), "a")}
  assert(#res == 10 and res[1] == 9 and res[10] == 0)
end


-- bug in 1.0: problems with match-time captures returning too many captures
do
  local lim = 2^11 - 10
  local res = {m.match(manyCmt(lim), "a")}
  assert(#res == lim and res[1] == lim - 1 and res[lim] == 0)
  checkerr("too many", m.match, manyCmt(2^15), "a")
end

p = (m.P(function () return true, "a" end) * 'a'
  + m.P(function (s, i) return i, "aa", 20 end) * 'b'
  + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0

t = {p:match('abacc')}
checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'})


-------------------------------------------------------------------
-- Tests for 're' module
-------------------------------------------------------------------

local re = require "re"

local match, compile = re.match, re.compile



assert(match("a", ".") == 2)
assert(match("a", "''") == 1)
assert(match("", " ! . ") == 1)
assert(not match("a", " ! . "))
assert(match("abcde", " ( . . ) * ") == 5)
assert(match("abbcde", " [a-c] +") == 5)
assert(match("0abbc1de", "'0' [a-c]+ '1'") == 7)
assert(match("0zz1dda", "'0' [^a-c]+ 'a'") == 8)
assert(match("abbc--", " [a-c] + +") == 5)
assert(match("abbc--", " [ac-] +") == 2)
assert(match("abbc--", " [-acb] + ") == 7)
assert(not match("abbcde", " [b-z] + "))
assert(match("abb\"de", '"abb"["]"de"') == 7)
assert(match("abceeef", "'ac' ? 'ab' * 'c' { 'e' * } / 'abceeef' ") == "eee")
assert(match("abceeef", "'ac'? 'ab'* 'c' { 'f'+ } / 'abceeef' ") == 8)

assert(re.match("aaand", "[a]^2") == 3)

-- equivalent spellings of "position capture before each 'e'"
local t = {match("abceefe", "( ( & 'e' {} ) ? . ) * ")}
checkeq(t, {4, 5, 7})
local t = {match("abceefe", "((&&'e' {})? .)*")}
checkeq(t, {4, 5, 7})
local t = {match("abceefe", "( ( ! ! 'e' {} ) ? . ) *")}
checkeq(t, {4, 5, 7})
local t = {match("abceefe", "(( & ! & ! 'e' {})? .)*")}
checkeq(t, {4, 5, 7})

assert(match("cccx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 5)
assert(match("cdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 4)
assert(match("abcdcdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 8)

assert(match("abc", "a <- (. a)?") == 4)
b = "balanced <- '(' ([^()] / balanced)* ')'"
assert(match("(abc)", b))
assert(match("(a(b)((c) (d)))", b))
assert(not match("(a(b ((c) (d)))", b))

b = compile[[ balanced <- "(" ([^()] / balanced)* ")" ]]
assert(b == m.P(b))
assert(b:match"((((a))(b)))")

local g = [[
  S <- "0" B / "1" A / ""   -- balanced strings
  A <- "0" S / "1" A A      -- one more 0
  B <- "1" S / "0" B B      -- one more 1
]]
assert(match("00011011", g) == 9)

local g = [[
  S <- ("0" B / "1" A)*
  A <- "0" / "1" A A
  B <- "1" / "0" B B
]]
assert(match("00011011", g) == 9)
assert(match("000110110", g) == 9)
assert(match("011110110", g) == 3)
assert(match("000110010", g) == 1)

-- repetition operators: ^n (exactly), ^+n (at least), ^-n (at most)
s = "aaaaaaaaaaaaaaaaaaaaaaaa"
assert(match(s, "'a'^3") == 4)
assert(match(s, "'a'^0") == 1)
assert(match(s, "'a'^+3") == s:len() + 1)
assert(not match(s, "'a'^+30"))
assert(match(s, "'a'^-30") == s:len() + 1)
assert(match(s, "'a'^-5") == 6)
for i = 1, s:len() do
  assert(match(s, string.format("'a'^+%d", i)) >= i + 1)
  assert(match(s, string.format("'a'^-%d", i)) <= i + 1)
  assert(match(s, string.format("'a'^%d", i)) == i + 1)
end
assert(match("01234567890123456789", "[0-9]^3+") == 19)


assert(match("01234567890123456789", "({....}{...}) -> '%2%1'") == "4560123")
t = match("0123456789", "{| {.}* |}")
checkeq(t, {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"})
assert(match("012345", "{| (..) -> '%0%0' |}")[1] == "0101")

assert(match("abcdef", "( {.} {.} {.} {.} {.} ) -> 3") == "c")
assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 3") == "d")
assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 0") == 6)

assert(not match("abcdef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
assert(match("abcbef", "{:x: ({.} {.} {.}) -> 2 :} =x"))

-- character classes: placement of ']' and '-' inside brackets
eqcharset(compile"[]]", "]")
eqcharset(compile"[][]", m.S"[]")
eqcharset(compile"[]-]", m.S"-]")
eqcharset(compile"[-]", m.S"-")
eqcharset(compile"[az-]", m.S"a-z")
eqcharset(compile"[-az]", m.S"a-z")
eqcharset(compile"[a-z]", m.R"az")
eqcharset(compile"[]['\"]", m.S[[]['"]])

-- same classes, complemented
eqcharset(compile"[^]]", any - "]")
eqcharset(compile"[^][]", any - m.S"[]")
eqcharset(compile"[^]-]", any - m.S"-]")
eqcharset(compile"[^]-]", any - m.S"-]")
eqcharset(compile"[^-]", any - m.S"-")
eqcharset(compile"[^az-]", any - m.S"a-z")
eqcharset(compile"[^-az]", any - m.S"a-z")
eqcharset(compile"[^a-z]", any - m.R"az")
eqcharset(compile"[^]['\"]", any - m.S[[]['"]])

-- tests for comments in 're'
e = compile[[
A <- _B   -- \t \n %nl .<> <- -> --
_B <- 'x'  --]]
assert(e:match'xy' == 2)

-- tests for 're' with pre-definitions
defs = {digits = m.R"09", letters = m.R"az", _=m.P"__"}
e = compile("%letters (%letters / %digits)*", defs)
assert(e:match"x123" == 5)
e = compile("%_", defs)
assert(e:match"__" == 3)

e = compile([[
  S <- A+
  A <- %letters+ B
  B <- %digits+
]], defs)

-- the 'math' table itself serves as the definitions table for 'sin'
e = compile("{[0-9]+'.'?[0-9]*} -> sin", math)
assert(e:match("2.34") == math.sin(2.34))


-- NOTE(review): 'eq' is not referenced by any later statement in this
-- part of the file; kept because it is a global.
function eq (_, _, a, b) return a == b end

c = re.compile([[
  longstring <- '[' {:init: '='* :} '[' close
  close <- ']' =init ']' / . close
]])

assert(c:match'[==[]]===]]]]==]===[]' == 17)
assert(c:match'[[]=]====]=]]]==]===[]' == 14)
assert(not c:match'[[]=]====]=]=]==]===[]')

c = re.compile" '[' {:init: '='* :} '[' (!(']' =init ']') .)* ']' =init ']' !. "

assert(c:match'[==[]]===]]]]==]')
assert(c:match'[[]=]====]=][]==]===[]]')
assert(not c:match'[[]=]====]=]=]==]===[]')

assert(re.find("hi alalo", "{:x:..:} =x") == 4)
assert(re.find("hi alalo", "{:x:..:} =x", 4) == 4)
assert(not re.find("hi alalo", "{:x:..:} =x", 5))
assert(re.find("hi alalo", "{'al'}", 5) == 6)
assert(re.find("hi aloalolo", "{:x:..:} =x") == 8)
assert(re.find("alo alohi x x", "{:word:%w+:}%W*(=word)!%w") == 11)

-- re.find discards any captures
local a,b,c = re.find("alo", "{.}{'o'}")
assert(a == 2 and b == 3 and c == nil)

-- From here on 'match' shadows re.match: it returns the substring of
-- 's' located by re.find(s, p), or nothing when there is no match.
local function match (s,p)
  local i,e = re.find(s,p)
  if i then return s:sub(i, e) end
end
assert(match("alo alo", '[a-z]+') == "alo")
assert(match("alo alo", '{:x: [a-z]+ :} =x') == nil)
assert(match("alo alo", "{:x: [a-z]+ :} ' ' =x") == "alo alo")

assert(re.gsub("alo alo", "[abc]", "x") == "xlo xlo")
assert(re.gsub("alo alo", "%w+", ".") == ". .")
assert(re.gsub("hi, how are you", "[aeiou]", string.upper) ==
               "hI, hOw ArE yOU")

s = 'hi [[a comment[=]=] ending here]] and [=[another]]=]]'
c = re.compile" '[' {:i: '='* :} '[' (!(']' =i ']') .)* ']' { =i } ']' "
-- the first bracketed comment is replaced by an empty string, so the
-- spaces that surrounded it end up adjacent (two spaces after "hi")
assert(re.gsub(s, c, "%2") == 'hi  and =]')
assert(re.gsub(s, c, "%0") == s)
assert(re.gsub('[=[hi]=]', c, "%2") == '=')

assert(re.find("", "!.") == 1)
assert(re.find("alo", "!.") == 4)

-- match-time function: stores 'tag' in field 'tag' of table 't' and
-- succeeds at the current position, producing 't'
function addtag (s, i, t, tag) t.tag = tag; return i, t end

c = re.compile([[
  doc <- block !.
  block <- (start {| (block / { [^<]+ })* |} end?) => addtag
  start <- '<' {:tag: [a-z]+ :} '>'
  end <- '</' { =tag } '>'
]], {addtag = addtag})

x = c:match[[
<x>hi<b>hello</b>but<b>totheend</x>]]
checkeq(x, {tag='x', 'hi', {tag = 'b', 'hello'}, 'but',
                     {'totheend'}})


-- test for folding captures
c = re.compile([[
  S <- (number (%s+ number)*) ~> add
  number <- %d+ -> tonumber
]], {tonumber = tonumber, add = function (a,b) return a + b end})
assert(c:match("3 401 50") == 3 + 401 + 50)

-- tests for look-ahead captures
x = {re.match("alo", "&(&{.}) !{'b'} {&(...)} &{..} {...} {!.}")}
checkeq(x, {"", "alo", ""})

assert(re.match("aloalo",
   "{~ (((&'al' {.}) -> 'A%1' / (&%l {.}) -> '%1%1') / .)* ~}")
       == "AallooAalloo")

-- bug in 0.9 (and older versions), due to captures in look-aheads
x = re.compile[[ {~ (&(. ([a-z]* -> '*')) ([a-z]+ -> '+') ' '*)* ~} ]]
assert(x:match"alo alo" == "+ +")

-- valid capture in look-ahead (used inside the look-ahead itself)
x = re.compile[[
      S <- &({:two: .. :} . =two) {[a-z]+} / . S
]]
assert(x:match("hello aloaLo aloalo xuxu") == "aloalo")


-- indentation-based blocks: nested lines share a longer 'ident' prefix
p = re.compile[[
  block <- {| {:ident:space*:} line
           ((=ident !space line) / &(=ident space) block)* |}
  line <- {[^%nl]*} %nl
  space <- '_'     -- should be ' ', but '_' is simpler for editors
]]

t= p:match[[
1
__1.1
__1.2
____1.2.1
____
2
__2.1
]]
checkeq(t, {"1", {"1.1", "1.2", {"1.2.1", "", ident = "____"}, ident = "__"},
            "2", {"2.1", ident = "__"}, ident = ""})


-- nested grammars
p = re.compile[[
       s <- a b !.
       b <- ( x <- ('b' x)? )
       a <- ( x <- 'a' x? )
]]

assert(p:match'aaabbb')
assert(p:match'aaa')
assert(not p:match'bbb')
assert(not p:match'aaabbba')

-- testing groups
t = {re.match("abc", "{:S <- {:.:} {S} / '':}")}
checkeq(t, {"a", "bc", "b", "c", "c", ""})

t = re.match("1234", "{| {:a:.:} {:b:.:} {:c:.{.}:} |}")
checkeq(t, {a="1", b="2", c="4"})
t = re.match("1234", "{|{:a:.:} {:b:{.}{.}:} {:c:{.}:}|}")
checkeq(t, {a="1", b="2", c="4"})
t = re.match("12345", "{| {:.:} {:b:{.}{.}:} {:{.}{.}:} |}")
checkeq(t, {"1", b="2", "4", "5"})
t = re.match("12345", "{| {:.:} {:{:b:{.}{.}:}:} {:{.}{.}:} |}")
checkeq(t, {"1", "23", "4", "5"})
t = re.match("12345", "{| {:.:} {{:b:{.}{.}:}} {:{.}{.}:} |}")
checkeq(t, {"1", "23", "4", "5"})


-- testing pre-defined names
assert(os.setlocale("C") == "C")

-- Check that the 're' pattern 'p1' accepts exactly the characters
-- kept by the Lua character class "[p2]": both are applied to
-- 'allchar' and the surviving characters must be the same.
function eqlpeggsub (p1, p2)
  local s1 = cs2str(re.compile(p1))
  local s2 = string.gsub(allchar, "[^" .. p2 .. "]", "")
  -- if s1 ~= s2 then print(#s1,#s2) end
  assert(s1 == s2)
end


eqlpeggsub("%w", "%w")
eqlpeggsub("%a", "%a")
eqlpeggsub("%l", "%l")
eqlpeggsub("%u", "%u")
eqlpeggsub("%p", "%p")
eqlpeggsub("%d", "%d")
eqlpeggsub("%x", "%x")
eqlpeggsub("%s", "%s")
eqlpeggsub("%c", "%c")

eqlpeggsub("%W", "%W")
eqlpeggsub("%A", "%A")
eqlpeggsub("%L", "%L")
eqlpeggsub("%U", "%U")
eqlpeggsub("%P", "%P")
eqlpeggsub("%D", "%D")
eqlpeggsub("%X", "%X")
eqlpeggsub("%S", "%S")
eqlpeggsub("%C", "%C")

eqlpeggsub("[%w]", "%w")
eqlpeggsub("[_%w]", "_%w")
eqlpeggsub("[^%w]", "%W")
eqlpeggsub("[%W%S]", "%W%S")

re.updatelocale()


-- testing nested substitutions x string captures

p = re.compile[[
      text <- {~ item* ~}
      item <- macro / [^()] / '(' item* ')'
      arg <- ' '* {~ (!',' item)* ~}
      args <- '(' arg (',' arg)* ')'
      macro <- ('apply' args) -> '%1(%2)'
             / ('add' args) -> '%1 + %2'
             / ('mul' args) -> '%1 * %2'
]]

assert(p:match"add(mul(a,b), apply(f,x))" == "a * b + f(x)")

-- string reversal through recursive string captures
rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]

assert(rev:match"0123456789" == "9876543210")


-- testing error messages in re

-- Check that compiling the 're' pattern 'p' fails with an error
-- matching 'err' (through 'checkerr', defined earlier in this file).
local function errmsg (p, err)
  checkerr(err, re.compile, p)
end

errmsg('aaaa', "rule 'aaaa'")
errmsg('a', 'outside')
errmsg('b <- a', 'undefined')
errmsg("x <- 'a' x <- 'b'", 'already defined')
errmsg("'a' -", "near '-'")


print"OK"