1#!/usr/bin/env lua
2
3-- $Id: test.lua $
4
5-- require"strict"    -- just to be pedantic
6
7local m = require"lpeg"
8
9
10-- for general use
11local a, b, c, d, e, f, g, p, t
12
13
14-- compatibility with Lua 5.2
15local unpack = rawget(table, "unpack") or unpack
16local loadstring = rawget(_G, "loadstring") or load
17
18
19local any = m.P(1)
20local space = m.S" \t\n"^0
21
22local function checkeq (x, y, p)
23if p then print(x,y) end
24  if type(x) ~= "table" then assert(x == y)
25  else
26    for k,v in pairs(x) do checkeq(v, y[k], p) end
27    for k,v in pairs(y) do checkeq(v, x[k], p) end
28  end
29end
30
31
32local mt = getmetatable(m.P(1))
33
34
35local allchar = {}
36for i=0,255 do allchar[i + 1] = i end
37allchar = string.char(unpack(allchar))
38assert(#allchar == 256)
39
40local function cs2str (c)
41  return m.match(m.Cs((c + m.P(1)/"")^0), allchar)
42end
43
44local function eqcharset (c1, c2)
45  assert(cs2str(c1) == cs2str(c2))
46end
47
48
49print"General tests for LPeg library"
50
51assert(type(m.version()) == "string")
52print("version " .. m.version())
53assert(m.type("alo") ~= "pattern")
54assert(m.type(io.input) ~= "pattern")
55assert(m.type(m.P"alo") == "pattern")
56
57-- tests for some basic optimizations
58assert(m.match(m.P(false) + "a", "a") == 2)
59assert(m.match(m.P(true) + "a", "a") == 1)
60assert(m.match("a" + m.P(false), "b") == nil)
61assert(m.match("a" + m.P(true), "b") == 1)
62
63assert(m.match(m.P(false) * "a", "a") == nil)
64assert(m.match(m.P(true) * "a", "a") == 2)
65assert(m.match("a" * m.P(false), "a") == nil)
66assert(m.match("a" * m.P(true), "a") == 2)
67
68assert(m.match(#m.P(false) * "a", "a") == nil)
69assert(m.match(#m.P(true) * "a", "a") == 2)
70assert(m.match("a" * #m.P(false), "a") == nil)
71assert(m.match("a" * #m.P(true), "a") == 2)
72
73
74-- tests for locale
75do
76  assert(m.locale(m) == m)
77  local t = {}
78  assert(m.locale(t, m) == t)
79  local x = m.locale()
80  for n,v in pairs(x) do
81    assert(type(n) == "string")
82    eqcharset(v, m[n])
83  end
84end
85
86
87assert(m.match(3, "aaaa"))
88assert(m.match(4, "aaaa"))
89assert(not m.match(5, "aaaa"))
90assert(m.match(-3, "aa"))
91assert(not m.match(-3, "aaa"))
92assert(not m.match(-3, "aaaa"))
93assert(not m.match(-4, "aaaa"))
94assert(m.P(-5):match"aaaa")
95
96assert(m.match("a", "alo") == 2)
97assert(m.match("al", "alo") == 3)
98assert(not m.match("alu", "alo"))
99assert(m.match(true, "") == 1)
100
101local digit = m.S"0123456789"
102local upper = m.S"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
103local lower = m.S"abcdefghijklmnopqrstuvwxyz"
104local letter = m.S"" + upper + lower
105local alpha = letter + digit + m.R()
106
107eqcharset(m.S"", m.P(false))
108eqcharset(upper, m.R("AZ"))
109eqcharset(lower, m.R("az"))
110eqcharset(upper + lower, m.R("AZ", "az"))
111eqcharset(upper + lower, m.R("AZ", "cz", "aa", "bb", "90"))
112eqcharset(digit, m.S"01234567" + "8" + "9")
113eqcharset(upper, letter - lower)
114eqcharset(m.S(""), m.R())
115assert(cs2str(m.S("")) == "")
116
117eqcharset(m.S"\0", "\0")
118eqcharset(m.S"\1\0\2", m.R"\0\2")
119eqcharset(m.S"\1\0\2", m.R"\1\2" + "\0")
120eqcharset(m.S"\1\0\2" - "\0", m.R"\1\2")
121
122local word = alpha^1 * (1 - alpha)^0
123
124assert((word^0 * -1):match"alo alo")
125assert(m.match(word^1 * -1, "alo alo"))
126assert(m.match(word^2 * -1, "alo alo"))
127assert(not m.match(word^3 * -1, "alo alo"))
128
129assert(not m.match(word^-1 * -1, "alo alo"))
130assert(m.match(word^-2 * -1, "alo alo"))
131assert(m.match(word^-3 * -1, "alo alo"))
132
133local eos = m.P(-1)
134
135assert(m.match(digit^0 * letter * digit * eos, "1298a1"))
136assert(not m.match(digit^0 * letter * eos, "1257a1"))
137
138b = {
139  [1] = "(" * (((1 - m.S"()") + #m.P"(" * m.V(1))^0) * ")"
140}
141
142assert(m.match(b, "(al())()"))
143assert(not m.match(b * eos, "(al())()"))
144assert(m.match(b * eos, "((al())()(é))"))
145assert(not m.match(b, "(al()()"))
146
147assert(not m.match(letter^1 - "for", "foreach"))
148assert(m.match(letter^1 - ("for" * eos), "foreach"))
149assert(not m.match(letter^1 - ("for" * eos), "for"))
150
151function basiclookfor (p)
152  return m.P {
153    [1] = p + (1 * m.V(1))
154  }
155end
156
157function caplookfor (p)
158  return basiclookfor(p:C())
159end
160
161assert(m.match(caplookfor(letter^1), "   4achou123...") == "achou")
162a = {m.match(caplookfor(letter^1)^0, " two words, one more  ")}
163checkeq(a, {"two", "words", "one", "more"})
164
165assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), "  (  (a)") == 7)
166
167a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")}
168checkeq(a, {"123", "d"})
169
170-- bug in LPeg 0.12  (nil value does not create a 'ktable')
171assert(m.match(m.Cc(nil), "") == nil)
172
173a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")}
174checkeq(a, {"abcd", "l"})
175
176a = {m.match(m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
177checkeq(a, {10,20,30,2})
178a = {m.match(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
179checkeq(a, {1,10,20,30,2})
180a = m.match(m.Ct(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
181checkeq(a, {1,10,20,30,2})
182a = m.match(m.Ct(m.Cp() * m.Cc(7,8) * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
183checkeq(a, {1,7,8,10,20,30,2})
184a = {m.match(m.Cc() * m.Cc() * m.Cc(1) * m.Cc(2,3,4) * m.Cc() * 'a', 'aaa')}
185checkeq(a, {1,2,3,4})
186
187a = {m.match(m.Cp() * letter^1 * m.Cp(), "abcd")}
188checkeq(a, {1, 5})
189
190
191t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")}
192checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""})
193
194-- bug in 0.12 ('hascapture' did not check for captures inside a rule)
195do
196  local pat = m.P{
197    'S';
198    S1 = m.C('abc') + 3,
199    S = #m.V('S1')    -- rule has capture, but '#' must ignore it
200  }
201  assert(pat:match'abc' == 1)
202end
203
204
205-- bug: loop in 'hascaptures'
206do
207  local p = m.C(-m.P{m.P'x' * m.V(1) + m.P'y'})
208  assert(p:match("xxx") == "")
209end
210
211
212
213-- test for small capture boundary
214for i = 250,260 do
215  assert(#m.match(m.C(i), string.rep('a', i)) == i)
216  assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i)
217end
218
219-- tests for any*n and any*-n
220for n = 1, 550, 13 do
221  local x_1 = string.rep('x', n - 1)
222  local x = x_1 .. 'a'
223  assert(not m.P(n):match(x_1))
224  assert(m.P(n):match(x) == n + 1)
225  assert(n < 4 or m.match(m.P(n) + "xxx", x_1) == 4)
226  assert(m.C(n):match(x) == x)
227  assert(m.C(m.C(n)):match(x) == x)
228  assert(m.P(-n):match(x_1) == 1)
229  assert(not m.P(-n):match(x))
230  assert(n < 13 or m.match(m.Cc(20) * ((n - 13) * m.P(10)) * 3, x) == 20)
231  local n3 = math.floor(n/3)
232  assert(m.match(n3 * m.Cp() * n3 * n3, x) == n3 + 1)
233end
234
235-- true values
236assert(m.P(0):match("x") == 1)
237assert(m.P(0):match("") == 1)
238assert(m.C(0):match("x") == "")
239
240assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxu") == 1)
241assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxuxuxuxu") == 0)
242assert(m.match(m.C(m.P(2)^1), "abcde") == "abcd")
243p = m.Cc(0) * 1 + m.Cc(1) * 2 + m.Cc(2) * 3 + m.Cc(3) * 4
244
245
246-- test for alternation optimization
247assert(m.match(m.P"a"^1 + "ab" + m.P"x"^0, "ab") == 2)
248assert(m.match((m.P"a"^1 + "ab" + m.P"x"^0 * 1)^0, "ab") == 3)
249assert(m.match(m.P"ab" + "cd" + "" + "cy" + "ak", "98") == 1)
250assert(m.match(m.P"ab" + "cd" + "ax" + "cy", "ax") == 3)
251assert(m.match("a" * m.P"b"^0 * "c"  + "cd" + "ax" + "cy", "ax") == 3)
252assert(m.match((m.P"ab" + "cd" + "ax" + "cy")^0, "ax") == 3)
253assert(m.match(m.P(1) * "x" + m.S"" * "xu" + "ay", "ay") == 3)
254assert(m.match(m.P"abc" + "cde" + "aka", "aka") == 4)
255assert(m.match(m.S"abc" * "x" + "cde" + "aka", "ax") == 3)
256assert(m.match(m.S"abc" * "x" + "cde" + "aka", "aka") == 4)
257assert(m.match(m.S"abc" * "x" + "cde" + "aka", "cde") == 4)
258assert(m.match(m.S"abc" * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
259assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "ax") == 3)
260assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "aka") == 4)
261assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "cde") == 4)
262assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
263assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "ax") == 3)
264assert(m.match(m.P(1) * "x" + "cde" + m.S"ab" * "ka", "aka") == 4)
265assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "aka") == 4)
266assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "cde") == 4)
267assert(m.match(m.P"eb" + "cd" + m.P"e"^0 + "x", "ee") == 3)
268assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "abcd") == 3)
269assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "eeex") == 4)
270assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "cd") == 3)
271assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "x") == 1)
272assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x" + "", "zee") == 1)
273assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "abcd") == 3)
274assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "eeex") == 4)
275assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "cd") == 3)
276assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "x") == 2)
277assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x" + "", "zee") == 1)
278assert(not m.match(("aa" * m.P"bc"^-1 + "aab") * "e", "aabe"))
279
280assert(m.match("alo" * (m.P"\n" + -1), "alo") == 4)
281
282
283-- bug in 0.12 (rc1)
284assert(m.match((m.P"\128\187\191" + m.S"abc")^0, "\128\187\191") == 4)
285
286assert(m.match(m.S"\0\128\255\127"^0, string.rep("\0\128\255\127", 10)) ==
287    4*10 + 1)
288
289-- optimizations with optional parts
290assert(m.match(("ab" * -m.P"c")^-1, "abc") == 1)
291assert(m.match(("ab" * #m.P"c")^-1, "abd") == 1)
292assert(m.match(("ab" * m.B"c")^-1, "ab") == 1)
293assert(m.match(("ab" * m.P"cd"^0)^-1, "abcdcdc") == 7)
294
295assert(m.match(m.P"ab"^-1 - "c", "abcd") == 3)
296
297p = ('Aa' * ('Bb' * ('Cc' * m.P'Dd'^0)^0)^0)^-1
298assert(p:match("AaBbCcDdBbCcDdDdDdBb") == 21)
299
300
301-- bug in 0.12.2
302-- p = { ('ab' ('c' 'ef'?)*)? }
303p = m.C(('ab' * ('c' * m.P'ef'^-1)^0)^-1)
304s = "abcefccefc"
305assert(s == p:match(s))
306
307
308pi = "3.14159 26535 89793 23846 26433 83279 50288 41971 69399 37510"
309assert(m.match(m.Cs((m.P"1" / "a" + m.P"5" / "b" + m.P"9" / "c" + 1)^0), pi) ==
310  m.match(m.Cs((m.P(1) / {["1"] = "a", ["5"] = "b", ["9"] = "c"})^0), pi))
311print"+"
312
313
314-- tests for capture optimizations
315assert(m.match((m.P(3) +  4 * m.Cp()) * "a", "abca") == 5)
316t = {m.match(((m.P"a" + m.Cp()) * m.P"x")^0, "axxaxx")}
317checkeq(t, {3, 6})
318
319
320-- tests for numbered captures
321p = m.C(1)
322assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 3, "abcdefgh") == "a")
323assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 1, "abcdefgh") == "abcdef")
324assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 4, "abcdefgh") == "bc")
325assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 0, "abcdefgh") == 7)
326
327a, b, c = m.match(p * (m.C(p * m.C(2)) * m.C(3) / 4) * p, "abcdefgh")
328assert(a == "a" and b == "efg" and c == "h")
329
330-- test for table captures
331t = m.match(m.Ct(letter^1), "alo")
332checkeq(t, {})
333
334t, n = m.match(m.Ct(m.C(letter)^1) * m.Cc"t", "alo")
335assert(n == "t" and table.concat(t) == "alo")
336
337t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
338assert(table.concat(t, ";") == "alo;a;l;o")
339
340t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
341assert(table.concat(t, ";") == "alo;a;l;o")
342
343t = m.match(m.Ct(m.Ct((m.Cp() * letter * m.Cp())^1)), "alo")
344assert(table.concat(t[1], ";") == "1;2;2;3;3;4")
345
346t = m.match(m.Ct(m.C(m.C(1) * 1 * m.C(1))), "alo")
347checkeq(t, {"alo", "a", "o"})
348
349
350-- tests for groups
351p = m.Cg(1)   -- no capture
352assert(p:match('x') == 'x')
353p = m.Cg(m.P(true)/function () end * 1)   -- no value
354assert(p:match('x') == 'x')
355p = m.Cg(m.Cg(m.Cg(m.C(1))))
356assert(p:match('x') == 'x')
357p = m.Cg(m.Cg(m.Cg(m.C(1))^0) * m.Cg(m.Cc(1) * m.Cc(2)))
358t = {p:match'abc'}
359checkeq(t, {'a', 'b', 'c', 1, 2})
360
361p = m.Ct(m.Cg(m.Cc(10), "hi") * m.C(1)^0 * m.Cg(m.Cc(20), "ho"))
362t = p:match''
363checkeq(t, {hi = 10, ho = 20})
364t = p:match'abc'
365checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'})
366
367-- non-string group names
368p = m.Ct(m.Cg(1, print) * m.Cg(1, 23.5) * m.Cg(1, io))
369t = p:match('abcdefghij')
370assert(t[print] == 'a' and t[23.5] == 'b' and t[io] == 'c')
371
372
373-- test for error messages
374local function checkerr (msg, f, ...)
375  local st, err = pcall(f, ...)
376  assert(not st and m.match({ m.P(msg) + 1 * m.V(1) }, err))
377end
378
379checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a")
380checkerr("rule '1' used outside a grammar", m.match, m.V(1), "")
381checkerr("rule 'hiii' used outside a grammar", m.match, m.V('hiii'), "")
382checkerr("rule 'hiii' undefined in given grammar", m.match, { m.V('hiii') }, "")
383checkerr("undefined in given grammar", m.match, { m.V{} }, "")
384
385checkerr("rule 'A' is not a pattern", m.P, { m.P(1), A = {} })
386checkerr("grammar has no initial rule", m.P, { [print] = {} })
387
388-- grammar with a long call chain before left recursion
389p = {'a',
390  a = m.V'b' * m.V'c' * m.V'd' * m.V'a',
391  b = m.V'c',
392  c = m.V'd',
393  d = m.V'e',
394  e = m.V'f',
395  f = m.V'g',
396  g = m.P''
397}
398checkerr("rule 'a' may be left recursive", m.match, p, "a")
399
400-- Bug in peephole optimization of LPeg 0.12 (IJmp -> ICommit)
401-- the next grammar has an original sequence IJmp -> ICommit -> IJmp L1
402-- that is optimized to ICommit L1
403
404p = m.P { (m.P {m.P'abc'} + 'ayz') * m.V'y'; y = m.P'x' }
405assert(p:match('abcx') == 5 and p:match('ayzx') == 5 and not p:match'abc')
406
407
408do
409  -- large dynamic Cc
410  local lim = 2^16 - 1
411  local c = 0
412  local function seq (n)
413    if n == 1 then c = c + 1; return m.Cc(c)
414    else
415      local m = math.floor(n / 2)
416      return seq(m) * seq(n - m)
417    end
418  end
419  p = m.Ct(seq(lim))
420  t = p:match('')
421  assert(t[lim] == lim)
422  checkerr("too many", function () p = p / print end)
423  checkerr("too many", seq, lim + 1)
424end
425
426
427do
428  -- nesting of captures too deep
429  local p = m.C(1)
430  for i = 1, 300 do
431    p = m.Ct(p)
432  end
433  checkerr("too deep", p.match, p, "x")
434end
435
436
437-- tests for non-pattern as arguments to pattern functions
438
439p = { ('a' * m.V(1))^-1 } * m.P'b' * { 'a' * m.V(2); m.V(1)^-1 }
440assert(m.match(p, "aaabaac") == 7)
441
442p = m.P'abc' * 2 * -5 * true * 'de'  -- mix of numbers and strings and booleans
443
444assert(p:match("abc01de") == 8)
445assert(p:match("abc01de3456") == nil)
446
447p = 'abc' * (2 * (-5 * (true * m.P'de')))
448
449assert(p:match("abc01de") == 8)
450assert(p:match("abc01de3456") == nil)
451
452p = { m.V(2), m.P"abc" } *
453     (m.P{ "xx", xx = m.P"xx" } + { "x", x = m.P"a" * m.V"x" + "" })
454assert(p:match("abcaaaxx") == 7)
455assert(p:match("abcxx") == 6)
456
457
458-- a large table capture
459t = m.match(m.Ct(m.C('a')^0), string.rep("a", 10000))
460assert(#t == 10000 and t[1] == 'a' and t[#t] == 'a')
461
462print('+')
463
464
465-- bug in 0.10 (rechecking a grammar, after tail-call optimization)
466m.P{ m.P { (m.P(3) + "xuxu")^0 * m.V"xuxu", xuxu = m.P(1) } }
467
468local V = m.V
469
470local Space = m.S(" \n\t")^0
471local Number = m.C(m.R("09")^1) * Space
472local FactorOp = m.C(m.S("+-")) * Space
473local TermOp = m.C(m.S("*/")) * Space
474local Open = "(" * Space
475local Close = ")" * Space
476
477
478local function f_factor (v1, op, v2, d)
479  assert(d == nil)
480  if op == "+" then return v1 + v2
481  else return v1 - v2
482  end
483end
484
485
486local function f_term (v1, op, v2, d)
487  assert(d == nil)
488  if op == "*" then return v1 * v2
489  else return v1 / v2
490  end
491end
492
493G = m.P{ "Exp",
494  Exp = m.Cf(V"Factor" * m.Cg(FactorOp * V"Factor")^0, f_factor);
495  Factor = m.Cf(V"Term" * m.Cg(TermOp * V"Term")^0, f_term);
496  Term = Number / tonumber  +  Open * V"Exp" * Close;
497}
498
499G = Space * G * -1
500
501for _, s in ipairs{" 3 + 5*9 / (1+1) ", "3+4/2", "3+3-3- 9*2+3*9/1-  8"} do
502  assert(m.match(G, s) == loadstring("return "..s)())
503end
504
505
506-- test for grammars (errors deep in calling non-terminals)
507g = m.P{
508  [1] = m.V(2) + "a",
509  [2] = "a" * m.V(3) * "x",
510  [3] = "b" * m.V(3) + "c"
511}
512
513assert(m.match(g, "abbbcx") == 7)
514assert(m.match(g, "abbbbx") == 2)
515
516
517-- tests for \0
518assert(m.match(m.R("\0\1")^1, "\0\1\0") == 4)
519assert(m.match(m.S("\0\1ab")^1, "\0\1\0a") == 5)
520assert(m.match(m.P(1)^3, "\0\1\0a") == 5)
521assert(not m.match(-4, "\0\1\0a"))
522assert(m.match("\0\1\0a", "\0\1\0a") == 5)
523assert(m.match("\0\0\0", "\0\0\0") == 4)
524assert(not m.match("\0\0\0", "\0\0"))
525
526
527-- tests for predicates
528assert(not m.match(-m.P("a") * 2, "alo"))
529assert(m.match(- -m.P("a") * 2, "alo") == 3)
530assert(m.match(#m.P("a") * 2, "alo") == 3)
531assert(m.match(##m.P("a") * 2, "alo") == 3)
532assert(not m.match(##m.P("c") * 2, "alo"))
533assert(m.match(m.Cs((##m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
534assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
535assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
536assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
537
538
539-- fixed length
540do
541  -- 'and' predicate using fixed length
542  local p = m.C(#("a" * (m.P("bd") + "cd")) * 2)
543  assert(p:match("acd") == "ac")
544
545  p = #m.P{ "a" * m.V(2), m.P"b" } * 2
546  assert(p:match("abc") == 3)
547
548  p = #(m.P"abc" * m.B"c")
549  assert(p:match("abc") == 1 and not p:match("ab"))
550
551  p = m.P{ "a" * m.V(2), m.P"b"^1 }
552  checkerr("pattern may not have fixed length", m.B, p)
553
554  p = "abc" * (m.P"b"^1 + m.P"a"^0)
555  checkerr("pattern may not have fixed length", m.B, p)
556end
557
558
559p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3)
560assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1)
561
562p = -m.P'a' * m.Cc(10) + #m.P'a' * m.Cc(20)
563assert(p:match('a') == 20 and p:match('') == 10 and p:match('b') == 10)
564
565
566
567-- look-behind predicate
568assert(not m.match(m.B'a', 'a'))
569assert(m.match(1 * m.B'a', 'a') == 2)
570assert(not m.match(m.B(1), 'a'))
571assert(m.match(1 * m.B(1), 'a') == 2)
572assert(m.match(-m.B(1), 'a') == 1)
573assert(m.match(m.B(250), string.rep('a', 250)) == nil)
574assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251)
575
576-- look-behind with an open call
577checkerr("pattern may not have fixed length", m.B, m.V'S1')
578checkerr("too long to look behind", m.B, 260)
579
580B = #letter * -m.B(letter) + -letter * m.B(letter)
581x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) })
582checkeq(m.match(x, 'ar cal  c'), {1,3,4,7,9,10})
583checkeq(m.match(x, ' ar cal  '), {2,4,5,8})
584checkeq(m.match(x, '   '), {})
585checkeq(m.match(x, 'aloalo'), {1,7})
586
587assert(m.match(B, "a") == 1)
588assert(m.match(1 * B, "a") == 2)
589assert(not m.B(1 - letter):match(""))
590assert((-m.B(letter)):match("") == 1)
591
592assert((4 * m.B(letter, 4)):match("aaaaaaaa") == 5)
593assert(not (4 * m.B(#letter * 5)):match("aaaaaaaa"))
594assert((4 * -m.B(#letter * 5)):match("aaaaaaaa") == 5)
595
596-- look-behind with grammars
597assert(m.match('a' * m.B{'x', x = m.P(3)},  'aaa') == nil)
598assert(m.match('aa' * m.B{'x', x = m.P('aaa')},  'aaaa') == nil)
599assert(m.match('aaa' * m.B{'x', x = m.P('aaa')},  'aaaaa') == 4)
600
601
602
603-- bug in 0.9
604assert(m.match(('a' * #m.P'b'), "ab") == 2)
605assert(not m.match(('a' * #m.P'b'), "a"))
606
607assert(not m.match(#m.S'567', ""))
608assert(m.match(#m.S'567' * 1, "6") == 2)
609
610
611-- tests for Tail Calls
612
613p = m.P{ 'a' * m.V(1) + '' }
614assert(p:match(string.rep('a', 1000)) == 1001)
615
616-- create a grammar for a simple DFA for even number of 0s and 1s
617--
618--  ->1 <---0---> 2
619--    ^           ^
620--    |           |
621--    1           1
622--    |           |
623--    V           V
624--    3 <---0---> 4
625--
626-- this grammar should keep no backtracking information
627
628p = m.P{
629  [1] = '0' * m.V(2) + '1' * m.V(3) + -1,
630  [2] = '0' * m.V(1) + '1' * m.V(4),
631  [3] = '0' * m.V(4) + '1' * m.V(1),
632  [4] = '0' * m.V(3) + '1' * m.V(2),
633}
634
635assert(p:match(string.rep("00", 10000)))
636assert(p:match(string.rep("01", 10000)))
637assert(p:match(string.rep("011", 10000)))
638assert(not p:match(string.rep("011", 10000) .. "1"))
639assert(not p:match(string.rep("011", 10001)))
640
641
642-- this grammar does need backtracking info.
643local lim = 10000
644p = m.P{ '0' * m.V(1) + '0' }
645checkerr("stack overflow", m.match, p, string.rep("0", lim))
646m.setmaxstack(2*lim)
647checkerr("stack overflow", m.match, p, string.rep("0", lim))
648m.setmaxstack(2*lim + 4)
649assert(m.match(p, string.rep("0", lim)) == lim + 1)
650
651-- this repetition should not need stack space (only the call does)
652p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' }
653m.setmaxstack(200)
654assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362)
655
656m.setmaxstack(100)   -- restore low limit
657
658-- tests for optional start position
659assert(m.match("a", "abc", 1))
660assert(m.match("b", "abc", 2))
661assert(m.match("c", "abc", 3))
662assert(not m.match(1, "abc", 4))
663assert(m.match("a", "abc", -3))
664assert(m.match("b", "abc", -2))
665assert(m.match("c", "abc", -1))
666assert(m.match("abc", "abc", -4))   -- truncate to position 1
667
668assert(m.match("", "abc", 10))   -- empty string is everywhere!
669assert(m.match("", "", 10))
670assert(not m.match(1, "", 1))
671assert(not m.match(1, "", -1))
672assert(not m.match(1, "", 0))
673
674print("+")
675
676
677-- tests for argument captures
678checkerr("invalid argument", m.Carg, 0)
679checkerr("invalid argument", m.Carg, -1)
680checkerr("invalid argument", m.Carg, 2^18)
681checkerr("absent extra argument #1", m.match, m.Carg(1), 'a', 1)
682assert(m.match(m.Carg(1), 'a', 1, print) == print)
683x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)}
684checkeq(x, {10, 20})
685
686assert(m.match(m.Cmt(m.Cg(m.Carg(3), "a") *
687                     m.Cmt(m.Cb("a"), function (s,i,x)
688                                        assert(s == "a" and i == 1);
689                                        return i, x+1
690                                      end) *
691                     m.Carg(2), function (s,i,a,b,c)
692                                  assert(s == "a" and i == 1 and c == nil);
693				  return i, 2*a + 3*b
694                                end) * "a",
695               "a", 1, false, 100, 1000) == 2*1001 + 3*100)
696
697
698-- tests for Lua functions
699
700t = {}
701s = ""
702p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i; return nil end) * false
703s = "hi, this is a test"
704assert(m.match(((p - m.P(-1)) + 2)^0, s) == string.len(s) + 1)
705assert(#t == string.len(s)/2 and t[1] == 1 and t[2] == 3)
706
707assert(not m.match(p, s))
708
709p = mt.__add(function (s, i) return i end, function (s, i) return nil end)
710assert(m.match(p, "alo"))
711
712p = mt.__mul(function (s, i) return i end, function (s, i) return nil end)
713assert(not m.match(p, "alo"))
714
715
716t = {}
717p = function (s1, i) assert(s == s1); t[#t + 1] = i; return i end
718s = "hi, this is a test"
719assert(m.match((m.P(1) * p)^0, s) == string.len(s) + 1)
720assert(#t == string.len(s) and t[1] == 2 and t[2] == 3)
721
722t = {}
723p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i;
724                         return i <= s1:len() and i end) * 1
725s = "hi, this is a test"
726assert(m.match(p^0, s) == string.len(s) + 1)
727assert(#t == string.len(s) + 1 and t[1] == 1 and t[2] == 2)
728
729p = function (s1, i) return m.match(m.P"a"^1, s1, i) end
730assert(m.match(p, "aaaa") == 5)
731assert(m.match(p, "abaa") == 2)
732assert(not m.match(p, "baaa"))
733
734checkerr("invalid position", m.match, function () return 2^20 end, s)
735checkerr("invalid position", m.match, function () return 0 end, s)
736checkerr("invalid position", m.match, function (s, i) return i - 1 end, s)
737checkerr("invalid position", m.match,
738             m.P(1)^0 * function (_, i) return i - 1 end, s)
739assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s))
740checkerr("invalid position", m.match,
741             m.P(1)^0 * function (_, i) return i + 1 end, s)
742assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s))
743checkerr("invalid position", m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s)
744assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s))
745assert(m.match(m.P(1)^0 * function (_, i) return true end, s) ==
746       string.len(s) + 1)
747for i = 1, string.len(s) + 1 do
748  assert(m.match(function (_, _) return i end, s) == i)
749end
750
751p = (m.P(function (s, i) return i%2 == 0 and i end) * 1
752  +  m.P(function (s, i) return i%2 ~= 0 and i + 2 <= s:len() and i end) * 3)^0
753  * -1
754assert(p:match(string.rep('a', 14000)))
755
756-- tests for Function Replacements
757f = function (a, ...) if a ~= "x" then return {a, ...} end end
758
759t = m.match(m.C(1)^0/f, "abc")
760checkeq(t, {"a", "b", "c"})
761
762t = m.match(m.C(1)^0/f/f, "abc")
763checkeq(t, {{"a", "b", "c"}})
764
765t = m.match(m.P(1)^0/f/f, "abc")   -- no capture
766checkeq(t, {{"abc"}})
767
768t = m.match((m.P(1)^0/f * m.Cp())/f, "abc")
769checkeq(t, {{"abc"}, 4})
770
771t = m.match((m.C(1)^0/f * m.Cp())/f, "abc")
772checkeq(t, {{"a", "b", "c"}, 4})
773
774t = m.match((m.C(1)^0/f * m.Cp())/f, "xbc")
775checkeq(t, {4})
776
777t = m.match(m.C(m.C(1)^0)/f, "abc")
778checkeq(t, {"abc", "a", "b", "c"})
779
780g = function (...) return 1, ... end
781t = {m.match(m.C(1)^0/g/g, "abc")}
782checkeq(t, {1, 1, "a", "b", "c"})
783
784t = {m.match(m.Cc(nil,nil,4) * m.Cc(nil,3) * m.Cc(nil, nil) / g / g, "")}
785t1 = {1,1,nil,nil,4,nil,3,nil,nil}
786for i=1,10 do assert(t[i] == t1[i]) end
787
788-- bug in 0.12.2: ktable with only nil could be eliminated when joining
789-- with a pattern without ktable
790assert((m.P"aaa" * m.Cc(nil)):match"aaa" == nil)
791
792t = {m.match((m.C(1) / function (x) return x, x.."x" end)^0, "abc")}
793checkeq(t, {"a", "ax", "b", "bx", "c", "cx"})
794
795t = m.match(m.Ct((m.C(1) / function (x,y) return y, x end * m.Cc(1))^0), "abc")
796checkeq(t, {nil, "a", 1, nil, "b", 1, nil, "c", 1})
797
798-- tests for Query Replacements
799
800assert(m.match(m.C(m.C(1)^0)/{abc = 10}, "abc") == 10)
801assert(m.match(m.C(1)^0/{a = 10}, "abc") == 10)
802assert(m.match(m.S("ba")^0/{ab = 40}, "abc") == 40)
803t = m.match(m.Ct((m.S("ba")/{a = 40})^0), "abc")
804checkeq(t, {40})
805
806assert(m.match(m.Cs((m.C(1)/{a=".", d=".."})^0), "abcdde") == ".bc....e")
807assert(m.match(m.Cs((m.C(1)/{f="."})^0), "abcdde") == "abcdde")
808assert(m.match(m.Cs((m.C(1)/{d="."})^0), "abcdde") == "abc..e")
809assert(m.match(m.Cs((m.C(1)/{e="."})^0), "abcdde") == "abcdd.")
810assert(m.match(m.Cs((m.C(1)/{e=".", f="+"})^0), "eefef") == "..+.+")
811assert(m.match(m.Cs((m.C(1))^0), "abcdde") == "abcdde")
812assert(m.match(m.Cs(m.C(m.C(1)^0)), "abcdde") == "abcdde")
813assert(m.match(1 * m.Cs(m.P(1)^0), "abcdde") == "bcdde")
814assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "abcdde") == "abcdde")
815assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "0ab0b0") == "xabxbx")
816assert(m.match(m.Cs((m.C('0')/'x' + m.P(1)/{b=3})^0), "b0a0b") == "3xax3")
817assert(m.match(m.P(1)/'%0%0'/{aa = -3} * 'x', 'ax') == -3)
818assert(m.match(m.C(1)/'%0%1'/{aa = 'z'}/{z = -3} * 'x', 'ax') == -3)
819
820assert(m.match(m.Cs(m.Cc(0) * (m.P(1)/"")), "4321") == "0")
821
822assert(m.match(m.Cs((m.P(1) / "%0")^0), "abcd") == "abcd")
823assert(m.match(m.Cs((m.P(1) / "%0.%0")^0), "abcd") == "a.ab.bc.cd.d")
824assert(m.match(m.Cs((m.P("a") / "%0.%0" + 1)^0), "abcad") == "a.abca.ad")
825assert(m.match(m.C("a") / "%1%%%0", "a") == "a%a")
826assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx")
827assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") ==
828   "411 - abc ")
829
830assert(m.match(m.P(1)/"%0", "abc") == "a")
831checkerr("invalid capture index", m.match, m.P(1)/"%1", "abc")
832checkerr("invalid capture index", m.match, m.P(1)/"%9", "abc")
833
834p = m.C(1)
835p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1"
836assert(p:match("1234567890") == "9 - 1")
837
838assert(m.match(m.Cc(print), "") == print)
839
840-- too many captures (just ignore extra ones)
841p = m.C(1)^0 / "%2-%9-%0-%9"
842assert(p:match"01234567890123456789" == "1-8-01234567890123456789-8")
843s = string.rep("12345678901234567890", 20)
844assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3")
845
846-- string captures with non-string subcaptures
847p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1"
848assert(p:match'x' == 'alo - x - alo')
849
850checkerr("invalid capture value (a boolean)", m.match, m.Cc(true) / "%1", "a")
851
852-- long strings for string capture
853l = 10000
854s = string.rep('a', l) .. string.rep('b', l) .. string.rep('c', l)
855
856p = (m.C(m.P'a'^1) * m.C(m.P'b'^1) * m.C(m.P'c'^1)) / '%3%2%1'
857
858assert(p:match(s) == string.rep('c', l) ..
859                     string.rep('b', l) ..
860                     string.rep('a', l))
861
862print"+"
863
864-- accumulator capture
865function f (x) return x + 1 end
866assert(m.match(m.Cf(m.Cc(0) * m.C(1)^0, f), "alo alo") == 7)
867
868t = {m.match(m.Cf(m.Cc(1,2,3), error), "")}
869checkeq(t, {1})
870p = m.Cf(m.Ct(true) * m.Cg(m.C(m.R"az"^1) * "=" * m.C(m.R"az"^1) * ";")^0,
871         rawset)
872t = p:match("a=b;c=du;xux=yuy;")
873checkeq(t, {a="b", c="du", xux="yuy"})
874
875
876-- errors in accumulator capture
877
878-- no initial capture
879checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa')
880-- no initial capture (very long match forces fold to be a pair open-close)
881checkerr("no initial value", m.match, m.Cf(m.P(500), print),
882                               string.rep('a', 600))
883
884-- nested capture produces no initial value
885checkerr("no initial value", m.match, m.Cf(m.P(1) / {}, print), "alo")
886
887
888-- tests for loop checker
889
890local function isnullable (p)
891  checkerr("may accept empty string", function (p) return p^0 end, m.P(p))
892end
893
894isnullable(m.P("x")^-4)
895assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3)
896assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3)
897isnullable("")
898isnullable(m.P("x")^0)
899isnullable(m.P("x")^-1)
900isnullable(m.P("x") + 1 + 2 + m.P("a")^-1)
901isnullable(-m.P("ab"))
902isnullable(- -m.P("ab"))
903isnullable(# #(m.P("ab") + "xy"))
904isnullable(- #m.P("ab")^0)
905isnullable(# -m.P("ab")^1)
906isnullable(#m.V(3))
907isnullable(m.V(3) + m.V(1) + m.P('a')^-1)
908isnullable({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)})
909assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc")
910       == 3)
911assert(m.match(m.P""^-3, "a") == 1)
912
913local function find (p, s)
914  return m.match(basiclookfor(p), s)
915end
916
917
918local function badgrammar (g, expected)
919  local stat, msg = pcall(m.P, g)
920  assert(not stat)
921  if expected then assert(find(expected, msg)) end
922end
923
924badgrammar({[1] = m.V(1)}, "rule '1'")
925badgrammar({[1] = m.V(2)}, "rule '2'")   -- invalid non-terminal
926badgrammar({[1] = m.V"x"}, "rule 'x'")   -- invalid non-terminal
927badgrammar({[1] = m.V{}}, "rule '(a table)'")   -- invalid non-terminal
928badgrammar({[1] = #m.P("a") * m.V(1)}, "rule '1'")  -- left-recursive
929badgrammar({[1] = -m.P("a") * m.V(1)}, "rule '1'")  -- left-recursive
930badgrammar({[1] = -1 * m.V(1)}, "rule '1'")  -- left-recursive
931badgrammar({[1] = -1 + m.V(1)}, "rule '1'")  -- left-recursive
932badgrammar({[1] = 1 * m.V(2), [2] = m.V(2)}, "rule '2'")  -- left-recursive
933badgrammar({[1] = 1 * m.V(2)^0, [2] = m.P(0)}, "rule '1'")  -- inf. loop
934badgrammar({ m.V(2), m.V(3)^0, m.P"" }, "rule '2'")  -- inf. loop
935badgrammar({ m.V(2) * m.V(3)^0, m.V(3)^0, m.P"" }, "rule '1'")  -- inf. loop
936badgrammar({"x", x = #(m.V(1) * 'a') }, "rule '1'")  -- inf. loop
937badgrammar({ -(m.V(1) * 'a') }, "rule '1'")  -- inf. loop
938badgrammar({"x", x = m.P'a'^-1 * m.V"x"}, "rule 'x'")  -- left recursive
939badgrammar({"x", x = m.P'a' * m.V"y"^1, y = #m.P(1)}, "rule 'x'")
940
941assert(m.match({'a' * -m.V(1)}, "aaa") == 2)
942assert(m.match({'a' * -m.V(1)}, "aaaa") == nil)
943
944
945-- good x bad grammars
946m.P{ ('a' * m.V(1))^-1 }
947m.P{ -('a' * m.V(1)) }
948m.P{ ('abc' * m.V(1))^-1 }
949m.P{ -('abc' * m.V(1)) }
950badgrammar{ #m.P('abc') * m.V(1) }
951badgrammar{ -('a' + m.V(1)) }
952m.P{ #('a' * m.V(1)) }
953badgrammar{ #('a' + m.V(1)) }
954m.P{ m.B{ m.P'abc' } * 'a' * m.V(1) }
955badgrammar{ m.B{ m.P'abc' } * m.V(1) }
956badgrammar{ ('a' + m.P'bcd')^-1 * m.V(1) }
957
958
959-- simple tests for maximum sizes:
960local p = m.P"a"
961for i=1,14 do p = p * p end
962
963p = {}
964for i=1,100 do p[i] = m.P"a" end
965p = m.P(p)
966
967
968-- strange values for rule labels
969
970p = m.P{ "print",
971     print = m.V(print),
972     [print] = m.V(_G),
973     [_G] = m.P"a",
974   }
975
976assert(p:match("a"))
977
978-- initial rule
979g = {}
980for i = 1, 10 do g["i"..i] =  "a" * m.V("i"..i+1) end
981g.i11 = m.P""
982for i = 1, 10 do
983  g[1] = "i"..i
984  local p = m.P(g)
985  assert(p:match("aaaaaaaaaaa") == 11 - i + 1)
986end
987
988print"+"
989
990
991-- tests for back references
992checkerr("back reference 'x' not found", m.match, m.Cb('x'), '')
993checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a')
994
995p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k"))
996t = p:match("ab")
997checkeq(t, {"a", "b"})
998
999p = m.P(true)
1000for i = 1, 10 do p = p * m.Cg(1, i) end
1001for i = 1, 10 do
1002  local p = p * m.Cb(i)
1003  assert(p:match('abcdefghij') == string.sub('abcdefghij', i, i))
1004end
1005
1006
1007t = {}
1008function foo (p) t[#t + 1] = p; return p .. "x" end
1009
1010p = m.Cg(m.C(2)    / foo, "x") * m.Cb"x" *
1011    m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
1012    m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
1013    m.Cg(m.Cb('x') / foo, "x") * m.Cb"x"
1014x = {p:match'ab'}
1015checkeq(x, {'abx', 'abxx', 'abxxx', 'abxxxx'})
1016checkeq(t, {'ab',
1017            'ab', 'abx',
1018            'ab', 'abx', 'abxx',
1019            'ab', 'abx', 'abxx', 'abxxx'})
1020
1021
1022
1023-- tests for match-time captures
1024
1025p = m.P'a' * (function (s, i) return (s:sub(i, i) == 'b') and i + 1 end)
1026  + 'acd'
1027
1028assert(p:match('abc') == 3)
1029assert(p:match('acd') == 4)
1030
1031local function id (s, i, ...)
1032  return true, ...
1033end
1034
1035assert(m.Cmt(m.Cs((m.Cmt(m.S'abc' / { a = 'x', c = 'y' }, id) +
1036              m.R'09'^1 /  string.char +
1037              m.P(1))^0), id):match"acb98+68c" == "xyb\98+\68y")
1038
1039p = m.P{'S',
1040  S = m.V'atom' * space
1041    + m.Cmt(m.Ct("(" * space * (m.Cmt(m.V'S'^1, id) + m.P(true)) * ")" * space), id),
1042  atom = m.Cmt(m.C(m.R("AZ", "az", "09")^1), id)
1043}
1044x = p:match"(a g () ((b) c) (d (e)))"
1045checkeq(x, {'a', 'g', {}, {{'b'}, 'c'}, {'d', {'e'}}});
1046
1047x = {(m.Cmt(1, id)^0):match(string.rep('a', 500))}
1048assert(#x == 500)
1049
1050local function id(s, i, x)
1051  if x == 'a' then return i, 1, 3, 7
1052  else return nil, 2, 4, 6, 8
1053  end
1054end
1055
1056p = ((m.P(id) * 1 + m.Cmt(2, id) * 1  + m.Cmt(1, id) * 1))^0
1057assert(table.concat{p:match('abababab')} == string.rep('137', 4))
1058
1059local function ref (s, i, x)
1060  return m.match(x, s, i - x:len())
1061end
1062
1063assert(m.Cmt(m.P(1)^0, ref):match('alo') == 4)
1064assert((m.P(1) * m.Cmt(m.P(1)^0, ref)):match('alo') == 4)
1065assert(not (m.P(1) * m.Cmt(m.C(1)^0, ref)):match('alo'))
1066
1067ref = function (s,i,x) return i == tonumber(x) and i, 'xuxu' end
1068
1069assert(m.Cmt(1, ref):match'2')
1070assert(not m.Cmt(1, ref):match'1')
1071assert(m.Cmt(m.P(1)^0, ref):match'03')
1072
1073function ref (s, i, a, b)
1074  if a == b then return i, a:upper() end
1075end
1076
1077p = m.Cmt(m.C(m.R"az"^1) * "-" * m.C(m.R"az"^1), ref)
1078p = (any - p)^0 * p * any^0 * -1
1079
1080assert(p:match'abbbc-bc ddaa' == 'BC')
1081
1082do   -- match-time captures cannot be optimized away
1083  local touch = 0
1084  f = m.P(function () touch = touch + 1; return true end)
1085
1086  local function check(n) n = n or 1; assert(touch == n); touch = 0 end
1087
1088  assert(m.match(f * false + 'b', 'a') == nil); check()
1089  assert(m.match(f * false + 'b', '') == nil); check()
1090  assert(m.match( (f * 'a')^0 * 'b', 'b') == 2); check()
1091  assert(m.match( (f * 'a')^0 * 'b', '') == nil); check()
1092  assert(m.match( (f * 'a')^-1 * 'b', 'b') == 2); check()
1093  assert(m.match( (f * 'a')^-1 * 'b', '') == nil); check()
1094  assert(m.match( ('b' + f * 'a')^-1 * 'b', '') == nil); check()
1095  assert(m.match( (m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil); check()
1096  assert(m.match( (-m.P(1) * m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil);
1097     check()
1098  assert(m.match( (f * 'a' + 'b')^-1 * 'b', '') == nil); check()
1099  assert(m.match(f * 'a' + f * 'b', 'b') == 2); check(2)
1100  assert(m.match(f * 'a' + f * 'b', 'a') == 2); check(1)
1101  assert(m.match(-f * 'a' + 'b', 'b') == 2); check(1)
1102  assert(m.match(-f * 'a' + 'b', '') == nil); check(1)
1103end
1104
1105c = '[' * m.Cg(m.P'='^0, "init") * '[' *
1106    { m.Cmt(']' * m.C(m.P'='^0) * ']' * m.Cb("init"), function (_, _, s1, s2)
1107                                               return s1 == s2 end)
1108       + 1 * m.V(1) } / 0
1109
1110assert(c:match'[==[]]====]]]]==]===[]' == 18)
1111assert(c:match'[[]=]====]=]]]==]===[]' == 14)
1112assert(not c:match'[[]=]====]=]=]==]===[]')
1113
1114
1115-- old bug: optimization of concat with fail removed match-time capture
1116p = m.Cmt(0, function (s) p = s end) * m.P(false)
1117assert(not p:match('alo'))
1118assert(p == 'alo')
1119
1120
1121-- ensure that failed match-time captures are not kept on Lua stack
1122do
1123  local t = {__mode = "kv"}; setmetatable(t,t)
1124  local c = 0
1125
1126  local function foo (s,i)
1127    collectgarbage();
1128    assert(next(t) == "__mode" and next(t, "__mode") == nil)
1129    local x = {}
1130    t[x] = true
1131    c = c + 1
1132    return i, x
1133  end
1134
1135  local p = m.P{ m.Cmt(0, foo) * m.P(false) + m.P(1) * m.V(1) + m.P"" }
1136  p:match(string.rep('1', 10))
1137  assert(c == 11)
1138end
1139
1140
1141-- Return a match-time capture that returns 'n' captures
1142local function manyCmt (n)
1143    return m.Cmt("a", function ()
1144             local a = {}; for i = 1, n do a[i] = n - i end
1145             return true, unpack(a)
1146           end)
1147end
1148
1149-- bug in 1.0: failed match-time that used previous match-time results
1150do
1151  local x
1152  local function aux (...) x = #{...}; return false end
1153  local res = {m.match(m.Cmt(manyCmt(20), aux) + manyCmt(10), "a")}
1154  assert(#res == 10 and res[1] == 9 and res[10] == 0)
1155end
1156
1157
1158-- bug in 1.0: problems with math-times returning too many captures
1159do
1160  local lim = 2^11 - 10
1161  local res = {m.match(manyCmt(lim), "a")}
1162  assert(#res == lim and res[1] == lim - 1 and res[lim] == 0)
1163  checkerr("too many", m.match, manyCmt(2^15), "a")
1164end
1165
1166p = (m.P(function () return true, "a" end) * 'a'
1167  + m.P(function (s, i) return i, "aa", 20 end) * 'b'
1168  + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0
1169
1170t = {p:match('abacc')}
1171checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'})
1172
1173
1174-------------------------------------------------------------------
1175-- Tests for 're' module
1176-------------------------------------------------------------------
1177
1178local re = require "re"
1179
1180local match, compile = re.match, re.compile
1181
1182
1183
1184assert(match("a", ".") == 2)
1185assert(match("a", "''") == 1)
1186assert(match("", " ! . ") == 1)
1187assert(not match("a", " ! . "))
1188assert(match("abcde", "  ( . . ) * ") == 5)
1189assert(match("abbcde", " [a-c] +") == 5)
1190assert(match("0abbc1de", "'0' [a-c]+ '1'") == 7)
1191assert(match("0zz1dda", "'0' [^a-c]+ 'a'") == 8)
1192assert(match("abbc--", " [a-c] + +") == 5)
1193assert(match("abbc--", " [ac-] +") == 2)
1194assert(match("abbc--", " [-acb] + ") == 7)
1195assert(not match("abbcde", " [b-z] + "))
1196assert(match("abb\"de", '"abb"["]"de"') == 7)
1197assert(match("abceeef", "'ac' ? 'ab' * 'c' { 'e' * } / 'abceeef' ") == "eee")
1198assert(match("abceeef", "'ac'? 'ab'* 'c' { 'f'+ } / 'abceeef' ") == 8)
1199
1200assert(re.match("aaand", "[a]^2") == 3)
1201
1202local t = {match("abceefe", "( ( & 'e' {} ) ? . ) * ")}
1203checkeq(t, {4, 5, 7})
1204local t = {match("abceefe", "((&&'e' {})? .)*")}
1205checkeq(t, {4, 5, 7})
1206local t = {match("abceefe", "( ( ! ! 'e' {} ) ? . ) *")}
1207checkeq(t, {4, 5, 7})
1208local t = {match("abceefe", "(( & ! & ! 'e' {})? .)*")}
1209checkeq(t, {4, 5, 7})
1210
1211assert(match("cccx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 5)
1212assert(match("cdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 4)
1213assert(match("abcdcdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 8)
1214
1215assert(match("abc", "a <- (. a)?") == 4)
1216b = "balanced <- '(' ([^()] / balanced)* ')'"
1217assert(match("(abc)", b))
1218assert(match("(a(b)((c) (d)))", b))
1219assert(not match("(a(b ((c) (d)))", b))
1220
1221b = compile[[  balanced <- "(" ([^()] / balanced)* ")" ]]
1222assert(b == m.P(b))
1223assert(b:match"((((a))(b)))")
1224
1225local g = [[
1226  S <- "0" B / "1" A / ""   -- balanced strings
1227  A <- "0" S / "1" A A      -- one more 0
1228  B <- "1" S / "0" B B      -- one more 1
1229]]
1230assert(match("00011011", g) == 9)
1231
1232local g = [[
1233  S <- ("0" B / "1" A)*
1234  A <- "0" / "1" A A
1235  B <- "1" / "0" B B
1236]]
1237assert(match("00011011", g) == 9)
1238assert(match("000110110", g) == 9)
1239assert(match("011110110", g) == 3)
1240assert(match("000110010", g) == 1)
1241
1242s = "aaaaaaaaaaaaaaaaaaaaaaaa"
1243assert(match(s, "'a'^3") == 4)
1244assert(match(s, "'a'^0") == 1)
1245assert(match(s, "'a'^+3") == s:len() + 1)
1246assert(not match(s, "'a'^+30"))
1247assert(match(s, "'a'^-30") == s:len() + 1)
1248assert(match(s, "'a'^-5") == 6)
1249for i = 1, s:len() do
1250  assert(match(s, string.format("'a'^+%d", i)) >= i + 1)
1251  assert(match(s, string.format("'a'^-%d", i)) <= i + 1)
1252  assert(match(s, string.format("'a'^%d", i)) == i + 1)
1253end
1254assert(match("01234567890123456789", "[0-9]^3+") == 19)
1255
1256
1257assert(match("01234567890123456789", "({....}{...}) -> '%2%1'") == "4560123")
1258t = match("0123456789", "{| {.}* |}")
1259checkeq(t, {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"})
1260assert(match("012345", "{| (..) -> '%0%0' |}")[1] == "0101")
1261
1262assert(match("abcdef", "( {.} {.} {.} {.} {.} ) -> 3") == "c")
1263assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 3") == "d")
1264assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 0") == 6)
1265
1266assert(not match("abcdef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
1267assert(match("abcbef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
1268
1269eqcharset(compile"[]]", "]")
1270eqcharset(compile"[][]", m.S"[]")
1271eqcharset(compile"[]-]", m.S"-]")
1272eqcharset(compile"[-]", m.S"-")
1273eqcharset(compile"[az-]", m.S"a-z")
1274eqcharset(compile"[-az]", m.S"a-z")
1275eqcharset(compile"[a-z]", m.R"az")
1276eqcharset(compile"[]['\"]", m.S[[]['"]])
1277
1278eqcharset(compile"[^]]", any - "]")
1279eqcharset(compile"[^][]", any - m.S"[]")
1280eqcharset(compile"[^]-]", any - m.S"-]")
1281eqcharset(compile"[^]-]", any - m.S"-]")
1282eqcharset(compile"[^-]", any - m.S"-")
1283eqcharset(compile"[^az-]", any - m.S"a-z")
1284eqcharset(compile"[^-az]", any - m.S"a-z")
1285eqcharset(compile"[^a-z]", any - m.R"az")
1286eqcharset(compile"[^]['\"]", any - m.S[[]['"]])
1287
1288-- tests for comments in 're'
1289e = compile[[
1290A  <- _B   -- \t \n %nl .<> <- -> --
1291_B <- 'x'  --]]
1292assert(e:match'xy' == 2)
1293
1294-- tests for 're' with pre-definitions
1295defs = {digits = m.R"09", letters = m.R"az", _=m.P"__"}
1296e = compile("%letters (%letters / %digits)*", defs)
1297assert(e:match"x123" == 5)
1298e = compile("%_", defs)
1299assert(e:match"__" == 3)
1300
1301e = compile([[
1302  S <- A+
1303  A <- %letters+ B
1304  B <- %digits+
1305]], defs)
1306
1307e = compile("{[0-9]+'.'?[0-9]*} -> sin", math)
1308assert(e:match("2.34") == math.sin(2.34))
1309
1310
1311function eq (_, _, a, b) return a == b end
1312
1313c = re.compile([[
1314  longstring <- '[' {:init: '='* :} '[' close
1315  close <- ']' =init ']' / . close
1316]])
1317
1318assert(c:match'[==[]]===]]]]==]===[]' == 17)
1319assert(c:match'[[]=]====]=]]]==]===[]' == 14)
1320assert(not c:match'[[]=]====]=]=]==]===[]')
1321
1322c = re.compile" '[' {:init: '='* :} '[' (!(']' =init ']') .)* ']' =init ']' !. "
1323
1324assert(c:match'[==[]]===]]]]==]')
1325assert(c:match'[[]=]====]=][]==]===[]]')
1326assert(not c:match'[[]=]====]=]=]==]===[]')
1327
1328assert(re.find("hi alalo", "{:x:..:} =x") == 4)
1329assert(re.find("hi alalo", "{:x:..:} =x", 4) == 4)
1330assert(not re.find("hi alalo", "{:x:..:} =x", 5))
1331assert(re.find("hi alalo", "{'al'}", 5) == 6)
1332assert(re.find("hi aloalolo", "{:x:..:} =x") == 8)
1333assert(re.find("alo alohi x x", "{:word:%w+:}%W*(=word)!%w") == 11)
1334
1335-- re.find discards any captures
1336local a,b,c = re.find("alo", "{.}{'o'}")
1337assert(a == 2 and b == 3 and c == nil)
1338
1339local function match (s,p)
1340  local i,e = re.find(s,p)
1341  if i then return s:sub(i, e) end
1342end
1343assert(match("alo alo", '[a-z]+') == "alo")
1344assert(match("alo alo", '{:x: [a-z]+ :} =x') == nil)
1345assert(match("alo alo", "{:x: [a-z]+ :} ' ' =x") == "alo alo")
1346
1347assert(re.gsub("alo alo", "[abc]", "x") == "xlo xlo")
1348assert(re.gsub("alo alo", "%w+", ".") == ". .")
1349assert(re.gsub("hi, how are you", "[aeiou]", string.upper) ==
1350               "hI, hOw ArE yOU")
1351
1352s = 'hi [[a comment[=]=] ending here]] and [=[another]]=]]'
1353c = re.compile" '[' {:i: '='* :} '[' (!(']' =i ']') .)* ']' { =i } ']' "
1354assert(re.gsub(s, c, "%2") == 'hi  and =]')
1355assert(re.gsub(s, c, "%0") == s)
1356assert(re.gsub('[=[hi]=]', c, "%2") == '=')
1357
1358assert(re.find("", "!.") == 1)
1359assert(re.find("alo", "!.") == 4)
1360
1361function addtag (s, i, t, tag) t.tag = tag; return i, t end
1362
1363c = re.compile([[
1364  doc <- block !.
1365  block <- (start {| (block / { [^<]+ })* |} end?) => addtag
1366  start <- '<' {:tag: [a-z]+ :} '>'
1367  end <- '</' { =tag } '>'
1368]], {addtag = addtag})
1369
1370x = c:match[[
1371<x>hi<b>hello</b>but<b>totheend</x>]]
1372checkeq(x, {tag='x', 'hi', {tag = 'b', 'hello'}, 'but',
1373                     {'totheend'}})
1374
1375
1376-- test for folding captures
1377c = re.compile([[
1378  S <- (number (%s+ number)*) ~> add
1379  number <- %d+ -> tonumber
1380]], {tonumber = tonumber, add = function (a,b) return a + b end})
1381assert(c:match("3 401 50") == 3 + 401 + 50)
1382
1383-- tests for look-ahead captures
1384x = {re.match("alo", "&(&{.}) !{'b'} {&(...)} &{..} {...} {!.}")}
1385checkeq(x, {"", "alo", ""})
1386
1387assert(re.match("aloalo",
1388   "{~ (((&'al' {.}) -> 'A%1' / (&%l {.}) -> '%1%1') / .)* ~}")
1389       == "AallooAalloo")
1390
1391-- bug in 0.9 (and older versions), due to captures in look-aheads
1392x = re.compile[[   {~ (&(. ([a-z]* -> '*')) ([a-z]+ -> '+') ' '*)* ~}  ]]
1393assert(x:match"alo alo" == "+ +")
1394
1395-- valid capture in look-ahead (used inside the look-ahead itself)
1396x = re.compile[[
1397      S <- &({:two: .. :} . =two) {[a-z]+} / . S
1398]]
1399assert(x:match("hello aloaLo aloalo xuxu") == "aloalo")
1400
1401
1402p = re.compile[[
1403  block <- {| {:ident:space*:} line
1404           ((=ident !space line) / &(=ident space) block)* |}
1405  line <- {[^%nl]*} %nl
1406  space <- '_'     -- should be ' ', but '_' is simpler for editors
1407]]
1408
1409t= p:match[[
14101
1411__1.1
1412__1.2
1413____1.2.1
1414____
14152
1416__2.1
1417]]
1418checkeq(t, {"1", {"1.1", "1.2", {"1.2.1", "", ident = "____"}, ident = "__"},
1419            "2", {"2.1", ident = "__"}, ident = ""})
1420
1421
1422-- nested grammars
1423p = re.compile[[
1424       s <- a b !.
1425       b <- ( x <- ('b' x)? )
1426       a <- ( x <- 'a' x? )
1427]]
1428
1429assert(p:match'aaabbb')
1430assert(p:match'aaa')
1431assert(not p:match'bbb')
1432assert(not p:match'aaabbba')
1433
1434-- testing groups
1435t = {re.match("abc", "{:S <- {:.:} {S} / '':}")}
1436checkeq(t, {"a", "bc", "b", "c", "c", ""})
1437
1438t = re.match("1234", "{| {:a:.:} {:b:.:} {:c:.{.}:} |}")
1439checkeq(t, {a="1", b="2", c="4"})
1440t = re.match("1234", "{|{:a:.:} {:b:{.}{.}:} {:c:{.}:}|}")
1441checkeq(t, {a="1", b="2", c="4"})
1442t = re.match("12345", "{| {:.:} {:b:{.}{.}:} {:{.}{.}:} |}")
1443checkeq(t, {"1", b="2", "4", "5"})
1444t = re.match("12345", "{| {:.:} {:{:b:{.}{.}:}:} {:{.}{.}:} |}")
1445checkeq(t, {"1", "23", "4", "5"})
1446t = re.match("12345", "{| {:.:} {{:b:{.}{.}:}} {:{.}{.}:} |}")
1447checkeq(t, {"1", "23", "4", "5"})
1448
1449
1450-- testing pre-defined names
1451assert(os.setlocale("C") == "C")
1452
1453function eqlpeggsub (p1, p2)
1454  local s1 = cs2str(re.compile(p1))
1455  local s2 = string.gsub(allchar, "[^" .. p2 .. "]", "")
1456  -- if s1 ~= s2 then print(#s1,#s2) end
1457  assert(s1 == s2)
1458end
1459
1460
1461eqlpeggsub("%w", "%w")
1462eqlpeggsub("%a", "%a")
1463eqlpeggsub("%l", "%l")
1464eqlpeggsub("%u", "%u")
1465eqlpeggsub("%p", "%p")
1466eqlpeggsub("%d", "%d")
1467eqlpeggsub("%x", "%x")
1468eqlpeggsub("%s", "%s")
1469eqlpeggsub("%c", "%c")
1470
1471eqlpeggsub("%W", "%W")
1472eqlpeggsub("%A", "%A")
1473eqlpeggsub("%L", "%L")
1474eqlpeggsub("%U", "%U")
1475eqlpeggsub("%P", "%P")
1476eqlpeggsub("%D", "%D")
1477eqlpeggsub("%X", "%X")
1478eqlpeggsub("%S", "%S")
1479eqlpeggsub("%C", "%C")
1480
1481eqlpeggsub("[%w]", "%w")
1482eqlpeggsub("[_%w]", "_%w")
1483eqlpeggsub("[^%w]", "%W")
1484eqlpeggsub("[%W%S]", "%W%S")
1485
1486re.updatelocale()
1487
1488
1489-- testing nested substitutions x string captures
1490
1491p = re.compile[[
1492      text <- {~ item* ~}
1493      item <- macro / [^()] / '(' item* ')'
1494      arg <- ' '* {~ (!',' item)* ~}
1495      args <- '(' arg (',' arg)* ')'
1496      macro <- ('apply' args) -> '%1(%2)'
1497             / ('add' args) -> '%1 + %2'
1498             / ('mul' args) -> '%1 * %2'
1499]]
1500
1501assert(p:match"add(mul(a,b), apply(f,x))" == "a * b + f(x)")
1502
1503rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]
1504
1505assert(rev:match"0123456789" == "9876543210")
1506
1507
1508-- testing error messages in re
1509
1510local function errmsg (p, err)
1511  checkerr(err, re.compile, p)
1512end
1513
1514errmsg('aaaa', "rule 'aaaa'")
1515errmsg('a', 'outside')
1516errmsg('b <- a', 'undefined')
1517errmsg("x <- 'a'  x <- 'b'", 'already defined')
1518errmsg("'a' -", "near '-'")
1519
1520
1521print"OK"
1522
1523
1524