xref: /freebsd/libexec/nuageinit/yaml.lua (revision 5f757f3f)
1-- SPDX-License-Identifier: MIT
2--
3-- Copyright (c) 2017 Dominic Letz dominicletz@exosite.com
4
local table_print_value
--- Render a Lua value as source-like text.
-- Strings are quoted with %q, tables are expanded recursively with their
-- keys ordered by tostring(key), and everything else goes through tostring.
-- @param value  the value to render
-- @param indent current indentation in spaces (defaults to 0)
-- @param done   set of tables currently being expanded; a table that is
--               already in this set is rendered with tostring to stop cycles
-- @return the rendered string
table_print_value = function(value, indent, done)
  indent = indent or 0
  done = done or {}

  local kind = type(value)
  if kind == "string" then
    return string.format("%q", value)
  end
  if kind ~= "table" or done[value] then
    return tostring(value)
  end

  done[value] = true

  -- Collect the keys and order them by their string form so the output
  -- does not depend on pairs() iteration order.
  local keys = {}
  for k in pairs(value) do
    keys[#keys + 1] = k
  end
  table.sort(keys, function(a, b) return tostring(a) < tostring(b) end)

  local pad = string.rep(" ", indent + 2)
  local count = #keys
  local pieces = { "{\n" }
  for i = 1, count do
    local key = keys[i]
    local label = key
    if type(key) ~= "number" then
      label = string.format("%q", tostring(key))
    end
    -- Every entry but the last one is followed by a comma.
    pieces[#pieces + 1] = string.format(
      "%s[%s] = %s%s\n",
      pad,
      label,
      table_print_value(value[key], indent + 2, done),
      i < count and "," or ""
    )
  end
  pieces[#pieces + 1] = string.rep(" ", indent)
  pieces[#pieces + 1] = "}"

  -- Leaving the table: it may legitimately appear again elsewhere.
  done[value] = false
  return table.concat(pieces)
end
53
-- Write the rendering of `tt` to stdout, prefixed with "return ".
local function table_print(tt)
  local rendered = table_print_value(tt)
  print('return ' .. rendered)
end
57
-- Return a shallow copy of `t`: same keys, same (uncopied) values.
local function table_clone(t)
  local copy = {}
  for key, val in pairs(t) do
    copy[key] = val
  end
  return copy
end
65
--- Strip leading and trailing characters from a string.
-- @param s    the string to trim
-- @param what characters to strip (defaults to a single space). The value
--             is spliced verbatim into a pattern character class, so it
--             must not contain unescaped set-magic characters such as
--             `]`, `^`, `%` or `-`.
-- @return the trimmed string, and nothing else
local string_trim = function(s, what)
  what = what or " "
  -- gsub returns (string, count); keep only the string so the count can
  -- never leak into a caller's argument list or table constructor.
  local trimmed = s:gsub("^[" .. what .. "]*(.-)[" .. what .. "]*$", "%1")
  return trimmed
end
70
-- Append `item` after the last element of the array-like table `stack`.
local function push(stack, item)
  local top = #stack
  stack[top + 1] = item
end
74
-- Remove the last element of the array-like table `stack` and return it
-- (nil when the stack is empty).
local function pop(stack)
  local top = #stack
  local item = stack[top]
  stack[top] = nil
  return item
end
80
-- Build the `, near "..."` suffix used in error messages.
-- Non-string input yields an empty suffix. Otherwise the first 25
-- characters are quoted, with newlines and double quotes backslash-escaped.
local function context(str)
  if type(str) == "string" then
    local snippet = str:sub(0, 25)
    snippet = snippet:gsub("\n", "\\n")
    snippet = snippet:gsub("\"", "\\\"")
    return ", near \"" .. snippet .. "\""
  end
  return ""
end
89
local Parser = {}

--- Create a parser over a token list.
-- Intended to be called as `Parser:new(tokens)`. Each call returns a fresh
-- object that holds its own state and looks methods up on the receiver, so
-- the shared `Parser` table is never mutated and separate parsers cannot
-- overwrite each other's position, reference table or parse stack.
-- @param self   the prototype to inherit methods from (normally `Parser`)
-- @param tokens token list as produced by the tokenizer
-- @return the new parser object
function Parser.new (self, tokens)
  local parser = setmetatable({}, { __index = self })
  parser.tokens = tokens
  parser.parse_stack = {}
  parser.refs = {}
  parser.current = 0
  return parser
end
98
local exports = {version = "1.2"}

-- Build an anchored pattern for the keyword `w`: capture 1 is the keyword,
-- capture 2 is the single character that follows it.
-- NOTE(review): inside a Lua character class `$` is a literal dollar sign,
-- not an end-of-input anchor, so the pattern always needs one more
-- character after the keyword -- confirm that this is intended.
local function word(w)
  return "^(" .. w .. ")([%s$%c])"
end
102
-- Token rules, tried in order by the tokenizer; the first rule whose
-- pattern matches at the start of the remaining input wins, so the order
-- of the entries is significant.
--
-- Each rule is { type, pattern, ...flags }:
--   const      - keyword literal; `value` is what the parser yields for it.
--                (`value = nil` stores nothing in the table, so null rules
--                simply have no `value` field.)
--   force_text - quoted string; the tokenizer never reclassifies it as a
--                number.
--   noinline   - rule is skipped while the tokenizer is in inline mode,
--                i.e. after a "{" or "[" on the current line.
--   sep        - separator the parser uses to join the lines of a block
--                scalar introduced by this pipe token.
local tokens = {
  {"comment",   "^#[^\n]*"},
  {"indent",    "^\n( *)"},
  {"space",     "^ +"},
  -- Keyword constants. The patterns come from word(), which also captures
  -- the character following the keyword; the tokenizer puts that character
  -- back onto the input.
  {"true",      word("enabled"),  const = true, value = true},
  {"true",      word("true"),     const = true, value = true},
  {"true",      word("yes"),      const = true, value = true},
  {"true",      word("on"),      const = true, value = true},
  {"false",     word("disabled"), const = true, value = false},
  {"false",     word("false"),    const = true, value = false},
  {"false",     word("no"),       const = true, value = false},
  {"false",     word("off"),      const = true, value = false},
  {"null",      word("null"),     const = true, value = nil},
  {"null",      word("Null"),     const = true, value = nil},
  {"null",      word("NULL"),     const = true, value = nil},
  {"null",      word("~"),        const = true, value = nil},
  -- Quoted keys (a quoted string followed by ":" and one more character)
  -- must be tried before plain quoted strings.
  {"id",    "^\"([^\"]-)\" *(:[%s%c])"},
  {"id",    "^'([^']-)' *(:[%s%c])"},
  {"string",    "^\"([^\"]-)\"",  force_text = true},
  {"string",    "^'([^']-)'",    force_text = true},
  -- Timestamps, from the most complete form down to a bare date.
  {"timestamp", "^(%d%d%d%d)-(%d%d?)-(%d%d?)%s+(%d%d?):(%d%d):(%d%d)%s+(%-?%d%d?):(%d%d)"},
  {"timestamp", "^(%d%d%d%d)-(%d%d?)-(%d%d?)%s+(%d%d?):(%d%d):(%d%d)%s+(%-?%d%d?)"},
  {"timestamp", "^(%d%d%d%d)-(%d%d?)-(%d%d?)%s+(%d%d?):(%d%d):(%d%d)"},
  {"timestamp", "^(%d%d%d%d)-(%d%d?)-(%d%d?)%s+(%d%d?):(%d%d)"},
  {"timestamp", "^(%d%d%d%d)-(%d%d?)-(%d%d?)%s+(%d%d?)"},
  {"timestamp", "^(%d%d%d%d)-(%d%d?)-(%d%d?)"},
  {"doc",       "^%-%-%-[^%c]*"},
  {",",         "^,"},
  -- A balanced {...} or [...] followed by more text on the same line is
  -- taken as one plain string (outside inline mode only); otherwise the
  -- bracket rules below apply.
  {"string",    "^%b{} *[^,%c]+", noinline = true},
  {"{",         "^{"},
  {"}",         "^}"},
  {"string",    "^%b[] *[^,%c]+", noinline = true},
  {"[",         "^%["},
  {"]",         "^%]"},
  {"-",         "^%-", noinline = true},
  {":",         "^:"},
  -- Block scalar indicators: "|" joins lines with newlines, ">" with spaces.
  {"pipe",      "^(|)(%d*[+%-]?)", sep = "\n"},
  {"pipe",      "^(>)(%d*[+%-]?)", sep = " "},
  -- Unquoted key: word characters, spaces, "-" and "_" up to ":" plus one
  -- following character.
  {"id",        "^([%w][%w %-_]*)(:[%s%c])"},
  -- Fallbacks: the rest of the line (outside inline mode), or a run of
  -- characters up to the next comma, closing bracket, control character
  -- or space.
  {"string",    "^[^%c]+", noinline = true},
  {"string",    "^[^,%]}%c ]+"}
};
--- Split YAML text into a flat list of tokens.
-- The input is consumed from the front: on every pass the rules in
-- `tokens` are tried in order and the first one that matches is taken.
-- Each emitted token is a shallow copy of its rule where [1] is the token
-- type and [2] is the list of pattern captures (plus `input`, the first
-- 25 characters of the remaining text, kept for error messages); `raw`
-- holds the consumed text and `row` the line counter at that point.
-- Comments and same-level line breaks are dropped, and changes in
-- indentation are turned into "indent" / "dedent" tokens.
-- @param str YAML source text
-- @return array of tokens
-- Raises "SyntaxError" errors for over-deep indentation and for input
-- that no rule matches.
exports.tokenize = function (str)
  local token
  local row = 0
  local ignore
  local indents = 0
  local lastIndents
  local stack = {}
  local indentAmount = 0
  local inline = false
  -- Normalise CRLF line endings to LF ("\010").
  str = str:gsub("\r\n","\010")

  while #str > 0 do
    for i in ipairs(tokens) do
      local captures = {}
      -- Rules flagged `noinline` are not tried while in inline mode.
      if not inline or tokens[i].noinline == nil then
        captures = {str:match(tokens[i][2])}
      end

      if #captures > 0 then
        captures.input = str:sub(0, 25)
        token = table_clone(tokens[i])
        token[2] = captures
        -- Remove the matched text; `raw` is exactly what was consumed.
        local str2 = str:gsub(tokens[i][2], "", 1)
        token.raw = str:sub(1, #str - #str2)
        str = str2

        if token[1] == "{" or token[1] == "[" then
          -- Inline mode lasts until the next "indent" (line break) token.
          inline = true
        elseif token.const then
          -- Since word pattern contains last char we're re-adding it
          str = token[2][2] .. str
          token.raw = token.raw:sub(1, #token.raw - #token[2][2])
        elseif token[1] == "id" then
          -- Since id pattern contains last semi-colon we're re-adding it
          str = token[2][2] .. str
          token.raw = token.raw:sub(1, #token.raw - #token[2][2])
          -- Trim
          token[2][1] = string_trim(token[2][1])
        elseif token[1] == "string" then
          -- Finding numbers
          -- Unquoted text that looks like an integer or a decimal number
          -- is retyped as "number".
          local snip = token[2][1]
          if not token.force_text then
            if snip:match("^(-?%d+%.%d+)$") or snip:match("^(-?%d+)$") then
              token[1] = "number"
            end
          end

        elseif token[1] == "comment" then
          ignore = true;
        elseif token[1] == "indent" then
          row = row + 1
          inline = false
          lastIndents = indents
          -- The first non-empty indentation seen defines the width of one
          -- indentation level for the rest of the input.
          if indentAmount == 0 then
            indentAmount = #token[2][1]
          end

          if indentAmount ~= 0 then
            indents = (#token[2][1] / indentAmount);
          else
            indents = 0
          end

          if indents == lastIndents then
            -- Same level: a plain line break, nothing to emit.
            ignore = true;
          elseif indents > lastIndents + 2 then
            error("SyntaxError: invalid indentation, got " .. tostring(indents)
              .. " instead of " .. tostring(lastIndents) .. context(token[2].input))
          elseif indents > lastIndents + 1 then
            -- Deeper by more than one level: the indent token is emitted
            -- twice (once here, once by the common push below).
            push(stack, token)
          elseif indents < lastIndents then
            -- Shallower: emit one "dedent" token per level left; all but
            -- the last are pushed here, the last by the common push below.
            local input = token[2].input
            token = {"dedent", {"", input = ""}}
            token.input = input
            while lastIndents > indents + 1 do
              lastIndents = lastIndents - 1
              push(stack, token)
            end
          end
        end -- if token[1] == XXX
        token.row = row
        break
      end -- if #captures > 0
    end

    -- NOTE(review): `token` is only cleared when it is pushed, so after an
    -- ignored token an unmatched input pushes that stale token once before
    -- the next pass raises the SyntaxError -- confirm this is harmless.
    if not ignore then
      if token then
        push(stack, token)
        token = nil
      else
        error("SyntaxError " .. context(str))
      end
    end

    ignore = false;
  end

  return stack
end
244
-- Return the token `offset` positions away from the last consumed one
-- without consuming anything; `offset` defaults to 1, i.e. the next token.
function Parser:peek(offset)
  local index = (offset or 1) + self.current
  return self.tokens[index]
end
249
-- Consume the next token and return it (nil past the end of the list).
function Parser:advance()
  local position = self.current + 1
  self.current = position
  return self.tokens[position]
end
254
-- Consume the next token and return its first capture.
function Parser:advanceValue()
  local captures = self:advance()[2]
  return captures[1]
end
258
-- Consume and return the next token if it has the given type; otherwise
-- leave the position untouched and return nothing.
function Parser:accept(type)
  if not self:peekType(type) then
    return
  end
  return self:advance()
end
264
--- Consume the next token, which must be of the given type.
-- @param type expected token type
-- @param msg  error message used when the next token has another type;
--             a generic message is used when omitted
-- @return the consumed token
-- Raises an error made of `msg` plus a ", near ..." snippet of the
-- offending input when the next token does not match.
Parser.expect = function (self, type, msg)
  local token = self:accept(type)
  if token then
    return token
  end

  -- The input snippet is stored on the capture list (token[2].input);
  -- dedent tokens carry it directly on the token instead. token[1] is the
  -- type name, a plain string, and has no `input` field.
  local upcoming = self:peek()
  local input = upcoming and (upcoming.input or upcoming[2].input)
  error((msg or ("expected " .. tostring(type))) .. context(input))
end
269
-- Require the end of an indented block: consume and return a "dedent"
-- token if one is next, return true at the end of the token list, and
-- otherwise raise `msg` followed by a snippet of the offending input.
function Parser:expectDedent(msg)
  local dedent = self:accept("dedent")
  if dedent then
    return dedent
  end

  local upcoming = self:peek()
  if upcoming == nil then
    return true
  end
  error(msg .. context(upcoming[2].input))
end
274
-- Tell whether the token at `offset` (see peek) has type `val`.
-- Returns nil when there is no token at that position.
function Parser:peekType(val, offset)
  local token = self:peek(offset)
  if not token then
    return token
  end
  return token[1] == val
end
278
-- Skip over any run of upcoming tokens whose type is listed in `items`.
-- The list is rescanned until a full pass consumes nothing.
function Parser:ignore(items)
  local skipped = true
  while skipped do
    skipped = false
    for _, kind in pairs(items) do
      if self:peekType(kind) then
        self:advance()
        skipped = true
      end
    end
  end
end
291
-- Skip any upcoming "space" tokens.
function Parser:ignoreSpace()
  self:ignore({ "space" })
end
295
-- Skip any upcoming "space", "indent" and "dedent" tokens.
function Parser:ignoreWhitespace()
  self:ignore({ "space", "indent", "dedent" })
end
299
--- Parse one value starting at the current position.
-- Handles an optional `&name` anchor or `*name` alias in front of the
-- value, an optional leading indent, and then dispatches on the type of
-- the next token.
-- @return the parsed Lua value (nil for YAML null and unknown aliases)
-- Raises "ParseError" when the input ends where a value is required or
-- when the next token cannot start a value.
Parser.parse = function (self)

  local ref = nil
  if self:peekType("string") and not self:peek().force_text then
    local text = self:peek()[2][1]
    local char = text:sub(1,1)
    if char == "&" then
      -- Anchor: remember the name, the value that follows is stored below.
      ref = text:sub(2)
      self:advanceValue()
      self:ignoreSpace()
    elseif char == "*" then
      -- Alias: the token has to be consumed, otherwise the caller finds it
      -- again and either stops early or reports a bogus syntax error.
      self:advanceValue()
      return self.refs[text:sub(2)]
    end
  end

  -- Take the optional indent first, then look at the token behind it.
  local indented = self:accept("indent")
  local c = {
    indent = indented and 1 or 0,
    token = self:peek()
  }
  if c.token == nil then
    error("ParseError: unexpected end of input")
  end
  push(self.parse_stack, c)

  local result
  if c.token[1] == "doc" then
    result = self:parseDoc()
  elseif c.token[1] == "-" then
    result = self:parseList()
  elseif c.token[1] == "{" then
    result = self:parseInlineHash()
  elseif c.token[1] == "[" then
    result = self:parseInlineList()
  elseif c.token[1] == "id" then
    result = self:parseHash()
  elseif c.token[1] == "string" then
    result = self:parseString("\n")
  elseif c.token[1] == "timestamp" then
    result = self:parseTimestamp()
  elseif c.token[1] == "number" then
    result = tonumber(self:advanceValue())
  elseif c.token[1] == "pipe" then
    result = self:parsePipe()
  elseif c.token.const == true then
    self:advanceValue();
    result = c.token.value
  else
    -- The input snippet lives on the capture list; dedent tokens carry it
    -- on the token itself.
    error("ParseError: unexpected token '" .. c.token[1] .. "'"
      .. context(c.token.input or c.token[2].input))
  end

  pop(self.parse_stack)
  -- An indent consumed above must be closed by a matching dedent.
  while c.indent > 0 do
    c.indent = c.indent - 1
    local term = "term "..c.token[1]..": '"..c.token[2][1].."'"
    self:expectDedent("last ".. term .." is not properly dedented")
  end

  if ref then
    self.refs[ref] = result
  end
  return result
end
359
-- Skip a document marker token, if present, and parse what follows it.
function Parser:parseDoc()
  self:accept("doc")
  local document = self:parse()
  return document
end
364
-- Look backwards from the last consumed token over the tokens that share
-- its row, stopping at an "indent" token or at the start of the list.
-- Returns a set of the token types seen and the number of tokens visited
-- (an empty set and 0 when nothing has been consumed yet).
function Parser:inline()
  local anchor = self:peek(0)
  local seen, steps = {}, 0
  if not anchor then
    return seen, steps
  end

  while true do
    local token = self:peek(-steps)
    if not token or token[1] == "indent" or token.row ~= anchor.row then
      break
    end
    seen[token[1]] = true
    steps = steps + 1
  end
  return seen, steps
end
380
-- True when inline() visits at least one token.
function Parser:isInline()
  return select(2, self:inline()) > 0
end
385
-- Return the parse-stack frame `level` entries below the top one
-- (`level` defaults to 1), or nil when there is no such frame.
function Parser:parent(level)
  local depth = #self.parse_stack
  return self.parse_stack[depth - (level or 1)]
end
390
-- Tell whether the frame returned by parent(level) started on a token of
-- the given type; returns nil when there is no such frame.
function Parser:parentType(type, level)
  local frame = self:parent(level)
  return frame and frame.token[1] == type
end
394
-- Parse a plain string value, including continuation lines.
function Parser:parseString()
  -- Nothing precedes the string on its row: the whole value is a block,
  --   b:
  --     this is also
  --     a flowing string
  if not self:isInline() then
    return self:parseTextBlock("\n")
  end

  local text = self:advanceValue()

  -- Inside a list item that holds a hash, the following lines are further
  -- keys rather than continuation text, unless two indents follow:
  --   - a: this looks
  --     flowing: but is
  --     no: string
  local seen = self:inline()
  if seen["id"] and seen["-"] then
    if not (self:peekType("indent") and self:peekType("indent", 2)) then
      return text
    end
  end

  -- No indented continuation: the value is just this one line.
  if not self:peekType("indent") then
    return text
  end

  -- Indented continuation lines belong to the string:
  --   b: this is
  --     a flowing string
  self:expect("indent", "text block needs to start with indent")
  local extra = self:accept("indent")

  text = text .. "\n" .. self:parseTextBlock("\n")

  self:expectDedent("text block ending dedent missing")
  if extra then
    self:expectDedent("text block ending dedent missing")
  end
  return text
end
442
--- Parse a block scalar introduced by a pipe token.
-- The lines of the indented block that follows are joined with the
-- separator stored on the pipe token (`sep`).
-- @return the block text
-- Raises an error when the pipe token, the opening indent or the closing
-- dedent is missing.
Parser.parsePipe = function (self)
  -- Always pass a message: without one a failed expect would die on a nil
  -- concatenation instead of reporting the parse error.
  local pipe = self:expect("pipe", "expected block scalar indicator")
  self:expect("indent", "text block needs to start with indent")
  local result = self:parseTextBlock(pipe.sep)
  self:expectDedent("text block ending dedent missing")
  return result
end
450
-- Collect the raw text of consecutive tokens into one string.
-- Reading stops at the end of the token list or at the first "dedent"
-- that is not matched by an "indent" seen inside the block. `sep` is
-- inserted once for every row that separates consecutive tokens.
function Parser:parseTextBlock(sep)
  local first = self:advance()
  local text = string_trim(first.raw, "\n")
  local depth = 0

  while true do
    if self:peek() == nil then
      break
    end
    if depth <= 0 and self:peekType("dedent") then
      break
    end

    local piece = self:advance()
    -- The first token's row is advanced in place and serves as the
    -- "current row" of the block.
    while first.row < piece.row do
      text = text .. sep
      first.row = first.row + 1
    end

    local kind = piece[1]
    if kind == "indent" then
      depth = depth + 1
    elseif kind == "dedent" then
      depth = depth - 1
    else
      text = text .. string_trim(piece.raw, "\n")
    end
  end
  return text
end
471
-- Parse a block mapping ("key: value" lines) into `hash`, which is
-- created when not supplied, and return it.
function Parser:parseHash(hash)
  hash = hash or {}
  local pending = 0

  -- First pair, when the key shares its row with earlier tokens: an indent
  -- may appear before or after the value and has to be closed at the end.
  if self:isInline() then
    local key = self:advanceValue()
    self:expect(":", "expected semi-colon after id")
    self:ignoreSpace()
    local indented = self:accept("indent")
    hash[key] = self:parse()
    if not indented then
      indented = self:accept("indent")
    end
    if indented then
      pending = pending + 1
    end
    self:ignoreSpace()
  end

  -- Remaining pairs at the same level.
  while self:peekType("id") do
    local key = self:advanceValue()
    self:expect(":","expected semi-colon after id")
    self:ignoreSpace()
    hash[key] = self:parse()
    self:ignoreSpace()
  end

  for _ = 1, pending do
    self:expectDedent("expected dedent")
  end

  return hash
end
507
-- Parse a flow mapping: "{", comma-separated "key: value" pairs, "}".
function Parser:parseInlineHash()
  local hash = {}
  local first = true

  self:accept("{")
  while not self:accept("}") do
    self:ignoreSpace()
    -- Every entry after the first must be preceded by a comma.
    if not first then
      self:expect(",","expected comma")
    end
    first = false

    self:ignoreWhitespace()
    if self:peekType("id") then
      local key = self:advanceValue()
      if key then
        self:expect(":","expected semi-colon after id")
        self:ignoreSpace()
        hash[key] = self:parse()
        self:ignoreWhitespace()
      end
    end
  end
  return hash
end
535
-- Parse a block sequence: one value per leading "-" token.
function Parser:parseList()
  local items = {}
  while self:accept("-") do
    self:ignoreSpace()
    local value = self:parse()
    items[#items + 1] = value
    self:ignoreSpace()
  end
  return items
end
546
-- Parse a flow sequence: "[", comma-separated values, "]".
function Parser:parseInlineList()
  local items = {}
  local first = true

  self:accept("[")
  while not self:accept("]") do
    self:ignoreSpace()
    -- Every value after the first must be preceded by a comma.
    if not first then
      self:expect(",","expected comma")
    end
    first = false

    self:ignoreSpace()
    local value = self:parse()
    items[#items + 1] = value
    self:ignoreSpace()
  end

  return items
end
565
-- Consume a timestamp token and convert its captures to a number of
-- seconds: os.time of the captured date and time minus os.time of
-- 1970-01-01 08:00. Missing hour, minute and second default to 0.
-- NOTE(review): the captured UTC-offset fields (captures 7 and 8) are not
-- used, and the reason for the 08:00 reference hour is not visible here --
-- confirm both against the intended time zone handling.
function Parser:parseTimestamp()
  local fields = self:advance()[2]

  local stamp = os.time({
    year  = fields[1],
    month = fields[2],
    day   = fields[3],
    hour  = fields[4] or 0,
    min   = fields[5] or 0,
    sec   = fields[6] or 0,
    isdst = false,
  })
  local reference = os.time({ year = 1970, month = 1, day = 1, hour = 8 })
  return stamp - reference
end
579
-- Tokenize the YAML text `str`, parse the tokens and return the result.
exports.eval = function (str)
  local token_list = exports.tokenize(str)
  local parser = Parser:new(token_list)
  return parser:parse()
end
583
-- `dump` prints a value to stdout via table_print.
exports.dump = table_print

-- Module table: version, tokenize, eval and dump.
return exports
587