--[[
    Licensed according to the included 'LICENSE' document
    Author: Thomas Harning Jr <harningt@gmail.com>
]]
local lpeg = require("lpeg")
local jsonutil = require("json.util")
local util = require("json.decode.util")
local merge = jsonutil.merge

local tonumber = tonumber
local string_char = require("string").char
local floor = require("math").floor
local table_concat = require("table").concat

local error = error

-- From here on no global is reachable; everything needed was localized above.
local _ENV = nil

-- Build an LPeg pattern that raises a Lua error as soon as it is tried.
--   item: text placed at the start of the error message.
-- The message is item .. " in string [%q] @ %i:%i", filled in with the bad
-- character, the line and the line index reported by
-- util.get_invalid_character_info(data, index).
-- The match-time function always raises, so the trailing "* 1" never gets a
-- chance to match (presumably it is there so LPeg sees a consuming pattern).
local function get_error(item)
    local fmt_string = item .. " in string [%q] @ %i:%i"
    return lpeg.P(function(data, index)
        local line, line_index, bad_char = util.get_invalid_character_info(data, index)
        local err = fmt_string:format(bad_char, line, line_index)
        error(err)
    end) * 1
end

local bad_unicode   = get_error("Illegal unicode escape")
local bad_hex       = get_error("Illegal hex escape")
local bad_character = get_error("Illegal character")
local bad_escape    = get_error("Illegal escape")

-- Replacement text for single-character escapes, keyed by the character that
-- follows the backslash.
local knownReplacements = {
    ["'"] = "'",
    ['"'] = '"',
    ['\\'] = '\\',
    ['/'] = '/',
    b = '\b',
    f = '\f',
    n = '\n',
    r = '\r',
    t = '\t',
    v = '\v',
    -- NOTE(review): the Lua literal '\z' is version dependent (Lua 5.2+ treats
    -- \z as "skip following whitespace", older versions as a plain 'z');
    -- confirm which replacement is intended.
    z = '\z'
}

-- according to the table at http://da.wikipedia.org/wiki/UTF-8
-- Encode one \uXXXX escape as UTF-8.
--   code1: hexadecimal string for the high byte of the code unit
--   code2: hexadecimal string for the low byte of the code unit
-- Returns a 1, 2 or 3 byte string depending on the magnitude of the value.
local function utf8DecodeUnicode(code1, code2)
    code1, code2 = tonumber(code1, 16), tonumber(code2, 16)
    if code1 == 0 and code2 < 0x80 then
        -- U+0000 .. U+007F: single byte
        return string_char(code2)
    end
    if code1 < 0x08 then
        -- U+0080 .. U+07FF: two bytes
        return string_char(
            0xC0 + code1 * 4 + floor(code2 / 64),
            0x80 + code2 % 64)
    end
    -- U+0800 .. U+FFFF: three bytes
    return string_char(
        0xE0 + floor(code1 / 16),
        0x80 + (code1 % 16) * 4 + floor(code2 / 64),
        0x80 + code2 % 64)
end

-- Encode a UTF-16 surrogate pair (written in JSON as two consecutive \uXXXX
-- escapes) as the single 4-byte UTF-8 sequence of the code point it denotes.
--   high: 4-digit hexadecimal string in D800..DBFF
--   low:  4-digit hexadecimal string in DC00..DFFF
-- Encoding each half separately would produce two 3-byte sequences, which is
-- not valid UTF-8.
local function utf8DecodeSurrogatePair(high, low)
    high, low = tonumber(high, 16), tonumber(low, 16)
    local code = 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00)
    return string_char(
        0xF0 + floor(code / 0x40000),
        0x80 + floor(code / 0x1000) % 64,
        0x80 + floor(code / 64) % 64,
        0x80 + code % 64)
end

-- Turn the hexadecimal string captured after "\x" into the byte it names.
local function decodeX(code)
    code = tonumber(code, 16)
    return string_char(code)
end

-- All escape patterns below start AFTER the backslash has been consumed.

-- Single-character escapes, replaced through the knownReplacements table.
local doSimpleSub = lpeg.C(lpeg.S("'\"\\/bfnrtvz")) / knownReplacements
-- "u" followed by two util.hexpair captures (presumably two hex digits each);
-- anything else after "u" raises the unicode-escape error.
local doUniSub = lpeg.P('u') * (lpeg.C(util.hexpair) * lpeg.C(util.hexpair) + bad_unicode)
-- "x" followed by one util.hexpair capture; anything else raises the
-- hex-escape error.
local doXSub = lpeg.P('x') * (lpeg.C(util.hexpair) + bad_hex)

-- "uHHHH\uLLLL" where HHHH is a high surrogate and LLLL a low surrogate.
-- Produces two captures: the high and the low code unit as hex strings.
-- When the input is not a well-formed pair this pattern simply fails, so the
-- ordered choice can fall back to doUniSub.
local hexDigit = lpeg.R("09", "af", "AF")
local highSurrogate = lpeg.S("dD") * lpeg.S("89abAB") * hexDigit * hexDigit
local lowSurrogate = lpeg.S("dD") * lpeg.S("cdefCDEF") * hexDigit * hexDigit
local doSurrogateSub = lpeg.P('u') * lpeg.C(highSurrogate)
    * lpeg.P('\\u') * lpeg.C(lowSurrogate)

local defaultOptions = {
    badChars = '',
    additionalEscapes = false, -- disallow untranslated escapes
    -- look-ahead listing the escape characters accepted by default
    escapeCheck = #lpeg.S('bfnrtv/\\"xu\'z'),
    decodeUnicode = utf8DecodeUnicode,
    strict_quotes = false
}

local modeOptions = {}

modeOptions.strict = {
    badChars = '\b\f\n\r\t\v',
    additionalEscapes = false, -- no additional escapes
    escapeCheck = #lpeg.S('bfnrtv/\\"u'), --only these chars are allowed to be escaped
    strict_quotes = true
}

-- Merge the 'strings' option group into `options`, handing
-- jsonutil.doOptionMerge the defaults and, when `mode` names a known mode,
-- that mode's overrides.
local function mergeOptions(options, mode)
    jsonutil.doOptionMerge(options, false, 'strings', defaultOptions, mode and modeOptions[mode])
end

-- Build the pattern for one string literal delimited by `quote`.
--   quote:       the delimiter character
--   badChars:    characters that may not appear unescaped inside the string
--   escapeMatch: pattern applied right after a backslash
-- The result is a substitution capture of the text between the quotes, with
-- each backslash removed and each escape replaced by its decoded value.
local function buildCaptureString(quote, badChars, escapeMatch)
    local captureChar = (1 - lpeg.S("\\" .. badChars .. quote)) + (lpeg.P("\\") / "" * escapeMatch)
    -- During error, force end
    -- NOTE(review): captureChar^0 can match the empty string, so this ordered
    -- choice never reaches its second alternative and bad_character is never
    -- raised from here; confirm whether that is intended before changing it.
    local captureString = captureChar^0 + (-#lpeg.P(quote) * bad_character + -1)
    return lpeg.P(quote) * lpeg.Cs(captureString) * lpeg.P(quote)
end

-- Build the string lexer from options.strings.
-- Returns one pattern that accepts a double-quoted string and, unless
-- strict_quotes is set, a single-quoted string as well.
local function generateLexer(options)
    options = options.strings
    local quotes = { '"' }
    if not options.strict_quotes then
        quotes[#quotes + 1] = "'"
    end
    local escapeMatch = doSimpleSub
    escapeMatch = escapeMatch + doXSub / decodeX
    if options.decodeUnicode == utf8DecodeUnicode then
        -- Only the built-in decoder gets surrogate pairs combined; a custom
        -- decodeUnicode keeps receiving every \uXXXX escape on its own.
        -- Must be tried before doUniSub, which would otherwise consume the
        -- high surrogate alone.
        escapeMatch = escapeMatch + doSurrogateSub / utf8DecodeSurrogatePair
    end
    escapeMatch = escapeMatch + doUniSub / options.decodeUnicode
    if options.escapeCheck then
        -- Escapes that do not pass the look-ahead raise the escape error.
        escapeMatch = options.escapeCheck * escapeMatch + bad_escape
    end
    if options.additionalEscapes then
        -- Caller-supplied escapes take precedence over the built-in ones.
        escapeMatch = options.additionalEscapes + escapeMatch
    end
    local captureString
    for i = 1, #quotes do
        local cap = buildCaptureString(quotes[i], options.badChars, escapeMatch)
        if captureString == nil then
            captureString = cap
        else
            captureString = captureString + cap
        end
    end
    return captureString
end

local strings = {
    mergeOptions = mergeOptions,
    generateLexer = generateLexer
}

return strings