1--[[
2	Licensed according to the included 'LICENSE' document
3	Author: Thomas Harning Jr <harningt@gmail.com>
4]]
5local lpeg = require("lpeg")
6local jsonutil = require("json.util")
7local util = require("json.decode.util")
8local merge = jsonutil.merge
9
10local tonumber = tonumber
11local string_char = require("string").char
12local floor = require("math").floor
13local table_concat = require("table").concat
14
15local error = error
16
17local _ENV = nil
18
-- Builds an LPeg pattern that aborts decoding with a formatted error.
-- @param item  text describing the problem; it becomes the start of the message
-- @return an LPeg pattern; when the matcher reaches it, the position details
--         reported by util.get_invalid_character_info are formatted into the
--         message and raised through error()
local function get_error(item)
	local template = item .. " in string [%q] @ %i:%i"
	local function raise(subject, position)
		local line, line_index, offender = util.get_invalid_character_info(subject, position)
		local message = template:format(offender, line, line_index)
		error(message)
	end
	return lpeg.P(raise) * 1
end
27
-- Error-raising patterns, one for each kind of malformed string content.
local bad_unicode = get_error("Illegal unicode escape")
local bad_hex = get_error("Illegal hex escape")
local bad_character = get_error("Illegal character")
local bad_escape = get_error("Illegal escape")
32
-- Lookup table for single-character escapes: the key is the character that
-- follows the backslash, the value is the text substituted for the escape.
-- It is used as the replacement table of the doSimpleSub capture.
local knownReplacements = {
	["'"] = "'",
	['"'] = '"',
	['\\'] = '\\',
	['/'] = '/',
	b = '\b',
	f = '\f',
	n = '\n',
	r = '\r',
	t = '\t',
	v = '\v',
	-- NOTE(review): the value of the Lua literal '\z' depends on the
	-- interpreter. Lua 5.2+ defines \z as "skip the following whitespace", so
	-- here it should evaluate to the empty string; Lua 5.1 appears to treat it
	-- as a plain 'z'. Confirm which decoding of the \z escape is intended.
	z = '\z'
}
46
-- Encodes the 16-bit value of one \uXXXX escape as UTF-8, following the table
-- at http://da.wikipedia.org/wiki/UTF-8
-- @param code1  two hex digits holding the high byte of the value
-- @param code2  two hex digits holding the low byte of the value
-- @return string of one, two or three bytes
-- Each escape is encoded on its own; surrogate halves are not paired up.
local function utf8DecodeUnicode(code1, code2)
	local high = tonumber(code1, 16)
	local low = tonumber(code2, 16)
	if high >= 0x08 then
		-- 0x0800 and above: three bytes carrying 4 + 6 + 6 bits
		return string_char(
			0xE0 + floor(high / 16),
			0x80 + (high % 16) * 4 + floor(low / 64),
			0x80 + low % 64)
	end
	if high ~= 0 or low >= 0x80 then
		-- 0x0080 .. 0x07FF: two bytes carrying 5 + 6 bits
		return string_char(
			0xC0 + high * 4 + floor(low / 64),
			0x80 + low % 64)
	end
	-- below 0x80: the byte is emitted unchanged
	return string_char(low)
end
63
-- Turns the hex digits captured from a \xHH escape into the single byte they name.
-- @param code  hex digits as a string
-- @return one-character string with that byte value
local function decodeX(code)
	return string_char(tonumber(code, 16))
end
68
-- Capture of util.hexpair (presumably two hex digits; see json.decode.util)
local hexPairCapture = lpeg.C(util.hexpair)

-- One-character escapes: capture the character and look it up in knownReplacements
local doSimpleSub = lpeg.C(lpeg.S("'\"\\/bfnrtvz")) / knownReplacements
-- 'u' followed by two captured hex pairs; anything else raises bad_unicode
local doUniSub = lpeg.P('u') * (hexPairCapture * hexPairCapture + bad_unicode)
-- 'x' followed by one captured hex pair; anything else raises bad_hex
local doXSub = lpeg.P('x') * (hexPairCapture + bad_hex)
72
-- Options used for string decoding when no mode overrides them.
local defaultOptions = {
	-- characters excluded from the set a string may contain unescaped (none here)
	badChars = '',
	-- optional pattern tried ahead of the built-in escapes; false adds none
	additionalEscapes = false,
	-- look-ahead listing the characters accepted right after a backslash
	escapeCheck = #lpeg.S('bfnrtv/\\"xu\'z'),
	-- receives the two hex pairs captured from a \u escape
	decodeUnicode = utf8DecodeUnicode,
	-- false: single-quoted strings are lexed as well as double-quoted ones
	strict_quotes = false
}

-- Overrides keyed by mode name.
local modeOptions = {
	strict = {
		badChars = '\b\f\n\r\t\v',
		additionalEscapes = false,
		-- NOTE(review): 'v' is accepted here although RFC 8259 does not list
		-- \v among the JSON escapes -- confirm this is intentional
		escapeCheck = #lpeg.S('bfnrtv/\\"u'),
		strict_quotes = true
	}
}
89
-- Hands the string option tables to jsonutil.doOptionMerge for `options`.
-- @param options  option table to be updated
-- @param mode     optional mode name; when it names an entry of modeOptions,
--                 that entry is passed along after defaultOptions
-- Returns nothing.
local function mergeOptions(options, mode)
	local modeOverrides = mode and modeOptions[mode]
	jsonutil.doOptionMerge(options, false, 'strings', defaultOptions, modeOverrides)
end
93
-- Builds the pattern for one quoted string form.
-- @param quote        the delimiter character opening and closing the string
-- @param badChars     characters that are not accepted as plain content
-- @param escapeMatch  pattern applied to whatever follows a backslash
-- @return pattern matching quote, content, quote; the content is produced by a
--         substitution capture (lpeg.Cs), so escape replacements are applied
local function buildCaptureString(quote, badChars, escapeMatch)
	-- A plain character (anything but backslash, badChars and the quote), or a
	-- backslash -- replaced by "" in the output -- followed by an escape.
	local captureChar = (1 - lpeg.S("\\" .. badChars .. quote)) + (lpeg.P("\\") / "" * escapeMatch)
	-- During error, force end
	-- NOTE(review): `captureChar^0` can match zero characters and therefore
	-- never fails, so the ordered-choice alternative to its right (the
	-- bad_character branch) looks unreachable; a string containing a bad
	-- character then simply fails at the closing quote instead of raising
	-- "Illegal character". Confirm whether a different grouping was intended.
	local captureString = captureChar^0 + (-#lpeg.P(quote) * bad_character + -1)
	return lpeg.P(quote) * lpeg.Cs(captureString) * lpeg.P(quote)
end
100
-- Assembles the string lexer from the merged options.
-- @param options  option table; only its `strings` sub-table is read
-- @return LPeg pattern matching a double-quoted string and, unless
--         strict_quotes is set, a single-quoted string as well
local function generateLexer(options)
	local opts = options.strings

	-- Built-in escapes in the order they are tried: one-character escapes,
	-- \x hex escapes, then \u escapes routed through the configured decoder.
	local escapeMatch = doSimpleSub + doXSub / decodeX + doUniSub / opts.decodeUnicode
	if opts.escapeCheck then
		-- a rejected look-ahead or a failed escape ends in bad_escape
		escapeMatch = opts.escapeCheck * escapeMatch + bad_escape
	end
	if opts.additionalEscapes then
		-- caller-supplied escapes get the first try
		escapeMatch = opts.additionalEscapes + escapeMatch
	end

	local lexer = buildCaptureString('"', opts.badChars, escapeMatch)
	if not opts.strict_quotes then
		lexer = lexer + buildCaptureString("'", opts.badChars, escapeMatch)
	end
	return lexer
end
127
-- Public interface of the string-decoding module.
return {
	mergeOptions = mergeOptions,
	generateLexer = generateLexer
}
134