1----------------------------------------------------------------------------
2--## lcpp - a C-PreProcessor in Lua 5.1 for LuaJIT ffi
3--
4-- Copyright (C) 2012-2013 Michael Schmoock <michael@willigens.de>
5--
6--### Links
7-- * GitHub page:   [http://github.com/willsteel/lcpp](http://github.com/willsteel/lcpp)
8-- * Project page:  [http://lcpp.schmoock.net](http://lcpp.schmoock.net)
9-- * Lua:           [http://www.lua.org](http://www.lua.org)
10-- * LuaJIT:        [http://luajit.org](http://luajit.org)
11-- * Sponsored by:  [http://mmbbq.org](http://mmbbq.org)
12--
13-- It can be used to pre-process LuaJIT ffi C header file input.
14-- It can also be used to preprocess any other code (i.e. Lua itself)
15--
16-- 	git clone https://github.com/willsteel/lcpp.git
17----------------------------------------------------------------------------
18--## USAGE
19--	-- load lcpp
20--	local lcpp = require("lcpp")
21--
22--	-- use LuaJIT ffi and lcpp to parse cpp code
23--	ffi.cdef("#include <your_header.h>")
24--
25--	-- compile some input
26--	local out = lcpp.compile([[
27--		#include "myheader.h"
28--		#define MAXPATH 260
29--		typedef struct somestruct_t {
30--			void*          base;
31--			size_t         size;
32--			wchar_t        path[MAXPATH];
33--		} t_exe;
34--	]])
35--
36--	-- the result should be
37--	out = [[
38--		// <preprocessed content of file "myheader.h">
39--		typedef struct somestruct_t {
40--			void*          base;
41--			size_t         size;
42--			wchar_t        path[260];
43--		} t_exe;
44--	]]
45--
46--## This CPPs BNF:
47--	RULES:
48--	CODE              := {LINE}
49--	LINE              := {STUFF NEWML} STUFF  NEWL
50--	STUFF             := DIRECTIVE | IGNORED_CONTENT
51--	DIRECTIVE         := OPTSPACES CMD OPTSPACES DIRECTIVE_NAME WHITESPACES DIRECTIVE_CONTENT WHITESPACES NEWL
52--
53--	LEAVES:
54--	NEWL              := "\n"
55--	NEWL_ESC          := "\\n"
56--	WHITESPACES       := "[ \t]+"
57--	OPTSPACES         := "[ \t]*"
58--	COMMENT           := "//(.-)$"
59--	MLCOMMENT         := "/[*](.-)[*]/"
60--	IGNORED_CONTENT   := "[^#].*"
61--	CMD               := "#"
62--	DIRECTIVE_NAME    := "include"|"define"|"undef"|"if"|"else"|"elif"|"else if"|"endif"|"ifdef"|"ifndef"|"pragma"|"version"
63--	DIRECTIVE_CONTENT := ".*?"
64--
65--## TODOs:
66--	- lcpp.LCPP_LUA for: load, loadfile
67--	- "#" operator for stringification
68--	- literal concatenation: "foo" "bar" -> "foobar"
69--
70--## License (MIT)
71-- -----------------------------------------------------------------------------
72-- Permission is hereby granted, free of charge, to any person obtaining a copy
73-- of this software and associated documentation files (the "Software"), to deal
74-- in the Software without restriction, including without limitation the rights
75-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
76-- copies of the Software, and to permit persons to whom the Software is
77-- furnished to do so, subject to the following conditions:
78--
79-- The above copyright notice and this permission notice shall be included in
80-- all copies or substantial portions of the Software.
81--
82-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
83-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
84-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
85-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
86-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
87-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
88-- THE SOFTWARE.
89--
90-- MIT license: http://www.opensource.org/licenses/mit-license.php
91-- -----------------------------------------------------------------------------
92--
93-- @module lcpp
local lcpp = {}

-- CONFIG
lcpp.LCPP_LUA         = false   -- whether to use lcpp to preprocess Lua code (load, loadfile, loadstring...)
lcpp.LCPP_FFI         = true    -- whether to use lcpp as LuaJIT ffi PreProcessor (if used in luaJIT)
lcpp.LCPP_TEST        = false   -- whether to run lcpp unit tests when loading lcpp module
lcpp.ENV              = {}      -- static predefines (env-like)
lcpp.FAST             = true    -- perf. tweaks when enabled. con: breaks minor stuff like __LINE__ macros
lcpp.DEBUG            = false   -- when true, the raw input is appended to each output line and results are printed

-- PREDEFINES (names of the macros lcpp maintains itself)
local __FILE__        = "__FILE__"
local __LINE__        = "__LINE__"
local __DATE__        = "__DATE__"
local __TIME__        = "__TIME__"
local __LCPP_INDENT__ = "__LCPP_INDENT__"
local __LCPP_INSIDE_HEADERFILE__ = "__LCPP_INSIDE_HEADERFILE__"

-- BNF LEAVES (Lua patterns, not regular expressions)
local ENDL            = "$"
local STARTL          = "^"
local NEWL            = "\n"
local NEWL_BYTE       = NEWL:byte(1)
local NEWL_ESC        = "\\"
local NEWML           = "\\\n"
local CMD             = "#"
local CMD_BYTE        = CMD:byte(1)
local COMMENT         = "^(.-)//.-$"
local MLCOMMENT       = "/[*].-[*]/"
local WHITESPACES     = "%s+"
local OPTSPACES       = "%s*"
local IDENTIFIER      = "[_%a][_%w]*"
local NOIDENTIFIER    = "[^%w_]+"
-- NOTE: inside a character set ".-_" is a RANGE ('.' up to '_'), which does
-- not contain the hyphen itself. The range is kept so every filename that
-- matched before still matches; "%-" adds the literal hyphen so that names
-- such as "my-header.h" are accepted too.
local FILENAME        = "[0-9a-zA-Z.-_/\\%-]+"
local TEXT            = ".+"

-- BNF WORDS
local _INCLUDE        = "include"
local _DEFINE         = "define"
local _IFDEF          = "ifdef"
local _IFNDEF         = "ifndef"
local _ENDIF          = "endif"
local _UNDEF          = "undef"
local _IF             = "if"
local _ELSE           = "else"
local _ELIF           = "elif"
local _NOT            = "!"
local _ERROR          = "error"
local _PRAGMA         = "pragma"
local _VERSION        = "version"

-- BNF RULES (matched against a directive line with the leading "#" removed)
local INCLUDE         = STARTL.._INCLUDE..WHITESPACES.."[\"<]("..FILENAME..")[\">]"..OPTSPACES..ENDL
local DEFINE          = STARTL.._DEFINE
local IFDEF           = STARTL.._IFDEF..WHITESPACES.."("..IDENTIFIER..")"..OPTSPACES..ENDL
local IFNDEF          = STARTL.._IFNDEF..WHITESPACES.."("..IDENTIFIER..")"..OPTSPACES..ENDL
local ENDIF           = STARTL.._ENDIF..OPTSPACES.."(.*)"..ENDL
local UNDEF           = STARTL.._UNDEF..WHITESPACES.."("..IDENTIFIER..")"..OPTSPACES..ENDL
local IF              = STARTL.._IF..WHITESPACES.."(.*)"..ENDL
local ELSE            = STARTL.._ELSE..OPTSPACES.."(.*)"..ENDL
local ELIF            = STARTL.._ELIF..WHITESPACES.."(.*)"..ENDL
local ELSEIF          = STARTL.._ELSE..WHITESPACES.._IF..WHITESPACES.."(.*)"..ENDL
local ERROR           = STARTL.._ERROR..WHITESPACES.."("..TEXT..")"..OPTSPACES..ENDL
local ERROR_NOTEXT    = STARTL.._ERROR..OPTSPACES..ENDL	--> not required when we have POSIX regex
local PRAGMA          = STARTL.._PRAGMA
local VERSION         = STARTL.._VERSION



-- speedups: the three shapes a "#define" body can take
local TRUEMACRO = STARTL.."("..IDENTIFIER..")%s*$"
local REPLMACRO = STARTL.."("..IDENTIFIER..")"..WHITESPACES.."(.+)$"
local FUNCMACRO = STARTL.."("..IDENTIFIER..")%s*%(([%s%w,]*)%)%s*(.*)"
168
169-- ------------
170-- LOCAL UTILS
171-- ------------
-- State of the operation in progress; its lineno is shown in log/error prefixes.
lcpp.STATE = { lineno = 0 }

-- Module-local replacement for the global error(): dumps a traceback to
-- stdout, then raises the message prefixed with the current line number.
local function error(msg)
	_G.print(debug.traceback())
	local text = string.format("lcpp ERR [%04i] %s", lcpp.STATE.lineno, msg)
	_G.error(text)
end

-- Module-local replacement for the global print(): same line-number prefix,
-- informational level.
local function print(msg)
	local text = string.format("lcpp INF [%04i] %s", lcpp.STATE.lineno, msg)
	_G.print(text)
end
175
-- Lazily splits a string on a Lua pattern.
-- @param str the string to split
-- @param pat a Lua pattern describing the separator
-- @return an iterator function (coroutine based) producing one piece per call
--
-- An empty piece in front of a leading separator is dropped; empty pieces
-- between later separators are produced as "". The text after the last
-- separator is only produced when it is non-empty.
local function gsplit(str, pat)
	local fpat = "(.-)"..pat
	return coroutine.wrap(function()
		local last_end = 1
		local s, e, cap = str:find(fpat, 1)
		while s do
			-- A separator that matched the empty string can never advance the
			-- scan position; stop and hand out the rest as one piece instead
			-- of looping forever.
			if e < last_end then break end
			if s ~= 1 or cap ~= "" then
				coroutine.yield(cap)
			end
			last_end = e + 1
			s, e, cap = str:find(fpat, last_end)
		end
		if last_end <= #str then
			coroutine.yield(str:sub(last_end))
		end
	end)
end
-- Eager variant of gsplit: gathers all pieces into an array table.
local function split(str, pat)
	local pieces = {}
	for piece in gsplit(str, pat) do
		pieces[#pieces + 1] = piece
	end
	return pieces
end
202
-- Tells whether `str` contains `pat` (compared literally) right at its start.
-- `offset` is optional: the number of leading characters to skip first.
-- A nil/false `str` yields false.
local function strsw(str, pat, offset)
	if not str then return false end
	local skip = offset or 0
	local first = skip + 1
	local last = skip + string.len(pat)
	return string.sub(str, first, last) == pat
end
210
-- Tells whether `str` finishes with `pat` (compared literally).
-- An empty `pat` always matches; a nil/false `str` yields false.
local function strew(str, pat)
	if not str then return false end
	if pat == '' then return true end
	local tail = string.sub(str, -string.len(pat))
	return tail == pat
end
216
-- Strips leading and trailing whitespace ("trim12" from the lua-users wiki).
local function trim(str)
	-- position of the first non-whitespace character (or #str + 1 if none)
	local first = str:match("^%s*()")
	if first > #str then
		return ""
	end
	-- greedy match up to the last non-whitespace character
	return str:match(".*%S", first)
end
222
-- Counts the non-overlapping occurrences of a literal substring.
-- @param input the string to search in
-- @param what the substring to look for (plain text, not a pattern)
-- @return the number of occurrences; 0 for an empty `what`
local function findn(input, what)
	-- an empty needle matches everywhere without advancing: nothing to count
	if what == "" then return 0 end
	local count = 0
	local pos = 1
	while true do
		-- keep the unused start index in a local instead of a global "_"
		local _, stop = string.find(input, what, pos, true)
		if not stop then return count end
		count = count + 1
		pos = stop + 1
	end
end
233
-- A lightweight and flexible tokenizer (coroutine body, see tokenizer()).
--
-- Yields (kind, value, first, last) for every token of `str`, where kind is
-- "ignore", a keyword name, "number", "identifier", "string" or "unknown",
-- and first/last are the token's positions in `str`. Tests are tried in that
-- order. For "number" the value is the result of tonumber(); for "string" the
-- value has its quotes removed; otherwise the value is the matched text.
--
-- `setup` is optional and may contain:
--   identifier, number, ignore : patterns, each anchored with "^"
--   string                     : false to disable string literal tokens
--   keywords                   : { NAME = "^pattern", ... }
-- Missing entries fall back to the defaults below. The table is only read,
-- never modified.
local function _tokenizer(str, setup)
	local defsetup = {
		-- patterns have to be anchored with "^" for the tokenizer
		["identifier"] = '^[_%a][_%w]*',
		["number"] = '^[%+%-]?%d+[%.]?%d*',
		["ignore"] = '^%s+',
		["string"] = true,
		["keywords"] = {
			-- ["NAME"] = '^pattern',
			-- ...
		},
	}
	setup = setup or defsetup
	local identifierPat = setup.identifier or defsetup.identifier
	local numberPat     = setup.number or defsetup.number
	-- fall back to the default IGNORE pattern (not to the number pattern)
	local ignorePat     = setup.ignore or defsetup.ignore
	local withStrings   = setup.string
	if nil == withStrings then withStrings = true end
	local keywords      = setup.keywords or {}

	local strlen = #str
	local i = 1        -- start of the token currently being read
	local i1, i2       -- bounds of the last successful match
	local keyword      -- name of the keyword found by findKeyword()

	-- Tries `pat` at position i. Zero-length matches count as "no match",
	-- otherwise the scan position could never advance.
	local function find(pat)
		local from, to = str:find(pat, i)
		if from == nil or to < from then return false end
		i1, i2 = from, to
		return true
	end

	local function cut()
		return str:sub(i, i2)
	end

	local function findKeyword()
		for name, pat in pairs(keywords) do
			if find(pat) then
				keyword = name
				return true
			end
		end
		return false
	end

	while true do
		if i > strlen then return 'eof', nil, strlen, strlen end
		if find(ignorePat) then
			coroutine.yield("ignore", cut(), i1, i2)
		elseif findKeyword() then
			coroutine.yield(keyword, cut(), i1, i2)
		elseif find(numberPat) then
			coroutine.yield('number', tonumber(cut()), i1, i2)
		elseif find(identifierPat) then
			coroutine.yield('identifier', cut(), i1, i2)
		elseif withStrings and (find('^"[^"]*"') or find("^'[^']*'")) then
			-- strip the quotes
			coroutine.yield('string', cut():sub(2,-2), i1, i2)
		else -- any other unknown character
			i1 = i
			i2 = i
			coroutine.yield('unknown', cut(), i1, i2)
		end
		i = i2+1
	end
end

-- Returns an iterator over the tokens of `str`; see _tokenizer for the
-- produced values and the optional `setup` table.
local function tokenizer(str, setup)
	return coroutine.wrap(function() _tokenizer(str, setup) end)
end
304
305
306-- ------------
307-- PARSER
308-- ------------
309
-- Removes C style comments from `input` and returns the result.
-- hint: LuaJIT ffi does not rely on us to remove the comments, but maybe other usecases
--
-- "//" comments are cut up to (not including) the line break, so a comment on
-- the last line is removed even without a trailing newline.
-- "/* */" comments: with lcpp.FAST they are simply deleted; otherwise each one
-- is replaced by as many newlines as it contained, which keeps the line count
-- (and thereby __LINE__) intact.
-- NOTE(review): comment markers inside string literals are not recognised and
-- are treated like real comments.
local function removeComments(input)
	input = string.gsub(input, "//[^\n]*", "") -- remove sl comments

	if lcpp.FAST then
		input = string.gsub(input, "/%*.-%*/", "") -- remove ml comments (stupid method)
		return input
	end

	local output = {}
	local pos = 1
	while true do
		local starti, endi, body = input:find("/%*(.-)%*/", pos)
		if not starti then break end
		output[#output + 1] = input:sub(pos, starti - 1)
		output[#output + 1] = string.rep("\n", findn(body, "\n"))
		-- continue AFTER the closing "/" so it cannot start a new comment
		pos = endi + 1
	end
	output[#output + 1] = input:sub(pos)
	return table.concat(output)
end
340
-- screener: splits the raw input into preprocessor input lines.
-- It joins backslash-continued lines and separates directive lines (first
-- byte is "#") from other content; it does not tokenize and (currently) does
-- not remove comments.
-- Returns an iterator function. With lcpp.FAST, consecutive non-directive
-- lines are handed out joined as one block and empty lines are dropped;
-- otherwise every line, including empty ones, is handed out individually.
local function screener(input)
	return coroutine.wrap(function()
		local text = input

		-- concat multi-line input: one replacement per pass until none is left
		local replaced
		repeat
			text, replaced = string.gsub(text, "^(.-)\\\n(.-)$", "%1 %2\n")
		until replaced <= 0

		-- non-directive lines collected while in FAST mode
		local pending = {}
		local function flush()
			if #pending > 0 then
				coroutine.yield(table.concat(pending, NEWL))
				pending = {}
			end
		end

		for line in gsplit(text, NEWL) do
			if #line == 0 then
				if not lcpp.FAST then coroutine.yield(line) end
			elseif line:byte(1) == CMD_BYTE then
				-- a directive ends the current block of plain lines
				flush()
				coroutine.yield(line)
			elseif lcpp.FAST then
				pending[#pending + 1] = line
			else
				coroutine.yield(line)
			end
		end
		flush()
	end)
end
381
-- Expands the currently known macros in `input` and returns the new text.
-- Identifier tokens are looked up in state.defines: boolean macros vanish,
-- string and number macros are substituted, and function macros are collected
-- and run over the whole text once all simple substitutions are done.
-- All other tokens are copied unchanged.
local function apply(state, input)
	local pieces = {}
	local pending = {}   -- function macros seen in this input

	for kind, value, first, last in tokenizer(input) do
		if kind ~= "identifier" then
			pieces[#pieces + 1] = input:sub(first, last)
		else
			local text = value
			local macro = state.defines[value]
			if macro then
				local macrotype = type(macro)
				if macrotype == "boolean" then
					text = ""
				elseif macrotype == "string" then
					text = macro
				elseif macrotype == "number" then
					text = tostring(macro)
				elseif macrotype == "function" then
					pending[#pending + 1] = macro	-- applied in a later step
				end
			end
			pieces[#pieces + 1] = text
		end
	end

	local result = table.concat(pieces)
	for _, expand in pairs(pending) do
		result = expand(result)
	end
	return result
end
414
-- Processes one input line; called from the doWork loop.
-- Only "#include" directives are acted upon here (the file is compiled via
-- state:includeFile and its result returned); every other line is passed
-- through as is. Returns nothing while inside a skipped block.
local function processLine(state, line)
	if not line or #line == 0 then return line end

	--[[ SKIPPING ]]--
	if state:skip() then return end

	--[[ READ NEW DIRECTIVES ]]--
	if line:byte(1) == CMD_BYTE then
		-- handle #include ...
		local filename = line:sub(2):match(INCLUDE)
		if filename then
			print("processing header  " .. filename)
			return state:includeFile(filename)
		end
	end

	-- macros are not applied at this level
	return line
end
445
446
-- Processes one input line of a header file; called from the doWork2 loop.
--
-- While the state has __LCPP_INSIDE_HEADERFILE__ defined, the conditional
-- directives (#ifdef, #ifndef, #if, #elif, "#else if", #else, #endif) drive
-- the block stack, and #undef / #define / #pragma are interpreted; any other
-- directive except #include raises an error. Without that define only
-- #include is acted upon.
--
-- @return the text to emit for this line, or nothing when the line is
--         skipped or consumed (#undef)
local function processLine2(state, line)
	if not line or #line == 0 then return line end
	local cmd = nil
	if line:byte(1) == CMD_BYTE then cmd = line:sub(2) end

	local insideHeader = state:defined(__LCPP_INSIDE_HEADERFILE__)

	--[[ IF/THEN/ELSE STRUCTURAL BLOCKS ]]--
	if cmd and insideHeader then
		local ifdef   = cmd:match(IFDEF)
		local ifndef  = cmd:match(IFNDEF)
		local ifexp   = cmd:match(IF)
		local elif    = cmd:match(ELIF)
		local elseif_ = cmd:match(ELSEIF)
		local else_   = cmd:match(ELSE)
		local endif   = cmd:match(ENDIF)

		-- Exactly one handler per line. "#else if X" matches both ELSEIF and
		-- ELSE, so ELSEIF has to be tested first; running both would turn a
		-- false "#else if" into an unconditional "#else".
		local structural = true
		if     ifdef   then state:openBlock(state:defined(ifdef))
		elseif ifndef  then state:openBlock(not state:defined(ifndef))
		elseif ifexp   then state:openBlock(state:parseExpr(ifexp))
		elseif elif    then state:elseBlock(state:parseExpr(elif))
		elseif elseif_ then state:elseBlock(state:parseExpr(elseif_))
		elseif else_   then state:elseBlock(true)
		elseif endif   then state:closeBlock()
		else structural = false
		end
		if structural then return line end
	end


	--[[ SKIPPING ]]--
	if state:skip() then return end


	--[[ READ NEW DIRECTIVES ]]--
	if cmd then
		-- handle #include ...
		local filename = cmd:match(INCLUDE)
		if filename then
			print("processing header  " .. filename)
			return state:includeFile(filename)
		end

		-- outside of header files all other directives pass through untouched
		if not insideHeader then return line end

		-- handle #undef ...
		local key = cmd:match(UNDEF)
		if type(key) == "string" then
			state:undefine(key)
			return
		end

		-- read "#define >FooBar...<" directives
		if cmd:match(DEFINE) then
			local define = trim(cmd:sub(DEFINE:len()+1))
			local macroname, replacement

			-- simple "true" defines
			macroname = define:match(TRUEMACRO)
			if macroname then
				state:define(macroname, true)
			end

			-- replace macro defines
			macroname, replacement = define:match(REPLMACRO)
			if macroname and replacement then
				state:define(macroname, replacement)
			end

			-- read functional macros
			macroname, replacement = state:parseFunction(define)
			if macroname and replacement then
				state:define(macroname, replacement)
			end

			return line
		end

		-- ignore, because we dont have any pragma directives yet
		if cmd:match(PRAGMA) then
			return line
		end

		-- abort on unknown keywords
		error("unknown directive: "..line)
	end


	--[[ APPLY MACROS ]]--
	-- macros are not applied at this level

	return line
end
544
545
-- Shared worker loop of doWork and doWork2 (runs inside their coroutine).
-- Pulls lines from the state, feeds them to `lineProcessor` and yields every
-- resulting output. Raises an error if the block nesting level at the end
-- differs from the level at the start.
local function _runLines(state, lineProcessor)
	if not state:defined(__FILE__) then state:define(__FILE__, "<USER_CHUNK>", true) end
	local oldIndent = state:getIndent()
	while true do
		local input = state:getLine()
		if not input then break end
		local output = lineProcessor(state, input)
		if not lcpp.FAST and not output then output = "" end -- output empty skipped lines
		-- input as comment when DEBUG; skipped lines (nil output in FAST
		-- mode) must not be concatenated
		if lcpp.DEBUG and output then output = output.." -- "..input end
		if output then coroutine.yield(output) end
	end
	if (oldIndent ~= state:getIndent()) then error("indentation level must be balanced within a file. was:"..oldIndent.." is:"..state:getIndent()) end
end

-- Returns an iterator over the processed output of `state`, using processLine.
local function doWork(state)
	return coroutine.wrap(function() _runLines(state, processLine) end)
end

-- Returns an iterator over the processed output of `state`, using
-- processLine2 (header file processing).
local function doWork2(state)
	return coroutine.wrap(function() _runLines(state, processLine2) end)
end
582
-- Compiles the header `filename` (looked up below "../src/") with the current
-- defines, adopts the define table the sub-compilation ended with, and
-- returns the compiled text.
local function includeFile(state, filename)
	local path = "../src/" .. filename
	local compiled, substate = lcpp.compileHeaderFile(path, state.defines)
	-- defines made inside the header stay visible for the rest of this file
	state.defines = substate.defines
	return compiled
end
590
-- Stores a define in the state. The value is run through state:prepareMacro
-- first; a nil value removes the define. `override` is accepted but not
-- evaluated at the moment.
local function define(state, key, value, override)
	local prepared = state:prepareMacro(value)
	local defines = state.defines
	defines[key] = prepared
end
598
-- parses CPP expressions
600-- i.e.: #if !defined(_UNICODE) && !defined(UNICODE)
601--
602--BNF:
603--  EXPR     -> (BRACKET_OPEN)(EXPR)(BRACKET_CLOSE)
604--  EXPR     -> (EXPR)(OR)(EXPR)
605--  EXPR     -> (EXPR)(AND)(EXPR)
606--  EXPR     -> (NOT)(EXPR)
607--  EXPR     -> (FUNCTION)
608--  FUNCTION -> (IDENTIFIER)(BRACKET_OPEN)(ARGS)(BRACKET_CLOSE)
609--  ARGS     -> ((IDENTIFIER)[(COMMA)(IDENTIFIER)])?
610--LEAVES:
611--  IGNORE -> " \t"
612--  BRACKET_OPEN  -> "("
613--  BRACKET_CLOSE -> ")"
614--  OR -> "||"
615--  AND -> "&&"
616--  NOT -> "!"
617--  IDENTIFIER -> "[0-9a-zA-Z_]"
618--
619
-- Tokenizer setup for macro replacement texts: string literals are tokens and
-- the "##" concatenation operator is a keyword.
local LCPP_TOKENIZE_MACRO = {
	string = true,
	keywords = { CONCAT = "^##" },
}

-- Tokenizer setup for "#if" expressions: no string literal tokens; operators,
-- brackets and "defined" are keywords.
local LCPP_TOKENIZE_EXPR = {
	string = false,
	keywords = {
		DEFINED = '^defined',
		NOT     = '^!',
		AND     = '^&&',
		OR      = '^||',
		BROPEN  = '^[(]',
		BRCLOSE = '^[)]',
	},
}
637
-- Reads the operand of a "defined" operator from the token iterator `input`
-- and returns state:defined(<identifier>).
-- Both "defined NAME" and "defined(NAME)" are accepted; reading stops right
-- after the identifier (no bracket) or after the closing bracket.
-- Raises a parse error for anything else.
local function parseDefined(state, input)
	local name = nil
	local opened = false
	local closed = false

	for kind, text in input do
		if kind == "BROPEN" then
			opened = true
		elseif kind == "identifier" then
			name = text
			if not opened then break end
		elseif kind == "BRCLOSE" and name then
			closed = true
			break
		end
	end

	-- with and w/o brackets allowed, but they have to be balanced
	if name and opened == closed then
		return state:defined(name)
	end

	error("expression parse error: defined(ident)")
end
665
-- Evaluates a preprocessor condition, i.e. "!defined(_UNICODE) && !defined(UNICODE)".
--
-- Supported: defined NAME / defined(NAME), "!", "&&", "||" and brackets, with
-- the usual precedence ("!" binds tightest, then "&&", then "||"). Both sides
-- of "&&" / "||" are always parsed so the token stream stays in sync.
-- Everything else (numbers, plain identifiers, comparison operators, ...) is
-- ignored; an operand that is missing because of that evaluates to false,
-- also when it is negated.
--
-- @param input the expression as string, or a token iterator as produced by
--        tokenizer(..., LCPP_TOKENIZE_EXPR); with an iterator, parsing stops
--        after the closing bracket that ends the expression
-- @return true or false
local function parseExpr(state, input)
	-- first call gets string input. rest uses tokenizer
	if type(input) == "string" then input = tokenizer(input, LCPP_TOKENIZE_EXPR) end

	local RELEVANT = { NOT = true, DEFINED = true, BROPEN = true, BRCLOSE = true, AND = true, OR = true }
	local buffered = nil      -- kind of the looked-ahead token, if any
	local exhausted = false   -- set once the iterator returned nil (it must not be called again)

	-- kind of the next relevant token without consuming it; nil at the end
	local function peek()
		while not buffered and not exhausted do
			local kind = input()
			if kind == nil then
				exhausted = true
			elseif RELEVANT[kind] then
				buffered = kind
			end
		end
		return buffered
	end

	-- consumes and returns the next relevant token kind
	local function take()
		local kind = peek()
		buffered = nil
		return kind
	end

	local parseOr -- forward declaration (bracketed groups recurse into it)

	-- unary := "!" unary | "(" or ")" | "defined" operand
	-- returns value, present (present == false: no operand was found)
	local function parseUnary()
		local kind = peek()
		if kind == "NOT" then
			take()
			local value, present = parseUnary()
			if not present then return false, false end
			return not value, true
		elseif kind == "BROPEN" then
			take()
			local value = parseOr()
			if peek() == "BRCLOSE" then take() end
			return value, true
		elseif kind == "DEFINED" then
			-- nothing is buffered after take(), so parseDefined may read the
			-- operand straight from the iterator
			take()
			return parseDefined(state, input), true
		end
		return false, false
	end

	-- and := unary { "&&" unary }
	local function parseAnd()
		local value = parseUnary()
		while peek() == "AND" do
			take()
			local rhs = parseUnary()
			value = value and rhs
		end
		return value
	end

	-- or := and { "||" and }
	parseOr = function()
		local value = parseAnd()
		while peek() == "OR" do
			take()
			local rhs = parseAnd()
			value = value or rhs
		end
		return value
	end

	local result = parseOr()
	-- a caller that handed over an iterator after an opening bracket expects
	-- the matching closing bracket to be consumed
	if peek() == "BRCLOSE" then take() end
	return result
end
701
-- Prepares a macro value for storage: non-string values are returned as they
-- are; strings get the known macros applied (macro chaining) and every "##"
-- concatenation operator removed, so its neighbours end up adjacent.
local function prepareMacro(state, input)
	if type(input) ~= "string" then return input end

	local expanded = state:apply(input)
	local kept = {}
	for kind, _, first, last in tokenizer(expanded, LCPP_TOKENIZE_MACRO) do
		-- everything but the concat op "##" is copied verbatim
		if kind ~= "CONCAT" then
			kept[#kept + 1] = expanded:sub(first, last)
		end
	end
	return table.concat(kept)
end
716
-- Parses a function-like macro definition,
-- i.e.: "MAX(x, y) (((x) > (y)) ? (x) : (y))"
--
-- @param input the text following "#define"
-- @return the macro name and a function(text) that expands every call of the
--         macro inside `text` (it returns the gsub results: new text, count);
--         nothing if `input` is nil or not a function-like definition
--
-- Parameters are replaced as whole identifiers only (a parameter "x" does not
-- touch "max"), and before the body is run through state:prepareMacro, so
-- "a##b" still pastes the two arguments together. Arguments are inserted as
-- plain text, which makes "%" in bodies and arguments harmless.
-- A call is only recognised where the macro name starts an identifier.
local function parseFunction(state, input)
	if not input then return end
	local name, argsstr, repl = input:match(FUNCMACRO)
	if not name or not argsstr or not repl then return end

	-- parameter name -> position (the first one wins for duplicate names)
	local argindex = {}
	local noargs = 0
	for argname in argsstr:gmatch(IDENTIFIER) do
		noargs = noargs + 1
		if not argindex[argname] then argindex[argname] = noargs end
	end

	-- Swap the parameters for markers (\1<position>\1). The marker characters
	-- are no identifier characters, so they pass through macro application
	-- and "##" removal untouched.
	local MARK = "\1"
	local template = repl:gsub(IDENTIFIER, function(word)
		local index = argindex[word]
		if index then return MARK..index..MARK end
	end)
	template = state:prepareMacro(template)

	-- build pattern string:  name(arg, arg, ...)
	-- %f[_%w] is a frontier: the name must not continue a longer identifier
	local buf = {"%f[_%w]", name, "%s*%(%s*"}
	for i = 1, noargs do
		buf[#buf + 1] = "([^,%)]*)%s*"
		if i < noargs then
			buf[#buf + 1] = ",%s*"
		end
	end
	buf[#buf + 1] = "%)"
	local pattern = table.concat(buf)

	-- build macro function
	local func = function(text)
		return text:gsub(pattern, function(...)
			local actual = {...}
			-- extra parentheses: keep only the expanded text, drop the count
			return (template:gsub(MARK.."(%d+)"..MARK, function(position)
				return actual[tonumber(position)]
			end))
		end)
	end

	return name, func
end
757
758
759-- ------------
760-- LCPP INTERFACE
761-- ------------
762
--- initializes a lcpp state. not needed manually. handy for testing
-- @param input the code to preprocess, as string
-- @param predefines OPTIONAL a table of predefined variables
-- @return the state object: it carries the define table, the block stack and
--         the functions working on them (called as state:name(...))
function lcpp.init(input, predefines)
	-- create state var
	local state     = {}              -- init the state object
	state.defines   = {}              -- the table of known defines and replacements
	state.screener  = screener(input)
	state.lineno    = 0               -- the current line number
	state.stack     = {}              -- stores whether the current stack level is to be included
	state.once      = {}              -- stack level was once true (first if that evals to true)

	-- funcs
	state.define = define
	state.undefine = function(state, key)
		state:define(key, nil)
	end
	state.defined = function(state, key)
		return state.defines[key] ~= nil
	end
	state.apply = apply
	state.includeFile = includeFile
	state.doWork = doWork
	state.doWork2 = doWork2
	-- current nesting depth of conditional blocks
	state.getIndent = function(state)
		return #state.stack
	end
	-- enters a conditional block; `bool` tells whether its content is active
	state.openBlock = function(state, bool)
		state.stack[#state.stack+1] = bool
		state.once [#state.once+1]  = bool
		state:define(__LCPP_INDENT__, state:getIndent(), true)
	end
	-- switches to the next branch of the innermost block; it becomes active
	-- only if `bool` holds and no earlier branch of this block was active
	state.elseBlock = function(state, bool)
		-- without an open block there is no branch to switch
		if #state.stack == 0 then error("#else/#elif without a matching #if") end
		if state.once[#state.once] then
			state.stack[#state.stack] = false
		else
			state.stack[#state.stack] = bool
			if bool then state.once[#state.once] = true end
		end
	end
	-- leaves the innermost conditional block
	state.closeBlock = function(state)
		-- the depth can never drop below zero, so the underflow has to be
		-- detected before the pop
		if #state.stack == 0 then error("Unopened block detected. Indentaion problem.") end
		state.stack[#state.stack] = nil
		state.once [#state.once]  = nil
		state:define(__LCPP_INDENT__, state:getIndent(), true)
	end
	-- true while any enclosing block is inactive
	state.skip = function(state)
		for i = 1, #state.stack do
			if not state.stack[i] then return true end
		end
		return false
	end
	-- fetches the next chunk from the screener and advances __LINE__
	state.getLine = function(state)
		state.lineno = state.lineno + 1
		state:define(__LINE__, state.lineno, true)
		return state.screener()
	end
	state.prepareMacro = prepareMacro
	state.parseExpr = parseExpr
	state.parseFunction = parseFunction

	-- predefines
	state:define(__DATE__, os.date("%B %d %Y"), true)
	state:define(__TIME__, os.date("%H:%M:%S"), true)
	state:define(__LINE__, state.lineno, true)
	state:define(__LCPP_INDENT__, state:getIndent(), true)
	predefines = predefines or {}
	for k,v in pairs(lcpp.ENV) do	state:define(k, v, true) end	-- static ones
	for k,v in pairs(predefines) do	state:define(k, v, true) end

	if lcpp.LCPP_TEST then lcpp.STATE = state end -- activate static state debugging

	return state
end
835
--- the preprocessor's main function.
-- returns the preprocessed output as a string, followed by the final state.
-- @param code data as string
-- @param predefines OPTIONAL a table of predefined variables
-- @usage lcpp.compile("#define bar 0x1337\nstatic const int foo = bar;")
-- @usage lcpp.compile("#define bar 0x1337\nstatic const int foo = bar;", {["bar"] = "0x1338"})
function lcpp.compile(code, predefines)
	local state = lcpp.init(code, predefines)

	-- collect everything the worker produces, then join with newlines
	local chunks, count = {}, 0
	for chunk in state:doWork() do
		count = count + 1
		chunks[count] = chunk
	end
	local result = table.concat(chunks, NEWL)

	if lcpp.DEBUG then print(result) end
	return result, state
end
852
--- preprocesses header file content.
-- Like lcpp.compile, but defines __LCPP_INSIDE_HEADERFILE__ on the state and
-- uses the doWork2 worker.
-- @param code data as string
-- @param predefines OPTIONAL a table of predefined variables
-- @return the preprocessed output as a string, and the final state
function lcpp.compile2(code, predefines)
	local state = lcpp.init(code, predefines)
	state:define(__LCPP_INSIDE_HEADERFILE__,true)

	local chunks, count = {}, 0
	for chunk in state:doWork2() do
		count = count + 1
		chunks[count] = chunk
	end
	local result = table.concat(chunks, NEWL)

	if lcpp.DEBUG then print(result) end
	return result, state
end
864
--- preprocesses a file
-- Raises an error if no filename is given or the file cannot be opened.
-- Note: `predefines` (when given) receives a __FILE__ entry.
-- @param filename the file to read
-- @param predefines OPTIONAL a table of predefined variables
-- @return the preprocessed output as a string, and the final state
-- @usage out, state = lcpp.compileFile("../odbg/plugin.h", {["MAXPATH"]=260, ["UNICODE"]=true})
function lcpp.compileFile(filename, predefines)
	if not filename then error("processFile() arg1 has to be a string") end
	local file = io.open(filename, 'r')
	if not file then error("file not found: "..filename) end
	local code = file:read('*a')
	file:close() -- release the handle right away instead of waiting for the GC
	predefines = predefines or {}
	predefines[__FILE__] = filename
	return lcpp.compile(code, predefines)
end
878
--- preprocesses a header file (uses lcpp.compile2)
-- Raises an error if no filename is given or the file cannot be opened.
-- Note: `predefines` (when given) receives a __FILE__ entry.
-- @param filename the file to read
-- @param predefines OPTIONAL a table of predefined variables
-- @return the preprocessed output as a string, and the final state
function lcpp.compileHeaderFile(filename, predefines)
	if not filename then error("processFile() arg1 has to be a string") end
	local file = io.open(filename, 'r')
	if not file then error("file not found: "..filename) end
	local code = file:read('*a')
	file:close() -- release the handle right away instead of waiting for the GC
	predefines = predefines or {}
	predefines[__FILE__] = filename
	return lcpp.compile2(code, predefines)
end
888
889
890
891-- ------------
892-- REGISTER LCPP
893-- ------------
894
--- disable lcpp processing for ffi, loadstring and such
-- Puts the original ffi.cdef back if lcpp.enable() had replaced it.
function lcpp.disable()
	if lcpp.LCPP_LUA then
		-- nothing to undo: the loadstring hook is never installed
		-- _G.loadstring = _G.loadstring_lcpp_backup
	end

	if not lcpp.LCPP_FFI then return end
	if not pcall(require, "ffi") then return end

	-- NOTE: "ffi" is (and always was) assigned as a global here
	ffi = require("ffi")
	local original = ffi.lcpp_cdef_backup
	if original then
		ffi.cdef = original
		ffi.lcpp_cdef_backup = nil
	end
end
910
--- (re)enable lcpp processing for ffi, loadstring and such
-- With lcpp.LCPP_FFI set and the "ffi" module loadable, ffi.cdef is wrapped
-- so that its input goes through lcpp.compile first; the defines collected
-- so far are kept in ffi.lcpp_defs and reused by later calls.
-- With lcpp.LCPP_LUA set this raises an error (not implemented).
function lcpp.enable()
	-- Use LCPP to process Lua code (load, loadfile, loadstring...)
	if lcpp.LCPP_LUA then
		-- TODO: make it properly work on all functions
		error("lcpp.LCPP_LUA = true -- not properly implemented yet");
		-- not reached as long as the error above is raised
		_G.loadstring_lcpp_backup = _G.loadstring
		_G.loadstring = function(str, chunk)
			return loadstring_lcpp_backup(lcpp.compile(str), chunk)
		end
	end

	-- Use LCPP as LuaJIT PreProcessor if used inside LuaJIT. i.e. Hook ffi.cdef
	if not (lcpp.LCPP_FFI and pcall(require, "ffi")) then return end

	-- NOTE: "ffi" is (and always was) assigned as a global here
	ffi = require("ffi")
	if ffi.lcpp_cdef_backup then return end -- hook is already installed

	-- defs are stored and reused
	if not ffi.lcpp_defs then ffi.lcpp_defs = {} end

	ffi.lcpp = function(input)
		local output, state = lcpp.compile(input, ffi.lcpp_defs)
		ffi.lcpp_defs = state.defines
		return output
	end

	ffi.lcpp_cdef_backup = ffi.cdef
	ffi.cdef = function(input)
		return ffi.lcpp_cdef_backup(ffi.lcpp(input))
	end
end
937
938lcpp.enable()
939return lcpp