1#!/usr/bin/env python
2# encoding: utf-8
3# Thomas Nagy, 2006-2018 (ita)
4
5"""
6C/C++ preprocessor for finding dependencies
7
8Reasons for using the Waf preprocessor by default
9
10#. Some c/c++ extensions (Qt) require a custom preprocessor for obtaining the dependencies (.moc files)
11#. Not all compilers provide .d files for obtaining the dependencies (portability)
12#. A naive file scanner will not catch the constructs such as "#include foo()"
13#. A naive file scanner will catch unnecessary dependencies (change an unused header -> recompile everything)
14
15Regarding the speed concerns:
16
17* the preprocessing is performed only when files must be compiled
18* the macros are evaluated only for #if/#elif/#include
19* system headers are not scanned by default
20
Now if you do not want the Waf preprocessor, the tool *gccdeps* uses the .d files produced
22during the compilation to track the dependencies (useful when used with the boost libraries).
23It only works with gcc >= 4.4 though.
24
25A dumb preprocessor is also available in the tool *c_dumbpreproc*
26"""
27# TODO: more varargs, pragma once
28
29import re, string, traceback
30from waflib import Logs, Utils, Errors
31
class PreprocError(Errors.WafError):
	"""Raised when a preprocessor directive or expression cannot be parsed"""
	pass
34
# upper bounds for the lru caches attached to the build context
# (header lookups and preprocessor lines, see c_parser below)
FILE_CACHE_SIZE = 100000
LINE_CACHE_SIZE = 100000

POPFILE = '-'
"Constant representing a special token used in :py:meth:`waflib.Tools.c_preproc.c_parser.start` iteration to switch to a header read previously"

recursion_limit = 150
"Limit on the amount of files to read in the dependency scanner"

go_absolute = False
"Set to True to track headers on files in /usr/include, else absolute paths are ignored (but it becomes very slow)"

# system header locations, only scanned when go_absolute is enabled;
# there is no such convention on Windows, hence the empty list there
standard_includes = ['/usr/local/include', '/usr/include']
if Utils.is_win32:
	standard_includes = []

use_trigraphs = 0
"""Apply trigraph rules (False by default)"""

# obsolete, do not use
strict_quotes = 0
56
# iso646 alternative spellings; identifiers found in this table are
# re-tagged as operators during tokenization (see tokenize_private)
# NOTE(review): 'not_eq' maps to '!' rather than '!=' — looks odd, confirm intended
g_optrans = {
'not':'!',
'not_eq':'!',
'and':'&&',
'and_eq':'&=',
'or':'||',
'or_eq':'|=',
'xor':'^',
'xor_eq':'^=',
'bitand':'&',
'bitor':'|',
'compl':'~',
}
"""Operators such as and/or/xor for c++. Set an empty dict to disable."""
71
# ignore #warning and #error
# matches any recognized preprocessor directive line, written with either
# '#' or the '%:' digraph; group 1 is the keyword, group 2 the remainder
re_lines = re.compile(
	'^[ \t]*(?:#|%:)[ \t]*(ifdef|ifndef|if|else|elif|endif|include|import|define|undef|pragma)[ \t]*(.*)\r*$',
	re.IGNORECASE | re.MULTILINE)
"""Match #include lines"""

re_mac = re.compile(r"^[a-zA-Z_]\w*")
"""Match macro definitions"""

re_fun = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*[(]')
"""Match macro functions"""

re_pragma_once = re.compile(r'^\s*once\s*', re.IGNORECASE)
"""Match #pragma once statements"""

# line continuations (backslash + newline), removed before directive matching
re_nl = re.compile('\\\\\r*\n', re.MULTILINE)
"""Match newlines"""

# string and character literals are part of the pattern so that '//' inside
# a string is not mistaken for a comment; repl() below keeps the literals
re_cpp = re.compile(r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE )
"""Filter C/C++ comments"""

trig_def = [('??'+a, b) for a, b in zip("=-/!'()<>", r'#~\|^[]{}')]
"""Trigraph definitions"""

# NOTE(review): C defines \v as 11 and \f as 12; the values below look
# swapped — harmless for dependency scanning, but confirm before relying on them
chr_esc = {'0':0, 'a':7, 'b':8, 't':9, 'n':10, 'f':11, 'v':12, 'r':13, '\\':92, "'":39}
"""Escape characters"""
98
NUM   = 'i'
"""Number token"""

OP    = 'O'
"""Operator token"""

IDENT = 'T'
"""Identifier token"""

STR   = 's'
"""String token"""

# CHAR is declared for completeness; character literals are folded into NUM
# by the lexer below (tok_types does not include CHAR)
CHAR  = 'c'
"""Character token"""

tok_types = [NUM, STR, IDENT, OP]
"""Token types"""

# one regex alternative per entry of tok_types, in the same order; the inner
# group names (hex, char, n0, n2, n4, oct, ...) are read by tokenize_private
exp_types = [
	r"""0[xX](?P<hex>[a-fA-F0-9]+)(?P<qual1>[uUlL]*)|L*?'(?P<char>(\\.|[^\\'])+)'|(?P<n1>\d+)[Ee](?P<exp0>[+-]*?\d+)(?P<float0>[fFlL]*)|(?P<n2>\d*\.\d+)([Ee](?P<exp1>[+-]*?\d+))?(?P<float1>[fFlL]*)|(?P<n4>\d+\.\d*)([Ee](?P<exp2>[+-]*?\d+))?(?P<float2>[fFlL]*)|(?P<oct>0*)(?P<n0>\d+)(?P<qual2>[uUlL]*)""",
	r'L?"([^"\\]|\\.)*"',
	r'[a-zA-Z_]\w*',
	r'%:%:|<<=|>>=|\.\.\.|<<|<%|<:|<=|>>|>=|\+\+|\+=|--|->|-=|\*=|/=|%:|%=|%>|==|&&|&=|\|\||\|=|\^=|:>|!=|##|[\(\)\{\}\[\]<>\?\|\^\*\+&=:!#;,%/\-\?\~\.]',
]
"""Expression types"""

re_clexer = re.compile('|'.join(["(?P<%s>%s)" % (name, part) for name, part in zip(tok_types, exp_types)]), re.M)
"""Match expressions into tokens"""

# parser states for nested #if/#elif/#else blocks (see c_parser.start)
accepted  = 'a'
"""Parser state is *accepted*"""

ignored   = 'i'
"""Parser state is *ignored*, for example preprocessor lines in an #if 0 block"""

undefined = 'u'
"""Parser state is *undefined* at the moment"""

skipped   = 's'
"""Parser state is *skipped*, for example preprocessor lines in a #elif 0 block"""
139
def repl(m):
	"""Replace function used with :py:attr:`waflib.Tools.c_preproc.re_cpp` to strip comments"""
	txt = m.group()
	if txt.startswith('/'):
		# a comment: replace by a single space
		return ' '
	# a string or character literal: keep it as is
	return txt
146
ops = ['* / %', '+ -', '<< >>', '< <= >= >', '== !=', '& | ^', '&& ||', ',']
"""Operator groups, from the most binding (level 0) to the least binding"""

prec = {sym: level for level, group in enumerate(ops) for sym in group.split()}
"""
Operator precedence rules required for parsing expressions of the form::

	#if 1 && 2 != 0
"""
157
158def reduce_nums(val_1, val_2, val_op):
159	"""
160	Apply arithmetic rules to compute a result
161
162	:param val1: input parameter
163	:type val1: int or string
164	:param val2: input parameter
165	:type val2: int or string
166	:param val_op: C operator in *+*, */*, *-*, etc
167	:type val_op: string
168	:rtype: int
169	"""
170	#print val_1, val_2, val_op
171
172	# now perform the operation, make certain a and b are numeric
173	try:
174		a = 0 + val_1
175	except TypeError:
176		a = int(val_1)
177	try:
178		b = 0 + val_2
179	except TypeError:
180		b = int(val_2)
181
182	d = val_op
183	if d == '%':
184		c = a % b
185	elif d=='+':
186		c = a + b
187	elif d=='-':
188		c = a - b
189	elif d=='*':
190		c = a * b
191	elif d=='/':
192		c = a / b
193	elif d=='^':
194		c = a ^ b
195	elif d=='==':
196		c = int(a == b)
197	elif d=='|'  or d == 'bitor':
198		c = a | b
199	elif d=='||' or d == 'or' :
200		c = int(a or b)
201	elif d=='&'  or d == 'bitand':
202		c = a & b
203	elif d=='&&' or d == 'and':
204		c = int(a and b)
205	elif d=='!=' or d == 'not_eq':
206		c = int(a != b)
207	elif d=='^'  or d == 'xor':
208		c = int(a^b)
209	elif d=='<=':
210		c = int(a <= b)
211	elif d=='<':
212		c = int(a < b)
213	elif d=='>':
214		c = int(a > b)
215	elif d=='>=':
216		c = int(a >= b)
217	elif d=='<<':
218		c = a << b
219	elif d=='>>':
220		c = a >> b
221	else:
222		c = 0
223	return c
224
def get_num(lst):
	"""
	Try to obtain a number from a list of tokens. The token types are defined in :py:attr:`waflib.Tools.ccroot.tok_types`.

	:param lst: list of preprocessor tokens
	:type lst: list of tuple (tokentype, value)
	:return: a pair containing the number and the rest of the list
	:rtype: tuple(value, list)
	"""
	if not lst:
		raise PreprocError('empty list for get_num')
	(p, v) = lst[0]
	if p == OP:
		if v == '(':
			# find the index of the matching closing parenthesis
			count_par = 1
			i = 1
			while i < len(lst):
				(p, v) = lst[i]

				if p == OP:
					if v == ')':
						count_par -= 1
						if count_par == 0:
							break
					elif v == '(':
						count_par += 1
				i += 1
			else:
				# while-else: the tokens ran out without a matching ')'
				raise PreprocError('rparen expected %r' % lst)

			# evaluate the sub-expression between the parentheses
			(num, _) = get_term(lst[1:i])
			return (num, lst[i+1:])

		elif v == '+':
			# unary plus is a no-op
			return get_num(lst[1:])
		elif v == '-':
			# unary minus, implemented as multiplication by -1
			num, lst = get_num(lst[1:])
			return (reduce_nums('-1', num, '*'), lst)
		elif v == '!':
			# logical negation
			num, lst = get_num(lst[1:])
			return (int(not int(num)), lst)
		elif v == '~':
			# bitwise complement
			num, lst = get_num(lst[1:])
			return (~ int(num), lst)
		else:
			raise PreprocError('Invalid op token %r for get_num' % lst)
	elif p == NUM:
		return v, lst[1:]
	elif p == IDENT:
		# all macros should have been replaced, remaining identifiers eval to 0
		return 0, lst[1:]
	else:
		raise PreprocError('Invalid token %r for get_num' % lst)
278
def get_term(lst):
	"""
	Evaluate an expression recursively, for example::

		1+1+1 -> 2+1 -> 3

	:param lst: list of tokens
	:type lst: list of tuple(token, value)
	:return: the value and the remaining tokens
	:rtype: value, list
	"""

	if not lst:
		raise PreprocError('empty list for get_term')
	num, lst = get_num(lst)
	if not lst:
		return (num, [])
	(p, v) = lst[0]
	if p == OP:
		if v == ',':
			# skip
			return get_term(lst[1:])
		elif v == '?':
			# conditional expression: locate the ':' belonging to this '?'
			count_par = 0
			i = 1
			while i < len(lst):
				(p, v) = lst[i]

				if p == OP:
					if v == ')':
						count_par -= 1
					elif v == '(':
						count_par += 1
					elif v == ':':
						if count_par == 0:
							break
				i += 1
			else:
				# while-else: no matching ':' was found
				raise PreprocError('rparen expected %r' % lst)

			# evaluate only the branch selected by the condition
			if int(num):
				return get_term(lst[1:i])
			else:
				return get_term(lst[i+1:])

		else:
			# binary operator: fetch the right-hand operand first
			num2, lst = get_num(lst[1:])

			if not lst:
				# no more tokens to process
				num2 = reduce_nums(num, num2, v)
				return get_term([(NUM, num2)] + lst)

			# operator precedence
			p2, v2 = lst[0]
			if p2 != OP:
				raise PreprocError('op expected %r' % lst)

			if prec[v2] >= prec[v]:
				# v binds at least as tightly as v2 (lower level = tighter):
				# fold the left pair now
				num2 = reduce_nums(num, num2, v)
				return get_term([(NUM, num2)] + lst)
			else:
				# v2 binds more tightly: fold the right-hand pair first
				num3, lst = get_num(lst[1:])
				num3 = reduce_nums(num2, num3, v2)
				return get_term([(NUM, num), (p, v), (NUM, num3)] + lst)


	raise PreprocError('cannot reduce %r' % lst)
347
def reduce_eval(lst):
	"""
	Take a list of tokens and output true or false for #if/#elif conditions.

	:param lst: a list of tokens
	:type lst: list of tuple(token, value)
	:return: a token
	:rtype: tuple(NUM, int)
	"""
	value, _ = get_term(lst)
	return (NUM, value)
359
def stringize(lst):
	"""
	Merge a list of tokens into a string

	:param lst: a list of tokens
	:type lst: list of tuple(token, value)
	:rtype: string
	"""
	return ''.join(str(value) for _, value in lst)
370
def paste_tokens(t1, t2):
	"""
	Token pasting works between identifiers, particular operators, and identifiers and numbers::

		a ## b  ->  ab
		> ## =  ->  >=
		a ## 2  ->  a2

	:param t1: token
	:type t1: tuple(type, value)
	:param t2: token
	:type t2: tuple(type, value)
	"""
	kind1, kind2 = t1[0], t2[0]
	merged = None
	if kind1 == OP and kind2 == OP:
		merged = OP
	elif kind1 == IDENT and kind2 in (IDENT, NUM):
		merged = IDENT
	elif kind1 == NUM and kind2 == NUM:
		merged = NUM
	if not merged:
		raise PreprocError('tokens do not make a valid paste %r and %r' % (t1, t2))
	return (merged, t1[1] + t2[1])
394
def reduce_tokens(lst, defs, ban=[]):
	"""
	Replace the tokens in lst, using the macros provided in defs, and a list of macros that cannot be re-applied

	:param lst: list of tokens
	:type lst: list of tuple(token, value)
	:param defs: macro definitions
	:type defs: dict
	:param ban: macros that cannot be substituted (recursion is not allowed)
	:type ban: list of string
	:return: the new list of tokens
	:rtype: value, list
	"""
	# NOTE(review): the mutable default for *ban* is safe because the list is
	# never mutated in place (recursive calls always pass ban+[v]); also, *ban*
	# is threaded through the recursion but never consulted here — confirm that
	# self-referential macros are meant to rely on the callers catching errors

	i = 0
	while i < len(lst):
		(p, v) = lst[i]

		if p == IDENT and v == "defined":
			# 'defined X' or 'defined(X)': replace by 1 or 0 in place
			del lst[i]
			if i < len(lst):
				(p2, v2) = lst[i]
				if p2 == IDENT:
					if v2 in defs:
						lst[i] = (NUM, 1)
					else:
						lst[i] = (NUM, 0)
				elif p2 == OP and v2 == '(':
					del lst[i]
					(p2, v2) = lst[i]
					del lst[i] # remove the ident, and change the ) for the value
					if v2 in defs:
						lst[i] = (NUM, 1)
					else:
						lst[i] = (NUM, 0)
				else:
					raise PreprocError('Invalid define expression %r' % lst)

		elif p == IDENT and v in defs:

			# lazily parse string definitions into (params, tokens) form
			if isinstance(defs[v], str):
				a, b = extract_macro(defs[v])
				defs[v] = b
			macro_def = defs[v]
			to_add = macro_def[1]

			if isinstance(macro_def[0], list):
				# macro without arguments
				del lst[i]
				accu = to_add[:]
				reduce_tokens(accu, defs, ban+[v])
				for tmp in accu:
					lst.insert(i, tmp)
					i += 1
			else:
				# collect the arguments for the funcall

				args = []
				del lst[i]

				if i >= len(lst):
					raise PreprocError('expected ( after %r (got nothing)' % v)

				(p2, v2) = lst[i]
				if p2 != OP or v2 != '(':
					raise PreprocError('expected ( after %r' % v)

				del lst[i]

				# split the argument tokens on top-level commas
				one_param = []
				count_paren = 0
				while i < len(lst):
					p2, v2 = lst[i]

					del lst[i]
					if p2 == OP and count_paren == 0:
						if v2 == '(':
							one_param.append((p2, v2))
							count_paren += 1
						elif v2 == ')':
							if one_param:
								args.append(one_param)
							break
						elif v2 == ',':
							if not one_param:
								raise PreprocError('empty param in funcall %r' % v)
							args.append(one_param)
							one_param = []
						else:
							one_param.append((p2, v2))
					else:
						one_param.append((p2, v2))
						if   v2 == '(':
							count_paren += 1
						elif v2 == ')':
							count_paren -= 1
				else:
					# while-else: the closing ')' was never found
					raise PreprocError('malformed macro')

				# substitute the arguments within the define expression
				accu = []
				arg_table = macro_def[0]
				j = 0
				while j < len(to_add):
					(p2, v2) = to_add[j]

					if p2 == OP and v2 == '#':
						# stringize is for arguments only
						if j+1 < len(to_add) and to_add[j+1][0] == IDENT and to_add[j+1][1] in arg_table:
							toks = args[arg_table[to_add[j+1][1]]]
							accu.append((STR, stringize(toks)))
							j += 1
						else:
							accu.append((p2, v2))
					elif p2 == OP and v2 == '##':
						# token pasting, how can man invent such a complicated system?
						if accu and j+1 < len(to_add):
							# we have at least two tokens

							t1 = accu[-1]

							if to_add[j+1][0] == IDENT and to_add[j+1][1] in arg_table:
								# paste with a macro argument
								toks = args[arg_table[to_add[j+1][1]]]

								if toks:
									accu[-1] = paste_tokens(t1, toks[0]) #(IDENT, accu[-1][1] + toks[0][1])
									accu.extend(toks[1:])
								else:
									# error, case "a##"
									accu.append((p2, v2))
									accu.extend(toks)
							elif to_add[j+1][0] == IDENT and to_add[j+1][1] == '__VA_ARGS__':
								# first collect the tokens
								va_toks = []
								st = len(macro_def[0])
								pt = len(args)
								for x in args[pt-st+1:]:
									va_toks.extend(x)
									va_toks.append((OP, ','))
								if va_toks:
									va_toks.pop() # extra comma
								if len(accu)>1:
									(p3, v3) = accu[-1]
									(p4, v4) = accu[-2]
									if v3 == '##':
										# remove the token paste
										accu.pop()
										if v4 == ',' and pt < st:
											# remove the comma
											accu.pop()
								accu += va_toks
							else:
								# paste with a literal token from the expansion
								accu[-1] = paste_tokens(t1, to_add[j+1])

							j += 1
						else:
							# Invalid paste, case    "##a" or "b##"
							accu.append((p2, v2))

					elif p2 == IDENT and v2 in arg_table:
						# plain argument reference: expand it before insertion
						toks = args[arg_table[v2]]
						reduce_tokens(toks, defs, ban+[v])
						accu.extend(toks)
					else:
						accu.append((p2, v2))

					j += 1


				# rescan the fully substituted expansion
				reduce_tokens(accu, defs, ban+[v])

				# splice the expansion back, preserving order
				for x in range(len(accu)-1, -1, -1):
					lst.insert(i, accu[x])

		i += 1
570
571
def eval_macro(lst, defs):
	"""
	Reduce the tokens by :py:func:`waflib.Tools.c_preproc.reduce_tokens` and try to return a 0/1 result by :py:func:`waflib.Tools.c_preproc.reduce_eval`.

	:param lst: list of tokens
	:type lst: list of tuple(token, value)
	:param defs: macro definitions
	:type defs: dict
	:return: True when the expression evaluates to a non-zero value
	:rtype: bool
	:raises PreprocError: if no tokens remain or an unexpanded macro is left over
	"""
	reduce_tokens(lst, defs, [])
	if not lst:
		raise PreprocError('missing tokens to evaluate')

	# after substitution, a leading identifier that is not a macro cannot be
	# evaluated meaningfully (the redundant 'if lst:' guard was removed: the
	# list is necessarily non-empty past the raise above)
	p, v = lst[0]
	if p == IDENT and v not in defs:
		raise PreprocError('missing macro %r' % lst)

	p, v = reduce_eval(lst)
	return int(v) != 0
593
def extract_macro(txt):
	"""
	Process a macro definition of the form::
		 #define f(x, y) x * y

	into a function or a simple macro without arguments

	:param txt: expression to exact a macro definition from
	:type txt: string
	:return: a tuple containing the name, the list of arguments and the replacement
	:rtype: tuple(string, [list, list])
	"""
	t = tokenize(txt)
	if re_fun.search(txt):
		# function-like macro: the name is immediately followed by '('
		p, name = t[0]

		p, v = t[1]
		if p != OP:
			raise PreprocError('expected (')

		i = 1
		pindex = 0
		params = {}
		prev = '('

		# small state machine over the parameter list; prev is one of
		# '(' (just opened), IDENT (after a parameter name) or ',' (expecting one)
		while 1:
			i += 1
			p, v = t[i]

			if prev == '(':
				if p == IDENT:
					params[v] = pindex
					pindex += 1
					prev = p
				elif p == OP and v == ')':
					# empty parameter list
					break
				else:
					raise PreprocError('unexpected token (3)')
			elif prev == IDENT:
				if p == OP and v == ',':
					prev = v
				elif p == OP and v == ')':
					break
				else:
					raise PreprocError('comma or ... expected')
			elif prev == ',':
				if p == IDENT:
					params[v] = pindex
					pindex += 1
					prev = p
				elif p == OP and v == '...':
					# variadic parameter lists are not supported here
					raise PreprocError('not implemented (1)')
				else:
					raise PreprocError('comma or ... expected (2)')
			elif prev == '...':
				raise PreprocError('not implemented (2)')
			else:
				raise PreprocError('unexpected else')

		#~ print (name, [params, t[i+1:]])
		# params maps each argument name to its position; t[i+1:] is the expansion
		return (name, [params, t[i+1:]])
	else:
		# object-like macro (no argument list)
		(p, v) = t[0]
		if len(t) > 1:
			return (v, [[], t[1:]])
		else:
			# empty define, assign an empty token
			return (v, [[], [('T','')]])
662
re_include = re.compile(r'^\s*(<(?:.*)>|"(?:.*)")')
def extract_include(txt, defs):
	"""
	Process a line in the form::

		#include foo

	:param txt: include line to process
	:type txt: string
	:param defs: macro definitions
	:type defs: dict
	:return: the file name
	:rtype: string
	"""
	match = re_include.search(txt)
	if match:
		# plain '<...>' or '"..."' include: kind is the first character
		quoted = match.group(1)
		return quoted[0], quoted[1:-1]

	# perform preprocessing and look at the result, it must match an include
	toks = tokenize(txt)
	reduce_tokens(toks, defs, ['waf_include'])
	if not toks:
		raise PreprocError('could not parse include %r' % txt)

	if len(toks) == 1 and toks[0][0] == STR:
		# a single string token: quoted include
		return '"', toks[0][1]
	if len(toks) > 1 and toks[0][1] == '<' and toks[-1][1] == '>':
		# system include reassembled from separate tokens
		return '<', stringize(toks).lstrip('<').rstrip('>')

	raise PreprocError('could not parse include %r' % txt)
698
def parse_char(txt):
	"""
	Parse a C character literal (the text between the quotes)

	:param txt: character to parse, for example ``A``, ``\\n`` or ``\\x41``
	:type txt: string
	:return: the character code; octal escapes return a (length, value) tuple (historical quirk, kept for compatibility)
	:rtype: int
	:raises PreprocError: when the literal cannot be interpreted
	"""
	if not txt:
		raise PreprocError('attempted to parse a null char')
	if txt[0] != '\\':
		# plain character
		return ord(txt)

	c = txt[1]
	if c == 'x':
		# hexadecimal escape such as \x41; the previous length-4 special case
		# returned the exact same expression and was removed as redundant
		return int(txt[2:], 16)
	elif c.isdigit():
		# octal escape such as \101; \0 alone is NUL
		if c == '0' and len(txt) == 2:
			return 0
		for i in 3, 2, 1:
			if len(txt) > i and txt[1:1+i].isdigit():
				# NOTE(review): unlike the other branches this returns a tuple;
				# preserved as-is since token values are only compared for equality
				return (1+i, int(txt[1:1+i], 8))
	else:
		# named escape such as \n or \t
		try:
			return chr_esc[c]
		except KeyError:
			raise PreprocError('could not parse char literal %r' % txt)
729
def tokenize(s):
	"""
	Convert a string into a list of tokens (shlex.split does not apply to c/c++/d)

	:param s: input to tokenize
	:type s: string
	:return: a list of tokens
	:rtype: list of tuple(token, value)
	"""
	# hand out a fresh copy so that callers may mutate the result freely
	return list(tokenize_private(s))
740
def tokenize_private(s):
	"""
	Tokenize the input with :py:attr:`waflib.Tools.c_preproc.re_clexer`;
	:py:func:`waflib.Tools.c_preproc.tokenize` hands out copies of the result

	:param s: input to tokenize
	:type s: string
	:return: a list of tokens
	:rtype: list of tuple(token, value)
	"""
	ret = []
	for match in re_clexer.finditer(s):
		m = match.group
		# find which of the named top-level alternatives actually matched
		for name in tok_types:
			v = m(name)
			if v:
				if name == IDENT:
					if v in g_optrans:
						# iso646 alternative operator spelling (and, bitor, ...)
						name = OP
					elif v.lower() == "true":
						v = 1
						name = NUM
					elif v.lower() == "false":
						v = 0
						name = NUM
				elif name == NUM:
					if m('oct'):
						v = int(v, 8)
					elif m('hex'):
						v = int(m('hex'), 16)
					elif m('n0'):
						# plain decimal integer, kept as a string
						v = m('n0')
					else:
						v = m('char')
						if v:
							v = parse_char(v)
						else:
							# floating-point literal, kept as a string
							v = m('n2') or m('n4')
				elif name == OP:
					# normalize the '%:' and '%:%:' digraphs
					if v == '%:':
						v = '#'
					elif v == '%:%:':
						v = '##'
				elif name == STR:
					# remove the quotes around the string
					v = v[1:-1]
				ret.append((name, v))
				break
	return ret
781
def format_defines(lst):
	"""
	Convert a list of 'KEY' or 'KEY=VALUE' strings into '#define'-style bodies

	:param lst: macro definitions as passed on a compiler command line
	:type lst: list of string
	:return: the formatted expressions; empty entries are dropped
	:rtype: list of string
	"""
	out = []
	for define in lst:
		if not define:
			continue
		key, eq, val = define.partition('=')
		if not eq:
			# "-DFOO" should give "#define FOO 1"
			out.append(define)
		elif key:
			# all others are assumed to be -DX=Y
			out.append('%s %s' % (key, val))
		else:
			raise ValueError('Invalid define expression %r' % define)
	return out
796
class c_parser(object):
	"""
	Used by :py:func:`waflib.Tools.c_preproc.scan` to parse c/h files. Note that by default,
	only project headers are parsed.
	"""
	def __init__(self, nodepaths=None, defines=None):
		"""
		:param nodepaths: directory nodes used as include search paths
		:param defines: initial macro definitions (copied, the caller's dict is not modified)
		"""
		self.lines = []
		"""list of lines read"""

		if defines is None:
			self.defs  = {}
		else:
			self.defs  = dict(defines) # make a copy
		# stack of accepted/ignored/skipped/undefined states, one per nested #if block
		self.state = []

		# number of files on the inclusion stack, bounded by recursion_limit
		self.count_files = 0
		# parent directories of the files being read, used for quoted include lookups
		self.currentnode_stack = []

		self.nodepaths = nodepaths or []
		"""Include paths"""

		self.nodes = []
		"""List of :py:class:`waflib.Node.Node` found so far"""

		self.names = []
		"""List of file names that could not be matched by any file"""

		self.curfile = ''
		"""Current file"""

		self.ban_includes = set()
		"""Includes that must not be read (#pragma once)"""

		self.listed = set()
		"""Include nodes/names already listed to avoid duplicates in self.nodes/self.names"""

	def cached_find_resource(self, node, filename):
		"""
		Find a file from the input directory

		:param node: directory
		:type node: :py:class:`waflib.Node.Node`
		:param filename: header to find
		:type filename: string
		:return: the node if found, or None
		:rtype: :py:class:`waflib.Node.Node`
		"""
		# the cache lives on the build context so that all c_parser instances
		# of one build share it
		try:
			cache = node.ctx.preproc_cache_node
		except AttributeError:
			cache = node.ctx.preproc_cache_node = Utils.lru_cache(FILE_CACHE_SIZE)

		key = (node, filename)
		try:
			return cache[key]
		except KeyError:
			ret = node.find_resource(filename)
			if ret:
				if getattr(ret, 'children', None):
					# directories are not valid include targets
					ret = None
				elif ret.is_child_of(node.ctx.bldnode):
					# discard build-tree results whose source-tree counterpart
					# is a directory
					tmp = node.ctx.srcnode.search_node(ret.path_from(node.ctx.bldnode))
					if tmp and getattr(tmp, 'children', None):
						ret = None
			cache[key] = ret
			return ret

	def tryfind(self, filename, kind='"', env=None):
		"""
		Try to obtain a node from the filename based from the include paths. Will add
		the node found to :py:attr:`waflib.Tools.c_preproc.c_parser.nodes` or the file name to
		:py:attr:`waflib.Tools.c_preproc.c_parser.names` if no corresponding file is found. Called by
		:py:attr:`waflib.Tools.c_preproc.c_parser.start`.

		:param filename: header to find
		:type filename: string
		:param kind: include style, '"' (quoted) or '<' (angle brackets)
		:type kind: string
		:param env: config set, read for MSVC_VERSION
		:return: the node if found
		:rtype: :py:class:`waflib.Node.Node`
		"""
		if filename.endswith('.moc'):
			# we could let the qt4 module use a subclass, but then the function "scan" below must be duplicated
			# in the qt4 and in the qt5 classes. So we have two lines here and it is sufficient.
			self.names.append(filename)
			return None

		self.curfile = filename

		found = None
		if kind == '"':
			if env.MSVC_VERSION:
				# msvc resolves quoted includes against the whole inclusion
				# stack, from the innermost file outwards
				for n in reversed(self.currentnode_stack):
					found = self.cached_find_resource(n, filename)
					if found:
						break
			else:
				# other compilers look next to the including file only
				found = self.cached_find_resource(self.currentnode_stack[-1], filename)

		if not found:
			for n in self.nodepaths:
				found = self.cached_find_resource(n, filename)
				if found:
					break

		listed = self.listed
		if found and not found in self.ban_includes:
			if found not in listed:
				listed.add(found)
				self.nodes.append(found)
			# recurse into the header that was just resolved
			self.addlines(found)
		else:
			if filename not in listed:
				listed.add(filename)
				self.names.append(filename)
		return found

	def filter_comments(self, node):
		"""
		Filter the comments from a c/h file, and return the preprocessor lines.
		The regexps :py:attr:`waflib.Tools.c_preproc.re_cpp`, :py:attr:`waflib.Tools.c_preproc.re_nl` and :py:attr:`waflib.Tools.c_preproc.re_lines` are used internally.

		:return: the preprocessor directives as a list of (keyword, line)
		:rtype: a list of string pairs
		"""
		# return a list of tuples : keyword, line
		code = node.read()
		if use_trigraphs:
			for (a, b) in trig_def:
				# bugfix: the original 'code.split(a).join(b)' raised
				# AttributeError (lists have no join method) whenever
				# use_trigraphs was enabled
				code = b.join(code.split(a))
		code = re_nl.sub('', code)
		code = re_cpp.sub(repl, code)
		return re_lines.findall(code)

	def parse_lines(self, node):
		"""
		Obtain the preprocessor directives for a file, in reverse order and
		terminated by a POPFILE marker; memoized in an lru cache on the build context

		:param node: file to process
		:type node: :py:class:`waflib.Node.Node`
		:rtype: list of string pairs
		"""
		try:
			cache = node.ctx.preproc_cache_lines
		except AttributeError:
			cache = node.ctx.preproc_cache_lines = Utils.lru_cache(LINE_CACHE_SIZE)
		try:
			return cache[node]
		except KeyError:
			cache[node] = lines = self.filter_comments(node)
			# lines are consumed by popping from the end, hence the reversal;
			# POPFILE tells the consumer to drop back to the including file
			lines.append((POPFILE, ''))
			lines.reverse()
			return lines

	def addlines(self, node):
		"""
		Add the lines from a header in the list of preprocessor lines to parse

		:param node: header
		:type node: :py:class:`waflib.Node.Node`
		"""

		self.currentnode_stack.append(node.parent)

		self.count_files += 1
		if self.count_files > recursion_limit:
			# issue #812
			raise PreprocError('recursion limit exceeded')

		if Logs.verbose:
			Logs.debug('preproc: reading file %r', node)
		try:
			lines = self.parse_lines(node)
		except EnvironmentError:
			raise PreprocError('could not read the file %r' % node)
		except Exception:
			# tolerate unexpected parsing failures: log them, keep scanning
			if Logs.verbose > 0:
				Logs.error('parsing %r failed %s', node, traceback.format_exc())
		else:
			self.lines.extend(lines)

	def start(self, node, env):
		"""
		Preprocess a source file to obtain the dependencies, which are accumulated to :py:attr:`waflib.Tools.c_preproc.c_parser.nodes`
		and :py:attr:`waflib.Tools.c_preproc.c_parser.names`.

		:param node: source file
		:type node: :py:class:`waflib.Node.Node`
		:param env: config set containing additional defines to take into account
		:type env: :py:class:`waflib.ConfigSet.ConfigSet`
		"""
		Logs.debug('preproc: scanning %s (in %s)', node.name, node.parent.name)

		# NOTE(review): current_file (used by #pragma once/#import) is distinct
		# from curfile (set by tryfind) — confirm both attributes are intended
		self.current_file = node
		self.addlines(node)

		# macros may be defined on the command-line, so they must be parsed as if they were part of the file
		if env.DEFINES:
			lst = format_defines(env.DEFINES)
			lst.reverse()
			self.lines.extend([('define', x) for x in lst])

		while self.lines:
			(token, line) = self.lines.pop()
			if token == POPFILE:
				# end of an included header: return to the including file
				self.count_files -= 1
				self.currentnode_stack.pop()
				continue

			try:
				state = self.state

				# make certain we define the state if we are about to enter in an if block
				if token[:2] == 'if':
					state.append(undefined)
				elif token == 'endif':
					state.pop()

				# skip lines when in a dead 'if' branch, wait for the endif
				if token[0] != 'e':
					if skipped in self.state or ignored in self.state:
						continue

				if token == 'if':
					ret = eval_macro(tokenize(line), self.defs)
					if ret:
						state[-1] = accepted
					else:
						state[-1] = ignored
				elif token == 'ifdef':
					m = re_mac.match(line)
					if m and m.group() in self.defs:
						state[-1] = accepted
					else:
						state[-1] = ignored
				elif token == 'ifndef':
					m = re_mac.match(line)
					if m and m.group() in self.defs:
						state[-1] = ignored
					else:
						state[-1] = accepted
				elif token == 'include' or token == 'import':
					(kind, inc) = extract_include(line, self.defs)
					self.current_file = self.tryfind(inc, kind, env)
					if token == 'import':
						# an #import-ed file must not be read again
						self.ban_includes.add(self.current_file)
				elif token == 'elif':
					if state[-1] == accepted:
						state[-1] = skipped
					elif state[-1] == ignored:
						if eval_macro(tokenize(line), self.defs):
							state[-1] = accepted
				elif token == 'else':
					if state[-1] == accepted:
						state[-1] = skipped
					elif state[-1] == ignored:
						state[-1] = accepted
				elif token == 'define':
					try:
						self.defs[self.define_name(line)] = line
					except AttributeError:
						# re_mac did not match: define_name called group() on None
						raise PreprocError('Invalid define line %r' % line)
				elif token == 'undef':
					m = re_mac.match(line)
					if m and m.group() in self.defs:
						self.defs.__delitem__(m.group())
						#print "undef %s" % name
				elif token == 'pragma':
					if re_pragma_once.match(line.lower()):
						self.ban_includes.add(self.current_file)
			except Exception as e:
				# the scanner is forgiving: a line that cannot be handled must
				# not abort the whole dependency scan
				if Logs.verbose:
					Logs.debug('preproc: line parsing failed (%s): %s %s', e, line, traceback.format_exc())

	def define_name(self, line):
		"""
		:param line: define line
		:type line: string
		:rtype: string
		:return: the define name
		"""
		return re_mac.match(line).group()
1070
def scan(task):
	"""
	Get the dependencies using a c/c++ preprocessor, this is required for finding dependencies of the kind::

		#include some_macro()

	This function is bound as a task method on :py:class:`waflib.Tools.c.c` and :py:class:`waflib.Tools.cxx.cxx` for example

	:param task: compilation task whose generator provides *includes_nodes*
	:return: a pair (dependency nodes found, unresolved header names)
	:rtype: tuple(list, list)
	"""
	generator = task.generator
	try:
		incn = generator.includes_nodes
	except AttributeError:
		raise Errors.WafError('%r is missing a feature such as "c", "cxx" or "includes": ' % task.generator)

	if go_absolute:
		# also look into the system header folders
		nodepaths = incn + [generator.bld.root.find_dir(x) for x in standard_includes]
	else:
		# restrict the lookups to the project tree (source and build folders)
		nodepaths = [x for x in incn if x.is_child_of(x.ctx.srcnode) or x.is_child_of(x.ctx.bldnode)]

	parser = c_parser(nodepaths)
	parser.start(task.inputs[0], task.env)
	return (parser.nodes, parser.names)
1092