1# MIT License
2#
3# Copyright The SCons Foundation
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be included
14# in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
17# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
18# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24"""SCons C Pre-Processor module"""
25
26import os
27import re
28
29# First "subsystem" of regular expressions that we set up:
30#
31# Stuff to turn the C preprocessor directives in a file's contents into
32# a list of tuples that we can process easily.
33#
34# A table of regular expressions that fetch the arguments from the rest of
35# a C preprocessor line.  Different directives have different arguments
36# that we want to fetch, using the regular expressions to which the lists
37# of preprocessor directives map.
cpp_lines_dict = {
    # Fetch the rest of a #if/#elif as one argument,
    # with white space optional.
    ('if', 'elif')      : r'\s*(.+)',

    # Fetch the rest of a #ifdef/#ifndef as one argument,
    # separated from the keyword by white space.
    ('ifdef', 'ifndef',): r'\s+(.+)',

    # Fetch the rest of a #import/#include/#include_next line as one
    # argument, with white space optional.
    ('import', 'include', 'include_next',)
                        : r'\s*(.+)',

    # We don't care what comes after a #else or #endif line.
    ('else', 'endif',)  : '',

    # Fetch three arguments from a #define line:
    #   1) The #defined keyword.
    #   2) The optional parentheses and arguments (if it's a function-like
    #      macro, None if it's not).
    #   3) The expansion value.
    ('define',)         : r'\s+([_A-Za-z][A-Za-z0-9_]*)(\([^)]*\))?\s*(.*)',

    # Fetch the #undefed keyword from a #undef line.
    ('undef',)          : r'\s+([_A-Za-z][A-Za-z0-9_]*)',
}

# Create a table that maps each individual C preprocessor directive to
# the corresponding compiled regular expression that fetches the arguments
# we care about.  Directives that share an entry in cpp_lines_dict share
# one compiled pattern object.
Table = {}
for op_list, expr in cpp_lines_dict.items():
    e = re.compile(expr)
    for op in op_list:
        Table[op] = e
del e
del op
del op_list

# Create a list of the expressions we'll use to match all of the
# preprocessor directives.  These are the same as the directives
# themselves *except* that we use a negative lookahead assertion
# when matching "if" so it doesn't match the "if" in "ifdef" or "ifndef".
override = {
    'if'                        : 'if(?!n?def)',
}
l = [override.get(x, x) for x in Table]


# Turn the list of expressions into one big regular expression that will
# match all the preprocessor lines at once.  findall() on it returns a
# list of tuples, one for each preprocessor line.  The preprocessor
# directive will be the first element in each tuple, and the rest of
# the line will be the second element.
#
# The \b after the directive group is essential: the alternatives are
# tried in order, and "include" is listed before "include_next", so
# without a word boundary "#include_next <x.h>" would be captured as
# directive "include" with the argument "_next <x.h>".
e = r'^\s*#\s*(' + '|'.join(l) + r')\b(.*)$'

# And last but not least, compile the expression.
CPP_Expression = re.compile(e, re.M)
97
98# A list with RE to cleanup CPP Expressions (tuples)
99# We should remove all comments and carriage returns (\r) before evaluating
CPP_Expression_Cleaner_List = [
    # A complete /* ... */ comment.  The match is non-greedy so that two
    # comments on one line do not swallow the code between them.
    r"/\*.*?\*/",
    # A /* comment that is not closed on this line.
    r"/\*.*",
    # A // comment running to the end of the line.
    r"//.*",
    # A stray carriage return (CRLF line endings).
    r"\r"
]
# Any white space immediately preceding the removed text is removed too.
CPP_Expression_Cleaner_RE = re.compile(
    r"\s*(" + "|".join(CPP_Expression_Cleaner_List) + ")")

def Cleanup_CPP_Expressions(ts):
    """
    Strips comments and carriage returns from preprocessor tuples.

    ``ts`` is a sequence of tuples whose first element is the directive
    and whose second element is the raw rest of the line.  Returns a new
    list of ``(directive, cleaned_rest)`` two-tuples; the input is not
    modified.
    """
    return [(t[0], CPP_Expression_Cleaner_RE.sub("", t[1])) for t in ts]
111
112#
113# Second "subsystem" of regular expressions that we set up:
114#
115# Stuff to translate a C preprocessor expression (as found on a #if or
116# #elif line) into an equivalent Python expression that we can eval().
117#
118
119# A dictionary that maps the C representation of Boolean operators
120# to their Python equivalents.
CPP_to_Python_Ops_Dict = {
    '!'         : ' not ',
    '!='        : ' != ',
    '&&'        : ' and ',
    '||'        : ' or ',
    '?'         : ' and ',
    ':'         : ' or ',
}


def CPP_to_Python_Ops_Sub(m):
    """Return the Python operator text for the C operator matched by ``m``."""
    return CPP_to_Python_Ops_Dict[m.group(0)]


# We have to sort the keys by length so that longer expressions
# come *before* shorter expressions--in particular, "!=" must
# come before "!" in the alternation.  Python's re module tries the
# alternatives from left to right and takes the first one that
# matches, rather than looking for the longest match.
l = sorted(CPP_to_Python_Ops_Dict, key=len, reverse=True)

# Turn the list of keys into one regular expression that will allow us
# to substitute all of the operators at once.
expr = '|'.join(map(re.escape, l))

# ...and compile the expression.
CPP_to_Python_Ops_Expression = re.compile(expr)

# Optional C integer-literal suffix: u, l, ll, ul, ull, lu, llu in either
# case.  Python has no such suffixes, so they are stripped.
_CPP_Integer_Suffix = r'(?:[uU](?:ll|LL|[lL])?|(?:ll|LL|[lL])[uU]?)?'

# A separate list of expressions to be evaluated and substituted
# sequentially, not all at once.
#
# The numeric patterns are wrapped in \b so that they only match whole
# tokens; otherwise digits inside an identifier (including the quoted
# names produced by the "defined" substitutions) would be rewritten.
CPP_to_Python_Eval_List = [
    [r'defined\s+(\w+)',                 '"\\1" in __dict__'],
    [r'defined\s*\((\w+)\)',             '"\\1" in __dict__'],
    [r'\b(0[xX][0-9A-Fa-f]+)' + _CPP_Integer_Suffix + r'\b',  '\\1'],
    [r'\b(\d+)' + _CPP_Integer_Suffix + r'\b',  '\\1'],
]

# Replace the string representations of the regular expressions in the
# list with compiled versions.
for _entry in CPP_to_Python_Eval_List:
    _entry[0] = re.compile(_entry[0])
del _entry

# Wrap up all of the above into a handy function.
def CPP_to_Python(s):
    """
    Converts a C pre-processor expression into an equivalent
    Python expression that can be evaluated.

    The Boolean operators are translated first (all at once), then the
    ``defined`` tests and integer-literal suffixes are rewritten one
    pattern after another.  Returns the translated string; nothing is
    evaluated here.
    """
    s = CPP_to_Python_Ops_Expression.sub(CPP_to_Python_Ops_Sub, s)
    for pattern, repl in CPP_to_Python_Eval_List:
        s = re.sub(pattern, repl, s)
    return s
171
172
173
# Remove the scratch names that were bound at module level while the
# regular-expression tables were being built, so they do not linger as
# module attributes.
del expr
del l
del override
177
178
179
class FunctionEvaluator:
    """
    Handles delayed evaluation of a #define function call.
    """
    def __init__(self, name, args, expansion):
        """
        Squirrels away the arguments and expansion value of a #define
        macro function for later evaluation when we must actually expand
        a value that uses it.

        ``args`` is the text between the macro's parentheses; it is
        split on commas into the list of parameter names.  A string
        ``expansion`` is split on the ``##`` token-pasting operator into
        a list of parts; any other value (one with no ``split``
        attribute) is stored unchanged.
        """
        self.name = name
        self.args = function_arg_separator.split(args)
        try:
            expansion = expansion.split('##')
        except AttributeError:
            pass
        self.expansion = expansion

    def __call__(self, *values):
        """
        Evaluates the expansion of a #define macro function called
        with the specified values.

        Each part of the expansion that is exactly a parameter name is
        replaced by the corresponding value; every other part is used
        literally.  The parts are then combined left to right with ``+``.

        Raises ValueError if the number of values does not match the
        number of macro parameters.
        """
        if len(self.args) != len(values):
            raise ValueError("Incorrect number of arguments to `%s'" % self.name)

        # Map the macro parameters to the values of this "call."
        bindings = dict(zip(self.args, values))

        # The expansion is a list of parts when it was a string; a
        # non-string expansion was stored as-is, so treat it as a single
        # literal part instead of trying to iterate over it.
        expansion = self.expansion
        if not isinstance(expansion, (list, tuple)):
            expansion = [expansion]

        # Substitute directly rather than building Python source text and
        # eval()ing it: the result is the same, and no text taken from a
        # header file is ever executed.
        parts = [bindings[part] if part in self.args else part
                 for part in expansion]
        result = parts[0]
        for part in parts[1:]:
            result = result + part
        return result
220
221
222
# Split the text found between the parentheses of a function-like macro
# into its individual arguments: the separator is a comma together with
# any white space that follows it.
function_arg_separator = re.compile(r',\s*')

# Search for a "function call" macro in an expansion.  A match has two
# groups: the run of non-blank characters in front of the opening
# parenthesis (the "function" name), and the text between the
# parentheses (the still-unsplit arguments).
function_name = re.compile(r'(\S+)\(([^)]*)\)')

# Match a line continuation: a backslash directly followed by a newline
# (optionally preceded by a carriage return).
line_continuations = re.compile('\\\\\r?\n')
234
235
236
class PreProcessor:

    """
    The main workhorse class for handling C pre-processing.

    Calling an instance with a file name reads that file, walks its
    preprocessor directives (tracking #define/#undef and evaluating the
    #if family) and returns the list of include files that were found.
    """
    def __init__(self, current=os.curdir, cpppath=(), dict=None, all=0, depth=-1):
        """
        Sets up the search paths, the macro namespace and the default
        dispatch table.

        current -- directory searched first for "quoted" includes and
                   last for <bracketed> ones.
        cpppath -- iterable of additional include directories.
        dict    -- initial macro definitions; it is copied, never
                   modified.  None (the default) means no definitions.
        all     -- when true, #include lines are only resolved to
                   (directive, quote, name) tuples and reported as such;
                   no files are looked up or read.
        depth   -- maximum nesting of includes: -1 is unlimited, 0
                   disables nesting, >0 is the allowed nesting level.
        """
        cpppath = tuple(cpppath)

        self.searchpath = {
            '"' :       (current,) + cpppath,
            '<' :       cpppath + (current,),
        }

        # Initialize our C preprocessor namespace for tracking the
        # values of #defined keywords.  We use this namespace to look
        # for keywords on #ifdef/#ifndef lines, and to eval() the
        # expressions on #if/#elif lines (after massaging them from C to
        # Python).
        self.cpp_namespace = {} if dict is None else dict.copy()
        # The translated "defined(X)" test is '"X" in __dict__', so the
        # namespace must be reachable from inside itself.
        self.cpp_namespace['__dict__'] = self.cpp_namespace

        # Return all includes without resolving.  #include_next is bound
        # as well; otherwise it would still be dispatched to the
        # resolving do_include through the class-level alias.
        if all:
            self.do_include = self.all_include
            self.do_include_next = self.all_include

        # Max depth of nested includes:
        # -1 = unlimited
        # 0 - disabled nesting
        # >0 - number of allowed nested includes
        self.depth = depth

        # For efficiency, a dispatch table maps each C preprocessor
        # directive (#if, #define, etc.) to the method that should be
        # called when we see it.  We accommodate state changes (#if,
        # #ifdef, #ifndef) by pushing the current dispatch table on a
        # stack and changing what method gets called for each relevant
        # directive we might see next at this level (#else, #elif).
        # #endif will simply pop the stack.
        d = {
            'scons_current_file'    : self.scons_current_file
        }
        for op in Table:
            d[op] = getattr(self, 'do_' + op)
        self.default_table = d

    def __call__(self, file):
        """
        Pre-processes a file.

        This is the main public entry point.
        """
        self.current_file = file
        return self.process_file(file)

    def process_file(self, file):
        """
        Pre-processes a file.

        This is the main internal entry point.
        """
        return self._process_tuples(self.tupleize(self.read_file(file)), file)

    def process_contents(self, contents):
        """
        Pre-processes a file contents.

        Is used by tests
        """
        return self._process_tuples(self.tupleize(contents))

    def _process_tuples(self, tuples, file=None):
        """
        Runs the directive tuples through the dispatch table.

        Resets the dispatch state, then consumes ``tuples`` from the
        front, calling the handler currently registered for each
        directive.  Handlers (do_include in particular) may push further
        tuples onto the front of the pending list while this runs.
        Returns whatever finalize_result() returns.
        """
        self.stack = []
        self.dispatch_table = self.default_table.copy()
        self.current_file = file
        self.tuples = tuples

        self.initialize_result(file)
        while self.tuples:
            t = self.tuples.pop(0)
            # Uncomment to see the list of tuples being processed (e.g.,
            # to validate the CPP lines are being translated correctly).
            # print(t)
            self.dispatch_table[t[0]](t)
        return self.finalize_result(file)

    def tupleize(self, contents):
        """
        Turns the contents of a file into a list of easily-processed
        tuples describing the CPP lines in the file.

        The first element of each tuple is the line's preprocessor
        directive (#if, #include, #define, etc., minus the initial '#').
        The remaining elements are specific to the type of directive, as
        pulled apart by the regular expression.
        """
        return self._match_tuples(self._parse_tuples(contents))

    def _parse_tuples(self, contents):
        """
        Joins continued lines, extracts every preprocessor line as a
        (directive, rest-of-line) tuple and strips comments from the rest.
        """
        contents = line_continuations.sub('', contents)
        tuples = CPP_Expression.findall(contents)
        return Cleanup_CPP_Expressions(tuples)

    def _match_tuples(self, tuples):
        """
        Applies the per-directive argument pattern from Table to each
        (directive, rest-of-line) tuple.

        Lines whose arguments do not match the pattern are dropped; the
        others become (directive,) + captured groups.
        """
        result = []
        for t in tuples:
            m = Table[t[0]].match(t[1])
            if m:
                result.append((t[0],) + m.groups())
        return result

    # Dispatch table stack manipulation methods.

    def save(self):
        """
        Pushes the current dispatch table on the stack and re-initializes
        the current dispatch table to the default.
        """
        self.stack.append(self.dispatch_table)
        self.dispatch_table = self.default_table.copy()

    def restore(self):
        """
        Pops the previous dispatch table off the stack and makes it the
        current one.

        An empty stack (a #endif without a matching #if) is deliberately
        ignored; the current table is left in place.
        """
        try:
            self.dispatch_table = self.stack.pop()
        except IndexError:
            pass

    # Utility methods.

    def do_nothing(self, t):
        """
        Null method for when we explicitly want the action for a
        specific preprocessor directive to do nothing.
        """
        pass

    def scons_current_file(self, t):
        """
        Handles the internal 'scons_current_file' marker tuple by
        recording its file name as the file currently being processed.
        """
        self.current_file = t[1]

    def eval_expression(self, t):
        """
        Evaluates a C preprocessor expression.

        This is done by converting it to a Python equivalent and
        eval()ing it in the C preprocessor namespace we use to
        track #define values.

        Everything after the first element of ``t`` is joined with
        spaces to form the expression.  An expression that cannot be
        evaluated counts as false: 0 is returned.
        """
        t = CPP_to_Python(' '.join(t[1:]))
        # NOTE(review): this eval()s text that originates from the files
        # being scanned.  Do not run the preprocessor over untrusted
        # sources without replacing this with a restricted evaluator.
        try:
            return eval(t, self.cpp_namespace)
        except (NameError, TypeError, SyntaxError, ZeroDivisionError):
            return 0

    def initialize_result(self, fname):
        """
        Starts a fresh result list whose first entry is the file being
        processed (so it is never reported as one of its own includes).
        """
        self.result = [fname]

    def finalize_result(self, fname):
        """
        Returns the collected results without the leading entry that
        initialize_result() put there.
        """
        return self.result[1:]

    def find_include_file(self, t):
        """
        Finds the #include file for a given preprocessor tuple.

        ``t`` is a resolved (directive, quote, name) tuple.  The search
        path selected by the quote character is tried in order; the
        first candidate that is an existing file is returned, or None if
        there is none.
        """
        fname = t[2]
        for d in self.searchpath[t[1]]:
            if d == os.curdir:
                f = fname
            else:
                f = os.path.join(d, fname)
            if os.path.isfile(f):
                return f
        return None

    def read_file(self, file):
        """
        Returns the text contents of ``file``.
        """
        with open(file) as f:
            return f.read()

    # Start and stop processing include lines.

    def start_handling_includes(self, t=None):
        """
        Causes the PreProcessor object to start processing #import,
        #include, #include_next, #define and #undef lines.

        This method will be called when a #if, #ifdef, #ifndef or #elif
        evaluates True, or when we reach the #else in a #if, #ifdef,
        #ifndef or #elif block where a condition already evaluated
        False.

        The handlers are taken from the enclosing level's table (the top
        of the stack), so a block nested inside a skipped block stays
        skipped.
        """
        d = self.dispatch_table
        p = self.stack[-1] if self.stack else self.default_table

        for k in ('import', 'include', 'include_next', 'define', 'undef'):
            d[k] = p[k]

    def stop_handling_includes(self, t=None):
        """
        Causes the PreProcessor object to stop processing #import,
        #include, #include_next, #define and #undef lines.

        This method will be called when a #if, #ifdef, #ifndef or #elif
        evaluates False, or when we reach the #else in a #if, #ifdef,
        #ifndef or #elif block where a condition already evaluated True.
        """
        d = self.dispatch_table
        for k in ('import', 'include', 'include_next', 'define', 'undef'):
            d[k] = self.do_nothing

    # Default methods for handling all of the preprocessor directives.
    # (Note that what actually gets called for a given directive at any
    # point in time is really controlled by the dispatch_table.)

    def _do_if_else_condition(self, condition):
        """
        Common logic for evaluating the conditions on #if, #ifdef and
        #ifndef lines.

        Opens a new dispatch level, then enables or disables directive
        handling for the branch and arranges what a following #elif or
        #else will do.
        """
        self.save()
        d = self.dispatch_table
        if condition:
            self.start_handling_includes()
            # A branch has been taken: every later branch is skipped.
            d['elif'] = self.stop_handling_includes
            d['else'] = self.stop_handling_includes
        else:
            self.stop_handling_includes()
            # Nothing taken yet: #elif is evaluated, #else is taken.
            d['elif'] = self.do_elif
            d['else'] = self.start_handling_includes

    def do_ifdef(self, t):
        """
        Default handling of a #ifdef line.
        """
        self._do_if_else_condition(t[1] in self.cpp_namespace)

    def do_ifndef(self, t):
        """
        Default handling of a #ifndef line.
        """
        self._do_if_else_condition(t[1] not in self.cpp_namespace)

    def do_if(self, t):
        """
        Default handling of a #if line.
        """
        self._do_if_else_condition(self.eval_expression(t))

    def do_elif(self, t):
        """
        Default handling of a #elif line.

        Only reached while no earlier branch of the block was taken.  If
        the condition holds, this branch is taken and all later ones are
        skipped; otherwise the state is left unchanged.
        """
        d = self.dispatch_table
        if self.eval_expression(t):
            self.start_handling_includes()
            d['elif'] = self.stop_handling_includes
            d['else'] = self.stop_handling_includes

    def do_else(self, t):
        """
        Default handling of a #else line.
        """
        pass

    def do_endif(self, t):
        """
        Default handling of a #endif line.
        """
        self.restore()

    def do_define(self, t):
        """
        Default handling of a #define line.

        ``t`` is (directive, name, args, expansion).  An expansion that
        is an integer literal is stored as an int.  An expansion that
        contains a "defined " test is evaluated immediately and its
        result stored.  A function-like macro is stored as a
        FunctionEvaluator; anything else is stored as the expansion text.
        """
        _, name, args, expansion = t
        try:
            expansion = int(expansion)
        except (TypeError, ValueError):
            # handle "defined" chain "! (defined (A) || defined (B)" ...
            if "defined " in expansion:
                # eval_expression() skips the first element of what it is
                # given, so only the expansion text is evaluated here.
                self.cpp_namespace[name] = self.eval_expression(t[2:])
                return

        if args:
            # Strip the surrounding parentheses from the argument list.
            evaluator = FunctionEvaluator(name, args[1:-1], expansion)
            self.cpp_namespace[name] = evaluator
        else:
            self.cpp_namespace[name] = expansion

    def do_undef(self, t):
        """
        Default handling of a #undef line.

        Undefining a name that is not defined is not an error.
        """
        try:
            del self.cpp_namespace[t[1]]
        except KeyError:
            pass

    def do_import(self, t):
        """
        Default handling of a #import line.
        """
        # XXX finish this -- maybe borrow/share logic from do_include()...?
        pass

    def do_include(self, t):
        """
        Default handling of a #include line.

        Resolves the include to a file, records it in the result and
        queues the directives of that file for processing ahead of the
        remaining ones.  Includes that cannot be resolved or found, or
        that were already recorded, are ignored.
        """
        t = self.resolve_include(t)
        if not t:
            return
        include_file = self.find_include_file(t)
        # avoid infinite recursion
        if not include_file or include_file in self.result:
            return
        self.result.append(include_file)
        # print include_file, len(self.tuples)

        # Handle maximum depth of nested includes.  Every included file
        # that is still being processed has one 'scons_current_file'
        # marker pending at its end, so counting the markers gives the
        # current nesting depth.
        if self.depth != -1:
            current_depth = sum(
                1 for pending in self.tuples
                if pending[0] == "scons_current_file")
            if current_depth >= self.depth:
                return

        # Bracket the included file's directives with markers that switch
        # current_file to the included file and back again.
        new_tuples = [('scons_current_file', include_file)] + \
                      self.tupleize(self.read_file(include_file)) + \
                     [('scons_current_file', self.current_file)]
        self.tuples[:] = new_tuples + self.tuples

    # Date: Tue, 22 Nov 2005 20:26:09 -0500
    # From: Stefan Seefeld <seefeld@sympatico.ca>
    #
    # By the way, #include_next is not the same as #include. The difference
    # being that #include_next starts its search in the path following the
    # path that led to the including file. In other words, if your system
    # include paths are ['/foo', '/bar'], and you are looking at a header
    # '/foo/baz.h', it might issue an '#include_next <baz.h>' which would
    # correctly resolve to '/bar/baz.h' (if that exists), but *not* see
    # '/foo/baz.h' again. See http://www.delorie.com/gnu/docs/gcc/cpp_11.html
    # for more reasoning.
    #
    # I have no idea in what context 'import' might be used.

    # XXX is #include_next really the same as #include ?
    do_include_next = do_include

    # Utility methods for handling resolution of include files.

    def resolve_include(self, t):
        """Resolve a tuple-ized #include line.

        This handles recursive expansion of values without "" or <>
        surrounding the name until an initial " or < is found, to handle
        #include FILE where FILE is a #define somewhere else.

        Returns (directive, quote, name), where quote is the leading
        '"' or '<' and name is the text between the delimiters.

        Returns None when the line cannot be resolved: the argument is
        empty, it names a macro that is not defined, the macro does not
        expand to a non-empty string, or the expansion refers back to
        itself.
        """
        s = t[1].strip()
        # Values already expanded for this line; seeing one again means
        # the macros are circular (e.g. "#define A A") and the expansion
        # would never end.
        seen = set()
        while True:
            if not s or not isinstance(s, str):
                return None
            if s[0] in '<"':
                break
            if s in seen:
                return None
            seen.add(s)
            try:
                s = self.cpp_namespace[s]
            except KeyError:
                m = function_name.search(s)

                # Date: Mon, 28 Nov 2016 17:47:13 UTC
                # From: Ivan Kravets <ikravets@platformio.org>
                #
                # Ignore `#include` directive that depends on dynamic macro
                # which is not located in state TABLE
                # For example, `#include MYCONFIG_FILE`
                if not m:
                    return None

                # The same applies to a function-like macro that was
                # never defined, e.g. `#include MYCONFIG(x)`.
                try:
                    s = self.cpp_namespace[m.group(1)]
                except KeyError:
                    return None
                if callable(s):
                    args = function_arg_separator.split(m.group(2))
                    s = s(*args)
        return (t[0], s[0], s[1:-1])

    def all_include(self, t):
        """
        Handling of a #include line when all includes were requested:
        the resolved (directive, quote, name) tuple is appended to the
        result without looking for the file.  Lines that cannot be
        resolved are skipped.
        """
        resolved = self.resolve_include(t)
        if resolved is not None:
            self.result.append(resolved)
629
630
class DumbPreProcessor(PreProcessor):
    """A preprocessor that pays no attention to conditional compilation.

    All of the #if/#ifdef/#ifndef/#elif/#else/#endif directives are
    ignored, so *every* #include file is reported back (like the
    classic SCons scanner did).

    This is functionally equivalent to using a regular expression to
    find all of the #include lines, only slower.  It exists mainly as
    an example of how the main PreProcessor class can be sub-classed
    to tailor its behavior.
    """
    def __init__(self, *args, **kw):
        super().__init__(*args, **kw)
        # Route every conditional directive to the no-op handler in the
        # default dispatch table built by the base class.
        conditionals = ('if', 'elif', 'else', 'endif', 'ifdef', 'ifndef')
        table = self.default_table
        for directive in conditionals:
            table[directive] = self.do_nothing
646
647# Local Variables:
648# tab-width:4
649# indent-tabs-mode:nil
650# End:
651# vim: set expandtab tabstop=4 shiftwidth=4:
652