1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this
3# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4"""
5This is a very primitive line based preprocessor, for times when using
6a C preprocessor isn't an option.
7
8It currently supports the following grammar for expressions, whitespace is
9ignored:
10
11expression :
12  and_cond ( '||' expression ) ? ;
13and_cond:
14  test ( '&&' and_cond ) ? ;
15test:
16  unary ( ( '==' | '!=' ) unary ) ? ;
17unary :
18  '!'? value ;
19value :
20  [0-9]+ # integer
21  | 'defined(' \w+ ')'
22  | \w+  # string identifier or value;
23"""
24
25from __future__ import absolute_import, print_function, unicode_literals
26
27import errno
28import io
29from optparse import OptionParser
30import os
31import re
32import six
33import sys
34
35from mozbuild.makeutil import Makefile
36from mozpack.path import normsep
37
# Hack around win32 mangling our line endings: switch stdout to binary mode
# and make os.linesep a bare newline for the rest of the process.
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65443
if sys.platform == "win32":
    # msvcrt only exists on Windows, hence the import inside the guard.
    import msvcrt
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
    os.linesep = '\n'
44
45
# Public names exported by a star-import of this module.
__all__ = [
  'Context',
  'Expression',
  'Preprocessor',
  'preprocess'
]
52
53
def _to_text(a):
    """
    Return ``a`` as a text (unicode) string.

    Text and byte strings are passed through ``six.ensure_text``; any other
    value (int, etc.) is converted with ``six.text_type``.
    """
    is_string = isinstance(a, (six.text_type, six.binary_type))
    convert = six.ensure_text if is_string else six.text_type
    return convert(a)
61
62
def path_starts_with(path, prefix):
    """
    Tell whether the leading components of ``path`` equal the components of
    ``prefix``.

    Both arguments are split on ``os.sep`` (after mapping ``os.altsep`` to
    ``os.sep`` when the platform has one) and every component is passed
    through ``os.path.normcase`` before the comparison, so the match is done
    on whole path components rather than on raw string prefixes.
    """
    def components(value):
        if os.altsep:
            value = value.replace(os.altsep, os.sep)
        return [os.path.normcase(part) for part in value.split(os.sep)]

    wanted = components(prefix)
    return components(path)[:len(wanted)] == wanted
70
71
class Expression:
    """
    Parser and evaluator for the preprocessor's small expression language.

    The constructor parses the given string into an abstract syntax tree kept
    in ``self.e`` (``None`` when the string is empty or only whitespace);
    ``evaluate`` then walks that tree against a mapping of variables.
    """

    def __init__(self, expression_string):
        """
        Create a new expression with this string.
        The expression will already be parsed into an Abstract Syntax Tree.

        Raises Expression.ParseError when the string cannot be consumed
        completely by the grammar.
        """
        self.content = expression_string
        self.offset = 0
        self.__ignore_whitespace()
        self.e = self.__get_logical_or()
        if self.content:
            # Something is left over that no production could consume.
            raise Expression.ParseError(self)

    def __require_operand(self, node):
        """
        Return ``node``, raising ParseError when it is None.

        The logical sub-parsers return None on empty input, which happens
        when a '&&' or '||' is the last thing in the expression.  Rejecting
        that here keeps None out of the tree, where it would only surface
        later as an AttributeError during evaluation.
        """
        if node is None:
            raise Expression.ParseError(self)
        return node

    def __get_logical_or(self):
        """
        Production: and_cond ( '||' expression ) ?
        """
        if not len(self.content):
            return None
        rv = Expression.__AST("logical_op")
        # test
        rv.append(self.__get_logical_and())
        self.__ignore_whitespace()
        if self.content[:2] != '||':
            # no logical op needed, short cut to our prime element
            return rv[0]
        # append operator
        rv.append(Expression.__ASTLeaf('op', self.content[:2]))
        self.__strip(2)
        self.__ignore_whitespace()
        rv.append(self.__require_operand(self.__get_logical_or()))
        self.__ignore_whitespace()
        return rv

    def __get_logical_and(self):
        """
        Production: test ( '&&' and_cond ) ?
        """
        if not len(self.content):
            return None
        rv = Expression.__AST("logical_op")
        # test
        rv.append(self.__get_equality())
        self.__ignore_whitespace()
        if self.content[:2] != '&&':
            # no logical op needed, short cut to our prime element
            return rv[0]
        # append operator
        rv.append(Expression.__ASTLeaf('op', self.content[:2]))
        self.__strip(2)
        self.__ignore_whitespace()
        rv.append(self.__require_operand(self.__get_logical_and()))
        self.__ignore_whitespace()
        return rv

    def __get_equality(self):
        """
        Production: unary ( ( '==' | '!=' ) unary ) ?
        """
        if not len(self.content):
            return None
        rv = Expression.__AST("equality")
        # unary
        rv.append(self.__get_unary())
        self.__ignore_whitespace()
        if not re.match(r'[=!]=', self.content):
            # no equality needed, short cut to our prime unary
            return rv[0]
        # append operator
        rv.append(Expression.__ASTLeaf('op', self.content[:2]))
        self.__strip(2)
        self.__ignore_whitespace()
        rv.append(self.__get_unary())
        self.__ignore_whitespace()
        return rv

    def __get_unary(self):
        """
        Production: '!'? value
        """
        # eat whitespace right away, too
        not_ws = re.match(r'!\s*', self.content)
        if not not_ws:
            return self.__get_value()
        rv = Expression.__AST('not')
        self.__strip(not_ws.end())
        rv.append(self.__get_value())
        self.__ignore_whitespace()
        return rv

    def __get_value(self):
        r"""
        Production: ( [0-9]+ | 'defined(' \w+ ')' | \w+ )
        Note that the order is important, and the expression is kind-of
        ambiguous as \w includes 0-9. One could make it unambiguous by
        removing 0-9 from the first char of a string literal.

        Raises Expression.ParseError when none of the alternatives match.
        """
        rv = None
        m = re.match(r'defined\s*\(\s*(\w+)\s*\)', self.content)
        if m:
            word_len = m.end()
            rv = Expression.__ASTLeaf('defined', m.group(1))
        else:
            word_len = re.match(r'[0-9]*', self.content).end()
            if word_len:
                value = int(self.content[:word_len])
                rv = Expression.__ASTLeaf('int', value)
            else:
                word_len = re.match(r'\w*', self.content).end()
                if word_len:
                    rv = Expression.__ASTLeaf('string', self.content[:word_len])
                else:
                    raise Expression.ParseError(self)
        self.__strip(word_len)
        self.__ignore_whitespace()
        return rv

    def __ignore_whitespace(self):
        """
        Drop leading whitespace from the remaining input.
        """
        ws_len = re.match(r'\s*', self.content).end()
        self.__strip(ws_len)
        return

    def __strip(self, length):
        """
        Remove a given amount of chars from the input and update
        the offset.
        """
        self.content = self.content[length:]
        self.offset += length

    def evaluate(self, context):
        """
        Evaluate the expression with the given context

        ``context`` is used for ``name in context`` ('defined') and
        ``context[name]`` (bare words).  '&&' and '||' short-circuit: the
        right operand is only evaluated when it decides the result, and the
        value returned is the same one Python's ``and`` / ``or`` would give.
        """

        def run(tok):
            return opmap[tok.type](tok)

        # Helper function to evaluate __get_equality results
        def eval_equality(tok):
            left = run(tok[0])
            right = run(tok[2])
            rv = left == right
            if tok[1].value == '!=':
                rv = not rv
            return rv

        # Helper function to evaluate __get_logical_and and __get_logical_or results
        def eval_logical_op(tok):
            op = tok[1].value
            if op == '&&':
                return run(tok[0]) and run(tok[2])
            elif op == '||':
                return run(tok[0]) or run(tok[2])
            raise Expression.ParseError(self)

        # Mapping from token types to evaluator functions
        # Apart from (non-)equality, all these can be simple lambda forms.
        opmap = {
          'logical_op': eval_logical_op,
          'equality': eval_equality,
          'not': lambda tok: not run(tok[0]),
          'string': lambda tok: context[tok.value],
          'defined': lambda tok: tok.value in context,
          'int': lambda tok: tok.value}

        return run(self.e)

    class __AST(list):
        """
        Internal class implementing Abstract Syntax Tree nodes
        """

        def __init__(self, type):
            self.type = type
            # Call the base initializer explicitly: super(self.__class__, ...)
            # recurses without end as soon as this class is subclassed.
            list.__init__(self)

    class __ASTLeaf:
        """
        Internal class implementing Abstract Syntax Tree leafs
        """

        def __init__(self, type, value):
            self.value = value
            self.type = type

        def __str__(self):
            return self.value.__str__()

        def __repr__(self):
            return self.value.__repr__()

    class ParseError(Exception):
        """
        Error raised when parsing fails.
        It has two members, offset and content, which give the offset of the
        error and the offending content.
        """

        def __init__(self, expression):
            self.offset = expression.offset
            # Only the first few characters of the unparsed rest are kept.
            self.content = expression.content[:3]

        def __str__(self):
            return 'Unexpected content at offset {0}, "{1}"'.format(self.offset,
                                                                    self.content)
276
277
class Context(dict):
    """
    This class holds variable values by subclassing dict, and while it
    truthfully reports True and False on

    name in context

    it returns the variable name itself on

    context["name"]

    to reflect the ambiguity between string literals and preprocessor
    variables.
    """

    def __getitem__(self, key):
        """
        Return the stored value for ``key``, or ``key`` itself when it is
        not defined.
        """
        if key in self:
            # Name the class explicitly: super(self.__class__, self) would
            # recurse forever for any subclass of Context.
            return super(Context, self).__getitem__(key)
        return key
297
298
299class Preprocessor:
300    """
301    Class for preprocessing text files.
302    """
303    class Error(RuntimeError):
304        def __init__(self, cpp, MSG, context):
305            self.file = cpp.context['FILE']
306            self.line = cpp.context['LINE']
307            self.key = MSG
308            RuntimeError.__init__(self, (self.file, self.line, self.key, context))
309
310    def __init__(self, defines=None, marker='#'):
311        self.context = Context()
312        self.context.update({
313            'FILE': '',
314            'LINE': 0,
315            'DIRECTORY': os.path.abspath('.')
316            })
317        try:
318            # Can import globally because of bootstrapping issues.
319            from buildconfig import topsrcdir, topobjdir
320        except ImportError:
321            # Allow this script to still work independently of a configured objdir.
322            topsrcdir = topobjdir = None
323        self.topsrcdir = topsrcdir
324        self.topobjdir = topobjdir
325        self.curdir = '.'
326        self.actionLevel = 0
327        self.disableLevel = 0
328        # ifStates can be
329        #  0: hadTrue
330        #  1: wantsTrue
331        #  2: #else found
332        self.ifStates = []
333        self.checkLineNumbers = False
334        self.filters = []
335        self.cmds = {}
336        for cmd, level in (
337            ('define', 0),
338            ('undef', 0),
339            ('if', sys.maxsize),
340            ('ifdef', sys.maxsize),
341            ('ifndef', sys.maxsize),
342            ('else', 1),
343            ('elif', 1),
344            ('elifdef', 1),
345            ('elifndef', 1),
346            ('endif', sys.maxsize),
347            ('expand', 0),
348            ('literal', 0),
349            ('filter', 0),
350            ('unfilter', 0),
351            ('include', 0),
352            ('includesubst', 0),
353            ('error', 0),
354        ):
355            self.cmds[cmd] = (level, getattr(self, 'do_' + cmd))
356        self.out = sys.stdout
357        self.setMarker(marker)
358        self.varsubst = re.compile('@(?P<VAR>\w+)@', re.U)
359        self.includes = set()
360        self.silenceMissingDirectiveWarnings = False
361        if defines:
362            self.context.update(defines)
363
364    def failUnused(self, file):
365        msg = None
366        if self.actionLevel == 0 and not self.silenceMissingDirectiveWarnings:
367            msg = 'no preprocessor directives found'
368        elif self.actionLevel == 1:
369            msg = 'no useful preprocessor directives found'
370        if msg:
371            class Fake(object):
372                pass
373            fake = Fake()
374            fake.context = {
375                'FILE': file,
376                'LINE': None,
377            }
378            raise Preprocessor.Error(fake, msg, None)
379
380    def setMarker(self, aMarker):
381        """
382        Set the marker to be used for processing directives.
383        Used for handling CSS files, with pp.setMarker('%'), for example.
384        The given marker may be None, in which case no markers are processed.
385        """
386        self.marker = aMarker
387        if aMarker:
388            self.instruction = re.compile('\s*{0}(?P<cmd>[a-z]+)(?:\s+(?P<args>.*?))?\s*$'
389                                          .format(aMarker))
390            self.comment = re.compile(aMarker, re.U)
391        else:
392            class NoMatch(object):
393                def match(self, *args):
394                    return False
395            self.instruction = self.comment = NoMatch()
396
397    def setSilenceDirectiveWarnings(self, value):
398        """
399        Sets whether missing directive warnings are silenced, according to
400        ``value``.  The default behavior of the preprocessor is to emit
401        such warnings.
402        """
403        self.silenceMissingDirectiveWarnings = value
404
405    def addDefines(self, defines):
406        """
407        Adds the specified defines to the preprocessor.
408        ``defines`` may be a dictionary object or an iterable of key/value pairs
409        (as tuples or other iterables of length two)
410        """
411        self.context.update(defines)
412
413    def clone(self):
414        """
415        Create a clone of the current processor, including line ending
416        settings, marker, variable definitions, output stream.
417        """
418        rv = Preprocessor()
419        rv.context.update(self.context)
420        rv.setMarker(self.marker)
421        rv.out = self.out
422        return rv
423
424    def processFile(self, input, output, depfile=None):
425        """
426        Preprocesses the contents of the ``input`` stream and writes the result
427        to the ``output`` stream. If ``depfile`` is set,  the dependencies of
428        ``output`` file are written to ``depfile`` in Makefile format.
429        """
430        self.out = output
431
432        self.do_include(input, False)
433        self.failUnused(input.name)
434
435        if depfile:
436            mk = Makefile()
437            mk.create_rule([output.name]).add_dependencies(self.includes)
438            mk.dump(depfile)
439
440    def computeDependencies(self, input):
441        """
442        Reads the ``input`` stream, and computes the dependencies for that input.
443        """
444        try:
445            old_out = self.out
446            self.out = None
447            self.do_include(input, False)
448
449            return self.includes
450        finally:
451            self.out = old_out
452
453    def applyFilters(self, aLine):
454        for f in self.filters:
455            aLine = f[1](aLine)
456        return aLine
457
458    def noteLineInfo(self):
459        # Record the current line and file. Called once before transitioning
460        # into or out of an included file and after writing each line.
461        self.line_info = self.context['FILE'], self.context['LINE']
462
463    def write(self, aLine):
464        """
465        Internal method for handling output.
466        """
467        if not self.out:
468            return
469
470        next_line, next_file = self.context['LINE'], self.context['FILE']
471        if self.checkLineNumbers:
472            expected_file, expected_line = self.line_info
473            expected_line += 1
474            if (expected_line != next_line or
475                expected_file and expected_file != next_file):
476                self.out.write('//@line {line} "{file}"\n'.format(line=next_line,
477                                                                  file=next_file))
478        self.noteLineInfo()
479
480        filteredLine = self.applyFilters(aLine)
481        if filteredLine != aLine:
482            self.actionLevel = 2
483        self.out.write(filteredLine)
484
485    def handleCommandLine(self, args, defaultToStdin=False):
486        """
487        Parse a commandline into this parser.
488        Uses OptionParser internally, no args mean sys.argv[1:].
489        """
490        def get_output_file(path):
491            dir = os.path.dirname(path)
492            if dir:
493                try:
494                    os.makedirs(dir)
495                except OSError as error:
496                    if error.errno != errno.EEXIST:
497                        raise
498            return io.open(path, 'w', encoding='utf-8', newline='\n')
499
500        p = self.getCommandLineParser()
501        options, args = p.parse_args(args=args)
502        out = self.out
503        depfile = None
504
505        if options.output:
506            out = get_output_file(options.output)
507        if defaultToStdin and len(args) == 0:
508            args = [sys.stdin]
509            if options.depend:
510                raise Preprocessor.Error(self, "--depend doesn't work with stdin",
511                                         None)
512        if options.depend:
513            if not options.output:
514                raise Preprocessor.Error(self, "--depend doesn't work with stdout",
515                                         None)
516            depfile = get_output_file(options.depend)
517
518        if args:
519            for f in args:
520                with io.open(f, 'rU', encoding='utf-8') as input:
521                    self.processFile(input=input, output=out)
522            if depfile:
523                mk = Makefile()
524                mk.create_rule(
525                    [six.ensure_text(options.output)]
526                ).add_dependencies(self.includes)
527                mk.dump(depfile)
528                depfile.close()
529
530        if options.output:
531            out.close()
532
533    def getCommandLineParser(self, unescapeDefines=False):
534        escapedValue = re.compile('".*"$')
535        numberValue = re.compile('\d+$')
536
537        def handleD(option, opt, value, parser):
538            vals = value.split('=', 1)
539            if len(vals) == 1:
540                vals.append(1)
541            elif unescapeDefines and escapedValue.match(vals[1]):
542                # strip escaped string values
543                vals[1] = vals[1][1:-1]
544            elif numberValue.match(vals[1]):
545                vals[1] = int(vals[1])
546            self.context[vals[0]] = vals[1]
547
548        def handleU(option, opt, value, parser):
549            del self.context[value]
550
551        def handleF(option, opt, value, parser):
552            self.do_filter(value)
553
554        def handleMarker(option, opt, value, parser):
555            self.setMarker(value)
556
557        def handleSilenceDirectiveWarnings(option, opt, value, parse):
558            self.setSilenceDirectiveWarnings(True)
559        p = OptionParser()
560        p.add_option('-D', action='callback', callback=handleD, type="string",
561                     metavar="VAR[=VAL]", help='Define a variable')
562        p.add_option('-U', action='callback', callback=handleU, type="string",
563                     metavar="VAR", help='Undefine a variable')
564        p.add_option('-F', action='callback', callback=handleF, type="string",
565                     metavar="FILTER", help='Enable the specified filter')
566        p.add_option('-o', '--output', type="string", default=None,
567                     metavar="FILENAME", help='Output to the specified file ' +
568                     'instead of stdout')
569        p.add_option('--depend', type="string", default=None, metavar="FILENAME",
570                     help='Generate dependencies in the given file')
571        p.add_option('--marker', action='callback', callback=handleMarker,
572                     type="string",
573                     help='Use the specified marker instead of #')
574        p.add_option('--silence-missing-directive-warnings', action='callback',
575                     callback=handleSilenceDirectiveWarnings,
576                     help='Don\'t emit warnings about missing directives')
577        return p
578
579    def handleLine(self, aLine):
580        """
581        Handle a single line of input (internal).
582        """
583        if self.actionLevel == 0 and self.comment.match(aLine):
584            self.actionLevel = 1
585        m = self.instruction.match(aLine)
586        if m:
587            args = None
588            cmd = m.group('cmd')
589            try:
590                args = m.group('args')
591            except IndexError:
592                pass
593            if cmd not in self.cmds:
594                raise Preprocessor.Error(self, 'INVALID_CMD', aLine)
595            level, cmd = self.cmds[cmd]
596            if (level >= self.disableLevel):
597                cmd(args)
598            if cmd != 'literal':
599                self.actionLevel = 2
600        elif self.disableLevel == 0 and not self.comment.match(aLine):
601            self.write(aLine)
602
603    # Instruction handlers
604    # These are named do_'instruction name' and take one argument
605
606    # Variables
607    def do_define(self, args):
608        m = re.match('(?P<name>\w+)(?:\s(?P<value>.*))?', args, re.U)
609        if not m:
610            raise Preprocessor.Error(self, 'SYNTAX_DEF', args)
611        val = ''
612        if m.group('value'):
613            val = self.applyFilters(m.group('value'))
614            try:
615                val = int(val)
616            except Exception:
617                pass
618        self.context[m.group('name')] = val
619
620    def do_undef(self, args):
621        m = re.match('(?P<name>\w+)$', args, re.U)
622        if not m:
623            raise Preprocessor.Error(self, 'SYNTAX_DEF', args)
624        if args in self.context:
625            del self.context[args]
626
627    # Logic
628    def ensure_not_else(self):
629        if len(self.ifStates) == 0 or self.ifStates[-1] == 2:
630            sys.stderr.write('WARNING: bad nesting of #else in %s\n' % self.context['FILE'])
631
632    def do_if(self, args, replace=False):
633        if self.disableLevel and not replace:
634            self.disableLevel += 1
635            return
636        val = None
637        try:
638            e = Expression(args)
639            val = e.evaluate(self.context)
640        except Exception:
641            # XXX do real error reporting
642            raise Preprocessor.Error(self, 'SYNTAX_ERR', args)
643        if isinstance(val, six.text_type) or isinstance(val, six.binary_type):
644            # we're looking for a number value, strings are false
645            val = False
646        if not val:
647            self.disableLevel = 1
648        if replace:
649            if val:
650                self.disableLevel = 0
651            self.ifStates[-1] = self.disableLevel
652        else:
653            self.ifStates.append(self.disableLevel)
654
655    def do_ifdef(self, args, replace=False):
656        if self.disableLevel and not replace:
657            self.disableLevel += 1
658            return
659        if re.search('\W', args, re.U):
660            raise Preprocessor.Error(self, 'INVALID_VAR', args)
661        if args not in self.context:
662            self.disableLevel = 1
663        if replace:
664            if args in self.context:
665                self.disableLevel = 0
666            self.ifStates[-1] = self.disableLevel
667        else:
668            self.ifStates.append(self.disableLevel)
669
670    def do_ifndef(self, args, replace=False):
671        if self.disableLevel and not replace:
672            self.disableLevel += 1
673            return
674        if re.search('\W', args, re.U):
675            raise Preprocessor.Error(self, 'INVALID_VAR', args)
676        if args in self.context:
677            self.disableLevel = 1
678        if replace:
679            if args not in self.context:
680                self.disableLevel = 0
681            self.ifStates[-1] = self.disableLevel
682        else:
683            self.ifStates.append(self.disableLevel)
684
685    def do_else(self, args, ifState=2):
686        self.ensure_not_else()
687        hadTrue = self.ifStates[-1] == 0
688        self.ifStates[-1] = ifState  # in-else
689        if hadTrue:
690            self.disableLevel = 1
691            return
692        self.disableLevel = 0
693
694    def do_elif(self, args):
695        if self.disableLevel == 1:
696            if self.ifStates[-1] == 1:
697                self.do_if(args, replace=True)
698        else:
699            self.do_else(None, self.ifStates[-1])
700
701    def do_elifdef(self, args):
702        if self.disableLevel == 1:
703            if self.ifStates[-1] == 1:
704                self.do_ifdef(args, replace=True)
705        else:
706            self.do_else(None, self.ifStates[-1])
707
708    def do_elifndef(self, args):
709        if self.disableLevel == 1:
710            if self.ifStates[-1] == 1:
711                self.do_ifndef(args, replace=True)
712        else:
713            self.do_else(None, self.ifStates[-1])
714
715    def do_endif(self, args):
716        if self.disableLevel > 0:
717            self.disableLevel -= 1
718        if self.disableLevel == 0:
719            self.ifStates.pop()
720
721    # output processing
722    def do_expand(self, args):
723        lst = re.split('__(\w+)__', args, re.U)
724
725        def vsubst(v):
726            if v in self.context:
727                return _to_text(self.context[v])
728            return ''
729        for i in range(1, len(lst), 2):
730            lst[i] = vsubst(lst[i])
731        lst.append('\n')  # add back the newline
732        self.write(six.moves.reduce(lambda x, y: x+y, lst, ''))
733
734    def do_literal(self, args):
735        self.write(args + '\n')
736
737    def do_filter(self, args):
738        filters = [f for f in args.split(' ') if hasattr(self, 'filter_' + f)]
739        if len(filters) == 0:
740            return
741        current = dict(self.filters)
742        for f in filters:
743            current[f] = getattr(self, 'filter_' + f)
744        self.filters = [(fn, current[fn]) for fn in sorted(current.keys())]
745        return
746
747    def do_unfilter(self, args):
748        filters = args.split(' ')
749        current = dict(self.filters)
750        for f in filters:
751            if f in current:
752                del current[f]
753        self.filters = [(fn, current[fn]) for fn in sorted(current.keys())]
754        return
755
756    # Filters
757    #
758    # emptyLines: Strips blank lines from the output.
759    def filter_emptyLines(self, aLine):
760        if aLine == '\n':
761            return ''
762        return aLine
763
764    # slashslash: Strips everything after //.
765    def filter_slashslash(self, aLine):
766        if (aLine.find('//') == -1):
767            return aLine
768        [aLine, rest] = aLine.split('//', 1)
769        if rest:
770            aLine += '\n'
771        return aLine
772
773    # spaces: Collapses sequences of spaces into a single space.
774    def filter_spaces(self, aLine):
775        return re.sub(' +', ' ', aLine).strip(' ')
776
777    # substitution: variables wrapped in @ are replaced with their value.
778    def filter_substitution(self, aLine, fatal=True):
779        def repl(matchobj):
780            varname = matchobj.group('VAR')
781            if varname in self.context:
782                return _to_text(self.context[varname])
783            if fatal:
784                raise Preprocessor.Error(self, 'UNDEFINED_VAR', varname)
785            return matchobj.group(0)
786        return self.varsubst.sub(repl, aLine)
787
    # attemptSubstitution: variables wrapped in @ are replaced with their
    # value, or left untouched if the variable is not defined.
790    def filter_attemptSubstitution(self, aLine):
791        return self.filter_substitution(aLine, fatal=False)
792
793    # File ops
794    def do_include(self, args, filters=True):
795        """
796        Preprocess a given file.
797        args can either be a file name, or a file-like object.
798        Files should be opened, and will be closed after processing.
799        """
800        isName = isinstance(args, six.string_types)
801        oldCheckLineNumbers = self.checkLineNumbers
802        self.checkLineNumbers = False
803        if isName:
804            try:
805                args = _to_text(args)
806                if filters:
807                    args = self.applyFilters(args)
808                if not os.path.isabs(args):
809                    args = os.path.join(self.curdir, args)
810                args = io.open(args, 'rU', encoding='utf-8')
811            except Preprocessor.Error:
812                raise
813            except Exception:
814                raise Preprocessor.Error(self, 'FILE_NOT_FOUND', _to_text(args))
815        self.checkLineNumbers = bool(re.search('\.(js|jsm|java|webidl)(?:\.in)?$', args.name))
816        oldFile = self.context['FILE']
817        oldLine = self.context['LINE']
818        oldDir = self.context['DIRECTORY']
819        oldCurdir = self.curdir
820        self.noteLineInfo()
821
822        if args.isatty():
823            # we're stdin, use '-' and '' for file and dir
824            self.context['FILE'] = '-'
825            self.context['DIRECTORY'] = ''
826            self.curdir = '.'
827        else:
828            abspath = os.path.abspath(args.name)
829            self.curdir = os.path.dirname(abspath)
830            self.includes.add(six.ensure_text(abspath))
831            if self.topobjdir and path_starts_with(abspath, self.topobjdir):
832                abspath = '$OBJDIR' + normsep(abspath[len(self.topobjdir):])
833            elif self.topsrcdir and path_starts_with(abspath, self.topsrcdir):
834                abspath = '$SRCDIR' + normsep(abspath[len(self.topsrcdir):])
835            self.context['FILE'] = abspath
836            self.context['DIRECTORY'] = os.path.dirname(abspath)
837        self.context['LINE'] = 0
838
839        for l in args:
840            self.context['LINE'] += 1
841            self.handleLine(l)
842        if isName:
843            args.close()
844
845        self.context['FILE'] = oldFile
846        self.checkLineNumbers = oldCheckLineNumbers
847        self.context['LINE'] = oldLine
848        self.context['DIRECTORY'] = oldDir
849        self.curdir = oldCurdir
850
851    def do_includesubst(self, args):
852        args = self.filter_substitution(args)
853        self.do_include(args)
854
855    def do_error(self, args):
856        raise Preprocessor.Error(self, 'Error: ', _to_text(args))
857
858
def preprocess(includes=None, defines=None,
               output=sys.stdout,
               marker='#'):
    """
    Preprocess every entry of ``includes`` into ``output`` and return the
    set of files that were read.

    ``includes`` may hold file names and/or already opened file-like
    objects; it defaults to ``[sys.stdin]``.  File names are opened as UTF-8
    text and closed again afterwards, streams are used as they are.
    ``defines`` is an optional mapping of initial variable definitions and
    ``marker`` the directive marker.
    """
    if includes is None:
        includes = [sys.stdin]
    pp = Preprocessor(defines=defines,
                      marker=marker)
    for f in includes:
        if isinstance(f, six.string_types):
            # Text mode already gives universal newlines; the old 'U' flag
            # is rejected by Python 3.11 and later.
            with io.open(f, 'r', encoding='utf-8') as input:
                pp.processFile(input=input, output=output)
        else:
            # Already a stream (e.g. sys.stdin): io.open() cannot take it.
            pp.processFile(input=f, output=output)
    return pp.includes
868
869
# Keep this module independently executable.
if __name__ == "__main__":
    pp = Preprocessor()
    # None makes the option parser read sys.argv[1:]; True selects stdin as
    # the input when no files are named on the command line.
    pp.handleCommandLine(None, True)
874