1# MIT License
2#
3# Copyright The SCons Foundation
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be included
14# in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
17# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
18# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24"""Dependency scanner for LaTeX code."""
25
26import os.path
27import re
28
29import SCons.Scanner
30import SCons.Util
31
# Graphics file extensions that are tried, in the listed order, when an
# \includegraphics argument is given without an extension.

# Used by the scanner for documents built with latex.
TexGraphics = [
    '.eps',
    '.ps',
]

# Used by the scanner for documents built with pdflatex.  A variant of this
# list that also contained '.pdf' was disabled earlier:
#   ['.pdf', '.png', '.jpg', '.gif', '.tif']
LatexGraphics = [
    '.png',
    '.jpg',
    '.gif',
    '.tif',
]
36
37
38# Used as a return value of modify_env_var if the variable is not set.
39class _Null:
40    pass
41_null = _Null
42
def modify_env_var(env, var, abspath):
    """Prepend search directories to ``env['ENV'][var]`` and return the old value.

    The user specifies the paths in ``env[var]``, similar to other builders.
    They may be relative and are converted to absolute paths, as expected
    by LaTeX and Co.  Paths already present in ``env['ENV'][var]`` are
    honored, but ``abspath`` and the ``env[var]`` paths get higher
    precedence.  The function itself does not undo anything: the returned
    value is what a caller needs in order to restore the variable later.

    Args:
        env: construction environment; read through ``env[var]`` and
            ``env['ENV'][var]`` and modified through ``PrependENVPath``.
        var: name of the path variable (for example ``'TEXINPUTS'``).
        abspath: directory that is prepended first.

    Returns:
        The previous value of ``env['ENV'][var]``, or ``_null`` if looking
        it up raised KeyError.
    """
    try:
        previous = env['ENV'][var]
    except KeyError:
        previous = _null

    env.PrependENVPath(var, abspath)

    # A KeyError anywhere in here (typically: env[var] is not defined)
    # simply means there are no user-supplied paths to add.
    try:
        configured = env[var]
        if SCons.Util.is_List(configured):
            absolute = [os.path.abspath(str(entry)) for entry in configured]
        else:
            # Split at os.pathsep to convert into absolute path
            absolute = [os.path.abspath(entry)
                        for entry in str(configured).split(os.pathsep)]
        env.PrependENVPath(var, absolute)
    except KeyError:
        pass

    # Convert into a string explicitly to append ":" (without which it won't
    # search system paths as well).  The problem is that
    # env.AppendENVPath(var, ":") does not work, it refuses to append ":"
    # (os.pathsep).
    value = env['ENV'][var]
    if SCons.Util.is_List(value):
        value = os.pathsep.join(value)
    # The trailing os.pathsep is appended here so that the case with no
    # env[var] is covered as well.
    env['ENV'][var] = value + os.pathsep

    return previous
73
class FindENVPathDirs:
    """Callable bound to the name of one *PATH variable.

    Calling an instance looks the variable up in ``env['ENV']`` and returns
    all of the directories named by it.
    """

    def __init__(self, variable):
        # Name of the key that is looked up in env['ENV'] on every call.
        self.variable = variable

    def __call__(self, env, dir=None, target=None, source=None, argument=None):
        """Return the directories listed in ``env['ENV'][self.variable]``.

        Args:
            env: construction environment providing ``env['ENV']`` and
                ``env.fs``.
            dir: directory node the lookup is relative to; ``env.fs._cwd``
                is used when it is not given.
            target, source: passed to the path substitution.
            argument: accepted but not used here.

        Returns:
            A tuple with the result of ``dir.Rfindalldirs``; the empty
            tuple when the variable is missing from ``env['ENV']``.
        """
        import SCons.PathList

        try:
            raw_path = env['ENV'][self.variable]
        except KeyError:
            return ()

        base_dir = dir or env.fs._cwd
        expanded = SCons.PathList.PathList(raw_path).subst_path(env, target, source)
        return tuple(base_dir.Rfindalldirs(expanded))
91
92
def LaTeXScanner():
    """Build the prototype Scanner used for LaTeX sources built with latex.

    Returns:
        A new LaTeX scanner instance named "LaTeXScanner" that handles the
        '$LATEXSUFFIXES' files and completes graphics names with the
        TexGraphics extensions.
    """
    return LaTeX(
        name="LaTeXScanner",
        suffixes='$LATEXSUFFIXES',
        # in the search order, see the LaTeX class docstring
        graphics_extensions=TexGraphics,
        recursive=0,
    )
104
105
def PDFLaTeXScanner():
    """Build the prototype Scanner used for LaTeX sources built with pdflatex.

    Returns:
        A new LaTeX scanner instance named "PDFLaTeXScanner" that handles
        the '$LATEXSUFFIXES' files and completes graphics names with the
        LatexGraphics extensions.
    """
    return LaTeX(
        name="PDFLaTeXScanner",
        suffixes='$LATEXSUFFIXES',
        # in the search order, see the LaTeX class docstring
        graphics_extensions=LatexGraphics,
        recursive=0,
    )
117
118
class LaTeX(SCons.Scanner.Base):
    """Class for scanning LaTeX files for included files.

    Unlike most scanners, which use regular expressions that just
    return the included file name, this one records every inclusion as
    a tuple consisting of the keyword ("include", "includegraphics",
    "input", "bibliography", ...), the sub-directory the file is looked
    up in, and the file name itself.

    A file name without extension is completed before the lookup:
    .tex for "include", "input" and the two-argument import commands,
    .bib for "bibliography" and .sty for "usepackage".

    Finally, if there is no extension for an "includegraphics" keyword
    latex will append .ps or .eps to find the file, while pdftex may use .pdf,
    .jpg, .tif, .mps, or .png.

    The actual subset and search order may be altered by
    DeclareGraphicsExtensions command. This complication is ignored.
    The default order corresponds to experimentation with teTeX::

        $ latex --version
        pdfeTeX 3.141592-1.21a-2.2 (Web2C 7.5.4)
        kpathsea version 3.5.4

    The extensions actually tried are whatever list is handed to the
    constructor as ``graphics_extensions``; the module-level lists are:
        ['.eps', '.ps'] for latex (TexGraphics)
        ['.png', '.jpg', '.gif', '.tif'] for pdflatex (LatexGraphics).

    Another difference is that the search path is determined by the type
    of the file being searched (see ``keyword_paths``):
    env['TEXINPUTS'] for "input" and "include" keywords
    env['TEXINPUTS'] for "includegraphics" keyword
    env['TEXINPUTS'] for "lstinputlisting" keyword
    env['TEXINPUTS'] for "usepackage" keyword
    env['BIBINPUTS'] for "bibliography", "addbibresource", "addglobalbib"
                     and "addsectionbib" keywords
    env['BSTINPUTS'] for "bibliographystyle" keyword
    env['INDEXSTYLE'] for "makeindex" keyword, no scanning support needed just allows user to set it if needed.

    FIXME: also look for the class or style in document[class|style]{}
    FIXME: also look for the argument of bibliographystyle{}
    """
    # Maps an inclusion keyword to the path variable that is searched for it.
    keyword_paths = {'include': 'TEXINPUTS',
                     'input': 'TEXINPUTS',
                     'includegraphics': 'TEXINPUTS',
                     'bibliography': 'BIBINPUTS',
                     'bibliographystyle': 'BSTINPUTS',
                     'addbibresource': 'BIBINPUTS',
                     'addglobalbib': 'BIBINPUTS',
                     'addsectionbib': 'BIBINPUTS',
                     'makeindex': 'INDEXSTYLE',
                     'usepackage': 'TEXINPUTS',
                     'lstinputlisting': 'TEXINPUTS'}
    # The distinct path variable names used above.
    env_variables = SCons.Util.unique(list(keyword_paths.values()))
    # Commands whose first argument is a directory and whose second
    # argument is the file name(s).
    two_arg_commands = ['import', 'subimport',
                        'includefrom', 'subincludefrom',
                        'inputfrom', 'subinputfrom']

    # Strips the optional-argument block (and any whitespace in front of
    # the '[') from a matched keyword.  re.S is required because the
    # include regex accepts option blocks that span several lines; without
    # it '.*$' cannot reach the end of such a match and nothing is removed.
    _noopt_cre = re.compile(r'\s*\[.*$', re.S)

    def __init__(self, name, suffixes, graphics_extensions, *args, **kw):
        """Compile the scanning regexes and initialise the base scanner.

        Args:
            name: scanner name handed on to SCons.Scanner.Base.
            suffixes: suffixes of the files to scan (a construction
                variable such as '$LATEXSUFFIXES' is expanded by the
                scan check); used as ``skeys`` as well.
            graphics_extensions: extensions tried, in order, for an
                "includegraphics" name that has no extension.
            *args, **kw: forwarded to SCons.Scanner.Base.__init__.  The
                keywords 'function', 'path_function', 'recursive',
                'skeys', 'scan_check' and 'name' are overwritten here, so
                values passed for them by the caller have no effect.
        """
        regex = r'''
            \\(
                include
              | includegraphics(?:\s*\[[^\]]+\])?
              | lstinputlisting(?:\[[^\]]+\])?
              | input
              | import
              | subimport
              | includefrom
              | subincludefrom
              | inputfrom
              | subinputfrom
              | bibliography
              | addbibresource
              | addglobalbib
              | addsectionbib
              | usepackage
              )
                  \s*{([^}]*)}       # first arg
              (?: \s*{([^}]*)} )?    # maybe another arg
        '''
        self.cre = re.compile(regex, re.M | re.X)
        # Splits a line into the text before the first unescaped '%'
        # (group 1) and the comment, if any (group 2).
        self.comment_re = re.compile(r'^((?:(?:\\%)|[^%\n])*)(.*)$', re.M)

        self.graphics_extensions = graphics_extensions

        def _scan(node, env, path=(), self=self):
            """Scanner function: recursively scan an existing file."""
            node = node.rfile()
            if not node.exists():
                return []
            return self.scan_recurse(node, path)

        class FindMultiPathDirs:
            """The stock FindPathDirs function has the wrong granularity:
            it is called once per target, while we need the path that depends
            on what kind of included files is being searched. This wrapper
            hides multiple instances of FindPathDirs, one per the LaTeX path
            variable in the environment. When invoked, the function calculates
            and returns all the required paths as a dictionary (converted into
            a tuple to become hashable). Then the scan function converts it
            back and uses a dictionary of tuples rather than a single tuple
            of paths.
            """
            def __init__(self, dictionary):
                # keyword -> (finder for env[var], finder for env['ENV'][var])
                self.dictionary = {}
                for k, n in dictionary.items():
                    self.dictionary[k] = (SCons.Scanner.FindPathDirs(n),
                                          FindENVPathDirs(n))

            def __call__(self, env, dir=None, target=None, source=None,
                                    argument=None):
                # NOTE(review): dir, target, source and argument are
                # accepted but not forwarded; every finder is called with
                # None for them.  Confirm whether that is intended before
                # changing it.
                di = {}
                for k, (c, cENV) in self.dictionary.items():
                    di[k] = (c(env, dir=None, target=None, source=None,
                               argument=None),
                             cENV(env, dir=None, target=None, source=None,
                                  argument=None))
                # To prevent "dict is not hashable error"
                return tuple(di.items())

        class LaTeXScanCheck:
            """Skip all but LaTeX source files, i.e., do not scan *.eps,
            *.pdf, *.jpg, etc.
            """
            def __init__(self, suffixes):
                self.suffixes = suffixes

            def __call__(self, node, env):
                current = not node.has_builder() or node.is_up_to_date()
                scannable = node.get_suffix() in env.subst_list(self.suffixes)[0]
                # Returning false means that the file is not scanned.
                return scannable and current

        kw['function'] = _scan
        kw['path_function'] = FindMultiPathDirs(LaTeX.keyword_paths)
        kw['recursive'] = 0
        kw['skeys'] = suffixes
        kw['scan_check'] = LaTeXScanCheck(suffixes)
        kw['name'] = name

        SCons.Scanner.Base.__init__(self, *args, **kw)

    def _latex_names(self, include_type, filename):
        """Return the candidate file names for one inclusion.

        Args:
            include_type: the inclusion keyword, options already removed.
            filename: the file name as written in the LaTeX source.

        Returns:
            A list of names to try, in order.  A name that already has an
            extension, or whose keyword has no default extension, is
            returned unchanged as the only candidate.
        """
        if os.path.splitext(filename)[1] != "":
            return [filename]

        if include_type in ('input', 'include', 'import', 'subimport',
                            'includefrom', 'subincludefrom',
                            'inputfrom', 'subinputfrom'):
            return [filename + '.tex']
        if include_type == 'bibliography':
            return [filename + '.bib']
        if include_type == 'usepackage':
            return [filename + '.sty']
        if include_type == 'includegraphics':
            # Trying self.graphics_extensions + TexGraphics instead would
            # find dependencies for the PDF builder when only an .eps
            # figure is present.  Since it will be found if the user tells
            # scons how to make the pdf figure, leave it out for now.
            return [filename + e for e in self.graphics_extensions]
        return [filename]

    def sort_key(self, include):
        """Return the normalized-case string form of ``include`` for sorting."""
        return SCons.Node.FS._my_normcase(str(include))

    def find_include(self, include, source_dir, path):
        """Locate the file an inclusion refers to.

        Args:
            include: ``(inc_type, inc_subdir, inc_filename)`` tuple.
            source_dir: directory node of the top-level file being scanned.
            path: dictionary mapping a keyword to a pair of directory
                tuples (from env[var] and from env['ENV'][var]).

        Returns:
            ``(node, include)``; ``node`` is None if no candidate name was
            found in any of the search paths.
        """
        inc_type, inc_subdir, inc_filename = include
        try:
            sub_paths = path[inc_type]
        except (IndexError, KeyError):
            sub_paths = ((), ())
        try_names = self._latex_names(inc_type, inc_filename)

        # There are three search paths to try:
        #  1. current directory "source_dir"
        #  2. env[var]
        #  3. env['ENV'][var]
        search_paths = [(source_dir,)] + list(sub_paths)

        for n in try_names:
            for search_path in search_paths:
                paths = tuple([d.Dir(inc_subdir) for d in search_path])
                i = SCons.Node.FS.find_file(n, paths)
                if i:
                    return i, include
        return None, include

    def canonical_text(self, text):
        """Standardize an input TeX-file contents.

        Currently:
          * removes comments, unwrapping comment-wrapped lines.

        Returns:
            The cleaned text, right-stripped and terminated by exactly
            one newline.
        """
        out = []
        line_continues_a_comment = False
        for line in text.splitlines():
            # The pattern can match the empty string, so a match at the
            # start of the line always exists.
            line, comment = self.comment_re.match(line).groups()
            if line_continues_a_comment:
                # The previous line ended in a comment, which swallows the
                # line break: glue this line onto it.
                out[-1] = out[-1] + line.lstrip()
            else:
                out.append(line)
            line_continues_a_comment = len(comment) > 0
        return '\n'.join(out).rstrip() + '\n'

    def scan(self, node, subdir='.'):
        """Return the inclusions found directly in ``node``.

        The result is cached in ``node.includes`` so a file is parsed only
        once; a cached list is returned as is, whatever ``subdir`` is.

        Args:
            node: file node providing ``includes`` and
                ``get_text_contents()``.
            subdir: directory, relative to the search paths, in which the
                files included by ``node`` are looked up.

        Returns:
            A list of ``(inc_type, inc_subdir, inc_filename)`` tuples.
        """
        if node.includes is not None:
            return node.includes

        text = self.canonical_text(node.get_text_contents())
        # 1. Split comma-separated lines, e.g.
        #      ('bibliography', 'phys,comp')
        #    should become two entries
        #      ('bibliography', 'phys')
        #      ('bibliography', 'comp')
        # 2. Remove the options, e.g., such as
        #      ('includegraphics[clip,width=0.7\\linewidth]', 'picture.eps')
        #    should become
        #      ('includegraphics', 'picture.eps')
        split_includes = []
        for include in self.cre.findall(text):
            inc_type = self._noopt_cre.sub('', include[0])
            inc_subdir = subdir
            if inc_type in self.two_arg_commands:
                # First argument is the directory, second the file list.
                inc_subdir = os.path.join(subdir, include[1])
                inc_list = include[2].split(',')
            else:
                inc_list = include[1].split(',')
            for inc in inc_list:
                split_includes.append((inc_type, inc_subdir, inc))

        node.includes = split_includes
        return split_includes

    def scan_recurse(self, node, path=()):
        """Do a recursive scan of the top level target file.

        This lets us search for included files based on the
        directory of the main file just as latex does.

        Args:
            node: the top-level file node.
            path: the search paths as a tuple of ``(keyword, paths)``
                items, converted back into a dictionary here.

        Returns:
            The nodes of all files found, ordered by ``sort_key``.  An
            inclusion that cannot be found produces a DependencyWarning,
            except for "usepackage".
        """
        path_dict = dict(path)

        queue = []
        queue.extend(self.scan(node))
        # Inclusions already handled.  The whole (type, subdir, filename)
        # tuple is the key: the same bare name may refer to different files
        # depending on the keyword (foo.tex / foo.bib / foo.png) or on the
        # import sub-directory, and each of them must be looked up.
        seen = set()

        # Found nodes are collected together with their sort key (the
        # normalized-case string form of the node) and sorted at the end.
        # TODO: what about the comment in the original Classic scanner:
        # """which lets
        # us keep the sort order constant regardless of whether the file
        # is actually found in a Repository or locally."""
        nodes = []
        source_dir = node.get_dir()
        while queue:
            include = queue.pop()
            inc_type, inc_subdir, inc_filename = include

            if include in seen:
                continue
            seen.add(include)

            n, i = self.find_include(include, source_dir, path_dict)
            if n is None:
                # Do not bother with 'usepackage' warnings, as they most
                # likely refer to system-level files
                if inc_type != 'usepackage':
                    SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
                                        "No dependency generated for file: %s (included from: %s) -- file not found" % (i, node))
            else:
                sortkey = self.sort_key(n)
                nodes.append((sortkey, n))
                # recurse down
                queue.extend(self.scan(n, inc_subdir))

        return [pair[1] for pair in sorted(nodes)]
418
419# Local Variables:
420# tab-width:4
421# indent-tabs-mode:nil
422# End:
423# vim: set expandtab tabstop=4 shiftwidth=4:
424