# MIT License
#
# Copyright The SCons Foundation
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"""Dependency scanner for LaTeX code."""

import os.path
import re

# SCons.Node.FS and SCons.Warnings are referenced directly by the scanner
# below, so import them explicitly instead of relying on SCons.Scanner
# having pulled them in.
import SCons.Node.FS
import SCons.Scanner
import SCons.Util
import SCons.Warnings

# list of graphics file extensions for TeX and LaTeX
TexGraphics = ['.eps', '.ps']
#LatexGraphics = ['.pdf', '.png', '.jpg', '.gif', '.tif']
LatexGraphics = [ '.png', '.jpg', '.gif', '.tif']


# Used as a return value of modify_env_var if the variable is not set.
# Note that the sentinel is the class object itself, not an instance.
class _Null:
    pass


_null = _Null

# The user specifies the paths in env[variable], similar to other builders.
# They may be relative and must be converted to absolute, as expected
# by LaTeX and Co. The environment may already have some paths in
# env['ENV'][var]. These paths are honored, but the env[var] paths have
# higher precedence. All changes are un-done on exit.
def modify_env_var(env, var, abspath):
    """Prepend LaTeX search directories to ``env['ENV'][var]``.

    ``abspath`` is prepended first; then, if the construction variable
    ``env[var]`` is set, each of its entries is converted with
    ``os.path.abspath`` and prepended as well.  ``env[var]`` may be a list
    or a single ``os.pathsep``-separated string.  The final value is stored
    as one string that always ends with ``os.pathsep``.

    Args:
        env: construction environment; ``env['ENV']`` is modified in place.
        var: name of the path variable (e.g. ``'TEXINPUTS'``).
        abspath: directory to prepend unconditionally.

    Returns:
        The previous value of ``env['ENV'][var]``, or the module-level
        ``_null`` sentinel if the variable was not set.
    """
    try:
        save = env['ENV'][var]
    except KeyError:
        save = _null
    env.PrependENVPath(var, abspath)
    try:
        if SCons.Util.is_List(env[var]):
            env.PrependENVPath(var, [os.path.abspath(str(p)) for p in env[var]])
        else:
            # Split at os.pathsep to convert into absolute path
            env.PrependENVPath(var, [os.path.abspath(p) for p in str(env[var]).split(os.pathsep)])
    except KeyError:
        # env[var] is not set: only abspath (and any pre-existing
        # env['ENV'][var] entries) are used.
        pass

    # Convert into a string explicitly to append ":" (without which it won't search system
    # paths as well). The problem is that env.AppendENVPath(var, ":")
    # does not work, refuses to append ":" (os.pathsep).

    if SCons.Util.is_List(env['ENV'][var]):
        env['ENV'][var] = os.pathsep.join(env['ENV'][var])
    # Append the trailing os.pathsep character here to catch the case with no env[var]
    env['ENV'][var] = env['ENV'][var] + os.pathsep

    return save


class FindENVPathDirs:
    """Bind a path variable name to a callable returning its directories.

    Calling an instance looks the variable up in ``env['ENV']`` (not in the
    construction environment itself) and returns the matching directories
    as a tuple, or an empty tuple when the variable is not set.
    """
    def __init__(self, variable):
        self.variable = variable

    def __call__(self, env, dir=None, target=None, source=None, argument=None):
        import SCons.PathList
        try:
            path = env['ENV'][self.variable]
        except KeyError:
            return ()

        # Fall back to the environment's current directory when no
        # directory was supplied.
        dir = dir or env.fs._cwd
        path = SCons.PathList.PathList(path).subst_path(env, target, source)
        return tuple(dir.Rfindalldirs(path))


def LaTeXScanner():
    """
    Return a prototype Scanner instance for scanning LaTeX source files
    when built with latex.
    """
    ds = LaTeX(name = "LaTeXScanner",
               suffixes = '$LATEXSUFFIXES',
               # in the search order, see below in LaTeX class docstring
               graphics_extensions = TexGraphics,
               recursive = 0)
    return ds


def PDFLaTeXScanner():
    """
    Return a prototype Scanner instance for scanning LaTeX source files
    when built with pdflatex.
    """
    ds = LaTeX(name = "PDFLaTeXScanner",
               suffixes = '$LATEXSUFFIXES',
               # in the search order, see below in LaTeX class docstring
               graphics_extensions = LatexGraphics,
               recursive = 0)
    return ds


class LaTeX(SCons.Scanner.Base):
    """Class for scanning LaTeX files for included files.

    Unlike most scanners, which use regular expressions that just
    return the included file name, :meth:`scan` returns a list of
    ``(keyword, subdir, filename)`` tuples: the keyword of the inclusion
    ("include", "includegraphics", "input", "bibliography", ...), the
    directory the file name is relative to, and the file name itself.

    Default extensions are added to names given without one (see
    :meth:`_latex_names`): ``.tex`` for "input", "include" and the
    two-argument import commands, ``.bib`` for "bibliography" and ``.sty``
    for "usepackage".

    If there is no extension for an "includegraphics" keyword, each
    extension in ``graphics_extensions`` is tried in order.  The
    prototype scanners in this module pass ``TexGraphics`` for latex and
    ``LatexGraphics`` for pdflatex.

    The actual subset and search order may be altered by the
    DeclareGraphicsExtensions command. This complication is ignored.
    The default order was originally chosen by experimentation with teTeX::

        $ latex --version
        pdfeTeX 3.141592-1.21a-2.2 (Web2C 7.5.4)
        kpathsea version 3.5.4

    Another difference is that the search path is determined by the type
    of the file being searched (see ``keyword_paths``):
    env['TEXINPUTS'] for "input" and "include" keywords
    env['TEXINPUTS'] for "includegraphics" keyword
    env['TEXINPUTS'] for "lstinputlisting" keyword
    env['TEXINPUTS'] for "usepackage" keyword
    env['BIBINPUTS'] for "bibliography", "addbibresource",
                     "addglobalbib" and "addsectionbib" keywords
    env['BSTINPUTS'] for "bibliographystyle" keyword
    env['INDEXSTYLE'] for "makeindex" keyword, no scanning support needed
                      just allows user to set it if needed.

    FIXME: also look for the class or style in document[class|style]{}
    FIXME: also look for the argument of bibliographystyle{}
    """
    keyword_paths = {'include': 'TEXINPUTS',
                     'input': 'TEXINPUTS',
                     'includegraphics': 'TEXINPUTS',
                     'bibliography': 'BIBINPUTS',
                     'bibliographystyle': 'BSTINPUTS',
                     'addbibresource': 'BIBINPUTS',
                     'addglobalbib': 'BIBINPUTS',
                     'addsectionbib': 'BIBINPUTS',
                     'makeindex': 'INDEXSTYLE',
                     'usepackage': 'TEXINPUTS',
                     'lstinputlisting': 'TEXINPUTS'}
    env_variables = SCons.Util.unique(list(keyword_paths.values()))
    # Commands whose first argument is a directory and whose second
    # argument is the file name.
    two_arg_commands = ['import', 'subimport',
                        'includefrom', 'subincludefrom',
                        'inputfrom', 'subinputfrom']

    # Extension appended by _latex_names() when the file name has none.
    # "includegraphics" is handled separately because it expands to one
    # candidate per graphics extension.
    _default_suffixes = {'input': '.tex',
                         'include': '.tex',
                         'import': '.tex',
                         'subimport': '.tex',
                         'includefrom': '.tex',
                         'subincludefrom': '.tex',
                         'inputfrom': '.tex',
                         'subinputfrom': '.tex',
                         'bibliography': '.bib',
                         'usepackage': '.sty'}

    # Strips the "[options]" part (with optional leading whitespace) that
    # the main regex captures together with the keyword.  DOTALL so that an
    # option list spanning several lines is removed as well.
    _noopt_cre = re.compile(r'\s*\[.*$', re.S)

    def __init__(self, name, suffixes, graphics_extensions, *args, **kw):
        """Set up the regexes and hand the scan hooks to the base scanner.

        Args:
            name: scanner name passed on to the base class.
            suffixes: suffix specification of the files to scan; used both
                as the scanner's ``skeys`` and by the scan check.
            graphics_extensions: extensions tried, in order, for an
                "includegraphics" name without an extension.
            *args, **kw: forwarded to ``SCons.Scanner.Base.__init__``; the
                ``function``, ``path_function``, ``recursive``, ``skeys``,
                ``scan_check`` and ``name`` keywords are overwritten here.
        """
        regex = r'''
            \\(
                include
              | includegraphics(?:\s*\[[^\]]+\])?
              | lstinputlisting(?:\[[^\]]+\])?
              | input
              | import
              | subimport
              | includefrom
              | subincludefrom
              | inputfrom
              | subinputfrom
              | bibliography
              | addbibresource
              | addglobalbib
              | addsectionbib
              | usepackage
              )
                  \s*{([^}]*)}       # first arg
              (?: \s*{([^}]*)} )?    # maybe another arg
        '''
        self.cre = re.compile(regex, re.M | re.X)
        # Group 1: the line up to the first unescaped '%'; group 2: the
        # comment (possibly empty).
        self.comment_re = re.compile(r'^((?:(?:\\%)|[^%\n])*)(.*)$', re.M)

        self.graphics_extensions = graphics_extensions

        def _scan(node, env, path=(), self=self):
            node = node.rfile()
            if not node.exists():
                return []
            return self.scan_recurse(node, path)

        class FindMultiPathDirs:
            """The stock FindPathDirs function has the wrong granularity:
            it is called once per target, while we need the path that depends
            on what kind of included files is being searched. This wrapper
            hides multiple instances of FindPathDirs, one per the LaTeX path
            variable in the environment. When invoked, the function calculates
            and returns all the required paths as a dictionary (converted into
            a tuple to become hashable). Then the scan function converts it
            back and uses a dictionary of tuples rather than a single tuple
            of paths.
            """
            def __init__(self, dictionary):
                self.dictionary = {}
                for k,n in dictionary.items():
                    self.dictionary[k] = ( SCons.Scanner.FindPathDirs(n),
                                           FindENVPathDirs(n) )

            def __call__(self, env, dir=None, target=None, source=None,
                         argument=None):
                # NOTE(review): dir/target/source/argument received here are
                # not forwarded; every finder is called with None for all of
                # them.  Left as is because forwarding would change how the
                # search paths are resolved -- confirm whether intentional.
                di = {}
                for k,(c,cENV) in self.dictionary.items():
                    di[k] = ( c(env, dir=None, target=None, source=None,
                                argument=None) ,
                              cENV(env, dir=None, target=None, source=None,
                                   argument=None) )
                # To prevent "dict is not hashable error"
                return tuple(di.items())

        class LaTeXScanCheck:
            """Skip all but LaTeX source files, i.e., do not scan *.eps,
            *.pdf, *.jpg, etc.
            """
            def __init__(self, suffixes):
                self.suffixes = suffixes

            def __call__(self, node, env):
                current = not node.has_builder() or node.is_up_to_date()
                scannable = node.get_suffix() in env.subst_list(self.suffixes)[0]
                # Returning false means that the file is not scanned.
                return scannable and current

        kw['function'] = _scan
        kw['path_function'] = FindMultiPathDirs(LaTeX.keyword_paths)
        kw['recursive'] = 0
        kw['skeys'] = suffixes
        kw['scan_check'] = LaTeXScanCheck(suffixes)
        kw['name'] = name

        SCons.Scanner.Base.__init__(self, *args, **kw)

    def _latex_names(self, include_type, filename):
        """Return the candidate file names for one inclusion.

        A name that already has an extension is returned unchanged.
        Otherwise "includegraphics" yields one candidate per entry of
        ``self.graphics_extensions`` (in that order), keywords listed in
        ``_default_suffixes`` get their default extension appended, and
        any other keyword returns the name as given.
        """
        if os.path.splitext(filename)[1] != "":
            return [filename]
        if include_type == 'includegraphics':
            #return [filename+e for e in self.graphics_extensions + TexGraphics]
            # use the line above to find dependencies for the PDF builder
            # when only an .eps figure is present.  Since it will be found
            # if the user tells scons how to make the pdf figure, leave
            # it out for now.
            return [filename+e for e in self.graphics_extensions]
        suffix = self._default_suffixes.get(include_type)
        if suffix is None:
            return [filename]
        return [filename + suffix]

    def sort_key(self, include):
        """Return the case-normalized string form of ``include`` for sorting."""
        return SCons.Node.FS._my_normcase(str(include))

    def find_include(self, include, source_dir, path):
        """Locate the file named by one ``(type, subdir, filename)`` tuple.

        Args:
            include: tuple as produced by :meth:`scan`.
            source_dir: directory node of the top-level file being scanned.
            path: dictionary mapping an include keyword to a pair of
                directory tuples (from ``env[var]`` and ``env['ENV'][var]``).

        Returns:
            ``(node, include)`` for the first candidate found, or
            ``(None, include)`` if no candidate exists.
        """
        inc_type, inc_subdir, inc_filename = include
        try:
            sub_paths = path[inc_type]
        except (IndexError, KeyError):
            # Keyword without a registered path variable: search only the
            # source directory.
            sub_paths = ((), ())
        try_names = self._latex_names(inc_type, inc_filename)

        # There are three search paths to try:
        #  1. current directory "source_dir"
        #  2. env[var]
        #  3. env['ENV'][var]
        search_paths = [(source_dir,)] + list(sub_paths)

        for n in try_names:
            for search_path in search_paths:
                paths = tuple([d.Dir(inc_subdir) for d in search_path])
                i = SCons.Node.FS.find_file(n, paths)
                if i:
                    return i, include
        return None, include

    def canonical_text(self, text):
        """Standardize an input TeX-file contents.

        Currently:
          * removes comments, unwrapping comment-wrapped lines.
        """
        out = []
        line_continues_a_comment = False
        for line in text.splitlines():
            line,comment = self.comment_re.findall(line)[0]
            if line_continues_a_comment:
                # The previous line ended in a comment, which in TeX also
                # swallows the line break: glue this line onto it.
                out[-1] = out[-1] + line.lstrip()
            else:
                out.append(line)
            line_continues_a_comment = len(comment) > 0
        return '\n'.join(out).rstrip()+'\n'

    def scan(self, node, subdir='.'):
        """Return the inclusions found directly in ``node``.

        The result is a list of ``(keyword, subdir, filename)`` tuples and
        is cached in ``node.includes`` so each node is parsed only once.

        Compared with the raw regex matches:

        1. Comma-separated lists are split, e.g.
               ('bibliography', 'phys,comp')
           becomes two entries
               ('bibliography', '.', 'phys')
               ('bibliography', '.', 'comp')
           Whitespace (including line breaks) around each name is
           stripped and empty names are dropped, so lists wrapped over
           several lines or ending in a comma resolve correctly.
        2. Options are removed from the keyword, e.g.
               ('includegraphics[clip,width=0.7\\linewidth]', 'picture.eps')
           becomes
               ('includegraphics', '.', 'picture.eps')
        3. For the commands in ``two_arg_commands`` the first argument is
           joined onto ``subdir`` and the second argument supplies the
           file names.
        """
        if node.includes is not None:
            return node.includes

        text = self.canonical_text(node.get_text_contents())
        split_includes = []
        for include in self.cre.findall(text):
            inc_type = self._noopt_cre.sub('', include[0])
            inc_subdir = subdir
            if inc_type in self.two_arg_commands:
                inc_subdir = os.path.join(subdir, include[1])
                inc_list = include[2].split(',')
            else:
                inc_list = include[1].split(',')
            for inc in inc_list:
                inc = inc.strip()
                if inc:
                    split_includes.append((inc_type, inc_subdir, inc))

        node.includes = split_includes
        return split_includes

    def scan_recurse(self, node, path=()):
        """ do a recursive scan of the top level target file
        This lets us search for included files based on the
        directory of the main file just as latex does"""

        path_dict = dict(list(path))

        queue = []
        queue.extend( self.scan(node) )
        # File names already handled.  NOTE(review): keyed on the raw file
        # name only, so the same name under a different keyword or subdir
        # is treated as already seen.
        seen = set()

        # This is a hand-coded DSU (decorate-sort-undecorate, or
        # Schwartzian transform) pattern.  The sort key is the raw name
        # of the file as specified on the \include, \input, etc. line.
        # TODO: what about the comment in the original Classic scanner:
        # """which lets
        # us keep the sort order constant regardless of whether the file
        # is actually found in a Repository or locally."""
        nodes = []
        source_dir = node.get_dir()
        while queue:

            include = queue.pop()
            inc_type, inc_subdir, inc_filename = include

            if inc_filename in seen:
                continue
            seen.add(inc_filename)

            n, i = self.find_include(include, source_dir, path_dict)
            if n is None:
                # Do not bother with 'usepackage' warnings, as they most
                # likely refer to system-level files
                if inc_type != 'usepackage':
                    SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
                                        "No dependency generated for file: %s (included from: %s) -- file not found" % (i, node))
            else:
                sortkey = self.sort_key(n)
                nodes.append((sortkey, n))
                # recurse down
                queue.extend( self.scan(n, inc_subdir) )

        return [pair[1] for pair in sorted(nodes)]

# Local Variables:
# tab-width:4
# indent-tabs-mode:nil
# End:
# vim: set expandtab tabstop=4 shiftwidth=4: