1"""SCons.Scanner.Fortran
2
3This module implements the dependency scanner for Fortran code.
4
5"""
6
7#
8# Copyright (c) 2001 - 2014 The SCons Foundation
9#
10# Permission is hereby granted, free of charge, to any person obtaining
11# a copy of this software and associated documentation files (the
12# "Software"), to deal in the Software without restriction, including
13# without limitation the rights to use, copy, modify, merge, publish,
14# distribute, sublicense, and/or sell copies of the Software, and to
15# permit persons to whom the Software is furnished to do so, subject to
16# the following conditions:
17#
18# The above copyright notice and this permission notice shall be included
19# in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
22# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
23# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
29__revision__ = "src/engine/SCons/Scanner/Fortran.py  2014/08/24 12:12:31 garyo"
30
31import re
32
33import SCons.Node
34import SCons.Node.FS
35import SCons.Scanner
36import SCons.Util
37import SCons.Warnings
38
class F90Scanner(SCons.Scanner.Classic):
    """
    A Classic Scanner subclass for Fortran source files which takes
    into account both USE and INCLUDE statements.  This scanner will
    work for both F77 and F90 (and beyond) compilers.

    Currently, this scanner assumes that the include files do not contain
    USE statements.  To enable the ability to deal with USE statements
    in include files, add logic right after the module names are found
    to loop over each include file, search for and locate each USE
    statement, and append each module name to the list of dependencies.
    Caching the search results in a common dictionary somewhere so that
    the same include file is not searched multiple times would be a
    smart thing to do.
    """

    def __init__(self, name, suffixes, path_variable,
                 use_regex, incl_regex, def_regex, *args, **kw):
        """Compile the three regexes and configure the underlying scanner.

        name          -- stored as the 'name' keyword of the scanner
        suffixes      -- stored as the 'skeys' keyword of the scanner
        path_variable -- construction variable name handed to
                         SCons.Scanner.FindPathDirs to build the path function
        use_regex     -- pattern whose findall() yields USE'd module names
        incl_regex    -- pattern whose findall() yields INCLUDE'd file names
        def_regex     -- pattern whose findall() yields module names
                         defined in the scanned file

        Any extra positional and keyword arguments are forwarded to
        SCons.Scanner.Current.__init__; the 'function', 'path_function',
        'recursive', 'skeys' and 'name' keywords are always overwritten here.
        """

        # All three patterns are compiled in multi-line mode so that '^'
        # matches at the start of every line of the file contents.
        self.cre_use = re.compile(use_regex, re.M)
        self.cre_incl = re.compile(incl_regex, re.M)
        self.cre_def = re.compile(def_regex, re.M)

        # The scanner instance is bound through a default argument of the
        # nested function, which is then installed as the scan function.
        def _scan(node, env, path, self=self):
            node = node.rfile()

            # Nothing to scan if the resolved file does not exist.
            if not node.exists():
                return []

            return self.scan(node, env, path)

        kw['function'] = _scan
        kw['path_function'] = SCons.Scanner.FindPathDirs(path_variable)
        kw['recursive'] = 1
        kw['skeys'] = suffixes
        kw['name'] = name

        # NOTE: Current.__init__ is called directly rather than the
        # initializer of the declared base class (Classic).
        SCons.Scanner.Current.__init__(self, *args, **kw)

    def scan(self, node, env, path=()):
        """Return the dependency nodes found for the Fortran file `node`.

        node -- file node to scan; its `includes` attribute is used as a
                cache for the list of raw dependency names
        env  -- construction environment, used to expand $FORTRANMODSUFFIX
        path -- search path, or a callable returning the search path

        Each raw dependency name is resolved with self.find_include();
        names that cannot be resolved produce a DependencyWarning and are
        left out of the result.  The returned list is ordered by
        self.sort_key() of the raw name.
        """

        # cache the includes list in node so we only scan it once:
        if node.includes is not None:
            mods_and_includes = node.includes
        else:
            # Fetch the file text once and run all three patterns over it.
            contents = node.get_text_contents()
            # retrieve all included filenames
            includes = self.cre_incl.findall(contents)
            # retrieve all USE'd module names
            modules = self.cre_use.findall(contents)
            # retrieve all defined module names
            defmodules = self.cre_def.findall(contents)

            # Remove all USE'd module names that are defined in the same file
            # (case-insensitively)
            defined = set(m.lower() for m in defmodules)
            modules = [m for m in modules if m.lower() not in defined]

            # Convert module name to a .mod filename
            suffix = env.subst('$FORTRANMODSUFFIX')
            modules = [x.lower() + suffix for x in modules]
            # Remove duplicate items from the list
            mods_and_includes = SCons.Util.unique(includes+modules)
            node.includes = mods_and_includes

        # This is a hand-coded DSU (decorate-sort-undecorate, or
        # Schwartzian transform) pattern.  The sort key is the raw name
        # of the file as specified on the USE or INCLUDE line, which lets
        # us keep the sort order constant regardless of whether the file
        # is actually found in a Repository or locally.
        nodes = []
        source_dir = node.get_dir()
        if callable(path):
            path = path()
        for dep in mods_and_includes:
            n, i = self.find_include(dep, source_dir, path)

            if n is None:
                SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
                                    "No dependency generated for file: %s (referenced by: %s) -- file not found" % (i, node))
            else:
                sortkey = self.sort_key(dep)
                nodes.append((sortkey, n))

        return [pair[1] for pair in sorted(nodes)]
125
def FortranScan(path_variable="FORTRANPATH"):
    """Return a prototype Scanner instance for scanning source files
    for Fortran USE & INCLUDE statements.

    path_variable -- name of the construction variable that is passed to
                     F90Scanner as the search-path variable
                     (default "FORTRANPATH")

    The scanner is an F90Scanner named "FortranScan", keyed on
    "$FORTRANSUFFIXES" and built from the three regular expressions
    documented below.
    """

#   The USE statement regex matches the following:
#
#   USE module_name
#   USE :: module_name
#   USE, INTRINSIC :: module_name
#   USE, NON_INTRINSIC :: module_name
#
#   Limitations
#
#   --  While the regex can handle multiple USE statements on one line,
#       it cannot properly handle them if they are commented out.
#       In either of the following cases:
#
#            !  USE mod_a ; USE mod_b         [entire line is commented out]
#               USE mod_a ! ; USE mod_b       [in-line comment of second USE statement]
#
#       the second module name (mod_b) will be picked up as a dependency
#       even though it should be ignored.  The only way I can see
#       to rectify this would be to modify the scanner to eliminate
#       the call to re.findall, read in the contents of the file,
#       treating the comment character as an end-of-line character
#       in addition to the normal linefeed, loop over each line,
#       weeding out the comments, and looking for the USE statements.
#       One advantage to this is that the regex passed to the scanner
#       would no longer need to match a semicolon.
#
#   --  I question whether or not we need to detect dependencies to
#       INTRINSIC modules because these are built-in to the compiler.
#       If we consider them a dependency, will SCons look for them, not
#       find them, and kill the build?  Or will there be standard
#       compiler-specific directories we will need to point to so the
#       compiler and SCons can locate the proper object and mod files?

#   Here is a breakdown of the regex:
#
#   (?i)               : regex is case insensitive
#   (?:                : group a collection of regex symbols without saving the match as a "group"
#      ^|;             : matches either the start of the line or a semicolon
#   )                  : end the unsaved grouping
#   \s*                : any amount of white space
#   USE                : match the string USE, case insensitive
#   (?:                : group a collection of regex symbols without saving the match as a "group"
#      \s+|            : match one or more whitespace OR ....  (the next entire grouped set of regex symbols)
#      (?:             : group a collection of regex symbols without saving the match as a "group"
#         (?:          : establish another unsaved grouping of regex symbols
#            \s*       : any amount of white space
#            ,         : match a comma
#            \s*       : any amount of white space
#            (?:NON_)? : optionally match the prefix NON_, case insensitive
#            INTRINSIC : match the string INTRINSIC, case insensitive
#         )?           : optionally match the ", INTRINSIC/NON_INTRINSIC" grouped expression
#         \s*          : any amount of white space
#         ::           : match a double colon that must appear after the INTRINSIC/NON_INTRINSIC attribute
#      )               : end the unsaved grouping
#   )                  : end the unsaved grouping
#   \s*                : match any amount of white space
#   (\w+)              : match the module name that is being USE'd
#
#
    # Raw string: keeps the backslash escapes intact for the regex engine
    # without relying on Python's handling of unknown string escapes.
    use_regex = r"(?i)(?:^|;)\s*USE(?:\s+|(?:(?:\s*,\s*(?:NON_)?INTRINSIC)?\s*::))\s*(\w+)"


#   The INCLUDE statement regex matches the following:
#
#   INCLUDE 'some_Text'
#   INCLUDE "some_Text"
#   INCLUDE "some_Text" ; INCLUDE "some_Text"
#   INCLUDE kind_"some_Text"
#   INCLUDE kind_'some_Text'
#
#   where some_Text can include any alphanumeric and/or special character
#   as defined by the Fortran 2003 standard.
#
#   Limitations:
#
#   --  The Fortran standard dictates that a " or ' in the INCLUDE'd
#       string must be represented as a "" or '', if the quotes that wrap
#       the entire string are either a ' or ", respectively.   While the
#       regular expression below can detect the ' or " characters just fine,
#       the scanning logic, presently is unable to detect them and reduce
#       them to a single instance.  This probably isn't an issue since,
#       in practice, ' or " are not generally used in filenames.
#
#   --  This regex will not properly deal with multiple INCLUDE statements
#       when the entire line has been commented out, ala
#
#           ! INCLUDE 'some_file' ; INCLUDE 'some_file'
#
#       In such cases, it will properly ignore the first INCLUDE file,
#       but will actually still pick up the second.  Interestingly enough,
#       the regex will properly deal with these cases:
#
#             INCLUDE 'some_file'
#             INCLUDE 'some_file' !; INCLUDE 'some_file'
#
#       To get around the above limitation, the FORTRAN programmer could
#       simply comment each INCLUDE statement separately, like this
#
#           ! INCLUDE 'some_file' !; INCLUDE 'some_file'
#
#       The way I see it, the only way to get around this limitation would
#       be to modify the scanning logic to replace the calls to re.findall
#       with a custom loop that processes each line separately, throwing
#       away fully commented out lines before attempting to match against
#       the INCLUDE syntax.
#
#   Here is a breakdown of the regex:
#
#   (?i)               : regex is case insensitive
#   (?:                : begin a non-saving group that matches the following:
#      ^               :    either the start of the line
#      |               :                or
#      ['">]\s*;       :    a semicolon that follows a single quote,
#                           double quote or greater than symbol (with any
#                           amount of whitespace in between).  This will
#                           allow the regex to match multiple INCLUDE
#                           statements per line (although it also requires
#                           the positive lookahead assertion that is
#                           used below).  It will even properly deal with
#                           (i.e. ignore) cases in which the additional
#                           INCLUDES are part of an in-line comment, ala
#                                           "  INCLUDE 'someFile' ! ; INCLUDE 'someFile2' "
#   )                  : end of non-saving group
#   \s*                : any amount of white space
#   INCLUDE            : match the string INCLUDE, case insensitive
#   \s+                : match one or more white space characters
#   (?:\w+_)?          : match the optional "kind-param _" prefix allowed by the standard
#   [<"']              : match the include delimiter - an apostrophe, double quote, or less than symbol
#   (.+?)              : match one or more characters that make up
#                        the included path and file name and save it
#                        in a group.  The Fortran standard allows for
#                        any non-control character to be used.  The dot
#                        operator will pick up any character, including
#                        control codes, but I can't conceive of anyone
#                        putting control codes in their file names.
#                        The question mark indicates it is non-greedy so
#                        that regex will match only up to the next quote,
#                        double quote, or greater than symbol
#   (?=["'>])          : positive lookahead assertion to match the include
#                        delimiter - an apostrophe, double quote, or
#                        greater than symbol.  This level of complexity
#                        is required so that the include delimiter is
#                        not consumed by the match, thus allowing the
#                        sub-regex discussed above to uniquely match a
#                        set of semicolon-separated INCLUDE statements
#                        (as allowed by the F2003 standard)

    include_regex = r"""(?i)(?:^|['">]\s*;)\s*INCLUDE\s+(?:\w+_)?[<"'](.+?)(?=["'>])"""

#   The MODULE statement regex finds module definitions by matching
#   the following:
#
#   MODULE module_name
#
#   but *not* the following:
#
#   MODULE PROCEDURE procedure_name
#
#   Here is a breakdown of the regex:
#
#   (?i)               : regex is case insensitive
#   ^\s*               : start of line followed by any amount of white space
#   MODULE             : match the string MODULE, case insensitive
#   \s+                : match one or more white space characters
#   (?!PROCEDURE)      : but *don't* match if the next word matches
#                        PROCEDURE (negative lookahead assertion),
#                        case insensitive
#   (\w+)              : match one or more alphanumeric characters
#                        that make up the defined module name and
#                        save it in a group
#
#   Limitation: the lookahead has no word boundary, so a module whose
#   name merely begins with PROCEDURE (e.g. "procedure_utils") is also
#   rejected by this regex.

    def_regex = r"""(?i)^\s*MODULE\s+(?!PROCEDURE)(\w+)"""

    scanner = F90Scanner("FortranScan",
                         "$FORTRANSUFFIXES",
                         path_variable,
                         use_regex,
                         include_regex,
                         def_regex)
    return scanner
311
312# Local Variables:
313# tab-width:4
314# indent-tabs-mode:nil
315# End:
316# vim: set expandtab tabstop=4 shiftwidth=4:
317