xref: /freebsd/share/mk/meta2deps.py (revision 076ad2f8)
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5"""
6This script parses each "meta" file and extracts the
7information needed to deduce build and src dependencies.
8
9It works much the same as the original shell script, but is
10*much* more efficient.
11
12The parsing work is handled by the class MetaFile.
13We only pay attention to a subset of the information in the
14"meta" files.  Specifically:
15
'CWD'	to initialize our notion of the current working directory.
17
18'C'	to track chdir(2) on a per process basis
19
20'R'	files read are what we really care about.
21	directories read, provide a clue to resolving
22	subsequent relative paths.  That is if we cannot find
23	them relative to 'cwd', we check relative to the last
24	dir read.
25
26'W'	files opened for write or read-write,
27	for filemon V3 and earlier.
28
29'E'	files executed.
30
31'L'	files linked
32
33'V'	the filemon version, this record is used as a clue
34	that we have reached the interesting bit.
35
36"""
37
38"""
39RCSid:
40	$FreeBSD$
41	$Id: meta2deps.py,v 1.22 2016/12/12 19:07:42 sjg Exp $
42
43	Copyright (c) 2011-2013, Juniper Networks, Inc.
44	All rights reserved.
45
46	Redistribution and use in source and binary forms, with or without
47	modification, are permitted provided that the following conditions
48	are met:
49	1. Redistributions of source code must retain the above copyright
50	   notice, this list of conditions and the following disclaimer.
51	2. Redistributions in binary form must reproduce the above copyright
52	   notice, this list of conditions and the following disclaimer in the
53	   documentation and/or other materials provided with the distribution.
54
55	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
56	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
57	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
58	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
59	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
61	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
62	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
63	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
64	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
65	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
66
67"""
68
69import os, re, sys
70
def getv(dict, key, d=None):
    """Return dict[key] when key is present, otherwise the fallback d."""
    return dict[key] if key in dict else d
76
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Turn path into an absolute path without resolving symlinks.

    A trailing '/.' is dropped first.  Absolute paths are returned
    unchanged, '.' maps to cwd and './x' maps to cwd + '/x'.
    Any other relative path is looked for under last_dir and then
    under cwd; the first candidate that exists is returned.
    Returns None when no candidate exists.
    Progress is reported on debug_out when debug > 2.
    """
    verbose = debug > 2
    if path.endswith('/.'):
        path = path[:-2]
    if path.startswith('/'):
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    # probing last_dir is pointless when it is the same place as cwd
    bases = [cwd] if last_dir == cwd else [last_dir, cwd]
    for base in bases:
        if not base:
            continue
        candidate = base + '/' + path
        if verbose:
            print("looking for:", candidate, end=' ', file=debug_out)
        if os.path.exists(candidate):
            if verbose:
                print("found:", candidate, file=debug_out)
            return candidate
        if verbose:
            print("nope", file=debug_out)
    return None
106
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return path made absolute via resolve(), canonicalized if needed.

    This is called a lot, so os.path.realpath() is only used when the
    result has no '/' at all, contains './' past its first character,
    ends in '/..' or is itself a symlink.
    When resolve() finds nothing, the original path is checked instead.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    candidate = resolved if resolved else path
    needs_realpath = (
        '/' not in candidate
        or candidate.find('./') > 0
        or candidate.endswith('/..')
        or os.path.islink(candidate)
    )
    return os.path.realpath(candidate) if needs_realpath else candidate
122
def sort_unique(list, cmp=None, key=None, reverse=False):
    """
    Sort list in place and return a new list without adjacent duplicates.

    list     the list to sort; it is sorted in place as a side effect.
    cmp      optional old-style two argument comparison function.
    key      optional function computing the sort key of an element.
    reverse  sort in descending order when true.

    Returns a new list holding each distinct value once, in sorted order.
    """
    if cmp is not None:
        # list.sort() lost its cmp argument in Python 3, so wrap cmp
        # into a key; when a key is given too, cmp compares the keys,
        # as it did in Python 2.
        import functools
        cmp_key = functools.cmp_to_key(cmp)
        if key is None:
            key = cmp_key
        else:
            user_key = key
            key = lambda e: cmp_key(user_key(e))
    list.sort(key=key, reverse=reverse)
    nl = []
    for e in list:
        # equal values are adjacent after sorting, so comparing with
        # the last element kept is enough to drop duplicates
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl
132
def add_trims(x):
    """
    Return the four variants of x with and without a leading and a
    trailing '/', with the longest variant first and bare x last.
    """
    return [lead + x + trail for lead in ('/', '') for trail in ('/', '')]
138
class MetaFile:
    """class to parse meta files generated by bmake.

    The containers below are class attributes, so what the parser
    learns accumulates across instances until reset() rebinds them
    on a particular instance.
    """

    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf={}):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # nothing useful to record
                if srctop[-1] != '/':
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                # also remember the symlink-free spelling
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if not _srctop in self.srctops:
                    self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            if self.machine == 'host':
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    # an empty suffix (MACHINE not set) matches every
                    # string and objroot[0:-0] would leave nothing
                    if e and objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    continue            # trimmed away completely
                if objroot[-1] != '/':
                    objroot += '/'
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                    # realpath drops the trailing '/', put it back
                    _objroot = os.path.realpath(objroot)
                    if _objroot[-1] != '/':
                        _objroot += '/'
                    if not _objroot in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            # splits 'machine/rest/of/path' into machine and path
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS as one string, each entry preceded by sep
        (the first one by sep with surrounding white-space removed)."""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS, formatted the same way as dirdeps()."""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Write DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.
        Nothing is written when RELDIR is not known.
        out=None lets print() use its default destination."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.
        clue labels the list in the debug output."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees.
        Return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot  the object root that dir was found under.
        dir      the directory holding the file.
        path     the full path of the file.
        input    the raw path from the meta file, remembered as seen
                 when no .dirdep file tells us the answer.

        Returns the dependency, or None if it cannot be deduced.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                # the first line names the dir, close the file promptly
                with open(ddepf, 'r') as df:
                    ddep = df.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # drop a trailing '.MACHINE' or '.TARGET_SPEC'; an unset
                # MACHINE must not match or we would lose the last char
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            # what follows objroot should be 'machine/reldir'
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue, then let the exception propagate
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name as the file to read.
        file    if set, an already open file to read instead;
                it is left open for the caller.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        cwd = self.cwd                  # until a CWD record says otherwise
        if file:
            f = file
            self.last_dir = cwd
        else:
            f = open(self.name, 'r')
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        try:
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # everything before the 'V' record is header
                    # or command output
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switching process: save the state of the old one
                    # and pick up the state of the new one
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = pid_cwd.get(pid, self.cwd)
                    self.last_dir = pid_last_dir.get(pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out where its parent is
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write; w[1] is the pid, so the
                    # paths are w[2] and w[3]
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened ourselves
            if not file:
                f.close()

    def parse_path(self, path, cwd, op=None, w=[]):
        """look at a path for the op specified

        path    the path named by the record.
        cwd     the current directory of the process concerned.
        op      the operation to assume, defaults to w[0].
        w       the fields of the record; w[2] is the raw path that
                gets remembered as seen.
        """

        if not op:
            op = w[0]

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        rdir = dir
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        if rdir == dir or rdir.find('./') > 0:
            rdir = None
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is only a clue for resolving later
                # relative paths, never a dependency itself
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ERW':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, dir.replace(srctop,''), 'src')
                self.seenit(w[2])
                self.seenit(dir)
                if rdir and not rdir.startswith(srctop):
                    dir = rdir      # for below
                    rdir = None
                else:
                    return

            objroot = None
            # NOTE: the loop variable is dir itself, so when nothing
            # matches, dir is left as the last candidate (rdir)
            for dir in [dir,rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, w[2])
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        # drop objroot and the component after it
                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(w[2])
                self.seenit(dir)
559
560
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -T "TARGET_SPEC"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -q  do not print the results

    -d  bumps debug level

    Arguments:

    argv    the command line, argv[0] is ignored.

    klass   the class to instantiate for each meta file.

    xopts   extra getopt(3) option letters to accept.

    xoptf   called as xoptf(o, a, conf) for any option not listed above.

    An argument ending in '.meta' is parsed if it exists; an argument
    of the form '@file' names a file listing meta files to parse.

    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely optional, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # seed conf from the environment; each variable is looked at on
    # its own so that one being unset does not hide the others
    for var, key, is_list in (('MACHINE', 'MACHINE', False),
                              ('MACHINE_ARCH', 'MACHINE_ARCH', False),
                              ('SB_SRC', 'SRCTOPS', True),
                              ('SB_OBJROOT', 'OBJROOTS', True)):
        val = os.environ.get(var)
        if not val:
            continue
        if is_list:
            conf[key].append(val)
        else:
            conf[key] = val

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must precede the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    del args[:eaten]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k, v in conf.items():
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually be multiple files per line
            with open(a[1:]) as flist:
                for line in flist:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: print() cannot write str to a binary file
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
713
if __name__ == '__main__':
    # Script entry point: run the driver on the command line.
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        # report the exception value, then re-raise so that it
        # still propagates as usual
        print("ERROR: ", sys.exc_info()[1])
        raise
721
722