xref: /freebsd/share/mk/meta2deps.py (revision b0b1dbdd)
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5"""
6This script parses each "meta" file and extracts the
7information needed to deduce build and src dependencies.
8
9It works much the same as the original shell script, but is
10*much* more efficient.
11
12The parsing work is handled by the class MetaFile.
13We only pay attention to a subset of the information in the
14"meta" files.  Specifically:
15
'CWD'	to initialize our notion of the current working directory.
17
18'C'	to track chdir(2) on a per process basis
19
20'R'	files read are what we really care about.
21	directories read, provide a clue to resolving
22	subsequent relative paths.  That is if we cannot find
23	them relative to 'cwd', we check relative to the last
24	dir read.
25
26'W'	files opened for write or read-write,
27	for filemon V3 and earlier.
28
29'E'	files executed.
30
31'L'	files linked
32
33'V'	the filemon version, this record is used as a clue
34	that we have reached the interesting bit.
35
36"""
37
38"""
39RCSid:
40	$FreeBSD$
41	$Id: meta2deps.py,v 1.24 2017/02/08 22:17:10 sjg Exp $
42
43	Copyright (c) 2011-2013, Juniper Networks, Inc.
44	All rights reserved.
45
46	Redistribution and use in source and binary forms, with or without
47	modification, are permitted provided that the following conditions
48	are met:
49	1. Redistributions of source code must retain the above copyright
50	   notice, this list of conditions and the following disclaimer.
51	2. Redistributions in binary form must reproduce the above copyright
52	   notice, this list of conditions and the following disclaimer in the
53	   documentation and/or other materials provided with the distribution.
54
55	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
56	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
57	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
58	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
59	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
61	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
62	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
63	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
64	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
65	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
66
67"""
68
69import os, re, sys
70
def getv(dict, key, d=None):
    """Return dict[key] when key is present, otherwise the default d."""
    # the mapping's own .get() performs the same membership test + lookup
    return dict.get(key, d)
76
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Turn path into an absolute path.

    Absolute paths are returned untouched (minus a trailing '/.'),
    '.' and './...' are taken relative to cwd without touching the
    filesystem.  Anything else is probed on disk, first under last_dir
    and then under cwd; the first candidate that exists wins.
    Returns None when no candidate exists.
    """
    verbose = debug > 2

    if path.endswith('/.'):
        path = path[:-2]
    if path.startswith('/'):
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]

    # no point probing the same directory twice
    if last_dir == cwd:
        bases = [cwd]
    else:
        bases = [last_dir, cwd]

    for base in bases:
        if not base:
            continue
        candidate = base + '/' + path
        if verbose:
            print("looking for:", candidate, end=' ', file=debug_out)
        if os.path.exists(candidate):
            if verbose:
                print("found:", candidate, file=debug_out)
            return candidate
        if verbose:
            print("nope", file=debug_out)
    return None
106
def cleanpath(path):
    """cleanup path without using realpath(3)

    Collapses empty and '.' components and cancels each '..' against
    the component before it, purely textually (no filesystem access,
    so symlinks are not followed).

    A '..' with nothing left to cancel is dropped for an absolute path
    (we cannot go above '/') and kept for a relative path, rather than
    raising IndexError.
    """
    absolute = path.startswith('/')
    parts = []
    for comp in path.split('/'):
        if not comp or comp == '.':
            continue
        if comp == '..':
            if parts and parts[-1] != '..':
                parts.pop()
            elif not absolute:
                # nothing to cancel against: keep the leading '..'
                parts.append(comp)
            continue
        parts.append(comp)

    prefix = '/' if absolute else ''
    return prefix + '/'.join(parts)
124
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Best-effort absolute form of path.

    The path is first handed to resolve(); if that yields nothing the
    path is used as given.  Since this is called very often, the result
    only goes through cleanpath() when it looks like it needs it:
    it has no '/' at all, it contains './' past the first character,
    or it ends in '/..'.  realpath is never called.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    candidate = resolved if resolved else path

    needs_cleaning = ('/' not in candidate
                      or candidate.find('./') > 0
                      or candidate.endswith('/..'))
    if needs_cleaning:
        return cleanpath(candidate)
    return candidate
138
139def sort_unique(list, cmp=None, key=None, reverse=False):
140    list.sort(cmp, key, reverse)
141    nl = []
142    le = None
143    for e in list:
144        if e == le:
145            continue
146	le = e
147        nl.append(e)
148    return nl
149
def add_trims(x):
    """Return x in its four '/'-decorated forms: /x/, /x, x/ and x."""
    slashed = '/' + x
    return [slashed + '/', slashed, x + '/', x]
155
class MetaFile:
    """class to parse meta files generated by bmake.

    The containers below are class attributes on purpose: add() and
    seenit() mutate them in place, so what is learned from one meta
    file is visible to every other instance.  main() relies on this,
    it creates one instance per meta file and reports from the last.
    reset() gives an instance private, empty containers.
    """

    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            # NOTE: this binds an instance attribute, the class level
            # 'conf' stays None, so the block runs for every new
            # instance; the membership tests below keep the shared
            # lists free of duplicates.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # ignore empty entries
                if srctop[-1] != '/':
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                # the tree may be reached via a symlink, record both
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if not _srctop in self.srctops:
                    self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    # an empty trim (MACHINE unset) matches everything
                    # and [0:-0] would wipe out objroot - skip it
                    if e and objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    continue            # nothing left to record
                if objroot[-1] != '/':
                    objroot += '/'
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if objroot[-1] == '/':
                        _objroot += '/'
                    if not _objroot in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            # splits "machine/rel/dir" into machine and rel/dir
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            # DPDEPS output needs RELDIR, try to derive it from CURDIR
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        # rebinding (rather than clearing) detaches this instance from
        # the containers shared at class level
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS

        The collected obj_deps joined with sep; the non-blank part of
        sep is also put in front of the first entry.
        """
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS

        The collected src_deps joined with sep; the non-blank part of
        sep is also put in front of the first entry.
        """
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out is an open file (None means sys.stdout, as for print()).
        Nothing is written when RELDIR is not known.
        """
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.

        clue is only used to label the debug output.
        """
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that is a prefix of path,
        or None if there is none.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot the entry of objroots that dir was found under.
        dir     directory of the file being looked at.
        path    full path of the file being looked at.
        input   the raw path from the meta file; remembered as seen
                when no .dirdep file is found.

        Returns the directory dependency (possibly qualified with
        '.machine') or None.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                # only the first line matters; make sure we close it
                with open(ddepf, 'r') as f:
                    ddep = f.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # drop a trailing ".machine" or ".target_spec";
                # an empty machine must not match, endswith('') is
                # always true and would eat the last character
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue, the exception itself is re-raised
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name.
        file    if set, an iterable of lines to parse instead of
                opening self.name; it is not closed here.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'

        cwd = self.cwd
        if file:
            f = file
            self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
        try:
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # everything before the 'V' record is header or
                    # command output, only 'CWD' matters there
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # deliberately disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switching process: save the state of the old
                    # one and pick up that of the new one
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = pid_cwd.get(pid, self.cwd)
                    self.last_dir = pid_last_dir.get(pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out where its parent is
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write.
                    # w[1] is the pid, src and target follow it.
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened ourselves
            if not file:
                f.close()

    def parse_path(self, path, cwd, op=None, w=[]):
        """look at a path for the op specified

        path    the path named in the record.
        cwd     current directory of the process the record is for.
        op      record type to treat this as; defaults to w[0].
        w       the split record; w[2] is remembered as seen and
                shows up in debug output.

        Adds to src_deps, obj_deps and file_deps as appropriate and
        may update self.last_dir.
        """

        if not op:
            op = w[0]

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        rdir = dir
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        if rdir == dir or rdir.find('./') > 0:
            rdir = None
        if os.path.islink(dir):
            rdir = os.path.realpath(dir)
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is not a dependency, but it is a clue
                # for resolving later relative paths
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ERW':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, dir.replace(srctop,''), 'src')
                self.seenit(w[2])
                self.seenit(dir)
                if rdir and not rdir.startswith(srctop):
                    dir = rdir      # for below
                    rdir = None
                else:
                    return

            objroot = None
            # NOTE(review): the loop rebinds 'dir'; when nothing
            # matches it ends up as the last candidate (possibly
            # None) and that is what gets remembered below.
            for dir in [dir,rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, w[2])
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        # staged file: drop objroot and the first
                        # component below it
                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(w[2])
                self.seenit(dir)
578
579
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -T "TARGET_SPEC"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -q  do not print the results

    -d  bumps debug level

    MACHINE, MACHINE_ARCH, SB_SRC and SB_OBJROOT from the environment
    provide initial values.

    Arguments ending in '.meta' are parsed directly, an argument of
    the form '@file' names a file listing meta files to parse.

    klass   the class to instantiate for each meta file.
    xopts   extra option letters to hand to getopt.
    xoptf   called as xoptf(opt, arg, conf) for options not handled here.

    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely optional, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # Seed conf from the environment.  Each variable is looked up on
    # its own so that one being unset does not hide the others.
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must precede the meta files
    n_assigned = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        n_assigned += 1
    args = args[n_assigned:]

    debug_out = conf.get('debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k,v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as flist:
                for line in flist:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = conf.get('DPDEPS')
        if dpdeps:
            # text mode: print() writes str, which a file opened
            # 'wb' rejects on Python 3
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
732
if __name__ == '__main__':
    # Run the driver; on any failure report it on stdout as well
    # before letting the exception propagate.
    try:
        main(sys.argv)
    except:
        _, err, _ = sys.exc_info()
        # yes, this goes to stdout
        print("ERROR: ", err)
        raise
740
741