xref: /freebsd/share/mk/meta2deps.py (revision 9768746b)
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5"""
6This script parses each "meta" file and extracts the
7information needed to deduce build and src dependencies.
8
9It works much the same as the original shell script, but is
10*much* more efficient.
11
12The parsing work is handled by the class MetaFile.
13We only pay attention to a subset of the information in the
14"meta" files.  Specifically:
15
'CWD'	to initialize our notion of the current working directory.
17
18'C'	to track chdir(2) on a per process basis
19
20'R'	files read are what we really care about.
21	directories read, provide a clue to resolving
22	subsequent relative paths.  That is if we cannot find
23	them relative to 'cwd', we check relative to the last
24	dir read.
25
26'W'	files opened for write or read-write,
27	for filemon V3 and earlier.
28
29'E'	files executed.
30
31'L'	files linked
32
33'V'	the filemon version, this record is used as a clue
34	that we have reached the interesting bit.
35
36"""
37
38"""
39RCSid:
40	$FreeBSD$
41	$Id: meta2deps.py,v 1.40 2021/12/13 19:32:46 sjg Exp $
42
43	Copyright (c) 2011-2020, Simon J. Gerraty
44	Copyright (c) 2011-2017, Juniper Networks, Inc.
45	All rights reserved.
46
47	Redistribution and use in source and binary forms, with or without
48	modification, are permitted provided that the following conditions
49	are met:
50	1. Redistributions of source code must retain the above copyright
51	   notice, this list of conditions and the following disclaimer.
52	2. Redistributions in binary form must reproduce the above copyright
53	   notice, this list of conditions and the following disclaimer in the
54	   documentation and/or other materials provided with the distribution.
55
56	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
57	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
58	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
59	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
60	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
61	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
62	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
63	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
64	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
65	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
66	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
67
68"""
69
70import os, re, sys
71
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Turn path into an absolute path, or return None if that is not possible.

    A trailing '/.' is dropped first.  Absolute paths are returned only
    if they exist.  '.' and './...' are taken relative to cwd without
    checking for existence.  Anything else is looked for under last_dir
    (when it is set and differs from cwd) and then under cwd; the first
    candidate that exists wins.  A bare '..' yields the parent of the
    first usable base directory.
    """
    if path.endswith('/.'):
        path = path[:-2]

    if path.startswith('/'):
        if os.path.exists(path):
            return path
        if debug > 2:
            print("skipping non-existent:", path, file=debug_out)
        return None

    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]

    # no point in searching the same directory twice
    if last_dir == cwd:
        last_dir = None

    for base in (last_dir, cwd):
        if not base:
            continue
        if path == '..':
            # everything before the final '/', or '/' if nothing is left
            return base.rpartition('/')[0] or '/'
        candidate = base + '/' + path
        if debug > 2:
            print("looking for:", candidate, end=' ', file=debug_out)
        if os.path.exists(candidate):
            if debug > 2:
                print("found:", candidate, file=debug_out)
            return candidate
        if debug > 2:
            print("nope", file=debug_out)
    return None
111
def cleanpath(path):
    """
    Normalize path textually, without using realpath(3).

    Empty and '.' components are dropped and each '..' removes the
    component before it.  A '..' with nothing left to remove stops
    processing, so the remainder of the path is discarded.
    A leading '/' is preserved.
    """
    prefix = '/' if path.startswith('/') else ''
    kept = []
    for part in path.split('/'):
        if part in ('', '.'):
            continue
        if part != '..':
            kept.append(part)
        elif kept:
            kept.pop()
        else:
            # '..' would climb above the start of the path: give up here
            break

    return prefix + '/'.join(kept)
132
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return path made absolute via resolve(), tidied up if it needs it.

    An absolute path that resolve() rejects yields None.  A relative
    path that cannot be resolved is kept as given.  This gets called a
    lot, so cleanpath() is used rather than realpath, and only when the
    path has no '/', contains './' after its first character, or ends
    in '/..'.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    if resolved:
        path = resolved
    elif path.startswith('/'):
        return None

    needs_cleaning = ('/' not in path
                      or path.find('./') > 0
                      or path.endswith('/..'))
    if needs_cleaning:
        return cleanpath(path)
    return path
148
def sort_unique(list, cmp=None, key=None, reverse=False):
    """
    Sort list in place and return a new list without adjacent duplicates.

    list     the list to sort; note that it is modified (sorted) in place.
    cmp      optional old-style comparison function.
    key      optional key function, as for list.sort().
    reverse  sort in descending order if true.

    On Python 2 the arguments are handed to list.sort() directly.
    On Python 3, key is honoured and cmp is adapted with
    functools.cmp_to_key (applied to the key values when both are given,
    which is how Python 2 combines them).
    """
    if sys.version_info[0] == 2:
        list.sort(cmp, key, reverse)
    else:
        sort_key = key
        if cmp is not None:
            from functools import cmp_to_key
            wrap = cmp_to_key(cmp)
            if key is None:
                sort_key = wrap
            else:
                sort_key = lambda e: wrap(key(e))
        list.sort(key=sort_key, reverse=reverse)

    unique = []
    for e in list:
        # compare against the last element kept, rather than a None
        # sentinel, so that a leading None entry is not dropped
        if unique and e == unique[-1]:
            continue
        unique.append(e)
    return unique
162
def add_trims(x):
    """
    Return the four spellings of x with and without a leading and a
    trailing '/', in the order '/x/', '/x', 'x/', 'x'.
    """
    return [lead + x + trail
            for lead in ('/', '')
            for trail in ('/', '')]
168
def target_spec_exts(target_spec):
    """
    Return the list of dirdep extensions that could match target_spec.

    target_spec is a comma separated tuple; every leading sub-tuple
    gives one extension ('.' + sub-tuple), longest first.
    """
    fields = target_spec.split(',')
    return ['.' + ','.join(fields[:count])
            for count in range(len(fields), 0, -1)]
181
class MetaFile:
    """class to parse meta files generated by bmake.

    The lists and the dict below are class attributes, so (until
    reset() rebinds them on an instance) they are shared by every
    instance.  main() relies on that: it creates one instance per meta
    file and reports the accumulated result from the last one.
    """

    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            # avoid a mutable default argument
            conf = {}

        self.name = name
        self.debug = conf.get('debug', 0)
        self.debug_out = conf.get('debug_out', sys.stderr)

        self.machine = conf.get('MACHINE', '')
        self.machine_arch = conf.get('MACHINE_ARCH', '')
        self.target_spec = conf.get('TARGET_SPEC', self.machine)
        self.exts = target_spec_exts(self.target_spec)
        self.curdir = conf.get('CURDIR')
        self.reldir = conf.get('RELDIR')
        self.dpdeps = conf.get('DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            # NOTE: the assignments to self.* below bind on the instance,
            # so unless the class attribute 'conf' has been set elsewhere
            # this block runs for each new instance; the membership tests
            # keep the shared srctops/objroots lists free of duplicates.
            self.conf = conf
            self.host_target = conf.get('HOST_TARGET')
            for srctop in conf.get('SRCTOPS', []):
                if not srctop:
                    continue            # nothing useful in an empty entry
                if srctop[-1] != '/':
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec != self.machine:
                trim_list += add_trims(self.target_spec)

            for objroot in conf.get('OBJROOTS', []):
                for e in trim_list:
                    # an empty suffix must be skipped: every string ends
                    # with '' and objroot[0:-0] would wipe out objroot
                    if e and objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    continue            # nothing left after trimming
                if objroot[-1] != '/':
                    objroot += '/'
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if _objroot[-1] != '/':
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = conf.get('EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)
                print("ext_list=", self.exts, file=self.debug_out)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self._strip_top(self.curdir, srctop)
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    @staticmethod
    def _strip_top(path, top):
        """return path with top removed.

        When path starts with top only that leading occurrence is
        removed, so a later repeat of the same text inside path is left
        alone.  Otherwise every occurrence is removed, as
        str.replace() does."""
        if top and path.startswith(top):
            return path[len(top):]
        return path.replace(top, '')

    def reset(self):
        """reset state if we are being passed meta files from multiple directories.

        The fresh containers are bound on this instance, so afterwards
        it no longer shares them with the class."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS

        The entries of obj_deps joined by sep, prefixed with sep
        stripped of surrounding white-space."""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS

        The entries of src_deps joined by sep, prefixed with sep
        stripped of surrounding white-space."""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out is passed to print() as file=, so None means stdout.
        Nothing is written (and None is returned) if reldir is not set."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.

        clue is only used to label the debug message."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot  the object root that dir was found under.
        dir      directory of the file being looked at.
        path     full path of the file.
        input    the raw path from the meta file, remembered as seen
                 when no .dirdep file is found.

        Returns the dirdep, or None if one cannot be worked out."""
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                # only the first line matters; do not leak the handle
                with open(ddepf, 'r') as f:
                    ddep = f.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                for e in self.exts:
                    if ddep.endswith(e):
                        ddep = ddep[0:-len(e)]
                        break

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return self._strip_top(dir, objroot)
                return None
            # expect <machine>/<dirdep> below objroot
            m = self.dirdep_re.match(self._strip_top(dir, objroot))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue, the exception itself is re-raised
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.
        (Handling of 'W' for old filemon versions is currently disabled,
        see below.)

        name  if set, replaces self.name.
        file  if set, an already open file (any iterable of lines) to
              read instead of opening self.name; it is not closed here.

        An AssertionError is raised if no 'V' record was seen.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
            cwd = self.cwd              # until we see a CWD record
        try:
            skip = True
            pid_cwd = {}
            pid_last_dir = {}
            last_pid = 0

            self.line = 0
            if self.curdir:
                self.seenit(self.curdir)    # we ignore this

            interesting = 'CEFLRV'
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # nothing but CWD matters until the V record
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # Disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switch to the cwd and last_dir of this process
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = pid_cwd.get(pid, self.cwd)
                    self.last_dir = pid_last_dir.get(pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out where its parent is
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if not cwd:
                        cwd = w[2]
                        if self.debug > 1:
                            print("missing cwd=", cwd, file=self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    if path == '.':
                        continue
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened, even if parsing blew up
            if not file:
                f.close()

        assert version > 0, "no filemon 'V' record found"

    def is_src(self, base, dir, rdir):
        """is base in srctop

        dir and rdir (which may be None) are tried in turn.  For the
        first one that lies below one of the srctops the src (and
        optionally file) dependency is recorded and True is returned."""
        for dir in [dir,rdir]:
            if not dir:
                continue
            path = '/'.join([dir,base])
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, self._strip_top(path, srctop), 'file')
                self.add(self.src_deps, self._strip_top(dir, srctop), 'src')
                self.seenit(dir)
                return True
        return False

    def parse_path(self, path, cwd, op=None, w=None):
        """look at a path for the op specified

        path  the path from the record.
        cwd   current directory of the process the record belongs to.
        op    the operation; defaults to w[0].
        w     the split record; w[2] is the raw path that gets
              remembered as seen, if w is too short path is used."""

        if w is None:
            w = []
        if not op:
            op = w[0]
        # the raw input, as tested against self.seen by parse()
        if len(w) > 2:
            raw = w[2]
        else:
            raw = path

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        rdir = dir
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        if dir:
            rdir = os.path.realpath(dir)
        else:
            dir = rdir
        if rdir == dir:
            rdir = None
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (raw, rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is only a clue for later relative paths
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ER':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            if self.is_src(base, dir, rdir):
                self.seenit(raw)
                if not rdir:
                    return

            objroot = None
            # NOTE: this deliberately rebinds dir; below it is the one
            # that matched an objroot, else the last one tried.
            for dir in [dir,rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, raw)
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        # drop the first component below the stage dir
                        sp = '/'.join(self._strip_top(path, objroot).split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(raw)
                self.seenit(dir)
622
623
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -T "TARGET_SPEC"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -q  do not output anything at the end

    -d  bumps debug level

    The environment variables MACHINE, MACHINE_ARCH, SB_SRC and
    SB_OBJROOT provide initial values, each one is used if it is set
    and not empty.

    An argument starting with '@' names a file listing meta files.

    klass   the class to instantiate for each meta file.
    xopts   extra option letters (getopt syntax) to accept.
    xoptf   called as xoptf(option, value, conf) for options that are
            not handled here.

    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely an optional optimization
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # look at each variable on its own, so that one which is not set
    # does not cause the others to be ignored
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must come before the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # only split on the first '=', the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    args = args[eaten:]

    debug_out = conf.get('debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k, v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as listing:
                for line in listing:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = conf.get('DPDEPS')
        if dpdeps:
            # close the file so that everything is flushed to it
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
776
if __name__ == '__main__':
    # run the driver; report any failure briefly, then let it propagate
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        failure = sys.exc_info()[1]
        print("ERROR: ", failure)
        raise
784
785