#!/usr/bin/env python

"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion.

'C'     to track chdir(2) on a per process basis

'R'     files read are what we really care about.
        directories read, provide a clue to resolving
        subsequent relative paths.  That is if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'L'     files linked

'V'     the filemon version, this record is used as a clue
        that we have reached the interesting bit.

"""

"""
RCSid:
        $Id: meta2deps.py,v 1.15 2013/07/29 20:41:23 sjg Exp $

        Copyright (c) 2011-2013, Juniper Networks, Inc.
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import functools
import os
import re
import sys


def _print(out, *args, **kwargs):
    """Write args to the file object out, separated by single spaces.

    The keyword argument 'end' (default: newline) is written after the
    last argument.  This is used in place of the print statement so that
    the module works unchanged under both Python 2 and Python 3.
    """
    end = kwargs.get('end', '\n')
    out.write(' '.join([str(a) for a in args]) + end)


def getv(dict, key, d=None):
    """Lookup key in dict and return value or the supplied default."""
    if key in dict:
        return dict[key]
    return d


def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    Absolute paths, '.' and './...' are handled textually.  Any other
    relative path is tried under last_dir and then cwd, and the first
    candidate that exists on disk is returned.  Returns None when no
    candidate exists.
    """
    if path.endswith('/.'):
        path = path[0:-2]
    if len(path) > 0 and path[0] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point checking the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        p = '/'.join([d, path])
        if debug > 2:
            _print(debug_out, "looking for:", p, end=' ')
        if not os.path.exists(p):
            if debug > 2:
                _print(debug_out, "nope")
            continue
        if debug > 2:
            _print(debug_out, "found:", p)
        return p
    return None


def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath
    until we know we have something.

    If resolve() cannot find the path, the path is used as given.
    """
    rpath = resolve(path, cwd, last_dir, debug, debug_out)
    if rpath:
        path = rpath
    if (path.find('./') > 0 or
            path.endswith('/..') or
            os.path.islink(path)):
        return os.path.realpath(path)
    return path


def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without duplicates.

    cmp, key and reverse have the meaning they had for the Python 2
    list.sort(); cmp is adapted with functools.cmp_to_key so that the
    function also works under Python 3.
    """
    if cmp is not None:
        cmp_key = functools.cmp_to_key(cmp)
        if key is None:
            key = cmp_key
        else:
            # Python 2 applied cmp to the values produced by key
            inner_key = key
            key = lambda e: cmp_key(inner_key(e))
    list.sort(key=key, reverse=reverse)
    nl = []
    for e in list:
        # equal elements are adjacent after sorting
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl


def add_trims(x):
    """Return the suffix variants of x (with/without surrounding '/')."""
    return ['/' + x + '/',
            '/' + x,
            x + '/',
            x]


class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: the containers below are deliberately class attributes.
    # add() and seenit() mutate them in place, so results accumulate
    # across every MetaFile instance until reset() is called.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []

    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            # NOTE: self.conf is assigned on the instance, so the class
            # attribute stays None and this block runs for every new
            # instance; the membership tests below keep it idempotent.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue
                if not srctop.endswith('/'):
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if not _srctop.endswith('/'):
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    # an empty trim (MACHINE unset) would match everything
                    # and objroot[0:-0] would wipe out the whole string
                    if e and objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]
                        if e.endswith('/'):
                            objroot += '/'
                if not objroot:
                    continue
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    # realpath drops a trailing '/', put it back
                    if objroot.endswith('/'):
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            if self.debug:
                _print(self.debug_out, "host_target=", self.host_target)
                _print(self.debug_out, "srctops=", self.srctops)
                _print(self.debug_out, "objroots=", self.objroots)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                _print(self.debug_out, "need reldir:", end=' ')
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop, '')
                    if self.debug:
                        _print(self.debug_out, self.reldir)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Write DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out defaults to sys.stdout.  Nothing is written (and None is
        returned) when RELDIR is not known.
        """
        if not self.reldir:
            return None
        if out is None:
            out = sys.stdout
        for f in sort_unique(self.file_deps):
            _print(out, 'DPDEPS_%s += %s' % (f, self.reldir))

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                _print(self.debug_out,
                       "%s: %sAdd: %s" % (self.name, clue, data))

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with, or None.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    _print(self.debug_out, "found in", top)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot is the matching entry from objroots, dir the directory
        below it, path the file that was accessed and input the raw
        path as it appeared in the meta file.
        Returns None if no dependency can be deduced.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as df:
                    ddep = df.readline().strip('# \n')
                if self.debug > 1:
                    _print(self.debug_out,
                           "found %s: %s\n" % (ddepf, ddep))
                # drop the '.machine' (or '.target_spec') qualifier
                if ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot, '')
                return None
            # the first component below objroot is the machine
            m = self.dirdep_re.match(dir.replace(objroot, ''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            _print(self.debug_out,
                                   "adding .%s to %s" % (dmachine, ddep))
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue, the exception itself is re-raised
            _print(sys.stderr,
                   '{}:{}: '.format(self.name, self.line), end='')
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name, if given, replaces self.name.  file, if given, is an
        iterable of lines to parse instead of opening self.name; it is
        not closed here.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        # start with our current notion of cwd so these are always
        # bound, a CWD record in the meta file replaces them
        cwd = last_dir = self.cwd
        if file:
            f = file
        else:
            f = open(self.name, 'r')
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        try:
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    _print(self.debug_out, "input:", line, end='')
                w = line.split()

                if skip:
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # Disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            _print(self.debug_out,
                                   "%s: CWD=%s" % (self.name, cwd))
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switch to the state of the process we now see
                    if last_pid:
                        pid_cwd[last_pid] = cwd
                        pid_last_dir[last_pid] = last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    npid = int(w[2])
                    # the child inherits the cwd of its parent
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    # last_pid is about to change, so save the parent's
                    # state or it is lost when the parent is seen again
                    pid_cwd[pid] = cwd
                    pid_last_dir[pid] = last_dir
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    last_dir = cwd
                    if self.debug > 1:
                        _print(self.debug_out, "cwd=", cwd)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        _print(self.debug_out, "seen:", w[2])
                    continue
                # file operations
                if w[0] in 'ML':
                    path = w[2].strip("'")
                else:
                    path = w[2]
                # we are never interested in .dirdep files as dependencies
                if path.endswith('.dirdep'):
                    continue
                # we don't want to resolve the last component if it is
                # a symlink
                path = resolve(path, cwd, last_dir, self.debug, self.debug_out)
                if not path:
                    continue
                dir, base = os.path.split(path)
                if dir in self.seen:
                    if self.debug > 2:
                        _print(self.debug_out, "seen:", dir)
                    continue
                # we can have a path in an objdir which is a link
                # to the src dir, we may need to add dependencies for each
                rdir = dir
                dir = abspath(dir, cwd, last_dir, self.debug, self.debug_out)
                if rdir == dir or rdir.find('./') > 0:
                    rdir = None
                # now put path back together
                path = '/'.join([dir, base])
                if self.debug > 1:
                    _print(self.debug_out,
                           "raw=%s rdir=%s dir=%s path=%s" %
                           (w[2], rdir, dir, path))
                if w[0] in 'SRWL':
                    if w[0] == 'W' and path.endswith('.dirdep'):
                        continue
                    if path in [last_dir, cwd, self.cwd, self.curdir]:
                        if self.debug > 1:
                            _print(self.debug_out, "skipping:", path)
                        continue
                    if os.path.isdir(path):
                        if w[0] in 'RW':
                            last_dir = path
                        if self.debug > 1:
                            _print(self.debug_out, "ldir=", last_dir)
                        continue

                if w[0] in 'REWML':
                    # finally, we get down to it
                    if dir == self.cwd or dir == self.curdir:
                        continue
                    srctop = self.find_top(path, self.srctops)
                    if srctop:
                        if self.dpdeps:
                            self.add(self.file_deps,
                                     path.replace(srctop, ''), 'file')
                        self.add(self.src_deps,
                                 dir.replace(srctop, ''), 'src')
                        self.seenit(w[2])
                        self.seenit(dir)
                        if rdir and not rdir.startswith(srctop):
                            dir = rdir      # for below
                            rdir = None
                        else:
                            continue

                    # look for an objroot via the resolved dir and then
                    # via the raw (unresolved) one
                    objroot = None
                    obj_dir = None
                    for d in (dir, rdir):
                        if not d:
                            continue
                        obj_dir = d
                        objroot = self.find_top(d, self.objroots)
                        if objroot:
                            break
                    if objroot:
                        ddep = self.find_obj(objroot, obj_dir, path, w[2])
                        if ddep:
                            self.add(self.obj_deps, ddep, 'obj')
                    else:
                        # don't waste time looking again
                        self.seenit(w[2])
                        for d in (dir, rdir):
                            if d:
                                self.seenit(d)
        finally:
            if not file:
                f.close()


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -D "DPDEPS"

    -d  bumps debug level

    -q  do not print the resulting DIRDEPS and SRC_DIRDEPS

    xopts names additional getopt option letters which are passed,
    with their argument and the conf dictionary, to xoptf.

    An argument ending in '.meta' is parsed as a meta file, an
    argument of the form '@file' names a file listing meta files.

    Returns the last klass instance created, or None if no meta
    file was given.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        }

    # seed conf from the environment; each variable is optional and
    # looked up on its own so a missing one does not hide the others
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must precede the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP', 'OBJROOT', 'SRCTOPS', 'OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1
    args = args[eaten:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        _print(debug_out, "config:")
        _print(debug_out, "psyco=", have_psyco)
        for k, v in conf.items():
            _print(debug_out, "%s=%s" % (k, v))

    m = None
    for a in args:
        if a.endswith('.meta'):
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as flist:
                for line in flist:
                    for f in line.strip().split():
                        m = klass(f, conf)

    if m is None:
        # no meta files were given, there is nothing to report
        return None

    if output:
        _print(sys.stdout, m.dirdeps())

        _print(sys.stdout, m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # NOTE: the file is opened for writing, so existing
            # content is replaced
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m


if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        _print(sys.stdout, "ERROR: ", sys.exc_info()[1])
        raise