# $Id: ncbicxx_build_info.py 595471 2019-10-22 15:54:41Z ucko $
"""Run a build command and gather metadata about what it produced.

``Collector`` understands ``make`` invocations that reference
``Makefile.<name>.<app|lib>`` project makefiles; ``CollectorCMake``
understands direct ``gcc``/``g++`` link commands.  Both run the wrapped
command, let the original process exit with the command's status, and fill
``self.info`` from a forked child.
"""
import ast
from datetime import date, datetime, timedelta, timezone
import hashlib
import mmap
import os
import pwd
import re
import subprocess
import time
from warnings import warn

try:
    from distutils.sysconfig import parse_makefile, expand_makefile_vars
except ImportError:
    # distutils was removed from the standard library in Python 3.12.
    # Defer the failure to the first makefile-related call so that code
    # paths which never parse a makefile (CollectorCMake) remain usable.
    def _distutils_unavailable(*args, **kwargs):
        raise ImportError('distutils.sysconfig is unavailable (removed from '
                          'the standard library in Python 3.12); makefiles '
                          'cannot be parsed')

    parse_makefile = expand_makefile_vars = _distutils_unavailable


class IrrelevantCommandError(Exception):
    """Raised when a command line is not a build step worth recording."""
    pass


class Collector(object):
    """Collect build information around a ``make`` invocation."""

    def init(self, wanted):
        """Reset the collected info and remember which keys are wanted.

        *wanted* is a collection of key names; ``'*'`` means everything.
        """
        self.info = {'tech_stack': 'cxx'}
        self.wanted = wanted

    def in_want_list(self, key):
        """Return True if *key* (or the wildcard ``'*'``) was requested."""
        return key in self.wanted or '*' in self.wanted

    def run_command(self, command):
        """Run *command* and record its timing and outcome in ``self.info``.

        After the command finishes the process forks: the parent exits
        immediately with the command's status, and only the child returns
        from this method to carry on collecting in the background.

        Returns the command's exit status (negative if killed by a signal).
        """
        start_time = datetime.now(timezone.utc)
        status = subprocess.call(command, close_fds=False)
        end_time = datetime.now(timezone.utc)
        if os.fork() > 0:
            if status < 0:
                # Killed by signal N: report 128 + N, as shells do.
                status = 128 - status
            os._exit(status)  # continue in background

        self.info['start_time'] = start_time
        self.info['end_time'] = end_time
        self.info['duration'] = (end_time - start_time).total_seconds()
        self.info['succeeded'] = status == 0
        self.info['command_line'] = ' '.join(command)
        self.info['directory'] = os.getcwd()

        return status

    def collect_target_info(self, target_name, target_type, target_fullpath,
                            srcdir, mf):
        """Record the target's name, type, source directory and contact.

        *target_fullpath* is accepted for interface compatibility; the
        lookup of ``app_version`` via ``<target> -version`` that used it was
        permanently disabled (guarded by ``and False``) and has been dropped.
        """
        self.info['name'] = target_name
        self.info['source_directory'] = srcdir
        self.info['type'] = 'library' if target_type == 'lib' else 'app'

        if self.in_want_list('contact'):
            self.info['contact'] = self.get_contact(mf)

    def collect_vcs_info(self):
        """Merge version-control details for the source directory, if any."""
        if 'source_directory' not in self.info:
            return
        if not self.in_want_list('vcs_type'):
            return
        vcs_info = self.get_vcs_info(self.info['source_directory'])
        if vcs_info is not None:
            self.info.update(vcs_info)

    def collect_tc_info(self):
        """Record build type/id/number and agent from TeamCity properties."""
        tcprops = self.read_teamcity_properties()

        if 'teamcity.version' in tcprops:
            self.info['build_type'] = 'standard'
        else:
            self.info['build_type'] = 'legacy'
        if 'teamcity.build.id' in tcprops:
            self.info['build_id'] = tcprops['teamcity.build.id']
        if 'build.number' in tcprops:
            self.info['build_number'] = tcprops['build.number']

        if 'teamcity.version' in tcprops:
            self.info['tc_vars'] = tcprops
        if 'teamcity.agent.name' in tcprops:
            self.info['tc_agent_name'] = tcprops['teamcity.agent.name']

    def collect_env_info(self):
        """Record whether the build is automated and, if wanted, the env."""
        if 'NCBI_AUTOMATED_BUILD' in os.environ:
            self.info['execution_type'] = 'automated'
        else:
            self.info['execution_type'] = 'manual'

        if self.in_want_list('env_vars'):
            self.info['env_vars'] = dict(os.environ)

    def collect_build_config(self, status_dir):
        """Extract configuration details from ``<status_dir>/config.log``.

        Records the log's owner, the configure command line, the host name
        and the directory named on the "configurables below" line.
        """
        if not self.in_want_list('build_config'):
            return
        bcfg = {}
        # errors='ignore' matches the other log readers in this file; the
        # log may contain bytes that are not valid in the default encoding.
        with open(os.path.join(status_dir, 'config.log'), 'r',
                  errors='ignore') as f:
            uid = os.fstat(f.fileno()).st_uid
            try:
                bcfg['user_id'] = pwd.getpwuid(uid)[0]
            except KeyError:
                # No passwd entry for this uid; keep the number itself.
                bcfg['user_id'] = uid
            for line in f:
                unindented = line.lstrip(' ')
                if line.startswith(' ') and unindented.startswith('$ '):
                    # Indented "$ <command>" invocation line.  Matching on
                    # the unindented text keeps the prefix and the slice in
                    # agreement regardless of the indentation width.
                    bcfg['command'] = unindented[2:].rstrip('\n')
                elif line.startswith('hostname = '):
                    bcfg['host'] = line[11:].rstrip('\n')
                elif ' configurables below ' in line:
                    start = line.find(' below ') + 7
                    bcfg['cwd'] = line[start:].rstrip('.\n')
        self.info['build_config'] = bcfg

    def collect_artifact_info(self, sc_version, status):
        """Record the artifact's version label, file name and (maybe) hash.

        The hash is only computed when the command succeeded (*status* 0)
        and ``artifact_hash`` is wanted.
        """
        if sc_version is not None and sc_version > 0:
            self.info['artifact_version'] = 'SC-%d' % sc_version
        else:
            self.info['artifact_version'] = 'trunk'

        filename = self.get_target_path(self.info['name'], self.info['type'])
        self.info['artifact_name'] = os.path.basename(filename)
        if status == 0 and self.in_want_list('artifact_hash'):
            h = self.get_artifact_hash(filename)
            if h is not None:
                self.info['artifact_hash'] = h

    def collect(self, command, status_dir, wanted=('*',), sc_version=None):
        """Run *command* and populate ``self.info``.

        Commands that parse_command deems irrelevant replace the current
        process via ``os.execv`` and are not tracked at all.
        """
        try:
            command_info = self.parse_command(command)
        except IrrelevantCommandError:
            os.execv(command[0], command)

        self.init(wanted)
        status = self.run_command(command)

        target_type = command_info['target_type']
        mfname = 'Makefile.%s.%s' % (command_info['target_name'], target_type)
        srcdir = os.path.realpath(self.get_srcdir(command_info, mfname))
        mf = self.read_makefile(os.path.join(srcdir, mfname),
                                command_info['target_name'], target_type)
        target_name = expand_makefile_vars('$(%s)' % target_type.upper(), mf)
        target_fullpath = os.path.join('.', target_name)

        # Order matters in some cases.  Reorder these calls at your own peril.
        self.collect_target_info(target_name, target_type, target_fullpath,
                                 srcdir, mf)
        self.collect_vcs_info()
        self.collect_tc_info()
        self.collect_env_info()
        self.collect_build_config(status_dir)
        self.collect_artifact_info(sc_version, status)
        if self.in_want_list('libs'):
            self.info['libs'] \
                = ','.join(self.get_libs_from_log(command_info['target_name']))

    def get_as_string(self, name):
        """Return ``self.info[name]`` rendered as a string.

        Strings pass through, booleans become ``'T'``/``'F'``, dates and
        datetimes use ISO format, and anything else uses ``repr``.
        """
        v = self.info[name]
        if isinstance(v, str):
            return v
        elif isinstance(v, bool):
            return 'T' if v else 'F'
        elif isinstance(v, date):
            return v.isoformat()
        else:
            return repr(v)

    def parse_command(self, command):
        """Extract target name, type and source directory from a make command.

        Returns a dict with ``target_name``, ``target_type`` and ``srcdir``.
        Raises IrrelevantCommandError for shell scripts, for commands that do
        not yield all three values, and for commands whose only goals are
        housekeeping ones (check, clean, ...).
        """
        if command[0].endswith('.sh'):
            raise IrrelevantCommandError

        irrelevant_re = re.compile(r'(?:check|clean|export-headers'
                                   + r'|mark-as-disabled|purge|requirements'
                                   + r'|sources)(?:[._].+)?$')
        template_re = re.compile(r'/Makefile\.(app|lib).tmpl$')
        wrapper_re = re.compile(r'Makefile\.(.*)_(app|lib)$')

        info = {}
        irrelevant_targets = []
        relevant_targets = []
        value_expected = False
        for x in command[1:]:
            if value_expected:
                # Argument of a preceding -C/-I/-W/-f/-o flag.
                match_info = wrapper_re.match(x)
                if match_info is not None:
                    (info['target_name'], info['target_type']) \
                        = match_info.groups()
                    info['srcdir'] = '.'
                else:
                    match_info = template_re.search(x)
                    if match_info is not None:
                        info['target_type'] = match_info.group(1)
                value_expected = False
            elif len(x) == 2 and x[0] == '-' and x[1] in 'CIWfo':
                value_expected = True
            elif irrelevant_re.match(x) is not None:
                irrelevant_targets.append(x)
            elif x.startswith('TMPL='):
                info['target_name'] = x[5:]
            elif x.startswith('srcdir='):
                info['srcdir'] = x[7:]
            elif x and not x.startswith('-') and '=' not in x:
                # startswith (rather than x[0]) tolerates empty arguments.
                relevant_targets.append(x)
        if len(info) < 3 or (irrelevant_targets and not relevant_targets):
            raise IrrelevantCommandError
        return info

    def get_srcdir(self, command_info, mfname):
        """Work out the source directory for the current build directory."""
        if 'srcdir' in command_info:
            return command_info['srcdir']
        elif os.path.exists(mfname):
            return '.'
        elif os.path.exists('Makefile'):
            mf = parse_makefile('Makefile')
            return expand_makefile_vars('$(srcdir)', mf)
        else:
            return re.sub('/[^/]*/build/', '/src/', os.getcwd())

    def read_makefile(self, mfpath, target_name, target_type):
        """Parse *mfpath*; if unreadable, synthesize a one-variable makefile."""
        try:
            return parse_makefile(mfpath)
        except IOError:
            return {target_type.upper(): target_name}

    def read_teamcity_properties(self):
        """Read the TeamCity build properties file, if one is advertised.

        The file named by ``TEAMCITY_BUILD_PROPERTIES_FILE`` is parsed as
        ``key = value`` / ``key: value`` / ``key value`` lines with ``#`` or
        ``!`` comments and backslash line continuations.  If nothing could
        be read, ``build.number`` falls back to ``NCBI_BUILD_SESSION_ID``.

        Returns a dict of properties (possibly empty).
        """
        props = {}
        if 'TEAMCITY_BUILD_PROPERTIES_FILE' in os.environ and \
           (self.in_want_list('build_type')
            or self.in_want_list('build_number')
            or self.in_want_list('tc_agent_name')):
            fname = os.environ['TEAMCITY_BUILD_PROPERTIES_FILE']
            try:
                with open(fname, 'r') as f:
                    prop_re = re.compile(r'((?:(?![:=])\S|\\.)+)'
                                         + r'(?:\s*[:=]\s*|\s+)(.*)')
                    for line in f:
                        line = line.lstrip()
                        if len(line) == 0 or line[0] in '#!':
                            continue
                        line = line.rstrip('\n')
                        while line.endswith('\\'):
                            # next(f, '') is the Python 3 spelling of the
                            # old f.next(), and also copes with a trailing
                            # backslash on the file's last line.
                            continuation = next(f, '')
                            line = (line.rstrip('\\')
                                    + continuation.lstrip().rstrip('\n'))
                        mi = prop_re.match(line)
                        if mi is None:
                            warn('Malformed line in ' + fname + ': ' + line)
                        else:
                            # literal_eval on a synthesized string literal
                            # decodes backslash escapes.  NOTE(review): text
                            # containing ''' or ending in ' will not parse.
                            k = ast.literal_eval("'''"+mi.group(1)+"'''")
                            v = ast.literal_eval("'''"+mi.group(2)+"'''")
                            props[k] = v
            except Exception as e:
                # Best effort: properties gathered so far are kept.
                warn("Failed to open %s: %s" % (fname, e))
        if len(props) == 0:
            if 'NCBI_BUILD_SESSION_ID' in os.environ:
                props['build.number'] = os.environ['NCBI_BUILD_SESSION_ID']
            # Synthesize anything else?
        return props

    @staticmethod
    def _find_ancestor_with(marker, srcdir, rest):
        """Find the nearest proper ancestor of *srcdir* containing *marker*.

        Returns ``(ancestor, rest)`` where *rest* has been extended with
        every path component stepped over, or None if no ancestor has a
        *marker* directory.  Stops once the path can no longer be
        shortened, so relative paths terminate too.
        """
        while True:
            (parent, child) = os.path.split(srcdir)
            if parent == srcdir:
                return None
            srcdir = parent
            rest = (child,) + rest
            if os.path.isdir(os.path.join(srcdir, marker)):
                return (srcdir, rest)

    def get_vcs_info(self, srcdir, rest=(), fallback=None):
        """Identify the version control system governing *srcdir*.

        Looks for ``.svn``, ``.git`` or ``CVS`` metadata in *srcdir* and then
        in each ancestor in turn; *rest* holds the components already
        stepped over.  A package version header found along the way supplies
        the *fallback* answer.

        Returns a dict with ``vcs_type``/``vcs_path``/``vcs_branch`` keys (as
        available) or None.
        """
        if os.path.isdir(os.path.join(srcdir, '.svn')):
            return self.get_svn_info(srcdir, rest)
        elif os.path.isdir(os.path.join(srcdir, '.git')):
            return self.get_git_info(srcdir, rest)
        elif len(rest) == 0 and os.path.isdir(os.path.join(srcdir, 'CVS')):
            return self.get_cvs_info(srcdir)
        elif os.path.isfile(os.path.join(srcdir,
                                         'include/common/ncbi_package_ver.h')):
            fallback = self.get_package_info(srcdir, rest)

        (parent, base) = os.path.split(srcdir)
        if parent == srcdir:
            # Reached the root (or exhausted a relative path).
            return fallback
        return self.get_vcs_info(parent, (base,) + rest, fallback)

    def get_svn_info(self, srcdir, rest):
        """Describe the Subversion checkout rooted at or above *srcdir*.

        Runs ``svn info`` and derives ``vcs_path`` and ``vcs_branch`` from
        the reported URL.  If no URL is obtained, tries Git instead (hybrid
        layouts) and returns None if that fails as well.
        """
        info = {'vcs_type': 'svn'}
        try:
            with subprocess.Popen(['svn', 'info',
                                   os.path.join(srcdir, *rest)],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.DEVNULL,
                                  universal_newlines=True) as svn:
                for line in svn.stdout:
                    # partition tolerates lines without a ': ' separator.
                    (k, sep, v) = line.rstrip('\n').partition(': ')
                    if not sep or k != 'URL':
                        continue
                    info['vcs_path'] = v
                    if '/trunk/' in v:
                        info['vcs_branch'] = 'trunk'
                    else:
                        match_info = re.search('/components/[^/]+/([0-9.]+)/',
                                               v)
                        if match_info is not None:
                            info['vcs_branch'] = 'SC-' + match_info.group(1)
                        else:
                            match_info = re.search('/branches/([^/]+)/', v)
                            if match_info is not None:
                                info['vcs_branch'] = match_info.group(1)
                    break
        except OSError:
            # svn could not be run at all; treat like "no URL reported".
            pass
        if 'vcs_path' not in info:
            # Maybe controlled by git after all, in a hybrid layout?
            if os.path.isdir(os.path.join(srcdir, '.git')):
                return self.get_git_info(srcdir, rest)
            found = self._find_ancestor_with('.git', srcdir, rest)
            if found is not None:
                return self.get_git_info(*found)
            return None
        return info

    def get_git_info(self, srcdir, rest):
        """Describe the Git checkout rooted at *srcdir*.

        ``vcs_path`` is the origin URL (or ``file://<srcdir>`` if none can
        be determined) plus ``#<rest>`` when *rest* is non-empty;
        ``vcs_branch`` comes from ``git rev-parse``.  With neither a remote
        nor a branch, tries Subversion in an ancestor (hybrid layouts) and
        returns None if that fails as well.
        """
        info = {'vcs_type': 'git'}
        git = os.environ.get('TEAMCITY_GIT_PATH', 'git')
        url = None
        try:
            cmd = [git, 'remote', 'get-url', 'origin']
            url = subprocess.check_output(cmd, stderr=subprocess.DEVNULL,
                                          universal_newlines=True,
                                          cwd=srcdir)
            url = url.rstrip('\n')
        except subprocess.CalledProcessError:
            try:
                cmd = [git, 'remote', 'show', 'origin']
                with subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                      stderr=subprocess.DEVNULL,
                                      universal_newlines=True,
                                      cwd=srcdir) as remote:
                    for line in remote.stdout:
                        # The first line ("* remote origin") has no ': ',
                        # so use partition rather than unpacking a split.
                        (k, sep, v) = line.strip().partition(': ')
                        if sep and k == 'Fetch URL':
                            url = v
                            break
            except (subprocess.CalledProcessError, OSError):
                pass
        except OSError:
            # git could not be run at all.
            pass
        if url is None:
            url = 'file://' + srcdir
        if len(rest) > 0:
            url = url + '#' + os.path.join(*rest)
        info['vcs_path'] = url
        try:
            cmd = [git, 'rev-parse', '--symbolic-full-name', 'HEAD']
            rev = subprocess.check_output(cmd, stderr=subprocess.DEVNULL,
                                          universal_newlines=True,
                                          cwd=srcdir)
            rev = rev.rstrip('\n')
            info['vcs_branch'] = re.sub(r'^refs/(?:heads|tags)/', '', rev)
        except (subprocess.CalledProcessError, OSError):
            pass
        if 'vcs_branch' not in info and info['vcs_path'].startswith('file://'):
            # Maybe controlled by Subversion after all, in a hybrid layout?
            # (No need to check for .svn at this level, because get_vcs_info
            # looks for it first.)
            found = self._find_ancestor_with('.svn', srcdir, rest)
            if found is not None:
                return self.get_svn_info(*found)
            return None
        return info

    def get_cvs_info(self, srcdir):
        """Describe the CVS checkout at *srcdir* from its ``CVS`` metadata."""
        info = {'vcs_type': 'cvs'}
        cvs_dir = os.path.join(srcdir, 'CVS')
        with open(os.path.join(cvs_dir, 'Root'), 'r') as f:
            cvs_root = f.readline().rstrip('\n')
        with open(os.path.join(cvs_dir, 'Repository'), 'r') as f:
            cvs_path = f.readline().rstrip('\n')
        if cvs_path.startswith('/'):
            # Absolute repository path: keep only the root's prefix up to
            # and including its first colon.
            pos = cvs_root.find(':') + 1
            info['vcs_path'] = cvs_root[:pos] + cvs_path
        else:
            info['vcs_path'] = cvs_root + '/' + cvs_path
        with open(os.path.join(cvs_dir, 'Entries'), 'r') as f:
            first_entry = f.readline().rstrip('\n')
        # The fifth field of an entry, minus its one-character type prefix.
        match_info = re.match(r'/.*?/.*?/.*?/.*?/[^D](.+)', first_entry)
        if match_info is None:
            info['vcs_branch'] = 'HEAD'
        else:
            info['vcs_branch'] = match_info.group(1)
        return info

    def get_package_info(self, srcdir, rest):
        """Derive release-repository info from ``ncbi_package_ver.h``.

        Returns a dict describing the matching release URL, or None when
        the header lacks a complete name/version or carries the
        ``unknown`` 0.0.0 placeholder.
        """
        filename = os.path.join(srcdir, 'include/common/ncbi_package_ver.h')
        package_name = None
        version = [None, None, None]
        with open(filename) as f:
            for line in f:
                if not line.startswith('#define NCBI_PACKAGE_'):
                    continue
                words = line.split()
                if len(words) < 3:
                    continue  # macro defined without a value
                if words[1] == 'NCBI_PACKAGE_NAME':
                    package_name = words[2].strip('"')
                elif words[1] == 'NCBI_PACKAGE_VERSION_MAJOR':
                    version[0] = words[2]
                elif words[1] == 'NCBI_PACKAGE_VERSION_MINOR':
                    version[1] = words[2]
                elif words[1] == 'NCBI_PACKAGE_VERSION_PATCH':
                    version[2] = words[2]
        if package_name is not None and None not in version \
           and (package_name != 'unknown' or version != ['0', '0', '0']):
            base = 'https://svn.ncbi.nlm.nih.gov/repos/toolkit/release'
            version = '.'.join(version)
            url = '/'.join((base, package_name, version, 'c++') + rest)
            return {'vcs_type': 'svn',
                    'vcs_path': url,
                    'vcs_branch': package_name + '-' + version}
        return None

    def get_contact(self, mf):
        """Return the expanded ``WATCHERS`` setting, or ``'-'`` if none.

        Starts with the parsed makefile *mf* and then consults the
        ``Makefile`` in the current directory and each ancestor, stopping at
        the first directory without one.  A None *mf* yields ``'-'``.
        """
        next_dir = os.getcwd()
        while mf is not None:
            if 'WATCHERS' in mf:
                return expand_makefile_vars('$(WATCHERS)', mf)
            elif next_dir is None:
                break
            mfname = os.path.join(next_dir, 'Makefile')
            if os.path.exists(mfname):
                mf = parse_makefile(mfname)
            else:
                break
            if next_dir == '/':
                next_dir = None
            else:
                next_dir = os.path.dirname(next_dir)

        return '-'
        # Disabled fallback to the invoking user:
        # if 'LOGNAME' in os.environ:
        #     return os.environ['LOGNAME']
        # elif 'USER' in os.environ:
        #     return os.environ['USER']
        # else:
        #     uid = os.getuid()
        #     try:
        #         return pwd.getpwuid(uid)[0]
        #     except:
        #         return str(uid)

    def get_target_path(self, target_name, target_type):
        """Return the expected file name of the built target.

        Apps may carry an ``.exe`` suffix; libraries are ``lib<name>`` plus
        the first shared/static suffix that exists on disk (or no suffix if
        none does).
        """
        if target_type == 'app':
            if os.path.exists(target_name + '.exe'):
                return target_name + '.exe'
            return target_name

        filename = 'lib' + target_name
        for suffix in ('.dylib', '-dll.dylib', '.so', '-dll.so', '.a'):
            if os.path.exists(filename + suffix):
                return filename + suffix
        return filename

    def get_artifact_hash(self, filename):
        """Return the MD5 hex digest of *filename*, or None if it is missing."""
        if not os.path.exists(filename):
            warn('Unable to find ' + filename + ' to hash')
            return None
        with open(filename, 'rb') as f:
            if os.fstat(f.fileno()).st_size == 0:
                # mmap refuses to map an empty file.
                return hashlib.md5().hexdigest()
            with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
                return hashlib.md5(mm).hexdigest()

    def get_libs_from_log(self, project_name):
        """Return the libraries named on the last link line of the make log.

        Reads ``make_<project_name>.log`` from the current directory; the
        last line containing `` -l`` is taken to be the link command.
        """
        filename = 'make_' + project_name + '.log'
        if not os.path.exists(filename):
            warn('Unable to find ' + filename + ' to examine')
            # Fall back on readelf -d (or otool, on macOS)?
            return set()
        last_link_line = ''
        with open(filename, 'r', errors='ignore') as f:
            for line in f:
                if ' -l' in line:
                    last_link_line = line
        return self.get_libs_from_command(last_link_line.split())

    def get_libs_from_command(self, command):
        """Return the set of library names referenced by a link command.

        Recognizes ``-l<name>`` flags and ``.a``/``.so``/``.dylib`` file
        arguments (minus directory, ``lib`` prefix and extension); strips
        ``-dll``/``-static`` suffixes.  The argument of ``-o`` is skipped.
        """
        libs = set()
        skip = False
        for x in command:
            if skip:
                skip = False
                continue
            if x.startswith('-'):
                if x.startswith('-l'):
                    name = x[2:]
                else:
                    # Only -o takes a separate argument we must skip.
                    skip = (x == '-o')
                    continue
            elif x.endswith(('.a', '.so', '.dylib')):
                name = x[x.rfind('/') + 1:x.rfind('.')]
                if name.startswith('lib'):
                    name = name[3:]
            else:
                continue
            if name.endswith('-dll'):
                name = name[:-4]
            elif name.endswith('-static'):
                name = name[:-7]
            libs.add(name)
        return libs


class CollectorCMake(Collector):
    """Collect build information around a direct gcc/g++ link command."""

    def collect(self, command, top_src_dir, wanted=('*',), sc_version=None):
        """Run *command* and populate ``self.info``.

        The source directory is guessed from the working directory and then
        refined from ``CPP_SOURCE_DIR`` in the nearest ``CMakeCache.txt`` at
        or above it.  Irrelevant commands replace the current process via
        ``os.execv``.  *top_src_dir* is accepted but not consulted.
        """
        try:
            command_info = self.parse_command(command)
        except IrrelevantCommandError:
            os.execv(command[0], command)

        self.init(wanted)
        status = self.run_command(command)

        target_type = command_info['target_type']
        target_name = command_info['target_name']
        self.target_fullpath = command_info['target_fullpath']
        path = os.getcwd()
        src_dir = re.sub('/[^/]*/build/', '/src/', path)  # tentatively
        tail = ''
        cache_name = None  # stays None if no CMakeCache.txt is found
        while path != '/':
            candidate = os.path.join(path, 'CMakeCache.txt')
            if os.path.exists(candidate):
                cache_name = candidate
                break
            (parent, child) = os.path.split(path)
            if parent == path:
                break  # cannot go any higher
            path = parent
            tail = os.path.join(child, tail)
        if cache_name is not None:
            with open(cache_name, 'r', errors='ignore') as f:
                src_dir_re = re.compile('^CPP_SOURCE_DIR:.+=(.+)')
                for line in f:
                    match_info = src_dir_re.match(line)
                    if match_info is not None:
                        src_dir = os.path.join(match_info.group(1),
                                               tail.rstrip('/'))

        # Order matters in some cases.  Reorder these calls at your own peril.
        self.collect_target_info(target_name, target_type,
                                 self.target_fullpath, src_dir, None)
        self.collect_vcs_info()
        self.collect_tc_info()
        self.collect_env_info()
        self.collect_artifact_info(sc_version, status)
        if self.in_want_list('libs'):
            self.info['libs'] = ','.join(self.get_libs_from_command(command))

    def get_target_path(self, target_name, target_type):
        """Return the output path recorded from the command's ``-o``."""
        return self.target_fullpath

    def parse_command(self, command):
        """Extract the output target from a gcc/g++ command line.

        Returns a dict with ``target_fullpath``, ``target_name`` and
        ``target_type``.  Raises IrrelevantCommandError for other programs,
        for commands without ``-o``, and for object-file outputs.
        """
        if not command[0].endswith('g++') and not command[0].endswith('gcc'):
            raise IrrelevantCommandError

        info = {}
        value_expected = False
        for x in command[1:]:
            if value_expected:
                output_path = x
                info['target_fullpath'] = os.path.abspath(output_path)
                target_filename = os.path.basename(output_path)
                (target_name, ext) = os.path.splitext(target_filename)
                info['target_name'] = target_name
                if ext in ('.so', '.a', '.lib', '.dll', '.dylib'):
                    info['target_type'] = 'lib'
                    if info['target_name'].startswith("lib"):
                        info['target_name'] = info['target_name'][3:]
                elif ext in ('.o', '.obj'):
                    raise IrrelevantCommandError
                else:
                    if ext and ext != '.exe':
                        # Not really an extension; keep the full file name.
                        info['target_name'] = target_filename
                    info['target_type'] = 'app'
                value_expected = False
            elif x == '-o':
                value_expected = True

        if len(info) != 3:
            raise IrrelevantCommandError

        return info