1# $Id: ncbicxx_build_info.py 595471 2019-10-22 15:54:41Z ucko $
2import ast
3from datetime import date, datetime, timedelta, timezone
4from distutils.sysconfig import parse_makefile, expand_makefile_vars
5import hashlib
6import mmap
7import os
8import pwd
9import re
10import subprocess
11import time
12from warnings import warn
13
class IrrelevantCommandError(Exception):
    """Raised by parse_command when a command is not worth reporting on."""
16
class Collector(object):
    """Run a make-style build command and gather metadata about it.

    Results accumulate in the ``info`` dictionary.  ``wanted`` names the
    optional items to gather; the entry '*' selects all of them.  State
    is set up by init(), which collect() calls, rather than by a
    constructor.
    """

    def init(self, wanted):
        """Reset the collected info and remember which items are wanted."""
        self.info = {'tech_stack': 'cxx'}
        self.wanted = wanted

    def in_want_list(self, key):
        """Return True if key was requested explicitly or via '*'."""
        return key in self.wanted or '*' in self.wanted

    def run_command(self, command):
        """Run command, then continue collecting in a forked child.

        After the command finishes, the process forks.  The parent exits
        immediately with the command's status; a negative status (which
        subprocess.call reports for death by signal) is mapped to
        128 + signal number first.  The child records timing and outcome
        in self.info and returns the unmapped status.
        """
        start_time = datetime.now(timezone.utc)
        status = subprocess.call(command, close_fds=False)
        end_time = datetime.now(timezone.utc)
        if os.fork() > 0:
            if status < 0:
                status = 128 - status
            os._exit(status)  # continue in background

        self.info['start_time'] = start_time
        self.info['end_time'] = end_time
        self.info['duration'] = (end_time - start_time).total_seconds()
        self.info['succeeded'] = status == 0
        self.info['command_line'] = ' '.join(command)
        self.info['directory'] = os.getcwd()

        return status

    def collect_target_info(self, target_name, target_type, target_fullpath,
                            srcdir, mf):
        """Record the target's name, source directory, type and contact.

        target_type 'lib' is stored as 'library'; anything else as 'app'.
        mf is the parsed makefile (or None) handed on to get_contact().
        """
        self.info['name'] = target_name
        self.info['source_directory'] = srcdir

        if target_type == 'lib':
            self.info['type'] = 'library'
        else:
            self.info['type'] = 'app'
        # Version probing is switched off by the constant False below.
        if target_type == 'app' and False:
            try:
                cmd = [target_fullpath, '-version']
                self.info['app_version'] = subprocess.check_output(cmd)
            except subprocess.CalledProcessError:
                pass

        if self.in_want_list('contact'):
            self.info['contact'] = self.get_contact(mf)

    def collect_vcs_info(self):
        """Merge version-control details for the source directory, if any.

        Does nothing unless 'source_directory' is already recorded and
        'vcs_type' is wanted.
        """
        if 'source_directory' not in self.info:
            return
        if not self.in_want_list('vcs_type'):
            return

        vcs_info = self.get_vcs_info(self.info['source_directory'])
        if vcs_info is not None:
            self.info.update(vcs_info)

    def collect_tc_info(self):
        """Record build type, IDs and agent name from TeamCity properties."""
        tcprops = self.read_teamcity_properties()
        under_teamcity = 'teamcity.version' in tcprops

        self.info['build_type'] = 'standard' if under_teamcity else 'legacy'
        if 'teamcity.build.id' in tcprops:
            self.info['build_id'] = tcprops['teamcity.build.id']
        if 'build.number' in tcprops:
            self.info['build_number'] = tcprops['build.number']

        if under_teamcity:
            self.info['tc_vars'] = tcprops
            if 'teamcity.agent.name' in tcprops:
                self.info['tc_agent_name'] = tcprops['teamcity.agent.name']

    def collect_env_info(self):
        """Record whether the build is automated and, if wanted, the env."""
        if 'NCBI_AUTOMATED_BUILD' in os.environ:
            self.info['execution_type'] = 'automated'
        else:
            self.info['execution_type'] = 'manual'

        if self.in_want_list('env_vars'):
            self.info['env_vars'] = dict(os.environ)

    def collect_build_config(self, status_dir):
        """Extract configuration details from status_dir/config.log.

        Stores a dict under 'build_config' holding the log file owner
        ('user_id': login name, or the numeric uid when it has no passwd
        entry) plus whichever of 'command', 'host' and 'cwd' the log
        contains.  Does nothing unless 'build_config' is wanted.
        """
        if not self.in_want_list('build_config'):
            return

        bcfg = {}
        with open(os.path.join(status_dir, 'config.log'), 'r') as f:
            uid = os.fstat(f.fileno()).st_uid
            try:
                bcfg['user_id'] = pwd.getpwuid(uid)[0]
            except KeyError:
                # No passwd entry for this uid; keep the number itself.
                bcfg['user_id'] = uid
            for line in f:
                if line.startswith('  $ '):
                    bcfg['command'] = line[4:].rstrip('\n')
                elif line.startswith('hostname = '):
                    bcfg['host'] = line[11:].rstrip('\n')
                elif ' configurables below ' in line:
                    start = line.find(' below ') + 7
                    bcfg['cwd'] = line[start:].rstrip('.\n')
        self.info['build_config'] = bcfg

    def collect_artifact_info(self, sc_version, status):
        """Record the artifact's version label, file name and hash.

        A positive sc_version yields 'SC-<n>'; otherwise 'trunk'.  The
        hash is only computed when status is 0 and 'artifact_hash' is
        wanted.  Relies on 'name' and 'type' already being in self.info.
        """
        if sc_version is not None and sc_version > 0:
            self.info['artifact_version'] = 'SC-%d' % sc_version
        else:
            self.info['artifact_version'] = 'trunk'

        filename = self.get_target_path(self.info['name'], self.info['type'])
        self.info['artifact_name'] = os.path.basename(filename)
        if status == 0 and self.in_want_list('artifact_hash'):
            h = self.get_artifact_hash(filename)
            if h is not None:
                self.info['artifact_hash'] = h

    def collect(self, command, status_dir, wanted=('*',), sc_version=None):
        """Run command and populate self.info with everything wanted.

        Commands that parse_command() rejects are exec'ed directly and
        never return here.  Otherwise the command is run through
        run_command() (which forks), and the remaining collection steps
        fill in self.info.
        """
        try:
            command_info = self.parse_command(command)
        except IrrelevantCommandError:
            # NOTE(review): os.execv performs no PATH search, whereas
            # subprocess.call in run_command does -- confirm that
            # command[0] is always a path.
            os.execv(command[0], command)

        self.init(wanted)
        status = self.run_command(command)

        target_type = command_info['target_type']
        mfname = 'Makefile.%s.%s' % (command_info['target_name'], target_type)
        srcdir = os.path.realpath(self.get_srcdir(command_info, mfname))
        mf = self.read_makefile(os.path.join(srcdir, mfname),
                                command_info['target_name'], target_type)
        target_name = expand_makefile_vars('$(%s)' % target_type.upper(), mf)
        target_fullpath = os.path.join('.', target_name)

        # order matters in some cases. Reorder these calls at your own peril
        self.collect_target_info(target_name, target_type, target_fullpath,
                                 srcdir, mf)
        self.collect_vcs_info()
        self.collect_tc_info()
        self.collect_env_info()
        self.collect_build_config(status_dir)
        self.collect_artifact_info(sc_version, status)
        if self.in_want_list('libs'):
            libs = self.get_libs_from_log(command_info['target_name'])
            # Sorted so the reported string does not depend on set order.
            self.info['libs'] = ','.join(sorted(libs))

    def get_as_string(self, name):
        """Return self.info[name] rendered as a string.

        Strings pass through, booleans become 'T' or 'F', dates (and
        datetimes, which subclass date) use ISO format, and anything
        else is rendered with repr().
        """
        v = self.info[name]
        if isinstance(v, str):
            return v
        if isinstance(v, bool):
            return 'T' if v else 'F'
        if isinstance(v, date):
            return v.isoformat()
        return repr(v)

    def parse_command(self, command):
        """Extract target name, target type and srcdir from a make command.

        Returns a dict with the keys 'target_name', 'target_type' and
        'srcdir'.  Raises IrrelevantCommandError when command[0] ends in
        '.sh', when fewer than those three items could be determined, or
        when the only targets named are irrelevant ones (check, clean,
        ...).
        """
        if command[0].endswith('.sh'):
            raise IrrelevantCommandError

        irrelevant_re = re.compile(r'(?:check|clean|export-headers'
                                   r'|mark-as-disabled|purge|requirements'
                                   r'|sources)(?:[._].+)?$')
        template_re = re.compile(r'/Makefile\.(app|lib)\.tmpl$')
        wrapper_re = re.compile(r'Makefile\.(.*)_(app|lib)$')

        info = {}
        irrelevant_targets = []
        relevant_targets = []
        value_expected = False
        for x in command[1:]:
            if value_expected:
                # x is the argument of the preceding option flag.
                match_info = wrapper_re.match(x)
                if match_info is not None:
                    (info['target_name'], info['target_type']) \
                        = match_info.groups()
                    info['srcdir'] = '.'
                else:
                    match_info = template_re.search(x)
                    if match_info is not None:
                        info['target_type'] = match_info.group(1)
                value_expected = False
            elif len(x) == 2 and x[0] == '-' and x[1] in 'CIWfo':
                value_expected = True
            elif irrelevant_re.match(x) is not None:
                irrelevant_targets.append(x)
            elif x.startswith('TMPL='):
                info['target_name'] = x[5:]
            elif x.startswith('srcdir='):
                info['srcdir'] = x[7:]
            elif x and x[0] != '-' and '=' not in x:
                # Empty arguments are skipped rather than indexed.
                relevant_targets.append(x)
        if len(info) < 3 or (irrelevant_targets and not relevant_targets):
            raise IrrelevantCommandError
        return info

    def get_srcdir(self, command_info, mfname):
        """Work out the source directory for the current build directory.

        Tries, in order: the 'srcdir' entry of command_info, '.' when
        mfname exists here, $(srcdir) from ./Makefile, and finally the
        current directory with '/<something>/build/' replaced by '/src/'.
        """
        if 'srcdir' in command_info:
            return command_info['srcdir']
        if os.path.exists(mfname):
            return '.'
        if os.path.exists('Makefile'):
            mf = parse_makefile('Makefile')
            return expand_makefile_vars('$(srcdir)', mf)
        return re.sub('/[^/]*/build/', '/src/', os.getcwd())

    def read_makefile(self, mfpath, target_name, target_type):
        """Parse mfpath, or synthesize a minimal mapping if unreadable.

        On IOError the result maps the upper-cased target_type to
        target_name.
        """
        try:
            return parse_makefile(mfpath)
        except IOError:
            return {target_type.upper(): target_name}

    def read_teamcity_properties(self):
        """Read the TeamCity build properties file into a dict.

        The file named by $TEAMCITY_BUILD_PROPERTIES_FILE is only read
        when 'build_type', 'build_number' or 'tc_agent_name' is wanted.
        Blank lines and lines starting with '#' or '!' are skipped, and
        a line ending in a backslash is joined with the following line.
        Any failure is reported as a warning and ends the read, keeping
        what was parsed so far.  If nothing was read,
        $NCBI_BUILD_SESSION_ID (when set) supplies 'build.number'.
        """
        props = {}
        if 'TEAMCITY_BUILD_PROPERTIES_FILE' in os.environ and \
           (self.in_want_list('build_type')
            or self.in_want_list('build_number')
            or self.in_want_list('tc_agent_name')):
            fname = os.environ['TEAMCITY_BUILD_PROPERTIES_FILE']
            try:
                with open(fname, 'r') as f:
                    prop_re = re.compile(r'((?:(?![:=])\S|\\.)+)'
                                         r'(?:\s*[:=]\s*|\s+)(.*)')
                    for line in f:
                        line = line.lstrip()
                        if len(line) == 0 or line[0] in '#!':
                            continue
                        line = line.rstrip('\n')
                        while line.endswith('\\'):
                            # next(f, '') also copes with a continuation
                            # marker on the very last line of the file.
                            more = next(f, '')
                            line = (line.rstrip('\\')
                                    + more.lstrip().rstrip('\n'))
                        mi = prop_re.match(line)
                        if mi is None:
                            warn('Malformed line in ' + fname + ': ' + line)
                        else:
                            # Triple-quoting lets literal_eval decode the
                            # backslash escapes in keys and values.
                            k = ast.literal_eval("'''" + mi.group(1) + "'''")
                            v = ast.literal_eval("'''" + mi.group(2) + "'''")
                            props[k] = v
            except Exception as e:
                warn("Failed to open %s: %s" % (fname, e))
        if len(props) == 0:
            if 'NCBI_BUILD_SESSION_ID' in os.environ:
                props['build.number'] = os.environ['NCBI_BUILD_SESSION_ID']
            # Synthesize anything else?
        return props

    def get_vcs_info(self, srcdir, rest=(), fallback=None):
        """Find version-control info for srcdir, searching upward.

        Checks srcdir for .svn, .git, CVS (only at the starting level)
        or include/common/ncbi_package_ver.h, then recurses into the
        parent directory with the stripped component prepended to rest.
        Package information only serves as a fallback should no VCS
        directory turn up.  Returns a dict, or the fallback (possibly
        None) once the top of the path is reached.
        """
        if os.path.isdir(os.path.join(srcdir, '.svn')):
            return self.get_svn_info(srcdir, rest)
        elif os.path.isdir(os.path.join(srcdir, '.git')):
            return self.get_git_info(srcdir, rest)
        elif len(rest) == 0 and os.path.isdir(os.path.join(srcdir, 'CVS')):
            return self.get_cvs_info(srcdir)
        elif os.path.isfile(os.path.join(srcdir,
                                         'include/common/ncbi_package_ver.h')):
            fallback = self.get_package_info(srcdir, rest)

        if srcdir == '/':
            return fallback
        (d, b) = os.path.split(srcdir)
        if d == srcdir:
            # No further progress possible (e.g. an exhausted relative
            # path); stop instead of recursing forever.
            return fallback
        return self.get_vcs_info(d, (b,) + rest, fallback)

    def get_svn_info(self, srcdir, rest):
        """Query 'svn info' for srcdir/rest and derive path and branch.

        Returns a dict with 'vcs_type', 'vcs_path' and, when it can be
        recognized from the URL, 'vcs_branch'.  If svn reports no URL,
        looks for a .git directory in srcdir or above and defers to
        get_git_info(); returns None when none is found.
        """
        info = {'vcs_type': 'svn'}
        with subprocess.Popen(['svn', 'info', os.path.join(srcdir, *rest)],
                              stdout=subprocess.PIPE,
                              stderr=subprocess.DEVNULL,
                              universal_newlines=True) as svn:
            for line in svn.stdout:
                # partition tolerates lines without a ': ' separator.
                (k, sep, v) = line.rstrip('\n').partition(': ')
                if not sep or k != 'URL':
                    continue
                info['vcs_path'] = v
                if '/trunk/' in v:
                    info['vcs_branch'] = 'trunk'
                else:
                    match_info = re.search('/components/[^/]+/([0-9.]+)/', v)
                    if match_info is not None:
                        info['vcs_branch'] = 'SC-' + match_info.group(1)
                    else:
                        match_info = re.search('/branches/([^/]+)/', v)
                        if match_info is not None:
                            info['vcs_branch'] = match_info.group(1)
                break
        if 'vcs_path' in info:
            return info

        # Maybe controlled by git after all, in a hybrid layout?
        if os.path.isdir(os.path.join(srcdir, '.git')):
            return self.get_git_info(srcdir, rest)
        while srcdir != '/':
            (parent, child) = os.path.split(srcdir)
            if parent == srcdir:
                break  # cannot climb any higher
            srcdir = parent
            rest = (child,) + rest
            if os.path.isdir(os.path.join(srcdir, '.git')):
                return self.get_git_info(srcdir, rest)
        return None

    def get_git_info(self, srcdir, rest):
        """Query git in srcdir for the origin URL and current branch.

        The git executable comes from $TEAMCITY_GIT_PATH, defaulting to
        'git'.  The URL is taken from 'git remote get-url origin', else
        from the 'Fetch URL' line of 'git remote show origin', else it
        is 'file://' + srcdir; rest, if non-empty, is appended after
        '#'.  When no branch could be determined and the URL is a
        file:// one, looks for a .svn directory above srcdir and defers
        to get_svn_info(); returns None when none is found.
        """
        info = {'vcs_type': 'git'}
        git = os.environ.get('TEAMCITY_GIT_PATH', 'git')
        url = None
        try:
            cmd = [git, 'remote', 'get-url', 'origin']
            url = subprocess.check_output(cmd, stderr=subprocess.DEVNULL,
                                          universal_newlines=True,
                                          cwd=srcdir)
            url = url.rstrip('\n')
        except subprocess.CalledProcessError:
            cmd = [git, 'remote', 'show', 'origin']
            with subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                  stderr=subprocess.DEVNULL,
                                  universal_newlines=True,
                                  cwd=srcdir) as remote:
                for line in remote.stdout:
                    # The output also has lines without ': ' (such as
                    # its heading), so partition rather than unpack.
                    (k, sep, v) = line.strip().partition(': ')
                    if sep and k == 'Fetch URL':
                        url = v
                        break
            if url is None:
                url = 'file://' + srcdir
        if len(rest) > 0:
            url = url + '#' + os.path.join(*rest)
        info['vcs_path'] = url
        try:
            cmd = [git, 'rev-parse', '--symbolic-full-name', 'HEAD']
            rev = subprocess.check_output(cmd, stderr=subprocess.DEVNULL,
                                          universal_newlines=True,
                                          cwd=srcdir)
            rev = rev.rstrip('\n')
            info['vcs_branch'] = re.sub(r'^refs/(?:heads|tags)/', '', rev)
        except subprocess.CalledProcessError:
            pass
        if 'vcs_branch' not in info and info['vcs_path'].startswith('file://'):
            # Maybe controlled by Subversion after all, in a hybrid layout?
            # (No need to check for .svn at this level, because get_vcs_info
            # looks for it first.)
            while srcdir != '/':
                (parent, child) = os.path.split(srcdir)
                if parent == srcdir:
                    break  # cannot climb any higher
                srcdir = parent
                rest = (child,) + rest
                if os.path.isdir(os.path.join(srcdir, '.svn')):
                    return self.get_svn_info(srcdir, rest)
            return None
        return info

    def get_cvs_info(self, srcdir):
        """Read srcdir/CVS/{Root,Repository,Entries} for path and branch.

        The branch is taken from the tag field of the first Entries
        line, or 'HEAD' when that line carries no such field.
        """
        info = {'vcs_type': 'cvs'}
        cvs_dir = os.path.join(srcdir, 'CVS')
        with open(os.path.join(cvs_dir, 'Root'), 'r') as f:
            cvs_root = f.readline().rstrip('\n')
        with open(os.path.join(cvs_dir, 'Repository'), 'r') as f:
            cvs_path = f.readline().rstrip('\n')
        if cvs_path.startswith('/'):
            # Absolute repository path: keep only the part of the root
            # up to and including its first colon.
            pos = cvs_root.find(':') + 1
            info['vcs_path'] = cvs_root[:pos] + cvs_path
        else:
            info['vcs_path'] = cvs_root + '/' + cvs_path
        with open(os.path.join(cvs_dir, 'Entries'), 'r') as f:
            first_entry = f.readline().rstrip('\n')
        match_info = re.match(r'/.*?/.*?/.*?/.*?/[^D](.+)', first_entry)
        if match_info is None:
            info['vcs_branch'] = 'HEAD'
        else:
            info['vcs_branch'] = match_info.group(1)
        return info

    def get_package_info(self, srcdir, rest):
        """Derive release info from include/common/ncbi_package_ver.h.

        Returns a dict describing the release's Subversion location when
        the header defines the package name and all three version
        numbers and they are not the 'unknown' 0.0.0 placeholder;
        otherwise None.
        """
        filename = os.path.join(srcdir, 'include/common/ncbi_package_ver.h')
        package_name = None
        version = [None, None, None]
        version_slots = {'NCBI_PACKAGE_VERSION_MAJOR': 0,
                         'NCBI_PACKAGE_VERSION_MINOR': 1,
                         'NCBI_PACKAGE_VERSION_PATCH': 2}
        with open(filename) as f:
            for line in f:
                if not line.startswith('#define NCBI_PACKAGE_'):
                    continue
                words = line.split()
                if len(words) < 3:
                    continue  # macro defined without a value
                if words[1] == 'NCBI_PACKAGE_NAME':
                    package_name = words[2].strip('"')
                elif words[1] in version_slots:
                    version[version_slots[words[1]]] = words[2]
        if package_name is None or None in version:
            return None
        if package_name == 'unknown' and version == ['0', '0', '0']:
            return None
        base = 'https://svn.ncbi.nlm.nih.gov/repos/toolkit/release'
        version = '.'.join(version)
        url = '/'.join((base, package_name, version, 'c++') + rest)
        return {'vcs_type': 'svn',
                'vcs_path': url,
                'vcs_branch': package_name + '-' + version}

    def get_contact(self, mf):
        """Return the expanded WATCHERS setting, or '-' if there is none.

        Starts with mf and, while WATCHERS is absent, moves on to the
        Makefile in the current directory and then in each parent
        directory, stopping at the first directory without a Makefile.
        A mf of None yields '-' immediately.
        """
        next_dir = os.getcwd()
        while mf is not None:
            if 'WATCHERS' in mf:
                return expand_makefile_vars('$(WATCHERS)', mf)
            if next_dir is None:
                break
            mfname = os.path.join(next_dir, 'Makefile')
            if not os.path.exists(mfname):
                break
            mf = parse_makefile(mfname)
            next_dir = None if next_dir == '/' else os.path.dirname(next_dir)

        return '-'

    def get_target_path(self, target_name, target_type):
        """Return the file name expected for the built target.

        For 'app', prefers target_name + '.exe' when that file exists.
        For anything else, returns 'lib' + target_name with the first
        known library suffix for which a file exists, or without any
        suffix when none does.
        """
        if target_type == 'app':
            exe_name = target_name + '.exe'
            return exe_name if os.path.exists(exe_name) else target_name

        stem = 'lib' + target_name
        for suffix in ('.dylib', '-dll.dylib', '.so', '-dll.so', '.a'):
            if os.path.exists(stem + suffix):
                return stem + suffix
        return stem

    def get_artifact_hash(self, filename):
        """Return the MD5 hex digest of filename, or None if it is missing.

        The file is read in chunks, so empty files (which cannot be
        memory-mapped) are handled as well.
        """
        if not os.path.exists(filename):
            warn('Unable to find ' + filename + ' to hash')
            return None
        digest = hashlib.md5()
        with open(filename, 'rb') as f:
            for chunk in iter(lambda: f.read(1 << 20), b''):
                digest.update(chunk)
        return digest.hexdigest()

    def get_libs_from_log(self, project_name):
        """Return the libraries named on the last link line of the log.

        Reads make_<project_name>.log from the current directory and
        treats the last line containing ' -l' as the link command.
        Returns an empty set (after a warning) when the log is missing.
        """
        filename = 'make_' + project_name + '.log'
        if not os.path.exists(filename):
            warn('Unable to find ' + filename + ' to examine')
            # Fall back on readelf -d (or otool, on macOS)?
            return set()
        last_link_line = ''
        with open(filename, 'r', errors='ignore') as f:
            for line in f:
                if ' -l' in line:
                    last_link_line = line
        return self.get_libs_from_command(last_link_line.split())

    def get_libs_from_command(self, command):
        """Return the set of library names referenced by a link command.

        Recognizes -l<name> options and arguments ending in .a, .so or
        .dylib (directory, 'lib' prefix and extension removed).  The
        argument following -o is ignored, and a trailing '-dll' or
        '-static' is dropped from each name.
        """
        libs = set()
        skip = False
        for x in command:
            if skip:
                # This is the output file named after -o.
                skip = False
                continue
            if x.startswith('-l'):
                name = x[2:]
            elif x == '-o':
                skip = True
                continue
            elif x.startswith('-'):
                continue
            elif x.endswith(('.a', '.so', '.dylib')):
                name = x[x.rfind('/') + 1:x.rfind('.')]
                if name.startswith('lib'):
                    name = name[3:]
            else:
                continue
            if name.endswith('-dll'):
                name = name[:-4]
            elif name.endswith('-static'):
                name = name[:-7]
            libs.add(name)
        return libs
508
509
class CollectorCMake(Collector):
    """Collector variant for builds driven by direct gcc/g++ link commands.

    Target details are taken from the compiler's -o argument rather than
    from makefiles, and the source directory from the nearest
    CMakeCache.txt at or above the current directory.
    """

    def collect(self, command, top_src_dir, wanted=('*',), sc_version=None):
        """Run command and populate self.info with everything wanted.

        Commands that parse_command() rejects are exec'ed directly and
        never return here.  top_src_dir is accepted but not used by
        this method.
        """
        try:
            command_info = self.parse_command(command)
        except IrrelevantCommandError:
            os.execv(command[0], command)

        self.init(wanted)
        status = self.run_command(command)

        target_type = command_info['target_type']
        target_name = command_info['target_name']
        self.target_fullpath = command_info['target_fullpath']
        path = os.getcwd()
        src_dir = re.sub('/[^/]*/build/', '/src/', path)  # tentatively
        # Walk upward looking for CMakeCache.txt, remembering in tail the
        # components climbed past.  cache_name stays None when nothing is
        # found, including when the walk starts at '/'.
        tail = ''
        cache_name = None
        while path != '/':
            candidate = os.path.join(path, 'CMakeCache.txt')
            if os.path.exists(candidate):
                cache_name = candidate
                break
            (path, child) = os.path.split(path)
            tail = os.path.join(child, tail)
        if cache_name is not None:
            with open(cache_name, 'r', errors='ignore') as f:
                src_dir_re = re.compile('^CPP_SOURCE_DIR:.+=(.+)')
                for line in f:
                    match_info = src_dir_re.match(line)
                    if match_info is not None:
                        src_dir = os.path.join(match_info.group(1),
                                               tail.rstrip('/'))

        # order matters in some cases. Reorder these calls at your own peril
        self.collect_target_info(target_name, target_type,
                                 self.target_fullpath, src_dir, None)
        self.collect_vcs_info()
        self.collect_tc_info()
        self.collect_env_info()
        self.collect_artifact_info(sc_version, status)
        if self.in_want_list('libs'):
            # Sorted so the reported string does not depend on set order.
            self.info['libs'] \
                = ','.join(sorted(self.get_libs_from_command(command)))

    def get_target_path(self, target_name, target_type):
        """Return the output path recorded by collect(); arguments unused."""
        return self.target_fullpath

    def parse_command(self, command):
        """Extract target details from a gcc/g++ command's -o argument.

        Returns a dict with 'target_fullpath', 'target_name' and
        'target_type' ('lib' for library extensions, else 'app').
        Raises IrrelevantCommandError when command[0] does not end in
        'g++' or 'gcc', when the output is an object file, or when no
        -o argument is present.
        """
        if not command[0].endswith(('g++', 'gcc')):
            raise IrrelevantCommandError

        info = {}
        value_expected = False
        for x in command[1:]:
            if x == '-o' and not value_expected:
                value_expected = True
                continue
            if not value_expected:
                continue

            # x is the output path following -o.
            value_expected = False
            info['target_fullpath'] = os.path.abspath(x)
            target_filename = os.path.basename(x)
            (target_name, ext) = os.path.splitext(target_filename)
            info['target_name'] = target_name
            if ext in ('.so', '.a', '.lib', '.dll', '.dylib'):
                info['target_type'] = 'lib'
                if target_name.startswith("lib"):
                    info['target_name'] = target_name[3:]
            elif ext in ('.o', '.obj'):
                raise IrrelevantCommandError
            else:
                if ext and ext != '.exe':
                    # Not a real extension; keep the full file name.
                    info['target_name'] = target_filename
                info['target_type'] = 'app'

        if len(info) != 3:
            raise IrrelevantCommandError

        return info
585
586
587