1#! /usr/bin/env python3
2
3#
4# Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
5# All rights reserved.
6#
7# This source code is licensed under the BSD-style license found in the
8# LICENSE file in the root directory of this source tree. An additional grant
9# of patent rights can be found in the PATENTS file in the same directory.
10#
11
12# Limitations:
13# - doesn't support filenames with spaces
14# - dir1/lz4 and dir2/lz4 will be merged in a single results file
15
16import argparse
17import os
18import string
19import subprocess
20import time
21import traceback
22import hashlib
23
# --- Script identity and defaults -------------------------------------------
script_version = 'v1.7.2 (2016-11-08)'
default_repo_url = 'https://github.com/lz4/lz4.git'
email_header = 'lz4_speedTest'

# --- Locations (all derived from the directory the script is started in) ----
working_dir_name = 'speedTest'
working_path = '/'.join((os.getcwd(), working_dir_name))   # /path/to/lz4/tests/speedTest
clone_path = '/'.join((working_path, 'lz4'))               # /path/to/lz4/tests/speedTest/lz4

# Process id as text; it is embedded in e-mail subjects and written to the pid file.
pid = str(os.getpid())

# --- Run-time state, overwritten by the __main__ section --------------------
verbose = False
clang_version = "unknown"
gcc_version = "unknown"
args = None
35
36
def hashfile(hasher, fname, blocksize=65536):
    """Feed the contents of *fname* into *hasher* and return the hex digest.

    The file is opened in binary mode and consumed ``blocksize`` bytes at a
    time, so it is never held in memory as a whole.
    """
    with open(fname, "rb") as stream:
        while True:
            data = stream.read(blocksize)
            if data == b"":
                # An empty read marks the end of the file.
                break
            hasher.update(data)
    return hasher.hexdigest()
42
43
def log(text):
    """Print *text* on stdout, prefixed with the current local date and time."""
    stamp = time.strftime("%Y/%m/%d %H:%M:%S")
    print(' - '.join((stamp, text)))
46
47
def execute(command, print_command=True, print_output=False, print_error=True, param_shell=True):
    """Run *command* in ``execute.cwd`` and return its output as a list of lines.

    Args:
        command: the command to run; a shell command line when ``param_shell``
            is true.
        print_command: log the command before running it.
        print_output: print the captured stdout and stderr once the command
            has finished.
        print_error: on a non-zero exit status, print stderr (unless
            ``print_output`` already did).
        param_shell: forwarded to ``subprocess.Popen`` as ``shell``.

    Returns:
        stdout followed by stderr, split into lines.

    Raises:
        RuntimeError: the command exited with a non-zero status; the message
            is stdout followed by stderr.
        subprocess.TimeoutExpired: the command did not finish within
            ``args.timeout`` seconds. The child is killed before re-raising.
    """
    if print_command:
        log("> " + command)
    # ``args`` stays None until the command line has been parsed; in that case
    # run without a time limit instead of failing on ``None.timeout``.
    timeout = getattr(args, "timeout", None)
    popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             shell=param_shell, cwd=execute.cwd)
    try:
        stdout_data, stderr_data = popen.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        # communicate() does not stop the child when the timeout expires.
        # Kill it, drop our pipe ends and reap it so nothing is left behind.
        # wait() is used rather than a second communicate() so that a
        # grandchild still holding the pipes open cannot block us.
        popen.kill()
        popen.stdout.close()
        popen.stderr.close()
        popen.wait()
        raise
    # Tools may emit bytes that are not valid UTF-8; keep going instead of
    # failing on the decode.
    stderr_lines = stderr_data.decode("utf-8", errors="replace")
    stdout_lines = stdout_data.decode("utf-8", errors="replace")
    if print_output:
        if stdout_lines:
            print(stdout_lines)
        if stderr_lines:
            print(stderr_lines)
    if popen.returncode:
        if stderr_lines and not print_output and print_error:
            print(stderr_lines)
        raise RuntimeError(stdout_lines + stderr_lines)
    return (stdout_lines + stderr_lines).splitlines()
# Working directory for every command started through execute(); None means
# the current directory of this process.
execute.cwd = None
66
67
def does_command_exist(command):
    """Return True when *command* runs through ``execute`` without raising.

    The command is echoed only in verbose mode, and neither its output nor
    its stderr is printed. Any exception counts as "not available".
    """
    try:
        execute(command, print_command=verbose, print_output=False, print_error=False)
    except Exception:
        available = False
    else:
        available = True
    return available
74
75
def send_email(emails, topic, text, have_mutt, have_mail):
    """Mail *text* to *emails* with subject *topic*.

    The body is first written to ``<working_path>/tmpEmailContent`` and then
    fed to the mailer on stdin. ``mutt`` is used when available, otherwise
    ``mail``; when neither is available a message is logged and nothing is sent.

    Args:
        emails: recipient address(es), inserted into the command line as-is
            so that several space-separated addresses keep working.
        topic: subject line.
        text: message body (a string or an iterable of strings).
        have_mutt: whether ``mutt`` can be used.
        have_mail: whether ``mail`` can be used.
    """
    import shlex  # only this function needs it

    logFileName = working_path + '/' + 'tmpEmailContent'
    # Leave the ``with`` block before starting the mailer so the body is
    # flushed and closed by the time the mailer reads it.
    with open(logFileName, "w") as myfile:
        myfile.writelines(text)

    if have_mutt:
        mailer = 'mutt'
    elif have_mail:
        mailer = 'mail'
    else:
        log("e-mail cannot be sent (mail or mutt not found)")
        return
    # The subject can contain branch names and other text that is not under
    # this script's control; quote it so the shell never interprets it.
    execute('%s -s %s %s < %s' % (mailer, shlex.quote(topic), emails, shlex.quote(logFileName)),
            verbose)
87
88
def send_email_with_attachments(branch, commit, last_commit, args, text, results_files,
                                logFileName, have_mutt, have_mail):
    """Mail a regression warning for *branch*/*commit*.

    *text* is written to *logFileName*, which becomes the message body. With
    ``mutt`` the files named in *results_files* are attached; ``mail`` sends
    the body only. When neither mailer is available a message is logged and
    nothing is sent.

    Args:
        branch: branch the warning is about.
        commit: commit that was tested.
        last_commit: commit it was compared against.
        args: parsed options; ``emails``, ``lowerLimit`` and ``ratioLimit``
            are read.
        text: message body (a string or an iterable of strings).
        results_files: space-separated result file names to attach.
        logFileName: file the body is written to.
        have_mutt: whether ``mutt`` can be used.
        have_mail: whether ``mail`` can be used.
    """
    import shlex  # only this function needs it

    # Leave the ``with`` block before starting the mailer so the body is
    # flushed and closed by the time the mailer reads it.
    with open(logFileName, "w") as myfile:
        myfile.writelines(text)

    email_topic = '[%s:%s] Warning for %s:%s last_commit=%s speed<%s ratio<%s' \
                  % (email_header, pid, branch, commit, last_commit,
                     args.lowerLimit, args.ratioLimit)
    # The subject embeds the branch name, which comes from the remote
    # repository; quote it so the shell never interprets it.
    subject = shlex.quote(email_topic)
    body = shlex.quote(logFileName)
    if have_mutt:
        # results_files and args.emails are space-separated lists and must
        # stay unquoted to be split into separate arguments.
        execute('mutt -s ' + subject + ' ' + args.emails + ' -a ' + results_files
                + ' < ' + body)
    elif have_mail:
        execute('mail -s ' + subject + ' ' + args.emails + ' < ' + body)
    else:
        log("e-mail cannot be sent (mail or mutt not found)")
104
105
def git_get_branches():
    """Refresh the remotes and return the remote branches to be tested.

    Runs ``git fetch -p`` followed by ``git branch -rl`` and keeps every
    listed entry (stripped of surrounding whitespace) whose line mentions
    none of ``HEAD``, ``coverity_scan`` or ``gh-pages``.
    """
    ignored = ("HEAD", "coverity_scan", "gh-pages")
    execute('git fetch -p', verbose)
    listing = execute('git branch -rl', verbose)
    return [entry.strip() for entry in listing
            if not any(marker in entry for marker in ignored)]
114
115
def git_get_changes(branch, commit, last_commit):
    """Return a printable summary of the commits leading up to *commit*.

    With a known *last_commit* the range ``last_commit..commit`` is listed;
    otherwise the ten most recent commits reachable from *commit* are. The
    result starts with a ``Changes in <branch> since <last_commit>:`` line.
    """
    fmt = '--format="%h: (%an) %s, %ar"'
    if last_commit is not None:
        git_command = 'git --no-pager log %s %s..%s' % (fmt, last_commit, commit)
    else:
        git_command = 'git log -n 10 %s %s' % (fmt, commit)
    history = execute(git_command)
    heading = 'Changes in %s since %s:\n' % (branch, last_commit)
    return heading + '\n'.join(history)
123
124
def get_last_results(resultsFileName):
    """Read the most recent benchmark run recorded in *resultsFileName*.

    The file is a sequence of runs. Each run starts with a header line of up
    to four words (branch, commit, compiler version, md5) and is followed by
    result lines of eight or nine words, of which word 1 is read as the
    compressed size, word 3 as the compression speed and word 5 as the
    decompression speed. Every header discards what was collected so far, so
    only the last run is returned. Lines of any other shape, including blank
    ones, are ignored.

    Returns:
        ``(commit, csize, cspeed, dspeed)`` where ``commit`` is taken from
        the last header (None when the file has none) and the other three are
        parallel lists. ``(None, None, None, None)`` when the file does not
        exist.
    """
    if not os.path.isfile(resultsFileName):
        return None, None, None, None
    commit = None
    csize, cspeed, dspeed = [], [], []
    with open(resultsFileName, 'r') as f:
        for line in f:
            words = line.split()
            count = len(words)
            # A header needs at least two words, because the commit is read
            # from words[1]. Requiring that keeps blank or one-word lines
            # from raising IndexError.
            if 2 <= count <= 4:   # branch + commit + compilerVer + md5
                commit = words[1]
                csize, cspeed, dspeed = [], [], []
            elif count in (8, 9):  # results: "filename" or "XX files"
                csize.append(int(words[1]))
                cspeed.append(float(words[3]))
                dspeed.append(float(words[5]))
    return commit, csize, cspeed, dspeed
145
146
def benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName,
                          testFilePath, fileName, last_csize, last_cspeed, last_dspeed):
    """Benchmark one executable on one test file and compare with the last run.

    Waits until the 1-minute load average is at most ``args.maxLoadAvg``,
    runs ``programs/<executableName>`` on *testFilePath*, appends a header
    line plus the raw benchmark output to *resultsFileName*, and compares the
    new figures level by level with ``last_csize`` / ``last_cspeed`` /
    ``last_dspeed``.

    Returns:
        An empty string when there is nothing to compare against or nothing
        regressed. Otherwise ``args.message``, a context line, and one
        ``WARNING`` line per figure whose new/old speed ratio is below
        ``args.lowerLimit`` or whose old/new size ratio is below
        ``args.ratioLimit``.

    Raises:
        RuntimeError: the benchmark failed or did not print exactly
            ``args.lastCLevel + 1`` lines.
    """
    sleepTime = 30
    while os.getloadavg()[0] > args.maxLoadAvg:
        log("WARNING: bench loadavg=%.2f is higher than %s, sleeping for %s seconds"
            % (os.getloadavg()[0], args.maxLoadAvg, sleepTime))
        time.sleep(sleepTime)
    start_load = str(os.getloadavg())
    result = execute('programs/%s -rqi5b1e%s %s' % (executableName, args.lastCLevel, testFilePath), print_output=True)
    end_load = str(os.getloadavg())
    linesExpected = args.lastCLevel + 1
    if len(result) != linesExpected:
        raise RuntimeError("ERROR: number of result lines=%d is different that expected %d\n%s" % (len(result), linesExpected, '\n'.join(result)))

    # Record the run first; the file is closed on leaving the block, before
    # it is read back below.
    with open(resultsFileName, "a") as myfile:
        myfile.write('%s %s %s md5=%s\n' % (branch, commit, compilerVersion, md5sum))
        myfile.write('\n'.join(result) + '\n')

    if last_cspeed is None:
        log("WARNING: No data for comparison for branch=%s file=%s " % (branch, fileName))
        return ""

    # Re-read the file so the new figures go through the same parser as the
    # old ones.
    commit, csize, cspeed, dspeed = get_last_results(resultsFileName)
    warnings = []
    for i in range(min(len(cspeed), len(last_cspeed))):
        level = i + 1
        # Speed ratios are new/old, the size ratio is old/new; a value below
        # the configured limit is reported as a regression.
        cDiff = cspeed[i] / last_cspeed[i]
        dDiff = dspeed[i] / last_dspeed[i]
        ratioDiff = float(last_csize[i]) / csize[i]
        print("%s:%s -%d cSpeed=%6.2f cLast=%6.2f cDiff=%1.4f dSpeed=%6.2f dLast=%6.2f dDiff=%1.4f ratioDiff=%1.4f %s" % (branch, commit, level, cspeed[i], last_cspeed[i], cDiff, dspeed[i], last_dspeed[i], dDiff, ratioDiff, fileName))
        if cDiff < args.lowerLimit:
            warnings.append("WARNING: %s -%d cSpeed=%.2f cLast=%.2f cDiff=%.4f %s\n" % (executableName, level, cspeed[i], last_cspeed[i], cDiff, fileName))
        if dDiff < args.lowerLimit:
            warnings.append("WARNING: %s -%d dSpeed=%.2f dLast=%.2f dDiff=%.4f %s\n" % (executableName, level, dspeed[i], last_dspeed[i], dDiff, fileName))
        if ratioDiff < args.ratioLimit:
            warnings.append("WARNING: %s -%d cSize=%d last_cSize=%d diff=%.4f %s\n" % (executableName, level, csize[i], last_csize[i], ratioDiff, fileName))

    text = "".join(warnings)
    if text:
        text = args.message + ("\nmaxLoadAvg=%s  load average at start=%s end=%s\n%s  last_commit=%s  md5=%s\n" % (args.maxLoadAvg, start_load, end_load, compilerVersion, last_commit, md5sum)) + text
    return text
180
181
def update_config_file(branch, commit):
    """Store *commit* as the latest one seen for *branch*; return the previous one.

    The value is kept in ``<working_path>/commit_<branch>.txt`` with every
    ``/`` of the branch name replaced by ``_``. Returns None when no such
    file existed before the call.
    """
    commitFileName = "%s/commit_%s.txt" % (working_path, branch.replace("/", "_"))
    previous = None
    if os.path.isfile(commitFileName):
        with open(commitFileName, 'r') as stored:
            previous = stored.read()
    with open(commitFileName, 'w') as stored:
        stored.write(commit)
    return previous
191
192
def double_check(branch, commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName):
    """Benchmark once and, if that run reports a regression, benchmark again.

    Both runs are compared against the results that were on record before
    the first run, so a one-off slow measurement is not reported.

    Returns:
        The warning text of the last benchmark run, or an empty string when
        nothing regressed or when ``args.dry_run`` is set (no benchmark is
        run in that case).
    """
    if args.dry_run:
        # Nothing is measured in a dry run, so there is nothing to report.
        # Returning here also avoids returning an unassigned ``text``.
        return ""
    last_commit, csize, cspeed, dspeed = get_last_results(resultsFileName)
    text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed)
    if text:
        log("WARNING: redoing tests for branch %s: commit %s" % (branch, commit))
        text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed)
    return text
201
202
def test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail):
    """Build the three lz4 executables for *commit* and benchmark each test file.

    Unless ``args.dry_run`` is set, ``lz4`` is built with clang and kept as
    ``programs/lz4_clang``, then ``lz4`` and ``lz4c32`` are built with the
    default compiler. Every path in *testFilePaths* is benchmarked with the
    three executables in the order ``lz4``, ``lz4c32``, ``lz4_clang``. If any
    benchmark produced warning text, a single e-mail with all warnings is sent.
    """
    local_branch = branch.split('/')[1]
    version = local_branch.rpartition('-')[2] + '_' + commit
    if not args.dry_run:
        build_steps = [
            'make -C programs clean lz4 CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -DLZ4_GIT_COMMIT=%s"' % version,
            'mv programs/lz4 programs/lz4_clang',
            'make -C programs clean lz4 lz4c32 MOREFLAGS="-DLZ4_GIT_COMMIT=%s"' % version,
        ]
        execute(' && '.join(build_steps))
    programs_dir = clone_path + '/programs/'
    md5_lz4 = hashfile(hashlib.md5(), programs_dir + 'lz4')
    md5_lz4c32 = hashfile(hashlib.md5(), programs_dir + 'lz4c32')
    md5_lz4_clang = hashfile(hashlib.md5(), programs_dir + 'lz4_clang')
    print("md5(lz4)=%s\nmd5(lz4c32)=%s\nmd5(lz4_clang)=%s" % (md5_lz4, md5_lz4c32, md5_lz4_clang))
    print("gcc_version=%s clang_version=%s" % (gcc_version, clang_version))

    branch_tag = branch.replace("/", "_")
    logFileName = working_path + "/log_" + branch_tag + ".txt"

    # (results file prefix, executable, md5 of the executable, compiler label)
    variants = (
        ("/results_", 'lz4', md5_lz4, 'gcc_version=' + gcc_version),
        ("/results32_", 'lz4c32', md5_lz4c32, 'gcc_version=' + gcc_version),
        ("/resultsClang_", 'lz4_clang', md5_lz4_clang, 'clang_version=' + clang_version),
    )

    text_to_send = []
    results_files = ""
    for filePath in testFilePaths:
        fileName = filePath.rpartition('/')[2]
        file_tag = fileName.replace(".", "_")
        for prefix, executableName, md5sum, compilerVersion in variants:
            resultsFileName = working_path + prefix + branch_tag + "_" + file_tag + ".txt"
            text = double_check(branch, commit, args, executableName, md5sum, compilerVersion,
                                resultsFileName, filePath, fileName)
            if not text:
                continue
            text_to_send.append(text)
            results_files += resultsFileName + " "
    if text_to_send:
        send_email_with_attachments(branch, commit, last_commit, args, text_to_send, results_files, logFileName, have_mutt, have_mail)
239
240
if __name__ == '__main__':
    # Script entry point: parse options, prepare the working directory and the
    # lz4 clone, then poll the repository forever and benchmark every branch
    # whose head commit changed since it was last seen.
    import shlex
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument('testFileNames', help='file or directory names list for speed benchmark')
    parser.add_argument('emails', help='list of e-mail addresses to send warnings')
    parser.add_argument('--message', '-m', help='attach an additional message to e-mail', default="")
    parser.add_argument('--repoURL', help='changes default repository URL', default=default_repo_url)
    parser.add_argument('--lowerLimit', '-l', type=float, help='send email if speed is lower than given limit', default=0.98)
    parser.add_argument('--ratioLimit', '-r', type=float, help='send email if ratio is lower than given limit', default=0.999)
    parser.add_argument('--maxLoadAvg', type=float, help='maximum load average to start testing', default=0.75)
    parser.add_argument('--lastCLevel', type=int, help='last compression level for testing', default=5)
    parser.add_argument('--sleepTime', '-s', type=int, help='frequency of repository checking in seconds', default=300)
    parser.add_argument('--timeout', '-t', type=int, help='timeout for executing shell commands', default=1800)
    parser.add_argument('--dry-run', dest='dry_run', action='store_true', help='not build', default=False)
    parser.add_argument('--verbose', '-v', action='store_true', help='more verbose logs', default=False)
    # These two assignments replace the module-level placeholders that
    # execute() and the helpers read.
    args = parser.parse_args()
    verbose = args.verbose

    # check if test files are accessible
    testFileNames = args.testFileNames.split()
    testFilePaths = []
    for fileName in testFileNames:
        fileName = os.path.expanduser(fileName)
        if os.path.isfile(fileName) or os.path.isdir(fileName):
            testFilePaths.append(os.path.abspath(fileName))
        else:
            log("ERROR: File/directory not found: " + fileName)
            sys.exit(1)

    # check availability of e-mail senders
    have_mutt = does_command_exist("mutt -h")
    have_mail = does_command_exist("mail -V")
    if not have_mutt and not have_mail:
        log("ERROR: e-mail senders 'mail' or 'mutt' not found")
        sys.exit(1)

    # Compiler versions are recorded next to every result.
    clang_version = execute("clang -v 2>&1 | grep 'clang version' | sed -e 's:.*version \\([0-9.]*\\).*:\\1:' -e 's:\\.\\([0-9][0-9]\\):\\1:g'", verbose)[0]
    gcc_version = execute("gcc -dumpversion", verbose)[0]

    if verbose:
        print("PARAMETERS:\nrepoURL=%s" % args.repoURL)
        print("working_path=%s" % working_path)
        print("clone_path=%s" % clone_path)
        print("testFilePath(%s)=%s" % (len(testFilePaths), testFilePaths))
        print("message=%s" % args.message)
        print("emails=%s" % args.emails)
        print("maxLoadAvg=%s" % args.maxLoadAvg)
        print("lowerLimit=%s" % args.lowerLimit)
        print("ratioLimit=%s" % args.ratioLimit)
        print("lastCLevel=%s" % args.lastCLevel)
        print("sleepTime=%s" % args.sleepTime)
        print("timeout=%s" % args.timeout)
        print("dry_run=%s" % args.dry_run)
        print("verbose=%s" % args.verbose)
        print("have_mutt=%s have_mail=%s" % (have_mutt, have_mail))

    # clone lz4 repo if needed
    if not os.path.isdir(working_path):
        os.mkdir(working_path)
    if not os.path.isdir(clone_path):
        execute.cwd = working_path
        execute('git clone ' + args.repoURL)
    if not os.path.isdir(clone_path):
        log("ERROR: lz4 clone not found: " + clone_path)
        sys.exit(1)
    # From here on every command runs inside the clone.
    execute.cwd = clone_path

    # check if speedTest.pid already exists
    pidfile = "./speedTest.pid"
    if os.path.isfile(pidfile):
        log("ERROR: %s already exists, exiting" % pidfile)
        sys.exit(1)

    send_email(args.emails, '[%s:%s] test-lz4-speed.py %s has been started' % (email_header, pid, script_version), args.message, have_mutt, have_mail)
    with open(pidfile, 'w') as the_file:
        the_file.write(pid)

    # branch/commit are kept outside the loop so the error e-mail can name
    # whatever was being processed when an exception occurred.
    branch = ""
    commit = ""
    first_time = True
    while True:
        try:
            if first_time:
                first_time = False
            else:
                if verbose:
                    log("sleep for %s seconds" % args.sleepTime)
                time.sleep(args.sleepTime)
            loadavg = os.getloadavg()[0]
            if (loadavg <= args.maxLoadAvg):
                branches = git_get_branches()
                for branch in branches:
                    # Branch names come from the remote repository; quote
                    # them before handing them to the shell.
                    commit = execute('git show -s --format=%h ' + shlex.quote(branch), verbose)[0]
                    last_commit = update_config_file(branch, commit)
                    if commit == last_commit:
                        log("skipping branch %s: head %s already processed" % (branch, commit))
                    else:
                        log("build branch %s: head %s is different from prev %s" % (branch, commit, last_commit))
                        execute('git checkout -- . && git checkout ' + shlex.quote(branch))
                        print(git_get_changes(branch, commit, last_commit))
                        test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail)
            else:
                log("WARNING: main loadavg=%.2f is higher than %s" % (loadavg, args.maxLoadAvg))
        except Exception as e:
            # Report the failure by e-mail and keep polling.
            stack = traceback.format_exc()
            email_topic = '[%s:%s] ERROR in %s:%s' % (email_header, pid, branch, commit)
            send_email(args.emails, email_topic, stack, have_mutt, have_mail)
            print(stack)
        except KeyboardInterrupt:
            # Ctrl-C is the regular way to stop: remove the pid file, notify, exit.
            os.unlink(pidfile)
            send_email(args.emails, '[%s:%s] test-lz4-speed.py %s has been stopped' % (email_header, pid, script_version), args.message, have_mutt, have_mail)
            sys.exit(0)
352