1#!/usr/bin/env python3
2"""Test zstd interoperability between versions"""
3
4# ################################################################
5# Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
6# All rights reserved.
7#
8# This source code is licensed under both the BSD-style license (found in the
9# LICENSE file in the root directory of this source tree) and the GPLv2 (found
10# in the COPYING file in the root directory of this source tree).
11# ################################################################
12
13import filecmp
14import glob
15import hashlib
16import os
17import shutil
18import sys
19import subprocess
20from subprocess import Popen, PIPE
21
# Upstream repository that is cloned in full to get every release tag.
repo_url = 'https://github.com/facebook/zstd.git'
# Working directory of the test, relative to the root of the source tree.
tmp_dir_name = 'tests/versionsTest'
# External tools used to build and to fetch the releases.
make_cmd = 'make'
git_cmd = 'git'
# File of the source tree used as test sample, and the name of its copy.
test_dat_src = 'README.md'
test_dat = 'test_dat'
# Pseudo-tag naming the binary built from the current working tree.
head = 'vdevel'
# Directory (inside the working directory) holding dictionary training files.
dict_source = 'dict_source'
# Shell globs (relative to the working directory) of the files copied into
# `dict_source`: first the *.c patterns, then the *.h patterns.
dict_files = (
    './zstd/programs/*.c ./zstd/lib/common/*.c ./zstd/lib/compress/*.c ./zstd/lib/decompress/*.c ./zstd/lib/dictBuilder/*.c ./zstd/lib/legacy/*.c '
    './zstd/programs/*.h ./zstd/lib/common/*.h ./zstd/lib/compress/*.h ./zstd/lib/dictBuilder/*.h ./zstd/lib/legacy/*.h'
)
32
33
def execute(command, print_output=False, print_error=True, param_shell=False):
    """Run `command`, optionally echo its output, and return its exit status.

    Args:
        command: argument list, or a single command string when
            `param_shell` is True.
        print_output: when True, print the captured stdout, then the
            captured stderr, whatever the exit status is.
        print_error: when True and `print_output` is False, print the
            captured stderr if the command exits with a non-zero status.
        param_shell: forwarded to `Popen` as its `shell` argument.

    Returns:
        The return code of the child process.
    """
    popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=param_shell)
    raw_stdout, raw_stderr = popen.communicate()
    # The child's output is arbitrary bytes; replace anything that is not
    # valid UTF-8 instead of letting a UnicodeDecodeError abort the run.
    stdout_text = raw_stdout.decode("utf-8", errors="replace")
    stderr_text = raw_stderr.decode("utf-8", errors="replace")
    if print_output:
        print(stdout_text)
        print(stderr_text)
    elif print_error and popen.returncode != 0:
        # communicate() has waited for the child, so returncode is set here.
        print(stderr_text)
    return popen.returncode
46
47
def proc(cmd_args, pipe=True, dummy=False):
    """Run `cmd_args` as a child process and wait for it to finish.

    Args:
        cmd_args: argument list handed to `Popen`.
        pipe: when True, stdout and stderr of the child are captured;
            otherwise the child inherits the streams of this process.
        dummy: when True, nothing is executed and None is returned.

    Returns:
        The `(stdout, stderr)` pair returned by `Popen.communicate()`, or
        None when `dummy` is set.
    """
    if dummy:
        return None
    stream_options = {'stdout': PIPE, 'stderr': PIPE} if pipe else {}
    return Popen(cmd_args, **stream_options).communicate()
56
57
def make(args, pipe=True):
    """Run `make_cmd` with the argument list `args` through `proc`.

    Returns whatever `proc` returns for that command line.
    """
    command_line = [make_cmd] + args
    return proc(command_line, pipe)
60
61
def git(args, pipe=True):
    """Run `git_cmd` with the argument list `args` through `proc`.

    Returns whatever `proc` returns for that command line.
    """
    command_line = [git_cmd] + args
    return proc(command_line, pipe)
64
65
def get_git_tags():
    """Return the tags listed by `git tag -l 'v[0-9].[0-9].[0-9]'`.

    The command runs in the current working directory; its standard output
    is decoded as UTF-8 and split on whitespace.
    """
    listing, _ = git(['tag', '-l', 'v[0-9].[0-9].[0-9]'])
    return listing.decode('utf-8').split()
70
71
def create_dict(tag, dict_source_path):
    """Train the dictionary `dict.<tag>` unless that file already exists.

    The training set is every `*.c` and `*.h` file found directly in
    `dict_source_path`.  Tag 'v0.5.0' is trained with `./dictBuilder.<tag>`;
    every other tag with `./zstd.<tag> -f --train`.  The outcome is reported
    on stdout.

    Args:
        tag: version tag selecting the binary and naming the dictionary.
        dict_source_path: directory holding the training files.
    """
    dict_name = 'dict.' + tag
    if os.path.isfile(dict_name):
        print(dict_name + ' already exists')
        return

    samples = glob.glob(dict_source_path + "/*.c") + glob.glob(dict_source_path + "/*.h")
    # Hand the files over as an argument list rather than a shell string, so
    # that paths containing spaces or shell metacharacters stay intact.
    if tag == 'v0.5.0':
        command = ['./dictBuilder.' + tag] + samples + ['-o', dict_name]
    else:
        command = ['./zstd.' + tag, '-f', '--train'] + samples + ['-o', dict_name]

    if execute(command, print_output=False) == 0:
        print(dict_name + ' created')
    else:
        print('ERROR: creating of ' + dict_name + ' failed')
87
88
def dict_compress_sample(tag, sample):
    """Compress `sample` with `./zstd.<tag>` and dictionary `dict.<tag>`.

    One compression is attempted per entry of `levels`.  When a run exits
    with status 0, `<sample>.zst` is renamed to
    `<sample>_<label>_64_<tag>_dictio.zst`; a failing run is skipped
    silently.  The standard error of the binary is discarded.

    Args:
        tag: version tag selecting the binary and the dictionary.
        sample: path of the file to compress.
    """
    zstd_bin = './zstd.' + tag
    dict_name = 'dict.' + tag
    # (command-line flag, two-digit label used in the output file name)
    levels = (('-f', '01'), ('-5f', '05'), ('-9f', '09'), ('-15f', '15'), ('-18f', '18'))
    for flag, label in levels:
        # subprocess.DEVNULL avoids opening (and leaking) a file object.
        status = subprocess.call([zstd_bin, '-D', dict_name, flag, sample],
                                 stderr=subprocess.DEVNULL)
        if status == 0:
            os.rename(sample + '.zst', sample + '_' + label + '_64_' + tag + '_dictio.zst')
    print(tag + " : dict compression completed")
105
106
def compress_sample(tag, sample):
    """Compress `sample` with `./zstd.<tag>`, without any dictionary.

    One compression is attempted per entry of `levels`.  When a run exits
    with status 0, `<sample>.zst` is renamed to
    `<sample>_<label>_64_<tag>_nodict.zst`; a failing run is skipped
    silently.  The standard error of the binary is discarded.

    Args:
        tag: version tag selecting the binary.
        sample: path of the file to compress.
    """
    zstd_bin = './zstd.' + tag
    # (command-line flag, two-digit label used in the output file name)
    levels = (('-f', '01'), ('-5f', '05'), ('-9f', '09'), ('-15f', '15'), ('-18f', '18'))
    for flag, label in levels:
        # subprocess.DEVNULL avoids opening (and leaking) a file object.
        status = subprocess.call([zstd_bin, flag, sample], stderr=subprocess.DEVNULL)
        if status == 0:
            os.rename(sample + '.zst', sample + '_' + label + '_64_' + tag + '_nodict.zst')
    print(tag + " : compression completed")
122
123
124# http://stackoverflow.com/a/19711609/2132223
def sha1_of_file(filepath):
    """Return the hexadecimal SHA-1 digest of the content of `filepath`.

    The file is opened in binary mode and read in one go.
    """
    digest = hashlib.sha1()
    with open(filepath, 'rb') as stream:
        digest.update(stream.read())
    return digest.hexdigest()
128
129
def remove_duplicates():
    """Delete every `*.zst` file of the current directory that duplicates an earlier one.

    Files are visited in sorted name order; for each group of files with
    identical content only the first name survives.  Each removal is
    reported on stdout.
    """
    list_of_zst = sorted(glob.glob('*.zst'))
    for i, ref_zst in enumerate(list_of_zst):
        if not os.path.isfile(ref_zst):
            continue  # already removed as a duplicate of an earlier file
        for compared_zst in list_of_zst[i + 1:]:
            if not os.path.isfile(compared_zst):
                continue
            # shallow=False forces a content comparison.  The default shallow
            # mode declares two files equal as soon as their type, size and
            # modification time match, which could delete a file whose
            # content is actually different.
            if filecmp.cmp(ref_zst, compared_zst, shallow=False):
                os.remove(compared_zst)
                print('duplicated : {} == {}'.format(ref_zst, compared_zst))
142
143
def decompress_zst(tag):
    """Decompress every `*_nodict.zst` file of the current directory with `./zstd.<tag>`.

    Each file is decompressed to `<file>_d64_<tag>.dec` and the result is
    compared with `test_dat`.  One status line is printed per file.

    Args:
        tag: version tag selecting the binary.

    Returns:
        0 when every file decompressed to the content of `test_dat`,
        1 when at least one command failed or one output differed.
    """
    dec_error = 0
    for file_zst in sorted(glob.glob('*_nodict.zst')):
        print(file_zst, end=' ')
        print(tag, end=' ')
        file_dec = file_zst + '_d64_' + tag + '.dec'
        if tag <= 'v0.5.0':
            # these versions are given the output name as a positional argument
            params = ['./zstd.' + tag, '-df', file_zst, file_dec]
        else:
            params = ['./zstd.' + tag, '-df', file_zst, '-o', file_dec]
        if execute(params) != 0:
            print('command does not work')
            dec_error = 1
        # shallow=False: always compare contents.  The default shallow mode
        # reports equality as soon as type, size and modification time match,
        # so a corrupted output of the right size could be reported as OK.
        elif filecmp.cmp(file_dec, test_dat, shallow=False):
            print('OK     ')
        else:
            print('ERR !! ')
            dec_error = 1
    return dec_error
165
166
def decompress_dict(tag):
    """Decompress every `*_dictio.zst` file of the current directory with `./zstd.<tag>`.

    The dictionary is `dict.<dict_tag>`, where `dict_tag` is recovered from
    the file name: `head` when the name contains it, otherwise the part
    starting at the last 'v' before the "_dictio.zst" suffix.  Each file is
    decompressed to `<file>_d64_<tag>.dec` and compared with `test_dat`.
    Files whose dictionary tag is below 'v0.6.0' are skipped when `tag` is
    'v0.6.0'.

    Args:
        tag: version tag selecting the decompressing binary.

    Returns:
        0 when every tested file decompressed to the content of `test_dat`,
        1 when at least one command failed or one output differed.
    """
    dec_error = 0
    suffix_len = len('_dictio.zst')
    for file_zst in sorted(glob.glob('*_dictio.zst')):
        dict_tag = file_zst[0:len(file_zst) - suffix_len]  # remove "_dictio.zst"
        if head in dict_tag:  # find vdevel
            dict_tag = head
        else:
            dict_tag = dict_tag[dict_tag.rfind('v'):]
        if tag == 'v0.6.0' and dict_tag < 'v0.6.0':
            continue
        dict_name = 'dict.' + dict_tag
        print(file_zst + ' ' + tag + ' dict=' + dict_tag, end=' ')
        file_dec = file_zst + '_d64_' + tag + '.dec'
        if tag <= 'v0.5.0':
            # these versions are given the output name as a positional argument
            params = ['./zstd.' + tag, '-D', dict_name, '-df', file_zst, file_dec]
        else:
            params = ['./zstd.' + tag, '-D', dict_name, '-df', file_zst, '-o', file_dec]
        if execute(params) != 0:
            print('command does not work')
            dec_error = 1
        # shallow=False: always compare contents.  The default shallow mode
        # reports equality as soon as type, size and modification time match,
        # so a corrupted output of the right size could be reported as OK.
        elif filecmp.cmp(file_dec, test_dat, shallow=False):
            print('OK     ')
        else:
            print('ERR !! ')
            dec_error = 1
    return dec_error
195
196
if __name__ == '__main__':
    # Directory layout.  base_dir is the parent of the current working
    # directory, so the script is presumably started from a direct
    # sub-directory of the source tree (see the path comments below).
    base_dir = os.getcwd() + '/..'                  # /path/to/zstd
    tmp_dir = base_dir + '/' + tmp_dir_name         # /path/to/zstd/tests/versionsTest
    clone_dir = tmp_dir + '/' + 'zstd'              # /path/to/zstd/tests/versionsTest/zstd
    dict_source_path = tmp_dir + '/' + dict_source  # /path/to/zstd/tests/versionsTest/dict_source
    programs_dir = base_dir + '/programs'           # /path/to/zstd/programs
    os.makedirs(tmp_dir, exist_ok=True)

    # since Travis clones limited depth, we should clone full repository
    if not os.path.isdir(clone_dir):
        git(['clone', repo_url, clone_dir])

    # the sample compressed by every version is a copy of test_dat_src
    shutil.copy2(base_dir + '/' + test_dat_src, tmp_dir + '/' + test_dat)

    # Retrieve all release tags
    print('Retrieve all release tags :')
    os.chdir(clone_dir)
    alltags = get_git_tags() + [head]
    tags = [t for t in alltags if t >= 'v0.4.0']
    print(tags)

    # Build all release zstd
    for tag in tags:
        os.chdir(base_dir)
        dst_zstd = '{}/zstd.{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/zstd.<TAG>
        # a release binary is built only once; the head binary is always rebuilt
        if not os.path.isfile(dst_zstd) or tag == head:
            if tag != head:
                r_dir = '{}/{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/<TAG>
                os.makedirs(r_dir, exist_ok=True)
                os.chdir(clone_dir)
                # check the tagged sources out into r_dir
                git(['--work-tree=' + r_dir, 'checkout', tag, '--', '.'], False)
                if tag == 'v0.5.0':
                    os.chdir(r_dir + '/dictBuilder')  # /path/to/zstd/tests/versionsTest/v0.5.0/dictBuilder
                    make(['clean', 'dictBuilder'], False)
                    shutil.copy2('dictBuilder', '{}/dictBuilder.{}'.format(tmp_dir, tag))
                os.chdir(r_dir + '/programs')  # /path/to/zstd/tests/versionsTest/<TAG>/programs
                make(['clean', 'zstd'], False)
            else:
                os.chdir(programs_dir)
                make(['zstd'], False)
            # the current directory is the one in which 'zstd' was just built
            shutil.copy2('zstd', dst_zstd)

    # remove any remaining *.zst and *.dec from previous test
    os.chdir(tmp_dir)
    for compressed in glob.glob("*.zst"):
        os.remove(compressed)
    for dec in glob.glob("*.dec"):
        os.remove(dec)

    # copy *.c and *.h to a temporary directory ("dict_source")
    if not os.path.isdir(dict_source_path):
        os.mkdir(dict_source_path)
        print('cp ' + dict_files + ' ' + dict_source_path)
        # a shell is required here to expand the globs held in dict_files
        execute('cp ' + dict_files + ' ' + dict_source_path, param_shell=True)

    print('Compress test.dat by all released zstd')

    # Each version compresses the sample, then decompresses everything
    # produced so far (by itself and by the versions handled before it).
    error_code = 0
    for tag in tags:
        print(tag)
        if tag >= 'v0.5.0':
            create_dict(tag, dict_source_path)
            dict_compress_sample(tag, test_dat)
            remove_duplicates()
            error_code += decompress_dict(tag)
        compress_sample(tag, test_dat)
        remove_duplicates()
        error_code += decompress_zst(tag)

    print('')
    print('Enumerate different compressed files')
    zstds = sorted(glob.glob('*.zst'))
    for zstd in zstds:
        print(zstd + ' : ' + repr(os.path.getsize(zstd)) + ', ' + sha1_of_file(zstd))

    if error_code != 0:
        print('======  ERROR !!!  =======')

    # error_code is a count of failed steps.  A process exit status only
    # keeps its low 8 bits, so clamp the count: a total that is a multiple of
    # 256 must not be reported as success.
    sys.exit(min(error_code, 255))
277