1#!/usr/bin/env python2
2"""Execute the tests for the razers3 program.
3
4The golden test outputs are generated by the script generate_outputs.sh.
5
6You have to give the root paths to the source and the binaries as arguments to
7the program.  These are the paths to the directory that contains the 'projects'
8directory.
9
10Usage:  run_tests.py SOURCE_ROOT_PATH BINARY_ROOT_PATH
11"""
12import logging
13import os.path
14import sys
15
# Put util/py_lib (located relative to this script) at the front of sys.path
# before importing seqan.app_tests.
17path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..',
18                                    '..', '..', 'util', 'py_lib'))
19sys.path.insert(0, path)
20
21import seqan.app_tests as app_tests
22
class RemovePairIdColumn(object):
    """Transformation that deletes one tab-separated column from each line.

    Every line of the text is split on tabs.  A line is only modified when it
    has more than ``min_cols`` columns; all other lines pass through
    unchanged.
    """

    def __init__(self, col_no=8, min_cols=8):
        # Zero-based index of the column to remove.
        self.col_no = col_no
        # Lines with min_cols or fewer columns are left untouched.
        self.min_cols = min_cols

    def apply(self, text, is_left):
        """Return ``text`` with column ``col_no`` removed from wide lines.

        ``text`` is the content to transform as one string.  ``is_left`` is
        not used by this transformation.  Line endings are preserved, also
        when the removed column is the last one of a line.
        """
        out = []
        for line in text.splitlines(True):
            # Detach the line terminator first so that it is never treated
            # as part of the last column (and dropped together with it).
            body = line.rstrip('\r\n')
            ending = line[len(body):]
            cols = body.split('\t')
            if len(cols) > self.min_cols:
                # Slicing (rather than del) keeps an out-of-range col_no a
                # harmless no-op.
                cols = cols[0:self.col_no] + cols[self.col_no + 1:]
            out.append('\t'.join(cols) + ending)
        return ''.join(out)
41
42
def _diff_entry(expected, actual, transforms=None):
    """Return a to_diff entry; the transform list is only added when given."""
    if transforms is None:
        return (expected, actual)
    return (expected, actual, transforms)


def _make_conf(ph, program, num_threads, stem, read_files, variant='',
               extra_args=(), suffix='razers', result_transforms=None,
               stdout_transforms=None):
    """Build the TestConf for one razers3 invocation on the adeno data.

    The files of a run are named ``<stem><variant>-tc<num_threads>`` followed
    by ``.<suffix>`` for the result file and ``.stdout`` for the captured
    standard output.  ``extra_args`` are placed between the ``-tc`` option
    and the input files.  ``read_files`` lists the read file names (one for
    single-end, two for paired-end runs).  The optional transform lists are
    attached to the respective diff entries.
    """
    base = '%s%s-tc%d' % (stem, variant, num_threads)
    result_name = '%s.%s' % (base, suffix)
    stdout_name = base + '.stdout'
    result_out = ph.outFile(result_name)
    stdout_out = ph.outFile(stdout_name)

    args = ['-tc', str(num_threads)]
    args.extend(extra_args)
    args.append(ph.inFile('adeno-genome.fa'))
    args.extend([ph.inFile(name) for name in read_files])
    args.extend(['-o', result_out])

    to_diff = [
        _diff_entry(ph.inFile(result_name), result_out, result_transforms),
        _diff_entry(ph.inFile(stdout_name), stdout_out, stdout_transforms),
    ]
    return app_tests.TestConf(
        program=program,
        redir_stdout=stdout_out,
        args=args,
        to_diff=to_diff)


def main(source_base, binary_base, num_threads=1):
    """Main entry point of the script.

    Builds the list of razers3 test configurations (single-end and
    paired-end runs on the adeno data set), executes them, prints one status
    line per test plus a summary, and removes the temporary directory.

    Args:
        source_base: root path of the source tree.
        binary_base: root path of the binaries.
        num_threads: value passed to razers3 via ``-tc``; it is also part of
            every expected/actual file name.

    Returns:
        True if at least one test failed, False otherwise.
    """

    print('Executing test for razers3')
    print('===========================')
    print('')

    ph = app_tests.TestPathHelper(
        source_base, binary_base,
        'apps/razers3/tests')  # tests dir

    # ============================================================
    # Auto-detect the binary path.
    # ============================================================

    path_to_program = app_tests.autolocateBinary(
        binary_base, 'bin', 'razers3')

    # ============================================================
    # Build TestConf list.
    # ============================================================

    # We prepare a list of transforms to apply to the output files.  This is
    # used to strip the input/output paths from the programs' output to
    # make it more canonical and host independent.
    ph.outFile('-')  # To ensure that the out path is set.

    # From here on the temporary directory is in use, so make sure it is
    # removed even if building or running a test raises.
    try:
        transforms = [
            app_tests.ReplaceTransform(
                os.path.join(ph.source_base_path, 'apps/razers3/tests') + os.sep,
                '', right=True),
            app_tests.ReplaceTransform(ph.temp_dir + os.sep, '', right=True),
            ]

        # Transforms for SAM output format only.  Make VN field of @PG header
        # canonical.
        sam_transforms = [
            app_tests.RegexpReplaceTransform(
                r'\tVN:[^\t]*', r'\tVN:VERSION', right=True, left=True)]

        # Transforms for RazerS output format only.  Remove pair id column.
        razers_transforms = [RemovePairIdColumn()]

        # Output formats; the position in this list is the number used in
        # the '-of<N>' part of the file names, the entry is the extension.
        formats = ['razers', 'fa', 'eland', 'gff', 'sam', 'afg']

        # Build list with TestConf objects, analogously to how the output
        # was generated in generate_outputs.sh.
        conf_list = []

        def add(stem, read_files, **kwargs):
            conf_list.append(_make_conf(ph, path_to_program, num_threads,
                                        stem, read_files, **kwargs))

        # ============================================================
        # Run Adeno Single-End Tests
        # ============================================================

        # We run the following for all read lengths we have reads for.
        for rl in [36, 100]:
            stem = 'se-adeno-reads%d_1' % rl
            reads = ['adeno-reads%d_1.fa' % rl]

            # Run with default options.
            add(stem, reads)

            # Run with the '-ng' option.
            add(stem, reads, variant='-ng', extra_args=['-ng'])

            # Compute forward/reverse matches only.
            for o in ['-r', '-f']:
                add(stem, reads, variant=o, extra_args=[o])

            # Compute with different identity rates.
            for i in range(90, 101):
                add(stem, reads, variant='-i%d' % i, extra_args=['-i', str(i)])

            # Compute with different output formats.
            for of, suffix in enumerate(formats):
                this_transforms = list(transforms)
                if suffix == 'razers':
                    this_transforms += razers_transforms
                elif suffix == 'sam':
                    this_transforms += sam_transforms
                add(stem, reads, variant='-of%d' % of, suffix=suffix,
                    result_transforms=this_transforms,
                    stdout_transforms=transforms)

            # Compute with different sort orders.
            for so in [0, 1]:
                add(stem, reads, variant='-so%d' % so,
                    extra_args=['-so', str(so)])

        # ============================================================
        # Run Adeno Paired-End Tests
        # ============================================================

        # We run the following for all read lengths we have reads for.
        for rl in [36, 100]:
            stem = 'pe-adeno-reads%d_2' % rl
            reads = ['adeno-reads%d_1.fa' % rl, 'adeno-reads%d_2.fa' % rl]

            # Run with default options.
            add(stem, reads, result_transforms=razers_transforms)

            # NOTE(review): this second run repeats the default paired-end
            # run verbatim (same arguments, same file names), whereas the
            # single-end counterpart passes '-ng'.  It is kept unchanged
            # because a '-ng' variant would need matching golden files;
            # confirm against generate_outputs.sh.
            add(stem, reads, result_transforms=razers_transforms)

            # Compute forward/reverse matches only.
            for o in ['-r', '-f']:
                add(stem, reads, variant=o, extra_args=[o],
                    result_transforms=razers_transforms)

            # Compute with different identity rates.
            for i in range(90, 101):
                add(stem, reads, variant='-i%d' % i, extra_args=['-i', str(i)],
                    result_transforms=razers_transforms)

            # Compute with different output formats.
            for of, suffix in enumerate(formats):
                this_transforms = list(transforms)
                if suffix == 'razers':
                    this_transforms += razers_transforms
                elif suffix == 'sam':
                    this_transforms += sam_transforms
                # NOTE(review): unlike the single-end case, the stdout diff
                # also gets the format-specific transforms here; kept as is.
                add(stem, reads, variant='-of%d' % of, suffix=suffix,
                    result_transforms=this_transforms,
                    stdout_transforms=this_transforms)

            # Compute with different sort orders.
            for so in [0, 1]:
                add(stem, reads, variant='-so%d' % so,
                    extra_args=['-so', str(so)],
                    result_transforms=razers_transforms)

        # Execute the tests.
        failures = 0
        for conf in conf_list:
            res = app_tests.runTest(conf)
            # Output to the user: the command line followed by the verdict.
            command = ' '.join(['razers3'] + conf.args)
            if res:
                print('%s %s' % (command, 'OK'))
            else:
                failures += 1
                print('%s %s' % (command, 'FAILED'))
    finally:
        # Cleanup.
        ph.deleteTempDir()

    print('==============================')
    print('     total tests: %d' % len(conf_list))
    print('    failed tests: %d' % failures)
    print('successful tests: %d' % (len(conf_list) - failures))
    print('==============================')
    # Compute and return return code.
    return failures != 0
326
327
# Script entry point: hand main() to the shared app_tests driver and exit
# the process with whatever value that driver returns.
if __name__ == '__main__':
    sys.exit(app_tests.main(main))
330