1import subprocess
2import sys
3import os
4from io import StringIO, BytesIO
5
6import dnaio
7import pytest
8
9from cutadapt.__main__ import main
10from utils import assert_files_equal, datapath, cutpath
11
# The ``pytest.mark.timeout`` marker does not fail when the pytest-timeout
# plugin is missing, so require the plugin to be importable right away.
try:
    import pytest_timeout as _unused
except ImportError:  # pragma: no cover
    raise ImportError("pytest_timeout needs to be installed")
else:
    # The import only serves as an availability check; drop the name again.
    del _unused
18
19
def test_does_not_close_stdout():
    """After main() has run, sys.stdout must still be open."""
    input_path = datapath("small.fastq")
    main([input_path])
    assert not sys.stdout.closed
23
24
def test_help():
    """--help raises SystemExit with status 0."""
    with pytest.raises(SystemExit) as excinfo:
        main(["--help"])
    status = excinfo.value.args[0]
    assert status == 0
29
30
def test_unknown_file_format(tmp_path):
    """An input file that only contains "raw text" makes main() exit."""
    unknown = tmp_path / "unknown_format.txt"
    unknown.write_text("raw text")
    argv = [str(unknown)]
    with pytest.raises(SystemExit):
        main(argv)
36
37
def test_cores_negative():
    """--cores=-1 is rejected with exit status 2."""
    argv = ["--cores=-1", datapath("simple.fasta")]
    with pytest.raises(SystemExit) as excinfo:
        main(argv)
    # Expected message: "cannot be negative"
    assert excinfo.value.args[0] == 2
43
44
def test_quiet_and_report():
    """--quiet together with --report is rejected with exit status 2."""
    argv = ["--quiet", "--report=minimal", datapath("simple.fasta")]
    with pytest.raises(SystemExit) as excinfo:
        main(argv)
    # Expected message:
    # "Options --quiet and --report cannot be used at the same time"
    assert excinfo.value.args[0] == 2
50
51
def test_debug():
    """Smoke test: a single --debug followed by "--" and an input file."""
    argv = ["--debug", "--", datapath("small.fastq")]
    main(argv)
54
55
def test_debug_trace():
    """Smoke test: --debug given twice together with an adapter."""
    argv = ["--debug", "--debug", "-a", "ACGT", datapath("small.fastq")]
    main(argv)
58
59
def test_example(run):
    """Run the ``-N -b ADAPTER`` example on example.fa."""
    params = "-N -b ADAPTER"
    run(params, "example.fa", "example.fa")
62
63
def test_compressed_fasta(run):
    """Run without any options on simple.fasta.gz."""
    no_options = ""
    run(no_options, "simple.fasta", "simple.fasta.gz")
66
67
def test_small(run):
    """Run with a single -a adapter on small.fastq."""
    params = "-a TTAGACATATCTCCGTCG"
    run(params, "small.fastq", "small.fastq")
70
71
def test_empty(run, cores):
    """empty input"""
    params = "--cores {} -a TTAGACATATCTCCGTCG".format(cores)
    run(params, "empty.fastq", "empty.fastq")
75
76
def test_newlines(run):
    """DOS/Windows newlines"""
    params = "-e 0.12 -a TTAGACATATCTCCGTCG"
    run(params, "dos.fastq", "dos.fastq")
80
81
def test_lowercase(run):
    """lowercase adapter"""
    params = "-a ttagacatatctccgtcg"
    run(params, "lowercase.fastq", "small.fastq")
85
86
def test_rest(run, tmpdir, cores):
    """-r/--rest-file"""
    rest_path = str(tmpdir.join("rest.tmp"))
    params = ["--cores", str(cores), "-b", "ADAPTER", "-N", "-r", rest_path]
    run(params, "rest.fa", "rest.fa")
    # The rest file written by the run must match the reference rest.txt.
    assert_files_equal(datapath("rest.txt"), rest_path)
92
93
def test_restfront(run, tmpdir):
    """-r/--rest-file in combination with a -g adapter."""
    rest_path = str(tmpdir.join("rest.txt"))
    params = ["-g", "ADAPTER", "-N", "-r", rest_path]
    run(params, "restfront.fa", "rest.fa")
    assert_files_equal(datapath("restfront.txt"), rest_path)
98
99
def test_discard(run):
    """--discard"""
    params = "-b TTAGACATATCTCCGTCG --discard"
    run(params, "discard.fastq", "small.fastq")
103
104
def test_discard_untrimmed(run):
    """--discard-untrimmed"""
    params = "-b CAAGAT --discard-untrimmed"
    run(params, "discard-untrimmed.fastq", "small.fastq")
108
109
def test_extensiontxtgz(run):
    """automatic recognition of "_sequence.txt.gz" extension"""
    params = "-b TTAGACATATCTCCGTCG"
    run(params, "s_1_sequence.txt", "s_1_sequence.txt.gz")
113
114
def test_minimum_length(run):
    """-m/--minimum-length"""
    params = "-m 5 -a TTAGACATATCTCCGTCG"
    statistics = run(params, "minlen.fa", "lengths.fa")
    assert statistics.written_bp[0] == 45
    assert statistics.written == 6
120
121
def test_too_short(run, tmpdir, cores):
    """--too-short-output"""
    short_reads_path = str(tmpdir.join("tooshort.fa"))
    params = ["--cores", str(cores)]
    params += ["-m", "5"]
    params += ["-a", "TTAGACATATCTCCGTCG"]
    params += ["--too-short-output", short_reads_path]
    statistics = run(params, "minlen.fa", "lengths.fa")
    assert_files_equal(datapath("tooshort.fa"), short_reads_path)
    assert statistics.too_short == 5
132
133
@pytest.mark.parametrize("redirect", (False, True))
def test_too_short_statistics(redirect):
    """Statistics are the same with and without --too-short-output."""
    # The redirect options, when requested, go in front of everything else.
    leading = ["--too-short-output", os.devnull] if redirect else []
    argv = leading + [
        "-a", "TTAGACATATCTCCGTCG",
        "-m", "24",
        "-o", os.devnull,
        datapath("small.fastq"),
    ]
    statistics = main(argv)
    assert statistics.with_adapters[0] == 2
    assert statistics.written == 2
    assert statistics.written_bp[0] == 58
    assert statistics.too_short == 1
144
145
def test_maximum_length(run):
    """-M/--maximum-length"""
    params = "-M 5 -a TTAGACATATCTCCGTCG"
    run(params, "maxlen.fa", "lengths.fa")
149
150
def test_too_long(run, tmpdir, cores):
    """--too-long-output"""
    long_reads_path = str(tmpdir.join("toolong.fa"))
    params = ["--cores", str(cores)]
    params += ["-M", "5"]
    params += ["-a", "TTAGACATATCTCCGTCG"]
    params += ["--too-long-output", long_reads_path]
    statistics = run(params, "maxlen.fa", "lengths.fa")
    assert_files_equal(datapath("toolong.fa"), long_reads_path)
    assert statistics.too_long == 5
162
163
def test_length_tag(run):
    """454 data; -n and --length-tag"""
    # Joined with single spaces this yields the complete option string.
    params = " ".join([
        "-n 3 -e 0.1 --length-tag length=",
        "-b TGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGG",
        "-b TCCATCTCATCCCTGCGTGTCCCATCTGTTCCCTCCCTGTCTCA",
    ])
    run(params, "454.fa", "454.fa")
169
170
@pytest.mark.parametrize("length", list(range(3, 11)))
def test_overlap_a(tmpdir, length):
    """-O/--overlap with -a"""
    adapter = "catatctccg"
    # The read ends with the first ``length`` characters of the adapter.
    untrimmed = ">read\nGAGACCATTCCAATG" + adapter[:length] + "\n"
    in_file = tmpdir.join("overlap.fasta")
    in_file.write(untrimmed)
    out_file = tmpdir.join("overlap-trimmed.fasta")
    main(["-O", "7", "-e", "0", "-a", adapter, "-o", str(out_file), str(in_file)])
    # With -O 7, adapter prefixes shorter than 7 must leave the read as it is.
    expected = untrimmed if length < 7 else ">read\nGAGACCATTCCAATG\n"
    assert expected == out_file.read()
185
186
def test_overlap_b(run):
    """-O/--overlap with -b"""
    params = "-O 10 -b TTAGACATATCTCCGTCG"
    run(params, "overlapb.fa", "overlapb.fa")
190
191
def test_qualtrim(run):
    """-q with low qualities"""
    params = "-q 10 -a XXXXXX"
    run(params, "lowqual.fastq", "lowqual.fastq")
195
196
def test_qualbase(run):
    """-q with low qualities, using ascii(quality+64) encoding"""
    params = "-q 10 --quality-base 64 -a XXXXXX"
    run(params, "illumina64.fastq", "illumina64.fastq")
200
201
def test_quality_trim_only(run):
    """only trim qualities, do not remove adapters"""
    params = "-q 10 --quality-base 64"
    run(params, "illumina64.fastq", "illumina64.fastq")
205
206
def test_twoadapters(run):
    """two adapters"""
    params = "-a AATTTCAGGAATT -a GTTCTCTAGTTCT"
    run(params, "twoadapters.fasta", "twoadapters.fasta")
210
211
def test_polya(run):
    """poly-A tails"""
    params = "-m 24 -O 10 -a AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
    run(params, "polya.fasta", "polya.fasta")
215
216
def test_polya_brace_notation(run):
    """poly-A tails, adapter written in brace notation (A{35})"""
    params = "-m 24 -O 10 -a A{35}"
    run(params, "polya.fasta", "polya.fasta")
220
221
# the same as --action=none
def test_no_trim(run):
    """--no-trim combined with --discard-untrimmed"""
    params = "--no-trim --discard-untrimmed -a CCCTAGTTAAAC"
    run(params, "no-trim.fastq", "small.fastq")
225
226
def test_action_none(run):
    """--action=none combined with --discard-untrimmed"""
    params = "--action=none --discard-untrimmed -a CCCTAGTTAAAC"
    run(params, "no-trim.fastq", "small.fastq")
229
230
# the same as --action=mask
def test_mask_adapter(run):
    """mask adapter with N (reads maintain the same length)"""
    params = "-b CAAG -n 3 --mask-adapter"
    run(params, "anywhere_repeat.fastq", "anywhere_repeat.fastq")
235
236
def test_action_mask(run):
    """mask adapter with N (reads maintain the same length)"""
    params = "-b CAAG -n 3 --action=mask"
    run(params, "anywhere_repeat.fastq", "anywhere_repeat.fastq")
240
241
def test_action_lowercase(run):
    """--action=lowercase"""
    params = "-b CAAG -n 3 --action=lowercase"
    run(params, "action_lowercase.fasta", "action_lowercase.fasta")
244
245
def test_action_retain(run):
    """--action=retain"""
    params = "-g GGTTAACC -a CAAG --action=retain"
    run(params, "action_retain.fasta", "action_retain.fasta")
248
249
def test_action_retain_times():
    """--action=retain combined with --times=2 makes main() exit."""
    argv = ["-a", "ACGT", "--times=2", "--action=retain", datapath("small.fastq")]
    with pytest.raises(SystemExit):
        main(argv)
253
254
def test_gz_multiblock(run):
    """compressed gz file with multiple blocks (created by concatenating two .gz files)"""
    params = "-b TTAGACATATCTCCGTCG"
    run(params, "small.fastq", "multiblock.fastq.gz")
258
259
def test_read_wildcard(run):
    """test wildcards in reads"""
    params = "--match-read-wildcards -b ACGTACGT"
    run(params, "wildcard.fa", "wildcard.fa")
263
264
@pytest.mark.parametrize("adapter_type,expected", [
    ("-a", "wildcard_adapter.fa"),
    ("-b", "wildcard_adapter_anywhere.fa"),
])
def test_adapter_wildcard(adapter_type, expected, run, tmpdir, cores):
    """wildcards in adapter"""
    wildcard_path = str(tmpdir.join("wildcards.txt"))
    params = ["--cores", str(cores), "--wildcard-file", wildcard_path]
    params += [adapter_type, "ACGTNNNACGT"]
    run(params, expected, "wildcard_adapter.fa")
    # Compare the wildcard file line by line, ignoring surrounding whitespace.
    with open(wildcard_path) as wildcard_file:
        observed = [entry.strip() for entry in wildcard_file]
    assert observed == ["AAA 1", "GGG 2", "CCC 3b", "TTT 4b"]
281
282
def test_wildcard_N(run):
    """test 'N' wildcard matching with no allowed errors"""
    params = "-e 0 -a GGGGGGG --match-read-wildcards"
    run(params, "wildcardN.fa", "wildcardN.fa")
286
287
def test_illumina_adapter_wildcard(run):
    """Adapter written with wildcard characters, run on illumina.fastq.gz."""
    params = "-a VCCGAMCYUCKHRKDCUBBCNUWNSGHCGU"
    run(params, "illumina.fastq", "illumina.fastq.gz")
290
291
def test_adapter_front(run):
    """test adapter in front"""
    params = "--front ADAPTER -N"
    run(params, "examplefront.fa", "example.fa")
295
296
def test_literal_N(run):
    """test matching literal 'N's"""
    params = "-N -e 0.2 -a NNNNNNNNNNNNNN"
    run(params, "trimN3.fasta", "trimN3.fasta")
300
301
def test_literal_N2(run):
    """test matching literal 'N's with -g and -O 1"""
    params = "-N -O 1 -g NNNNNNNNNNNNNN"
    run(params, "trimN5.fasta", "trimN5.fasta")
304
305
def test_literal_N_brace_notation(run):
    """test matching literal 'N's"""
    params = "-N -e 0.2 -a N{14}"
    run(params, "trimN3.fasta", "trimN3.fasta")
309
310
def test_literal_N2_brace_notation(run):
    """test matching literal 'N's with -g, -O 1 and brace notation"""
    params = "-N -O 1 -g N{14}"
    run(params, "trimN5.fasta", "trimN5.fasta")
313
314
def test_anchored_front(run):
    """-g adapter prefixed with ^"""
    params = "-g ^FRONTADAPT -N"
    run(params, "anchored.fasta", "anchored.fasta")
317
318
def test_anchored_front_ellipsis_notation(run):
    """-a adapter written as ^FRONTADAPT..."""
    params = "-a ^FRONTADAPT... -N"
    run(params, "anchored.fasta", "anchored.fasta")
321
322
def test_anchored_back(run):
    """-a adapter suffixed with $"""
    params = "-a BACKADAPTER$ -N"
    run(params, "anchored-back.fasta", "anchored-back.fasta")
325
326
def test_anchored_back_ellipsis_notation(run):
    """-a adapter written as ...BACKADAPTER$"""
    params = "-a ...BACKADAPTER$ -N"
    run(params, "anchored-back.fasta", "anchored-back.fasta")
329
330
def test_anchored_back_no_indels(run):
    """-a adapter suffixed with $, combined with --no-indels"""
    params = "-a BACKADAPTER$ -N --no-indels"
    run(params, "anchored-back.fasta", "anchored-back.fasta")
333
334
def test_no_indels(run):
    """--no-indels with one -a and one -g adapter"""
    params = "-a TTAGACATAT -g GAGATTGCCA --no-indels"
    run(params, "no_indels.fasta", "no_indels.fasta")
337
338
def test_ellipsis_notation(run):
    """-a adapter written with a leading ellipsis (...TTAGACATAT)"""
    params = "-a ...TTAGACATAT -g GAGATTGCCA --no-indels"
    run(params, "no_indels.fasta", "no_indels.fasta")
341
342
def test_issue_46(run, tmpdir):
    """issue 46 - IndexError with --wildcard-file"""
    wildcard_path = tmpdir.join("wildcards.txt")
    params = "--anywhere=AACGTN --wildcard-file={}".format(wildcard_path)
    run(params, "issue46.fasta", "issue46.fasta")
347
348
def test_strip_suffix(run):
    """--strip-suffix"""
    params = "--strip-suffix _sequence -a XXXXXXX"
    run(params, "stripped.fasta", "simple.fasta")
351
352
def test_info_file(run, tmpdir, cores):
    """--info-file"""
    # The true adapter sequence in the illumina.fastq.gz data set is
    # GCCTAACTTCTTAGACTGCCTTAAGGACGT (fourth base is different from the sequence shown here)
    info_path = str(tmpdir.join("info.txt"))
    params = ["--cores", str(cores), "--info-file", info_path]
    params += ["-a", "adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT"]
    run(params, "illumina.fastq", "illumina.fastq.gz")
    assert_files_equal(cutpath("illumina.info.txt"), info_path)
360
361
def test_info_file_times(run, tmpdir, cores):
    """--info-file combined with --times 2 and two named adapters"""
    info_path = str(tmpdir.join("info.txt"))
    params = ["--cores", str(cores), "--info-file", info_path, "--times", "2"]
    params += ["-a", "adapt=GCCGAACTTCTTA"]
    params += ["-a", "adapt2=GACTGCCTTAAGGACGT"]
    run(params, "illumina5.fastq", "illumina5.fastq")
    assert_files_equal(cutpath("illumina5.info.txt"), info_path)
367
368
def test_info_file_fasta(run, tmpdir, cores):
    """--info-file with the no_indels.fasta data set"""
    info_path = str(tmpdir.join("info.txt"))
    params = ["--cores", str(cores), "--info-file", info_path]
    params += ["-a", "TTAGACATAT", "-g", "GAGATTGCCA", "--no-indels"]
    # Just make sure that it runs
    run(params, "no_indels.fasta", "no_indels.fasta")
374
375
def test_named_adapter(run):
    """Adapter given as NAME=SEQUENCE"""
    params = "-a MY_ADAPTER=GCCGAACTTCTTAGACTGCCTTAAGGACGT"
    run(params, "illumina.fastq", "illumina.fastq.gz")
378
379
def test_adapter_with_u(run):
    """Adapter sequence written with U characters"""
    params = "-a GCCGAACUUCUUAGACUGCCUUAAGGACGU"
    run(params, "illumina.fastq", "illumina.fastq.gz")
382
383
def test_bzip2_input(run, cores):
    """Run on the .bz2 input small.fastq.bz2"""
    params = ["--cores", str(cores)]
    params += ["-a", "TTAGACATATCTCCGTCG"]
    run(params, "small.fastq", "small.fastq.bz2")
386
387
@pytest.mark.parametrize("extension", ["bz2", "xz", "gz"])
def test_compressed_output(tmp_path, cores, extension):
    """Smoke test: run with -o pointing at a file with a compression extension."""
    out_name = "small.fastq." + extension
    out_path = str(tmp_path / out_name)
    main([
        "--cores", str(cores),
        "-a", "TTAGACATATCTCCGTCG",
        "-o", out_path,
        datapath("small.fastq"),
    ])
394
395
def test_bzip2_multiblock(run):
    """Run on multiblock.fastq.bz2.

    Defined unconditionally: other tests in this module already rely on
    features newer than Python 3.3 (for example ``subprocess.run``), so a
    version guard around this test could never be false.
    """
    run('-b TTAGACATATCTCCGTCG', 'small.fastq', 'multiblock.fastq.bz2')
399
400
def test_xz(run):
    """Run on the .xz input small.fastq.xz"""
    params = "-b TTAGACATATCTCCGTCG"
    run(params, "small.fastq", "small.fastq.xz")
403
404
def test_no_args():
    """An empty argument list makes main() exit."""
    argv = []
    with pytest.raises(SystemExit):
        main(argv)
408
409
def test_two_fastqs():
    """Two input files and no other options make main() exit."""
    argv = [datapath("paired.1.fastq"), datapath("paired.2.fastq")]
    with pytest.raises(SystemExit):
        main(argv)
413
414
def test_anchored_no_indels(run):
    """anchored 5' adapter, mismatches only (no indels)"""
    params = "-g ^TTAGACATAT --no-indels -e 0.1"
    run(params, "anchored_no_indels.fasta", "anchored_no_indels.fasta")
418
419
def test_anchored_no_indels_wildcard_read(run):
    """anchored 5' adapter, mismatches only (no indels), but wildcards in the read count as matches"""
    params = "-g ^TTAGACATAT --match-read-wildcards --no-indels -e 0.1"
    run(params, "anchored_no_indels_wildcard.fasta", "anchored_no_indels.fasta")
424
425
def test_anchored_no_indels_wildcard_adapt(run):
    """anchored 5' adapter, mismatches only (no indels), but wildcards in the adapter count as matches"""
    params = "-g ^TTAGACANAT --no-indels -e 0.12"
    run(params, "anchored_no_indels.fasta", "anchored_no_indels.fasta")
429
430
def test_non_iupac_characters(run):
    """The adapter 'ZACGT' makes main() exit."""
    argv = ["-a", "ZACGT", datapath("small.fastq")]
    with pytest.raises(SystemExit):
        main(argv)
434
435
def test_unconditional_cut_front(run):
    """-u with a positive value"""
    params = "-u 5"
    run(params, "unconditional-front.fastq", "small.fastq")
438
439
def test_unconditional_cut_back(run):
    """-u with a negative value"""
    params = "-u -5"
    run(params, "unconditional-back.fastq", "small.fastq")
442
443
def test_unconditional_cut_both(run):
    """-u given twice, once negative and once positive"""
    params = "-u -5 -u 5"
    run(params, "unconditional-both.fastq", "small.fastq")
446
447
def test_unconditional_cut_too_many_commas():
    """-u 5,7,8 makes main() exit."""
    argv = ["-u", "5,7,8", datapath("small.fastq")]
    with pytest.raises(SystemExit):
        main(argv)
451
452
def test_unconditional_cut_invalid_number():
    """-u a,b makes main() exit."""
    argv = ["-u", "a,b", datapath("small.fastq")]
    with pytest.raises(SystemExit):
        main(argv)
456
457
def test_untrimmed_output(run, cores, tmpdir):
    """--untrimmed-output"""
    untrimmed_path = str(tmpdir.join("untrimmed.fastq"))
    params = ["--cores", str(cores)]
    params += ["-a", "TTAGACATATCTCCGTCG"]
    params += ["--untrimmed-output", untrimmed_path]
    statistics = run(params, "small.trimmed.fastq", "small.fastq")
    assert_files_equal(cutpath("small.untrimmed.fastq"), untrimmed_path)
    assert statistics.with_adapters[0] == 2
    assert statistics.written == 2
    assert statistics.written_bp[0] == 46
466
467
def test_adapter_file(run):
    """-a adapter given as file:PATH"""
    adapter_spec = "file:" + datapath("adapter.fasta")
    run("-a " + adapter_spec, "illumina.fastq", "illumina.fastq.gz")
470
471
def test_adapter_file_5p_anchored(run):
    """-g adapters read from prefix-adapter.fasta"""
    adapter_spec = "file:" + datapath("prefix-adapter.fasta")
    run("-N -g " + adapter_spec, "anchored.fasta", "anchored.fasta")
474
475
def test_adapter_file_3p_anchored(run):
    """-a adapters read from suffix-adapter.fasta"""
    adapter_spec = "file:" + datapath("suffix-adapter.fasta")
    run("-N -a " + adapter_spec, "anchored-back.fasta", "anchored-back.fasta")
478
479
def test_adapter_file_5p_anchored_no_indels(run):
    """-g adapters read from prefix-adapter.fasta, with --no-indels"""
    adapter_spec = "file:" + datapath("prefix-adapter.fasta")
    run("-N --no-indels -g " + adapter_spec, "anchored.fasta", "anchored.fasta")
482
483
def test_adapter_file_3p_anchored_no_indels(run):
    """-a adapters read from suffix-adapter.fasta, with --no-indels"""
    adapter_spec = "file:" + datapath("suffix-adapter.fasta")
    run("-N --no-indels -a " + adapter_spec, "anchored-back.fasta", "anchored-back.fasta")
486
487
def test_adapter_file_empty_name(run):
    """-a adapters read from adapter-empty-name.fasta"""
    adapter_spec = "file:" + datapath("adapter-empty-name.fasta")
    run("-N -a " + adapter_spec, "illumina.fastq", "illumina.fastq.gz")
490
491
@pytest.mark.parametrize("ext", ["", ".gz"])
def test_demultiplex(cores, tmp_path, ext):
    """Write one output file per adapter name via ``{name}`` in the -o path.

    The test runs once with plain and once with ``.gz`` output names. For the
    ``.gz`` case each output is decompressed with the standard library, so the
    test does not depend on an external ``gzip`` executable being installed.
    """
    # Function-scope imports keep this block self-contained.
    import gzip
    import shutil

    multiout = str(tmp_path / 'tmp-demulti.{name}.fasta') + ext
    params = [
        '--cores', str(cores),
        '-a', 'first=AATTTCAGGAATT',
        '-a', 'second=GTTCTCTAGTTCT',
        '-o', multiout,
        datapath('twoadapters.fasta'),
    ]
    main(params)
    for name in ("first", "second", "unknown"):
        actual = multiout.format(name=name)
        if ext == ".gz":
            # Decompress next to the compressed file and compare the plain copy.
            decompressed = actual[:-len(ext)]
            with gzip.open(actual, "rb") as packed, open(decompressed, "wb") as plain:
                shutil.copyfileobj(packed, plain)
            actual = decompressed
        expected = cutpath("twoadapters.{name}.fasta".format(name=name))
        assert_files_equal(expected, actual)
510
511
def test_multiple_fake_anchored_adapters(run):
    """Several ^-prefixed -g and $-suffixed -a adapters plus one plain -a adapter"""
    # Joined with single spaces this yields the complete option string.
    params = " ".join([
        "-g ^CGTCCGAAGTAGC -g ^ATTGCCCTAG",
        "-a TTCCATGCAGCATT$ -a CCAGTCCCCCC$",
        "-a GCCGAACTTCTTAGACTGCCTTAAGGACGT",
    ])
    run(params, "illumina.fastq", "illumina.fastq.gz")
518
519
def test_multiple_prefix_adapters(run):
    """Two ^-prefixed -g adapters"""
    params = "-g ^GTACGGATTGTTCAGTA -g ^TATTAAGCTCATTC"
    run(params, "multiprefix.fasta", "multi.fasta")
522
523
def test_multiple_prefix_adapters_noindels(run):
    """Two ^-prefixed -g adapters with --no-indels"""
    params = "--no-indels -g ^GTACGGATTGTTCAGTA -g ^TATTAAGCTCATTC"
    run(params, "multiprefix.fasta", "multi.fasta")
526
527
def test_multiple_suffix_adapters_noindels(run):
    """Two $-suffixed -a adapters with --no-indels"""
    params = "--no-indels -a CGTGATTATCTTGC$ -a CCTATTAGTGGTTGAAC$"
    run(params, "multisuffix.fasta", "multi.fasta")
530
531
def test_max_n(run):
    """--max-n with absolute and fractional thresholds"""
    # (option string, expected output file, expected too_many_n count)
    cases = [
        ("--max-n 0", "maxn0.fasta", 4),
        ("--max-n 1", "maxn1.fasta", 2),
        ("--max-n 2", "maxn2.fasta", 1),
        ("--max-n 0.2", "maxn0.2.fasta", 3),
        ("--max-n 0.4", "maxn0.4.fasta", 2),
    ]
    for params, expected_file, expected_count in cases:
        assert run(params, expected_file, "maxn.fasta").too_many_n == expected_count
538
539
def test_quiet_is_quiet():
    """--quiet must not write anything to stdout or stderr.

    Both streams are replaced by in-memory text streams that additionally get
    a binary ``buffer`` attribute, so output written through either the text
    layer or the ``buffer`` attribute is detected. The real streams are
    restored even if main() raises.
    """
    captured_standard_output = StringIO()
    captured_standard_error = StringIO()
    # Attach a BytesIO as ``buffer`` so writes to ``sys.stdout.buffer`` and
    # ``sys.stderr.buffer`` are captured as well.
    setattr(captured_standard_output, "buffer", BytesIO())
    setattr(captured_standard_error, "buffer", BytesIO())
    old_stdout = sys.stdout
    old_stderr = sys.stderr
    try:
        sys.stdout = captured_standard_output
        sys.stderr = captured_standard_error
        main(['-o', os.devnull, '--quiet', datapath('small.fastq')])
    finally:
        sys.stdout = old_stdout
        sys.stderr = old_stderr
    assert captured_standard_output.getvalue() == ''
    assert captured_standard_error.getvalue() == ''
    # Check the byte-level buffer of each stream, stdout and stderr alike.
    assert getattr(captured_standard_output, "buffer").getvalue() == b''
    assert getattr(captured_standard_error, "buffer").getvalue() == b''
558
559
def test_x_brace_notation():
    """Smoke test: the adapter X{5} is accepted."""
    argv = ["-o", os.devnull, "--quiet", "-a", "X{5}", datapath("small.fastq")]
    main(argv)
562
563
def test_nextseq(run):
    """--nextseq-trim"""
    params = "--nextseq-trim 22"
    run(params, "nextseq.fastq", "nextseq.fastq")
566
567
def test_linked_explicitly_anchored(run):
    """Linked adapter whose first part is prefixed with ^"""
    params = "-a ^AAAAAAAAAA...TTTTTTTTTT"
    run(params, "linked.fasta", "linked.fasta")
570
571
def test_linked_multiple(run):
    """Two linked adapters given with -a"""
    params = "-a ^AAAAAAAAAA...TTTTTTTTTT -a ^AAAAAAAAAA...GCGCGCGCGC"
    run(params, "linked.fasta", "linked.fasta")
574
575
def test_linked_both_anchored(run):
    """Linked adapter with ^ on the first and $ on the second part"""
    params = "-a ^AAAAAAAAAA...TTTTT$"
    run(params, "linked-anchored.fasta", "linked.fasta")
578
579
def test_linked_5p_not_anchored(run):
    """Linked adapter given with -g and without ^"""
    params = "-g AAAAAAAAAA...TTTTTTTTTT"
    run(params, "linked-not-anchored.fasta", "linked.fasta")
582
583
def test_linked_discard_untrimmed(run):
    """Linked -a adapter combined with --discard-untrimmed"""
    params = "-a ^AAAAAAAAAA...TTTTTTTTTT --discard-untrimmed"
    run(params, "linked-discard.fasta", "linked.fasta")
586
587
def test_linked_discard_untrimmed_g(run):
    """Linked -g adapter combined with --discard-untrimmed"""
    params = "-g AAAAAAAAAA...TTTTTTTTTT --discard-untrimmed"
    run(params, "linked-discard-g.fasta", "linked.fasta")
590
591
def test_linked_lowercase(run):
    """Linked adapters combined with --times=2 and --action=lowercase"""
    params = "-a ^AACCGGTTTT...GGGGGGG$ -a ^AAAA...TTTT$ --times=2 --action=lowercase"
    run(params, "linked-lowercase.fasta", "linked.fasta")
595
596
def test_linked_info_file(tmpdir):
    """--info-file together with a named linked adapter.

    The adapter option and its value are passed as two separate argv
    elements, as in the other tests of this module, instead of one fused
    element containing an embedded space.
    """
    info_path = str(tmpdir.join('info.txt'))
    main([
        '-a', 'linkedadapter=^AAAAAAAAAA...TTTTTTTTTT',
        '--info-file', info_path,
        '-o', str(tmpdir.join('out.fasta')),
        datapath('linked.fasta'),
    ])
    assert_files_equal(cutpath('linked-info.txt'), info_path)
602
603
def test_linked_anywhere():
    """A linked adapter given with -b makes main() exit."""
    argv = ["-b", "AAA...TTT", datapath("linked.fasta")]
    with pytest.raises(SystemExit):
        main(argv)
607
608
def test_anywhere_anchored_5p():
    """A ^-prefixed adapter given with -b makes main() exit."""
    argv = ["-b", "^AAA", datapath("small.fastq")]
    with pytest.raises(SystemExit):
        main(argv)
612
613
def test_anywhere_anchored_3p():
    """A $-suffixed adapter given with -b makes main() exit."""
    argv = ["-b", "TTT$", datapath("small.fastq")]
    with pytest.raises(SystemExit):
        main(argv)
617
618
def test_fasta(run):
    """Run with a -a adapter; the first file name passed to run() is small.fasta."""
    params = "-a TTAGACATATCTCCGTCG"
    run(params, "small.fasta", "small.fastq")
621
622
def test_fasta_no_trim(run):
    """Run with an empty parameter list."""
    no_params = []
    run(no_params, "small-no-trim.fasta", "small.fastq")
625
626
def test_length(run):
    """--length with a positive value"""
    params = "--length 5"
    run(params, "shortened.fastq", "small.fastq")
629
630
def test_negative_length(run):
    """--length with a negative value"""
    params = "--length -5"
    run(params, "shortened-negative.fastq", "small.fastq")
633
634
@pytest.mark.timeout(0.5)
def test_issue_296(tmpdir):
    """Hang when using both --no-trim and --info-file together"""
    reads_path = str(tmpdir.join("reads.fasta"))
    with open(reads_path, "w") as reads_file:
        reads_file.write(">read\nCACAAA\n")
    info_path = str(tmpdir.join("info.txt"))
    out_path = str(tmpdir.join("out.fasta"))
    argv = ["--info-file", info_path, "--no-trim", "-g", "TTTCAC"]
    argv += ["-o", out_path, reads_path]
    main(argv)
    # Output should be unchanged because of --no-trim
    assert_files_equal(reads_path, out_path)
646
647
def test_xadapter(run):
    """-g adapter with a leading X"""
    params = "-g XTCCGAATAGA"
    run(params, "xadapter.fasta", "xadapterx.fasta")
650
651
def test_adapterx(run):
    """-a adapter with a trailing X"""
    params = "-a TCCGAATAGAX"
    run(params, "adapterx.fasta", "xadapterx.fasta")
654
655
def test_discard_casava(run):
    """--discard-casava"""
    statistics = run("--discard-casava", "casava.fastq", "casava.fastq")
    filtered = statistics.casava_filtered
    assert filtered == 1
659
660
def test_underscore(run):
    """File name ending in _fastq.gz (issue #275)"""
    params = "-b TTAGACATATCTCCGTCG"
    run(params, "small.fastq", "underscore_fastq.gz")
664
665
def test_cores_autodetect(run):
    """--cores 0"""
    params = "--cores 0 -b TTAGACATATCTCCGTCG"
    # Just make sure that it runs; functionality is not tested
    run(params, "small.fastq", "underscore_fastq.gz")
669
670
def test_write_compressed_fastq(cores, tmpdir):
    """Smoke test: run with -o pointing at out.fastq.gz."""
    out_path = str(tmpdir.join("out.fastq.gz"))
    argv = ["--cores", str(cores), "-o", out_path, datapath("small.fastq")]
    main(argv)
673
674
def test_minimal_report(run):
    """--report=minimal"""
    params = "-b TTAGACATATCTCCGTCG --report=minimal"
    run(params, "small.fastq", "small.fastq")
677
678
def test_paired_separate(run):
    """test separate trimming of paired-end reads"""
    # One independent run per read file, each with its own adapter.
    jobs = [
        ("-a TTAGACATAT", "paired-separate.1.fastq", "paired.1.fastq"),
        ("-a CAGTGGAGTA", "paired-separate.2.fastq", "paired.2.fastq"),
    ]
    for params, expected_file, input_file in jobs:
        run(params, expected_file, input_file)
683
684
def test_empty_read_with_wildcard_in_adapter(run):
    """Run with the -g adapter CWC on empty.fastq."""
    params = "-g CWC"
    run(params, "empty.fastq", "empty.fastq")
687
688
def test_print_progress_to_tty(tmpdir, mocker):
    """Smoke test: run while the patched stderr reports isatty() as True."""
    fake_stderr = mocker.patch("cutadapt.utils.sys.stderr")
    fake_stderr.isatty.return_value = True
    out_path = str(tmpdir.join("out.fastq"))
    main(["-o", out_path, datapath("small.fastq")])
692
693
def test_adapter_order(run):
    """The same -g and -a adapters given in both orders."""
    orderings = [
        ("-g ^AAACC -a CCGGG", "adapterorder-ga.fasta"),
        ("-a CCGGG -g ^AAACC", "adapterorder-ag.fasta"),
    ]
    for params, expected_file in orderings:
        run(params, expected_file, "adapterorder.fasta")
697
698
def test_reverse_complement_no_rc_suffix(run, tmp_path):
    """--revcomp together with --rename {header}: the read name stays "read2/1"."""
    out_path = str(tmp_path / "out.fastq")
    argv = ["-o", out_path, "--revcomp", "--no-index"]
    argv += ["--rename", "{header}"]
    argv += ["-g", "^TTATTTGTCT", "-g", "^TCCGCACTGG"]
    argv.append(datapath("revcomp.1.fastq"))
    main(argv)
    with dnaio.open(out_path) as reader:
        records = [record for record in reader]
    assert len(records) == 6
    second = records[1]
    assert second.name == "read2/1"
    assert second.sequence == "ACCATCCGATATGTCTAATGTGGCCTGTTG"
715
716
def test_reverse_complement_normalized(run):
    """--revcomp: check the read count and the reverse_complemented count."""
    params = "--revcomp --no-index -g ^TTATTTGTCT -g ^TCCGCACTGG"
    statistics = run(params, "revcomp-single-normalize.fastq", "revcomp.1.fastq")
    assert statistics.n == 6
    assert statistics.reverse_complemented == 2
725
726
def test_reverse_complement_and_info_file(run, tmp_path, cores):
    """--revcomp together with --info-file: check the name column of the info file."""
    info_path = str(tmp_path / "info.txt")
    params = ["--revcomp", "--no-index"]
    params += ["-g", "^TTATTTGTCT"]
    params += ["-g", "^TCCGCACTGG"]
    params += ["--info-file", info_path]
    run(params, "revcomp-single-normalize.fastq", "revcomp.1.fastq")
    with open(info_path) as info_file:
        rows = info_file.readlines()
    assert len(rows) == 6
    # The first tab-separated field of each row is compared to the read name.
    first_name = rows[0].split("\t")[0]
    second_name = rows[1].split("\t")[0]
    assert first_name == "read1/1"
    assert second_name == "read2/1 rc"
748
749
def test_max_expected_errors(run, cores):
    """--max-ee"""
    statistics = run("--max-ee=0.9", "maxee.fastq", "maxee.fastq")
    filtered = statistics.too_many_expected_errors
    assert filtered == 2
753
754
def test_max_expected_errors_fasta(tmp_path):
    """Smoke test: --max-ee on a FASTA input file."""
    fasta_path = tmp_path / "input.fasta"
    fasta_path.write_text(">read\nACGTACGT\n")
    argv = ["--max-ee=0.001", "-o", os.devnull, str(fasta_path)]
    main(argv)
759
760
def test_warn_if_en_dashes_used():
    """An option written with an en dash instead of a hyphen makes main() exit."""
    argv = ["–q", "25", "-o", os.devnull, "in.fastq"]
    with pytest.raises(SystemExit):
        main(argv)
764
765
@pytest.mark.parametrize("opt", ["-y", "--suffix"])
def test_suffix(opt, run):
    """-y/--suffix parameter"""
    params = [opt, " {name}", "-e", "0"]
    params += ["-a", "OnlyT=TTTTTTTT"]
    params += ["-a", "OnlyG=GGGGGGGG"]
    run(params, "suffix.fastq", "suffix.fastq")
770
771
@pytest.mark.parametrize("opt", ["--prefix", "--suffix"])
def test_rename_cannot_be_combined_with_other_renaming_options(opt):
    """--rename together with --prefix or --suffix makes main() exit."""
    argv = [opt, "something", "--rename='{id} {comment} extrainfo'"]
    argv += ["-o", os.devnull, datapath("empty.fastq")]
    with pytest.raises(SystemExit):
        main(argv)
776
777
def test_rename(run):
    """--rename with a template using several placeholders"""
    params = ["--rename={id}_{cut_suffix} {header} {adapter_name}"]
    params.append("--cut=-4")
    params += ["-a", "OnlyT=TTTTTT"]
    params += ["-a", "OnlyG=GGGGGG"]
    run(params, "rename.fastq", "suffix.fastq")
785