"""
Command-line tests: each test calls cutadapt's ``main()`` directly or goes
through the ``run`` fixture.

NOTE(review): the ``run`` and ``cores`` fixtures are defined outside this file
(presumably in conftest.py). From the way it is called here, ``run`` appears to
take ``(params, expected, input)`` and to return the statistics object of the
run -- confirm against the fixture definition.
"""
import subprocess
import sys
import os
from io import StringIO, BytesIO

import dnaio
import pytest

from cutadapt.__main__ import main
from utils import assert_files_equal, datapath, cutpath

# pytest.mark.timeout will not fail even if pytest-timeout is not installed
try:
    import pytest_timeout as _unused
except ImportError:  # pragma: no cover
    raise ImportError("pytest_timeout needs to be installed")
del _unused


def test_does_not_close_stdout():
    """main() must leave sys.stdout open"""
    main([datapath("small.fastq")])
    assert not sys.stdout.closed


def test_help():
    """--help exits with status 0"""
    with pytest.raises(SystemExit) as e:
        main(["--help"])
    assert e.value.args[0] == 0


def test_unknown_file_format(tmp_path):
    """input that is neither FASTA nor FASTQ makes main() exit"""
    path = tmp_path / "unknown_format.txt"
    path.write_text("raw text")
    with pytest.raises(SystemExit):
        main([str(path)])


def test_cores_negative():
    """--cores=-1 exits with status 2"""
    with pytest.raises(SystemExit) as e:
        main(["--cores=-1", datapath("simple.fasta")])
    assert e.value.args[0] == 2
    # "cannot be negative"


def test_quiet_and_report():
    """--quiet together with --report exits with status 2"""
    with pytest.raises(SystemExit) as e:
        main(["--quiet", "--report=minimal", datapath("simple.fasta")])
    assert e.value.args[0] == 2
    # "Options --quiet and --report cannot be used at the same time"


def test_debug():
    """--debug followed by the '--' separator"""
    main(["--debug", "--", datapath("small.fastq")])


def test_debug_trace():
    """--debug given twice"""
    main(["--debug", "--debug", "-a", "ACGT", datapath("small.fastq")])


def test_example(run):
    run('-N -b ADAPTER', 'example.fa', 'example.fa')


def test_compressed_fasta(run):
    run("", "simple.fasta", "simple.fasta.gz")


def test_small(run):
    run('-a TTAGACATATCTCCGTCG', 'small.fastq', 'small.fastq')


def test_empty(run, cores):
    """empty input"""
    run("--cores {} -a TTAGACATATCTCCGTCG".format(cores), "empty.fastq", "empty.fastq")


def test_newlines(run):
    """DOS/Windows newlines"""
    run('-e 0.12 -a TTAGACATATCTCCGTCG', 'dos.fastq', 'dos.fastq')


def test_lowercase(run):
    """lowercase adapter"""
    run('-a ttagacatatctccgtcg', 'lowercase.fastq', 'small.fastq')


def test_rest(run, tmpdir, cores):
    """-r/--rest-file"""
    rest = str(tmpdir.join("rest.tmp"))
    run(['--cores', str(cores), '-b', 'ADAPTER', '-N', '-r', rest], "rest.fa", "rest.fa")
    assert_files_equal(datapath('rest.txt'), rest)


def test_restfront(run, tmpdir):
    """-r/--rest-file with -g"""
    path = str(tmpdir.join("rest.txt"))
    run(['-g', 'ADAPTER', '-N', '-r', path], "restfront.fa", "rest.fa")
    assert_files_equal(datapath('restfront.txt'), path)


def test_discard(run):
    """--discard"""
    run("-b TTAGACATATCTCCGTCG --discard", "discard.fastq", "small.fastq")


def test_discard_untrimmed(run):
    """--discard-untrimmed"""
    run('-b CAAGAT --discard-untrimmed', 'discard-untrimmed.fastq', 'small.fastq')


def test_extensiontxtgz(run):
    """automatic recognition of "_sequence.txt.gz" extension"""
    run("-b TTAGACATATCTCCGTCG", "s_1_sequence.txt", "s_1_sequence.txt.gz")


def test_minimum_length(run):
    """-m/--minimum-length"""
    stats = run("-m 5 -a TTAGACATATCTCCGTCG", "minlen.fa", "lengths.fa")
    assert stats.written_bp[0] == 45
    assert stats.written == 6


def test_too_short(run, tmpdir, cores):
    """--too-short-output"""
    too_short_path = str(tmpdir.join('tooshort.fa'))
    stats = run([
        "--cores", str(cores),
        "-m", "5",
        "-a", "TTAGACATATCTCCGTCG",
        "--too-short-output", too_short_path
    ], "minlen.fa", "lengths.fa")
    assert_files_equal(datapath('tooshort.fa'), too_short_path)
    assert stats.too_short == 5


@pytest.mark.parametrize("redirect", (False, True))
def test_too_short_statistics(redirect):
    """statistics are the same with and without --too-short-output"""
    args = ["-a", "TTAGACATATCTCCGTCG", "-m", "24", "-o", os.devnull, datapath("small.fastq")]
    if redirect:
        # Prepend the option so that the input file stays the last argument
        args[:0] = ["--too-short-output", os.devnull]
    stats = main(args)
    assert stats.with_adapters[0] == 2
    assert stats.written == 2
    assert stats.written_bp[0] == 58
    assert stats.too_short == 1


def test_maximum_length(run):
    """-M/--maximum-length"""
    run("-M 5 -a TTAGACATATCTCCGTCG", "maxlen.fa", "lengths.fa")


def test_too_long(run, tmpdir, cores):
    """--too-long-output"""
    too_long_path = str(tmpdir.join('toolong.fa'))
    stats = run([
        "--cores", str(cores),
        "-M", "5",
        "-a", "TTAGACATATCTCCGTCG",
        "--too-long-output", too_long_path
    ], "maxlen.fa", "lengths.fa")
    assert_files_equal(datapath('toolong.fa'), too_long_path)
    assert stats.too_long == 5


def test_length_tag(run):
    """454 data; -n and --length-tag"""
    run("-n 3 -e 0.1 --length-tag length= "
        "-b TGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGG "
        "-b TCCATCTCATCCCTGCGTGTCCCATCTGTTCCCTCCCTGTCTCA", '454.fa', '454.fa')


@pytest.mark.parametrize("length", list(range(3, 11)))
def test_overlap_a(tmpdir, length):
    """-O/--overlap with -a"""
    adapter = "catatctccg"
    record = ">read\nGAGACCATTCCAATG" + adapter[:length] + '\n'
    # Named input_path (not "input") so that the builtin is not shadowed
    input_path = tmpdir.join("overlap.fasta")
    input_path.write(record)
    if length < 7:
        # Adapter prefix is shorter than the -O 7 used below: read stays as it is
        expected = record
    else:
        expected = '>read\nGAGACCATTCCAATG\n'
    output = tmpdir.join("overlap-trimmed.fasta")
    main(["-O", "7", "-e", "0", "-a", adapter, "-o", str(output), str(input_path)])
    assert expected == output.read()


def test_overlap_b(run):
    """-O/--overlap with -b"""
    run("-O 10 -b TTAGACATATCTCCGTCG", "overlapb.fa", "overlapb.fa")


def test_qualtrim(run):
    """-q with low qualities"""
    run("-q 10 -a XXXXXX", "lowqual.fastq", "lowqual.fastq")


def test_qualbase(run):
    """-q with low qualities, using ascii(quality+64) encoding"""
    run("-q 10 --quality-base 64 -a XXXXXX", "illumina64.fastq", "illumina64.fastq")


def test_quality_trim_only(run):
    """only trim qualities, do not remove adapters"""
    run("-q 10 --quality-base 64", "illumina64.fastq", "illumina64.fastq")


def test_twoadapters(run):
    """two adapters"""
    run("-a AATTTCAGGAATT -a GTTCTCTAGTTCT", "twoadapters.fasta", "twoadapters.fasta")


def test_polya(run):
    """poly-A tails"""
    run("-m 24 -O 10 -a AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "polya.fasta", "polya.fasta")


def test_polya_brace_notation(run):
    """poly-A tails, adapter given in brace notation"""
    run("-m 24 -O 10 -a A{35}", "polya.fasta", "polya.fasta")


# the same as --action=none
def test_no_trim(run):
    run("--no-trim --discard-untrimmed -a CCCTAGTTAAAC", 'no-trim.fastq', 'small.fastq')


def test_action_none(run):
    run("--action=none --discard-untrimmed -a CCCTAGTTAAAC", 'no-trim.fastq', 'small.fastq')


# the same as --action=mask
def test_mask_adapter(run):
    """mask adapter with N (reads maintain the same length)"""
    run("-b CAAG -n 3 --mask-adapter", "anywhere_repeat.fastq", "anywhere_repeat.fastq")


def test_action_mask(run):
    """mask adapter with N (reads maintain the same length)"""
    run("-b CAAG -n 3 --action=mask", "anywhere_repeat.fastq", "anywhere_repeat.fastq")


def test_action_lowercase(run):
    run("-b CAAG -n 3 --action=lowercase", "action_lowercase.fasta", "action_lowercase.fasta")


def test_action_retain(run):
    run("-g GGTTAACC -a CAAG --action=retain", "action_retain.fasta", "action_retain.fasta")


def test_action_retain_times():
    """--action=retain combined with --times=2 makes main() exit"""
    with pytest.raises(SystemExit):
        main(["-a", "ACGT", "--times=2", "--action=retain", datapath("small.fastq")])


def test_gz_multiblock(run):
    """compressed gz file with multiple blocks (created by concatenating two .gz files)"""
    run("-b TTAGACATATCTCCGTCG", "small.fastq", "multiblock.fastq.gz")


def test_read_wildcard(run):
    """test wildcards in reads"""
    run("--match-read-wildcards -b ACGTACGT", "wildcard.fa", "wildcard.fa")


@pytest.mark.parametrize("adapter_type,expected", [
    ("-a", "wildcard_adapter.fa"),
    ("-b", "wildcard_adapter_anywhere.fa"),
])
def test_adapter_wildcard(adapter_type, expected, run, tmpdir, cores):
    """wildcards in adapter"""
    wildcard_path = str(tmpdir.join("wildcards.txt"))
    run([
        "--cores", str(cores),
        "--wildcard-file", wildcard_path,
        adapter_type, "ACGTNNNACGT"
    ], expected, "wildcard_adapter.fa")
    with open(wildcard_path) as wct:
        lines = [line.strip() for line in wct]
    assert lines == ["AAA 1", "GGG 2", "CCC 3b", "TTT 4b"]


def test_wildcard_N(run):
    """test 'N' wildcard matching with no allowed errors"""
    run("-e 0 -a GGGGGGG --match-read-wildcards", "wildcardN.fa", "wildcardN.fa")


def test_illumina_adapter_wildcard(run):
    run("-a VCCGAMCYUCKHRKDCUBBCNUWNSGHCGU", "illumina.fastq", "illumina.fastq.gz")


def test_adapter_front(run):
    """test adapter in front"""
    run("--front ADAPTER -N", "examplefront.fa", "example.fa")


def test_literal_N(run):
    """test matching literal 'N's"""
    run("-N -e 0.2 -a NNNNNNNNNNNNNN", "trimN3.fasta", "trimN3.fasta")


def test_literal_N2(run):
    run("-N -O 1 -g NNNNNNNNNNNNNN", "trimN5.fasta", "trimN5.fasta")


def test_literal_N_brace_notation(run):
    """test matching literal 'N's"""
    run("-N -e 0.2 -a N{14}", "trimN3.fasta", "trimN3.fasta")


def test_literal_N2_brace_notation(run):
    run("-N -O 1 -g N{14}", "trimN5.fasta", "trimN5.fasta")


def test_anchored_front(run):
    run("-g ^FRONTADAPT -N", "anchored.fasta", "anchored.fasta")


def test_anchored_front_ellipsis_notation(run):
    run("-a ^FRONTADAPT... -N", "anchored.fasta", "anchored.fasta")


def test_anchored_back(run):
    run("-a BACKADAPTER$ -N", "anchored-back.fasta", "anchored-back.fasta")


def test_anchored_back_ellipsis_notation(run):
    run("-a ...BACKADAPTER$ -N", "anchored-back.fasta", "anchored-back.fasta")


def test_anchored_back_no_indels(run):
    run("-a BACKADAPTER$ -N --no-indels", "anchored-back.fasta", "anchored-back.fasta")


def test_no_indels(run):
    run('-a TTAGACATAT -g GAGATTGCCA --no-indels', 'no_indels.fasta', 'no_indels.fasta')


def test_ellipsis_notation(run):
    run('-a ...TTAGACATAT -g GAGATTGCCA --no-indels', 'no_indels.fasta', 'no_indels.fasta')


def test_issue_46(run, tmpdir):
    """issue 46 - IndexError with --wildcard-file"""
    run("--anywhere=AACGTN --wildcard-file={}".format(
        tmpdir.join("wildcards.txt")), "issue46.fasta", "issue46.fasta")


def test_strip_suffix(run):
    run("--strip-suffix _sequence -a XXXXXXX", "stripped.fasta", "simple.fasta")


def test_info_file(run, tmpdir, cores):
    # The true adapter sequence in the illumina.fastq.gz data set is
    # GCCTAACTTCTTAGACTGCCTTAAGGACGT (fourth base is different from the sequence shown here)
    info_path = str(tmpdir.join("info.txt"))
    run(["--cores", str(cores), "--info-file", info_path, "-a", "adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT"],
        "illumina.fastq", "illumina.fastq.gz")
    assert_files_equal(cutpath("illumina.info.txt"), info_path)


def test_info_file_times(run, tmpdir, cores):
    """--info-file together with --times 2 and two named adapters"""
    info_path = str(tmpdir.join("info.txt"))
    run(["--cores", str(cores), "--info-file", info_path, "--times", "2", "-a", "adapt=GCCGAACTTCTTA",
        "-a", "adapt2=GACTGCCTTAAGGACGT"], "illumina5.fastq", "illumina5.fastq")
    assert_files_equal(cutpath('illumina5.info.txt'), info_path)


def test_info_file_fasta(run, tmpdir, cores):
    info_path = str(tmpdir.join("info.txt"))
    # Just make sure that it runs
    run(["--cores", str(cores), "--info-file", info_path, "-a", "TTAGACATAT", "-g", "GAGATTGCCA", "--no-indels"],
        "no_indels.fasta", "no_indels.fasta")


def test_named_adapter(run):
    run("-a MY_ADAPTER=GCCGAACTTCTTAGACTGCCTTAAGGACGT", "illumina.fastq", "illumina.fastq.gz")


def test_adapter_with_u(run):
    run("-a GCCGAACUUCUUAGACUGCCUUAAGGACGU", "illumina.fastq", "illumina.fastq.gz")


def test_bzip2_input(run, cores):
    run(["--cores", str(cores), "-a", "TTAGACATATCTCCGTCG"], "small.fastq", "small.fastq.bz2")


@pytest.mark.parametrize("extension", ["bz2", "xz", "gz"])
def test_compressed_output(tmp_path, cores, extension):
    """writing to an output path with a compression extension runs without error"""
    out_path = str(tmp_path / ("small.fastq." + extension))
    params = [
        "--cores", str(cores), "-a", "TTAGACATATCTCCGTCG", "-o", out_path, datapath("small.fastq")]
    main(params)


# This test used to be defined only on Python >= 3.3. The guard was dropped
# because this module already needs a newer Python (subprocess.run).
def test_bzip2_multiblock(run):
    run('-b TTAGACATATCTCCGTCG', 'small.fastq', 'multiblock.fastq.bz2')


def test_xz(run):
    run('-b TTAGACATATCTCCGTCG', 'small.fastq', 'small.fastq.xz')


def test_no_args():
    """an empty argument list makes main() exit"""
    with pytest.raises(SystemExit):
        main([])


def test_two_fastqs():
    """two input files without further options make main() exit"""
    with pytest.raises(SystemExit):
        main([datapath('paired.1.fastq'), datapath('paired.2.fastq')])


def test_anchored_no_indels(run):
    """anchored 5' adapter, mismatches only (no indels)"""
    run('-g ^TTAGACATAT --no-indels -e 0.1', 'anchored_no_indels.fasta', 'anchored_no_indels.fasta')


def test_anchored_no_indels_wildcard_read(run):
    """anchored 5' adapter, mismatches only (no indels), but wildcards in the read count as matches"""
    run('-g ^TTAGACATAT --match-read-wildcards --no-indels -e 0.1',
        'anchored_no_indels_wildcard.fasta', 'anchored_no_indels.fasta')


def test_anchored_no_indels_wildcard_adapt(run):
    """anchored 5' adapter, mismatches only (no indels), but wildcards in the adapter count as matches"""
    run('-g ^TTAGACANAT --no-indels -e 0.12', 'anchored_no_indels.fasta', 'anchored_no_indels.fasta')


def test_non_iupac_characters(run):
    """the adapter ZACGT makes main() exit"""
    with pytest.raises(SystemExit):
        main(['-a', 'ZACGT', datapath('small.fastq')])


def test_unconditional_cut_front(run):
    run('-u 5', 'unconditional-front.fastq', 'small.fastq')


def test_unconditional_cut_back(run):
    run('-u -5', 'unconditional-back.fastq', 'small.fastq')


def test_unconditional_cut_both(run):
    run('-u -5 -u 5', 'unconditional-both.fastq', 'small.fastq')


def test_unconditional_cut_too_many_commas():
    """-u 5,7,8 makes main() exit"""
    with pytest.raises(SystemExit):
        main(["-u", "5,7,8", datapath("small.fastq")])


def test_unconditional_cut_invalid_number():
    """-u a,b makes main() exit"""
    with pytest.raises(SystemExit):
        main(["-u", "a,b", datapath("small.fastq")])


def test_untrimmed_output(run, cores, tmpdir):
    """--untrimmed-output"""
    path = str(tmpdir.join("untrimmed.fastq"))
    stats = run(["--cores", str(cores), "-a", "TTAGACATATCTCCGTCG", "--untrimmed-output", path],
        "small.trimmed.fastq", "small.fastq")
    assert_files_equal(cutpath("small.untrimmed.fastq"), path)
    assert stats.with_adapters[0] == 2
    assert stats.written == 2
    assert stats.written_bp[0] == 46


def test_adapter_file(run):
    run('-a file:' + datapath('adapter.fasta'), 'illumina.fastq', 'illumina.fastq.gz')


def test_adapter_file_5p_anchored(run):
    run('-N -g file:' + datapath('prefix-adapter.fasta'), 'anchored.fasta', 'anchored.fasta')


def test_adapter_file_3p_anchored(run):
    run('-N -a file:' + datapath('suffix-adapter.fasta'), 'anchored-back.fasta', 'anchored-back.fasta')


def test_adapter_file_5p_anchored_no_indels(run):
    run('-N --no-indels -g file:' + datapath('prefix-adapter.fasta'), 'anchored.fasta', 'anchored.fasta')


def test_adapter_file_3p_anchored_no_indels(run):
    run('-N --no-indels -a file:' + datapath('suffix-adapter.fasta'), 'anchored-back.fasta', 'anchored-back.fasta')


def test_adapter_file_empty_name(run):
    run('-N -a file:' + datapath('adapter-empty-name.fasta'), 'illumina.fastq', 'illumina.fastq.gz')


@pytest.mark.parametrize("ext", ["", ".gz"])
def test_demultiplex(cores, tmp_path, ext):
    """{name} in the output path gives one output file per adapter name plus "unknown\""""
    multiout = str(tmp_path / 'tmp-demulti.{name}.fasta') + ext
    params = [
        '--cores', str(cores),
        '-a', 'first=AATTTCAGGAATT',
        '-a', 'second=GTTCTCTAGTTCT',
        '-o', multiout,
        datapath('twoadapters.fasta'),
    ]
    main(params)
    for name in ("first", "second", "unknown"):
        actual = multiout.format(name=name)
        if ext == ".gz":
            # Decompress in place, then compare the file without the ".gz" suffix
            subprocess.run(["gzip", "-d", actual], check=True)
            actual = actual[:-3]
        expected = cutpath("twoadapters.{name}.fasta".format(name=name))
        assert_files_equal(expected, actual)


def test_multiple_fake_anchored_adapters(run):
    run("-g ^CGTCCGAAGTAGC -g ^ATTGCCCTAG "
        "-a TTCCATGCAGCATT$ -a CCAGTCCCCCC$ "
        "-a GCCGAACTTCTTAGACTGCCTTAAGGACGT",
        "illumina.fastq",
        "illumina.fastq.gz")


def test_multiple_prefix_adapters(run):
    run("-g ^GTACGGATTGTTCAGTA -g ^TATTAAGCTCATTC", "multiprefix.fasta", "multi.fasta")


def test_multiple_prefix_adapters_noindels(run):
    run("--no-indels -g ^GTACGGATTGTTCAGTA -g ^TATTAAGCTCATTC", "multiprefix.fasta", "multi.fasta")


def test_multiple_suffix_adapters_noindels(run):
    run("--no-indels -a CGTGATTATCTTGC$ -a CCTATTAGTGGTTGAAC$", "multisuffix.fasta", "multi.fasta")


def test_max_n(run):
    """--max-n with absolute counts and with fractions"""
    assert run('--max-n 0', 'maxn0.fasta', 'maxn.fasta').too_many_n == 4
    assert run('--max-n 1', 'maxn1.fasta', 'maxn.fasta').too_many_n == 2
    assert run('--max-n 2', 'maxn2.fasta', 'maxn.fasta').too_many_n == 1
    assert run('--max-n 0.2', 'maxn0.2.fasta', 'maxn.fasta').too_many_n == 3
    assert run('--max-n 0.4', 'maxn0.4.fasta', 'maxn.fasta').too_many_n == 2


def test_quiet_is_quiet():
    """--quiet writes nothing to stdout or stderr, neither as text nor as bytes"""
    captured_standard_output = StringIO()
    captured_standard_error = StringIO()
    # Give the replacement streams a "buffer" attribute, as the real
    # sys.stdout/sys.stderr have, so that binary writes are captured as well
    setattr(captured_standard_output, "buffer", BytesIO())
    setattr(captured_standard_error, "buffer", BytesIO())
    old_stdout = sys.stdout
    old_stderr = sys.stderr
    try:
        sys.stdout = captured_standard_output
        sys.stderr = captured_standard_error
        main(['-o', os.devnull, '--quiet', datapath('small.fastq')])
    finally:
        sys.stdout = old_stdout
        sys.stderr = old_stderr
    assert captured_standard_output.getvalue() == ''
    assert captured_standard_error.getvalue() == ''
    assert getattr(captured_standard_output, "buffer").getvalue() == b''
    # This used to check the stdout buffer a second time, which left the
    # stderr buffer unchecked
    assert getattr(captured_standard_error, "buffer").getvalue() == b''


def test_x_brace_notation():
    """the adapter X{5} is accepted"""
    main(['-o', os.devnull, '--quiet', '-a', 'X{5}', datapath('small.fastq')])


def test_nextseq(run):
    run('--nextseq-trim 22', 'nextseq.fastq', 'nextseq.fastq')


def test_linked_explicitly_anchored(run):
    run('-a ^AAAAAAAAAA...TTTTTTTTTT', 'linked.fasta', 'linked.fasta')


def test_linked_multiple(run):
    run('-a ^AAAAAAAAAA...TTTTTTTTTT -a ^AAAAAAAAAA...GCGCGCGCGC', 'linked.fasta', 'linked.fasta')


def test_linked_both_anchored(run):
    run('-a ^AAAAAAAAAA...TTTTT$', 'linked-anchored.fasta', 'linked.fasta')


def test_linked_5p_not_anchored(run):
    run('-g AAAAAAAAAA...TTTTTTTTTT', 'linked-not-anchored.fasta', 'linked.fasta')


def test_linked_discard_untrimmed(run):
    run('-a ^AAAAAAAAAA...TTTTTTTTTT --discard-untrimmed', 'linked-discard.fasta', 'linked.fasta')


def test_linked_discard_untrimmed_g(run):
    run('-g AAAAAAAAAA...TTTTTTTTTT --discard-untrimmed', 'linked-discard-g.fasta', 'linked.fasta')


def test_linked_lowercase(run):
    run('-a ^AACCGGTTTT...GGGGGGG$ -a ^AAAA...TTTT$ --times=2 --action=lowercase',
        'linked-lowercase.fasta', 'linked.fasta')


def test_linked_info_file(tmpdir):
    """--info-file with a named linked adapter"""
    info_path = str(tmpdir.join('info.txt'))
    main(['-a linkedadapter=^AAAAAAAAAA...TTTTTTTTTT', '--info-file', info_path,
        '-o', str(tmpdir.join('out.fasta')), datapath('linked.fasta')])
    assert_files_equal(cutpath('linked-info.txt'), info_path)


def test_linked_anywhere():
    """a linked adapter given with -b makes main() exit"""
    with pytest.raises(SystemExit):
        main(['-b', 'AAA...TTT', datapath('linked.fasta')])


def test_anywhere_anchored_5p():
    """-b with a '^' prefix makes main() exit"""
    with pytest.raises(SystemExit):
        main(['-b', '^AAA', datapath('small.fastq')])


def test_anywhere_anchored_3p():
    """-b with a '$' suffix makes main() exit"""
    with pytest.raises(SystemExit):
        main(['-b', 'TTT$', datapath('small.fastq')])


def test_fasta(run):
    run('-a TTAGACATATCTCCGTCG', 'small.fasta', 'small.fastq')


def test_fasta_no_trim(run):
    run([], 'small-no-trim.fasta', 'small.fastq')


def test_length(run):
    run('--length 5', 'shortened.fastq', 'small.fastq')


def test_negative_length(run):
    run('--length -5', 'shortened-negative.fastq', 'small.fastq')


@pytest.mark.timeout(0.5)
def test_issue_296(tmpdir):
    # Hang when using both --no-trim and --info-file together
    info_path = str(tmpdir.join('info.txt'))
    reads_path = str(tmpdir.join('reads.fasta'))
    out_path = str(tmpdir.join('out.fasta'))
    with open(reads_path, 'w') as f:
        f.write('>read\nCACAAA\n')
    main(['--info-file', info_path, '--no-trim', '-g', 'TTTCAC', '-o', out_path, reads_path])
    # Output should be unchanged because of --no-trim
    assert_files_equal(reads_path, out_path)


def test_xadapter(run):
    run('-g XTCCGAATAGA', 'xadapter.fasta', 'xadapterx.fasta')


def test_adapterx(run):
    run('-a TCCGAATAGAX', 'adapterx.fasta', 'xadapterx.fasta')


def test_discard_casava(run):
    """--discard-casava"""
    stats = run('--discard-casava', 'casava.fastq', 'casava.fastq')
    assert stats.casava_filtered == 1


def test_underscore(run):
    """File name ending in _fastq.gz (issue #275)"""
    run('-b TTAGACATATCTCCGTCG', 'small.fastq', 'underscore_fastq.gz')


def test_cores_autodetect(run):
    # Just make sure that it runs; functionality is not tested
    run('--cores 0 -b TTAGACATATCTCCGTCG', 'small.fastq', 'underscore_fastq.gz')


def test_write_compressed_fastq(cores, tmpdir):
    main(['--cores', str(cores), '-o', str(tmpdir.join('out.fastq.gz')), datapath('small.fastq')])


def test_minimal_report(run):
    run('-b TTAGACATATCTCCGTCG --report=minimal', 'small.fastq', 'small.fastq')


def test_paired_separate(run):
    """test separate trimming of paired-end reads"""
    run("-a TTAGACATAT", "paired-separate.1.fastq", "paired.1.fastq")
    run("-a CAGTGGAGTA", "paired-separate.2.fastq", "paired.2.fastq")


def test_empty_read_with_wildcard_in_adapter(run):
    run("-g CWC", "empty.fastq", "empty.fastq")


def test_print_progress_to_tty(tmpdir, mocker):
    """run with cutadapt.utils.sys.stderr patched so that isatty() returns True"""
    mocker.patch("cutadapt.utils.sys.stderr").isatty.return_value = True
    main(["-o", str(tmpdir.join("out.fastq")), datapath("small.fastq")])


def test_adapter_order(run):
    """-g and -a given in either order"""
    run("-g ^AAACC -a CCGGG", "adapterorder-ga.fasta", "adapterorder.fasta")
    run("-a CCGGG -g ^AAACC", "adapterorder-ag.fasta", "adapterorder.fasta")


def test_reverse_complement_no_rc_suffix(run, tmp_path):
    """--revcomp with --rename {header}: the second read's name has no " rc" suffix"""
    out_path = str(tmp_path / "out.fastq")
    main([
        "-o", out_path,
        "--revcomp",
        "--no-index",
        "--rename", "{header}",
        "-g", "^TTATTTGTCT",
        "-g", "^TCCGCACTGG",
        datapath("revcomp.1.fastq")
    ])
    with dnaio.open(out_path) as f:
        reads = list(f)
    assert len(reads) == 6
    assert reads[1].name == "read2/1"
    assert reads[1].sequence == "ACCATCCGATATGTCTAATGTGGCCTGTTG"


def test_reverse_complement_normalized(run):
    """--revcomp: statistics report the number of reverse-complemented reads"""
    stats = run(
        "--revcomp --no-index -g ^TTATTTGTCT -g ^TCCGCACTGG",
        "revcomp-single-normalize.fastq",
        "revcomp.1.fastq",
    )
    assert stats.n == 6
    assert stats.reverse_complemented == 2


def test_reverse_complement_and_info_file(run, tmp_path, cores):
    """--revcomp with --info-file: the second read's name gets an " rc" suffix"""
    info_path = str(tmp_path / "info.txt")
    run(
        [
            "--revcomp",
            "--no-index",
            "-g",
            "^TTATTTGTCT",
            "-g",
            "^TCCGCACTGG",
            "--info-file",
            info_path,
        ],
        "revcomp-single-normalize.fastq",
        "revcomp.1.fastq",
    )
    with open(info_path) as f:
        lines = f.readlines()
    assert len(lines) == 6
    assert lines[0].split("\t")[0] == "read1/1"
    assert lines[1].split("\t")[0] == "read2/1 rc"


def test_max_expected_errors(run, cores):
    """--max-ee"""
    stats = run("--max-ee=0.9", "maxee.fastq", "maxee.fastq")
    assert stats.too_many_expected_errors == 2


def test_max_expected_errors_fasta(tmp_path):
    """--max-ee on FASTA input runs without error"""
    path = tmp_path / "input.fasta"
    path.write_text(">read\nACGTACGT\n")
    main(["--max-ee=0.001", "-o", os.devnull, str(path)])


def test_warn_if_en_dashes_used():
    """an option written with an en dash instead of a hyphen makes main() exit"""
    with pytest.raises(SystemExit):
        main(["–q", "25", "-o", os.devnull, "in.fastq"])


@pytest.mark.parametrize("opt", ["-y", "--suffix"])
def test_suffix(opt, run):
    """-y/--suffix parameter"""
    run([opt, ' {name}', '-e', '0', '-a', 'OnlyT=TTTTTTTT', '-a', 'OnlyG=GGGGGGGG'], "suffix.fastq", "suffix.fastq")


@pytest.mark.parametrize("opt", ["--prefix", "--suffix"])
def test_rename_cannot_be_combined_with_other_renaming_options(opt):
    """--rename together with --prefix or --suffix makes main() exit"""
    with pytest.raises(SystemExit):
        main([opt, "something", "--rename='{id} {comment} extrainfo'", "-o", os.devnull, datapath("empty.fastq")])


def test_rename(run):
    """--rename with a template that uses several placeholders"""
    run([
        "--rename={id}_{cut_suffix} {header} {adapter_name}",
        "--cut=-4",
        "-a", "OnlyT=TTTTTT",
        "-a", "OnlyG=GGGGGG",
    ], "rename.fastq", "suffix.fastq")