"""Tests for adapter specification parsing (``cutadapt.parser``)."""
from textwrap import dedent
import pytest

from dnaio import Sequence
from cutadapt.adapters import LinkedAdapter, BackAdapter, FrontAdapter, InvalidCharacter
from cutadapt.parser import AdapterParser, AdapterSpecification
from cutadapt.modifiers import AdapterCutter, ModificationInfo


def test_expand_braces():
    """expand_braces() repeats the character preceding ``{n}`` n times."""
    expand_braces = AdapterSpecification.expand_braces
    assert expand_braces('') == ''
    assert expand_braces('A') == 'A'
    assert expand_braces('A{0}') == ''
    assert expand_braces('A{1}') == 'A'
    assert expand_braces('A{2}') == 'AA'
    assert expand_braces('A{2}C') == 'AAC'
    assert expand_braces('ACGTN{3}TGACCC') == 'ACGTNNNTGACCC'
    assert expand_braces('ACGTN{10}TGACCC') == 'ACGTNNNNNNNNNNTGACCC'
    assert expand_braces('ACGTN{3}TGA{4}CCC') == 'ACGTNNNTGAAAACCC'
    assert expand_braces('ACGTN{0}TGA{4}CCC') == 'ACGTTGAAAACCC'


def test_expand_braces_fail():
    """Malformed brace expressions make expand_braces() raise ValueError."""
    for expression in ['{', '}', '{}', '{5', '{1}', 'A{-7}', 'A{', 'A{1', 'N{7', 'AN{7', 'A{4{}',
            'A{4}{3}', 'A{b}', 'A{6X}', 'A{X6}', 'A}A']:
        with pytest.raises(ValueError):
            AdapterSpecification.expand_braces(expression)


def test_parse_file_notation(tmpdir):
    """A ``file:`` specification yields one adapter per FASTA record.

    Every adapter must carry the settings the parser was created with.
    """
    tmp_path = str(tmpdir.join('adapters.fasta'))
    with open(tmp_path, 'w') as f:
        # The backslash puts the first record on an indented line of its own,
        # giving all lines a common margin that dedent() can strip. Without it
        # the margin is empty and dedent() is a no-op.
        f.write(dedent("""\
            >first_name
            ADAPTER1
            >second_name
            ADAPTER2
            """))
    parser = AdapterParser(
        max_errors=0.2, min_overlap=4, read_wildcards=False,
        adapter_wildcards=False, indels=False)

    adapters = list(parser.parse('file:' + tmp_path, cmdline_type='back'))
    assert len(adapters) == 2
    assert adapters[0].name == 'first_name'
    assert adapters[0].sequence == 'ADAPTER1'
    assert adapters[1].name == 'second_name'
    assert adapters[1].sequence == 'ADAPTER2'
    for a in adapters:
        assert a.max_error_rate == 0.2
        assert a.min_overlap == 4
        assert not a.read_wildcards
        assert not a.adapter_wildcards
        assert not a.indels


def test_parse_not_linked():
    """AdapterSpecification.parse() recognizes names and placement markers."""
    p = AdapterSpecification.parse
    assert p('A', 'front') == AdapterSpecification(None, None, 'A', {}, 'front')
    assert p('A', 'back') == AdapterSpecification(None, None, 'A', {}, 'back')
    assert p('A', 'anywhere') == AdapterSpecification(None, None, 'A', {}, 'anywhere')
    assert p('^A', 'front') == AdapterSpecification(None, 'anchored', 'A', {}, 'front')
    assert p('XXXA', 'front') == AdapterSpecification(None, 'noninternal', 'A', {}, 'front')
    assert p('A$', 'back') == AdapterSpecification(None, 'anchored', 'A', {}, 'back')
    assert p('AXXXX', 'back') == AdapterSpecification(None, 'noninternal', 'A', {}, 'back')
    assert p('a_name=ADAPT', 'front') == AdapterSpecification('a_name', None, 'ADAPT', {}, 'front')


@pytest.mark.parametrize("where", ("front", "back"))
@pytest.mark.parametrize("reqopt", ("required", "optional"))
def test_parse_invalid_adapter_specific_parameter(where, reqopt):
    """``required``/``optional`` are rejected on a non-linked adapter."""
    parser = AdapterParser()
    with pytest.raises(ValueError) as e:
        parser._parse_not_linked("A;{}".format(reqopt), "name", where)
    # Checked after the block: code following the raising call inside the
    # ``with`` body would never run.
    assert "can only be used within linked adapters" in e.value.args[0]


def test_parse_invalid_cmdline_type():
    """An unknown cmdline_type makes AdapterSpecification._parse() raise."""
    with pytest.raises(ValueError) as e:
        AdapterSpecification._parse('A', 'invalid_type')
    assert "cmdline_type must be front, back or anywhere" in e.value.args[0]


@pytest.mark.parametrize("spec,cmdline_type", [
    ("^XA", "front"),
    ("^AX", "front"),
    ("XA$", "back"),
    ("AX$", "back"),
])
def test_parse_double_placement_restrictions(spec, cmdline_type):
    """Combining an anchor (``^``/``$``) with an ``X`` marker is an error."""
    with pytest.raises(ValueError) as e:
        AdapterSpecification._parse(spec, cmdline_type)
    assert "cannot use multiple placement restrictions" in e.value.args[0]


def test_parse_misplaced_placement_restrictions():
    """``$`` on a front adapter and ``^`` on a back adapter are rejected."""
    with pytest.raises(ValueError) as e:
        AdapterSpecification._parse("A$", "front")
    assert "Allowed placement restrictions for a 5' adapter" in e.value.args[0]
    with pytest.raises(ValueError) as e:
        AdapterSpecification._parse("^A", "back")
    assert "Allowed placement restrictions for a 3' adapter" in e.value.args[0]


def test_restriction_to_class():
    """A placement restriction together with type "anywhere" is an error."""
    with pytest.raises(ValueError) as e:
        AdapterSpecification._restriction_to_class("anywhere", "noninternal")
    assert "No placement may be specified" in e.value.args[0]


def test_parse_parameters():
    """_parse_parameters() maps short and long names to canonical keys.

    Also covers the error cases: bad values, unknown names, duplicates and
    the conflicting combination of ``optional`` and ``required``.
    """
    p = AdapterSpecification._parse_parameters
    assert p('e=0.1') == {'max_errors': 0.1}
    assert p('error_rate=0.1') == {'max_errors': 0.1}
    assert p('max_errors=2') == {'max_errors': 2}
    assert p('o=5') == {'min_overlap': 5}
    assert p('min_overlap=5') == {'min_overlap': 5}
    assert p('o=7; e=0.4') == {'min_overlap': 7, 'max_errors': 0.4}
    assert p('anywhere') == {'anywhere': True}
    assert p('required') == {'required': True}
    assert p('optional') == {'required': False}

    with pytest.raises(ValueError):
        p('e=hallo')
    with pytest.raises(KeyError):
        p('bla=0.1')
    with pytest.raises(ValueError):
        p('e=')
    with pytest.raises(KeyError) as e:
        p('e=0.1;e=0.1')
    assert "specified twice" in e.value.args[0]
    with pytest.raises(KeyError) as e:
        p('e=0.1;max_errors=0.1')
    assert "specified twice" in e.value.args[0]
    with pytest.raises(ValueError) as e:
        p('optional; required')
    assert "cannot be specified at the same time" in e.value.args[0]


def test_parse_with_parameters(tmp_path):
    """Per-adapter parameters override the parser-wide defaults."""
    parser = AdapterParser(
        max_errors=0.2, min_overlap=4, read_wildcards=False,
        adapter_wildcards=False, indels=False)
    a = parser._parse('ACGTACGT; e=0.15', 'front')
    assert isinstance(a, FrontAdapter)
    assert a.max_error_rate == 0.15
    assert a.min_overlap == 4

    a = parser._parse('ACGTAAAA; o=5; e=0.11', 'back')
    assert isinstance(a, BackAdapter)
    assert a.max_error_rate == 0.11
    assert a.min_overlap == 5

    # The ellipsis may be written with or without surrounding spaces
    for spec in ('thename=ACG;e=0.15 ... TGT;e=0.17', 'thename=ACG;e=0.15...TGT;e=0.17'):
        a = parser._parse(spec, 'back')
        assert isinstance(a, LinkedAdapter)
        assert a.front_adapter.max_error_rate == 0.15
        assert a.back_adapter.max_error_rate == 0.17

    with pytest.raises(ValueError) as e:
        parser._parse("A", "invalid-cmdline-type")
    assert "cmdline_type cannot be" in e.value.args[0]


def test_parse_with_adapter_sequence_as_a_path(tmp_path):
    """A file path given without ``file:`` fails with a helpful message."""
    parser = AdapterParser()
    with pytest.raises(InvalidCharacter):
        parser._parse("invalid.character", "back")
    # user forgot to write "file:"
    path = (tmp_path / "afile.fasta")
    path.write_text(">abc\nACGT\n")
    with pytest.raises(InvalidCharacter) as e:
        list(parser.parse(str(path), "back"))
    assert "A file exists named" in e.value.args[0]


def test_parse_multi():
    """parse_multi() rejects an unknown adapter type."""
    parser = AdapterParser()
    with pytest.raises(ValueError) as e:
        parser.parse_multi([("invalid-type", "A")])
    assert "adapter type must be" in e.value.args[0]


def test_normalize_ellipsis():
    """_normalize_ellipsis() resolves specs that have an empty side."""
    ne = AdapterParser._normalize_ellipsis
    assert ne("ACGT", "", "back") == ("ACGT", "front")  # -a ACGT...
    assert ne("ACGT", "", "front") == ("ACGT", "front")  # -g ACGT...
    assert ne("", "ACGT", "back") == ("ACGT", "back")  # -a ...ACGT
    with pytest.raises(ValueError) as e:
        # -g ...ACGT
        ne("", "ACGT", "front")
    assert "Invalid adapter specification" in e.value.args[0]

    with pytest.raises(ValueError) as e:
        ne("A", "C", "back")
    assert "either" in e.value.args[0]
    with pytest.raises(ValueError) as e:
        ne("A", "", "anywhere")
    assert "No ellipsis" in e.value.args[0]


@pytest.mark.parametrize("seq,req1,req2", [
    ("ACG...TGT", False, False),
    ("ACG...TGT$", False, True),
    ("^ACG...TGT", True, False),
    ("^ACG...TGT$", True, True),
])
def test_anchoring_makes_front_linked_adapter_required(seq, req1, req2):
    """With ``-a``, an anchored part of a linked adapter becomes required."""
    # -a X...Y
    a = AdapterParser()._parse(seq, "back")
    assert isinstance(a, LinkedAdapter)
    assert a.front_required is req1
    assert a.back_required is req2


@pytest.mark.parametrize("r1,r2,req1,req2", [
    ("", "", False, False),
    ("", ";required", False, True),
    (";required", "", True, False),
    (";required", ";required", True, True),
    ("", ";optional", False, False),
    (";optional", "", False, False),
    (";optional", ";optional", False, False),
])
def test_linked_adapter_back_required_optional(r1, r2, req1, req2):
    """With ``-a``, linked parts are optional unless marked ``required``."""
    # -a X...Y
    a = AdapterParser()._parse("ACG" + r1 + "...TGT" + r2, "back")
    assert isinstance(a, LinkedAdapter)
    assert a.front_required is req1
    assert a.back_required is req2


@pytest.mark.parametrize("r1,r2,exp1,exp2", [
    ("", "", True, True),
    ("", ";required", True, True),
    (";required", "", True, True),
    (";required", ";required", True, True),
    ("", ";optional", True, False),
    (";optional", "", False, True),
    (";optional", ";optional", False, False),
])
def test_linked_adapter_front_required_optional(r1, r2, exp1, exp2):
    """With ``-g``, linked parts are required unless marked ``optional``."""
    # -g X...Y
    a = AdapterParser()._parse("ACG" + r1 + "...TGT" + r2, "front")
    assert isinstance(a, LinkedAdapter)
    assert a.front_required is exp1
    assert a.back_required is exp2


def test_linked_adapter_parameters():
    """Parser-wide settings apply to both parts of a linked adapter."""
    # issue #394
    a = AdapterParser(max_errors=0.17, indels=False)._parse("ACG...TGT")
    assert isinstance(a, LinkedAdapter)
    assert a.front_adapter.max_error_rate == 0.17
    assert a.back_adapter.max_error_rate == 0.17
    assert not a.front_adapter.indels
    assert not a.back_adapter.indels


def test_linked_adapter_name():
    """The name of a linked adapter shows up in its statistics."""
    # issue #414
    a = AdapterParser()._parse("the_name=^ACG...TGT")
    assert isinstance(a, LinkedAdapter)
    assert a.create_statistics().name == "the_name"


def test_anywhere_parameter_back():
    """``;anywhere`` on a back adapter sets _force_anywhere.

    The read used here is then trimmed to an empty sequence.
    """
    parser = AdapterParser(max_errors=0.2, min_overlap=4, read_wildcards=False,
        adapter_wildcards=False, indels=True)
    adapter = list(parser.parse('CTGAAGTGAAGTACACGGTT;anywhere', 'back'))[0]
    assert isinstance(adapter, BackAdapter)
    assert adapter._force_anywhere

    # TODO move the rest to a separate test
    read = Sequence('foo1', 'TGAAGTACACGGTTAAAAAAAAAA')
    cutter = AdapterCutter([adapter])
    trimmed_read = cutter(read, ModificationInfo(read))
    assert trimmed_read.sequence == ''


def test_anywhere_parameter_front():
    """``;anywhere`` on a front adapter sets _force_anywhere.

    The read used here is then trimmed to an empty sequence.
    """
    parser = AdapterParser(max_errors=0.2, min_overlap=4, read_wildcards=False,
        adapter_wildcards=False, indels=True)
    adapter = list(parser.parse('CTGAAGTGAAGTACACGGTT;anywhere', 'front'))[0]
    assert isinstance(adapter, FrontAdapter)
    assert adapter._force_anywhere

    # TODO move the rest to a separate test
    read = Sequence('foo1', 'AAAAAAAAAACTGAAGTGAA')
    cutter = AdapterCutter([adapter])
    trimmed_read = cutter(read, ModificationInfo(read))
    assert trimmed_read.sequence == ''