1from textwrap import dedent
2import pytest
3
4from dnaio import Sequence
5from cutadapt.adapters import LinkedAdapter, BackAdapter, FrontAdapter, InvalidCharacter
6from cutadapt.parser import AdapterParser, AdapterSpecification
7from cutadapt.modifiers import ModificationInfo
8
9
def test_expand_braces():
    """Check that ``expand_braces`` replaces ``X{n}`` by n copies of ``X``."""
    # (expression, expected expansion); a count of zero removes the character
    cases = [
        ('', ''),
        ('A', 'A'),
        ('A{0}', ''),
        ('A{1}', 'A'),
        ('A{2}', 'AA'),
        ('A{2}C', 'AAC'),
        ('ACGTN{3}TGACCC', 'ACGTNNNTGACCC'),
        ('ACGTN{10}TGACCC', 'ACGTNNNNNNNNNNTGACCC'),
        ('ACGTN{3}TGA{4}CCC', 'ACGTNNNTGAAAACCC'),
        ('ACGTN{0}TGA{4}CCC', 'ACGTTGAAAACCC'),
    ]
    expand = AdapterSpecification.expand_braces
    for expression, expected in cases:
        assert expand(expression) == expected
22
23
def test_expand_braces_fail():
    """Check that malformed brace expressions are rejected with ``ValueError``."""
    malformed = (
        '{', '}', '{}', '{5', '{1}', 'A{-7}', 'A{', 'A{1', 'N{7', 'AN{7',
        'A{4{}', 'A{4}{3}', 'A{b}', 'A{6X}', 'A{X6}', 'A}A',
    )
    for bad_expression in malformed:
        with pytest.raises(ValueError):
            AdapterSpecification.expand_braces(bad_expression)
29
30
def test_parse_file_notation(tmpdir):
    """Check that a ``file:`` specification yields one adapter per FASTA record.

    Every adapter must carry the record's name and sequence and pick up the
    search parameters the parser was constructed with.
    """
    tmp_path = str(tmpdir.join('adapters.fasta'))
    with open(tmp_path, 'w') as f:
        # The trailing backslash puts the first record on an indented line.
        # Without it the first line has no indentation, dedent() finds an
        # empty common margin and the remaining lines are written with their
        # leading spaces intact.
        f.write(dedent("""\
            >first_name
            ADAPTER1
            >second_name
            ADAPTER2
            """))
    parser = AdapterParser(
        max_errors=0.2, min_overlap=4, read_wildcards=False,
        adapter_wildcards=False, indels=False)

    adapters = list(parser.parse('file:' + tmp_path, cmdline_type='back'))
    assert len(adapters) == 2
    assert adapters[0].name == 'first_name'
    assert adapters[0].sequence == 'ADAPTER1'
    assert adapters[1].name == 'second_name'
    assert adapters[1].sequence == 'ADAPTER2'
    # Parser-level settings must be applied to every adapter read from the file
    for a in adapters:
        assert a.max_error_rate == 0.2
        assert a.min_overlap == 4
        assert not a.read_wildcards
        assert not a.adapter_wildcards
        assert not a.indels
55
56
def test_parse_not_linked():
    """Check ``AdapterSpecification.parse`` on single (non-linked) adapters."""
    # (spec, cmdline_type, first field, second field, sequence) where the first
    # two fields are the leading constructor arguments of the expected
    # AdapterSpecification (they look like name and placement restriction).
    cases = [
        ('A', 'front', None, None, 'A'),
        ('A', 'back', None, None, 'A'),
        ('A', 'anywhere', None, None, 'A'),
        ('^A', 'front', None, 'anchored', 'A'),
        ('XXXA', 'front', None, 'noninternal', 'A'),
        ('A$', 'back', None, 'anchored', 'A'),
        ('AXXXX', 'back', None, 'noninternal', 'A'),
        ('a_name=ADAPT', 'front', 'a_name', None, 'ADAPT'),
    ]
    for spec, cmdline_type, name, restriction, sequence in cases:
        parsed = AdapterSpecification.parse(spec, cmdline_type)
        expected = AdapterSpecification(name, restriction, sequence, {}, cmdline_type)
        assert parsed == expected
67
68
@pytest.mark.parametrize("where", ("front", "back"))
@pytest.mark.parametrize("reqopt", ("required", "optional"))
def test_parse_invalid_adapter_specific_parameter(where, reqopt):
    """``required``/``optional`` on a non-linked adapter must raise ValueError."""
    parser = AdapterParser()
    with pytest.raises(ValueError) as excinfo:
        parser._parse_not_linked("A;{}".format(reqopt), "name", where)
    message = excinfo.value.args[0]
    assert "can only be used within linked adapters" in message
76
77
def test_parse_invalid_cmdline_type():
    """An unknown ``cmdline_type`` must be rejected with a ValueError."""
    with pytest.raises(ValueError) as excinfo:
        AdapterSpecification._parse('A', 'invalid_type')
    message = excinfo.value.args[0]
    assert "cmdline_type must be front, back or anywhere" in message
82
83
@pytest.mark.parametrize("spec,cmdline_type", [
    ("^XA", "front"),
    ("^AX", "front"),
    ("XA$", "back"),
    ("AX$", "back"),
])
def test_parse_double_placement_restrictions(spec, cmdline_type):
    """A spec that mixes an anchor character with an ``X`` must be rejected."""
    with pytest.raises(ValueError) as excinfo:
        AdapterSpecification._parse(spec, cmdline_type)
    message = excinfo.value.args[0]
    assert "cannot use multiple placement restrictions" in message
94
95
def test_parse_misplaced_placement_restrictions():
    """An anchor that does not fit the adapter type must raise ValueError."""
    # (spec, cmdline_type, fragment expected in the error message)
    cases = (
        ("A$", "front", "Allowed placement restrictions for a 5' adapter"),
        ("^A", "back", "Allowed placement restrictions for a 3' adapter"),
    )
    for spec, cmdline_type, fragment in cases:
        with pytest.raises(ValueError) as excinfo:
            AdapterSpecification._parse(spec, cmdline_type)
        assert fragment in excinfo.value.args[0]
103
104
def test_restriction_to_class():
    """A placement restriction combined with "anywhere" must raise ValueError."""
    with pytest.raises(ValueError) as excinfo:
        AdapterSpecification._restriction_to_class("anywhere", "noninternal")
    message = excinfo.value.args[0]
    assert "No placement may be specified" in message
109
110
def test_parse_parameters():
    """Check ``_parse_parameters`` on valid and invalid parameter strings."""
    parse_parameters = AdapterSpecification._parse_parameters

    # Valid inputs: short and long spellings map to the same result keys
    accepted = [
        ('e=0.1', {'max_errors': 0.1}),
        ('error_rate=0.1', {'max_errors': 0.1}),
        ('max_errors=2', {'max_errors': 2}),
        ('o=5', {'min_overlap': 5}),
        ('min_overlap=5', {'min_overlap': 5}),
        ('o=7; e=0.4', {'min_overlap': 7, 'max_errors': 0.4}),
        ('anywhere', {'anywhere': True}),
        ('required', {'required': True}),
        ('optional', {'required': False}),
    ]
    for text, expected in accepted:
        assert parse_parameters(text) == expected

    # Invalid inputs: (text, exception type, message fragment or None when
    # only the exception type is checked)
    rejected = [
        ('e=hallo', ValueError, None),
        ('bla=0.1', KeyError, None),
        ('e=', ValueError, None),
        ('e=0.1;e=0.1', KeyError, "specified twice"),
        ('e=0.1;max_errors=0.1', KeyError, "specified twice"),
        ('optional; required', ValueError, "cannot be specified at the same time"),
    ]
    for text, exc_type, fragment in rejected:
        with pytest.raises(exc_type) as excinfo:
            parse_parameters(text)
        if fragment is not None:
            assert fragment in excinfo.value.args[0]
138
139
def test_parse_with_parameters(tmp_path):
    """Check that per-adapter parameters override the parser's defaults."""
    parser = AdapterParser(
        max_errors=0.2,
        min_overlap=4,
        read_wildcards=False,
        adapter_wildcards=False,
        indels=False,
    )

    # Only the error rate is overridden; min_overlap keeps the parser default
    front = parser._parse('ACGTACGT; e=0.15', 'front')
    assert isinstance(front, FrontAdapter)
    assert front.max_error_rate == 0.15
    assert front.min_overlap == 4

    # Both values are overridden
    back = parser._parse('ACGTAAAA; o=5; e=0.11', 'back')
    assert isinstance(back, BackAdapter)
    assert back.max_error_rate == 0.11
    assert back.min_overlap == 5

    # Linked adapters, with and without spaces around the ellipsis
    linked_specs = (
        'thename=ACG;e=0.15 ... TGT;e=0.17',
        'thename=ACG;e=0.15...TGT;e=0.17',
    )
    for linked_spec in linked_specs:
        linked = parser._parse(linked_spec, 'back')
        assert isinstance(linked, LinkedAdapter)
        assert linked.front_adapter.max_error_rate == 0.15
        assert linked.back_adapter.max_error_rate == 0.17

    with pytest.raises(ValueError) as excinfo:
        parser._parse("A", "invalid-cmdline-type")
    assert "cmdline_type cannot be" in excinfo.value.args[0]
163
164
def test_parse_with_adapter_sequence_as_a_path(tmp_path):
    """A path given without ``file:`` must fail with a hint about the file."""
    parser = AdapterParser()
    with pytest.raises(InvalidCharacter):
        parser._parse("invalid.character", "back")

    # Simulate a user who passed an existing FASTA path but forgot "file:"
    fasta_path = tmp_path / "afile.fasta"
    fasta_path.write_text(">abc\nACGT\n")
    with pytest.raises(InvalidCharacter) as excinfo:
        list(parser.parse(str(fasta_path), "back"))
    message = excinfo.value.args[0]
    assert "A file exists named" in message
175
176
def test_parse_multi():
    """``parse_multi`` must reject an unknown adapter type with ValueError."""
    parser = AdapterParser()
    type_spec_pairs = [("invalid-type", "A")]
    with pytest.raises(ValueError) as excinfo:
        parser.parse_multi(type_spec_pairs)
    message = excinfo.value.args[0]
    assert "adapter type must be" in message
182
183
def test_normalize_ellipsis():
    """Check ``_normalize_ellipsis`` on specs where one side of ``...`` is empty."""
    normalize = AdapterParser._normalize_ellipsis

    # (arguments, expected (sequence, type) result)
    accepted = [
        (("ACGT", "", "back"), ("ACGT", "front")),  # -a ACGT...
        (("ACGT", "", "front"), ("ACGT", "front")),  # -g ACGT...
        (("", "ACGT", "back"), ("ACGT", "back")),  # -a ...ACGT
    ]
    for arguments, expected in accepted:
        assert normalize(*arguments) == expected

    # (arguments, fragment expected in the ValueError message)
    rejected = [
        (("", "ACGT", "front"), "Invalid adapter specification"),  # -g ...ACGT
        (("A", "C", "back"), "either"),
        (("A", "", "anywhere"), "No ellipsis"),
    ]
    for arguments, fragment in rejected:
        with pytest.raises(ValueError) as excinfo:
            normalize(*arguments)
        assert fragment in excinfo.value.args[0]
200
201
@pytest.mark.parametrize("seq,req1,req2", [
    ("ACG...TGT", False, False),
    ("ACG...TGT$", False, True),
    ("^ACG...TGT", True, False),
    ("^ACG...TGT$", True, True),
])
def test_anchoring_makes_front_linked_adapter_required(seq, req1, req2):
    """Anchoring one side of ``-a X...Y`` must mark that side as required."""
    # -a X...Y
    parser = AdapterParser()
    linked = parser._parse(seq, "back")
    assert isinstance(linked, LinkedAdapter)
    assert linked.front_required is req1
    assert linked.back_required is req2
214
215
@pytest.mark.parametrize("r1,r2,req1,req2", [
    ("", "", False, False),
    ("", ";required", False, True),
    (";required", "", True, False),
    (";required", ";required", True, True),
    ("", ";optional", False, False),
    (";optional", "", False, False),
    (";optional", ";optional", False, False),
])
def test_linked_adapter_back_required_optional(r1, r2, req1, req2):
    """For ``-a X...Y``, a side is required only when marked ``;required``."""
    # -a X...Y
    spec = "".join(("ACG", r1, "...TGT", r2))
    parser = AdapterParser()
    linked = parser._parse(spec, "back")
    assert isinstance(linked, LinkedAdapter)
    assert linked.front_required is req1
    assert linked.back_required is req2
231
232
@pytest.mark.parametrize("r1,r2,exp1,exp2", [
    ("", "", True, True),
    ("", ";required", True, True),
    (";required", "", True, True),
    (";required", ";required", True, True),
    ("", ";optional", True, False),
    (";optional", "", False, True),
    (";optional", ";optional", False, False),
])
def test_linked_adapter_front_required_optional(r1, r2, exp1, exp2):
    """For ``-g X...Y``, a side is required unless marked ``;optional``."""
    # -g X...Y
    spec = "".join(("ACG", r1, "...TGT", r2))
    parser = AdapterParser()
    linked = parser._parse(spec, "front")
    assert isinstance(linked, LinkedAdapter)
    assert linked.front_required is exp1
    assert linked.back_required is exp2
248
249
def test_linked_adapter_parameters():
    """Parser-level settings must reach both halves of a linked adapter."""
    # issue #394
    parser = AdapterParser(max_errors=0.17, indels=False)
    linked = parser._parse("ACG...TGT")
    assert isinstance(linked, LinkedAdapter)
    front, back = linked.front_adapter, linked.back_adapter
    assert front.max_error_rate == 0.17
    assert back.max_error_rate == 0.17
    assert not front.indels
    assert not back.indels
258
259
def test_linked_adapter_name():
    """The name given in the spec must show up in the adapter's statistics."""
    # issue #414
    parser = AdapterParser()
    linked = parser._parse("the_name=^ACG...TGT")
    assert isinstance(linked, LinkedAdapter)
    statistics = linked.create_statistics()
    assert statistics.name == "the_name"
265
266
def test_anywhere_parameter_back():
    """``;anywhere`` on a back adapter sets ``_force_anywhere`` on a BackAdapter.

    Also checks that trimming the sample read with this adapter leaves an
    empty sequence.
    """
    parser = AdapterParser(
        max_errors=0.2,
        min_overlap=4,
        read_wildcards=False,
        adapter_wildcards=False,
        indels=True,
    )
    parsed = list(parser.parse('CTGAAGTGAAGTACACGGTT;anywhere', 'back'))
    adapter = parsed[0]
    assert isinstance(adapter, BackAdapter)
    assert adapter._force_anywhere

    # TODO move the rest to a separate test
    read = Sequence('foo1', 'TGAAGTACACGGTTAAAAAAAAAA')
    from cutadapt.modifiers import AdapterCutter
    cutter = AdapterCutter([adapter])
    info = ModificationInfo(read)
    trimmed_read = cutter(read, info)
    assert trimmed_read.sequence == ''
280
281
def test_anywhere_parameter_front():
    """``;anywhere`` on a front adapter sets ``_force_anywhere`` on a FrontAdapter.

    Also checks that trimming the sample read with this adapter leaves an
    empty sequence.
    """
    parser = AdapterParser(
        max_errors=0.2,
        min_overlap=4,
        read_wildcards=False,
        adapter_wildcards=False,
        indels=True,
    )
    parsed = list(parser.parse('CTGAAGTGAAGTACACGGTT;anywhere', 'front'))
    adapter = parsed[0]
    assert isinstance(adapter, FrontAdapter)
    assert adapter._force_anywhere

    # TODO move the rest to a separate test
    read = Sequence('foo1', 'AAAAAAAAAACTGAAGTGAA')
    from cutadapt.modifiers import AdapterCutter
    cutter = AdapterCutter([adapter])
    info = ModificationInfo(read)
    trimmed_read = cutter(read, info)
    assert trimmed_read.sequence == ''
295