1""" Helper class for working with 1D basecall type analyses.
2"""
3import numpy as np
4
5from ont_fast5_api.analysis_tools.base_tool import BaseTool
6
7
class Basecall1DTools(BaseTool):
    """ Provides helper methods specific to 1D basecall analyses.

    Event tables and fastq records are read from and written to
    ``BaseCalled_<section>`` subgroups of the analysis group named by
    ``self.group_name``.
    """
    group_id = 'Basecall_1D'
    analysis_id = 'basecall_1d'

    def _ensure_section_group(self, section):
        """ Create the ``BaseCalled_<section>`` subgroup if it is missing.

        :param section: Section name used to build the subgroup name.
        :return: Path of the subgroup relative to the analyses root, i.e.
            ``<group_name>/BaseCalled_<section>``.
        """
        subgroup = 'BaseCalled_{}'.format(section)
        analysis_group = self.handle.handle['Analyses/{}'.format(self.group_name)]
        if subgroup not in analysis_group:
            self.handle.add_analysis_subgroup(self.group_name, subgroup)
        return '{}/{}'.format(self.group_name, subgroup)

    def get_event_data(self, section):
        """ Return either the template or complement event data, if present.

        :param section: Either template or complement.
        :return: Event data table, exactly as returned by the file handle's
            ``get_analysis_dataset`` for the ``Events`` dataset.
        """
        event_group = '{}/BaseCalled_{}'.format(self.group_name, section)
        return self.handle.get_analysis_dataset(event_group, 'Events')

    def add_event_data(self, section, data):
        """ Add template or complement basecalled event data.

        The section subgroup is created first if it does not exist yet.

        :param section: Either template or complement.
        :param data: Event data table to be written.
        """
        event_group = self._ensure_section_group(section)
        self.handle.add_analysis_dataset(event_group, 'Events', data)

    def get_called_sequence(self, section, fastq=False):
        """ Return the called sequence data for a section.

        :param section: ['template', 'complement' or '2D']
        :param fastq: If True, return a single, multiline fastq string. If
            False, return a tuple of (name, sequence, qstring).
        :return: Either the fastq string or the (name, sequence, qstring) tuple.
        :raises KeyError: If the section has no fastq dataset.
        :raises ValueError: If the stored record does not consist of exactly
            four lines (only checked when ``fastq`` is False).
        """
        event_group = '{}/BaseCalled_{}'.format(self.group_name, section)
        data = self.handle.get_analysis_dataset(event_group, 'Fastq')
        if data is None:
            raise KeyError("No fastq data in: {} {}".format(event_group, self.filename))
        if fastq:
            return data
        # splitlines() rather than split('\n') so that records written with
        # CRLF line endings do not leave a stray '\r' on every field.
        lines = data.strip().splitlines()
        if len(lines) != 4:
            raise ValueError(
                "Malformed fastq record in: {} {} (expected 4 lines, found {})".format(
                    event_group, self.filename, len(lines)))
        header, sequence, _, qstring = lines
        # Drop the leading '@' marker from the header line.
        return header[1:], sequence, qstring

    def add_called_sequence(self, section, name, sequence, qstring):
        """ Add basecalled sequence data.

        The record is stored as a single four-line fastq string in the
        ``Fastq`` dataset of the section subgroup, which is created first if
        it does not exist yet.

        :param section: ['template', 'complement' or '2D']
        :param name: The record ID to use for the fastq.
        :param sequence: The called sequence.
        :param qstring: The quality string.
        """
        event_group = self._ensure_section_group(section)
        fastq_text = '@{}\n{}\n+\n{}\n'.format(name, sequence, qstring)
        fastq_arr = np.array(fastq_text, dtype=str)
        self.handle.add_analysis_dataset(event_group, 'Fastq', fastq_arr)
69