""" Helper class for working with 1D basecall type analyses.
"""
import numpy as np

from ont_fast5_api.analysis_tools.base_tool import BaseTool


class Basecall1DTools(BaseTool):
    """ Provides helper methods specific to 1D basecall analyses.

    The methods below use ``self.handle``, ``self.group_name`` and
    ``self.filename``. None of these are set in this class; they are
    presumably provided by ``BaseTool`` (verify against the base class).
    """
    group_id = 'Basecall_1D'
    analysis_id = 'basecall_1d'

    def get_event_data(self, section):
        """ Return either the template or complement event data, if present.

        :param section: Either template or complement.
        :return: Event data table, as returned by the file handle's
            ``get_analysis_dataset`` for the ``Events`` dataset.
        """
        event_group = '{}/BaseCalled_{}'.format(self.group_name, section)
        return self.handle.get_analysis_dataset(event_group, 'Events')

    def add_event_data(self, section, data):
        """ Add template or complement basecalled event data.

        The ``BaseCalled_<section>`` subgroup is created first if it does
        not exist yet.

        :param section: Either template or complement.
        :param data: Event data table to be written.
        """
        event_group = self._ensure_section_group(section)
        self.handle.add_analysis_dataset(
            '{}/{}'.format(self.group_name, event_group), 'Events', data)

    def get_called_sequence(self, section, fastq=False):
        """ Return the called sequence data for a section.

        :param section: ['template', 'complement' or '2D']
        :param fastq: If True, return a single, multiline fastq string. If
            False, return a tuple of (name, sequence, qstring).
        :return: Either the fastq string or the (name, sequence, qstring) tuple.
        :raises KeyError: If the section has no ``Fastq`` dataset.
        :raises ValueError: If ``fastq`` is False and the stored record
            cannot be split into the four fastq lines.
        """
        event_group = '{}/BaseCalled_{}'.format(self.group_name, section)
        data = self.handle.get_analysis_dataset(event_group, 'Fastq')
        if data is None:
            raise KeyError("No fastq data in: {} {}".format(event_group, self.filename))
        if fastq:
            # The stored record is handed back untouched.
            return data
        if isinstance(data, bytes):
            # NOTE(review): whether the handle can return bytes here depends
            # on the file handle / HDF5 backend, which is not visible in this
            # module. Decoding keeps the split below from raising TypeError.
            data = data.decode()
        fields = data.strip().split('\n')
        if len(fields) == 3 and fields[1] == '' and fields[2].startswith('+'):
            # A record with an empty sequence also has an empty quality line,
            # which strip() has just removed together with the trailing
            # newline. Restore it so zero-length reads written by
            # add_called_sequence() can be read back.
            fields.append('')
        name, sequence, _, qstring = fields
        # Drop the leading '@' of the fastq header line.
        return name[1:], sequence, qstring

    def add_called_sequence(self, section, name, sequence, qstring):
        """ Add basecalled sequence data.

        The record is stored as a four line fastq text
        (``@name``, sequence, ``+``, quality) in the ``Fastq`` dataset of
        the ``BaseCalled_<section>`` subgroup, which is created first if it
        does not exist yet.

        :param section: ['template', 'complement' or '2D']
        :param name: The record ID to use for the fastq.
        :param sequence: The called sequence.
        :param qstring: The quality string.
        """
        event_group = self._ensure_section_group(section)
        fastq_text = '@{}\n{}\n+\n{}\n'.format(name, sequence, qstring)
        fastq_arr = np.array(fastq_text, dtype=str)
        self.handle.add_analysis_dataset(
            '{}/{}'.format(self.group_name, event_group), 'Fastq', fastq_arr)

    def _ensure_section_group(self, section):
        """ Create the ``BaseCalled_<section>`` subgroup if it is missing.

        :param section: ['template', 'complement' or '2D']
        :return: The subgroup name, relative to this analysis group.
        """
        event_group = 'BaseCalled_{}'.format(section)
        analysis_group = self.handle.handle['Analyses/{}'.format(self.group_name)]
        if event_group not in analysis_group:
            self.handle.add_analysis_subgroup(self.group_name, event_group)
        return event_group