1# ===========================================================================
2#
3#                            PUBLIC DOMAIN NOTICE
4#               National Center for Biotechnology Information
5#
6#  This software/database is a "United States Government Work" under the
7#  terms of the United States Copyright Act.  It was written as part of
8#  the author's official duties as a United States Government employee and
9#  thus cannot be copyrighted.  This software/database is freely available
10#  to the public for use. The National Library of Medicine and the U.S.
11#  Government have not placed any restriction on its use or reproduction.
12#
13#  Although all reasonable efforts have been taken to ensure the accuracy
14#  and reliability of the software and data, the NLM and the U.S.
15#  Government do not and cannot warrant the performance or results that
16#  may be obtained by using this software or data. The NLM and the U.S.
17#  Government disclaim all warranties, express or implied, including
18#  warranties of performance, merchantability or fitness for any particular
19#  purpose.
20#
21#  Please cite the author in any work or product based on this material.
22#
23# ===========================================================================
24#
25#
26
27
28from ctypes import c_void_p, c_uint64, byref, create_string_buffer, c_char_p, c_int
29from . import NGS
30
31from .Refcount import Refcount
32from .ErrorMsg import ErrorMsg
33from .String import NGS_RawString, getNGSString, getNGSValue
34from .Read import Read
35from .ReadIterator import ReadIterator
36from .ReadGroup import ReadGroup
37from .ReadGroupIterator import ReadGroupIterator
38from .Reference import Reference
39from .ReferenceIterator import ReferenceIterator
40from .Alignment import Alignment
41from .AlignmentIterator import AlignmentIterator
42
class ReadCollection(Refcount):
    """An NGS-capable object holding a collection of
    *Reads*, *References* and *Alignments*.

    Each of the basic content types may be accessed by *id*
    as either a standalone object, or more commonly through
    an *Iterator* over a selected collection of objects.

    Reads are grouped by *ReadGroup*. When not specifically
    assigned, Reads will be placed into the *default* ReadGroup.

    NOTE(review): most methods hand the native call an NGS_RawString
    error slot but never inspect the returned status themselves; the
    ErrorMsg exceptions documented below are presumably raised by the
    NGS.lib_manager bindings -- confirm.
    """

    def getName(self):
        """Access the simple name of the ReadCollection.

        This name is generally extracted from the "spec" used to
        create the object, but may also be mapped to a canonical
        name if one may be determined and differs from that given
        in the spec.

        If the name is extracted from "spec" and contains well-known
        file extensions that do not form part of a canonical name
        (e.g. ".sra"), they will be removed.

        :returns: the simple name of the ReadCollection
        :throws: ErrorMsg if the name cannot be retrieved
        """
        name_getter = NGS.lib_manager.PY_NGS_ReadCollectionGetName
        return getNGSString(self, name_getter)

    #----------------------------------------------------------------------
    # READ GROUPS

    def getReadGroups(self):
        """Access all non-empty ReadGroups.

        The Iterator will contain at least one ReadGroup
        unless the ReadCollection itself is empty.

        :returns: an unordered Iterator of ReadGroups
        :throws: ErrorMsg only upon an error accessing data
        """
        group_iter = ReadGroupIterator()
        # TODO: check if it works
        group_iter.ref = getNGSValue(
            self,
            NGS.lib_manager.PY_NGS_ReadCollectionGetReadGroups,
            c_void_p,
        )
        return group_iter

    def hasReadGroup(self, spec):
        """Check existence of a ReadGroup by name.

        :param spec: the name of a possibly contained read group
                     (a string; it is UTF-8 encoded for the native call)
        :returns: true if the read group exists
        """
        found = c_int()
        err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionHasReadGroup(
                self.ref,
                spec.encode("UTF-8"),
                byref(found),
                byref(err.ref),
            )
        finally:
            # always release the error string slot
            err.close()

        return bool(found.value)

    def getReadGroup(self, spec):
        """Access a single ReadGroup by name.

        :param spec: the name of a contained read group
                     (a string; it is UTF-8 encoded for the native call)
        :returns: an instance of the designated ReadGroup
        :throws: ErrorMsg if specified ReadGroup is not found
        :throws: ErrorMsg upon an error accessing data
        """
        group = ReadGroup()
        err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetReadGroup(
                self.ref,
                spec.encode("UTF-8"),
                byref(group.ref),
                byref(err.ref),
            )
        finally:
            err.close()

        return group

    #----------------------------------------------------------------------
    # REFERENCES

    def getReferences(self):
        """Access all References having aligned Reads.

        The Iterator will contain at least one Reference
        unless no Reads are aligned.

        :returns: an unordered Iterator of References
        :throws: ErrorMsg upon an error accessing data
        """
        reference_iter = ReferenceIterator()
        # TODO: check if it works
        reference_iter.ref = getNGSValue(
            self,
            NGS.lib_manager.PY_NGS_ReadCollectionGetReferences,
            c_void_p,
        )
        return reference_iter

    def hasReference(self, spec):
        """Check existence of a Reference by name.

        :param spec: the name of a possibly contained reference sequence
                     (a string; it is UTF-8 encoded for the native call)
        :returns: true if the reference exists
        """
        found = c_int()
        err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionHasReference(
                self.ref,
                spec.encode("UTF-8"),
                byref(found),
                byref(err.ref),
            )
        finally:
            err.close()

        return bool(found.value)

    def getReference(self, spec):
        """Access a single Reference by name.

        :param spec: the name of the requested reference sequence
                     (a string; it is UTF-8 encoded for the native call)
        :returns: a Reference object filled in by the native call
        """
        reference = Reference()
        err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetReference(
                self.ref,
                spec.encode("UTF-8"),
                byref(reference.ref),
                byref(err.ref),
            )
        finally:
            err.close()

        return reference

    #----------------------------------------------------------------------
    # ALIGNMENTS

    def getAlignment(self, alignmentId):
        """Access a single Alignment by its id.

        :param alignmentId: id of the alignment, given as a string
                            (it is UTF-8 encoded for the native call)
        :returns: an individual Alignment
        :throws: ErrorMsg if Alignment does not exist
        """
        alignment = Alignment()
        err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetAlignment(
                self.ref,
                alignmentId.encode("UTF-8"),
                byref(alignment.ref),
                byref(err.ref),
            )
        finally:
            err.close()

        return alignment

    # AlignmentCategory
    #  see Alignment for categories

    def getAlignments(self, categories):
        """Iterate over the Alignments of the requested categories.

        :param categories: AlignmentCategory selection (see Alignment)
        :returns: an iterator of all Alignments from specified categories
        """
        alignment_iter = AlignmentIterator()
        err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetAlignments(
                self.ref,
                categories,
                byref(alignment_iter.ref),
                byref(err.ref),
            )
        finally:
            err.close()

        return alignment_iter

    def getAlignmentCount(self, categories=Alignment.all):
        """Count the alignments in the collection.

        :param categories: provides a means of filtering by
                           AlignmentCategory; defaults to Alignment.all
        :returns: count of all alignments
        """
        total = c_uint64()
        err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetAlignmentCount(
                self.ref,
                categories,
                byref(total),
                byref(err.ref),
            )
        finally:
            err.close()

        return total.value

    def getAlignmentRange(self, first, count, categories=Alignment.all): # TODO: parameters order!
        """Iterate over a range of Alignments.

        :param first: an unsigned ordinal into the set
        :param count: forwarded to the native call together with
                      "first" (presumably the length of the range)
        :param categories: provides a means of filtering by
                           AlignmentCategory; defaults to Alignment.all
        :returns: an iterator across a range of Alignments
        """
        alignment_iter = AlignmentIterator()
        err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetAlignmentRange(
                self.ref,
                first,
                count,
                categories,
                byref(alignment_iter.ref),
                byref(err.ref),
            )
        finally:
            err.close()

        return alignment_iter

    #----------------------------------------------------------------------
    # READ

    def getRead(self, readId):
        """Access a single Read by its id.

        :param readId: id of the read, given as a string
                       (it is UTF-8 encoded for the native call)
        :returns: an individual Read
        :throws: ErrorMsg if Read does not exist
        """
        read = Read()
        err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetRead(
                self.ref,
                readId.encode("UTF-8"),
                byref(read.ref),
                byref(err.ref),
            )
        finally:
            err.close()

        return read

    # ReadCategory
    #  see Read for categories

    def getReads(self, categories):
        """Iterate over the contained machine Reads.

        :param categories: ReadCategory selection (see Read)
        :returns: an iterator of all contained machine Reads
        """
        read_iter = ReadIterator()
        err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetReads(
                self.ref,
                categories,
                byref(read_iter.ref),
                byref(err.ref),
            )
        finally:
            err.close()

        return read_iter

    def getReadCount(self, categories=Read.all):
        """Count the reads of all combined categories.

        :param categories: ReadCategory selection; defaults to Read.all
        :returns: the number of reads in the collection
        """
        total = c_uint64()
        err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetReadCount(
                self.ref,
                categories,
                byref(total),
                byref(err.ref),
            )
        finally:
            err.close()

        return total.value

    def getReadRange(self, first, count, categories=Read.all):
        """Iterate over a range of Reads.

        :param first: start of the range, forwarded to the native call
                      (presumably an ordinal, as in getAlignmentRange)
        :param count: forwarded to the native call together with
                      "first" (presumably the length of the range)
        :param categories: ReadCategory selection; defaults to Read.all
        :returns: a ReadIterator filled in by the native call
        """
        read_iter = ReadIterator()
        err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetReadRange(
                self.ref,
                first,
                count,
                categories,
                byref(read_iter.ref),
                byref(err.ref),
            )
        finally:
            err.close()

        return read_iter
273
274
def openReadCollection(spec):
    """Create an object representing a named collection of reads.

    :param spec: may be a path to an object or may be an id, accession,
                 or URL (a string; it is UTF-8 encoded for the native call)
    :returns: the newly opened ReadCollection

    :throws: ErrorMsg if object cannot be located
    :throws: ErrorMsg if object cannot be converted to a ReadCollection
    :throws: ErrorMsg if an error occurs during construction
    """
    ERROR_BUFFER_SIZE = 4096

    collection = ReadCollection()
    # Fixed-size buffer the native call may fill with an error message.
    error_buffer = create_string_buffer(ERROR_BUFFER_SIZE)
    from . import PY_RES_OK

    status = NGS.lib_manager.PY_NGS_Engine_ReadCollectionMake(
        spec.encode("UTF-8"),
        byref(collection.ref),
        error_buffer,
        len(error_buffer),
    )
    if status == PY_RES_OK:
        return collection

    # NOTE(review): error_buffer.value is the raw buffer content (bytes
    # under Python 3) and is handed to ErrorMsg undecoded -- confirm that
    # ErrorMsg copes with that.
    raise ErrorMsg(error_buffer.value)
294