# ===========================================================================
#
#                            PUBLIC DOMAIN NOTICE
#               National Center for Biotechnology Information
#
#  This software/database is a "United States Government Work" under the
#  terms of the United States Copyright Act.  It was written as part of
#  the author's official duties as a United States Government employee and
#  thus cannot be copyrighted.  This software/database is freely available
#  to the public for use. The National Library of Medicine and the U.S.
#  Government have not placed any restriction on its use or reproduction.
#
#  Although all reasonable efforts have been taken to ensure the accuracy
#  and reliability of the software and data, the NLM and the U.S.
#  Government do not and cannot warrant the performance or results that
#  may be obtained by using this software or data. The NLM and the U.S.
#  Government disclaim all warranties, express or implied, including
#  warranties of performance, merchantability or fitness for any particular
#  purpose.
#
#  Please cite the author in any work or product based on this material.
#
# ===========================================================================
#
#


from ctypes import c_void_p, c_uint64, byref, create_string_buffer, c_char_p, c_int
from . import NGS

from .Refcount import Refcount
from .ErrorMsg import ErrorMsg
from .String import NGS_RawString, getNGSString, getNGSValue
from .Read import Read
from .ReadIterator import ReadIterator
from .ReadGroup import ReadGroup
from .ReadGroupIterator import ReadGroupIterator
from .Reference import Reference
from .ReferenceIterator import ReferenceIterator
from .Alignment import Alignment
from .AlignmentIterator import AlignmentIterator


class ReadCollection(Refcount):
    """Represents an NGS-capable object with a collection of
    *Reads*, *References* and *Alignments*.

    Each of the basic content types may be accessed by *id*
    as either a standalone object, or more commonly through
    an *Iterator* over a selected collection of objects.

    Reads are grouped by *ReadGroup*. When
    not specifically assigned, Reads will be placed into the
    *default* ReadGroup.
    """

    # NOTE(review): the methods below call into NGS.lib_manager and do not
    # inspect the native return code themselves. The ":throws: ErrorMsg"
    # contracts presumably rely on the library manager checking results
    # (e.g. through a ctypes errcheck hook) -- confirm against LibManager.
    #
    # Every native call receives a fresh NGS_RawString as its error
    # out-parameter; it is always closed in a "finally" clause so it is
    # released whether or not the call raises.

    def getName(self):
        """Access the simple name of the ReadCollection.
        This name is generally extracted from the "spec"
        used to create the object, but may also be mapped
        to a canonical name if one may be determined and
        differs from that given in the spec.

        If the name is extracted from "spec" and contains
        well-known file extensions that do not form part of
        a canonical name (e.g. ".sra"), they will be removed.

        :returns: the simple name of the ReadCollection
        :throws: ErrorMsg if the name cannot be retrieved
        """
        return getNGSString(self, NGS.lib_manager.PY_NGS_ReadCollectionGetName)

    #----------------------------------------------------------------------
    # READ GROUPS

    def getReadGroups(self):
        """Access all non-empty ReadGroups.
        Iterator will contain at least one ReadGroup
        unless the ReadCollection itself is empty.

        :returns: an unordered Iterator of ReadGroups
        :throws: ErrorMsg only upon an error accessing data
        """
        ret = ReadGroupIterator()
        ret.ref = getNGSValue(self, NGS.lib_manager.PY_NGS_ReadCollectionGetReadGroups, c_void_p)  # TODO: check if it works
        return ret

    def hasReadGroup(self, spec):
        """Check existence of a ReadGroup by name.

        :param spec: the name of a possibly contained read group
                     (a string; it is passed to the library UTF-8 encoded)
        :returns: True if the read group exists, False otherwise
        """
        ret = c_int()
        ngs_str_err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionHasReadGroup(self.ref, spec.encode("UTF-8"), byref(ret), byref(ngs_str_err.ref))
        finally:
            ngs_str_err.close()

        # the native side reports the answer as an int flag
        return bool(ret.value)

    def getReadGroup(self, spec):
        """Access a single ReadGroup by name.

        :param spec: the name of a contained read group
                     (a string; it is passed to the library UTF-8 encoded)
        :returns: an instance of the designated ReadGroup
        :throws: ErrorMsg if specified ReadGroup is not found
        :throws: ErrorMsg upon an error accessing data
        """
        ret = ReadGroup()
        ngs_str_err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetReadGroup(self.ref, spec.encode("UTF-8"), byref(ret.ref), byref(ngs_str_err.ref))
        finally:
            ngs_str_err.close()

        return ret

    #----------------------------------------------------------------------
    # REFERENCES

    def getReferences(self):
        """Access all References having aligned Reads.
        Iterator will contain at least one Reference
        unless no Reads are aligned.

        :returns: an unordered Iterator of References
        :throws: ErrorMsg upon an error accessing data
        """
        ret = ReferenceIterator()
        ret.ref = getNGSValue(self, NGS.lib_manager.PY_NGS_ReadCollectionGetReferences, c_void_p)  # TODO: check if it works
        return ret

    def hasReference(self, spec):
        """Check existence of a Reference by name.

        :param spec: the name of a possibly contained reference sequence
                     (a string; it is passed to the library UTF-8 encoded)
        :returns: True if the reference exists, False otherwise
        """
        ret = c_int()
        ngs_str_err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionHasReference(self.ref, spec.encode("UTF-8"), byref(ret), byref(ngs_str_err.ref))
        finally:
            ngs_str_err.close()

        # the native side reports the answer as an int flag
        return bool(ret.value)

    def getReference(self, spec):
        """Access a single Reference by name.

        :param spec: the name of a contained reference sequence
                     (a string; it is passed to the library UTF-8 encoded)
        :returns: an instance of the designated Reference
        :throws: presumably ErrorMsg if the Reference is not found or data
                 cannot be accessed, as for getReadGroup -- TODO confirm
        """
        ret = Reference()
        ngs_str_err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetReference(self.ref, spec.encode("UTF-8"), byref(ret.ref), byref(ngs_str_err.ref))
        finally:
            ngs_str_err.close()

        return ret

    #----------------------------------------------------------------------
    # ALIGNMENTS

    def getAlignment(self, alignmentId):
        """Access a single Alignment by id.

        :param alignmentId: the alignment id as a string
                            (it is passed to the library UTF-8 encoded)
        :returns: an individual Alignment
        :throws: ErrorMsg if Alignment does not exist
        """
        ret = Alignment()
        ngs_str_err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetAlignment(self.ref, alignmentId.encode("UTF-8"), byref(ret.ref), byref(ngs_str_err.ref))
        finally:
            ngs_str_err.close()

        return ret

    # AlignmentCategory
    # see Alignment for categories

    def getAlignments(self, categories):
        """Iterate over the Alignments of the given categories.

        :param categories: AlignmentCategory filter (see Alignment)
        :returns: an iterator of all Alignments from specified categories
        """
        ret = AlignmentIterator()
        ngs_str_err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetAlignments(self.ref, categories, byref(ret.ref), byref(ngs_str_err.ref))
        finally:
            ngs_str_err.close()

        return ret

    def getAlignmentCount(self, categories=Alignment.all):
        """Count the Alignments of the given categories.

        :param categories: provides a means of filtering by
                           AlignmentCategory; defaults to Alignment.all
        :returns: count of all alignments
        """
        ret = c_uint64()
        ngs_str_err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetAlignmentCount(self.ref, categories, byref(ret), byref(ngs_str_err.ref))
        finally:
            ngs_str_err.close()

        return ret.value

    def getAlignmentRange(self, first, count, categories=Alignment.all):  # TODO: parameters order!
        """Iterate over a range of Alignments.

        :param first: an unsigned ordinal into set
        :param count: passed to the library together with "first" to
                      delimit the range
        :param categories: provides a means of filtering by
                           AlignmentCategory; defaults to Alignment.all
        :returns: an iterator across a range of Alignments
        """
        ret = AlignmentIterator()
        ngs_str_err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetAlignmentRange(self.ref, first, count, categories, byref(ret.ref), byref(ngs_str_err.ref))
        finally:
            ngs_str_err.close()

        return ret

    #----------------------------------------------------------------------
    # READ

    def getRead(self, readId):
        """Access a single Read by id.

        :param readId: the read id as a string
                       (it is passed to the library UTF-8 encoded)
        :returns: an individual Read
        :throws: ErrorMsg if Read does not exist
        """
        ret = Read()
        ngs_str_err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetRead(self.ref, readId.encode("UTF-8"), byref(ret.ref), byref(ngs_str_err.ref))
        finally:
            ngs_str_err.close()

        return ret

    # ReadCategory
    # see Read for categories

    def getReads(self, categories):
        """Iterate over the Reads of the given categories.

        :param categories: ReadCategory filter (see Read)
        :returns: an iterator of all contained machine Reads
        """
        ret = ReadIterator()
        ngs_str_err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetReads(self.ref, categories, byref(ret.ref), byref(ngs_str_err.ref))
        finally:
            ngs_str_err.close()

        return ret

    def getReadCount(self, categories=Read.all):
        """Count the Reads of all combined categories.

        :param categories: ReadCategory filter (see Read);
                           defaults to Read.all
        :returns: the number of reads in the collection
        """
        ret = c_uint64()
        ngs_str_err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetReadCount(self.ref, categories, byref(ret), byref(ngs_str_err.ref))
        finally:
            ngs_str_err.close()

        return ret.value

    def getReadRange(self, first, count, categories=Read.all):
        """Iterate over a range of Reads.

        :param first: passed to the library as the start of the range
        :param count: passed to the library together with "first" to
                      delimit the range
        :param categories: ReadCategory filter (see Read);
                           defaults to Read.all
        :returns: an iterator across a range of Reads
        """
        ret = ReadIterator()
        ngs_str_err = NGS_RawString()
        try:
            NGS.lib_manager.PY_NGS_ReadCollectionGetReadRange(self.ref, first, count, categories, byref(ret.ref), byref(ngs_str_err.ref))
        finally:
            ngs_str_err.close()

        return ret


def openReadCollection(spec):
    """Create an object representing a named collection of reads

    :param spec: may be a path to an object or may be an id, accession, or URL
                 (a string; it is passed to the library UTF-8 encoded)
    :returns: the newly created ReadCollection

    :throws: ErrorMsg if object cannot be located
    :throws: ErrorMsg if object cannot be converted to a ReadCollection
    :throws: ErrorMsg if an error occurs during construction
    """

    ret = ReadCollection()

    # Unlike the ReadCollection methods, this entry point reports errors
    # through a caller-supplied, fixed-size character buffer.
    ERROR_BUFFER_SIZE = 4096
    str_err = create_string_buffer(ERROR_BUFFER_SIZE)

    # NOTE(review): imported at call time rather than at module level,
    # presumably to avoid a circular import with the package -- confirm.
    from . import PY_RES_OK

    res = NGS.lib_manager.PY_NGS_Engine_ReadCollectionMake(spec.encode("UTF-8"), byref(ret.ref), str_err, len(str_err))
    if res != PY_RES_OK:
        # .value yields the buffer contents up to the first NUL byte
        raise ErrorMsg(str_err.value)

    return ret