# Copyright 2012 by Wibowo Arindrarto. All rights reserved.
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Bio.SearchIO object to model a single database hit."""


from itertools import chain

from Bio.SearchIO._utils import allitems, optionalcascade, getattr_str

from ._base import _BaseSearchObject
from .hsp import HSP


class Hit(_BaseSearchObject):
    """Class representing a single database hit of a search result.

    Hit objects are the second-level container in the SearchIO module. They
    are the objects contained within a QueryResult (see QueryResult). They
    themselves are containers for HSP objects and will contain at least one
    HSP.

    To have a quick look at a Hit and its contents, invoke ``print`` on it::

        >>> from Bio import SearchIO
        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
        >>> hit = qresult[3]
        >>> print(hit)
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  8.9e-20    100.47     60          [1:61]               [13:73]
                  1  3.3e-06     55.39     60          [0:60]               [13:73]

    You can invoke ``len`` on a Hit object to see how many HSP objects it
    contains::

        >>> len(hit)
        2

    Hit objects behave very similarly to Python lists. You can retrieve the
    HSP object inside a Hit using the HSP's integer index. Hit objects can
    also be sliced, which will return a new Hit object containing only the
    sliced HSPs::

        # HSP items inside the Hit can be retrieved using its integer index
        >>> hit[0]
        HSP(hit_id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 fragments)

        # slicing returns a new Hit
        >>> hit
        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
        >>> hit[:1]
        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 hsps)
        >>> print(hit[1:])
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  3.3e-06     55.39     60          [0:60]               [13:73]

    Hit objects provide ``filter`` and ``map`` methods, which are analogous to
    Python's built-in ``filter`` and ``map`` except that they return a new Hit
    object instead of a list.

    Here is an example of using ``filter`` to select for HSPs whose e-value is
    less than 1e-10::

        >>> evalue_filter = lambda hsp: hsp.evalue < 1e-10
        >>> filtered_hit = hit.filter(evalue_filter)
        >>> len(hit)
        2
        >>> len(filtered_hit)
        1
        >>> print(filtered_hit)
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  8.9e-20    100.47     60          [1:61]               [13:73]

    There are also other methods which are counterparts of Python lists'
    methods with the same names: ``append``, ``index``, ``pop``, and ``sort``.
    Consult their respective documentations for more details and examples of
    their usage.

    """

    # attributes we don't want to transfer when creating a new Hit class
    # from this one
    _NON_STICKY_ATTRS = ("_items",)

    def __init__(self, hsps=(), id=None, query_id=None):
        """Initialize a Hit object.

        :param hsps: HSP objects contained in the Hit object
        :type hsps: iterable yielding HSP
        :param id: hit ID
        :type id: string
        :param query_id: query ID
        :type query_id: string
        :raises ValueError: if the given HSPs do not all share the same value
            for any of the properties listed below
        :raises TypeError: if an item being appended is not an HSP object

        If multiple HSP objects are used for initialization, they must all
        have the same ``query_id``, ``query_description``, ``hit_id``, and
        ``hit_description`` properties.
        """
        # Materialize the input once: it is traversed several times below, so
        # a single-use iterator (e.g. a generator) would otherwise be
        # exhausted by the first consistency check and yield an empty Hit.
        hsps = list(hsps)

        # default attribute values
        self._id = id
        self._id_alt = []
        self._query_id = query_id
        self._description = None
        self._description_alt = []
        self._query_description = None
        self.attributes = {}
        self.dbxrefs = []

        for attr in ("query_id", "query_description", "hit_id", "hit_description"):
            # HACK: setting the if clause to '> 1' allows for empty hit objects.
            # This makes it easier to work with file formats with unpredictable
            # hit-hsp ordering. The empty hit object itself is nonfunctional,
            # however, since all its cascading properties are empty.
            if len({getattr(hsp, attr) for hsp in hsps}) > 1:
                raise ValueError(
                    "Hit object can not contain HSPs with more than one %s." % attr
                )

        self._items = []
        for hsp in hsps:
            # append() validates each HSP before storing it
            self.append(hsp)

    def __repr__(self):
        """Return string representation of Hit object."""
        return "Hit(id=%r, query_id=%r, %r hsps)" % (self.id, self.query_id, len(self))

    def __iter__(self):
        """Iterate over hsps."""
        return iter(self.hsps)

    def __len__(self):
        """Return number of hsps."""
        return len(self.hsps)

    def __bool__(self):
        """Return True if there are hsps."""
        return bool(self.hsps)

    def __contains__(self, hsp):
        """Return True if hsp in items."""
        return hsp in self._items

    def __str__(self):
        """Return a human readable summary of the Hit object.

        The summary lists the query ID and description, the hit ID (with the
        sequence length when a ``seq_len`` attribute is available) and
        description, the entries of ``attributes``, the database
        cross-references, and finally a table with one row per HSP.
        """
        # NOTE(review): the separator widths inside the string literals of
        # this method are kept exactly as found; confirm they match the
        # intended column layout.
        lines = []

        # set query id line
        lines.append("Query: %s" % self.query_id)
        if self.query_description:
            line = " %s" % self.query_description
            # cap over-long description lines at 80 characters
            line = line[:77] + "..." if len(line) > 80 else line
            lines.append(line)

        # set hit id line
        hid_line = " Hit: %s" % self.id
        try:
            seq_len = self.seq_len
        except AttributeError:
            # seq_len is optional; simply leave it out of the summary
            pass
        else:
            hid_line += " (%i)" % seq_len
        lines.append(hid_line)
        if self.description:
            line = " %s" % self.description
            line = line[:77] + "..." if len(line) > 80 else line
            lines.append(line)

        # set attributes lines
        for key, value in sorted(self.attributes.items()):
            lines.append(" %s: %s" % (key, value))

        # set dbxrefs line
        if self.dbxrefs:
            lines.append("Database cross-references: " + ", ".join(self.dbxrefs))

        # set hsp line and table
        if not self.hsps:
            lines.append(" HSPs: ?")
            return "\n".join(lines)

        # horizontal rules, one run of dashes per table column
        rules = ("-" * 4, "-" * 8, "-" * 9, "-" * 6, "-" * 15, "-" * 21)
        pattern = "%11s %8s %9s %6s %15s %21s"
        lines.append(" HSPs: %s %s %s %s %s %s" % rules)
        lines.append(
            pattern % ("#", "E-value", "Bit score", "Span", "Query range", "Hit range")
        )
        lines.append(pattern % rules)

        for idx, hsp in enumerate(self.hsps):
            evalue = getattr_str(hsp, "evalue", fmt="%.2g")
            bitscore = getattr_str(hsp, "bitscore", fmt="%.2f")
            # alignment length
            aln_span = getattr_str(hsp, "aln_span")
            # query region; truncated to fit the 15-character column
            query_start = getattr_str(hsp, "query_start")
            query_end = getattr_str(hsp, "query_end")
            query_range = "[%s:%s]" % (query_start, query_end)
            if len(query_range) > 15:
                query_range = query_range[:13] + "~]"
            # hit region; truncated to fit the 21-character column
            hit_start = getattr_str(hsp, "hit_start")
            hit_end = getattr_str(hsp, "hit_end")
            hit_range = "[%s:%s]" % (hit_start, hit_end)
            if len(hit_range) > 21:
                hit_range = hit_range[:19] + "~]"
            # append the hsp row
            lines.append(
                pattern % (idx, evalue, bitscore, aln_span, query_range, hit_range)
            )

        return "\n".join(lines)

    def __getitem__(self, idx):
        """Return the HSP object at the given index.

        If ``idx`` is a slice, a new Hit holding the selected HSPs is returned
        instead, with this object's attributes transferred to it.
        """
        if isinstance(idx, slice):
            obj = self.__class__(self.hsps[idx])
            self._transfer_attrs(obj)
            return obj
        return self._items[idx]

    def __setitem__(self, idx, hsps):
        """Assign hsps to index idx.

        ``hsps`` may be a single HSP or a list/tuple of HSPs; every HSP is
        validated before the assignment is carried out.
        """
        # handle case if hsps is a list of hsp
        if isinstance(hsps, (list, tuple)):
            for hsp in hsps:
                self._validate_hsp(hsp)
        else:
            self._validate_hsp(hsps)

        self._items[idx] = hsps

    def __delitem__(self, idx):
        """Delete item of index idx."""
        del self._items[idx]

    # hsp properties #
    def _validate_hsp(self, hsp):
        """Validate an HSP object (PRIVATE).

        Valid HSP objects have the same hit ID, hit description, query ID and
        query description as the Hit object. Any of these that is still
        ``None`` on the Hit is set from the HSP instead of being compared.

        :raises TypeError: if ``hsp`` is not an HSP object
        :raises ValueError: if one of the compared values differs

        """
        if not isinstance(hsp, HSP):
            raise TypeError("Hit objects can only contain HSP objects.")
        # HACK: to make validation during __init__ work
        if not self._items:
            return

        # (label used in the error message, Hit attribute, HSP attribute)
        checks = (
            ("hit ID", "id", "hit_id"),
            ("hit description", "description", "hit_description"),
            ("query ID", "query_id", "query_id"),
            ("query description", "query_description", "query_description"),
        )
        for label, own_attr, hsp_attr in checks:
            expected = getattr(self, own_attr)
            found = getattr(hsp, hsp_attr)
            if expected is None:
                # nothing to compare against yet: adopt the HSP's value
                setattr(self, own_attr, found)
            elif found != expected:
                raise ValueError(
                    "Expected HSP with %s %r, found %r instead."
                    % (label, expected, found)
                )

    # properties #
    description = optionalcascade(
        "_description", "hit_description", """Hit description"""
    )
    query_description = optionalcascade(
        "_query_description",
        "query_description",
        """Description of the query that produced the hit""",
    )
    id = optionalcascade("_id", "hit_id", """Hit ID string.""")
    query_id = optionalcascade(
        "_query_id", "query_id", """ID string of the query that produced the hit"""
    )
    # returns all hsps
    hsps = allitems(doc="""HSP objects contained in the Hit""")

    @property
    def id_all(self):
        """Return the Hit ID followed by its alternative ID(s)."""
        return [self.id] + self._id_alt

    @property
    def description_all(self):
        """Return the Hit description followed by its alternative ones."""
        return [self.description] + self._description_alt

    @property
    def fragments(self):
        """Access the HSPFragment objects contained in the Hit."""
        # flatten lazily instead of unpacking every HSP as an argument
        return list(chain.from_iterable(self._items))

    # public methods #
    def append(self, hsp):
        """Add a HSP object to the end of Hit.

        :param hsp: object to append
        :type hsp: HSP

        Any HSP object appended must have the same ``hit_id`` property as the
        Hit object's ``id`` property and the same ``query_id`` property as the
        Hit object's ``query_id`` property.

        """
        self._validate_hsp(hsp)
        self._items.append(hsp)

    def filter(self, func=None):
        """Create new Hit object whose HSP objects pass the filter function.

        :param func: function for filtering
        :type func: callable, accepts HSP, returns bool
        :returns: a new Hit with the HSPs that passed, or ``None`` if no HSP
            passed the filter

        ``filter`` is analogous to Python's built-in ``filter`` function, except
        that instead of returning a list it returns a ``Hit`` object. Here is an
        example of using ``filter`` to select for HSPs having bitscores bigger
        than 60::

            >>> from Bio import SearchIO
            >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
            >>> hit = qresult[3]
            >>> bitscore_filter = lambda hsp: hsp.bitscore > 60
            >>> filtered_hit = hit.filter(bitscore_filter)
            >>> len(hit)
            2
            >>> len(filtered_hit)
            1
            >>> print(filtered_hit)
            Query: 33211
             mir_1
             Hit: gi|301171322|ref|NR_035857.1| (86)
             Pan troglodytes microRNA mir-520c (MIR520C), microRNA
             HSPs: ---- -------- --------- ------ --------------- ---------------------
                      #  E-value Bit score   Span     Query range             Hit range
                   ---- -------- --------- ------ --------------- ---------------------
                      0  8.9e-20    100.47     60          [1:61]               [13:73]

        """
        hsps = list(filter(func, self.hsps))
        if not hsps:
            # no HSP passed: callers get None rather than an empty Hit
            return None
        obj = self.__class__(hsps)
        self._transfer_attrs(obj)
        return obj

    def index(self, hsp):
        """Return the index of a given HSP object, zero-based.

        :param hsp: object to look up
        :type hsp: HSP

        """
        return self._items.index(hsp)

    def map(self, func=None):
        """Create new Hit object, mapping the given function to its HSPs.

        :param func: function for mapping
        :type func: callable, accepts HSP, returns HSP
        :returns: a new Hit holding the mapped HSPs, or ``None`` if this Hit
            has no HSPs

        ``map`` is analogous to Python's built-in ``map`` function. It is applied to
        all HSPs contained in the Hit object and returns a new Hit object.

        """
        # work on a shallow copy of the HSP list
        hsps = self.hsps[:]
        if func is not None:
            hsps = [func(x) for x in hsps]
        if not hsps:
            # nothing to put in a new Hit
            return None
        obj = self.__class__(hsps)
        self._transfer_attrs(obj)
        return obj

    def pop(self, index=-1):
        """Remove and return the HSP object at the specified index.

        :param index: index of HSP object to pop
        :type index: int

        """
        return self._items.pop(index)

    def sort(self, key=None, reverse=False, in_place=True):
        """Sort the HSP objects.

        :param key: sorting function
        :type key: callable, accepts HSP, returns key for sorting
        :param reverse: whether to reverse sorting results or not
        :type reverse: bool
        :param in_place: whether to do in-place sorting or not
        :type in_place: bool
        :returns: ``None`` when sorting in place, otherwise a new sorted Hit

        ``sort`` defaults to sorting in-place, to mimic Python's ``list.sort``
        method. If you set the ``in_place`` argument to False, it will
        return a new, sorted Hit object and keep the initial one unsorted.

        """
        if in_place:
            self._items.sort(key=key, reverse=reverse)
        else:
            hsps = self.hsps[:]
            hsps.sort(key=key, reverse=reverse)
            obj = self.__class__(hsps)
            self._transfer_attrs(obj)
            return obj


# if not used as a module, run the doctest
if __name__ == "__main__":
    from Bio._utils import run_doctest

    run_doctest()