1# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*- 2# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 3# 4# MDAnalysis --- https://www.mdanalysis.org 5# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors 6# (see the file AUTHORS for the full list of names) 7# 8# Released under the GNU Public Licence, v2 or any higher version 9# 10# Please cite your use of MDAnalysis in published work: 11# 12# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, 13# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. 14# MDAnalysis: A Python package for the rapid analysis of molecular dynamics 15# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th 16# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. 17# 18# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. 19# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. 20# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 21# 22 23"""Atom selection Hierarchy --- :mod:`MDAnalysis.core.selection` 24============================================================= 25 26This module contains objects that represent selections. They are 27constructed and then applied to the group. 28 29In general, :meth:`Parser.parse` creates a :class:`Selection` object 30from a selection string. This :class:`Selection` object is then passed 31an :class:`~MDAnalysis.core.groups.AtomGroup` through its 32:meth:`~MDAnalysis.core.groups.AtomGroup.apply` method to apply the 33``Selection`` to the ``AtomGroup``. 34 35This is all invisible to the user through the 36:meth:`~MDAnalysis.core.groups.AtomGroup.select_atoms` method of an 37:class:`~MDAnalysis.core.groups.AtomGroup`. 38 39""" 40from __future__ import division, absolute_import 41import six 42from six.moves import zip 43 44import collections 45import re 46import functools 47import warnings 48 49import numpy as np 50from numpy.lib.utils import deprecate 51 52 53from MDAnalysis.lib.util import unique_int_1d 54from MDAnalysis.core import flags 55from ..lib import distances 56from ..exceptions import SelectionError, NoDataError 57 58 59def is_keyword(val): 60 """Is val a selection keyword? 61 62 Returns False on any of the following strings: 63 - keys in SELECTIONDICT (tokens from Selection objects) 64 - keys in OPERATIONS (tokens from LogicOperations) 65 - (Parentheses) 66 - The value `None` (used as EOF in selection strings) 67 """ 68 return (val in _SELECTIONDICT or 69 val in _OPERATIONS or 70 val in ['(', ')'] or 71 val is None) 72 73 74def grab_not_keywords(tokens): 75 """Pop tokens from the left until you hit a keyword 76 77 Parameters 78 ---------- 79 tokens : collections.deque 80 deque of strings, some tokens some not 81 82 Returns 83 ------- 84 values : list of strings 85 All non keywords found until a keyword was hit 86 87 Note 88 ---- 89 This function pops the values from the deque 90 91 Examples 92 -------- 93 grab_not_keywords(['H', 'and','resname', 'MET']) 94 >>> ['H'] 95 96 grab_not_keywords(['H', 'Ca', 'N', 'and','resname', 'MET']) 97 >>> ['H', 'Ca' ,'N'] 98 99 grab_not_keywords(['and','resname', 'MET']) 100 >>> [] 101 """ 102 values = [] 103 while not is_keyword(tokens[0]): 104 val = tokens.popleft() 105 # Insert escape characters here to use keywords as names? 106 values.append(val) 107 return values 108 109 110_SELECTIONDICT = {} 111_OPERATIONS = {} 112# These are named args to select_atoms that have a special meaning and must 113# not be allowed as names for the 'group' keyword. 114_RESERVED_KWARGS=('updating',) 115 116 117# And and Or are exception and aren't strictly a Selection 118# as they work on other Selections rather than doing work themselves. 119# So their init is a little strange too.... 120class _Operationmeta(type): 121 def __init__(cls, name, bases, classdict): 122 type.__init__(type, name, bases, classdict) 123 try: 124 _OPERATIONS[classdict['token']] = cls 125 except KeyError: 126 pass 127 128 129class LogicOperation(six.with_metaclass(_Operationmeta, object)): 130 def __init__(self, lsel, rsel): 131 self.rsel = rsel 132 self.lsel = lsel 133 134 135class AndOperation(LogicOperation): 136 token = 'and' 137 precedence = 3 138 139 def apply(self, group): 140 rsel = self.rsel.apply(group) 141 lsel = self.lsel.apply(group) 142 143 # Mask which lsel indices appear in rsel 144 mask = np.in1d(rsel.indices, lsel.indices) 145 # and mask rsel according to that 146 return rsel[mask].unique 147 148 149class OrOperation(LogicOperation): 150 token = 'or' 151 precedence = 3 152 153 def apply(self, group): 154 lsel = self.lsel.apply(group) 155 rsel = self.rsel.apply(group) 156 157 # Find unique indices from both these AtomGroups 158 # and slice master list using them 159 idx = np.union1d(lsel.indices, rsel.indices).astype(np.int32) 160 161 return group.universe.atoms[idx] 162 163 164class _Selectionmeta(type): 165 def __init__(cls, name, bases, classdict): 166 type.__init__(type, name, bases, classdict) 167 try: 168 _SELECTIONDICT[classdict['token']] = cls 169 except KeyError: 170 pass 171 172 173class Selection(six.with_metaclass(_Selectionmeta, object)): 174 pass 175 176 177class AllSelection(Selection): 178 token = 'all' 179 180 def __init__(self, parser, tokens): 181 pass 182 183 def apply(self, group): 184 # Check whether group is identical to the one stored 185 # in the corresponding universe, in which case this 186 # is returned directly. This works since the Universe.atoms 187 # are unique by construction. 188 if group is group.universe.atoms: 189 return group 190 return group[:].unique 191 192 193class UnarySelection(Selection): 194 def __init__(self, parser, tokens): 195 sel = parser.parse_expression(self.precedence) 196 self.sel = sel 197 198 199class NotSelection(UnarySelection): 200 token = 'not' 201 precedence = 5 202 203 def apply(self, group): 204 notsel = self.sel.apply(group) 205 return group[~np.in1d(group.indices, notsel.indices)].unique 206 207 208class GlobalSelection(UnarySelection): 209 token = 'global' 210 precedence = 5 211 212 def apply(self, group): 213 return self.sel.apply(group.universe.atoms).unique 214 215 216class ByResSelection(UnarySelection): 217 token = 'byres' 218 precedence = 1 219 220 def apply(self, group): 221 res = self.sel.apply(group) 222 unique_res = unique_int_1d(res.resids.astype(np.int64)) 223 mask = np.in1d(group.resids, unique_res) 224 225 return group[mask].unique 226 227 228class DistanceSelection(Selection): 229 """Base class for distance search based selections""" 230 231 def validate_dimensions(self, dimensions): 232 r"""Check if the system is periodic in all three-dimensions. 233 234 Parameters 235 ---------- 236 dimensions : numpy.ndarray 237 6-item array denoting system size and angles 238 239 Returns 240 ------- 241 None or numpy.ndarray 242 Returns argument dimensions if system is periodic in all 243 three-dimensions, otherwise returns None 244 """ 245 if self.periodic and all(dimensions[:3]): 246 return dimensions 247 return None 248 249class AroundSelection(DistanceSelection): 250 token = 'around' 251 precedence = 1 252 253 def __init__(self, parser, tokens): 254 self.periodic = parser.periodic 255 self.cutoff = float(tokens.popleft()) 256 self.sel = parser.parse_expression(self.precedence) 257 258 def apply(self, group): 259 indices = [] 260 sel = self.sel.apply(group) 261 # All atoms in group that aren't in sel 262 sys = group[~np.in1d(group.indices, sel.indices)] 263 264 if not sys or not sel: 265 return sys[[]] 266 267 box = self.validate_dimensions(group.dimensions) 268 pairs = distances.capped_distance(sel.positions, sys.positions, 269 self.cutoff, box=box, 270 return_distances=False) 271 if pairs.size > 0: 272 indices = np.sort(pairs[:, 1]) 273 274 return sys[np.asarray(indices, dtype=np.int64)].unique 275 276class SphericalLayerSelection(DistanceSelection): 277 token = 'sphlayer' 278 precedence = 1 279 280 def __init__(self, parser, tokens): 281 self.periodic = parser.periodic 282 self.inRadius = float(tokens.popleft()) 283 self.exRadius = float(tokens.popleft()) 284 self.sel = parser.parse_expression(self.precedence) 285 286 def apply(self, group): 287 indices = [] 288 sel = self.sel.apply(group) 289 box = self.validate_dimensions(group.dimensions) 290 periodic = box is not None 291 ref = sel.center_of_geometry().reshape(1, 3).astype(np.float32) 292 pairs = distances.capped_distance(ref, group.positions, self.exRadius, 293 min_cutoff=self.inRadius, 294 box=box, 295 return_distances=False) 296 if pairs.size > 0: 297 indices = np.sort(pairs[:, 1]) 298 299 return group[np.asarray(indices, dtype=np.int64)].unique 300 301 302class SphericalZoneSelection(DistanceSelection): 303 token = 'sphzone' 304 precedence = 1 305 306 def __init__(self, parser, tokens): 307 self.periodic = parser.periodic 308 self.cutoff = float(tokens.popleft()) 309 self.sel = parser.parse_expression(self.precedence) 310 311 def apply(self, group): 312 indices = [] 313 sel = self.sel.apply(group) 314 box = self.validate_dimensions(group.dimensions) 315 periodic = box is not None 316 ref = sel.center_of_geometry().reshape(1, 3).astype(np.float32) 317 pairs = distances.capped_distance(ref, group.positions, self.cutoff, 318 box=box, 319 return_distances=False) 320 if pairs.size > 0: 321 indices = np.sort(pairs[:, 1]) 322 323 return group[np.asarray(indices, dtype=np.int64)].unique 324 325 326class CylindricalSelection(Selection): 327 def apply(self, group): 328 sel = self.sel.apply(group) 329 330 # Calculate vectors between point of interest and our group 331 vecs = group.positions - sel.center_of_geometry() 332 333 if self.periodic and not np.any(group.dimensions[:3] == 0): 334 box = group.dimensions[:3] 335 cyl_z_hheight = self.zmax - self.zmin 336 337 if 2 * self.exRadius > box[0]: 338 raise NotImplementedError( 339 "The diameter of the cylinder selection ({:.3f}) is larger " 340 "than the unit cell's x dimension ({:.3f}). Can only do " 341 "selections where it is smaller or equal." 342 "".format(2*self.exRadius, box[0])) 343 if 2 * self.exRadius > box[1]: 344 raise NotImplementedError( 345 "The diameter of the cylinder selection ({:.3f}) is larger " 346 "than the unit cell's y dimension ({:.3f}). Can only do " 347 "selections where it is smaller or equal." 348 "".format(2*self.exRadius, box[1])) 349 if cyl_z_hheight > box[2]: 350 raise NotImplementedError( 351 "The total length of the cylinder selection in z ({:.3f}) " 352 "is larger than the unit cell's z dimension ({:.3f}). Can " 353 "only do selections where it is smaller or equal." 354 "".format(cyl_z_hheight, box[2])) 355 356 if np.all(group.dimensions[3:] == 90.): 357 # Orthogonal version 358 vecs -= box[:3] * np.rint(vecs / box[:3]) 359 else: 360 # Triclinic version 361 tribox = group.universe.trajectory.ts.triclinic_dimensions 362 vecs -= tribox[2] * np.rint(vecs[:, 2] / tribox[2][2])[:, None] 363 vecs -= tribox[1] * np.rint(vecs[:, 1] / tribox[1][1])[:, None] 364 vecs -= tribox[0] * np.rint(vecs[:, 0] / tribox[0][0])[:, None] 365 366 # First deal with Z dimension criteria 367 mask = (vecs[:, 2] > self.zmin) & (vecs[:, 2] < self.zmax) 368 # Mask out based on height to reduce number of radii comparisons 369 vecs = vecs[mask] 370 group = group[mask] 371 372 # Radial vectors from sel to each in group 373 radii = vecs[:, 0]**2 + vecs[:, 1]**2 374 mask = radii < self.exRadius**2 375 try: 376 mask &= radii > self.inRadius**2 377 except AttributeError: 378 # Only for cylayer, cyzone doesn't have inRadius 379 pass 380 381 return group[mask].unique 382 383 384class CylindricalZoneSelection(CylindricalSelection): 385 token = 'cyzone' 386 precedence = 1 387 388 def __init__(self, parser, tokens): 389 self.periodic = parser.periodic 390 self.exRadius = float(tokens.popleft()) 391 self.zmax = float(tokens.popleft()) 392 self.zmin = float(tokens.popleft()) 393 self.sel = parser.parse_expression(self.precedence) 394 395 396class CylindricalLayerSelection(CylindricalSelection): 397 token = 'cylayer' 398 precedence = 1 399 400 def __init__(self, parser, tokens): 401 self.periodic = parser.periodic 402 self.inRadius = float(tokens.popleft()) 403 self.exRadius = float(tokens.popleft()) 404 self.zmax = float(tokens.popleft()) 405 self.zmin = float(tokens.popleft()) 406 self.sel = parser.parse_expression(self.precedence) 407 408 409class PointSelection(DistanceSelection): 410 token = 'point' 411 412 def __init__(self, parser, tokens): 413 self.periodic = parser.periodic 414 x = float(tokens.popleft()) 415 y = float(tokens.popleft()) 416 z = float(tokens.popleft()) 417 self.ref = np.array([x, y, z], dtype=np.float32) 418 self.cutoff = float(tokens.popleft()) 419 420 def apply(self, group): 421 indices = [] 422 box = self.validate_dimensions(group.dimensions) 423 pairs = distances.capped_distance(self.ref[None, :], group.positions, self.cutoff, 424 box=box, 425 return_distances=False) 426 if pairs.size > 0: 427 indices = np.sort(pairs[:, 1]) 428 429 return group[np.asarray(indices, dtype=np.int64)].unique 430 431 432class AtomSelection(Selection): 433 token = 'atom' 434 435 def __init__(self, parser, tokens): 436 self.segid = tokens.popleft() 437 self.resid = int(tokens.popleft()) 438 self.name = tokens.popleft() 439 440 def apply(self, group): 441 sub = group[group.names == self.name] 442 if sub: 443 sub = sub[sub.resids == self.resid] 444 if sub: 445 sub = sub[sub.segids == self.segid] 446 return sub.unique 447 448 449class BondedSelection(Selection): 450 token = 'bonded' 451 precedence = 1 452 453 def __init__(self, parser, tokens): 454 self.sel = parser.parse_expression(self.precedence) 455 456 def apply(self, group): 457 grp = self.sel.apply(group) 458 # Check if we have bonds 459 if not group.bonds: 460 warnings.warn("Bonded selection has 0 bonds") 461 return group[[]] 462 463 grpidx = grp.indices 464 465 # (n, 2) array of bond indices 466 bix = np.array(group.bonds.to_indices()) 467 468 idx = [] 469 # left side 470 idx.append(bix[:, 0][np.in1d(bix[:, 1], grpidx)]) 471 # right side 472 idx.append(bix[:, 1][np.in1d(bix[:, 0], grpidx)]) 473 474 idx = np.union1d(*idx) 475 476 return group.universe.atoms[np.unique(idx)] 477 478 479class SelgroupSelection(Selection): 480 token = 'group' 481 482 def __init__(self, parser, tokens): 483 grpname = tokens.popleft() 484 if grpname in _RESERVED_KWARGS: 485 raise TypeError("The '{}' keyword is reserved and cannot be " 486 "used as a selection group name." 487 .format(grpname)) 488 try: 489 self.grp = parser.selgroups[grpname] 490 except KeyError: 491 raise ValueError("Failed to find group: {0}".format(grpname)) 492 493 def apply(self, group): 494 mask = np.in1d(group.indices, self.grp.indices) 495 return group[mask] 496 497# TODO: remove in 1.0 (should have been removed in 0.15.0) 498class FullSelgroupSelection(Selection): 499 token = 'fullgroup' 500 501 def __init__(self, parser, tokens): 502 grpname = tokens.popleft() 503 try: 504 self.grp = parser.selgroups[grpname] 505 except KeyError: 506 raise ValueError("Failed to find group: {0}".format(grpname)) 507 508 @deprecate(old_name='fullgroup', new_name='global group', 509 message=' This will be removed in v0.15.0') 510 def apply(self, group): 511 return self.grp.unique 512 513 514class StringSelection(Selection): 515 """Selections based on text attributes 516 517 Supports the use of wildcards at the end of strings 518 """ 519 def __init__(self, parser, tokens): 520 vals = grab_not_keywords(tokens) 521 if not vals: 522 raise ValueError("Unexpected token '{0}'".format(tokens[0])) 523 524 self.values = vals 525 526 def apply(self, group): 527 mask = np.zeros(len(group), dtype=np.bool) 528 for val in self.values: 529 wc_pos = val.find('*') 530 if wc_pos == -1: # No wildcard found 531 mask |= getattr(group, self.field) == val 532 else: 533 values = getattr(group, self.field).astype(np.str_) 534 mask |= np.char.startswith(values, val[:wc_pos]) 535 536 return group[mask].unique 537 538 539class AtomNameSelection(StringSelection): 540 """Select atoms based on 'names' attribute""" 541 token = 'name' 542 field = 'names' 543 544 545class AtomTypeSelection(StringSelection): 546 """Select atoms based on 'types' attribute""" 547 token = 'type' 548 field = 'types' 549 550 551class AtomICodeSelection(StringSelection): 552 """Select atoms based on icode attribute""" 553 token = 'icode' 554 field = 'icodes' 555 556 557class ResidueNameSelection(StringSelection): 558 """Select atoms based on 'resnames' attribute""" 559 token = 'resname' 560 field = 'resnames' 561 562 563class MoleculeTypeSelection(StringSelection): 564 """Select atoms based on 'moltypes' attribute""" 565 token = 'moltype' 566 field = 'moltypes' 567 568 569class SegmentNameSelection(StringSelection): 570 """Select atoms based on 'segids' attribute""" 571 token = 'segid' 572 field = 'segids' 573 574 575class AltlocSelection(StringSelection): 576 """Select atoms based on 'altLoc' attribute""" 577 token = 'altloc' 578 field = 'altLocs' 579 580 581class ResidSelection(Selection): 582 """Select atoms based on numerical fields 583 584 Allows the use of ':' and '-' to specify a range of values 585 For example 586 587 resid 1:10 588 """ 589 token = 'resid' 590 591 def __init__(self, parser, tokens): 592 values = grab_not_keywords(tokens) 593 if not values: 594 raise ValueError("Unexpected token: '{0}'".format(tokens[0])) 595 596 # each value in uppers and lowers is a tuple of (resid, icode) 597 uppers = [] 598 lowers = [] 599 600 for val in values: 601 m1 = re.match("(\d+)(\w?)$", val) 602 if not m1 is None: 603 res = m1.groups() 604 lower = int(res[0]), res[1] 605 upper = None, None 606 else: 607 # check if in appropriate format 'lower:upper' or 'lower-upper' 608 # each val is one or more digits, maybe a letter 609 selrange = re.match("(\d+)(\w?)[:-](\d+)(\w?)", val) 610 if selrange is None: # re.match returns None on failure 611 raise ValueError("Failed to parse value: {0}".format(val)) 612 res = selrange.groups() 613 # resid and icode 614 lower = int(res[0]), res[1] 615 upper = int(res[2]), res[3] 616 617 lowers.append(lower) 618 uppers.append(upper) 619 620 self.lowers = lowers 621 self.uppers = uppers 622 623 def apply(self, group): 624 # Grab arrays here to reduce number of calls to main topology 625 vals = group.resids 626 try: # optional attribute 627 icodes = group.icodes 628 except (AttributeError, NoDataError): 629 icodes = None 630 # if no icodes and icodes are part of selection, cause a fuss 631 if (any(v[1] for v in self.uppers) or 632 any(v[1] for v in self.lowers)): 633 raise ValueError("Selection specified icodes, while the " 634 "topology doesn't have any.") 635 636 if not icodes is None: 637 mask = self._sel_with_icodes(vals, icodes) 638 else: 639 mask = self._sel_without_icodes(vals) 640 641 return group[mask].unique 642 643 def _sel_without_icodes(self, vals): 644 # Final mask that gets applied to group 645 mask = np.zeros(len(vals), dtype=np.bool) 646 647 for (u_resid, _), (l_resid, _) in zip(self.uppers, self.lowers): 648 if u_resid is not None: # range selection 649 thismask = vals >= l_resid 650 thismask &= vals <= u_resid 651 else: # single residue selection 652 thismask = vals == l_resid 653 654 mask |= thismask 655 656 return mask 657 658 def _sel_with_icodes(self, vals, icodes): 659 # Final mask that gets applied to group 660 mask = np.zeros(len(vals), dtype=np.bool) 661 662 for (u_resid, u_icode), (l_resid, l_icode) in zip(self.uppers, self.lowers): 663 if u_resid is not None: # Selecting a range 664 # Special case, if l_resid == u_resid, ie 163A-163C, this simplifies to: 665 # all 163, and A <= icode <= C 666 if l_resid == u_resid: 667 thismask = vals == l_resid 668 thismask &= icodes >= l_icode 669 thismask &= icodes <= u_icode 670 # For 163A to 166B we want: 671 # [START] all 163 and icode >= 'A' 672 # [MIDDLE] all of 164 and 165, any icode 673 # [END] 166 and icode <= 'B' 674 else: 675 # start of range 676 startmask = vals == l_resid 677 startmask &= icodes >= l_icode 678 thismask = startmask 679 680 # middle of range 681 mid = np.arange(l_resid + 1, u_resid) 682 if len(mid): # if there are any resids in the middle 683 mid_beg, mid_end = mid[0], mid[-1] 684 midmask = vals >= mid_beg 685 midmask &= vals <= mid_end 686 687 thismask |= midmask 688 689 # end of range 690 endmask = vals == u_resid 691 endmask &= icodes <= u_icode 692 693 thismask |= endmask 694 else: # Selecting a single residue 695 thismask = vals == l_resid 696 thismask &= icodes == l_icode 697 698 mask |= thismask 699 700 return mask 701 702 703class RangeSelection(Selection): 704 value_offset=0 705 706 def __init__(self, parser, tokens): 707 values = grab_not_keywords(tokens) 708 if not values: 709 raise ValueError("Unexpected token: '{0}'".format(tokens[0])) 710 711 uppers = [] # upper limit on any range 712 lowers = [] # lower limit on any range 713 714 for val in values: 715 try: 716 lower = int(val) 717 upper = None 718 except ValueError: 719 # check if in appropriate format 'lower:upper' or 'lower-upper' 720 selrange = re.match("(\d+)[:-](\d+)", val) 721 if not selrange: 722 raise ValueError( 723 "Failed to parse number: {0}".format(val)) 724 lower, upper = np.int64(selrange.groups()) 725 726 lowers.append(lower) 727 uppers.append(upper) 728 729 self.lowers = lowers 730 self.uppers = uppers 731 732 def apply(self, group): 733 mask = np.zeros(len(group), dtype=np.bool) 734 vals = getattr(group, self.field) + self.value_offset 735 736 for upper, lower in zip(self.uppers, self.lowers): 737 if upper is not None: 738 thismask = vals >= lower 739 thismask &= vals <= upper 740 else: 741 thismask = vals == lower 742 743 mask |= thismask 744 return group[mask].unique 745 746 747class ResnumSelection(RangeSelection): 748 token = 'resnum' 749 field = 'resnums' 750 751 752class ByNumSelection(RangeSelection): 753 token = 'bynum' 754 field = 'indices' 755 value_offset = 1 # queries are in 1 based indices 756 757 758class MolidSelection(RangeSelection): 759 token = 'molnum' 760 field = 'molnums' 761 762 763class ProteinSelection(Selection): 764 """Consists of all residues with recognized residue names. 765 766 Recognized residue names in :attr:`ProteinSelection.prot_res`. 767 768 * from the CHARMM force field:: 769 awk '/RESI/ {printf "'"'"%s"'"',",$2 }' top_all27_prot_lipid.rtf 770 771 * manually added special CHARMM, OPLS/AA and Amber residue names. 772 773 See Also 774 -------- 775 :func:`MDAnalysis.lib.util.convert_aa_code` 776 """ 777 token = 'protein' 778 779 prot_res = np.array([ 780 # CHARMM top_all27_prot_lipid.rtf 781 'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HSD', 782 'HSE', 'HSP', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 783 'TRP', 'TYR', 'VAL', 'ALAD', 784 ## 'CHO','EAM', # -- special formyl and ethanolamine termini of gramicidin 785 # PDB 786 'HIS', 'MSE', 787 # from Gromacs 4.5.3 oplsaa.ff/aminoacids.rtp 788 'ARGN', 'ASPH', 'CYS2', 'CYSH', 'QLN', 'PGLU', 'GLUH', 'HIS1', 'HISD', 789 'HISE', 'HISH', 'LYSH', 790 # from Gromacs 4.5.3 gromos53a6.ff/aminoacids.rtp 791 'ASN1', 'CYS1', 'HISA', 'HISB', 'HIS2', 792 # from Gromacs 4.5.3 amber03.ff/aminoacids.rtp 793 'HID', 'HIE', 'HIP', 'ORN', 'DAB', 'LYN', 'HYP', 'CYM', 'CYX', 'ASH', 794 'GLH', 'ACE', 'NME', 795 # from Gromacs 2016.3 amber99sb-star-ildn.ff/aminoacids.rtp 796 'NALA', 'NGLY', 'NSER', 'NTHR', 'NLEU', 'NILE', 'NVAL', 'NASN', 'NGLN', 797 'NARG', 'NHID', 'NHIE', 'NHIP', 'NTRP', 'NPHE', 'NTYR', 'NGLU', 'NASP', 798 'NLYS', 'NPRO', 'NCYS', 'NCYX', 'NMET', 'CALA', 'CGLY', 'CSER', 'CTHR', 799 'CLEU', 'CILE', 'CVAL', 'CASF', 'CASN', 'CGLN', 'CARG', 'CHID', 'CHIE', 800 'CHIP', 'CTRP', 'CPHE', 'CTYR', 'CGLU', 'CASP', 'CLYS', 'CPRO', 'CCYS', 801 'CCYX', 'CMET', 'CME', 'ASF', 802 ]) 803 804 def __init__(self, parser, tokens): 805 pass 806 807 def apply(self, group): 808 mask = np.in1d(group.resnames, self.prot_res) 809 return group[mask].unique 810 811 812class NucleicSelection(Selection): 813 """All atoms in nucleic acid residues with recognized residue names. 814 815 Recognized residue names: 816 817 * from the CHARMM force field :: 818 awk '/RESI/ {printf "'"'"%s"'"',",$2 }' top_all27_prot_na.rtf 819 * recognized: 'ADE', 'URA', 'CYT', 'GUA', 'THY' 820 * recognized (CHARMM in Gromacs): 'DA', 'DU', 'DC', 'DG', 'DT' 821 822 .. versionchanged:: 0.8 823 additional Gromacs selections 824 """ 825 token = 'nucleic' 826 827 nucl_res = np.array([ 828 'ADE', 'URA', 'CYT', 'GUA', 'THY', 'DA', 'DC', 'DG', 'DT', 'RA', 829 'RU', 'RG', 'RC', 'A', 'T', 'U', 'C', 'G', 830 'DA5', 'DC5', 'DG5', 'DT5', 831 'DA3', 'DC3', 'DG3', 'DT3', 832 'RA5', 'RU5', 'RG5', 'RC5', 833 'RA3', 'RU3', 'RG3', 'RC3' 834 ]) 835 836 def __init__(self, parser, tokens): 837 pass 838 839 def apply(self, group): 840 mask = np.in1d(group.resnames, self.nucl_res) 841 return group[mask].unique 842 843 844class BackboneSelection(ProteinSelection): 845 """A BackboneSelection contains all atoms with name 'N', 'CA', 'C', 'O'. 846 847 This excludes OT* on C-termini 848 (which are included by, eg VMD's backbone selection). 849 """ 850 token = 'backbone' 851 bb_atoms = np.array(['N', 'CA', 'C', 'O']) 852 853 def apply(self, group): 854 mask = np.in1d(group.names, self.bb_atoms) 855 mask &= np.in1d(group.resnames, self.prot_res) 856 return group[mask].unique 857 858 859class NucleicBackboneSelection(NucleicSelection): 860 """Contains all atoms with name "P", "C5'", C3'", "O3'", "O5'". 861 862 These atoms are only recognized if they are in a residue matched 863 by the :class:`NucleicSelection`. 864 """ 865 token = 'nucleicbackbone' 866 bb_atoms = np.array(["P", "C5'", "C3'", "O3'", "O5'"]) 867 868 def apply(self, group): 869 mask = np.in1d(group.names, self.bb_atoms) 870 mask &= np.in1d(group.resnames, self.nucl_res) 871 return group[mask].unique 872 873 874class BaseSelection(NucleicSelection): 875 """Selection of atoms in nucleobases. 876 877 Recognized atom names (from CHARMM): 878 879 'N9', 'N7', 'C8', 'C5', 'C4', 'N3', 'C2', 'N1', 'C6', 880 'O6','N2','N6', 'O2','N4','O4','C5M' 881 """ 882 token = 'nucleicbase' 883 base_atoms = np.array([ 884 'N9', 'N7', 'C8', 'C5', 'C4', 'N3', 'C2', 'N1', 'C6', 885 'O6', 'N2', 'N6', 886 'O2', 'N4', 'O4', 'C5M']) 887 888 def apply(self, group): 889 mask = np.in1d(group.names, self.base_atoms) 890 mask &= np.in1d(group.resnames, self.nucl_res) 891 return group[mask].unique 892 893 894class NucleicSugarSelection(NucleicSelection): 895 """Contains all atoms with name C1', C2', C3', C4', O2', O4', O3'. 896 """ 897 token = 'nucleicsugar' 898 sug_atoms = np.array(["C1'", "C2'", "C3'", "C4'", "O4'"]) 899 900 def apply(self, group): 901 mask = np.in1d(group.names, self.sug_atoms) 902 mask &= np.in1d(group.resnames, self.nucl_res) 903 return group[mask].unique 904 905 906class PropertySelection(Selection): 907 """Some of the possible properties: 908 x, y, z, radius, mass, 909 """ 910 token = 'prop' 911 ops = dict([ 912 ('>', np.greater), 913 ('<', np.less), 914 ('>=', np.greater_equal), 915 ('<=', np.less_equal), 916 ('==', np.equal), 917 ('!=', np.not_equal), 918 ]) 919 # order here is important, need to check <= before < so the 920 # larger (in terms of string length) symbol is considered first 921 _op_symbols = ('<=', '>=', '==', '!=', '<', '>') 922 923 # symbols to replace with when flipping 924 # eg 6 > x -> x <= 6, 5 == x -> x == 5 925 opposite_ops = { 926 '==': '==', '!=': '!=', 927 '<': '>=', '>=': '<', 928 '>': '<=', '<=': '>', 929 } 930 931 props = {'mass', 'charge', 'x', 'y', 'z'} 932 933 def __init__(self, parser, tokens): 934 """ 935 Possible splitting around operator: 936 937 prop x < 5 938 prop x< 5 939 prop x <5 940 prop x<5 941 """ 942 prop = tokens.popleft() 943 oper = None 944 value = None 945 if prop == "abs": 946 self.absolute = True 947 prop = tokens.popleft() 948 else: 949 self.absolute = False 950 951 # check if prop has any extra information atm 952 for possible in self._op_symbols: 953 try: 954 x, y = prop.split(possible) 955 except ValueError: 956 # won't unpack into 2 args unless *possible* is present 957 pass 958 else: 959 prop = x 960 oper = possible + y # add back after splitting 961 break 962 963 if oper is None: 964 oper = tokens.popleft() 965 # check if oper has the value appended 966 for possible in self._op_symbols: 967 if possible in oper: 968 x, y = oper.split(possible) 969 if y: # '<='.split('<=') == ['', ''], therefore y won't exist 970 oper = possible 971 value = y 972 break 973 974 if value is None: 975 value = tokens.popleft() 976 977 # check if we flip prop and value 978 # eg 5 > x -> x <= 5 979 if value in self.props: 980 prop, value = value, prop 981 oper = self.opposite_ops[oper] 982 983 self.prop = prop 984 try: 985 self.operator = self.ops[oper] 986 except KeyError: 987 raise ValueError( 988 "Invalid operator : '{0}' Use one of : '{1}'" 989 "".format(oper, self.ops.keys())) 990 self.value = float(value) 991 992 def apply(self, group): 993 try: 994 col = {'x': 0, 'y': 1, 'z': 2}[self.prop] 995 except KeyError: 996 if self.prop == 'mass': 997 values = group.masses 998 elif self.prop == 'charge': 999 values = group.charges 1000 else: 1001 raise SelectionError( 1002 "Expected one of : {0}" 1003 "".format(['x', 'y', 'z', 'mass', 'charge'])) 1004 else: 1005 values = group.positions[:, col] 1006 1007 if self.absolute: 1008 values = np.abs(values) 1009 mask = self.operator(values, self.value) 1010 1011 return group[mask].unique 1012 1013 1014class SameSelection(Selection): 1015 token = 'same' 1016 precedence = 1 1017 1018 prop_trans = { 1019 'fragment': None, 1020 'x': None, 1021 'y': None, 1022 'z': None, 1023 'residue': 'resids', 1024 'segment': 'segids', 1025 'name': 'names', 1026 'type': 'types', 1027 'resname': 'resnames', 1028 'resid': 'resids', 1029 'segid': 'segids', 1030 'mass': 'masses', 1031 'charge': 'charges', 1032 'radius': 'radii', 1033 'bfactor': 'bfactors', 1034 'resnum': 'resnums', 1035 } 1036 1037 def __init__(self, parser, tokens): 1038 prop = tokens.popleft() 1039 if prop not in self.prop_trans: 1040 raise ValueError("Unknown same property : {0}" 1041 "Choose one of : {1}" 1042 "".format(prop, self.prop_trans.keys())) 1043 self.prop = prop 1044 parser.expect("as") 1045 self.sel = parser.parse_expression(self.precedence) 1046 self.prop = prop 1047 1048 def apply(self, group): 1049 res = self.sel.apply(group) 1050 if not res: 1051 return group[[]] # empty selection 1052 1053 # Fragment must come before self.prop_trans lookups! 1054 if self.prop == 'fragment': 1055 # Combine all fragments together, then check where group 1056 # indices are same as fragment(s) indices 1057 allfrags = functools.reduce(lambda x, y: x + y, res.fragments) 1058 1059 mask = np.in1d(group.indices, allfrags.indices) 1060 return group[mask].unique 1061 # [xyz] must come before self.prop_trans lookups too! 1062 try: 1063 pos_idx = {'x': 0, 'y': 1, 'z': 2}[self.prop] 1064 except KeyError: 1065 # The self.prop string was already checked, 1066 # so don't need error checking here. 1067 # KeyError at this point is impossible! 1068 attrname = self.prop_trans[self.prop] 1069 vals = getattr(res, attrname) 1070 mask = np.in1d(getattr(group, attrname), vals) 1071 1072 return group[mask].unique 1073 else: 1074 vals = res.positions[:, pos_idx] 1075 pos = group.positions[:, pos_idx] 1076 1077 # isclose only does one value at a time 1078 mask = np.vstack([np.isclose(pos, v) 1079 for v in vals]).any(axis=0) 1080 return group[mask].unique 1081 1082 1083class SelectionParser(object): 1084 """A small parser for selection expressions. Demonstration of 1085 recursive descent parsing using Precedence climbing (see 1086 http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm). Transforms 1087 expressions into nested Selection tree. 1088 1089 For reference, the grammar that we parse is :: 1090 1091 E(xpression)--> Exp(0) 1092 Exp(p) --> P {B Exp(q)} 1093 P --> U Exp(q) | "(" E ")" | v 1094 B(inary) --> "and" | "or" 1095 U(nary) --> "not" 1096 T(erms) --> segid [value] 1097 | resname [value] 1098 | resid [value] 1099 | name [value] 1100 | type [value] 1101 """ 1102 # Borg pattern: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531 1103 _shared_state = {} 1104 1105 def __new__(cls, *p, **k): 1106 self = object.__new__(cls, *p, **k) 1107 self.__dict__ = cls._shared_state 1108 return self 1109 1110 def expect(self, token): 1111 """Anticipate and remove a given token""" 1112 if self.tokens[0] == token: 1113 self.tokens.popleft() 1114 else: 1115 raise SelectionError( 1116 "Unexpected token: '{0}' Expected: '{1}'" 1117 "".format(self.tokens[0], token)) 1118 1119 def parse(self, selectstr, selgroups, periodic=None): 1120 """Create a Selection object from a string. 1121 1122 Parameters 1123 ---------- 1124 selectstr : str 1125 The string that describes the selection 1126 selgroups : AtomGroups 1127 AtomGroups to be used in `group` selections 1128 periodic : bool, optional 1129 for distance based selections, whether to consider 1130 periodic boundary conditions 1131 1132 Returns 1133 ------- 1134 The appropriate Selection object. Use the .apply method on 1135 this to perform the selection. 1136 1137 Raises 1138 ------ 1139 SelectionError 1140 If anything goes wrong in creating the Selection object. 1141 """ 1142 self.periodic = periodic 1143 1144 self.selectstr = selectstr 1145 self.selgroups = selgroups 1146 tokens = selectstr.replace('(', ' ( ').replace(')', ' ) ') 1147 self.tokens = collections.deque(tokens.split() + [None]) 1148 parsetree = self.parse_expression(0) 1149 if self.tokens[0] is not None: 1150 raise SelectionError( 1151 "Unexpected token at end of selection string: '{0}'" 1152 "".format(self.tokens[0])) 1153 return parsetree 1154 1155 def parse_expression(self, p): 1156 exp1 = self._parse_subexp() 1157 while (self.tokens[0] in _OPERATIONS and 1158 _OPERATIONS[self.tokens[0]].precedence >= p): 1159 op = _OPERATIONS[self.tokens.popleft()] 1160 q = 1 + op.precedence 1161 exp2 = self.parse_expression(q) 1162 exp1 = op(exp1, exp2) 1163 return exp1 1164 1165 def _parse_subexp(self): 1166 op = self.tokens.popleft() 1167 1168 if op == '(': 1169 exp = self.parse_expression(0) 1170 self.expect(')') 1171 return exp 1172 1173 try: 1174 return _SELECTIONDICT[op](self, self.tokens) 1175 except KeyError: 1176 raise SelectionError("Unknown selection token: '{0}'".format(op)) 1177 except ValueError as e: 1178 raise SelectionError("Selection failed: '{0}'".format(e)) 1179 1180 1181# The module level instance 1182Parser = SelectionParser() 1183