1# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*-
2# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
3#
4# MDAnalysis --- https://www.mdanalysis.org
5# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
6# (see the file AUTHORS for the full list of names)
7#
8# Released under the GNU Public Licence, v2 or any higher version
9#
10# Please cite your use of MDAnalysis in published work:
11#
12# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
13# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
14# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
15# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
16# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
17#
18# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
19# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
20# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
21#
22
23"""Atom selection Hierarchy --- :mod:`MDAnalysis.core.selection`
24=============================================================
25
26This module contains objects that represent selections. They are
27constructed and then applied to the group.
28
29In general, :meth:`Parser.parse` creates a :class:`Selection` object
30from a selection string. This :class:`Selection` object is then passed
31an :class:`~MDAnalysis.core.groups.AtomGroup` through its
32:meth:`~MDAnalysis.core.groups.AtomGroup.apply` method to apply the
33``Selection`` to the ``AtomGroup``.
34
35This is all invisible to the user through the
36:meth:`~MDAnalysis.core.groups.AtomGroup.select_atoms` method of an
37:class:`~MDAnalysis.core.groups.AtomGroup`.
38
39"""
40from __future__ import division, absolute_import
41import six
42from six.moves import zip
43
44import collections
45import re
46import functools
47import warnings
48
49import numpy as np
50from numpy.lib.utils import deprecate
51
52
53from MDAnalysis.lib.util import unique_int_1d
54from MDAnalysis.core import flags
55from ..lib import distances
56from ..exceptions import SelectionError, NoDataError
57
58
59def is_keyword(val):
60    """Is val a selection keyword?
61
62    Returns False on any of the following strings:
63      - keys in SELECTIONDICT (tokens from Selection objects)
64      - keys in OPERATIONS (tokens from LogicOperations)
65      - (Parentheses)
66      - The value `None` (used as EOF in selection strings)
67    """
68    return (val in _SELECTIONDICT or
69            val in _OPERATIONS or
70            val in ['(', ')'] or
71            val is None)
72
73
74def grab_not_keywords(tokens):
75    """Pop tokens from the left until you hit a keyword
76
77    Parameters
78    ----------
79    tokens : collections.deque
80        deque of strings, some tokens some not
81
82    Returns
83    -------
84    values : list of strings
85        All non keywords found until a keyword was hit
86
87    Note
88    ----
89    This function pops the values from the deque
90
91    Examples
92    --------
93    grab_not_keywords(['H', 'and','resname', 'MET'])
94    >>> ['H']
95
96    grab_not_keywords(['H', 'Ca', 'N', 'and','resname', 'MET'])
97    >>> ['H', 'Ca' ,'N']
98
99    grab_not_keywords(['and','resname', 'MET'])
100    >>> []
101    """
102    values = []
103    while not is_keyword(tokens[0]):
104        val = tokens.popleft()
105        # Insert escape characters here to use keywords as names?
106        values.append(val)
107    return values
108
109
110_SELECTIONDICT = {}
111_OPERATIONS = {}
112# These are named args to select_atoms that have a special meaning and must
113# not be allowed as names for the 'group' keyword.
114_RESERVED_KWARGS=('updating',)
115
116
117# And and Or are exception and aren't strictly a Selection
118# as they work on other Selections rather than doing work themselves.
119# So their init is a little strange too....
120class _Operationmeta(type):
121    def __init__(cls, name, bases, classdict):
122        type.__init__(type, name, bases, classdict)
123        try:
124            _OPERATIONS[classdict['token']] = cls
125        except KeyError:
126            pass
127
128
129class LogicOperation(six.with_metaclass(_Operationmeta, object)):
130    def __init__(self, lsel, rsel):
131        self.rsel = rsel
132        self.lsel = lsel
133
134
135class AndOperation(LogicOperation):
136    token = 'and'
137    precedence = 3
138
139    def apply(self, group):
140        rsel = self.rsel.apply(group)
141        lsel = self.lsel.apply(group)
142
143        # Mask which lsel indices appear in rsel
144        mask = np.in1d(rsel.indices, lsel.indices)
145        # and mask rsel according to that
146        return rsel[mask].unique
147
148
149class OrOperation(LogicOperation):
150    token = 'or'
151    precedence = 3
152
153    def apply(self, group):
154        lsel = self.lsel.apply(group)
155        rsel = self.rsel.apply(group)
156
157        # Find unique indices from both these AtomGroups
158        # and slice master list using them
159        idx = np.union1d(lsel.indices, rsel.indices).astype(np.int32)
160
161        return group.universe.atoms[idx]
162
163
164class _Selectionmeta(type):
165    def __init__(cls, name, bases, classdict):
166        type.__init__(type, name, bases, classdict)
167        try:
168            _SELECTIONDICT[classdict['token']] = cls
169        except KeyError:
170            pass
171
172
173class Selection(six.with_metaclass(_Selectionmeta, object)):
174    pass
175
176
177class AllSelection(Selection):
178    token = 'all'
179
180    def __init__(self, parser, tokens):
181        pass
182
183    def apply(self, group):
184        # Check whether group is identical to the one stored
185        # in the corresponding universe, in which case this
186        # is returned directly. This works since the Universe.atoms
187        # are unique by construction.
188        if group is group.universe.atoms:
189            return group
190        return group[:].unique
191
192
193class UnarySelection(Selection):
194    def __init__(self, parser, tokens):
195        sel = parser.parse_expression(self.precedence)
196        self.sel = sel
197
198
199class NotSelection(UnarySelection):
200    token = 'not'
201    precedence = 5
202
203    def apply(self, group):
204        notsel = self.sel.apply(group)
205        return group[~np.in1d(group.indices, notsel.indices)].unique
206
207
208class GlobalSelection(UnarySelection):
209    token = 'global'
210    precedence = 5
211
212    def apply(self, group):
213        return self.sel.apply(group.universe.atoms).unique
214
215
216class ByResSelection(UnarySelection):
217    token = 'byres'
218    precedence = 1
219
220    def apply(self, group):
221        res = self.sel.apply(group)
222        unique_res = unique_int_1d(res.resids.astype(np.int64))
223        mask = np.in1d(group.resids, unique_res)
224
225        return group[mask].unique
226
227
228class DistanceSelection(Selection):
229    """Base class for distance search based selections"""
230
231    def validate_dimensions(self, dimensions):
232        r"""Check if the system is periodic in all three-dimensions.
233
234        Parameters
235        ----------
236        dimensions : numpy.ndarray
237            6-item array denoting system size and angles
238
239        Returns
240        -------
241        None or numpy.ndarray
242            Returns argument dimensions if system is periodic in all
243            three-dimensions, otherwise returns None
244        """
245        if self.periodic and all(dimensions[:3]):
246            return dimensions
247        return None
248
249class AroundSelection(DistanceSelection):
250    token = 'around'
251    precedence = 1
252
253    def __init__(self, parser, tokens):
254        self.periodic = parser.periodic
255        self.cutoff = float(tokens.popleft())
256        self.sel = parser.parse_expression(self.precedence)
257
258    def apply(self, group):
259        indices = []
260        sel = self.sel.apply(group)
261        # All atoms in group that aren't in sel
262        sys = group[~np.in1d(group.indices, sel.indices)]
263
264        if not sys or not sel:
265            return sys[[]]
266
267        box = self.validate_dimensions(group.dimensions)
268        pairs = distances.capped_distance(sel.positions, sys.positions,
269                                          self.cutoff, box=box,
270                                          return_distances=False)
271        if pairs.size > 0:
272            indices = np.sort(pairs[:, 1])
273
274        return sys[np.asarray(indices, dtype=np.int64)].unique
275
276class SphericalLayerSelection(DistanceSelection):
277    token = 'sphlayer'
278    precedence = 1
279
280    def __init__(self, parser, tokens):
281        self.periodic = parser.periodic
282        self.inRadius = float(tokens.popleft())
283        self.exRadius = float(tokens.popleft())
284        self.sel = parser.parse_expression(self.precedence)
285
286    def apply(self, group):
287        indices = []
288        sel = self.sel.apply(group)
289        box = self.validate_dimensions(group.dimensions)
290        periodic = box is not None
291        ref = sel.center_of_geometry().reshape(1, 3).astype(np.float32)
292        pairs = distances.capped_distance(ref, group.positions, self.exRadius,
293                                          min_cutoff=self.inRadius,
294                                          box=box,
295                                          return_distances=False)
296        if pairs.size > 0:
297            indices = np.sort(pairs[:, 1])
298
299        return group[np.asarray(indices, dtype=np.int64)].unique
300
301
302class SphericalZoneSelection(DistanceSelection):
303    token = 'sphzone'
304    precedence = 1
305
306    def __init__(self, parser, tokens):
307        self.periodic = parser.periodic
308        self.cutoff = float(tokens.popleft())
309        self.sel = parser.parse_expression(self.precedence)
310
311    def apply(self, group):
312        indices = []
313        sel = self.sel.apply(group)
314        box = self.validate_dimensions(group.dimensions)
315        periodic = box is not None
316        ref = sel.center_of_geometry().reshape(1, 3).astype(np.float32)
317        pairs = distances.capped_distance(ref, group.positions, self.cutoff,
318                                          box=box,
319                                          return_distances=False)
320        if pairs.size > 0:
321            indices = np.sort(pairs[:, 1])
322
323        return group[np.asarray(indices, dtype=np.int64)].unique
324
325
326class CylindricalSelection(Selection):
327    def apply(self, group):
328        sel = self.sel.apply(group)
329
330        # Calculate vectors between point of interest and our group
331        vecs = group.positions - sel.center_of_geometry()
332
333        if self.periodic and not np.any(group.dimensions[:3] == 0):
334            box = group.dimensions[:3]
335            cyl_z_hheight = self.zmax - self.zmin
336
337            if 2 * self.exRadius > box[0]:
338                raise NotImplementedError(
339                    "The diameter of the cylinder selection ({:.3f}) is larger "
340                    "than the unit cell's x dimension ({:.3f}). Can only do "
341                    "selections where it is smaller or equal."
342                    "".format(2*self.exRadius, box[0]))
343            if 2 * self.exRadius > box[1]:
344                raise NotImplementedError(
345                    "The diameter of the cylinder selection ({:.3f}) is larger "
346                    "than the unit cell's y dimension ({:.3f}). Can only do "
347                    "selections where it is smaller or equal."
348                    "".format(2*self.exRadius, box[1]))
349            if cyl_z_hheight > box[2]:
350                raise NotImplementedError(
351                    "The total length of the cylinder selection in z ({:.3f}) "
352                    "is larger than the unit cell's z dimension ({:.3f}). Can "
353                    "only do selections where it is smaller or equal."
354                    "".format(cyl_z_hheight, box[2]))
355
356            if np.all(group.dimensions[3:] == 90.):
357                # Orthogonal version
358                vecs -= box[:3] * np.rint(vecs / box[:3])
359            else:
360                # Triclinic version
361                tribox = group.universe.trajectory.ts.triclinic_dimensions
362                vecs -= tribox[2] * np.rint(vecs[:, 2] / tribox[2][2])[:, None]
363                vecs -= tribox[1] * np.rint(vecs[:, 1] / tribox[1][1])[:, None]
364                vecs -= tribox[0] * np.rint(vecs[:, 0] / tribox[0][0])[:, None]
365
366        # First deal with Z dimension criteria
367        mask = (vecs[:, 2] > self.zmin) & (vecs[:, 2] < self.zmax)
368        # Mask out based on height to reduce number of radii comparisons
369        vecs = vecs[mask]
370        group = group[mask]
371
372        # Radial vectors from sel to each in group
373        radii = vecs[:, 0]**2 + vecs[:, 1]**2
374        mask = radii < self.exRadius**2
375        try:
376            mask &= radii > self.inRadius**2
377        except AttributeError:
378            # Only for cylayer, cyzone doesn't have inRadius
379            pass
380
381        return group[mask].unique
382
383
384class CylindricalZoneSelection(CylindricalSelection):
385    token = 'cyzone'
386    precedence = 1
387
388    def __init__(self, parser, tokens):
389        self.periodic = parser.periodic
390        self.exRadius = float(tokens.popleft())
391        self.zmax = float(tokens.popleft())
392        self.zmin = float(tokens.popleft())
393        self.sel = parser.parse_expression(self.precedence)
394
395
396class CylindricalLayerSelection(CylindricalSelection):
397    token = 'cylayer'
398    precedence = 1
399
400    def __init__(self, parser, tokens):
401        self.periodic = parser.periodic
402        self.inRadius = float(tokens.popleft())
403        self.exRadius = float(tokens.popleft())
404        self.zmax = float(tokens.popleft())
405        self.zmin = float(tokens.popleft())
406        self.sel = parser.parse_expression(self.precedence)
407
408
409class PointSelection(DistanceSelection):
410    token = 'point'
411
412    def __init__(self, parser, tokens):
413        self.periodic = parser.periodic
414        x = float(tokens.popleft())
415        y = float(tokens.popleft())
416        z = float(tokens.popleft())
417        self.ref = np.array([x, y, z], dtype=np.float32)
418        self.cutoff = float(tokens.popleft())
419
420    def apply(self, group):
421        indices = []
422        box = self.validate_dimensions(group.dimensions)
423        pairs = distances.capped_distance(self.ref[None, :], group.positions, self.cutoff,
424                                          box=box,
425                                          return_distances=False)
426        if pairs.size > 0:
427            indices = np.sort(pairs[:, 1])
428
429        return group[np.asarray(indices, dtype=np.int64)].unique
430
431
432class AtomSelection(Selection):
433    token = 'atom'
434
435    def __init__(self, parser, tokens):
436        self.segid = tokens.popleft()
437        self.resid = int(tokens.popleft())
438        self.name = tokens.popleft()
439
440    def apply(self, group):
441        sub = group[group.names == self.name]
442        if sub:
443            sub = sub[sub.resids == self.resid]
444        if sub:
445            sub = sub[sub.segids == self.segid]
446        return sub.unique
447
448
449class BondedSelection(Selection):
450    token = 'bonded'
451    precedence = 1
452
453    def __init__(self, parser, tokens):
454        self.sel = parser.parse_expression(self.precedence)
455
456    def apply(self, group):
457        grp = self.sel.apply(group)
458        # Check if we have bonds
459        if not group.bonds:
460            warnings.warn("Bonded selection has 0 bonds")
461            return group[[]]
462
463        grpidx = grp.indices
464
465        # (n, 2) array of bond indices
466        bix = np.array(group.bonds.to_indices())
467
468        idx = []
469        # left side
470        idx.append(bix[:, 0][np.in1d(bix[:, 1], grpidx)])
471        # right side
472        idx.append(bix[:, 1][np.in1d(bix[:, 0], grpidx)])
473
474        idx = np.union1d(*idx)
475
476        return group.universe.atoms[np.unique(idx)]
477
478
479class SelgroupSelection(Selection):
480    token = 'group'
481
482    def __init__(self, parser, tokens):
483        grpname = tokens.popleft()
484        if grpname in _RESERVED_KWARGS:
485            raise TypeError("The '{}' keyword is reserved and cannot be "
486                            "used as a selection group name."
487                            .format(grpname))
488        try:
489            self.grp = parser.selgroups[grpname]
490        except KeyError:
491            raise ValueError("Failed to find group: {0}".format(grpname))
492
493    def apply(self, group):
494        mask = np.in1d(group.indices, self.grp.indices)
495        return group[mask]
496
497# TODO: remove in 1.0 (should have been removed in 0.15.0)
498class FullSelgroupSelection(Selection):
499    token = 'fullgroup'
500
501    def __init__(self, parser, tokens):
502        grpname = tokens.popleft()
503        try:
504            self.grp = parser.selgroups[grpname]
505        except KeyError:
506            raise ValueError("Failed to find group: {0}".format(grpname))
507
508    @deprecate(old_name='fullgroup', new_name='global group',
509               message=' This will be removed in v0.15.0')
510    def apply(self, group):
511        return self.grp.unique
512
513
514class StringSelection(Selection):
515    """Selections based on text attributes
516
517    Supports the use of wildcards at the end of strings
518    """
519    def __init__(self, parser, tokens):
520        vals = grab_not_keywords(tokens)
521        if not vals:
522            raise ValueError("Unexpected token '{0}'".format(tokens[0]))
523
524        self.values = vals
525
526    def apply(self, group):
527        mask = np.zeros(len(group), dtype=np.bool)
528        for val in self.values:
529            wc_pos = val.find('*')
530            if wc_pos == -1:  # No wildcard found
531                mask |= getattr(group, self.field) == val
532            else:
533                values = getattr(group, self.field).astype(np.str_)
534                mask |= np.char.startswith(values, val[:wc_pos])
535
536        return group[mask].unique
537
538
539class AtomNameSelection(StringSelection):
540    """Select atoms based on 'names' attribute"""
541    token = 'name'
542    field = 'names'
543
544
545class AtomTypeSelection(StringSelection):
546    """Select atoms based on 'types' attribute"""
547    token = 'type'
548    field = 'types'
549
550
551class AtomICodeSelection(StringSelection):
552    """Select atoms based on icode attribute"""
553    token = 'icode'
554    field = 'icodes'
555
556
557class ResidueNameSelection(StringSelection):
558    """Select atoms based on 'resnames' attribute"""
559    token = 'resname'
560    field = 'resnames'
561
562
563class MoleculeTypeSelection(StringSelection):
564    """Select atoms based on 'moltypes' attribute"""
565    token = 'moltype'
566    field = 'moltypes'
567
568
569class SegmentNameSelection(StringSelection):
570    """Select atoms based on 'segids' attribute"""
571    token = 'segid'
572    field = 'segids'
573
574
575class AltlocSelection(StringSelection):
576    """Select atoms based on 'altLoc' attribute"""
577    token = 'altloc'
578    field = 'altLocs'
579
580
581class ResidSelection(Selection):
582    """Select atoms based on numerical fields
583
584    Allows the use of ':' and '-' to specify a range of values
585    For example
586
587      resid 1:10
588    """
589    token = 'resid'
590
591    def __init__(self, parser, tokens):
592        values = grab_not_keywords(tokens)
593        if not values:
594            raise ValueError("Unexpected token: '{0}'".format(tokens[0]))
595
596        # each value in uppers and lowers is a tuple of (resid, icode)
597        uppers = []
598        lowers = []
599
600        for val in values:
601            m1 = re.match("(\d+)(\w?)$", val)
602            if not m1 is None:
603                res = m1.groups()
604                lower = int(res[0]), res[1]
605                upper = None, None
606            else:
607                # check if in appropriate format 'lower:upper' or 'lower-upper'
608                # each val is one or more digits, maybe a letter
609                selrange = re.match("(\d+)(\w?)[:-](\d+)(\w?)", val)
610                if selrange is None:  # re.match returns None on failure
611                    raise ValueError("Failed to parse value: {0}".format(val))
612                res = selrange.groups()
613                # resid and icode
614                lower = int(res[0]), res[1]
615                upper = int(res[2]), res[3]
616
617            lowers.append(lower)
618            uppers.append(upper)
619
620        self.lowers = lowers
621        self.uppers = uppers
622
623    def apply(self, group):
624        # Grab arrays here to reduce number of calls to main topology
625        vals = group.resids
626        try:  # optional attribute
627            icodes = group.icodes
628        except (AttributeError, NoDataError):
629            icodes = None
630            # if no icodes and icodes are part of selection, cause a fuss
631            if (any(v[1] for v in self.uppers) or
632                any(v[1] for v in self.lowers)):
633                raise ValueError("Selection specified icodes, while the "
634                                 "topology doesn't have any.")
635
636        if not icodes is None:
637            mask = self._sel_with_icodes(vals, icodes)
638        else:
639            mask = self._sel_without_icodes(vals)
640
641        return group[mask].unique
642
643    def _sel_without_icodes(self, vals):
644        # Final mask that gets applied to group
645        mask = np.zeros(len(vals), dtype=np.bool)
646
647        for (u_resid, _), (l_resid, _) in zip(self.uppers, self.lowers):
648            if u_resid is not None:  # range selection
649                thismask = vals >= l_resid
650                thismask &= vals <= u_resid
651            else:  # single residue selection
652                thismask = vals == l_resid
653
654            mask |= thismask
655
656        return mask
657
658    def _sel_with_icodes(self, vals, icodes):
659        # Final mask that gets applied to group
660        mask = np.zeros(len(vals), dtype=np.bool)
661
662        for (u_resid, u_icode), (l_resid, l_icode) in zip(self.uppers, self.lowers):
663            if u_resid is not None:  # Selecting a range
664                # Special case, if l_resid == u_resid, ie 163A-163C, this simplifies to:
665                # all 163, and A <= icode <= C
666                if l_resid == u_resid:
667                    thismask = vals == l_resid
668                    thismask &= icodes >= l_icode
669                    thismask &= icodes <= u_icode
670                # For 163A to 166B we want:
671                # [START]  all 163 and icode >= 'A'
672                # [MIDDLE] all of 164 and 165, any icode
673                # [END]    166 and icode <= 'B'
674                else:
675                    # start of range
676                    startmask = vals == l_resid
677                    startmask &= icodes >= l_icode
678                    thismask = startmask
679
680                    # middle of range
681                    mid = np.arange(l_resid + 1, u_resid)
682                    if len(mid):  # if there are any resids in the middle
683                        mid_beg, mid_end = mid[0], mid[-1]
684                        midmask = vals >= mid_beg
685                        midmask &= vals <= mid_end
686
687                        thismask |= midmask
688
689                    # end of range
690                    endmask = vals == u_resid
691                    endmask &= icodes <= u_icode
692
693                    thismask |= endmask
694            else:  # Selecting a single residue
695                thismask = vals == l_resid
696                thismask &= icodes == l_icode
697
698            mask |= thismask
699
700        return mask
701
702
703class RangeSelection(Selection):
704    value_offset=0
705
706    def __init__(self, parser, tokens):
707        values = grab_not_keywords(tokens)
708        if not values:
709            raise ValueError("Unexpected token: '{0}'".format(tokens[0]))
710
711        uppers = []  # upper limit on any range
712        lowers = []  # lower limit on any range
713
714        for val in values:
715            try:
716                lower = int(val)
717                upper = None
718            except ValueError:
719                # check if in appropriate format 'lower:upper' or 'lower-upper'
720                selrange = re.match("(\d+)[:-](\d+)", val)
721                if not selrange:
722                    raise ValueError(
723                        "Failed to parse number: {0}".format(val))
724                lower, upper = np.int64(selrange.groups())
725
726            lowers.append(lower)
727            uppers.append(upper)
728
729        self.lowers = lowers
730        self.uppers = uppers
731
732    def apply(self, group):
733        mask = np.zeros(len(group), dtype=np.bool)
734        vals = getattr(group, self.field) + self.value_offset
735
736        for upper, lower in zip(self.uppers, self.lowers):
737            if upper is not None:
738                thismask = vals >= lower
739                thismask &= vals <= upper
740            else:
741                thismask = vals == lower
742
743            mask |= thismask
744        return group[mask].unique
745
746
747class ResnumSelection(RangeSelection):
748    token = 'resnum'
749    field = 'resnums'
750
751
752class ByNumSelection(RangeSelection):
753    token = 'bynum'
754    field = 'indices'
755    value_offset = 1  # queries are in 1 based indices
756
757
758class MolidSelection(RangeSelection):
759    token = 'molnum'
760    field = 'molnums'
761
762
763class ProteinSelection(Selection):
764    """Consists of all residues with  recognized residue names.
765
766    Recognized residue names in :attr:`ProteinSelection.prot_res`.
767
768      * from the CHARMM force field::
769         awk '/RESI/ {printf "'"'"%s"'"',",$2 }' top_all27_prot_lipid.rtf
770
771      * manually added special CHARMM, OPLS/AA and Amber residue names.
772
773    See Also
774    --------
775    :func:`MDAnalysis.lib.util.convert_aa_code`
776    """
777    token = 'protein'
778
779    prot_res = np.array([
780        # CHARMM top_all27_prot_lipid.rtf
781        'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HSD',
782        'HSE', 'HSP', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR',
783        'TRP', 'TYR', 'VAL', 'ALAD',
784        ## 'CHO','EAM', # -- special formyl and ethanolamine termini of gramicidin
785        # PDB
786        'HIS', 'MSE',
787        # from Gromacs 4.5.3 oplsaa.ff/aminoacids.rtp
788        'ARGN', 'ASPH', 'CYS2', 'CYSH', 'QLN', 'PGLU', 'GLUH', 'HIS1', 'HISD',
789        'HISE', 'HISH', 'LYSH',
790        # from Gromacs 4.5.3 gromos53a6.ff/aminoacids.rtp
791        'ASN1', 'CYS1', 'HISA', 'HISB', 'HIS2',
792        # from Gromacs 4.5.3 amber03.ff/aminoacids.rtp
793        'HID', 'HIE', 'HIP', 'ORN', 'DAB', 'LYN', 'HYP', 'CYM', 'CYX', 'ASH',
794        'GLH', 'ACE', 'NME',
795        # from Gromacs 2016.3 amber99sb-star-ildn.ff/aminoacids.rtp
796        'NALA', 'NGLY', 'NSER', 'NTHR', 'NLEU', 'NILE', 'NVAL', 'NASN', 'NGLN',
797        'NARG', 'NHID', 'NHIE', 'NHIP', 'NTRP', 'NPHE', 'NTYR', 'NGLU', 'NASP',
798        'NLYS', 'NPRO', 'NCYS', 'NCYX', 'NMET', 'CALA', 'CGLY', 'CSER', 'CTHR',
799        'CLEU', 'CILE', 'CVAL', 'CASF', 'CASN', 'CGLN', 'CARG', 'CHID', 'CHIE',
800        'CHIP', 'CTRP', 'CPHE', 'CTYR', 'CGLU', 'CASP', 'CLYS', 'CPRO', 'CCYS',
801        'CCYX', 'CMET', 'CME', 'ASF',
802    ])
803
804    def __init__(self, parser, tokens):
805        pass
806
807    def apply(self, group):
808        mask = np.in1d(group.resnames, self.prot_res)
809        return group[mask].unique
810
811
812class NucleicSelection(Selection):
813    """All atoms in nucleic acid residues with recognized residue names.
814
815    Recognized residue names:
816
817    * from the CHARMM force field ::
818        awk '/RESI/ {printf "'"'"%s"'"',",$2 }' top_all27_prot_na.rtf
819    * recognized: 'ADE', 'URA', 'CYT', 'GUA', 'THY'
820    * recognized (CHARMM in Gromacs): 'DA', 'DU', 'DC', 'DG', 'DT'
821
822    .. versionchanged:: 0.8
823       additional Gromacs selections
824    """
825    token = 'nucleic'
826
827    nucl_res = np.array([
828        'ADE', 'URA', 'CYT', 'GUA', 'THY', 'DA', 'DC', 'DG', 'DT', 'RA',
829        'RU', 'RG', 'RC', 'A', 'T', 'U', 'C', 'G',
830        'DA5', 'DC5', 'DG5', 'DT5',
831        'DA3', 'DC3', 'DG3', 'DT3',
832        'RA5', 'RU5', 'RG5', 'RC5',
833        'RA3', 'RU3', 'RG3', 'RC3'
834    ])
835
836    def __init__(self, parser, tokens):
837        pass
838
839    def apply(self, group):
840        mask = np.in1d(group.resnames, self.nucl_res)
841        return group[mask].unique
842
843
844class BackboneSelection(ProteinSelection):
845    """A BackboneSelection contains all atoms with name 'N', 'CA', 'C', 'O'.
846
847    This excludes OT* on C-termini
848    (which are included by, eg VMD's backbone selection).
849    """
850    token = 'backbone'
851    bb_atoms = np.array(['N', 'CA', 'C', 'O'])
852
853    def apply(self, group):
854        mask = np.in1d(group.names, self.bb_atoms)
855        mask &= np.in1d(group.resnames, self.prot_res)
856        return group[mask].unique
857
858
859class NucleicBackboneSelection(NucleicSelection):
860    """Contains all atoms with name "P", "C5'", C3'", "O3'", "O5'".
861
862    These atoms are only recognized if they are in a residue matched
863    by the :class:`NucleicSelection`.
864    """
865    token = 'nucleicbackbone'
866    bb_atoms = np.array(["P", "C5'", "C3'", "O3'", "O5'"])
867
868    def apply(self, group):
869        mask = np.in1d(group.names, self.bb_atoms)
870        mask &= np.in1d(group.resnames, self.nucl_res)
871        return group[mask].unique
872
873
874class BaseSelection(NucleicSelection):
875    """Selection of atoms in nucleobases.
876
877    Recognized atom names (from CHARMM):
878
879     'N9', 'N7', 'C8', 'C5', 'C4', 'N3', 'C2', 'N1', 'C6',
880     'O6','N2','N6', 'O2','N4','O4','C5M'
881    """
882    token = 'nucleicbase'
883    base_atoms = np.array([
884        'N9', 'N7', 'C8', 'C5', 'C4', 'N3', 'C2', 'N1', 'C6',
885        'O6', 'N2', 'N6',
886        'O2', 'N4', 'O4', 'C5M'])
887
888    def apply(self, group):
889        mask = np.in1d(group.names, self.base_atoms)
890        mask &= np.in1d(group.resnames, self.nucl_res)
891        return group[mask].unique
892
893
894class NucleicSugarSelection(NucleicSelection):
895    """Contains all atoms with name C1', C2', C3', C4', O2', O4', O3'.
896    """
897    token = 'nucleicsugar'
898    sug_atoms = np.array(["C1'", "C2'", "C3'", "C4'", "O4'"])
899
900    def apply(self, group):
901        mask = np.in1d(group.names, self.sug_atoms)
902        mask &= np.in1d(group.resnames, self.nucl_res)
903        return group[mask].unique
904
905
906class PropertySelection(Selection):
907    """Some of the possible properties:
908    x, y, z, radius, mass,
909    """
910    token = 'prop'
911    ops = dict([
912        ('>', np.greater),
913        ('<', np.less),
914        ('>=', np.greater_equal),
915        ('<=', np.less_equal),
916        ('==', np.equal),
917        ('!=', np.not_equal),
918    ])
919    # order here is important, need to check <= before < so the
920    # larger (in terms of string length) symbol is considered first
921    _op_symbols = ('<=', '>=', '==', '!=', '<', '>')
922
923    # symbols to replace with when flipping
924    # eg 6 > x -> x <= 6, 5 == x -> x == 5
925    opposite_ops = {
926        '==': '==', '!=': '!=',
927        '<': '>=', '>=': '<',
928        '>': '<=', '<=': '>',
929    }
930
931    props = {'mass', 'charge', 'x', 'y', 'z'}
932
933    def __init__(self, parser, tokens):
934        """
935        Possible splitting around operator:
936
937        prop x < 5
938        prop x< 5
939        prop x <5
940        prop x<5
941        """
942        prop = tokens.popleft()
943        oper = None
944        value = None
945        if prop == "abs":
946            self.absolute = True
947            prop = tokens.popleft()
948        else:
949            self.absolute = False
950
951        # check if prop has any extra information atm
952        for possible in self._op_symbols:
953            try:
954                x, y = prop.split(possible)
955            except ValueError:
956                # won't unpack into 2 args unless *possible* is present
957                pass
958            else:
959                prop = x
960                oper = possible + y  # add back after splitting
961                break
962
963        if oper is None:
964            oper = tokens.popleft()
965        # check if oper has the value appended
966        for possible in self._op_symbols:
967            if possible in oper:
968                x, y = oper.split(possible)
969                if y:  # '<='.split('<=') == ['', ''], therefore y won't exist
970                    oper = possible
971                    value = y
972                break
973
974        if value is None:
975            value = tokens.popleft()
976
977        # check if we flip prop and value
978        # eg 5 > x -> x <= 5
979        if value in self.props:
980            prop, value = value, prop
981            oper = self.opposite_ops[oper]
982
983        self.prop = prop
984        try:
985            self.operator = self.ops[oper]
986        except KeyError:
987            raise ValueError(
988                "Invalid operator : '{0}' Use one of : '{1}'"
989                "".format(oper, self.ops.keys()))
990        self.value = float(value)
991
992    def apply(self, group):
993        try:
994            col = {'x': 0, 'y': 1, 'z': 2}[self.prop]
995        except KeyError:
996            if self.prop == 'mass':
997                values = group.masses
998            elif self.prop == 'charge':
999                values = group.charges
1000            else:
1001                raise SelectionError(
1002                    "Expected one of : {0}"
1003                    "".format(['x', 'y', 'z', 'mass', 'charge']))
1004        else:
1005            values = group.positions[:, col]
1006
1007        if self.absolute:
1008            values = np.abs(values)
1009        mask = self.operator(values, self.value)
1010
1011        return group[mask].unique
1012
1013
1014class SameSelection(Selection):
1015    token = 'same'
1016    precedence = 1
1017
1018    prop_trans = {
1019        'fragment': None,
1020        'x': None,
1021        'y': None,
1022        'z': None,
1023        'residue': 'resids',
1024        'segment': 'segids',
1025        'name': 'names',
1026        'type': 'types',
1027        'resname': 'resnames',
1028        'resid': 'resids',
1029        'segid': 'segids',
1030        'mass': 'masses',
1031        'charge': 'charges',
1032        'radius': 'radii',
1033        'bfactor': 'bfactors',
1034        'resnum': 'resnums',
1035    }
1036
1037    def __init__(self, parser, tokens):
1038        prop = tokens.popleft()
1039        if prop not in self.prop_trans:
1040            raise ValueError("Unknown same property : {0}"
1041                             "Choose one of : {1}"
1042                             "".format(prop, self.prop_trans.keys()))
1043        self.prop = prop
1044        parser.expect("as")
1045        self.sel = parser.parse_expression(self.precedence)
1046        self.prop = prop
1047
1048    def apply(self, group):
1049        res = self.sel.apply(group)
1050        if not res:
1051            return group[[]]  # empty selection
1052
1053        # Fragment must come before self.prop_trans lookups!
1054        if self.prop == 'fragment':
1055            # Combine all fragments together, then check where group
1056            # indices are same as fragment(s) indices
1057            allfrags = functools.reduce(lambda x, y: x + y, res.fragments)
1058
1059            mask = np.in1d(group.indices, allfrags.indices)
1060            return group[mask].unique
1061        # [xyz] must come before self.prop_trans lookups too!
1062        try:
1063            pos_idx = {'x': 0, 'y': 1, 'z': 2}[self.prop]
1064        except KeyError:
1065            # The self.prop string was already checked,
1066            # so don't need error checking here.
1067            # KeyError at this point is impossible!
1068            attrname = self.prop_trans[self.prop]
1069            vals = getattr(res, attrname)
1070            mask = np.in1d(getattr(group, attrname), vals)
1071
1072            return group[mask].unique
1073        else:
1074            vals = res.positions[:, pos_idx]
1075            pos = group.positions[:, pos_idx]
1076
1077            # isclose only does one value at a time
1078            mask = np.vstack([np.isclose(pos, v)
1079                              for v in vals]).any(axis=0)
1080            return group[mask].unique
1081
1082
1083class SelectionParser(object):
1084    """A small parser for selection expressions.  Demonstration of
1085    recursive descent parsing using Precedence climbing (see
1086    http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm).  Transforms
1087    expressions into nested Selection tree.
1088
1089    For reference, the grammar that we parse is ::
1090
1091       E(xpression)--> Exp(0)
1092       Exp(p) -->      P {B Exp(q)}
1093       P -->           U Exp(q) | "(" E ")" | v
1094       B(inary) -->    "and" | "or"
1095       U(nary) -->     "not"
1096       T(erms) -->     segid [value]
1097                       | resname [value]
1098                       | resid [value]
1099                       | name [value]
1100                       | type [value]
1101   """
1102    # Borg pattern: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531
1103    _shared_state = {}
1104
1105    def __new__(cls, *p, **k):
1106        self = object.__new__(cls, *p, **k)
1107        self.__dict__ = cls._shared_state
1108        return self
1109
1110    def expect(self, token):
1111        """Anticipate and remove a given token"""
1112        if self.tokens[0] == token:
1113            self.tokens.popleft()
1114        else:
1115            raise SelectionError(
1116                "Unexpected token: '{0}' Expected: '{1}'"
1117                "".format(self.tokens[0], token))
1118
1119    def parse(self, selectstr, selgroups, periodic=None):
1120        """Create a Selection object from a string.
1121
1122        Parameters
1123        ----------
1124        selectstr : str
1125            The string that describes the selection
1126        selgroups : AtomGroups
1127            AtomGroups to be used in `group` selections
1128        periodic : bool, optional
1129            for distance based selections, whether to consider
1130            periodic boundary conditions
1131
1132        Returns
1133        -------
1134        The appropriate Selection object.  Use the .apply method on
1135        this to perform the selection.
1136
1137        Raises
1138        ------
1139        SelectionError
1140            If anything goes wrong in creating the Selection object.
1141        """
1142        self.periodic = periodic
1143
1144        self.selectstr = selectstr
1145        self.selgroups = selgroups
1146        tokens = selectstr.replace('(', ' ( ').replace(')', ' ) ')
1147        self.tokens = collections.deque(tokens.split() + [None])
1148        parsetree = self.parse_expression(0)
1149        if self.tokens[0] is not None:
1150            raise SelectionError(
1151                "Unexpected token at end of selection string: '{0}'"
1152                "".format(self.tokens[0]))
1153        return parsetree
1154
1155    def parse_expression(self, p):
1156        exp1 = self._parse_subexp()
1157        while (self.tokens[0] in _OPERATIONS and
1158               _OPERATIONS[self.tokens[0]].precedence >= p):
1159            op = _OPERATIONS[self.tokens.popleft()]
1160            q = 1 + op.precedence
1161            exp2 = self.parse_expression(q)
1162            exp1 = op(exp1, exp2)
1163        return exp1
1164
1165    def _parse_subexp(self):
1166        op = self.tokens.popleft()
1167
1168        if op == '(':
1169            exp = self.parse_expression(0)
1170            self.expect(')')
1171            return exp
1172
1173        try:
1174            return _SELECTIONDICT[op](self, self.tokens)
1175        except KeyError:
1176            raise SelectionError("Unknown selection token: '{0}'".format(op))
1177        except ValueError as e:
1178            raise SelectionError("Selection failed: '{0}'".format(e))
1179
1180
1181# The module level instance
1182Parser = SelectionParser()
1183