1#!/usr/bin/env python
2#
3#      Restriction Analysis Libraries.
4#      Copyright (C) 2004. Frederic Sohm.
5#
6# This code is part of the Biopython distribution and governed by its
7# license.  Please see the LICENSE file that should have been included
8# as part of this package.
9#
10
11"""Restriction Enzyme classes.
12
Notes about the various classes of the restriction enzyme implementation::
14
15            RestrictionType is the type of all restriction enzymes.
16        -----------------------------------------------------------------------
17            AbstractCut implements some methods that are common to all enzymes.
18        -----------------------------------------------------------------------
19            NoCut, OneCut,TwoCuts   represent the number of double strand cuts
20                                    produced by the enzyme.
21                                    they correspond to the 4th field of the
22                                    rebase record emboss_e.NNN.
23                    0->NoCut    : the enzyme is not characterised.
24                    2->OneCut   : the enzyme produce one double strand cut.
25                    4->TwoCuts  : two double strand cuts.
26        -----------------------------------------------------------------------
27            Meth_Dep, Meth_Undep    represent the methylation susceptibility to
28                                    the enzyme.
29                                    Not implemented yet.
30        -----------------------------------------------------------------------
31            Palindromic,            if the site is palindromic or not.
32            NotPalindromic          allow some optimisations of the code.
33                                    No need to check the reverse strand
34                                    with palindromic sites.
35        -----------------------------------------------------------------------
36            Unknown, Blunt,         represent the overhang.
37            Ov5, Ov3                Unknown is here for symmetry reasons and
38                                    correspond to enzymes that are not
39                                    characterised in rebase.
40        -----------------------------------------------------------------------
41            Defined, Ambiguous,     represent the sequence of the overhang.
42            NotDefined
43                                    NotDefined is for enzymes not characterised
44                                    in rebase.
45
46                                    Defined correspond to enzymes that display
47                                    a constant overhang whatever the sequence.
48                                    ex : EcoRI. G^AATTC -> overhang :AATT
49                                                CTTAA^G
50
51                                    Ambiguous : the overhang varies with the
52                                    sequence restricted.
53                                    Typically enzymes which cut outside their
54                                    restriction site or (but not always)
55                                    inside an ambiguous site.
56                                    ex:
57                                    AcuI CTGAAG(22/20)  -> overhang : NN
58                                    AasI GACNNN^NNNGTC  -> overhang : NN
59                                         CTGN^NNNNNCAG
60
61                note : these 3 classes refers to the overhang not the site.
62                   So the enzyme ApoI (RAATTY) is defined even if its
63                   restriction site is ambiguous.
64
65                        ApoI R^AATTY -> overhang : AATT -> Defined
66                             YTTAA^R
67                   Accordingly, blunt enzymes are always Defined even
68                   when they cut outside their restriction site.
69        -----------------------------------------------------------------------
70            Not_available,          as found in rebase file emboss_r.NNN files.
71            Commercially_available
72                                    allow the selection of the enzymes
73                                    according to their suppliers to reduce the
74                                    quantity of results.
75                                    Also will allow the implementation of
76                                    buffer compatibility tables. Not
77                                    implemented yet.
78
79                                    the list of suppliers is extracted from
80                                    emboss_s.NNN
81        -----------------------------------------------------------------------
82
83"""
84
85
86import warnings
87
88import re
89import itertools
90
91from Bio.Seq import Seq, MutableSeq
92from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict
93from Bio.Restriction.Restriction_Dictionary import typedict
94from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict
95from Bio.Restriction.PrintFormat import PrintFormat
96from Bio import BiopythonWarning
97
98
# This module used to rely on Bio.Restriction.DNAUtils.check_bases (and exposed
# it under this namespace), but that module has since been deprecated.
101
102
103def _check_bases(seq_string):
104    """Check characters in a string (PRIVATE).
105
106    Remove digits and white space present in string. Allows any valid ambiguous
107    IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted).
108
109    Other characters (e.g. symbols) trigger a TypeError.
110
111    Returns the string WITH A LEADING SPACE (!). This is for backwards
112    compatibility, and may in part be explained by the fact that
113    ``Bio.Restriction`` doesn't use zero based counting.
114    """
115    # Remove white space and make upper case:
116    seq_string = "".join(seq_string.split()).upper()
117    # Remove digits
118    for c in "0123456789":
119        seq_string = seq_string.replace(c, "")
120    # Check only allowed IUPAC letters
121    if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")):
122        raise TypeError("Invalid character found in %r" % seq_string)
123    return " " + seq_string
124
125
# IUPAC compatibility table. Each key is a single letter nucleotide code and
# its value is the string of every code that shares at least one base with it
# (e.g. "A" is compatible with R = A/G, W = A/T, M = A/C, ..., and N).
# NOTE(review): the code consuming this table is outside this section.
matching = {
    "A": "ARWMHVDN",
    "C": "CYSMHBVN",
    "G": "GRSKBVDN",
    "T": "TYWKHBDN",
    "R": "ABDGHKMNSRWV",
    "Y": "CBDHKMNSTWVY",
    "W": "ABDHKMNRTWVY",
    "S": "CBDGHKMNSRVY",
    "M": "ACBDHMNSRWVY",
    "K": "BDGHKNSRTWVY",
    "H": "ACBDHKMNSRTWVY",
    "B": "CBDGHKMNSRTWVY",
    "V": "ACBDGHKMNSRWVY",
    "D": "ABDGHKMNSRTWVY",
    "N": "ACBDGHKMNSRTWVY",
}

# Alias: ``DNA`` is simply another name for ``Bio.Seq.Seq``.
DNA = Seq
145
146
class FormattedSeq:
    """Sequence wrapper (linear or circular) used by the restriction code.

    Converts a Bio.Seq into the internal representation expected by
    Restriction.

    In short: the sequence is checked against the IUPAC alphabet and a single
    space is put in front of it, so that positions follow the biological
    convention (the first base has index 1, not 0).

    The topology of the molecule, linear (default) or circular, is remembered.
    For a circular sequence, sites spanning the origin are searched as well.
    """

    def __init__(self, seq, linear=True):
        """Build a ``FormattedSeq`` from a sequence and an optional topology.

        ``seq`` must be a ``Bio.Seq``, a ``Bio.MutableSeq`` or another
        ``FormattedSeq``; anything else raises a TypeError. When ``seq`` is
        already a ``FormattedSeq`` its topology is copied and ``linear`` is
        ignored.
        """
        if isinstance(seq, (Seq, MutableSeq)):
            text = str(seq)
            self.lower = text.islower()
            # _check_bases prepends the leading space (!)
            self.data = _check_bases(text)
            self.linear = linear
            self.klass = seq.__class__
            return
        if not isinstance(seq, FormattedSeq):
            raise TypeError("expected Seq or MutableSeq, got %s" % type(seq))
        # Copy constructor: take over every attribute of the source object.
        self.lower = seq.lower
        self.data = seq.data
        self.linear = seq.linear
        self.klass = seq.klass

    def __len__(self):
        """Return the number of bases.

        The leading space stored in ``data`` is not counted.
        """
        padded_length = len(self.data)
        return padded_length - 1

    def __repr__(self):
        """Return a string showing the sequence and its topology."""
        sequence = self[1:]
        return "FormattedSeq(%r, linear=%r)" % (sequence, self.linear)

    def __eq__(self, other):
        """Return True if ``other`` is a ``FormattedSeq`` with the same repr."""
        return isinstance(other, FormattedSeq) and repr(self) == repr(other)

    def circularise(self):
        """Mark this sequence as circular (in place)."""
        self.linear = False

    def linearise(self):
        """Mark this sequence as linear (in place)."""
        self.linear = True

    def to_linear(self):
        """Return a linear copy of this sequence."""
        duplicate = self.__class__(self)
        duplicate.linear = True
        return duplicate

    def to_circular(self):
        """Return a circular copy of this sequence."""
        duplicate = self.__class__(self)
        duplicate.linear = False
        return duplicate

    def is_linear(self):
        """Return True for a linear sequence, False for a circular one."""
        return self.linear

    def finditer(self, pattern, size):
        """Return every occurrence of ``pattern`` in the sequence as a list.

        Each item is a tuple (location, match.group); the bound ``group``
        method is needed for non palindromic sites.
        ``pattern`` is the regular expression of the enzyme restriction site.
        ``size`` is the length of the enzyme recognition site.
        """
        # For a circular molecule, append the first size - 1 bases so that
        # sites spanning the origin can be matched as well.
        haystack = self.data if self.is_linear() else self.data + self.data[1:size]
        return [(hit.start(), hit.group) for hit in re.finditer(pattern, haystack)]

    def __getitem__(self, i):
        """Return part of the ``FormattedSeq``.

        The result is an instance of the class of the original sequence.
        Because of the leading space, indexing is 1-based:

        >>> from Bio.Seq import Seq
        >>> from Bio.Restriction.Restriction import FormattedSeq
        >>> f_seq = FormattedSeq(Seq('ATGCATGC'))
        >>> f_seq[1]
        Seq('A')

        """
        fragment = self.data[i]
        # Give back lower case if the original sequence was all lower case.
        return self.klass(fragment.lower() if self.lower else fragment)
257
258
class RestrictionType(type):
    """RestrictionType. Type from which all enzyme classes are derived.

    Implement the operator methods.
    """

    def __init__(cls, name="", bases=(), dct=None):
        """Initialize RestrictionType instance.

        Not intended to be used in normal operation. The enzymes are
        instantiated when importing the module.

        Raises ValueError if ``name`` contains a hyphen, or if the class
        attribute ``compsite`` cannot be compiled as a regular expression.
        A class without a ``compsite`` attribute is left untouched.
        """
        if "-" in name:
            raise ValueError("Problem with hyphen in %r as enzyme name" % name)
        # 2011/11/26 - Nobody knows what this call was supposed to accomplish,
        # but all unit tests seem to pass without it.
        # super().__init__(cls, name, bases, dct)
        try:
            cls.compsite = re.compile(cls.compsite)
        except AttributeError:
            # Can happen if initialised wrongly.
            # (This was seen when Sphinx api-doc imports the classes, and
            # tried to automatically general documentation for them)
            pass
        except Exception:
            raise ValueError(
                "Problem with regular expression, re.compiled(%r)" % cls.compsite
            ) from None

    def __add__(cls, other):
        """Add restriction enzyme to a RestrictionBatch().

        If other is an enzyme returns a batch of the two enzymes.
        If other is already a RestrictionBatch add enzyme to it.
        Any other operand raises a TypeError.
        """
        if isinstance(other, RestrictionType):
            return RestrictionBatch([cls, other])
        elif isinstance(other, RestrictionBatch):
            return other.add_nocheck(cls)
        else:
            raise TypeError(
                "expected RestrictionType or RestrictionBatch, got %s instead"
                % type(other)
            )

    def __truediv__(cls, other):
        """Override '/' operator to use as search method.

        >>> from Bio.Seq import Seq
        >>> from Bio.Restriction import EcoRI
        >>> EcoRI/Seq('GAATTC')
        [2]

        Returns RE.search(other).
        """
        return cls.search(other)

    def __rtruediv__(cls, other):
        """Override division with reversed operands to use as search method.

        >>> from Bio.Seq import Seq
        >>> from Bio.Restriction import EcoRI
        >>> Seq('GAATTC')/EcoRI
        [2]

        Returns RE.search(other).
        """
        return cls.search(other)

    def __floordiv__(cls, other):
        """Override '//' operator to use as catalyse method.

        >>> from Bio.Seq import Seq
        >>> from Bio.Restriction import EcoRI
        >>> EcoRI//Seq('GAATTC')
        (Seq('G'), Seq('AATTC'))

        Returns RE.catalyse(other).
        """
        return cls.catalyse(other)

    def __rfloordiv__(cls, other):
        """As __floordiv__, with reversed operands.

        >>> from Bio.Seq import Seq
        >>> from Bio.Restriction import EcoRI
        >>> Seq('GAATTC')//EcoRI
        (Seq('G'), Seq('AATTC'))

        Returns RE.catalyse(other).
        """
        return cls.catalyse(other)

    def __str__(cls):
        """Return the name of the enzyme as string."""
        return cls.__name__

    def __repr__(cls):
        """Implement repr method.

        Used with eval or exec will instantiate the enzyme.
        """
        return "%s" % cls.__name__

    def __len__(cls):
        """Return length of recognition site of enzyme as int.

        Returns 0 if the class has no ``size`` attribute.
        """
        try:
            return cls.size
        except AttributeError:
            # Happens if the instance was not initialised as expected.
            # e.g. if instance created by a documentation framework
            # like Sphinx trying to inspect the class automatically,
            # Also seen within IPython.
            return 0

    def __hash__(cls):
        """Implement ``hash()`` method for ``RestrictionType``.

        Python default is to use ``id(...)``
        This is consistent with the ``__eq__`` implementation
        """
        return id(cls)

    def __eq__(cls, other):
        """Override '==' operator.

        True if RE and other are the same enzyme.

        Specifically this checks they are the same Python object.
        """
        # assert (id(cls)==id(other)) == (other is cls) == (cls is other)
        return id(cls) == id(other)

    def __ne__(cls, other):
        """Override '!=' operator.

        Isoschizomer strict (same recognition site, same restriction) -> False
        All the other-> True

        WARNING - This is not the inverse of the __eq__ method

        >>> from Bio.Restriction import SacI, SstI
        >>> SacI != SstI  # true isoschizomers
        False
        >>> SacI == SstI
        False
        """
        if not isinstance(other, RestrictionType):
            return True
        elif cls.charac == other.charac:
            return False
        else:
            return True

    def __rshift__(cls, other):
        """Override '>>' operator to test for neoschizomers.

        neoschizomer : same recognition site, different restriction. -> True
        all the others :                                             -> False

        >>> from Bio.Restriction import SmaI, XmaI
        >>> SmaI >> XmaI
        True
        """
        if not isinstance(other, RestrictionType):
            return False
        elif cls.site == other.site and cls.charac != other.charac:
            return True
        else:
            return False

    def __mod__(cls, other):
        """Override '%' operator to test for compatible overhangs.

        True if a and b have compatible overhang.
        Raises TypeError if other is not an enzyme.

        >>> from Bio.Restriction import XhoI, SalI
        >>> XhoI % SalI
        True
        """
        if not isinstance(other, RestrictionType):
            raise TypeError("expected RestrictionType, got %s instead" % type(other))
        return cls._mod1(other)

    def __ge__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '>='. a is greater or equal than b if the a site is longer
        than b site. If their site have the same length sort by alphabetical
        order of their names.

        Raises NotImplementedError if other is not an enzyme.

        >>> from Bio.Restriction import EcoRI, EcoRV
        >>> EcoRI.size
        6
        >>> EcoRV.size
        6
        >>> EcoRI >= EcoRV
        False
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        # Use len() on both sides (as '<=' and '<' do) so that a class without
        # a ``size`` attribute compares as size 0 instead of raising
        # AttributeError.
        if len(cls) > len(other):
            return True
        elif len(cls) == len(other) and cls.__name__ >= other.__name__:
            return True
        else:
            return False

    def __gt__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '>'. Sorting order:

        1. size of the recognition site.
        2. if equal size, alphabetical order of the names.

        Raises NotImplementedError if other is not an enzyme.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        # len() rather than ``cls.size``: see __ge__.
        if len(cls) > len(other):
            return True
        elif len(cls) == len(other) and cls.__name__ > other.__name__:
            return True
        else:
            return False

    def __le__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '<='. Sorting order:

        1. size of the recognition site.
        2. if equal size, alphabetical order of the names.

        Raises NotImplementedError if other is not an enzyme.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        elif len(cls) < len(other):
            return True
        elif len(cls) == len(other) and cls.__name__ <= other.__name__:
            return True
        else:
            return False

    def __lt__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '<'. Sorting order:

        1. size of the recognition site.
        2. if equal size, alphabetical order of the names.

        Raises NotImplementedError if other is not an enzyme.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        elif len(cls) < len(other):
            return True
        elif len(cls) == len(other) and cls.__name__ < other.__name__:
            return True
        else:
            return False
514
515
class AbstractCut(RestrictionType):
    """Implement the methods that are common to all restriction enzymes.

    All the methods are classmethod.

    For internal use only. Not meant to be instantiated.
    """

    @classmethod
    def search(cls, dna, linear=True):
        """Return a list of cutting sites of the enzyme in the sequence.

        Compensate for circular sequences and so on.

        dna must be a Bio.Seq.Seq instance, a Bio.Seq.MutableSeq instance or
        a FormattedSeq. For a FormattedSeq, ``linear`` is not used.

        If linear is False, the restriction sites that span over the boundaries
        will be included.

        The positions are the first base of the 3' fragment,
        i.e. the first base after the position the enzyme will cut.
        """
        #
        #   Separating search from _search allow a (very limited) optimisation
        #   of the search when using a batch of restriction enzymes.
        #   in this case the DNA is tested once by the class which implements
        #   the batch instead of being tested by each enzyme single.
        #   see RestrictionBatch.search() for example.
        #
        if not isinstance(dna, FormattedSeq):
            dna = FormattedSeq(dna, linear)
        # The sequence is stored on the class; _search() reads it from there.
        cls.dna = dna
        return cls._search()

    @classmethod
    def all_suppliers(cls):
        """Print all the suppliers of restriction enzyme."""
        supply = sorted(x[0] for x in suppliers_dict.values())
        print(",\n".join(supply))

    @classmethod
    def is_equischizomer(cls, other):
        """Test for real isoschizomer.

        True if other is an isoschizomer of RE, but not an neoschizomer,
        else False.

        Equischizomer: same site, same position of restriction.

        >>> from Bio.Restriction import SacI, SstI, SmaI, XmaI
        >>> SacI.is_equischizomer(SstI)
        True
        >>> SmaI.is_equischizomer(XmaI)
        False

        """
        return not cls != other

    @classmethod
    def is_neoschizomer(cls, other):
        """Test for neoschizomer.

        True if other is a neoschizomer of RE, else False.
        Neoschizomer: same site, different position of restriction.
        """
        return cls >> other

    @classmethod
    def is_isoschizomer(cls, other):
        """Test for same recognition site.

        True if other has the same recognition site, else False.

        Isoschizomer: same site.

        >>> from Bio.Restriction import SacI, SstI, SmaI, XmaI
        >>> SacI.is_isoschizomer(SstI)
        True
        >>> SmaI.is_isoschizomer(XmaI)
        True

        """
        return (not cls != other) or cls >> other

    @classmethod
    def equischizomers(cls, batch=None):
        """List equischizomers of the enzyme.

        Return a sorted list of all the equischizomers of RE, the enzyme
        itself excluded.
        If batch is supplied it is used instead of the default AllEnzymes
        (an empty batch also falls back to AllEnzymes).

        Equischizomer: same site, same position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if not cls != x]
        # The enzyme matches itself; drop it. A user supplied batch need not
        # contain the enzyme, in which case there is nothing to drop.
        try:
            r.remove(cls)
        except ValueError:
            pass
        r.sort()
        return r

    @classmethod
    def neoschizomers(cls, batch=None):
        """List neoschizomers of the enzyme.

        Return a sorted list of all the neoschizomers of RE.
        If batch is supplied it is used instead of the default AllEnzymes
        (an empty batch also falls back to AllEnzymes).

        Neoschizomer: same site, different position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        r = sorted(x for x in batch if cls >> x)
        return r

    @classmethod
    def isoschizomers(cls, batch=None):
        """List all isoschizomers of the enzyme.

        Return a sorted list of all the equischizomers and neoschizomers of
        RE, the enzyme itself excluded.
        If batch is supplied it is used instead of the default AllEnzymes
        (an empty batch also falls back to AllEnzymes).
        """
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if (cls >> x) or (not cls != x)]
        # As in equischizomers(): the enzyme may be absent from a user
        # supplied batch, so its removal must not fail.
        try:
            r.remove(cls)
        except ValueError:
            pass
        r.sort()
        return r

    @classmethod
    def frequency(cls):
        """Return the theoretically cutting frequency of the enzyme.

        Frequency of the site, given as 'one cut per x bases' (int).
        """
        return cls.freq
655
656
class NoCut(AbstractCut):
    """Methods specific to the enzymes for which no cut is recorded.

    These are generally enzymes that have been only partially characterised,
    so that the way they cut the DNA is unknown, or enzymes whose cutting
    pattern is too complex to be recorded in Rebase (ncuts values of 0 in
    emboss_e.###).

    With these enzymes search() reports positions at the start of the
    restriction site.

    Their catalyse() method raises a TypeError.

    Unknown and NotDefined are also part of the base classes of these enzymes.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """Return whether the enzyme cuts once on each strand.

        Always False for this class.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """Return whether the enzyme cuts twice on each strand.

        Always False for this class.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """Return a generator yielding the match location unchanged (PRIVATE).

        For internal use only.

        location is an integer: the position at which the enzyme pattern
        matched in the sequence. The cutting enzyme classes shift that
        position to the place of the cut; no cut offset is applied here,
        so the location itself is yielded.
        """
        yield from (location,)

    @classmethod
    def _rev_modify(cls, location):
        """Return a generator yielding the match location unchanged (PRIVATE).

        For internal use only.

        Same as _modify, for a site located on the antiparallel strand when
        the enzyme is not palindromic.
        """
        yield from (location,)

    @classmethod
    def characteristic(cls):
        """Return the characteristics of the enzyme as a tuple.

        The tuple holds, in this order:

        - fst5 -> first 5' cut (current strand), None here
        - fst3 -> first 3' cut (complementary strand), None here
        - scd5 -> second 5' cut (current strand), None here
        - scd3 -> second 3' cut (complementary strand), None here
        - site -> recognition site.

        """
        return (None,) * 4 + (cls.site,)
744
745
class OneCut(AbstractCut):
    """Methods for the enzymes that cut the DNA only once.

    Matches ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """Return whether the enzyme cuts once on each strand.

        Always True for this class.
        """
        return True

    @classmethod
    def cut_twice(cls):
        """Return whether the enzyme cuts twice on each strand.

        Always False for this class.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """Return a generator yielding the real cutting position (PRIVATE).

        For internal use only.

        location is an integer: the position at which the enzyme pattern
        matched in the sequence. The value yielded is that position shifted
        by ``fst5``, i.e. the place where the enzyme will cut.

        Example::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.

        Enzymes that cut twice yield two integers instead, one per cut.
        """
        cut_position = location + cls.fst5
        yield cut_position

    @classmethod
    def _rev_modify(cls, location):
        """Return a generator yielding the reverse strand cut (PRIVATE).

        For internal use only.

        Same as _modify, for a site located on the antiparallel strand when
        the enzyme is not palindromic: the location is shifted back by
        ``fst3``.
        """
        cut_position = location - cls.fst3
        yield cut_position

    @classmethod
    def characteristic(cls):
        """Return the characteristics of the enzyme as a tuple.

        The tuple holds, in this order:

        - fst5 -> first 5' cut (current strand)
        - fst3 -> first 3' cut (complementary strand)
        - scd5 -> second 5' cut (current strand), None here
        - scd3 -> second 3' cut (complementary strand), None here
        - site -> recognition site.

        """
        return (cls.fst5, cls.fst3) + (None, None, cls.site)
823
824
class TwoCuts(AbstractCut):
    """Methods for the enzymes that cut the DNA twice.

    Matches ncuts values of 4 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """Return whether the enzyme cuts once on each strand.

        Always False for this class.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """Return whether the enzyme cuts twice on each strand.

        Always True for this class.
        """
        return True

    @classmethod
    def _modify(cls, location):
        """Return a generator yielding both real cutting positions (PRIVATE).

        For internal use only.

        location is an integer: the position at which the enzyme pattern
        matched in the sequence. Two values are yielded: that position
        shifted by ``fst5``, then shifted by ``scd5``.

        Example of the shift for an enzyme cutting once::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.

        """
        # Offsets are looked up one at a time, just before each value is
        # yielded.
        for offset_name in ("fst5", "scd5"):
            yield location + getattr(cls, offset_name)

    @classmethod
    def _rev_modify(cls, location):
        """Return a generator yielding both reverse strand cuts (PRIVATE).

        For internal use only.

        Same as _modify, for a site located on the antiparallel strand when
        the enzyme is not palindromic: the location is shifted back by
        ``fst3``, then by ``scd3``.
        """
        for offset_name in ("fst3", "scd3"):
            yield location - getattr(cls, offset_name)

    @classmethod
    def characteristic(cls):
        """Return the characteristics of the enzyme as a tuple.

        The tuple holds, in this order:

        - fst5 -> first 5' cut (current strand)
        - fst3 -> first 3' cut (complementary strand)
        - scd5 -> second 5' cut (current strand)
        - scd3 -> second 3' cut (complementary strand)
        - site -> recognition site.

        """
        fields = ("fst5", "fst3", "scd5", "scd3", "site")
        return tuple(getattr(cls, field) for field in fields)
904
905
class Meth_Dep(AbstractCut):
    """Carry the methylation information of an enzyme.

    Enzymes deriving from this class have a recognition site that can be
    methylated.
    """

    @classmethod
    def is_methylable(cls):
        """Tell whether the recognition site can be methylated.

        Always True for enzymes of this class.
        """
        return True
919
920
class Meth_Undep(AbstractCut):
    """Carry the methylation information of an enzyme.

    Enzymes deriving from this class are not sensitive to methylation.
    """

    @classmethod
    def is_methylable(cls):
        """Tell whether the recognition site can be methylated.

        Always False for enzymes of this class.
        """
        return False
934
935
class Palindromic(AbstractCut):
    """Provide the search method of enzymes whose site is palindromic.

    A site is palindromic when it is identical to its reverse complement.

    Remark: an enzyme recognising CGNNCG counts as palindromic even though
    some of the concrete sequences it matches, CGAACG for instance, are not.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _search(cls):
        """Return the list of cutting sites found in the sequence (PRIVATE).

        For internal use only.

        Search method for palindromic enzymes: every match of cls.compsite in
        cls.dna is turned into its cutting position(s) by cls._modify. The
        list is stored in cls.results and, unless it is empty, passed
        through cls._drop before being returned.
        """
        cuts = []
        for start, _group in cls.dna.finditer(cls.compsite, cls.size):
            cuts.extend(cls._modify(start))
        cls.results = cuts
        if cuts:
            cls._drop()
        # _drop may have rebound cls.results, so read it again.
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """Tell whether the recognition site is palindromic (always True)."""
        return True
966
967
class NonPalindromic(AbstractCut):
    """Provide the search method of enzymes whose site is not palindromic.

    A site is palindromic when it is identical to its reverse complement.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _search(cls):
        """Return the list of cutting sites found in the sequence (PRIVATE).

        For internal use only.

        Search method for non palindromic enzymes. For every match, the
        match group named after the enzyme decides how the location is
        converted: cls._modify when the group is set, cls._rev_modify
        otherwise. The positions obtained through _rev_modify are also kept,
        unsorted, in cls.on_minus. The merged list is sorted, passed through
        cls._drop and stored in cls.results.
        """
        matches = cls.dna.finditer(cls.compsite, cls.size)
        cls.results = []
        own_name = str(cls)
        cls.on_minus = []

        for start, group in matches:
            if group(own_name):
                cls.results.extend(cls._modify(start))
            else:
                cls.on_minus.extend(cls._rev_modify(start))
        cls.results.extend(cls.on_minus)

        if not cls.results:
            return cls.results
        cls.results.sort()
        cls._drop()
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """Tell whether the recognition site is palindromic (always False)."""
        return False
1008
1009
class Unknown(AbstractCut):
    """Implement methods for enzymes that produce unknown overhangs.

    These enzymes are also NotDefined and NoCut.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        The other overhang classes return a tuple of fragments here. The
        cut of an enzyme of this class is unknown, so no fragment can be
        computed and the method always raises.

        dna and linear are accepted for signature compatibility only.

        Raises NotImplementedError, whatever the arguments.
        """
        raise NotImplementedError("%s restriction is unknown." % cls.__name__)

    # American spelling, same method.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        Always False for this class.

        Related methods:

        - RE.is_3overhang()
        - RE.is_5overhang()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        Always False for this class.

        Related methods:

        - RE.is_3overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        Always False for this class.

        Related methods:

        - RE.is_5overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        For this class the answer is always "unknown".
        """
        return "unknown"

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        The overhang is unknown, so the list is always empty.

        batch is ignored. It is accepted so that this method can be called
        with the same arguments as the compatible_end methods of Blunt, Ov5
        and Ov3.
        """
        return []

    @classmethod
    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        An unknown overhang is compatible with nothing: always False.
        """
        return False
1101
1102
class Blunt(AbstractCut):
    """Implement methods for enzymes that produce blunt ends.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna. When the enzyme does not cut, the tuple holds the whole sequence.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly: the first fragment then joins
        the last cut to the first one across the origin.
        """
        r = cls.search(dna, linear)
        # cls.dna is sliced from index 1 throughout; presumably it uses
        # 1-based coordinates -- confirm against the class of cls.dna.
        d = cls.dna
        if not r:
            return (d[1:],)
        # Fragments delimited by two consecutive cuts (none for a single cut).
        between = [d[start:stop] for start, stop in zip(r, r[1:])]
        if d.is_linear():
            # START -> first cut, inner fragments, last cut -> END.
            fragments = [d[1 : r[0]]] + between + [d[r[-1] :]]
        else:
            # circular : bridge LAST site to FIRST site, then the others.
            fragments = [d[r[-1] :] + d[1 : r[0]]] + between
        return tuple(fragments)

    # American spelling, same method.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        Always True for this class.

        Related methods:

        - RE.is_3overhang()
        - RE.is_5overhang()
        - RE.is_unknown()

        """
        return True

    @classmethod
    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        Always False for this class.

        Related methods:

        - RE.is_3overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        Always False for this class.

        Related methods:

        - RE.is_5overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        For this class the answer is always "blunt".
        """
        return "blunt"

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        batch is the collection of enzymes to look into. When it is omitted
        (or empty) AllEnzymes is searched.

        Return the sorted list of the enzymes of batch that produce blunt
        ends.
        """
        if not batch:
            batch = AllEnzymes
        # Search the requested batch, not unconditionally AllEnzymes.
        return sorted(x for x in batch if x.is_blunt())

    @staticmethod
    def _mod1(other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only

        Test for the compatibility of restriction ending of RE and other:
        True when other is a subclass of Blunt.
        """
        return issubclass(other, Blunt)
1232
1233
class Ov5(AbstractCut):
    """Implement methods for enzymes that produce 5' overhanging ends.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna. When the enzyme does not cut, the tuple holds the whole sequence.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly: the first fragment then joins
        the last cut to the first one across the origin.
        """
        r = cls.search(dna, linear)
        # cls.dna is sliced from index 1 throughout; presumably it uses
        # 1-based coordinates -- confirm against the class of cls.dna.
        d = cls.dna
        if not r:
            return (d[1:],)
        # Fragments delimited by two consecutive cuts (none for a single cut).
        between = [d[start:stop] for start, stop in zip(r, r[1:])]
        if d.is_linear():
            # START -> first cut, inner fragments, last cut -> END.
            fragments = [d[1 : r[0]]] + between + [d[r[-1] :]]
        else:
            # circular : bridge LAST site to FIRST site, then the others.
            fragments = [d[r[-1] :] + d[1 : r[0]]] + between
        return tuple(fragments)

    # American spelling, same method.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        Always False for this class.

        Related methods:

        - RE.is_3overhang()
        - RE.is_5overhang()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        Always True for this class.

        Related methods:

        - RE.is_3overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return True

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        Always False for this class.

        Related methods:

        - RE.is_5overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        For this class the answer is always "5' overhang".
        """
        return "5' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        batch is the collection of enzymes to look into. When it is omitted
        (or empty) AllEnzymes is searched.

        Return the sorted list of the enzymes x of batch that produce a
        5' overhang and for which ``x % cls`` is true.
        """
        if not batch:
            batch = AllEnzymes
        # Search the requested batch, not unconditionally AllEnzymes.
        return sorted(x for x in batch if x.is_5overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Only another 5' overhang enzyme can be compatible; the comparison of
        the overhangs themselves is delegated to cls._mod2.
        """
        if issubclass(other, Ov5):
            return cls._mod2(other)
        return False
1365
1366
class Ov3(AbstractCut):
    """Implement methods for enzymes that produce 3' overhanging ends.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna. When the enzyme does not cut, the tuple holds the whole sequence.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly: the first fragment then joins
        the last cut to the first one across the origin.
        """
        r = cls.search(dna, linear)
        # cls.dna is sliced from index 1 throughout; presumably it uses
        # 1-based coordinates -- confirm against the class of cls.dna.
        d = cls.dna
        if not r:
            return (d[1:],)
        # Fragments delimited by two consecutive cuts (none for a single cut).
        between = [d[start:stop] for start, stop in zip(r, r[1:])]
        if d.is_linear():
            # START -> first cut, inner fragments, last cut -> END.
            fragments = [d[1 : r[0]]] + between + [d[r[-1] :]]
        else:
            # circular : bridge LAST site to FIRST site, then the others.
            fragments = [d[r[-1] :] + d[1 : r[0]]] + between
        return tuple(fragments)

    # American spelling, same method.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        Always False for this class.

        Related methods:

        - RE.is_3overhang()
        - RE.is_5overhang()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        Always False for this class.

        Related methods:

        - RE.is_3overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        Always True for this class.

        Related methods:

        - RE.is_5overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return True

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        For this class the answer is always "3' overhang".
        """
        return "3' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        batch is the collection of enzymes to look into. When it is omitted
        (or empty) AllEnzymes is searched.

        Return the sorted list of the enzymes x of batch that produce a
        3' overhang and for which ``x % cls`` is true.
        """
        if not batch:
            batch = AllEnzymes
        # Search the requested batch, not unconditionally AllEnzymes.
        return sorted(x for x in batch if x.is_3overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Only another 3' overhang enzyme can be compatible; the comparison of
        the overhangs themselves is delegated to cls._mod2.
        """
        #
        #   called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if issubclass(other, Ov3):
            return cls._mod2(other)
        return False
1501
1502
class Defined(AbstractCut):
    """Implement methods for enzymes with defined recognition site and cut.

    Typical example : EcoRI -> G^AATT_C
                      The overhang will always be AATT
    Notes:
        Blunt enzymes are always defined, even with a site such as
        GGATCCNNN^_N : their overhang is always the same, blunt!

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _drop(cls):
        """Remove cuts that are outside of the sequence (PRIVATE).

        For internal use only.

        The cut positions in cls.results are obtained by adding the
        site-to-cut distance to the site location (_modify / _rev_modify),
        so some of them may fall outside the sequence.

        Linear sequence: the leading positions <= 1 are discarded, then the
        positions are kept up to the first one larger than the sequence
        length.
        Circular sequence: nothing is discarded since the site is in the
        sequence; the leading positions < 1 and the trailing positions larger
        than the length are shifted by the length instead.
        """
        size = len(cls.dna)
        if cls.dna.is_linear():
            after_start = itertools.dropwhile(lambda pos: pos <= 1, cls.results)
            cls.results = list(itertools.takewhile(lambda pos: pos <= size, after_start))
            return
        cuts = cls.results
        for index, location in enumerate(cuts):
            if location >= 1:
                break
            cuts[index] += size
        # Same walk from the other end; `back` counts from the last item.
        for back, location in enumerate(cuts[::-1], start=1):
            if location <= size:
                break
            cuts[-back] -= size

    @classmethod
    def is_defined(cls):
        """Return if recognition sequence and cut are defined.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site. Always True for this class.

        Related methods:

        - RE.is_ambiguous()
        - RE.is_unknown()

        """
        return True

    @classmethod
    def is_ambiguous(cls):
        """Return if recognition sequence and cut may be ambiguous.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site. Always False for this class.

        Related methods:

        - RE.is_defined()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_unknown(cls):
        """Return if recognition sequence is unknown.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always False for this class.

        Related methods:

        - RE.is_defined()
        - RE.is_ambiguous()

        """
        return False

    @classmethod
    def elucidate(cls):
        """Return a string representing the recognition site and cuttings.

        Return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:

        >>> from Bio.Restriction import EcoRI, KpnI, EcoRV, SnaI
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        """
        f5, f3, site = cls.fst5, cls.fst3, cls.site
        if cls.cut_twice():
            return "cut twice, not yet implemented sorry."
        if cls.is_5overhang():
            if f5 == f3 == 0:
                return "N^" + cls.site + "_N"
            if f3 == 0:
                return site[:f5] + "^" + site[f5:] + "_N"
            return site[:f5] + "^" + site[f5:f3] + "_" + site[f3:]
        if cls.is_blunt():
            return site[:f5] + "^_" + site[f5:]
        # Remaining case: neither 5' overhang nor blunt.
        if f5 == f3 == 0:
            return "N_" + site + "^N"
        return site[:f3] + "_" + site[f3:f5] + "^" + site[f5:]

    @classmethod
    def _mod2(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Identical overhang sequences are compatible. Otherwise, when other is
        a subclass of Ambiguous, the decision is left to other._mod2.
        """
        #
        #   called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if other.ovhgseq == cls.ovhgseq:
            return True
        if issubclass(other, Ambiguous):
            return other._mod2(cls)
        return False
1655
1656
class Ambiguous(AbstractCut):
    """Implement methods for enzymes that produce variable overhangs.

    Typical example : BstXI -> CCAN_NNNN^NTGG
                      The overhang can be any sequence of 4 bases.

    Notes:
        Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated.

    """

    @classmethod
    def _drop(cls):
        """Remove cuts that are outside of the sequence (PRIVATE).

        For internal use only.

        Linear sequence: the leading positions <= 1 are discarded, then the
        positions are kept up to the first one larger than the sequence
        length.
        Circular sequence: nothing is discarded; the leading positions < 1
        and the trailing positions larger than the length are shifted by the
        length instead.
        """
        length = len(cls.dna)
        drop = itertools.dropwhile
        take = itertools.takewhile
        if cls.dna.is_linear():
            cls.results = list(drop(lambda x: x <= 1, cls.results))
            cls.results = list(take(lambda x: x <= length, cls.results))
        else:
            for index, location in enumerate(cls.results):
                if location < 1:
                    cls.results[index] += length
                else:
                    break
            # Reversed copy: item `index` of it is item -(index + 1) of results.
            for index, location in enumerate(cls.results[::-1]):
                if location > length:
                    cls.results[-(index + 1)] -= length
                else:
                    break

    @classmethod
    def is_defined(cls):
        """Return if recognition sequence and cut are defined.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site. Always False for this class.

        Related methods:

        - RE.is_ambiguous()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_ambiguous(cls):
        """Return if recognition sequence and cut may be ambiguous.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site. Always True for this class.

        Related methods:

        - RE.is_defined()
        - RE.is_unknown()

        """
        return True

    @classmethod
    def is_unknown(cls):
        """Return if recognition sequence is unknown.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always False for this class.

        Related methods:

        - RE.is_defined()
        - RE.is_ambiguous()

        """
        return False

    @classmethod
    def _mod2(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Overhangs of different lengths are never compatible. Otherwise the
        overhang of cls is turned into a regular expression and matched
        against the overhang of other.
        """
        #
        #   called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if len(cls.ovhgseq) != len(other.ovhgseq):
            return False
        # Translate every symbol exactly once. Substituting over the whole
        # string symbol after symbol would translate again a repeated symbol,
        # or a symbol appearing inside an already expanded character class.
        pieces = []
        for base in cls.ovhgseq:
            if base == "N":
                pieces.append(".")
            elif base in "RYWMSKHDBV":
                pieces.append("[" + matching[base] + "]")
            else:
                pieces.append(base)
        return re.match("".join(pieces), other.ovhgseq) is not None

    @classmethod
    def elucidate(cls):
        """Return a string representing the recognition site and cuttings.

        Return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:

        >>> from Bio.Restriction import EcoRI, KpnI, EcoRV, SnaI
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()     # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        Raises ValueError for a blunt enzyme whose cut falls inside the site
        (0 <= fst5 <= len(cls)).
        """
        f5 = cls.fst5
        f3 = cls.fst3
        length = len(cls)
        site = cls.site
        if cls.cut_twice():
            return "cut twice, not yet implemented sorry."
        if cls.is_5overhang():
            if f3 == f5 == 0:
                return "N^" + site + "_N"
            if 0 <= f5 <= length and 0 <= f3 + length <= length:
                return site[:f5] + "^" + site[f5:f3] + "_" + site[f3:]
            if 0 <= f5 <= length:
                return site[:f5] + "^" + site[f5:] + f3 * "N" + "_N"
            if 0 <= f3 + length <= length:
                return "N^" + abs(f5) * "N" + site[:f3] + "_" + site[f3:]
            if f3 + length < 0:
                # "N^" is concatenated with the run of N, as in the
                # neighbouring branches; multiplying it by a string raises
                # TypeError.
                return "N^" + abs(f5) * "N" + "_" + abs(length + f3) * "N" + site
            if f5 > length:
                return site + (f5 - length) * "N" + "^" + (length + f3 - f5) * "N" + "_N"
            return "N^" + abs(f5) * "N" + site + f3 * "N" + "_N"
        if cls.is_blunt():
            if f5 < 0:
                return "N^_" + abs(f5) * "N" + site
            if f5 > length:
                return site + (f5 - length) * "N" + "^_N"
            # __name__ always exists on a class, unlike a `name` attribute.
            raise ValueError("%s.easyrepr() : error f5=%i" % (cls.__name__, f5))
        # Remaining case: neither 5' overhang nor blunt.
        if f3 == 0:
            if f5 == 0:
                return "N_" + site + "^N"
            return site + "_" + (f5 - length) * "N" + "^N"
        if 0 < f3 + length <= length and 0 <= f5 <= length:
            return site[:f3] + "_" + site[f3:f5] + "^" + site[f5:]
        if 0 < f3 + length <= length:
            return site[:f3] + "_" + site[f3:] + (f5 - length) * "N" + "^N"
        if 0 <= f5 <= length:
            return "N_" + "N" * (f3 + length) + site[:f5] + "^" + site[f5:]
        if f3 > 0:
            return site + f3 * "N" + "_" + (f5 - f3 - length) * "N" + "^N"
        if f5 < 0:
            return "N_" + abs(f3 - f5 + length) * "N" + "^" + abs(f5) * "N" + site
        return "N_" + abs(f3 + length) * "N" + site + (f5 - length) * "N" + "^N"
1840
1841
class NotDefined(AbstractCut):
    """Implement methods for enzymes with non-characterized overhangs.

    Correspond to NoCut and Unknown.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _drop(cls):
        """Adjust cuts that are outside of the sequence (PRIVATE).

        For internal use only.

        Nothing is done for a linear sequence. For a circular sequence the
        leading positions smaller than 1 are increased by the length of the
        sequence and the trailing positions greater than the length of the
        sequence are decreased by it. ``cls.results`` is modified in place.
        """
        if cls.dna.is_linear():
            return
        length = len(cls.dna)
        for index, location in enumerate(cls.results):
            if location < 1:
                cls.results[index] += length
            else:
                break
        # Walk the results backwards: the element seen at reversed position
        # ``index`` is cls.results[-(index + 1)], i.e. exactly the one that
        # is adjusted below.
        for index, location in enumerate(cls.results[::-1]):
            if location > length:
                cls.results[-(index + 1)] -= length
            else:
                break

    @classmethod
    def is_defined(cls):
        """Return if recognition sequence and cut are defined.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        This class always returns False.

        Related methods:

        - RE.is_ambiguous()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_ambiguous(cls):
        """Return if recognition sequence and cut may be ambiguous.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        This class always returns False.

        Related methods:

        - RE.is_defined()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_unknown(cls):
        """Return if recognition sequence is unknown.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        This class always returns True.

        Related methods:

        - RE.is_defined()
        - RE.is_ambiguous()

        """
        return True

    @classmethod
    def _mod2(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        As the overhang is not defined, this always raises a ValueError.
        """
        #
        #   Normally we should not arrive here, but better safe than sorry.
        #   The overhang is not defined, so we are compatible with nobody:
        #   fail loudly rather than quietly return False.
        #
        raise ValueError(
            "%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
            % (str(cls), str(other), str(cls))
        )

    @classmethod
    def elucidate(cls):
        """Return a string representing the recognition site and cuttings.

        Return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:

        >>> from Bio.Restriction import EcoRI, KpnI, EcoRV, SnaI
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()     # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        """
        return "? %s ?" % cls.site
1962
1963
class Commercially_available(AbstractCut):
    """Implement methods for enzymes which are commercially available.

    Internal use only. Not meant to be instantiated.
    """

    #
    #   Recent additions to Rebase make this naming convention uncertain.
    #   It may be better to say "enzymes which have a supplier".
    #

    @classmethod
    def suppliers(cls):
        """Print the suppliers of the enzyme.

        For every code in ``cls.suppl`` the first field of the matching
        ``suppliers_dict`` entry is printed on its own line, followed by
        a comma.
        """
        for code in cls.suppl:
            supplier_name = suppliers_dict[code][0]
            print(supplier_name + ",")

    @classmethod
    def supplier_list(cls):
        """Return a list of suppliers of the enzyme.

        The list holds the first field of every ``suppliers_dict`` entry
        whose code is in ``cls.suppl``, in the order of ``suppliers_dict``.
        """
        found = []
        for code, entry in suppliers_dict.items():
            if code in cls.suppl:
                found.append(entry[0])
        return found

    @classmethod
    def buffers(cls, supplier):
        """Return the recommended buffer of the supplier for this enzyme.

        Not implemented yet: None is returned whatever the supplier.
        """
        return None

    @classmethod
    def is_comm(cls):
        """Return if enzyme is commercially available.

        Always True for this class.
        """
        return True
2000
2001
class Not_available(AbstractCut):
    """Implement methods for enzymes which are not commercially available.

    Internal use only. Not meant to be instantiated.
    """

    @staticmethod
    def suppliers():
        """Print nothing and return None, as there is no supplier to list."""
        return

    @classmethod
    def supplier_list(cls):
        """Return the suppliers of the enzyme, i.e. an empty list."""
        return list()

    @classmethod
    def buffers(cls, supplier):
        """Raise a TypeError as the enzyme has no supplier, hence no buffer.

        The supplier argument is not used.
        """
        message = "Enzyme not commercially available."
        raise TypeError(message)

    @classmethod
    def is_comm(cls):
        """Return if enzyme is commercially available.

        Always False for this class.
        """
        return False
2033
2034
2035###############################################################################
2036#                                                                             #
2037#                       Restriction Batch                                     #
2038#                                                                             #
2039###############################################################################
2040
2041
class RestrictionBatch(set):
    """Class for operations on more than one enzyme."""

    def __init__(self, first=(), suppliers=()):
        """Initialize empty RB or pre-fill with enzymes (from supplier).

        first is an iterable of enzymes, each one being passed to format().
        suppliers is an iterable of supplier codes (keys of suppliers_dict).
        Raise a ValueError if an element of first is not an enzyme and a
        KeyError if a supplier code is unknown.
        """
        first = [self.format(x) for x in first]
        # suppliers_dict[n][1] holds enzyme names, evaluated to the enzymes.
        first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
        set.__init__(self, first)
        self.mapping = dict.fromkeys(self)
        self.already_mapped = None
        self.suppliers = [x for x in suppliers if x in suppliers_dict]

    def __str__(self):
        """Return a readable representation of the ``RestrictionBatch``.

        Up to four enzymes are all listed, joined by '+'. Otherwise only the
        first two and the last two names (alphabetically) are shown.
        """
        names = self.elements()
        if len(names) < 5:
            return "+".join(names)
        return "...".join(("+".join(names[:2]), "+".join(names[-2:])))

    def __repr__(self):
        """Represent ``RestrictionBatch`` class as a string for debugging."""
        return "RestrictionBatch(%s)" % self.elements()

    def __contains__(self, other):
        """Implement ``in`` for ``RestrictionBatch``.

        other goes through format() first, so it can be an enzyme or a name.
        """
        try:
            other = self.format(other)
        except ValueError:  # other is not a restriction enzyme
            return False
        return set.__contains__(self, other)

    def __div__(self, other):
        """Override '/' operator to use as search method."""
        return self.search(other)

    def __rdiv__(self, other):
        """Override division with reversed operands to use as search method."""
        return self.search(other)

    def __truediv__(self, other):
        """Override Python 3 division operator to use as search method.

        Like __div__.
        """
        return self.search(other)

    def __rtruediv__(self, other):
        """As __truediv__, with reversed operands.

        Like __rdiv__.
        """
        return self.search(other)

    def get(self, enzyme, add=False):
        """Check if enzyme is in batch and return it.

        If add is True and enzyme is not in batch add enzyme to batch.
        If add is False (which is the default) only return enzyme.
        If enzyme is not a RestrictionType or can not be evaluated to
        a RestrictionType, raise a ValueError.
        A ValueError is also raised if enzyme is not in the batch and add
        is False.
        """
        e = self.format(enzyme)
        if e in self:
            return e
        elif add:
            self.add(e)
            return e
        else:
            raise ValueError("enzyme %s is not in RestrictionBatch" % e.__name__)

    @staticmethod
    def _new_batch(enzymes):
        """Return a RestrictionBatch filled with enzymes, unchecked (PRIVATE).

        The elements are stored as they are, without going through format().
        """
        new = RestrictionBatch()
        set.update(new, enzymes)
        # Same initial state as the one __init__ gives to a pre-filled batch.
        new.mapping = dict.fromkeys(new)
        return new

    def lambdasplit(self, func):
        """Filter enzymes in batch with supplied function.

        The new batch will contain only the enzymes for which
        func return True.
        """
        return self._new_batch(filter(func, self))

    def add_supplier(self, letter):
        """Add all enzymes from a given supplier to batch.

        letter represents the suppliers as defined in the dictionary
        RestrictionDictionary.suppliers
        Returns None.
        Raise a KeyError if letter is not a supplier code.
        """
        supplier = suppliers_dict[letter]
        self.suppliers.append(letter)
        for x in supplier[1]:
            self.add_nocheck(eval(x))

    def current_suppliers(self):
        """List the current suppliers for the restriction batch.

        Return a sorted list of the suppliers which have been used to
        create the batch.
        """
        suppl_list = sorted(suppliers_dict[x][0] for x in self.suppliers)
        return suppl_list

    def __iadd__(self, other):
        """Override '+=' for use with sets.

        b += other -> add other to b, check the type of other.
        """
        self.add(other)
        return self

    def __add__(self, other):
        """Override '+' for use with sets.

        b + other -> new RestrictionBatch.
        """
        new = self.__class__(self)
        new.add(other)
        return new

    def remove(self, other):
        """Remove enzyme from restriction batch.

        Safe set.remove method. Verify that other is a RestrictionType or can
        be evaluated to a RestrictionType.
        Raise a ValueError if other can not be evaluated to a RestrictionType.
        Raise a KeyError if other is not in B.
        """
        return set.remove(self, self.format(other))

    def add(self, other):
        """Add a restriction enzyme to the restriction batch.

        Safe set.add method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        Raise a ValueError if other can not be evaluated to a RestrictionType.
        """
        return set.add(self, self.format(other))

    def add_nocheck(self, other):
        """Add restriction enzyme to batch without checking its type."""
        return set.add(self, other)

    def format(self, y):
        """Evaluate enzyme (name) and return it (as RestrictionType).

        If y is a RestrictionType return y.
        If str(y) can be evaluated to a RestrictionType return that enzyme.
        Raise a ValueError in all other case.
        """
        try:
            if isinstance(y, RestrictionType):
                return y
            # NOTE(security): eval() runs any expression it is given, so
            # names coming from an untrusted source must not reach here.
            candidate = eval(str(y))
            if isinstance(candidate, RestrictionType):
                return candidate
        except (NameError, SyntaxError):
            pass
        raise ValueError("%s is not a RestrictionType" % y.__class__)

    def is_restriction(self, y):
        """Return if enzyme (name) is a known enzyme.

        True if y or eval(str(y)) is a RestrictionType, False if it is not
        or if the name can not be evaluated.
        """
        try:
            self.format(y)
        except ValueError:
            return False
        return True

    def split(self, *classes, **flags):
        """Extract enzymes of a certain class and put in new RestrictionBatch.

        An enzyme is kept when, for every class given, it is a subclass of
        that class. A keyword named after a class and set to False inverts
        the test for that class (the enzyme must not be a subclass of it).

        It works but it is slow, so it has really an interest when splitting
        over multiple conditions.
        """

        def splittest(element):
            # Each class must agree with its flag (True when not given).
            return all(
                issubclass(element, klass)
                == bool(flags.get(klass.__name__, True))
                for klass in classes
            )

        return self._new_batch(filter(splittest, self))

    def elements(self):
        """List the enzymes of the RestrictionBatch as list of strings.

        Give all the names of the enzymes in B sorted alphabetically.
        """
        return sorted(str(e) for e in self)

    def as_string(self):
        """List the names of the enzymes of the RestrictionBatch.

        Return a list of the name of the elements of the batch.
        """
        return [str(e) for e in self]

    @classmethod
    def suppl_codes(cls):
        """Return a dictionary with supplier codes.

        Letter code for the suppliers.
        """
        supply = {k: v[0] for k, v in suppliers_dict.items()}
        return supply

    @classmethod
    def show_codes(cls):
        """Print a list of supplier codes."""
        supply = [" = ".join(i) for i in cls.suppl_codes().items()]
        print("\n".join(supply))

    def _cache_matches(self, key):
        """Return if the stored mapping can be reused for key (PRIVATE).

        key is the (sequence as string, linear) pair of the search. The
        mapping is reused only if it was computed for the same key and still
        holds exactly the enzymes of the batch, so that enzymes added or
        removed since the last search are taken into account.
        """
        return key == self.already_mapped and self.mapping.keys() == set(self)

    def search(self, dna, linear=True):
        """Return a dic of cutting sites in the seq for the batch enzymes.

        dna is a Seq/MutableSeq (searched as linear or circular according to
        the linear argument) or a FormattedSeq (its own linear attribute is
        then used). The result is stored in self.mapping and reused while
        the sequence, its topology and the enzymes of the batch are the same.
        Raise a TypeError for any other type of dna.
        """
        #
        #   here we replace the search method of the individual enzymes
        #   with one unique testing method.
        #
        if not hasattr(self, "already_mapped"):
            # TODO - Why does this happen!
            # Try the "doctest" at the start of PrintFormat.py
            self.already_mapped = None
        if isinstance(dna, DNA):
            # For the searching, we just care about the sequence as a string,
            # if that is the same we can use the cached search results.
            key = (str(dna), linear)
            if not self._cache_matches(key):
                fseq = FormattedSeq(dna, linear)
                self.mapping = {x: x.search(fseq) for x in self}
                self.already_mapped = key
            return self.mapping
        elif isinstance(dna, FormattedSeq):
            key = (str(dna), dna.linear)
            if not self._cache_matches(key):
                self.mapping = {x: x.search(dna) for x in self}
                self.already_mapped = key
            return self.mapping
        raise TypeError(
            "Expected Seq or MutableSeq instance, got %s instead" % type(dna)
        )
2299
2300
2301###############################################################################
2302#                                                                             #
2303#                       Restriction Analysis                                  #
2304#                                                                             #
2305###############################################################################
2306
# Module-level defaults for the arguments of Analysis.__init__: an empty
# sequence and an empty batch, both created once when the module is imported.
_empty_DNA = DNA("")
_restrictionbatch = RestrictionBatch()
2309
2310
class Analysis(RestrictionBatch, PrintFormat):
    """Provide methods for enhanced analysis and pretty printing."""

    def __init__(
        self, restrictionbatch=_restrictionbatch, sequence=_empty_DNA, linear=True
    ):
        """Initialize an Analysis with RestrictionBatch and sequence.

        For most of the methods of this class if a dictionary is given it will
        be used as the base to calculate the results.
        If no dictionary is given, the mapping computed for the
        RestrictionBatch and the sequence of the Analysis is used.

        The sequence is searched at once, unless it is empty.
        """
        RestrictionBatch.__init__(self, restrictionbatch)
        self.rb = restrictionbatch
        self.sequence = sequence
        self.linear = linear
        if self.sequence:
            self.search(self.sequence, self.linear)

    def __repr__(self):
        """Represent ``Analysis`` class as a string."""
        return "Analysis(%r,%r,%s)" % (self.rb, self.sequence, self.linear)

    def _results_or_mapping(self, dct):
        """Return dct, or the full mapping if dct is None or empty (PRIVATE).

        Internal use only.
        """
        return dct if dct else self.mapping

    def _sub_set(self, wanted):
        """Filter result for keys which are in wanted (PRIVATE).

        Internal use only. Returns a dict.

        Screen the results through wanted set.
        Keep only the results for which the enzymes is in wanted set.
        """
        # It seems that this method is not used in the whole class!
        return {k: v for k, v in self.mapping.items() if k in wanted}

    def _boundaries(self, start, end):
        """Set boundaries to correct values (PRIVATE).

        Format the boundaries for use with the methods that limit the
        search to only part of the sequence given to analyse.

        Values smaller than 1 are increased by the length of the sequence,
        then start and end are swapped if needed so that start <= end.
        Return the tuple (start, end, test function).
        Raise a TypeError if start or end is not an int.
        """
        if not isinstance(start, int):
            raise TypeError("expected int, got %s instead" % type(start))
        if not isinstance(end, int):
            raise TypeError("expected int, got %s instead" % type(end))
        if start < 1:  # Looks like this tries to do python list like indexing
            start += len(self.sequence)
        if end < 1:
            end += len(self.sequence)
        if end < start:
            start, end = end, start
        # Always return a triple, also for start == end (an empty region),
        # as every caller unpacks three values.
        return start, end, self._test_normal

    def _test_normal(self, start, end, site):
        """Test if site is between start and end (PRIVATE).

        Internal use only. start is included, end is excluded.
        """
        return start <= site < end

    def _test_reverse(self, start, end, site):
        """Test if site is between end and start, for circular sequences (PRIVATE).

        Internal use only. Currently not returned by _boundaries.
        """
        return start <= site <= len(self.sequence) or 1 <= site < end

    def format_output(self, dct=None, title="", s1=""):
        """Collect data and pass to PrintFormat.

        If dct is not given the full dictionary is used.
        """
        dct = self._results_or_mapping(dct)
        return PrintFormat.format_output(self, dct, title, s1)

    def print_that(self, dct=None, title="", s1=""):
        """Print the output of the analysis.

        If dct is not given the full dictionary is used.
        s1: Title for non-cutting enzymes
        This method prints the output of A.format_output() and it is here
        for backwards compatibility.
        """
        print(self.format_output(dct, title, s1))

    def change(self, **what):
        """Change parameters of print output.

        It is possible to change the width of the shell by setting
        self.ConsoleWidth to what you want.
        self.NameWidth refer to the maximal length of the enzyme name.

        Changing one of these parameters here might not give the results
        you expect. In which case, you can settle back to a 80 columns shell
        or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
        you get it right.

        The keywords sequence, rb and linear are accepted as well, the
        analysis being carried out again with the new value.
        Raise an AttributeError for Cmodulo, PrefWidth and any unknown
        keyword.
        """
        for k, v in what.items():
            if k in ("NameWidth", "ConsoleWidth"):
                setattr(self, k, v)
                self.Cmodulo = self.ConsoleWidth % self.NameWidth
                self.PrefWidth = self.ConsoleWidth - self.Cmodulo
            elif k == "sequence":
                self.sequence = v
                self.search(self.sequence, self.linear)
            elif k == "rb":
                # Re-initialise in place. __init__ returns None, so its
                # result must not be bound to self or the next keywords
                # would be applied to None.
                Analysis.__init__(self, v, self.sequence, self.linear)
            elif k == "linear":
                self.linear = v
                self.search(self.sequence, v)
            elif k in ("Indent", "Maxsize"):
                setattr(self, k, v)
            elif k in ("Cmodulo", "PrefWidth"):
                raise AttributeError(
                    "To change %s, change NameWidth and/or ConsoleWidth" % k
                )
            else:
                raise AttributeError("Analysis has no attribute %s" % k)

    def full(self, linear=True):
        """Perform analysis with all enzymes of batch and return all results.

        Full Restriction Map of the sequence, as a dictionary.
        The linear argument is not used.
        """
        return self.mapping

    def blunt(self, dct=None):
        """Return only cuts that have blunt ends."""
        dct = self._results_or_mapping(dct)
        return {k: v for k, v in dct.items() if k.is_blunt()}

    def overhang5(self, dct=None):
        """Return only cuts that have 5' overhangs."""
        dct = self._results_or_mapping(dct)
        return {k: v for k, v in dct.items() if k.is_5overhang()}

    def overhang3(self, dct=None):
        """Return only cuts that have 3' overhangs."""
        dct = self._results_or_mapping(dct)
        return {k: v for k, v in dct.items() if k.is_3overhang()}

    def defined(self, dct=None):
        """Return only results from enzymes that produce defined overhangs."""
        dct = self._results_or_mapping(dct)
        return {k: v for k, v in dct.items() if k.is_defined()}

    def with_sites(self, dct=None):
        """Return only results from enzyme with at least one cut."""
        dct = self._results_or_mapping(dct)
        return {k: v for k, v in dct.items() if v}

    def without_site(self, dct=None):
        """Return only results from enzymes that don't cut the sequence."""
        dct = self._results_or_mapping(dct)
        return {k: v for k, v in dct.items() if not v}

    def with_N_sites(self, N, dct=None):
        """Return only results from enzymes that cut the sequence N times."""
        dct = self._results_or_mapping(dct)
        return {k: v for k, v in dct.items() if len(v) == N}

    def with_number_list(self, list, dct=None):
        """Return only results from enzymes that cut (x,y,z,...) times."""
        dct = self._results_or_mapping(dct)
        return {k: v for k, v in dct.items() if len(v) in list}

    def with_name(self, names, dct=None):
        """Return only results from enzymes which names are listed.

        A BiopythonWarning is issued for every name which is not in
        AllEnzymes, and that name is ignored. names is left unchanged.
        """
        known = []
        for enzyme in names:
            if enzyme in AllEnzymes:
                known.append(enzyme)
            else:
                warnings.warn("no data for the enzyme: %s" % enzyme, BiopythonWarning)
        if not dct:
            return RestrictionBatch(known).search(self.sequence, self.linear)
        return {n: dct[n] for n in known if n in dct}

    def with_site_size(self, site_size, dct=None):
        """Return only results from enzymes with a given site size.

        Only the enzymes of the analysis whose size attribute is equal to
        site_size are considered.
        """
        sites = [name for name in self if name.size == site_size]
        if not dct:
            return RestrictionBatch(sites).search(self.sequence, self.linear)
        return {k: v for k, v in dct.items() if k in sites}

    def only_between(self, start, end, dct=None):
        """Return only results from enzymes that only cut within start, end.

        Enzymes that do not cut at all are left out.
        """
        start, end, test = self._boundaries(start, end)
        dct = self._results_or_mapping(dct)
        return {
            key: sites
            for key, sites in dct.items()
            if sites and all(test(start, end, site) for site in sites)
        }

    def between(self, start, end, dct=None):
        """Return only results from enzymes that cut at least within borders.

        Enzymes that cut the sequence at least in between start and end.
        They may cut outside as well.
        """
        start, end, test = self._boundaries(start, end)
        dct = self._results_or_mapping(dct)
        return {
            key: sites
            for key, sites in dct.items()
            if any(test(start, end, site) for site in sites)
        }

    def show_only_between(self, start, end, dct=None):
        """Return only results from within start, end.

        Enzymes must cut inside start/end and may also cut outside. However,
        only the cutting positions within start/end will be returned.

        The enzymes are selected with between(). Their positions are then
        filtered with the start and end given by the caller, both included.
        """
        # NOTE(review): the filter below includes end and, when start > end,
        # wraps around, whereas between() excludes end and swaps the borders.
        # Confirm which behaviour is intended before changing either.
        if start <= end:
            return {
                k: [vv for vv in v if start <= vv <= end]
                for k, v in self.between(start, end, dct).items()
            }
        return {
            k: [vv for vv in v if start <= vv or vv <= end]
            for k, v in self.between(start, end, dct).items()
        }

    def only_outside(self, start, end, dct=None):
        """Return only results from enzymes that only cut outside start, end.

        Enzymes that cut the sequence outside of the region
        in between start and end but do not cut inside.
        Enzymes that do not cut at all are left out.
        """
        start, end, test = self._boundaries(start, end)
        dct = self._results_or_mapping(dct)
        return {
            key: sites
            for key, sites in dct.items()
            if sites and not any(test(start, end, site) for site in sites)
        }

    def outside(self, start, end, dct=None):
        """Return only results from enzymes that at least cut outside borders.

        Enzymes that cut outside the region in between start and end.
        They may cut inside as well.
        """
        start, end, test = self._boundaries(start, end)
        dct = self._results_or_mapping(dct)
        return {
            key: sites
            for key, sites in dct.items()
            if any(not test(start, end, site) for site in sites)
        }

    def do_not_cut(self, start, end, dct=None):
        """Return only results from enzymes that don't cut between borders.

        These are the enzymes of dct which do not cut at all, together with
        those which only cut outside start, end.
        """
        dct = self._results_or_mapping(dct)
        # Both parts are computed from the same dictionary.
        d = self.without_site(dct)
        d.update(self.only_outside(start, end, dct))
        return d
2610
2611
#
#   The restriction enzyme classes are created dynamically when the module is
#   imported. Here is the magic which allows the creation of the
#   restriction-enzyme classes.
#
#   There are two dictionaries in Restriction_Dictionary: one for the types
#   (which will be called pseudo-types as they really correspond to the
#   values that instances of RestrictionType can take) and one for the
#   enzymes. The reason is efficiency, as the bases are evaluated once per
#   pseudo-type.
#
#   However Restriction is still a very inefficient module at import. But
#   remember that around 660 classes (which is more or less the size of Rebase)
#   have to be created dynamically. However, this processing takes place only
#   once.
#   This inefficiency is however largely compensated by the use of a metaclass
#   which provides a very efficient layout for the classes themselves, mostly
#   alleviating the need of if/else branches in the class methods.
#
#   It is essential to run Restriction with doc string optimisation (-OO
#   switch) as the doc strings of 660 classes take a lot of processing.
#
CommOnly = RestrictionBatch()  # commercial enzymes
NonComm = RestrictionBatch()  # not available commercially
for TYPE, (bases, enzymes) in typedict.items():
    #
    #   The keys are the pseudo-types TYPE (stored as type1, type2...)
    #   The names are not important and are only present to differentiate
    #   the keys in the dict. All the pseudo-types are in fact RestrictionType.
    #   These names will not be used afterwards and the pseudo-types are not
    #   kept in the locals() dictionary. It is therefore impossible to
    #   import them.
    #   Now, if you have a look at the dictionary, you will see that not all
    #   the types are present as those without corresponding enzymes have been
    #   removed by Dictionary_Builder().
    #
    #   The values are tuples which contain
    #   as first element a tuple of bases (as string) and
    #   as second element the names of the enzymes.
    #
    #   First eval the bases: each string is turned into the object it names.
    #
    bases = tuple(eval(x) for x in bases)
    #
    #   Now create the particular value of RestrictionType for the classes
    #   in enzymes.
    #
    T = type.__new__(RestrictionType, "RestrictionType", bases, {})
    for k in enzymes:
        #
        #   Now, we go through all the enzymes and assign them their type.
        #   enzymedict[k] contains the values of the attributes for this
        #   particular class (self.site, self.ovhg,....).
        #
        newenz = T(k, bases, enzymedict[k])
        #
        #   We add the enzyme to the corresponding batch.
        #
        #   No need to verify the enzyme is a RestrictionType -> add_nocheck
        #
        if newenz.is_comm():
            CommOnly.add_nocheck(newenz)
        else:
            NonComm.add_nocheck(newenz)
#
#   AllEnzymes is a RestrictionBatch with all the enzymes from Rebase
#   (the commercial ones plus the non commercial ones).
#
AllEnzymes = RestrictionBatch(CommOnly)
AllEnzymes.update(NonComm)
#
#   Now, place the enzymes in locals so they can be imported.
#   At module level locals() is the namespace of the module itself.
#
names = [str(x) for x in AllEnzymes]
locals().update(dict(zip(names, AllEnzymes)))
#   The public API: the fixed names below plus one name per enzyme.
__all__ = (
    "FormattedSeq",
    "Analysis",
    "RestrictionBatch",
    "AllEnzymes",
    "CommOnly",
    "NonComm",
) + tuple(names)
#   Remove the temporary names used to build the enzymes from the module.
del k, enzymes, TYPE, bases, names
2695