1# built-in
2from collections import Counter
3from contextlib import suppress
4
5# app
6from ..libraries import prototype
7from ..utils import find_ngrams
8
9
# Module-level registry of external libraries; Base.external_answer() looks
# up candidate implementations through it via libraries.get_libs().
# NOTE(review): clone() presumably returns an independent copy of the shared
# `prototype`, so the optimize() call below leaves `prototype` untouched --
# confirm in ..libraries.
libraries = prototype.clone()
# NOTE(review): what optimize() changes (ordering, filtering, ...) is not
# visible from this module -- see ..libraries.
libraries.optimize()
12
13
class Base:
    """Common base for algorithms whose ``__call__`` result is a distance.

    Subclasses implement ``__call__``; the public helpers here derive
    similarity and normalized values from that result and from ``maximum``.

    Some helpers read optional attributes that this class never sets itself:
    ``test_func`` (see ``external_answer``) and ``as_set``
    (see ``_count_counters``).
    """

    def __init__(self, qval=1, external=True):
        """Store the algorithm settings.

        :param qval: controls ``_get_sequences``: a falsy value splits text
            by words, 1 leaves the sequences untouched, anything else is
            passed to ``find_ngrams`` as the q-gram size.
        :param external: when true, ``external_answer`` may delegate the
            computation to external libraries.
        """
        self.qval = qval
        self.external = external

    def __call__(self, *sequences):
        """Compute the raw result of the algorithm.

        :raises NotImplementedError: always; subclasses must override it.
        """
        raise NotImplementedError

    @staticmethod
    def maximum(*sequences):
        """Get maximum possible value.

        This is the length of the longest sequence, or 0 when no sequences
        are passed at all.
        """
        # default=0: without it max() raises ValueError on an empty argument
        # list, which broke normalized_distance() and similar callers when
        # they were invoked without sequences.
        return max(map(len, sequences), default=0)

    def distance(self, *sequences):
        """Get distance between sequences (the raw ``__call__`` result).
        """
        return self(*sequences)

    def similarity(self, *sequences):
        """Get sequences similarity.

        similarity = maximum - distance
        """
        return self.maximum(*sequences) - self.distance(*sequences)

    def normalized_distance(self, *sequences):
        """Get distance from 0 to 1.

        Returns 0 when ``maximum`` is 0, so no division by zero happens.
        """
        maximum = self.maximum(*sequences)
        if maximum == 0:
            return 0
        return self.distance(*sequences) / maximum

    def normalized_similarity(self, *sequences):
        """Get similarity from 0 to 1.

        normalized_similarity = 1 - normalized_distance
        """
        return 1 - self.normalized_distance(*sequences)

    def external_answer(self, *sequences):
        """Try to get answer from known external libraries.

        :return: the result of the first library call that does not raise,
            or None when the feature is disabled, a custom ``test_func`` is
            set, or no library produced a result.
        """
        # the feature is disabled (or the attribute was never set)
        if not getattr(self, 'external', False):
            return
        # external libs do not support a custom test_func
        if hasattr(self, 'test_func') and self.test_func is not self._ident:
            return
        # libraries are registered under the algorithm's class name
        libs = libraries.get_libs(self.__class__.__name__)
        for lib in libs:
            # the library's conditions are not satisfied for this call
            if not lib.check_conditions(self, *sequences):
                continue
            # no function available (e.g. the library is not installed)
            if not lib.get_function():
                continue

            prepared_sequences = lib.prepare(*sequences)
            # fail side libraries silently and try the next one
            with suppress(Exception):
                return lib.func(*prepared_sequences)

    def quick_answer(self, *sequences):
        """Try to get answer quickly, without calling the main implementation.

        If there are no sequences, only one sequence, or all sequences are
        equal, return 0.
        If any sequence is empty, return maximum.
        Otherwise return the external answer, which is None when no external
        library could provide one.
        """
        if not sequences:
            return 0
        if len(sequences) == 1:
            return 0
        if self._ident(*sequences):
            return 0
        if not all(sequences):
            return self.maximum(*sequences)
        # may be None: the caller then has to run the main implementation
        return self.external_answer(*sequences)

    @staticmethod
    def _ident(*elements):
        """Return True if all elements are equal.
        """
        try:
            # for hashable elements
            return len(set(elements)) == 1
        except TypeError:
            # for unhashable elements: compare every neighbouring pair
            for e1, e2 in zip(elements, elements[1:]):
                if e1 != e2:
                    return False
            return True

    def _get_sequences(self, *sequences):
        """Prepare sequences according to ``self.qval``.

        qval falsy (e.g. None): split text by words (``str.split``)
        qval=1: do not split sequences; for text this means comparing by letters
        otherwise: split sequences into q-grams via ``find_ngrams``
        """
        # by words
        if not self.qval:
            return [s.split() for s in sequences]
        # by chars: the sequences are returned untouched
        if self.qval == 1:
            return sequences
        # by n-grams
        return [find_ngrams(s, self.qval) for s in sequences]

    def _get_counters(self, *sequences):
        """Prepare sequences and convert them to Counters.

        Sequences that are all Counters already are returned unchanged.
        """
        # already Counters
        if all(isinstance(s, Counter) for s in sequences):
            return sequences
        return [Counter(s) for s in self._get_sequences(*sequences)]

    def _intersect_counters(self, *sequences):
        """Return the intersection (``&``) of all given Counters.

        The first Counter is copied, so the inputs are not modified.
        """
        intersection = sequences[0].copy()
        for s in sequences[1:]:
            intersection &= s
        return intersection

    def _union_counters(self, *sequences):
        """Return the union (``|``) of all given Counters.

        The first Counter is copied, so the inputs are not modified.
        """
        union = sequences[0].copy()
        for s in sequences[1:]:
            union |= s
        return union

    def _sum_counters(self, *sequences):
        """Return the sum (``+``) of all given Counters.

        The first Counter is copied, so the inputs are not modified.
        """
        result = sequences[0].copy()
        for s in sequences[1:]:
            result += s
        return result

    def _count_counters(self, counter):
        """Return the number of elements in a Counter.

        With a truthy ``as_set`` attribute every distinct key counts once;
        otherwise the stored counts are summed.
        """
        if getattr(self, 'as_set', False):
            return len(set(counter))
        else:
            return sum(counter.values())

    def __repr__(self):
        """Return ``ClassName({instance attributes})``.
        """
        return '{name}({data})'.format(
            name=type(self).__name__,
            data=self.__dict__,
        )
168
169
class BaseSimilarity(Base):
    """Base for algorithms whose ``__call__`` result is a similarity.

    The raw result is exposed as ``similarity`` and the distance is derived
    from it as ``maximum - similarity``.
    """

    def similarity(self, *sequences):
        """Get sequences similarity (the raw ``__call__`` result).
        """
        return self(*sequences)

    def distance(self, *sequences):
        """Get distance between sequences.

        distance = maximum - similarity
        """
        upper_bound = self.maximum(*sequences)
        return upper_bound - self.similarity(*sequences)

    def quick_answer(self, *sequences):
        """Try to get answer quickly, without calling the main implementation.

        If there are fewer than two sequences or all sequences are equal,
        return maximum.
        If any sequence is empty, return 0.
        Otherwise return the external answer, which may be None.
        """
        # nothing to compare against, or everything is identical
        if len(sequences) < 2 or self._ident(*sequences):
            return self.maximum(*sequences)
        # at least one sequence is empty
        if any(not seq for seq in sequences):
            return 0
        # try to get the answer from external libs
        return self.external_answer(*sequences)
190