1"""
2Contains functions implementing edit distance algorithms.
3"""
4
5from whoosh.compat import xrange
6
7
8def levenshtein(seq1, seq2, limit=None):
9    """Returns the Levenshtein edit distance between two strings.
10    """
11
12    oneago = None
13    thisrow = list(range(1, len(seq2) + 1)) + [0]
14    for x in xrange(len(seq1)):
15        # Python lists wrap around for negative indices, so put the
16        # leftmost column at the *end* of the list. This matches with
17        # the zero-indexed strings and saves extra calculation.
18        oneago, thisrow = thisrow, [0] * len(seq2) + [x + 1]
19        for y in xrange(len(seq2)):
20            delcost = oneago[y] + 1
21            addcost = thisrow[y - 1] + 1
22            subcost = oneago[y - 1] + (seq1[x] != seq2[y])
23            thisrow[y] = min(delcost, addcost, subcost)
24
25        if limit and x > limit and min(thisrow) > limit:
26            return limit + 1
27
28    return thisrow[len(seq2) - 1]
29
30
31def damerau_levenshtein(seq1, seq2, limit=None):
32    """Returns the Damerau-Levenshtein edit distance between two strings.
33    """
34
35    oneago = None
36    thisrow = list(range(1, len(seq2) + 1)) + [0]
37    for x in xrange(len(seq1)):
38        # Python lists wrap around for negative indices, so put the
39        # leftmost column at the *end* of the list. This matches with
40        # the zero-indexed strings and saves extra calculation.
41        twoago, oneago, thisrow = oneago, thisrow, [0] * len(seq2) + [x + 1]
42        for y in xrange(len(seq2)):
43            delcost = oneago[y] + 1
44            addcost = thisrow[y - 1] + 1
45            subcost = oneago[y - 1] + (seq1[x] != seq2[y])
46            thisrow[y] = min(delcost, addcost, subcost)
47            # This block deals with transpositions
48            if (x > 0 and y > 0 and seq1[x] == seq2[y - 1]
49                and seq1[x - 1] == seq2[y] and seq1[x] != seq2[y]):
50                thisrow[y] = min(thisrow[y], twoago[y - 2] + 1)
51
52        if limit and x > limit and min(thisrow) > limit:
53            return limit + 1
54
55    return thisrow[len(seq2) - 1]
56
57
58def relative(a, b):
59    """Returns the relative distance between two strings, in the range
60    [0-1] where 1 means total equality.
61    """
62
63    d = distance(a, b)
64    longer = float(max((len(a), len(b))))
65    shorter = float(min((len(a), len(b))))
66    r = ((longer - d) / longer) * (shorter / longer)
67    return r
68
69
70distance = damerau_levenshtein
71