1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "edit_distance.h"
16 
17 #include <algorithm>
18 #include <vector>
19 
20 using namespace std;
21 
EditDistance(const StringPiece & s1,const StringPiece & s2,bool allow_replacements,int max_edit_distance)22 int EditDistance(const StringPiece& s1,
23                  const StringPiece& s2,
24                  bool allow_replacements,
25                  int max_edit_distance) {
26   // The algorithm implemented below is the "classic"
27   // dynamic-programming algorithm for computing the Levenshtein
28   // distance, which is described here:
29   //
30   //   http://en.wikipedia.org/wiki/Levenshtein_distance
31   //
32   // Although the algorithm is typically described using an m x n
33   // array, only one row plus one element are used at a time, so this
34   // implementation just keeps one vector for the row.  To update one entry,
35   // only the entries to the left, top, and top-left are needed.  The left
36   // entry is in row[x-1], the top entry is what's in row[x] from the last
37   // iteration, and the top-left entry is stored in previous.
38   int m = s1.len_;
39   int n = s2.len_;
40 
41   vector<int> row(n + 1);
42   for (int i = 1; i <= n; ++i)
43     row[i] = i;
44 
45   for (int y = 1; y <= m; ++y) {
46     row[0] = y;
47     int best_this_row = row[0];
48 
49     int previous = y - 1;
50     for (int x = 1; x <= n; ++x) {
51       int old_row = row[x];
52       if (allow_replacements) {
53         row[x] = min(previous + (s1.str_[y - 1] == s2.str_[x - 1] ? 0 : 1),
54                      min(row[x - 1], row[x]) + 1);
55       }
56       else {
57         if (s1.str_[y - 1] == s2.str_[x - 1])
58           row[x] = previous;
59         else
60           row[x] = min(row[x - 1], row[x]) + 1;
61       }
62       previous = old_row;
63       best_this_row = min(best_this_row, row[x]);
64     }
65 
66     if (max_edit_distance && best_this_row > max_edit_distance)
67       return max_edit_distance + 1;
68   }
69 
70   return row[n];
71 }
72