1 /* dzl-levenshtein.c
2 *
3 * Copyright (C) 2013-2017 Christian Hergert <christian@hergert.me>
4 *
5 * This file is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU Lesser General Public License as published by
7 * the Free Software Foundation; either version 2.1 of the License, or (at
8 * your option) any later version.
9 *
10 * This file is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
13 * License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "config.h"
20
21 #include <glib.h>
22 #include <string.h>
23
24 #include "dzl-levenshtein.h"
25
26 gint
dzl_levenshtein(const gchar * needle,const gchar * haystack)27 dzl_levenshtein (const gchar *needle,
28 const gchar *haystack)
29 {
30 const gchar *s;
31 const gchar *t;
32 gunichar sc;
33 gunichar tc;
34 gint *v0;
35 gint *v1;
36 gint haystack_char_len;
37 gint cost;
38 gint i;
39 gint j;
40 gint ret;
41
42 g_return_val_if_fail (needle, G_MAXINT);
43 g_return_val_if_fail (haystack, G_MAXINT);
44
45 /*
46 * Handle degenerate cases.
47 */
48 if (!g_strcmp0(needle, haystack)) {
49 return 0;
50 } else if (!*needle) {
51 return g_utf8_strlen(haystack, -1);
52 } else if (!*haystack) {
53 return g_utf8_strlen(needle, -1);
54 }
55
56 haystack_char_len = g_utf8_strlen (haystack, -1);
57
58 /*
59 * Create two vectors to hold our states.
60 */
61 v0 = g_new0(int, haystack_char_len + 1);
62 v1 = g_new0(int, haystack_char_len + 1);
63
64 /*
65 * initialize v0 (the previous row of distances).
66 * this row is A[0][i]: edit distance for an empty needle.
67 * the distance is just the number of characters to delete from haystack.
68 */
69 for (i = 0; i < haystack_char_len + 1; i++) {
70 v0[i] = i;
71 }
72
73 for (i = 0, s = needle; s && *s; i++, s = g_utf8_next_char(s)) {
74 /*
75 * Calculate v1 (current row distances) from the previous row v0.
76 */
77
78 sc = g_utf8_get_char(s);
79
80 /*
81 * first element of v1 is A[i+1][0]
82 *
83 * edit distance is delete (i+1) chars from needle to match empty
84 * haystack.
85 */
86 v1[0] = i + 1;
87
88 /*
89 * use formula to fill in the rest of the row.
90 */
91 for (j = 0, t = haystack; t && *t; j++, t = g_utf8_next_char(t)) {
92 tc = g_utf8_get_char(t);
93 cost = (sc == tc) ? 0 : 1;
94 v1[j+1] = MIN(v1[j] + 1, MIN(v0[j+1] + 1, v0[j] + cost));
95 }
96
97 /*
98 * copy v1 (current row) to v0 (previous row) for next iteration.
99 */
100 memcpy(v0, v1, sizeof(gint) * haystack_char_len);
101 }
102
103 ret = v1[haystack_char_len];
104
105 g_free(v0);
106 g_free(v1);
107
108 return ret;
109 }
110