1 /* dzl-levenshtein.c
2  *
3  * Copyright (C) 2013-2017 Christian Hergert <christian@hergert.me>
4  *
5  * This file is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation; either version 2.1 of the License, or (at
8  * your option) any later version.
9  *
10  * This file is distributed in the hope that it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
13  * License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "config.h"
20 
21 #include <glib.h>
22 #include <string.h>
23 
24 #include "dzl-levenshtein.h"
25 
26 gint
dzl_levenshtein(const gchar * needle,const gchar * haystack)27 dzl_levenshtein (const gchar *needle,
28                  const gchar *haystack)
29 {
30    const gchar *s;
31    const gchar *t;
32    gunichar sc;
33    gunichar tc;
34    gint *v0;
35    gint *v1;
36    gint haystack_char_len;
37    gint cost;
38    gint i;
39    gint j;
40    gint ret;
41 
42    g_return_val_if_fail (needle, G_MAXINT);
43    g_return_val_if_fail (haystack, G_MAXINT);
44 
45    /*
46     * Handle degenerate cases.
47     */
48    if (!g_strcmp0(needle, haystack)) {
49       return 0;
50    } else if (!*needle) {
51       return g_utf8_strlen(haystack, -1);
52    } else if (!*haystack) {
53       return g_utf8_strlen(needle, -1);
54    }
55 
56    haystack_char_len = g_utf8_strlen (haystack, -1);
57 
58    /*
59     * Create two vectors to hold our states.
60     */
61    v0 = g_new0(int, haystack_char_len + 1);
62    v1 = g_new0(int, haystack_char_len + 1);
63 
64    /*
65     * initialize v0 (the previous row of distances).
66     * this row is A[0][i]: edit distance for an empty needle.
67     * the distance is just the number of characters to delete from haystack.
68     */
69    for (i = 0; i < haystack_char_len + 1; i++) {
70       v0[i] = i;
71    }
72 
73    for (i = 0, s = needle; s && *s; i++, s = g_utf8_next_char(s)) {
74       /*
75        * Calculate v1 (current row distances) from the previous row v0.
76        */
77 
78       sc = g_utf8_get_char(s);
79 
80       /*
81        * first element of v1 is A[i+1][0]
82        *
83        * edit distance is delete (i+1) chars from needle to match empty
84        * haystack.
85        */
86       v1[0] = i + 1;
87 
88       /*
89        * use formula to fill in the rest of the row.
90        */
91       for (j = 0, t = haystack; t && *t; j++, t = g_utf8_next_char(t)) {
92          tc = g_utf8_get_char(t);
93          cost = (sc == tc) ? 0 : 1;
94          v1[j+1] = MIN(v1[j] + 1, MIN(v0[j+1] + 1, v0[j] + cost));
95       }
96 
97       /*
98        * copy v1 (current row) to v0 (previous row) for next iteration.
99        */
100       memcpy(v0, v1, sizeof(gint) * haystack_char_len);
101    }
102 
103    ret = v1[haystack_char_len];
104 
105    g_free(v0);
106    g_free(v1);
107 
108    return ret;
109 }
110