1 /* EasyTAG - tag editor for audio files
2  * Copyright (C) 2004  Santtu Lakkala <inz@inz.fi>
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of the GNU General Public License as published by the Free
6  * Software Foundation; either version 2 of the License, or (at your option)
7  * any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17  */
18 
19 #include <string.h>
20 
21 #include "dlm.h"
22 
23 static int dlm_cost    (const gchar, const gchar);
24 static int dlm_minimum (int a, int b, int c, int d);
25 
26 /*
27  * Compute the Damerau-Levenshtein Distance between utf-8 strings ds and dt.
28  */
29 gint
dlm(const gchar * ds,const gchar * dt)30 dlm (const gchar *ds, const gchar *dt)
31 {
32     gsize n;
33     gsize m;
34 
35     /* Casefold for better matching of the strings. */
36     gchar *s = g_utf8_casefold(ds, -1);
37     gchar *t = g_utf8_casefold(dt, -1);
38 
39     n = strlen(s);
40     m = strlen(t);
41 
42     if (n && m)
43     {
44         gint *d;
45         gsize i;
46         gsize j;
47         gint metric;
48 
49         n++;
50         m++;
51 
52         d = (gint *)g_malloc0 (sizeof (gint) * m * n);
53 
54         for (i = 0; i < m; i++)
55             d[i * n] = i;
56 
57         for (i = 1; i < n; i++)
58         {
59             d[i] = i;
60             for (j = 1; j < m; j++)
61             {
62                 d[j * n + i] = dlm_minimum(
63                     d[(j - 1) * n + i] + 1,
64                     d[j * n + i - 1] + 1,
65                     d[(j - 1) * n + i - 1] + dlm_cost(s[i - 1], t[j - 1]),
66                     (j>1 && i>1 ?
67                          d[(j - 2) * n + i - 2] + dlm_cost(s[i - 1], t[j])
68                         + dlm_cost(s[i], t[j - 1])
69                     : 0x7fff)
70                 );
71             }
72         }
73         metric = d[n * m - 1];
74         g_free(d);
75 
76         /* Count a "similarity value" */
77         /* Subtract the extra byte added to each string length earlier. */
78         metric = 1000 - (1000 * (metric * 2)) / (m + n - 2);
79 
80         g_free(t);
81         g_free(s);
82         return metric;
83     }
84     g_free(t);
85     g_free(s);
86 
87     /* Return value of -1 indicates an error */
88     return -1;
89 }
90 
91 /* "Cost" of changing from a to b. */
92 static int
dlm_cost(const char a,const char b)93 dlm_cost (const char a, const char b)
94 {
95     return a == b ? 0 : 1;
96 }
97 
98 /* Return the smallest of four integers. */
99 static int
dlm_minimum(int a,int b,int c,int d)100 dlm_minimum (int a, int b, int c, int d)
101 {
102     int min = a;
103     if (b < min)
104     {
105         min = b;
106     }
107     if (c < min)
108     {
109         min = c;
110     }
111     if (d < min)
112     {
113         min = d;
114     }
115     return min;
116 }
117