1 /* EasyTAG - tag editor for audio files
2 * Copyright (C) 2004 Santtu Lakkala <inz@inz.fi>
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 51
16 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19 #include <string.h>
20
21 #include "dlm.h"
22
23 static int dlm_cost (const gchar, const gchar);
24 static int dlm_minimum (int a, int b, int c, int d);
25
26 /*
27 * Compute the Damerau-Levenshtein Distance between utf-8 strings ds and dt.
28 */
29 gint
dlm(const gchar * ds,const gchar * dt)30 dlm (const gchar *ds, const gchar *dt)
31 {
32 gsize n;
33 gsize m;
34
35 /* Casefold for better matching of the strings. */
36 gchar *s = g_utf8_casefold(ds, -1);
37 gchar *t = g_utf8_casefold(dt, -1);
38
39 n = strlen(s);
40 m = strlen(t);
41
42 if (n && m)
43 {
44 gint *d;
45 gsize i;
46 gsize j;
47 gint metric;
48
49 n++;
50 m++;
51
52 d = (gint *)g_malloc0 (sizeof (gint) * m * n);
53
54 for (i = 0; i < m; i++)
55 d[i * n] = i;
56
57 for (i = 1; i < n; i++)
58 {
59 d[i] = i;
60 for (j = 1; j < m; j++)
61 {
62 d[j * n + i] = dlm_minimum(
63 d[(j - 1) * n + i] + 1,
64 d[j * n + i - 1] + 1,
65 d[(j - 1) * n + i - 1] + dlm_cost(s[i - 1], t[j - 1]),
66 (j>1 && i>1 ?
67 d[(j - 2) * n + i - 2] + dlm_cost(s[i - 1], t[j])
68 + dlm_cost(s[i], t[j - 1])
69 : 0x7fff)
70 );
71 }
72 }
73 metric = d[n * m - 1];
74 g_free(d);
75
76 /* Count a "similarity value" */
77 /* Subtract the extra byte added to each string length earlier. */
78 metric = 1000 - (1000 * (metric * 2)) / (m + n - 2);
79
80 g_free(t);
81 g_free(s);
82 return metric;
83 }
84 g_free(t);
85 g_free(s);
86
87 /* Return value of -1 indicates an error */
88 return -1;
89 }
90
91 /* "Cost" of changing from a to b. */
92 static int
dlm_cost(const char a,const char b)93 dlm_cost (const char a, const char b)
94 {
95 return a == b ? 0 : 1;
96 }
97
98 /* Return the smallest of four integers. */
99 static int
dlm_minimum(int a,int b,int c,int d)100 dlm_minimum (int a, int b, int c, int d)
101 {
102 int min = a;
103 if (b < min)
104 {
105 min = b;
106 }
107 if (c < min)
108 {
109 min = c;
110 }
111 if (d < min)
112 {
113 min = d;
114 }
115 return min;
116 }
117