1 /**
2 * Modified levenshtein distance calculation
3 *
4 * This program can be used, redistributed or modified under any of
5 * Boost Software License 1.0, GPL v2 or GPL v3
6 * See the file COPYING for details.
7 *
8 * $Id$
9 *
10 * Copyright (C) 2014 kikairoya <kikairoya@gmail.com>
11 * Copyright (C) 2014 Jesse Kornblum <research@jessekornblum.com>
12 */
13 #include <stddef.h>
14
15 #define EDIT_DISTN_MAXLEN 64 /* MAX_SPAMSUM */
16 #define EDIT_DISTN_INSERT_COST 1
17 #define EDIT_DISTN_REMOVE_COST 1
18 #define EDIT_DISTN_REPLACE_COST 2
19
20 #define MIN(x,y) ((x)<(y)?(x):(y))
21
edit_distn(const char * s1,size_t s1len,const char * s2,size_t s2len)22 int edit_distn(const char *s1, size_t s1len, const char *s2, size_t s2len) {
23 int t[2][EDIT_DISTN_MAXLEN+1];
24 int *t1 = t[0];
25 int *t2 = t[1];
26 int *t3;
27 size_t i1, i2;
28 for (i2 = 0; i2 <= s2len; i2++)
29 t[0][i2] = i2 * EDIT_DISTN_REMOVE_COST;
30 for (i1 = 0; i1 < s1len; i1++) {
31 t2[0] = (i1 + 1) * EDIT_DISTN_INSERT_COST;
32 for (i2 = 0; i2 < s2len; i2++) {
33 int cost_a = t1[i2+1] + EDIT_DISTN_INSERT_COST;
34 int cost_d = t2[i2] + EDIT_DISTN_REMOVE_COST;
35 int cost_r = t1[i2] + (s1[i1] == s2[i2] ? 0 : EDIT_DISTN_REPLACE_COST);
36 t2[i2+1] = MIN(MIN(cost_a, cost_d), cost_r);
37 }
38 t3 = t1;
39 t1 = t2;
40 t2 = t3;
41 }
42 return t1[s2len];
43 }
44
45
46 #ifdef __UNITTEST
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50
51 #define HELLOWORLD "Hello World!"
52
53 // Convenience method for getting the edit distance of two strings
edit_dist(const char * a,const char * b)54 int edit_dist(const char *a, const char *b) {
55 unsigned int a_len = 0, b_len = 0;
56 if (a)
57 a_len = strlen(a);
58 if (b)
59 b_len = strlen(b);
60
61 return edit_distn(a, a_len, b, b_len);
62 }
63
64 // Exercises edit_dist on a and b. If the result matches the expected value,
65 // returns 0. If not, displays the message and returns 1.
run_test(char * a,char * b,int expected,char * msg)66 int run_test(char *a, char *b, int expected, char *msg) {
67 int actual = edit_dist(a,b);
68 if (actual == expected)
69 return 0;
70
71 printf ("FAIL: Expected %d, got %d for %s:%s, %s\n",
72 expected,
73 actual,
74 a,
75 b,
76 msg);
77 return 1;
78 }
79
80 #define RUN_TEST(A,B,EXPECTED,MSG) failures += run_test(A,B,EXPECTED,MSG)
81
main(void)82 int main(void) {
83 unsigned int failures = 0;
84
85 RUN_TEST(NULL, HELLOWORLD, 12, "Null source");
86 RUN_TEST(HELLOWORLD, NULL, 12, "Null dest");
87 RUN_TEST("", HELLOWORLD, 12, "Empty source");
88 RUN_TEST(HELLOWORLD, "", 12, "Empty destination");
89 RUN_TEST(HELLOWORLD, HELLOWORLD, 0, "Equal strings");
90 RUN_TEST("Hello world", "Hell world", 1, "Delete");
91 RUN_TEST("Hell world", "Hello world", 1, "Insert");
92 RUN_TEST("Hello world", "Hello owrld", 2, "Swap");
93 RUN_TEST("Hello world", "HellX world", 2, "Change");
94
95 if (failures) {
96 printf ("\n%u tests failed.\n", failures);
97 return EXIT_FAILURE;
98 }
99
100 printf ("All tests passed.\n");
101 return EXIT_SUCCESS;
102 }
103 #endif
104