1 /**
2  * Modified levenshtein distance calculation
3  *
4  * This program can be used, redistributed or modified under any of
5  * Boost Software License 1.0, GPL v2 or GPL v3
6  * See the file COPYING for details.
7  *
8  * $Id$
9  *
10  * Copyright (C) 2014 kikairoya <kikairoya@gmail.com>
11  * Copyright (C) 2014 Jesse Kornblum <research@jessekornblum.com>
12  */
13 #include <stddef.h>
14 
15 #define EDIT_DISTN_MAXLEN 64 /* MAX_SPAMSUM */
16 #define EDIT_DISTN_INSERT_COST 1
17 #define EDIT_DISTN_REMOVE_COST 1
18 #define EDIT_DISTN_REPLACE_COST 2
19 
20 #define MIN(x,y) ((x)<(y)?(x):(y))
21 
edit_distn(const char * s1,size_t s1len,const char * s2,size_t s2len)22 int edit_distn(const char *s1, size_t s1len, const char *s2, size_t s2len) {
23   int t[2][EDIT_DISTN_MAXLEN+1];
24   int *t1 = t[0];
25   int *t2 = t[1];
26   int *t3;
27   size_t i1, i2;
28   for (i2 = 0; i2 <= s2len; i2++)
29     t[0][i2] = i2 * EDIT_DISTN_REMOVE_COST;
30   for (i1 = 0; i1 < s1len; i1++) {
31     t2[0] = (i1 + 1) * EDIT_DISTN_INSERT_COST;
32     for (i2 = 0; i2 < s2len; i2++) {
33       int cost_a = t1[i2+1] + EDIT_DISTN_INSERT_COST;
34       int cost_d = t2[i2] + EDIT_DISTN_REMOVE_COST;
35       int cost_r = t1[i2] + (s1[i1] == s2[i2] ? 0 : EDIT_DISTN_REPLACE_COST);
36       t2[i2+1] = MIN(MIN(cost_a, cost_d), cost_r);
37     }
38     t3 = t1;
39     t1 = t2;
40     t2 = t3;
41   }
42   return t1[s2len];
43 }
44 
45 
46 #ifdef __UNITTEST
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 
51 #define HELLOWORLD "Hello World!"
52 
53 // Convenience method for getting the edit distance of two strings
edit_dist(const char * a,const char * b)54 int edit_dist(const char *a, const char *b) {
55   unsigned int a_len = 0, b_len = 0;
56   if (a)
57     a_len = strlen(a);
58   if (b)
59     b_len = strlen(b);
60 
61   return edit_distn(a, a_len, b, b_len);
62 }
63 
64 // Exercises edit_dist on a and b. If the result matches the expected value,
65 // returns 0. If not, displays the message and returns 1.
run_test(char * a,char * b,int expected,char * msg)66 int run_test(char *a, char *b, int expected, char *msg) {
67   int actual = edit_dist(a,b);
68   if (actual == expected)
69     return 0;
70 
71   printf ("FAIL: Expected %d, got %d for %s:%s, %s\n",
72           expected,
73           actual,
74           a,
75           b,
76           msg);
77   return 1;
78 }
79 
80 #define RUN_TEST(A,B,EXPECTED,MSG)   failures += run_test(A,B,EXPECTED,MSG)
81 
main(void)82 int main(void) {
83   unsigned int failures = 0;
84 
85   RUN_TEST(NULL, HELLOWORLD, 12, "Null source");
86   RUN_TEST(HELLOWORLD, NULL, 12, "Null dest");
87   RUN_TEST("", HELLOWORLD, 12, "Empty source");
88   RUN_TEST(HELLOWORLD, "", 12, "Empty destination");
89   RUN_TEST(HELLOWORLD, HELLOWORLD, 0, "Equal strings");
90   RUN_TEST("Hello world", "Hell world", 1, "Delete");
91   RUN_TEST("Hell world", "Hello world", 1, "Insert");
92   RUN_TEST("Hello world", "Hello owrld", 2, "Swap");
93   RUN_TEST("Hello world", "HellX world", 2, "Change");
94 
95   if (failures) {
96     printf ("\n%u tests failed.\n", failures);
97     return EXIT_FAILURE;
98   }
99 
100   printf ("All tests passed.\n");
101   return EXIT_SUCCESS;
102 }
103 #endif
104