1 /* COPYRIGHT NOTICE
2  *
3  * This code was pulled directly from the Text-DoubleMetaphone perl package,
4  * version 0.07
5  *
6  * The README mentions that the copyright is:
7  *
8  *  Copyright 2000, Maurice Aubrey <maurice@hevanet.com>.
9  *  All rights reserved.
10 
11  *  This code is based heavily on the C++ implementation by
12  *  Lawrence Philips and incorporates several bug fixes courtesy
13  *  of Kevin Atkinson <kevina@users.sourceforge.net>.
14  *
15  *  This module is free software; you may redistribute it and/or
16  *  modify it under the same terms as Perl itself.
17  */
18 
19 #include <stdio.h>
20 #include <ctype.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <stdarg.h>
24 #include <assert.h>
25 #include "double_metaphone.h"
26 
27 #include "rmalloc.h"
28 
/*
 * Allocation wrappers used by the metastring helpers in this file.
 *
 * If META_USE_PERL_MALLOC is defined we use Perl's memory routines
 * (New / Renew / Safefree). Otherwise the rm_malloc / rm_realloc / rm_free
 * functions are used (presumably declared in "rmalloc.h" -- confirm).
 *
 *   META_MALLOC(v, n, t)  - assign to v storage for n objects of type t
 *   META_REALLOC(v, n, t) - resize v to hold n objects of type t; v is
 *                           reassigned with the result
 *   META_FREE(x)          - release storage obtained via the macros above
 */
#ifdef META_USE_PERL_MALLOC

#include "EXTERN.h"
#include "perl.h"
#define META_MALLOC(v, n, t) New(1, v, n, t)
#define META_REALLOC(v, n, t) Renew(v, n, t)
#define META_FREE(x) Safefree((x))

#else

#define META_MALLOC(v, n, t) (v = (t *)rm_malloc(((n) * sizeof(t))))
#define META_REALLOC(v, n, t) (v = (t *)rm_realloc((v), ((n) * sizeof(t))))
#define META_FREE(x) rm_free((x))

#endif /* META_USE_PERL_MALLOC */
47 
NewMetaString(const char * init_str)48 static metastring *NewMetaString(const char *init_str) {
49   metastring *s;
50   char empty_string[] = "";
51 
52   META_MALLOC(s, 1, metastring);
53   assert(s != NULL);
54 
55   if (init_str == NULL) init_str = empty_string;
56   s->length = strlen(init_str);
57   /* preallocate a bit more for potential growth */
58   s->bufsize = s->length + 7;
59 
60   META_MALLOC(s->str, s->bufsize, char);
61   assert(s->str != NULL);
62 
63   strncpy(s->str, init_str, s->length + 1);
64   s->free_string_on_destroy = 1;
65 
66   return s;
67 }
68 
/*
 * Release a metastring. NULL is accepted and ignored. The character buffer
 * is released only when the object still owns it (free_string_on_destroy is
 * non-zero); the metastring struct itself is always released.
 */
static void DestroyMetaString(metastring *s) {
  if (s != NULL) {
    if (s->free_string_on_destroy && s->str != NULL) {
      META_FREE(s->str);
    }
    META_FREE(s);
  }
}
76 
/*
 * Grow the buffer of s by chars_needed bytes plus 10 bytes of slack.
 * The recorded capacity is bumped first and the buffer is then reallocated
 * to exactly that capacity. Reallocation failure is caught by assert().
 */
static void IncreaseBuffer(metastring *s, int chars_needed) {
  s->bufsize = s->bufsize + chars_needed + 10;
  META_REALLOC(s->str, s->bufsize, char);
  assert(s->str != NULL);
}
82 
/*
 * Upper-case the contents of s in place, up to the first NUL byte.
 *
 * Each byte is converted to unsigned char before being handed to toupper():
 * passing a negative plain char (any byte >= 0x80 where char is signed, e.g.
 * the bytes of a UTF-8 multibyte sequence) is undefined behaviour.
 */
static void MakeUpper(metastring *s) {
  char *p;

  for (p = s->str; *p != '\0'; p++) {
    *p = (char)toupper((unsigned char)*p);
  }
}
90 
/*
 * Return 1 when the character at index pos is one of A, E, I, O, U or Y
 * (upper case only), 0 otherwise. Indices outside [0, length) yield 0.
 */
static int IsVowel(metastring *s, int pos) {
  if (pos < 0 || pos >= s->length) return 0;

  switch (s->str[pos]) {
    case 'A':
    case 'E':
    case 'I':
    case 'O':
    case 'U':
    case 'Y':
      return 1;
    default:
      return 0;
  }
}
101 
/*
 * Return 1 when the string contains any of the substrings "W", "K", "CZ" or
 * "WITZ", 0 otherwise. The search is case-sensitive and covers the whole
 * buffer up to its terminating NUL.
 */
static int SlavoGermanic(metastring *s) {
  static const char *const markers[] = {"W", "K", "CZ", "WITZ"};
  size_t i;

  for (i = 0; i < sizeof markers / sizeof markers[0]; i++) {
    if (strstr(s->str, markers[i]) != NULL) return 1;
  }
  return 0;
}
114 
/* Return the value of the length field recorded for s. */
static int GetLength(metastring *s) {
  const int recorded = s->length;

  return recorded;
}
118 
/*
 * Bounds-checked read: return the character at index pos, or '\0' when pos
 * lies outside [0, length).
 */
static char GetAt(metastring *s, int pos) {
  int in_range = (pos >= 0) && (pos < s->length);

  return in_range ? s->str[pos] : '\0';
}
124 
/*
 * Bounds-checked write: store c at index pos. Writes outside [0, length)
 * are silently ignored. The length field is never adjusted, even when c
 * is '\0'.
 */
static void SetAt(metastring *s, int pos, char c) {
  if (pos >= 0 && pos < s->length) {
    s->str[pos] = c;
  }
}
130 
131 /*
132    Caveats: the START value is 0 based
133 */
StringAt(metastring * s,int start,int length,...)134 static int StringAt(metastring *s, int start, int length, ...) {
135   char *test;
136   char *pos;
137   va_list ap;
138 
139   if ((start < 0) || (start >= s->length)) return 0;
140 
141   pos = (s->str + start);
142   va_start(ap, length);
143 
144   do {
145     test = va_arg(ap, char *);
146     if (*test && (strncmp(pos, test, length) == 0)) return 1;
147   } while (strcmp(test, ""));
148 
149   va_end(ap);
150 
151   return 0;
152 }
153 
/*
 * Append new_str to s, growing the buffer first when the result (plus the
 * terminating NUL) would not fit. A NULL new_str is ignored. The length
 * field is advanced by strlen(new_str).
 */
static void MetaphAdd(metastring *s, const char *new_str) {
  int extra;

  if (!new_str) return;

  extra = strlen(new_str);

  /* one byte of the buffer is reserved for the terminating NUL */
  if ((s->bufsize - 1) < (s->length + extra)) {
    IncreaseBuffer(s, extra);
  }

  strcat(s->str, new_str);
  s->length += extra;
}
167 
DoubleMetaphone(const char * str,char ** primary_pp,char ** secondary_pp)168 void DoubleMetaphone(const char *str, char **primary_pp, char **secondary_pp) {
169   int length;
170   metastring *original;
171   metastring *primary;
172   metastring *secondary;
173   int current;
174   int last;
175 
176   current = 0;
177   /* we need the real length and last prior to padding */
178   length = strlen(str);
179   last = length - 1;
180   original = NewMetaString(str);
181   /* Pad original so we can index beyond end */
182   MetaphAdd(original, "     ");
183 
184   primary = NewMetaString("");
185   secondary = NewMetaString("");
186 
187   MakeUpper(original);
188 
189   /* skip these when at start of word */
190   if (StringAt(original, 0, 2, "GN", "KN", "PN", "WR", "PS", "")) current += 1;
191 
192   /* Initial 'X' is pronounced 'Z' e.g. 'Xavier' */
193   if (GetAt(original, 0) == 'X') {
194     MetaphAdd(primary, "S"); /* 'Z' maps to 'S' */
195     MetaphAdd(secondary, "S");
196     current += 1;
197   }
198 
199   /* main loop */
200   while ((primary->length < 4) || (secondary->length < 4)) {
201     if (current >= length) break;
202 
203     switch (GetAt(original, current)) {
204       case 'A':
205       case 'E':
206       case 'I':
207       case 'O':
208       case 'U':
209       case 'Y':
210         if (current == 0) {
211           /* all init vowels now map to 'A' */
212           MetaphAdd(primary, "A");
213           MetaphAdd(secondary, "A");
214         }
215         current += 1;
216         break;
217 
218       case 'B':
219 
220         /* "-mb", e.g", "dumb", already skipped over... */
221         MetaphAdd(primary, "P");
222         MetaphAdd(secondary, "P");
223 
224         if (GetAt(original, current + 1) == 'B')
225           current += 2;
226         else
227           current += 1;
228         break;
229 
230 #if 0  // This is 2018 and nobody is using Latin1
231       case 'Ç':
232         MetaphAdd(primary, "S");
233         MetaphAdd(secondary, "S");
234         current += 1;
235         break;
236 #endif
237 
238       case 'C':
239         /* various germanic */
240         if ((current > 1) && !IsVowel(original, current - 2) &&
241             StringAt(original, (current - 1), 3, "ACH", "") &&
242             ((GetAt(original, current + 2) != 'I') &&
243              ((GetAt(original, current + 2) != 'E') ||
244               StringAt(original, (current - 2), 6, "BACHER", "MACHER", "")))) {
245           MetaphAdd(primary, "K");
246           MetaphAdd(secondary, "K");
247           current += 2;
248           break;
249         }
250 
251         /* special case 'caesar' */
252         if ((current == 0) && StringAt(original, current, 6, "CAESAR", "")) {
253           MetaphAdd(primary, "S");
254           MetaphAdd(secondary, "S");
255           current += 2;
256           break;
257         }
258 
259         /* italian 'chianti' */
260         if (StringAt(original, current, 4, "CHIA", "")) {
261           MetaphAdd(primary, "K");
262           MetaphAdd(secondary, "K");
263           current += 2;
264           break;
265         }
266 
267         if (StringAt(original, current, 2, "CH", "")) {
268           /* find 'michael' */
269           if ((current > 0) && StringAt(original, current, 4, "CHAE", "")) {
270             MetaphAdd(primary, "K");
271             MetaphAdd(secondary, "X");
272             current += 2;
273             break;
274           }
275 
276           /* greek roots e.g. 'chemistry', 'chorus' */
277           if ((current == 0) &&
278               (StringAt(original, (current + 1), 5, "HARAC", "HARIS", "") ||
279                StringAt(original, (current + 1), 3, "HOR", "HYM", "HIA", "HEM", "")) &&
280               !StringAt(original, 0, 5, "CHORE", "")) {
281             MetaphAdd(primary, "K");
282             MetaphAdd(secondary, "K");
283             current += 2;
284             break;
285           }
286 
287           /* germanic, greek, or otherwise 'ch' for 'kh' sound */
288           if ((StringAt(original, 0, 4, "VAN ", "VON ", "") || StringAt(original, 0, 3, "SCH", ""))
289               /*  'architect but not 'arch', 'orchestra', 'orchid' */
290               || StringAt(original, (current - 2), 6, "ORCHES", "ARCHIT", "ORCHID", "") ||
291               StringAt(original, (current + 2), 1, "T", "S", "") ||
292               ((StringAt(original, (current - 1), 1, "A", "O", "U", "E", "") || (current == 0))
293                /* e.g., 'wachtler', 'wechsler', but not 'tichner' */
294                && StringAt(original, (current + 2), 1, "L", "R", "N", "M", "B", "H", "F", "V", "W",
295                            " ", ""))) {
296             MetaphAdd(primary, "K");
297             MetaphAdd(secondary, "K");
298           } else {
299             if (current > 0) {
300               if (StringAt(original, 0, 2, "MC", "")) {
301                 /* e.g., "McHugh" */
302                 MetaphAdd(primary, "K");
303                 MetaphAdd(secondary, "K");
304               } else {
305                 MetaphAdd(primary, "X");
306                 MetaphAdd(secondary, "K");
307               }
308             } else {
309               MetaphAdd(primary, "X");
310               MetaphAdd(secondary, "X");
311             }
312           }
313           current += 2;
314           break;
315         }
316         /* e.g, 'czerny' */
317         if (StringAt(original, current, 2, "CZ", "") &&
318             !StringAt(original, (current - 2), 4, "WICZ", "")) {
319           MetaphAdd(primary, "S");
320           MetaphAdd(secondary, "X");
321           current += 2;
322           break;
323         }
324 
325         /* e.g., 'focaccia' */
326         if (StringAt(original, (current + 1), 3, "CIA", "")) {
327           MetaphAdd(primary, "X");
328           MetaphAdd(secondary, "X");
329           current += 3;
330           break;
331         }
332 
333         /* double 'C', but not if e.g. 'McClellan' */
334         if (StringAt(original, current, 2, "CC", "") &&
335             !((current == 1) && (GetAt(original, 0) == 'M'))) {
336           /* 'bellocchio' but not 'bacchus' */
337           if (StringAt(original, (current + 2), 1, "I", "E", "H", "") &&
338               !StringAt(original, (current + 2), 2, "HU", "")) {
339             /* 'accident', 'accede' 'succeed' */
340             if (((current == 1) && (GetAt(original, current - 1) == 'A')) ||
341                 StringAt(original, (current - 1), 5, "UCCEE", "UCCES", "")) {
342               MetaphAdd(primary, "KS");
343               MetaphAdd(secondary, "KS");
344               /* 'bacci', 'bertucci', other italian */
345             } else {
346               MetaphAdd(primary, "X");
347               MetaphAdd(secondary, "X");
348             }
349             current += 3;
350             break;
351           } else { /* Pierce's rule */
352             MetaphAdd(primary, "K");
353             MetaphAdd(secondary, "K");
354             current += 2;
355             break;
356           }
357         }
358 
359         if (StringAt(original, current, 2, "CK", "CG", "CQ", "")) {
360           MetaphAdd(primary, "K");
361           MetaphAdd(secondary, "K");
362           current += 2;
363           break;
364         }
365 
366         if (StringAt(original, current, 2, "CI", "CE", "CY", "")) {
367           /* italian vs. english */
368           if (StringAt(original, current, 3, "CIO", "CIE", "CIA", "")) {
369             MetaphAdd(primary, "S");
370             MetaphAdd(secondary, "X");
371           } else {
372             MetaphAdd(primary, "S");
373             MetaphAdd(secondary, "S");
374           }
375           current += 2;
376           break;
377         }
378 
379         /* else */
380         MetaphAdd(primary, "K");
381         MetaphAdd(secondary, "K");
382 
383         /* name sent in 'mac caffrey', 'mac gregor */
384         if (StringAt(original, (current + 1), 2, " C", " Q", " G", ""))
385           current += 3;
386         else if (StringAt(original, (current + 1), 1, "C", "K", "Q", "") &&
387                  !StringAt(original, (current + 1), 2, "CE", "CI", ""))
388           current += 2;
389         else
390           current += 1;
391         break;
392 
393       case 'D':
394         if (StringAt(original, current, 2, "DG", "")) {
395           if (StringAt(original, (current + 2), 1, "I", "E", "Y", "")) {
396             /* e.g. 'edge' */
397             MetaphAdd(primary, "J");
398             MetaphAdd(secondary, "J");
399             current += 3;
400             break;
401           } else {
402             /* e.g. 'edgar' */
403             MetaphAdd(primary, "TK");
404             MetaphAdd(secondary, "TK");
405             current += 2;
406             break;
407           }
408         }
409 
410         if (StringAt(original, current, 2, "DT", "DD", "")) {
411           MetaphAdd(primary, "T");
412           MetaphAdd(secondary, "T");
413           current += 2;
414           break;
415         }
416 
417         /* else */
418         MetaphAdd(primary, "T");
419         MetaphAdd(secondary, "T");
420         current += 1;
421         break;
422 
423       case 'F':
424         if (GetAt(original, current + 1) == 'F')
425           current += 2;
426         else
427           current += 1;
428         MetaphAdd(primary, "F");
429         MetaphAdd(secondary, "F");
430         break;
431 
432       case 'G':
433         if (GetAt(original, current + 1) == 'H') {
434           if ((current > 0) && !IsVowel(original, current - 1)) {
435             MetaphAdd(primary, "K");
436             MetaphAdd(secondary, "K");
437             current += 2;
438             break;
439           }
440 
441           if (current < 3) {
442             /* 'ghislane', ghiradelli */
443             if (current == 0) {
444               if (GetAt(original, current + 2) == 'I') {
445                 MetaphAdd(primary, "J");
446                 MetaphAdd(secondary, "J");
447               } else {
448                 MetaphAdd(primary, "K");
449                 MetaphAdd(secondary, "K");
450               }
451               current += 2;
452               break;
453             }
454           }
455           /* Parker's rule (with some further refinements) - e.g., 'hugh' */
456           if (((current > 1) && StringAt(original, (current - 2), 1, "B", "H", "D", ""))
457               /* e.g., 'bough' */
458               || ((current > 2) && StringAt(original, (current - 3), 1, "B", "H", "D", ""))
459               /* e.g., 'broughton' */
460               || ((current > 3) && StringAt(original, (current - 4), 1, "B", "H", ""))) {
461             current += 2;
462             break;
463           } else {
464             /* e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough' */
465             if ((current > 2) && (GetAt(original, current - 1) == 'U') &&
466                 StringAt(original, (current - 3), 1, "C", "G", "L", "R", "T", "")) {
467               MetaphAdd(primary, "F");
468               MetaphAdd(secondary, "F");
469             } else if ((current > 0) && GetAt(original, current - 1) != 'I') {
470 
471               MetaphAdd(primary, "K");
472               MetaphAdd(secondary, "K");
473             }
474 
475             current += 2;
476             break;
477           }
478         }
479 
480         if (GetAt(original, current + 1) == 'N') {
481           if ((current == 1) && IsVowel(original, 0) && !SlavoGermanic(original)) {
482             MetaphAdd(primary, "KN");
483             MetaphAdd(secondary, "N");
484           } else
485               /* not e.g. 'cagney' */
486               if (!StringAt(original, (current + 2), 2, "EY", "") &&
487                   (GetAt(original, current + 1) != 'Y') && !SlavoGermanic(original)) {
488             MetaphAdd(primary, "N");
489             MetaphAdd(secondary, "KN");
490           } else {
491             MetaphAdd(primary, "KN");
492             MetaphAdd(secondary, "KN");
493           }
494           current += 2;
495           break;
496         }
497 
498         /* 'tagliaro' */
499         if (StringAt(original, (current + 1), 2, "LI", "") && !SlavoGermanic(original)) {
500           MetaphAdd(primary, "KL");
501           MetaphAdd(secondary, "L");
502           current += 2;
503           break;
504         }
505 
506         /* -ges-,-gep-,-gel-, -gie- at beginning */
507         if ((current == 0) && ((GetAt(original, current + 1) == 'Y') ||
508                                StringAt(original, (current + 1), 2, "ES", "EP", "EB", "EL", "EY",
509                                         "IB", "IL", "IN", "IE", "EI", "ER", ""))) {
510           MetaphAdd(primary, "K");
511           MetaphAdd(secondary, "J");
512           current += 2;
513           break;
514         }
515 
516         /*  -ger-,  -gy- */
517         if ((StringAt(original, (current + 1), 2, "ER", "") ||
518              (GetAt(original, current + 1) == 'Y')) &&
519             !StringAt(original, 0, 6, "DANGER", "RANGER", "MANGER", "") &&
520             !StringAt(original, (current - 1), 1, "E", "I", "") &&
521             !StringAt(original, (current - 1), 3, "RGY", "OGY", "")) {
522           MetaphAdd(primary, "K");
523           MetaphAdd(secondary, "J");
524           current += 2;
525           break;
526         }
527 
528         /*  italian e.g, 'biaggi' */
529         if (StringAt(original, (current + 1), 1, "E", "I", "Y", "") ||
530             StringAt(original, (current - 1), 4, "AGGI", "OGGI", "")) {
531           /* obvious germanic */
532           if ((StringAt(original, 0, 4, "VAN ", "VON ", "") ||
533                StringAt(original, 0, 3, "SCH", "")) ||
534               StringAt(original, (current + 1), 2, "ET", "")) {
535             MetaphAdd(primary, "K");
536             MetaphAdd(secondary, "K");
537           } else {
538             /* always soft if french ending */
539             if (StringAt(original, (current + 1), 4, "IER ", "")) {
540               MetaphAdd(primary, "J");
541               MetaphAdd(secondary, "J");
542             } else {
543               MetaphAdd(primary, "J");
544               MetaphAdd(secondary, "K");
545             }
546           }
547           current += 2;
548           break;
549         }
550 
551         if (GetAt(original, current + 1) == 'G')
552           current += 2;
553         else
554           current += 1;
555         MetaphAdd(primary, "K");
556         MetaphAdd(secondary, "K");
557         break;
558 
559       case 'H':
560         /* only keep if first & before vowel or btw. 2 vowels */
561         if (((current == 0) || IsVowel(original, current - 1)) && IsVowel(original, current + 1)) {
562           MetaphAdd(primary, "H");
563           MetaphAdd(secondary, "H");
564           current += 2;
565         } else /* also takes care of 'HH' */
566           current += 1;
567         break;
568 
569       case 'J':
570         /* obvious spanish, 'jose', 'san jacinto' */
571         if (StringAt(original, current, 4, "JOSE", "") || StringAt(original, 0, 4, "SAN ", "")) {
572           if (((current == 0) && (GetAt(original, current + 4) == ' ')) ||
573               StringAt(original, 0, 4, "SAN ", "")) {
574             MetaphAdd(primary, "H");
575             MetaphAdd(secondary, "H");
576           } else {
577             MetaphAdd(primary, "J");
578             MetaphAdd(secondary, "H");
579           }
580           current += 1;
581           break;
582         }
583 
584         if ((current == 0) && !StringAt(original, current, 4, "JOSE", "")) {
585           MetaphAdd(primary, "J"); /* Yankelovich/Jankelowicz */
586           MetaphAdd(secondary, "A");
587         } else {
588           /* spanish pron. of e.g. 'bajador' */
589           if (IsVowel(original, current - 1) && !SlavoGermanic(original) &&
590               ((GetAt(original, current + 1) == 'A') || (GetAt(original, current + 1) == 'O'))) {
591             MetaphAdd(primary, "J");
592             MetaphAdd(secondary, "H");
593           } else {
594             if (current == last) {
595               MetaphAdd(primary, "J");
596               MetaphAdd(secondary, "");
597             } else {
598               if (!StringAt(original, (current + 1), 1, "L", "T", "K", "S", "N", "M", "B", "Z",
599                             "") &&
600                   !StringAt(original, (current - 1), 1, "S", "K", "L", "")) {
601                 MetaphAdd(primary, "J");
602                 MetaphAdd(secondary, "J");
603               }
604             }
605           }
606         }
607 
608         if (GetAt(original, current + 1) == 'J') /* it could happen! */
609           current += 2;
610         else
611           current += 1;
612         break;
613 
614       case 'K':
615         if (GetAt(original, current + 1) == 'K')
616           current += 2;
617         else
618           current += 1;
619         MetaphAdd(primary, "K");
620         MetaphAdd(secondary, "K");
621         break;
622 
623       case 'L':
624         if (GetAt(original, current + 1) == 'L') {
625           /* spanish e.g. 'cabrillo', 'gallegos' */
626           if (((current == (length - 3)) &&
627                StringAt(original, (current - 1), 4, "ILLO", "ILLA", "ALLE", "")) ||
628               ((StringAt(original, (last - 1), 2, "AS", "OS", "") ||
629                 StringAt(original, last, 1, "A", "O", "")) &&
630                StringAt(original, (current - 1), 4, "ALLE", ""))) {
631             MetaphAdd(primary, "L");
632             MetaphAdd(secondary, "");
633             current += 2;
634             break;
635           }
636           current += 2;
637         } else
638           current += 1;
639         MetaphAdd(primary, "L");
640         MetaphAdd(secondary, "L");
641         break;
642 
643       case 'M':
644         if ((StringAt(original, (current - 1), 3, "UMB", "") &&
645              (((current + 1) == last) || StringAt(original, (current + 2), 2, "ER", "")))
646             /* 'dumb','thumb' */
647             || (GetAt(original, current + 1) == 'M'))
648           current += 2;
649         else
650           current += 1;
651         MetaphAdd(primary, "M");
652         MetaphAdd(secondary, "M");
653         break;
654 
655       case 'N':
656         if (GetAt(original, current + 1) == 'N')
657           current += 2;
658         else
659           current += 1;
660         MetaphAdd(primary, "N");
661         MetaphAdd(secondary, "N");
662         break;
663 
664 #if 0  // UTF8, not Latin1
665       case 'Ñ':
666         current += 1;
667         MetaphAdd(primary, "N");
668         MetaphAdd(secondary, "N");
669         break;
670 #endif
671 
672       case 'P':
673         if (GetAt(original, current + 1) == 'H') {
674           MetaphAdd(primary, "F");
675           MetaphAdd(secondary, "F");
676           current += 2;
677           break;
678         }
679 
680         /* also account for "campbell", "raspberry" */
681         if (StringAt(original, (current + 1), 1, "P", "B", ""))
682           current += 2;
683         else
684           current += 1;
685         MetaphAdd(primary, "P");
686         MetaphAdd(secondary, "P");
687         break;
688 
689       case 'Q':
690         if (GetAt(original, current + 1) == 'Q')
691           current += 2;
692         else
693           current += 1;
694         MetaphAdd(primary, "K");
695         MetaphAdd(secondary, "K");
696         break;
697 
698       case 'R':
699         /* french e.g. 'rogier', but exclude 'hochmeier' */
700         if ((current == last) && !SlavoGermanic(original) &&
701             StringAt(original, (current - 2), 2, "IE", "") &&
702             !StringAt(original, (current - 4), 2, "ME", "MA", "")) {
703           MetaphAdd(primary, "");
704           MetaphAdd(secondary, "R");
705         } else {
706           MetaphAdd(primary, "R");
707           MetaphAdd(secondary, "R");
708         }
709 
710         if (GetAt(original, current + 1) == 'R')
711           current += 2;
712         else
713           current += 1;
714         break;
715 
716       case 'S':
717         /* special cases 'island', 'isle', 'carlisle', 'carlysle' */
718         if (StringAt(original, (current - 1), 3, "ISL", "YSL", "")) {
719           current += 1;
720           break;
721         }
722 
723         /* special case 'sugar-' */
724         if ((current == 0) && StringAt(original, current, 5, "SUGAR", "")) {
725           MetaphAdd(primary, "X");
726           MetaphAdd(secondary, "S");
727           current += 1;
728           break;
729         }
730 
731         if (StringAt(original, current, 2, "SH", "")) {
732           /* germanic */
733           if (StringAt(original, (current + 1), 4, "HEIM", "HOEK", "HOLM", "HOLZ", "")) {
734             MetaphAdd(primary, "S");
735             MetaphAdd(secondary, "S");
736           } else {
737             MetaphAdd(primary, "X");
738             MetaphAdd(secondary, "X");
739           }
740           current += 2;
741           break;
742         }
743 
744         /* italian & armenian */
745         if (StringAt(original, current, 3, "SIO", "SIA", "") ||
746             StringAt(original, current, 4, "SIAN", "")) {
747           if (!SlavoGermanic(original)) {
748             MetaphAdd(primary, "S");
749             MetaphAdd(secondary, "X");
750           } else {
751             MetaphAdd(primary, "S");
752             MetaphAdd(secondary, "S");
753           }
754           current += 3;
755           break;
756         }
757 
758         /* german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
759            also, -sz- in slavic language altho in hungarian it is pronounced 's' */
760         if (((current == 0) && StringAt(original, (current + 1), 1, "M", "N", "L", "W", "")) ||
761             StringAt(original, (current + 1), 1, "Z", "")) {
762           MetaphAdd(primary, "S");
763           MetaphAdd(secondary, "X");
764           if (StringAt(original, (current + 1), 1, "Z", ""))
765             current += 2;
766           else
767             current += 1;
768           break;
769         }
770 
771         if (StringAt(original, current, 2, "SC", "")) {
772           /* Schlesinger's rule */
773           if (GetAt(original, current + 2) == 'H') /* dutch origin, e.g. 'school', 'schooner' */ {
774             if (StringAt(original, (current + 3), 2, "OO", "ER", "EN", "UY", "ED", "EM", "")) {
775               /* 'schermerhorn', 'schenker' */
776               if (StringAt(original, (current + 3), 2, "ER", "EN", "")) {
777                 MetaphAdd(primary, "X");
778                 MetaphAdd(secondary, "SK");
779               } else {
780                 MetaphAdd(primary, "SK");
781                 MetaphAdd(secondary, "SK");
782               }
783               current += 3;
784               break;
785             } else {
786               if ((current == 0) && !IsVowel(original, 3) && (GetAt(original, 3) != 'W')) {
787                 MetaphAdd(primary, "X");
788                 MetaphAdd(secondary, "S");
789               } else {
790                 MetaphAdd(primary, "X");
791                 MetaphAdd(secondary, "X");
792               }
793               current += 3;
794               break;
795             }
796           }
797 
798           if (StringAt(original, (current + 2), 1, "I", "E", "Y", "")) {
799             MetaphAdd(primary, "S");
800             MetaphAdd(secondary, "S");
801             current += 3;
802             break;
803           }
804           /* else */
805           MetaphAdd(primary, "SK");
806           MetaphAdd(secondary, "SK");
807           current += 3;
808           break;
809         }
810 
811         /* french e.g. 'resnais', 'artois' */
812         if ((current == last) && StringAt(original, (current - 2), 2, "AI", "OI", "")) {
813           MetaphAdd(primary, "");
814           MetaphAdd(secondary, "S");
815         } else {
816           MetaphAdd(primary, "S");
817           MetaphAdd(secondary, "S");
818         }
819 
820         if (StringAt(original, (current + 1), 1, "S", "Z", ""))
821           current += 2;
822         else
823           current += 1;
824         break;
825 
826       case 'T':
827         if (StringAt(original, current, 4, "TION", "")) {
828           MetaphAdd(primary, "X");
829           MetaphAdd(secondary, "X");
830           current += 3;
831           break;
832         }
833 
834         if (StringAt(original, current, 3, "TIA", "TCH", "")) {
835           MetaphAdd(primary, "X");
836           MetaphAdd(secondary, "X");
837           current += 3;
838           break;
839         }
840 
841         if (StringAt(original, current, 2, "TH", "") || StringAt(original, current, 3, "TTH", "")) {
842           /* special case 'thomas', 'thames' or germanic */
843           if (StringAt(original, (current + 2), 2, "OM", "AM", "") ||
844               StringAt(original, 0, 4, "VAN ", "VON ", "") || StringAt(original, 0, 3, "SCH", "")) {
845             MetaphAdd(primary, "T");
846             MetaphAdd(secondary, "T");
847           } else {
848             MetaphAdd(primary, "0"); /* yes, zero */
849             MetaphAdd(secondary, "T");
850           }
851           current += 2;
852           break;
853         }
854 
855         if (StringAt(original, (current + 1), 1, "T", "D", ""))
856           current += 2;
857         else
858           current += 1;
859         MetaphAdd(primary, "T");
860         MetaphAdd(secondary, "T");
861         break;
862 
863       case 'V':
864         if (GetAt(original, current + 1) == 'V')
865           current += 2;
866         else
867           current += 1;
868         MetaphAdd(primary, "F");
869         MetaphAdd(secondary, "F");
870         break;
871 
872       case 'W':
873         /* can also be in middle of word */
874         if (StringAt(original, current, 2, "WR", "")) {
875           MetaphAdd(primary, "R");
876           MetaphAdd(secondary, "R");
877           current += 2;
878           break;
879         }
880 
881         if ((current == 0) &&
882             (IsVowel(original, current + 1) || StringAt(original, current, 2, "WH", ""))) {
883           /* Wasserman should match Vasserman */
884           if (IsVowel(original, current + 1)) {
885             MetaphAdd(primary, "A");
886             MetaphAdd(secondary, "F");
887           } else {
888             /* need Uomo to match Womo */
889             MetaphAdd(primary, "A");
890             MetaphAdd(secondary, "A");
891           }
892         }
893 
894         /* Arnow should match Arnoff */
895         if (((current == last) && IsVowel(original, current - 1)) ||
896             StringAt(original, (current - 1), 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY", "") ||
897             StringAt(original, 0, 3, "SCH", "")) {
898           MetaphAdd(primary, "");
899           MetaphAdd(secondary, "F");
900           current += 1;
901           break;
902         }
903 
904         /* polish e.g. 'filipowicz' */
905         if (StringAt(original, current, 4, "WICZ", "WITZ", "")) {
906           MetaphAdd(primary, "TS");
907           MetaphAdd(secondary, "FX");
908           current += 4;
909           break;
910         }
911 
912         /* else skip it */
913         current += 1;
914         break;
915 
916       case 'X':
917         /* french e.g. breaux */
918         if (!((current == last) && (StringAt(original, (current - 3), 3, "IAU", "EAU", "") ||
919                                     StringAt(original, (current - 2), 2, "AU", "OU", "")))) {
920           MetaphAdd(primary, "KS");
921           MetaphAdd(secondary, "KS");
922         }
923 
924         if (StringAt(original, (current + 1), 1, "C", "X", ""))
925           current += 2;
926         else
927           current += 1;
928         break;
929 
930       case 'Z':
931         /* chinese pinyin e.g. 'zhao' */
932         if (GetAt(original, current + 1) == 'H') {
933           MetaphAdd(primary, "J");
934           MetaphAdd(secondary, "J");
935           current += 2;
936           break;
937         } else if (StringAt(original, (current + 1), 2, "ZO", "ZI", "ZA", "") ||
938                    (SlavoGermanic(original) &&
939                     ((current > 0) && GetAt(original, current - 1) != 'T'))) {
940           MetaphAdd(primary, "S");
941           MetaphAdd(secondary, "TS");
942         } else {
943           MetaphAdd(primary, "S");
944           MetaphAdd(secondary, "S");
945         }
946 
947         if (GetAt(original, current + 1) == 'Z')
948           current += 2;
949         else
950           current += 1;
951         break;
952 
953       default:
954         current += 1;
955     }
956     /* printf("PRIMARY: %s\n", primary->str);
957     printf("SECONDARY: %s\n", secondary->str);  */
958   }
959 
960   if (primary->length > 4) SetAt(primary, 4, '\0');
961 
962   if (secondary->length > 4) SetAt(secondary, 4, '\0');
963   if (primary_pp) {
964     if (primary->length > 0) {
965       *primary_pp = primary->str;
966       primary->free_string_on_destroy = 0;
967     }
968   }
969   if (secondary_pp) {
970     if (secondary->length > 0) {
971       *secondary_pp = secondary->str;
972       secondary->free_string_on_destroy = 0;
973     }
974   }
975 
976   DestroyMetaString(original);
977   DestroyMetaString(primary);
978   DestroyMetaString(secondary);
979 }
980