1 /*
2 * fuzzystrmatch.c
3 *
4 * Functions for "fuzzy" comparison of strings
5 *
6 * Joe Conway <mail@joeconway.com>
7 *
8 * contrib/fuzzystrmatch/fuzzystrmatch.c
9 * Copyright (c) 2001-2016, PostgreSQL Global Development Group
10 * ALL RIGHTS RESERVED;
11 *
12 * metaphone()
13 * -----------
14 * Modified for PostgreSQL by Joe Conway.
15 * Based on CPAN's "Text-Metaphone-1.96" by Michael G Schwern <schwern@pobox.com>
16 * Code slightly modified for use as PostgreSQL function (palloc, elog, etc).
17 * Metaphone was originally created by Lawrence Philips and presented in article
18 * in "Computer Language" December 1990 issue.
19 *
20 * Permission to use, copy, modify, and distribute this software and its
21 * documentation for any purpose, without fee, and without a written agreement
22 * is hereby granted, provided that the above copyright notice and this
23 * paragraph and the following two paragraphs appear in all copies.
24 *
25 * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY FOR
26 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
27 * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
28 * DOCUMENTATION, EVEN IF THE AUTHOR OR DISTRIBUTORS HAVE BEEN ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
33 * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
34 * ON AN "AS IS" BASIS, AND THE AUTHOR AND DISTRIBUTORS HAS NO OBLIGATIONS TO
35 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
36 *
37 */
38
39 #include "postgres.h"
40
41 #include <ctype.h>
42
43 #include "mb/pg_wchar.h"
44 #include "utils/builtins.h"
45
46 PG_MODULE_MAGIC;
47
48 /*
49 * Soundex
50 */
51 static void _soundex(const char *instr, char *outstr);
52
53 #define SOUNDEX_LEN 4
54
/*
 * Soundex digit for each ASCII letter, indexed by (letter - 'A'):
 *                                  ABCDEFGHIJKLMNOPQRSTUVWXYZ
 */
static const char *soundex_table = "01230120022455012623010202";

/*
 * soundex_code
 *		Map a single character to its Soundex digit.
 *
 * The character is upper-cased first.  An ASCII letter yields the matching
 * digit from soundex_table; anything else is handed back (upper-cased)
 * unchanged, so that non-ASCII letters can never index outside the table.
 */
static char
soundex_code(char letter)
{
	int			upper = toupper((unsigned char) letter);

	/* Defend against non-ASCII letters */
	if (upper < 'A' || upper > 'Z')
		return (char) upper;

	return soundex_table[upper - 'A'];
}
67
68 /*
69 * Metaphone
70 */
71 #define MAX_METAPHONE_STRLEN 255
72
73 /*
74 * Original code by Michael G Schwern starts here.
75 * Code slightly modified for use as PostgreSQL function.
76 */
77
78
79 /**************************************************************************
80 metaphone -- Breaks english phrases down into their phonemes.
81
82 Input
83 word -- An english word to be phonized
	max_phonemes	-- How many phonemes to calculate.  Must be greater
					   than 0; _metaphone() raises an error otherwise.
86 phoned_word -- The final phonized word. (We'll allocate the
87 memory.)
88 Output
89 error -- A simple error flag, returns TRUE or FALSE
90
91 NOTES: ALL non-alpha characters are ignored, this includes whitespace,
92 although non-alpha characters will break up phonemes.
93 ****************************************************************************/
94
95
96 /**************************************************************************
97 my constants -- constants I like
98
99 Probably redundant.
100
101 ***************************************************************************/
102
103 #define META_ERROR FALSE
104 #define META_SUCCESS TRUE
105 #define META_FAILURE FALSE
106
107
108 /* I add modifications to the traditional metaphone algorithm that you
109 might find in books. Define this if you want metaphone to behave
110 traditionally */
111 #undef USE_TRADITIONAL_METAPHONE
112
113 /* Special encodings */
114 #define SH 'X'
115 #define TH '0'
116
117 static char Lookahead(char *word, int how_far);
118 static int _metaphone(char *word, int max_phonemes, char **phoned_word);
119
120 /* Metachar.h ... little bits about characters for metaphone */
121
122
123 /*-- Character encoding array & accessing macros --*/
124 /* Stolen directly out of the book... */
/*
 * Attribute bits for each ASCII letter, indexed by (letter - 'A'):
 *
 *		 1	vowel (AEIOU)
 *		 2	passed through unchanged (FJLMNR)
 *		 4	forms a diphthong when preceding H (CGPST)
 *		 8	makes C and G soft (EIY)
 *		16	prevents GH from becoming F (BDH)
 */
static const char _codes[26] = {
	/* A   B   C   D   E   F   G   H   I */
	   1, 16,  4, 16,  9,  2,  4, 16,  9,
	/* J   K   L   M   N   O   P   Q   R */
	   2,  0,  2,  2,  2,  1,  4,  0,  2,
	/* S   T   U   V   W   X   Y   Z */
	   4,  4,  1,  0,  0,  0,  8,  0
};

/*
 * getcode
 *		Return the attribute bits of character c, or 0 if c is not an
 *		ASCII letter.  Case is ignored.
 */
static int
getcode(char c)
{
	int			upper;

	if (!isalpha((unsigned char) c))
		return 0;

	upper = toupper((unsigned char) c);

	/* Defend against non-ASCII letters that isalpha() may accept */
	if (upper < 'A' || upper > 'Z')
		return 0;

	return _codes[upper - 'A'];
}

/* Vowels: AEIOU */
#define isvowel(c)	(getcode(c) & 1)

/* Letters that are passed through unchanged: FJLMNR */
#define NOCHANGE(c) (getcode(c) & 2)

/* Letters that form diphthongs when preceding H: CGPST */
#define AFFECTH(c)	(getcode(c) & 4)

/* Letters that make C and G soft: EIY */
#define MAKESOFT(c) (getcode(c) & 8)

/* Letters that prevent GH from becoming F: BDH */
#define NOGHTOF(c)	(getcode(c) & 16)
156
157 PG_FUNCTION_INFO_V1(levenshtein_with_costs);
158 Datum
levenshtein_with_costs(PG_FUNCTION_ARGS)159 levenshtein_with_costs(PG_FUNCTION_ARGS)
160 {
161 text *src = PG_GETARG_TEXT_PP(0);
162 text *dst = PG_GETARG_TEXT_PP(1);
163 int ins_c = PG_GETARG_INT32(2);
164 int del_c = PG_GETARG_INT32(3);
165 int sub_c = PG_GETARG_INT32(4);
166 const char *s_data;
167 const char *t_data;
168 int s_bytes,
169 t_bytes;
170
171 /* Extract a pointer to the actual character data */
172 s_data = VARDATA_ANY(src);
173 t_data = VARDATA_ANY(dst);
174 /* Determine length of each string in bytes */
175 s_bytes = VARSIZE_ANY_EXHDR(src);
176 t_bytes = VARSIZE_ANY_EXHDR(dst);
177
178 PG_RETURN_INT32(varstr_levenshtein(s_data, s_bytes, t_data, t_bytes,
179 ins_c, del_c, sub_c, false));
180 }
181
182
183 PG_FUNCTION_INFO_V1(levenshtein);
184 Datum
levenshtein(PG_FUNCTION_ARGS)185 levenshtein(PG_FUNCTION_ARGS)
186 {
187 text *src = PG_GETARG_TEXT_PP(0);
188 text *dst = PG_GETARG_TEXT_PP(1);
189 const char *s_data;
190 const char *t_data;
191 int s_bytes,
192 t_bytes;
193
194 /* Extract a pointer to the actual character data */
195 s_data = VARDATA_ANY(src);
196 t_data = VARDATA_ANY(dst);
197 /* Determine length of each string in bytes */
198 s_bytes = VARSIZE_ANY_EXHDR(src);
199 t_bytes = VARSIZE_ANY_EXHDR(dst);
200
201 PG_RETURN_INT32(varstr_levenshtein(s_data, s_bytes, t_data, t_bytes,
202 1, 1, 1, false));
203 }
204
205
206 PG_FUNCTION_INFO_V1(levenshtein_less_equal_with_costs);
207 Datum
levenshtein_less_equal_with_costs(PG_FUNCTION_ARGS)208 levenshtein_less_equal_with_costs(PG_FUNCTION_ARGS)
209 {
210 text *src = PG_GETARG_TEXT_PP(0);
211 text *dst = PG_GETARG_TEXT_PP(1);
212 int ins_c = PG_GETARG_INT32(2);
213 int del_c = PG_GETARG_INT32(3);
214 int sub_c = PG_GETARG_INT32(4);
215 int max_d = PG_GETARG_INT32(5);
216 const char *s_data;
217 const char *t_data;
218 int s_bytes,
219 t_bytes;
220
221 /* Extract a pointer to the actual character data */
222 s_data = VARDATA_ANY(src);
223 t_data = VARDATA_ANY(dst);
224 /* Determine length of each string in bytes */
225 s_bytes = VARSIZE_ANY_EXHDR(src);
226 t_bytes = VARSIZE_ANY_EXHDR(dst);
227
228 PG_RETURN_INT32(varstr_levenshtein_less_equal(s_data, s_bytes,
229 t_data, t_bytes,
230 ins_c, del_c, sub_c,
231 max_d, false));
232 }
233
234
235 PG_FUNCTION_INFO_V1(levenshtein_less_equal);
236 Datum
levenshtein_less_equal(PG_FUNCTION_ARGS)237 levenshtein_less_equal(PG_FUNCTION_ARGS)
238 {
239 text *src = PG_GETARG_TEXT_PP(0);
240 text *dst = PG_GETARG_TEXT_PP(1);
241 int max_d = PG_GETARG_INT32(2);
242 const char *s_data;
243 const char *t_data;
244 int s_bytes,
245 t_bytes;
246
247 /* Extract a pointer to the actual character data */
248 s_data = VARDATA_ANY(src);
249 t_data = VARDATA_ANY(dst);
250 /* Determine length of each string in bytes */
251 s_bytes = VARSIZE_ANY_EXHDR(src);
252 t_bytes = VARSIZE_ANY_EXHDR(dst);
253
254 PG_RETURN_INT32(varstr_levenshtein_less_equal(s_data, s_bytes,
255 t_data, t_bytes,
256 1, 1, 1,
257 max_d, false));
258 }
259
260
261 /*
262 * Calculates the metaphone of an input string.
263 * Returns number of characters requested
264 * (suggested value is 4)
265 */
266 PG_FUNCTION_INFO_V1(metaphone);
267 Datum
metaphone(PG_FUNCTION_ARGS)268 metaphone(PG_FUNCTION_ARGS)
269 {
270 char *str_i = TextDatumGetCString(PG_GETARG_DATUM(0));
271 size_t str_i_len = strlen(str_i);
272 int reqlen;
273 char *metaph;
274 int retval;
275
276 /* return an empty string if we receive one */
277 if (!(str_i_len > 0))
278 PG_RETURN_TEXT_P(cstring_to_text(""));
279
280 if (str_i_len > MAX_METAPHONE_STRLEN)
281 ereport(ERROR,
282 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
283 errmsg("argument exceeds the maximum length of %d bytes",
284 MAX_METAPHONE_STRLEN)));
285
286 reqlen = PG_GETARG_INT32(1);
287 if (reqlen > MAX_METAPHONE_STRLEN)
288 ereport(ERROR,
289 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
290 errmsg("output exceeds the maximum length of %d bytes",
291 MAX_METAPHONE_STRLEN)));
292
293 if (!(reqlen > 0))
294 ereport(ERROR,
295 (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
296 errmsg("output cannot be empty string")));
297
298
299 retval = _metaphone(str_i, reqlen, &metaph);
300 if (retval == META_SUCCESS)
301 PG_RETURN_TEXT_P(cstring_to_text(metaph));
302 else
303 {
304 /* internal error */
305 elog(ERROR, "metaphone: failure");
306 /* keep the compiler quiet */
307 PG_RETURN_NULL();
308 }
309 }
310
311
312 /*
313 * Original code by Michael G Schwern starts here.
314 * Code slightly modified for use as PostgreSQL
315 * function (palloc, etc).
316 */
317
318 /* I suppose I could have been using a character pointer instead of
319 * accessing the array directly... */
320
/*
 * Letter-access helpers.  They expand in terms of the local variables
 * "word" (the input string) and "w_idx" (the current position in it), and
 * always yield the upper-cased character.
 */

/* Upper-cased letter at absolute position i of the word */
#define Letter_At(i)	(toupper((unsigned char) word[(i)]))

/* The letter at the current position */
#define Curr_Letter		Letter_At(w_idx)
/* The letter right after the current one */
#define Next_Letter		Letter_At(w_idx + 1)
/* The letter n positions back, or '\0' if that is before the word's start */
#define Look_Back_Letter(n) \
	(w_idx >= (n) ? Letter_At(w_idx - (n)) : '\0')
/* The letter right before the current one, '\0' at the start of the word */
#define Prev_Letter		(Look_Back_Letter(1))
/* Two letters ahead; checks Next_Letter first so we don't walk off the string */
#define After_Next_Letter \
	(Next_Letter != '\0' ? Letter_At(w_idx + 2) : '\0')
/* n letters ahead, found via Lookahead() so the end of string is respected */
#define Look_Ahead_Letter(n) \
	(toupper((unsigned char) Lookahead(word + w_idx, (n))))
334
335
/*
 * Lookahead
 *		Safely peek how_far characters past the start of word.
 *
 * Returns the character at that offset, or '\0' if the string ends before
 * the offset is reached.  A how_far of zero or less returns word[0].
 */
static char
Lookahead(char *word, int how_far)
{
	int			remaining = how_far;

	/* Edge forward, stopping early at the terminator */
	while (remaining > 0 && *word != '\0')
	{
		word++;
		remaining--;
	}

	/* Either the requested offset or the end of the string */
	return *word;
}
351
352
/*
 * Output helpers.  They expand in terms of the local variables
 * "phoned_word" (pointer to the output buffer pointer) and "p_idx" (number
 * of characters written so far).
 */

/* Append one character to the phoned word */
#define Phonize(c)		do { (*phoned_word)[p_idx++] = (c); } while (0)
/* Put a terminating null character at the end of the phoned word */
#define End_Phoned_Word do { (*phoned_word)[p_idx] = '\0'; } while (0)
/* Current length of the phoned word */
#define Phone_Len		(p_idx)

/* True if c is a 'break' in the word, i.e. not an alphabetic character */
#define Isbreak(c)		(!isalpha((unsigned char) (c)))
362
363
364 static int
_metaphone(char * word,int max_phonemes,char ** phoned_word)365 _metaphone(char *word, /* IN */
366 int max_phonemes,
367 char **phoned_word) /* OUT */
368 {
369 int w_idx = 0; /* point in the phonization we're at. */
370 int p_idx = 0; /* end of the phoned phrase */
371
372 /*-- Parameter checks --*/
373
374 /*
375 * Shouldn't be necessary, but left these here anyway jec Aug 3, 2001
376 */
377
378 /* Negative phoneme length is meaningless */
379 if (!(max_phonemes > 0))
380 /* internal error */
381 elog(ERROR, "metaphone: Requested output length must be > 0");
382
383 /* Empty/null string is meaningless */
384 if ((word == NULL) || !(strlen(word) > 0))
385 /* internal error */
386 elog(ERROR, "metaphone: Input string length must be > 0");
387
388 /*-- Allocate memory for our phoned_phrase --*/
389 if (max_phonemes == 0)
390 { /* Assume largest possible */
391 *phoned_word = palloc(sizeof(char) * strlen(word) +1);
392 }
393 else
394 {
395 *phoned_word = palloc(sizeof(char) * max_phonemes + 1);
396 }
397
398 /*-- The first phoneme has to be processed specially. --*/
399 /* Find our first letter */
400 for (; !isalpha((unsigned char) (Curr_Letter)); w_idx++)
401 {
402 /* On the off chance we were given nothing but crap... */
403 if (Curr_Letter == '\0')
404 {
405 End_Phoned_Word;
406 return META_SUCCESS; /* For testing */
407 }
408 }
409
410 switch (Curr_Letter)
411 {
412 /* AE becomes E */
413 case 'A':
414 if (Next_Letter == 'E')
415 {
416 Phonize('E');
417 w_idx += 2;
418 }
419 /* Remember, preserve vowels at the beginning */
420 else
421 {
422 Phonize('A');
423 w_idx++;
424 }
425 break;
426 /* [GKP]N becomes N */
427 case 'G':
428 case 'K':
429 case 'P':
430 if (Next_Letter == 'N')
431 {
432 Phonize('N');
433 w_idx += 2;
434 }
435 break;
436
437 /*
438 * WH becomes H, WR becomes R W if followed by a vowel
439 */
440 case 'W':
441 if (Next_Letter == 'H' ||
442 Next_Letter == 'R')
443 {
444 Phonize(Next_Letter);
445 w_idx += 2;
446 }
447 else if (isvowel(Next_Letter))
448 {
449 Phonize('W');
450 w_idx += 2;
451 }
452 /* else ignore */
453 break;
454 /* X becomes S */
455 case 'X':
456 Phonize('S');
457 w_idx++;
458 break;
459 /* Vowels are kept */
460
461 /*
462 * We did A already case 'A': case 'a':
463 */
464 case 'E':
465 case 'I':
466 case 'O':
467 case 'U':
468 Phonize(Curr_Letter);
469 w_idx++;
470 break;
471 default:
472 /* do nothing */
473 break;
474 }
475
476
477
478 /* On to the metaphoning */
479 for (; Curr_Letter != '\0' &&
480 (max_phonemes == 0 || Phone_Len < max_phonemes);
481 w_idx++)
482 {
483 /*
484 * How many letters to skip because an earlier encoding handled
485 * multiple letters
486 */
487 unsigned short int skip_letter = 0;
488
489
490 /*
491 * THOUGHT: It would be nice if, rather than having things like...
492 * well, SCI. For SCI you encode the S, then have to remember to skip
493 * the C. So the phonome SCI invades both S and C. It would be
494 * better, IMHO, to skip the C from the S part of the encoding. Hell,
495 * I'm trying it.
496 */
497
498 /* Ignore non-alphas */
499 if (!isalpha((unsigned char) (Curr_Letter)))
500 continue;
501
502 /* Drop duplicates, except CC */
503 if (Curr_Letter == Prev_Letter &&
504 Curr_Letter != 'C')
505 continue;
506
507 switch (Curr_Letter)
508 {
509 /* B -> B unless in MB */
510 case 'B':
511 if (Prev_Letter != 'M')
512 Phonize('B');
513 break;
514
515 /*
516 * 'sh' if -CIA- or -CH, but not SCH, except SCHW. (SCHW is
517 * handled in S) S if -CI-, -CE- or -CY- dropped if -SCI-,
518 * SCE-, -SCY- (handed in S) else K
519 */
520 case 'C':
521 if (MAKESOFT(Next_Letter))
522 { /* C[IEY] */
523 if (After_Next_Letter == 'A' &&
524 Next_Letter == 'I')
525 { /* CIA */
526 Phonize(SH);
527 }
528 /* SC[IEY] */
529 else if (Prev_Letter == 'S')
530 {
531 /* Dropped */
532 }
533 else
534 Phonize('S');
535 }
536 else if (Next_Letter == 'H')
537 {
538 #ifndef USE_TRADITIONAL_METAPHONE
539 if (After_Next_Letter == 'R' ||
540 Prev_Letter == 'S')
541 { /* Christ, School */
542 Phonize('K');
543 }
544 else
545 Phonize(SH);
546 #else
547 Phonize(SH);
548 #endif
549 skip_letter++;
550 }
551 else
552 Phonize('K');
553 break;
554
555 /*
556 * J if in -DGE-, -DGI- or -DGY- else T
557 */
558 case 'D':
559 if (Next_Letter == 'G' &&
560 MAKESOFT(After_Next_Letter))
561 {
562 Phonize('J');
563 skip_letter++;
564 }
565 else
566 Phonize('T');
567 break;
568
569 /*
570 * F if in -GH and not B--GH, D--GH, -H--GH, -H---GH else
571 * dropped if -GNED, -GN, else dropped if -DGE-, -DGI- or
572 * -DGY- (handled in D) else J if in -GE-, -GI, -GY and not GG
573 * else K
574 */
575 case 'G':
576 if (Next_Letter == 'H')
577 {
578 if (!(NOGHTOF(Look_Back_Letter(3)) ||
579 Look_Back_Letter(4) == 'H'))
580 {
581 Phonize('F');
582 skip_letter++;
583 }
584 else
585 {
586 /* silent */
587 }
588 }
589 else if (Next_Letter == 'N')
590 {
591 if (Isbreak(After_Next_Letter) ||
592 (After_Next_Letter == 'E' &&
593 Look_Ahead_Letter(3) == 'D'))
594 {
595 /* dropped */
596 }
597 else
598 Phonize('K');
599 }
600 else if (MAKESOFT(Next_Letter) &&
601 Prev_Letter != 'G')
602 Phonize('J');
603 else
604 Phonize('K');
605 break;
606 /* H if before a vowel and not after C,G,P,S,T */
607 case 'H':
608 if (isvowel(Next_Letter) &&
609 !AFFECTH(Prev_Letter))
610 Phonize('H');
611 break;
612
613 /*
614 * dropped if after C else K
615 */
616 case 'K':
617 if (Prev_Letter != 'C')
618 Phonize('K');
619 break;
620
621 /*
622 * F if before H else P
623 */
624 case 'P':
625 if (Next_Letter == 'H')
626 Phonize('F');
627 else
628 Phonize('P');
629 break;
630
631 /*
632 * K
633 */
634 case 'Q':
635 Phonize('K');
636 break;
637
638 /*
639 * 'sh' in -SH-, -SIO- or -SIA- or -SCHW- else S
640 */
641 case 'S':
642 if (Next_Letter == 'I' &&
643 (After_Next_Letter == 'O' ||
644 After_Next_Letter == 'A'))
645 Phonize(SH);
646 else if (Next_Letter == 'H')
647 {
648 Phonize(SH);
649 skip_letter++;
650 }
651 #ifndef USE_TRADITIONAL_METAPHONE
652 else if (Next_Letter == 'C' &&
653 Look_Ahead_Letter(2) == 'H' &&
654 Look_Ahead_Letter(3) == 'W')
655 {
656 Phonize(SH);
657 skip_letter += 2;
658 }
659 #endif
660 else
661 Phonize('S');
662 break;
663
664 /*
665 * 'sh' in -TIA- or -TIO- else 'th' before H else T
666 */
667 case 'T':
668 if (Next_Letter == 'I' &&
669 (After_Next_Letter == 'O' ||
670 After_Next_Letter == 'A'))
671 Phonize(SH);
672 else if (Next_Letter == 'H')
673 {
674 Phonize(TH);
675 skip_letter++;
676 }
677 else
678 Phonize('T');
679 break;
680 /* F */
681 case 'V':
682 Phonize('F');
683 break;
684 /* W before a vowel, else dropped */
685 case 'W':
686 if (isvowel(Next_Letter))
687 Phonize('W');
688 break;
689 /* KS */
690 case 'X':
691 Phonize('K');
692 if (max_phonemes == 0 || Phone_Len < max_phonemes)
693 Phonize('S');
694 break;
695 /* Y if followed by a vowel */
696 case 'Y':
697 if (isvowel(Next_Letter))
698 Phonize('Y');
699 break;
700 /* S */
701 case 'Z':
702 Phonize('S');
703 break;
704 /* No transformation */
705 case 'F':
706 case 'J':
707 case 'L':
708 case 'M':
709 case 'N':
710 case 'R':
711 Phonize(Curr_Letter);
712 break;
713 default:
714 /* nothing */
715 break;
716 } /* END SWITCH */
717
718 w_idx += skip_letter;
719 } /* END FOR */
720
721 End_Phoned_Word;
722
723 return (META_SUCCESS);
724 } /* END metaphone */
725
726
727 /*
728 * SQL function: soundex(text) returns text
729 */
730 PG_FUNCTION_INFO_V1(soundex);
731
732 Datum
soundex(PG_FUNCTION_ARGS)733 soundex(PG_FUNCTION_ARGS)
734 {
735 char outstr[SOUNDEX_LEN + 1];
736 char *arg;
737
738 arg = text_to_cstring(PG_GETARG_TEXT_P(0));
739
740 _soundex(arg, outstr);
741
742 PG_RETURN_TEXT_P(cstring_to_text(outstr));
743 }
744
745 static void
_soundex(const char * instr,char * outstr)746 _soundex(const char *instr, char *outstr)
747 {
748 int count;
749
750 AssertArg(instr);
751 AssertArg(outstr);
752
753 outstr[SOUNDEX_LEN] = '\0';
754
755 /* Skip leading non-alphabetic characters */
756 while (!isalpha((unsigned char) instr[0]) && instr[0])
757 ++instr;
758
759 /* No string left */
760 if (!instr[0])
761 {
762 outstr[0] = (char) 0;
763 return;
764 }
765
766 /* Take the first letter as is */
767 *outstr++ = (char) toupper((unsigned char) *instr++);
768
769 count = 1;
770 while (*instr && count < SOUNDEX_LEN)
771 {
772 if (isalpha((unsigned char) *instr) &&
773 soundex_code(*instr) != soundex_code(*(instr - 1)))
774 {
775 *outstr = soundex_code(instr[0]);
776 if (*outstr != '0')
777 {
778 ++outstr;
779 ++count;
780 }
781 }
782 ++instr;
783 }
784
785 /* Fill with 0's */
786 while (count < SOUNDEX_LEN)
787 {
788 *outstr = '0';
789 ++outstr;
790 ++count;
791 }
792 }
793
794 PG_FUNCTION_INFO_V1(difference);
795
796 Datum
difference(PG_FUNCTION_ARGS)797 difference(PG_FUNCTION_ARGS)
798 {
799 char sndx1[SOUNDEX_LEN + 1],
800 sndx2[SOUNDEX_LEN + 1];
801 int i,
802 result;
803
804 _soundex(text_to_cstring(PG_GETARG_TEXT_P(0)), sndx1);
805 _soundex(text_to_cstring(PG_GETARG_TEXT_P(1)), sndx2);
806
807 result = 0;
808 for (i = 0; i < SOUNDEX_LEN; i++)
809 {
810 if (sndx1[i] == sndx2[i])
811 result++;
812 }
813
814 PG_RETURN_INT32(result);
815 }
816