1 /* -*- buffer-read-only: t -*- vi: set ro: */ 2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */ 3 /* Substring search in a NUL terminated string of 'char' elements, 4 using the Knuth-Morris-Pratt algorithm. 5 Copyright (C) 2005-2010 Free Software Foundation, Inc. 6 Written by Bruno Haible <bruno@clisp.org>, 2005. 7 8 This program is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3, or (at your option) 11 any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program; if not, write to the Free Software Foundation, 20 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 21 22 /* Before including this file, you need to define: 23 CANON_ELEMENT(c) A macro that canonicalizes an element right after 24 it has been fetched from one of the two strings. 25 The argument is an 'unsigned char'; the result 26 must be an 'unsigned char' as well. */ 27 28 /* Knuth-Morris-Pratt algorithm. 29 See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm 30 Return a boolean indicating success: 31 Return true and set *RESULTP if the search was completed. 32 Return false if it was aborted because not enough memory was available. */ 33 static bool 34 knuth_morris_pratt_unibyte (const char *haystack, const char *needle, 35 const char **resultp) 36 { 37 size_t m = strlen (needle); 38 39 /* Allocate the table. */ 40 size_t *table = (size_t *) nmalloca (m, sizeof (size_t)); 41 if (table == NULL) 42 return false; 43 /* Fill the table. 44 For 0 < i < m: 45 0 < table[i] <= i is defined such that 46 forall 0 < x < table[i]: needle[x..i-1] != needle[0..i-1-x], 47 and table[i] is as large as possible with this property. 48 This implies: 49 1) For 0 < i < m: 50 If table[i] < i, 51 needle[table[i]..i-1] = needle[0..i-1-table[i]]. 52 2) For 0 < i < m: 53 rhaystack[0..i-1] == needle[0..i-1] 54 and exists h, i <= h < m: rhaystack[h] != needle[h] 55 implies 56 forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1]. 57 table[0] remains uninitialized. */ 58 { 59 size_t i, j; 60 61 /* i = 1: Nothing to verify for x = 0. */ 62 table[1] = 1; 63 j = 0; 64 65 for (i = 2; i < m; i++) 66 { 67 /* Here: j = i-1 - table[i-1]. 68 The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold 69 for x < table[i-1], by induction. 70 Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */ 71 unsigned char b = CANON_ELEMENT ((unsigned char) needle[i - 1]); 72 73 for (;;) 74 { 75 /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x] 76 is known to hold for x < i-1-j. 77 Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */ 78 if (b == CANON_ELEMENT ((unsigned char) needle[j])) 79 { 80 /* Set table[i] := i-1-j. */ 81 table[i] = i - ++j; 82 break; 83 } 84 /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds 85 for x = i-1-j, because 86 needle[i-1] != needle[j] = needle[i-1-x]. */ 87 if (j == 0) 88 { 89 /* The inequality holds for all possible x. */ 90 table[i] = i; 91 break; 92 } 93 /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds 94 for i-1-j < x < i-1-j+table[j], because for these x: 95 needle[x..i-2] 96 = needle[x-(i-1-j)..j-1] 97 != needle[0..j-1-(x-(i-1-j))] (by definition of table[j]) 98 = needle[0..i-2-x], 99 hence needle[x..i-1] != needle[0..i-1-x]. 100 Furthermore 101 needle[i-1-j+table[j]..i-2] 102 = needle[table[j]..j-1] 103 = needle[0..j-1-table[j]] (by definition of table[j]). */ 104 j = j - table[j]; 105 } 106 /* Here: j = i - table[i]. */ 107 } 108 } 109 110 /* Search, using the table to accelerate the processing. */ 111 { 112 size_t j; 113 const char *rhaystack; 114 const char *phaystack; 115 116 *resultp = NULL; 117 j = 0; 118 rhaystack = haystack; 119 phaystack = haystack; 120 /* Invariant: phaystack = rhaystack + j. */ 121 while (*phaystack != '\0') 122 if (CANON_ELEMENT ((unsigned char) needle[j]) 123 == CANON_ELEMENT ((unsigned char) *phaystack)) 124 { 125 j++; 126 phaystack++; 127 if (j == m) 128 { 129 /* The entire needle has been found. */ 130 *resultp = rhaystack; 131 break; 132 } 133 } 134 else if (j > 0) 135 { 136 /* Found a match of needle[0..j-1], mismatch at needle[j]. */ 137 rhaystack += table[j]; 138 j -= table[j]; 139 } 140 else 141 { 142 /* Found a mismatch at needle[0] already. */ 143 rhaystack++; 144 phaystack++; 145 } 146 } 147 148 freea (table); 149 return true; 150 } 151 152 #undef CANON_ELEMENT 153