1 /* -*- buffer-read-only: t -*- vi: set ro: */ 2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */ 3 /* Substring search in a NUL terminated string of UNIT elements, 4 using the Knuth-Morris-Pratt algorithm. 5 Copyright (C) 2005-2011 Free Software Foundation, Inc. 6 Written by Bruno Haible <bruno@clisp.org>, 2005. 7 8 This program is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3, or (at your option) 11 any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program; if not, write to the Free Software Foundation, 20 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 21 22 /* Before including this file, you need to define: 23 UNIT The element type of the needle and haystack. 24 CANON_ELEMENT(c) A macro that canonicalizes an element right after 25 it has been fetched from needle or haystack. 26 The argument is of type UNIT; the result must be 27 of type UNIT as well. */ 28 29 /* Knuth-Morris-Pratt algorithm. 30 See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm 31 HAYSTACK is the NUL terminated string in which to search for. 32 NEEDLE is the string to search for in HAYSTACK, consisting of NEEDLE_LEN 33 units. 34 Return a boolean indicating success: 35 Return true and set *RESULTP if the search was completed. 36 Return false if it was aborted because not enough memory was available. */ 37 static bool 38 knuth_morris_pratt (const UNIT *haystack, 39 const UNIT *needle, size_t needle_len, 40 const UNIT **resultp) 41 { 42 size_t m = needle_len; 43 44 /* Allocate the table. */ 45 size_t *table = (size_t *) nmalloca (m, sizeof (size_t)); 46 if (table == NULL) 47 return false; 48 /* Fill the table. 49 For 0 < i < m: 50 0 < table[i] <= i is defined such that 51 forall 0 < x < table[i]: needle[x..i-1] != needle[0..i-1-x], 52 and table[i] is as large as possible with this property. 53 This implies: 54 1) For 0 < i < m: 55 If table[i] < i, 56 needle[table[i]..i-1] = needle[0..i-1-table[i]]. 57 2) For 0 < i < m: 58 rhaystack[0..i-1] == needle[0..i-1] 59 and exists h, i <= h < m: rhaystack[h] != needle[h] 60 implies 61 forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1]. 62 table[0] remains uninitialized. */ 63 { 64 size_t i, j; 65 66 /* i = 1: Nothing to verify for x = 0. */ 67 table[1] = 1; 68 j = 0; 69 70 for (i = 2; i < m; i++) 71 { 72 /* Here: j = i-1 - table[i-1]. 73 The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold 74 for x < table[i-1], by induction. 75 Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */ 76 UNIT b = CANON_ELEMENT (needle[i - 1]); 77 78 for (;;) 79 { 80 /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x] 81 is known to hold for x < i-1-j. 82 Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */ 83 if (b == CANON_ELEMENT (needle[j])) 84 { 85 /* Set table[i] := i-1-j. */ 86 table[i] = i - ++j; 87 break; 88 } 89 /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds 90 for x = i-1-j, because 91 needle[i-1] != needle[j] = needle[i-1-x]. */ 92 if (j == 0) 93 { 94 /* The inequality holds for all possible x. */ 95 table[i] = i; 96 break; 97 } 98 /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds 99 for i-1-j < x < i-1-j+table[j], because for these x: 100 needle[x..i-2] 101 = needle[x-(i-1-j)..j-1] 102 != needle[0..j-1-(x-(i-1-j))] (by definition of table[j]) 103 = needle[0..i-2-x], 104 hence needle[x..i-1] != needle[0..i-1-x]. 105 Furthermore 106 needle[i-1-j+table[j]..i-2] 107 = needle[table[j]..j-1] 108 = needle[0..j-1-table[j]] (by definition of table[j]). */ 109 j = j - table[j]; 110 } 111 /* Here: j = i - table[i]. */ 112 } 113 } 114 115 /* Search, using the table to accelerate the processing. */ 116 { 117 size_t j; 118 const UNIT *rhaystack; 119 const UNIT *phaystack; 120 121 *resultp = NULL; 122 j = 0; 123 rhaystack = haystack; 124 phaystack = haystack; 125 /* Invariant: phaystack = rhaystack + j. */ 126 while (*phaystack != 0) 127 if (CANON_ELEMENT (needle[j]) == CANON_ELEMENT (*phaystack)) 128 { 129 j++; 130 phaystack++; 131 if (j == m) 132 { 133 /* The entire needle has been found. */ 134 *resultp = rhaystack; 135 break; 136 } 137 } 138 else if (j > 0) 139 { 140 /* Found a match of needle[0..j-1], mismatch at needle[j]. */ 141 rhaystack += table[j]; 142 j -= table[j]; 143 } 144 else 145 { 146 /* Found a mismatch at needle[0] already. */ 147 rhaystack++; 148 phaystack++; 149 } 150 } 151 152 freea (table); 153 return true; 154 } 155 156 #undef CANON_ELEMENT 157