1 /* Substring search in a NUL terminated string of UNIT elements, 2 using the Knuth-Morris-Pratt algorithm. 3 Copyright (C) 2005-2015 Free Software Foundation, Inc. 4 Written by Bruno Haible <bruno@clisp.org>, 2005. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, see <http://www.gnu.org/licenses/>. */ 18 19 /* Before including this file, you need to define: 20 UNIT The element type of the needle and haystack. 21 CANON_ELEMENT(c) A macro that canonicalizes an element right after 22 it has been fetched from needle or haystack. 23 The argument is of type UNIT; the result must be 24 of type UNIT as well. */ 25 26 /* Knuth-Morris-Pratt algorithm. 27 See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm 28 HAYSTACK is the NUL terminated string in which to search for. 29 NEEDLE is the string to search for in HAYSTACK, consisting of NEEDLE_LEN 30 units. 31 Return a boolean indicating success: 32 Return true and set *RESULTP if the search was completed. 33 Return false if it was aborted because not enough memory was available. */ 34 static bool 35 knuth_morris_pratt (const UNIT *haystack, 36 const UNIT *needle, size_t needle_len, 37 const UNIT **resultp) 38 { 39 size_t m = needle_len; 40 41 /* Allocate the table. */ 42 size_t *table = (size_t *) nmalloca (m, sizeof (size_t)); 43 if (table == NULL) 44 return false; 45 /* Fill the table. 46 For 0 < i < m: 47 0 < table[i] <= i is defined such that 48 forall 0 < x < table[i]: needle[x..i-1] != needle[0..i-1-x], 49 and table[i] is as large as possible with this property. 50 This implies: 51 1) For 0 < i < m: 52 If table[i] < i, 53 needle[table[i]..i-1] = needle[0..i-1-table[i]]. 54 2) For 0 < i < m: 55 rhaystack[0..i-1] == needle[0..i-1] 56 and exists h, i <= h < m: rhaystack[h] != needle[h] 57 implies 58 forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1]. 59 table[0] remains uninitialized. */ 60 { 61 size_t i, j; 62 63 /* i = 1: Nothing to verify for x = 0. */ 64 table[1] = 1; 65 j = 0; 66 67 for (i = 2; i < m; i++) 68 { 69 /* Here: j = i-1 - table[i-1]. 70 The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold 71 for x < table[i-1], by induction. 72 Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */ 73 UNIT b = CANON_ELEMENT (needle[i - 1]); 74 75 for (;;) 76 { 77 /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x] 78 is known to hold for x < i-1-j. 79 Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */ 80 if (b == CANON_ELEMENT (needle[j])) 81 { 82 /* Set table[i] := i-1-j. */ 83 table[i] = i - ++j; 84 break; 85 } 86 /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds 87 for x = i-1-j, because 88 needle[i-1] != needle[j] = needle[i-1-x]. */ 89 if (j == 0) 90 { 91 /* The inequality holds for all possible x. */ 92 table[i] = i; 93 break; 94 } 95 /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds 96 for i-1-j < x < i-1-j+table[j], because for these x: 97 needle[x..i-2] 98 = needle[x-(i-1-j)..j-1] 99 != needle[0..j-1-(x-(i-1-j))] (by definition of table[j]) 100 = needle[0..i-2-x], 101 hence needle[x..i-1] != needle[0..i-1-x]. 102 Furthermore 103 needle[i-1-j+table[j]..i-2] 104 = needle[table[j]..j-1] 105 = needle[0..j-1-table[j]] (by definition of table[j]). */ 106 j = j - table[j]; 107 } 108 /* Here: j = i - table[i]. */ 109 } 110 } 111 112 /* Search, using the table to accelerate the processing. */ 113 { 114 size_t j; 115 const UNIT *rhaystack; 116 const UNIT *phaystack; 117 118 *resultp = NULL; 119 j = 0; 120 rhaystack = haystack; 121 phaystack = haystack; 122 /* Invariant: phaystack = rhaystack + j. */ 123 while (*phaystack != 0) 124 if (CANON_ELEMENT (needle[j]) == CANON_ELEMENT (*phaystack)) 125 { 126 j++; 127 phaystack++; 128 if (j == m) 129 { 130 /* The entire needle has been found. */ 131 *resultp = rhaystack; 132 break; 133 } 134 } 135 else if (j > 0) 136 { 137 /* Found a match of needle[0..j-1], mismatch at needle[j]. */ 138 rhaystack += table[j]; 139 j -= table[j]; 140 } 141 else 142 { 143 /* Found a mismatch at needle[0] already. */ 144 rhaystack++; 145 phaystack++; 146 } 147 } 148 149 freea (table); 150 return true; 151 } 152 153 #undef CANON_ELEMENT 154