1 /* Optimized strstr function.
2 Copyright (c) 2018 Arm Ltd. All rights reserved.
3
4 SPDX-License-Identifier: BSD-3-Clause
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 3. The name of the company may not be used to endorse or promote
15 products derived from this software without specific prior written
16 permission.
17
18 THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
19 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23 TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
28
29 /*
30 FUNCTION
31 <<strstr>>---find string segment
32
33 INDEX
34 strstr
35
36 SYNOPSIS
37 #include <string.h>
38 char *strstr(const char *<[s1]>, const char *<[s2]>);
39
40 DESCRIPTION
41 Locates the first occurrence in the string pointed to by <[s1]> of
42 the sequence of characters in the string pointed to by <[s2]>
43 (excluding the terminating null character).
44
45 RETURNS
46 Returns a pointer to the located string segment, or a null
47 pointer if the string <[s2]> is not found. If <[s2]> points to
48 a string with zero length, <[s1]> is returned.
49
50 PORTABILITY
51 <<strstr>> is ANSI C.
52
53 <<strstr>> requires no supporting OS subroutines.
54
55 QUICKREF
56 strstr ansi pure
57 */
58
59 #include <string.h>
60 #include <limits.h>
61
62 #if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) \
63 || CHAR_BIT > 8
64
65 /* Small and efficient strstr implementation. */
66 char *
strstr(const char * hs,const char * ne)67 strstr (const char *hs, const char *ne)
68 {
69 size_t i;
70 int c = ne[0];
71
72 if (c == 0)
73 return (char*)hs;
74
75 for ( ; hs[0] != '\0'; hs++)
76 {
77 if (hs[0] != c)
78 continue;
79 for (i = 1; ne[i] != 0; i++)
80 if (hs[i] != ne[i])
81 break;
82 if (ne[i] == '\0')
83 return (char*)hs;
84 }
85
86 return NULL;
87 }
88
89 #else /* compilation for speed */
90
91 # define RETURN_TYPE char *
92 # define AVAILABLE(h, h_l, j, n_l) (((j) <= (h_l) - (n_l)) \
93 || ((h_l) += strnlen ((char *)(h) + (h_l), (n_l) | 2048), ((j) <= (h_l) - (n_l))))
94
95 # include "str-two-way.h"
96
97 /* Number of bits used to index shift table. */
98 #define SHIFT_TABLE_BITS 6
99
100 static inline char *
strstr2(const unsigned char * hs,const unsigned char * ne)101 strstr2 (const unsigned char *hs, const unsigned char *ne)
102 {
103 uint32_t h1 = (ne[0] << 16) | ne[1];
104 uint32_t h2 = 0;
105 int c;
106 for (c = hs[0]; h1 != h2 && c != 0; c = *++hs)
107 h2 = (h2 << 16) | c;
108 return h1 == h2 ? (char *)hs - 2 : NULL;
109 }
110
111 static inline char *
strstr3(const unsigned char * hs,const unsigned char * ne)112 strstr3 (const unsigned char *hs, const unsigned char *ne)
113 {
114 uint32_t h1 = (ne[0] << 24) | (ne[1] << 16) | (ne[2] << 8);
115 uint32_t h2 = 0;
116 int c;
117 for (c = hs[0]; h1 != h2 && c != 0; c = *++hs)
118 h2 = (h2 | c) << 8;
119 return h1 == h2 ? (char *)hs - 3 : NULL;
120 }
121
122 static inline char *
strstr4(const unsigned char * hs,const unsigned char * ne)123 strstr4 (const unsigned char *hs, const unsigned char *ne)
124 {
125 uint32_t h1 = (ne[0] << 24) | (ne[1] << 16) | (ne[2] << 8) | ne[3];
126 uint32_t h2 = 0;
127 int c;
128 for (c = hs[0]; c != 0 && h1 != h2; c = *++hs)
129 h2 = (h2 << 8) | c;
130 return h1 == h2 ? (char *)hs - 4 : NULL;
131 }
132
133 /* Extremely fast strstr algorithm with guaranteed linear-time performance.
134 Small needles up to size 4 use a dedicated linear search. Longer needles
135 up to size 254 use Sunday's Quick-Search algorithm. Due to its simplicity
136 it has the best average performance of string matching algorithms on almost
137 all inputs. It uses a bad-character shift table to skip past mismatches.
138 By limiting the needle length to 254, the shift table can be reduced to 8
139 bits per entry, lowering preprocessing overhead and minimizing cache effects.
140 The limit also implies the worst-case performance is linear.
141 Even larger needles are processed by the linear-time Two-Way algorithm.
142 */
143 char *
strstr(const char * haystack,const char * needle)144 strstr (const char *haystack, const char *needle)
145 {
146 const unsigned char *hs = (const unsigned char *) haystack;
147 const unsigned char *ne = (const unsigned char *) needle;
148 int i;
149
150 /* Handle short needle special cases first. */
151 if (ne[0] == '\0')
152 return (char *) hs;
153 if (ne[1] == '\0')
154 return (char*)strchr ((char *) hs, (char) ne[0]);
155 if (ne[2] == '\0')
156 return strstr2 (hs, ne);
157 if (ne[3] == '\0')
158 return strstr3 (hs, ne);
159 if (ne[4] == '\0')
160 return strstr4 (hs, ne);
161
162 size_t ne_len = strlen ((char *) ne);
163 size_t hs_len = strnlen ((char *) hs, ne_len | 512);
164
165 /* Ensure haystack length is >= needle length. */
166 if (hs_len < ne_len)
167 return NULL;
168
169 /* Use the Quick-Search algorithm for needle lengths less than 255. */
170 if (__builtin_expect (ne_len < 255, 1))
171 {
172 uint8_t shift[1 << SHIFT_TABLE_BITS];
173 const unsigned char *end = hs + hs_len - ne_len;
174
175 /* Initialize bad character shift hash table. */
176 memset (shift, ne_len + 1, sizeof (shift));
177 for (i = 0; i < ne_len; i++)
178 shift[ne[i] % sizeof (shift)] = ne_len - i;
179
180 do
181 {
182 hs--;
183
184 /* Search by skipping past bad characters. */
185 size_t tmp = shift[hs[ne_len] % sizeof (shift)];
186 for (hs += tmp; hs <= end; hs += tmp)
187 {
188 tmp = shift[hs[ne_len] % sizeof (shift)];
189 if (memcmp (hs, ne, ne_len) == 0)
190 return (char*) hs;
191 }
192 if (end[ne_len] == 0)
193 return NULL;
194 end += strnlen ((char *) end + ne_len, 2048);
195 }
196 while (hs <= end);
197
198 return NULL;
199 }
200
201 /* Use Two-Way algorithm for very long needles. */
202 return two_way_long_needle (hs, hs_len, ne, ne_len);
203 }
204 #endif /* compilation for speed */
205