1 /* Optimized strstr function.
2    Copyright (c) 2018 Arm Ltd.  All rights reserved.
3 
4    SPDX-License-Identifier: BSD-3-Clause
5 
6    Redistribution and use in source and binary forms, with or without
7    modification, are permitted provided that the following conditions
8    are met:
9    1. Redistributions of source code must retain the above copyright
10       notice, this list of conditions and the following disclaimer.
11    2. Redistributions in binary form must reproduce the above copyright
12       notice, this list of conditions and the following disclaimer in the
13       documentation and/or other materials provided with the distribution.
14    3. The name of the company may not be used to endorse or promote
15       products derived from this software without specific prior written
16       permission.
17 
18    THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
19    WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21    IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23    TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
28 
29 /*
30 FUNCTION
31 	<<strstr>>---find string segment
32 
33 INDEX
34 	strstr
35 
36 SYNOPSIS
37 	#include <string.h>
38 	char *strstr(const char *<[s1]>, const char *<[s2]>);
39 
40 DESCRIPTION
41 	Locates the first occurrence in the string pointed to by <[s1]> of
42 	the sequence of characters in the string pointed to by <[s2]>
43 	(excluding the terminating null character).
44 
45 RETURNS
46 	Returns a pointer to the located string segment, or a null
47 	pointer if the string <[s2]> is not found. If <[s2]> points to
48 	a string with zero length, <[s1]> is returned.
49 
50 PORTABILITY
51 <<strstr>> is ANSI C.
52 
53 <<strstr>> requires no supporting OS subroutines.
54 
55 QUICKREF
56 	strstr ansi pure
57 */
58 
59 #include <string.h>
60 #include <limits.h>
61 
62 #if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) \
63     || CHAR_BIT > 8
64 
65 /* Small and efficient strstr implementation.  */
66 char *
strstr(const char * hs,const char * ne)67 strstr (const char *hs, const char *ne)
68 {
69   size_t i;
70   int c = ne[0];
71 
72   if (c == 0)
73     return (char*)hs;
74 
75   for ( ; hs[0] != '\0'; hs++)
76     {
77       if (hs[0] != c)
78 	continue;
79       for (i = 1; ne[i] != 0; i++)
80 	if (hs[i] != ne[i])
81 	  break;
82       if (ne[i] == '\0')
83 	return (char*)hs;
84     }
85 
86   return NULL;
87 }
88 
89 #else /* compilation for speed */
90 
91 # define RETURN_TYPE char *
92 # define AVAILABLE(h, h_l, j, n_l) (((j) <= (h_l) - (n_l)) \
93    || ((h_l) += strnlen ((char *)(h) + (h_l), (n_l) | 2048), ((j) <= (h_l) - (n_l))))
94 
95 # include "str-two-way.h"
96 
97 /* Number of bits used to index shift table.  */
98 #define SHIFT_TABLE_BITS 6
99 
100 static inline char *
strstr2(const unsigned char * hs,const unsigned char * ne)101 strstr2 (const unsigned char *hs, const unsigned char *ne)
102 {
103   uint32_t h1 = (ne[0] << 16) | ne[1];
104   uint32_t h2 = 0;
105   int c;
106   for (c = hs[0]; h1 != h2 && c != 0; c = *++hs)
107       h2 = (h2 << 16) | c;
108   return h1 == h2 ? (char *)hs - 2 : NULL;
109 }
110 
111 static inline char *
strstr3(const unsigned char * hs,const unsigned char * ne)112 strstr3 (const unsigned char *hs, const unsigned char *ne)
113 {
114   uint32_t h1 = (ne[0] << 24) | (ne[1] << 16) | (ne[2] << 8);
115   uint32_t h2 = 0;
116   int c;
117   for (c = hs[0]; h1 != h2 && c != 0; c = *++hs)
118       h2 = (h2 | c) << 8;
119   return h1 == h2 ? (char *)hs - 3 : NULL;
120 }
121 
122 static inline char *
strstr4(const unsigned char * hs,const unsigned char * ne)123 strstr4 (const unsigned char *hs, const unsigned char *ne)
124 {
125   uint32_t h1 = (ne[0] << 24) | (ne[1] << 16) | (ne[2] << 8) | ne[3];
126   uint32_t h2 = 0;
127   int c;
128   for (c = hs[0]; c != 0 && h1 != h2; c = *++hs)
129     h2 = (h2 << 8) | c;
130   return h1 == h2 ? (char *)hs - 4 : NULL;
131 }
132 
133 /* Extremely fast strstr algorithm with guaranteed linear-time performance.
134    Small needles up to size 4 use a dedicated linear search.  Longer needles
135    up to size 254 use Sunday's Quick-Search algorithm.  Due to its simplicity
136    it has the best average performance of string matching algorithms on almost
137    all inputs.  It uses a bad-character shift table to skip past mismatches.
138    By limiting the needle length to 254, the shift table can be reduced to 8
139    bits per entry, lowering preprocessing overhead and minimizing cache effects.
140    The limit also implies the worst-case performance is linear.
141    Even larger needles are processed by the linear-time Two-Way algorithm.
142 */
143 char *
strstr(const char * haystack,const char * needle)144 strstr (const char *haystack, const char *needle)
145 {
146   const unsigned char *hs = (const unsigned char *) haystack;
147   const unsigned char *ne = (const unsigned char *) needle;
148   int i;
149 
150   /* Handle short needle special cases first.  */
151   if (ne[0] == '\0')
152     return (char *) hs;
153   if (ne[1] == '\0')
154     return (char*)strchr ((char *) hs, (char) ne[0]);
155   if (ne[2] == '\0')
156     return strstr2 (hs, ne);
157   if (ne[3] == '\0')
158     return strstr3 (hs, ne);
159   if (ne[4] == '\0')
160     return strstr4 (hs, ne);
161 
162   size_t ne_len = strlen ((char *) ne);
163   size_t hs_len = strnlen ((char *) hs, ne_len | 512);
164 
165   /* Ensure haystack length is >= needle length.  */
166   if (hs_len < ne_len)
167     return NULL;
168 
169   /* Use the Quick-Search algorithm for needle lengths less than 255.  */
170   if (__builtin_expect (ne_len < 255, 1))
171     {
172       uint8_t shift[1 << SHIFT_TABLE_BITS];
173       const unsigned char *end = hs + hs_len - ne_len;
174 
175       /* Initialize bad character shift hash table.  */
176       memset (shift, ne_len + 1, sizeof (shift));
177       for (i = 0; i < ne_len; i++)
178 	shift[ne[i] % sizeof (shift)] = ne_len - i;
179 
180       do
181 	{
182 	  hs--;
183 
184 	  /* Search by skipping past bad characters.  */
185 	  size_t tmp = shift[hs[ne_len] % sizeof (shift)];
186 	  for (hs += tmp; hs <= end; hs += tmp)
187 	    {
188 	      tmp = shift[hs[ne_len] % sizeof (shift)];
189 	      if (memcmp (hs, ne, ne_len) == 0)
190 		return (char*) hs;
191 	    }
192 	  if (end[ne_len] == 0)
193 	    return NULL;
194 	  end += strnlen ((char *) end + ne_len, 2048);
195 	}
196       while (hs <= end);
197 
198       return NULL;
199     }
200 
201   /* Use Two-Way algorithm for very long needles.  */
202   return two_way_long_needle (hs, hs_len, ne, ne_len);
203 }
204 #endif /* compilation for speed */
205