xref: /freebsd/usr.bin/locate/locate/util.c (revision c03c5b1c)
1 /*
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1995-2022 Wolfram Schneider <wosch@FreeBSD.org>
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * James A. Woods.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * $FreeBSD$
36  */
37 
38 #include <sys/param.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <err.h>
42 #include <arpa/inet.h>
43 #include <stdio.h>
44 #include <sys/stat.h>
45 
46 #include "locate.h"
47 #include "pathnames.h"
48 
49 char 	**colon(char **, char*, char*);
50 char 	*patprep(char *);
51 u_char 	*tolower_word(u_char *);
52 int 	getwm(caddr_t);
53 int 	getwf(FILE *);
54 int	check_bigram_char(int);
55 
56 /*
57  * Validate bigram chars. If the test failed the database is corrupt
58  * or the database is obviously not a locate database.
59  */
60 int
61 check_bigram_char(ch)
62 	int ch;
63 {
64 	/* legal bigram: 0, ASCII_MIN ... ASCII_MAX */
65 	if (ch == 0 ||
66 	    (ch >= ASCII_MIN && ch <= ASCII_MAX))
67 		return (ch);
68 
69 	errx(1,
70 		"locate database header corrupt, bigram char outside 0, %d-%d: %d",
71 		ASCII_MIN, ASCII_MAX, ch);
72 	exit(1);
73 }
74 
/*
 * Split a colon separated string into a NULL-terminated char vector and
 * append the pieces to dbv.
 *
 * "bla:foo" -> {"bla", "foo"}
 * "bla:"    -> {"bla", dot}
 * ":"       -> {dot}
 * "bla"     -> {"bla"}
 * ""        -> warn, vector unchanged
 *
 * dbv  - existing NULL-terminated vector, or NULL to start a new one.
 * path - the colon separated list to split; it is not modified.
 * dot  - pointer stored (not copied) for every empty component.
 *
 * Returns the (possibly reallocated) vector.  Non-empty components are
 * stored as freshly malloc'ed copies.  Allocation failure terminates the
 * program through err(3).
 */
char **
colon(char **dbv, char *path, char *dot)
{
	size_t vlen, slen;
	char *c, *ch, *p;
	char **pv;

	if (dbv == NULL) {
		if ((dbv = malloc(sizeof(*dbv))) == NULL)
			err(1, "malloc");
		*dbv = NULL;
	}

	/* empty string */
	if (*path == '\0') {
		warnx("empty database name, ignored");
		return (dbv);
	}

	/* length of string vector, not counting the NULL terminator */
	vlen = 0;
	for (pv = dbv; *pv != NULL; pv++)
		vlen++;

	/* c marks the start of the current component, ch scans ahead */
	for (ch = c = path; ; ch++) {
		/*
		 * A component ends at every ':' and at the end of the
		 * string.  The one exception is the end of the string
		 * ":" itself, so that a lone colon yields a single dot
		 * rather than two.  path is known to be non-empty here,
		 * so ch[-1] is valid whenever *ch is '\0'.
		 */
		if (*ch == ':' ||
		    (*ch == '\0' && !(*(ch - 1) == ':' && ch == path + 1))) {
			if (ch == c) {
				/* empty component -> dot */
				p = dot;
			} else {
				/* copy the component into its own string */
				slen = (size_t)(ch - c);
				if ((p = malloc(slen + 1)) == NULL)
					err(1, "malloc");
				memcpy(p, c, slen);
				p[slen] = '\0';
			}

			/* grow dbv by one element plus the terminator */
			dbv = realloc(dbv, sizeof(*dbv) * (vlen + 2));
			if (dbv == NULL)
				err(1, "realloc");
			dbv[vlen++] = p;
			dbv[vlen] = NULL;
			c = ch + 1;
		}
		if (*ch == '\0')
			break;
	}
	return (dbv);
}
133 
134 /*
135  * extract last glob-free subpattern in name for fast pre-match; prepend
136  * '\0' for backwards match; return end of new pattern
137  */
138 static char globfree[100];
139 
140 char *
141 patprep(name)
142 	char *name;
143 {
144 	char *endmark, *p, *subp;
145 
146 	subp = globfree;
147 	*subp++ = '\0';   /* set first element to '\0' */
148 	p = name + strlen(name) - 1;
149 
150 	/* skip trailing metacharacters */
151 	for (; p >= name; p--)
152 		if (strchr(LOCATE_REG, *p) == NULL)
153 			break;
154 
155 	/*
156 	 * check if maybe we are in a character class
157 	 *
158 	 * 'foo.[ch]'
159 	 *        |----< p
160 	 */
161 	if (p >= name &&
162 	    (strchr(p, '[') != NULL || strchr(p, ']') != NULL)) {
163 		for (p = name; *p != '\0'; p++)
164 			if (*p == ']' || *p == '[')
165 				break;
166 		p--;
167 
168 		/*
169 		 * cannot find a non-meta character, give up
170 		 * '*\*[a-z]'
171 		 *    |-------< p
172 		 */
173 		if (p >= name && strchr(LOCATE_REG, *p) != NULL)
174 			p = name - 1;
175 	}
176 
177 	if (p < name)
178 		/* only meta chars: "???", force '/' search */
179 		*subp++ = '/';
180 
181 	else {
182 		for (endmark = p; p >= name; p--)
183 			if (strchr(LOCATE_REG, *p) != NULL)
184 				break;
185 		for (++p;
186 		    (p <= endmark) && subp < (globfree + sizeof(globfree));)
187 			*subp++ = *p++;
188 	}
189 	*subp = '\0';
190 	return (--subp);
191 }
192 
193 /* tolower word */
194 u_char *
195 tolower_word(word)
196 	u_char *word;
197 {
198 	u_char *p;
199 
200 	for(p = word; *p != '\0'; p++)
201 		*p = TOLOWER(*p);
202 
203 	return (word);
204 }
205 
206 
207 /*
208  * Read integer from mmap pointer.
209  * Essentially a simple ``return *(int *)p'' but avoids sigbus
210  * for integer alignment (SunOS 4.x, 5.x).
211  *
212  * Convert network byte order to host byte order if necessary.
213  * So we can read a locate database on FreeBSD/i386 (little endian)
214  * which was built on SunOS/sparc (big endian).
215  */
216 
217 int
218 getwm(p)
219 	caddr_t p;
220 {
221 	union {
222 		char buf[INTSIZE];
223 		int i;
224 	} u;
225 	int i, hi;
226 
227 	/* the integer is stored by an offset of 14 (!!!) */
228         int i_max = LOCATE_PATH_MAX + OFFSET;
229         int i_min = -(LOCATE_PATH_MAX - OFFSET);
230 
231 	for (i = 0; i < (int)INTSIZE; i++)
232 		u.buf[i] = *p++;
233 
234 	i = u.i;
235 
236 	if (i >= i_max || i <= i_min) {
237 		hi = ntohl(i);
238 		if (hi >= i_max || hi <= i_min)
239 			errx(1, "integer out of range: %d < %d < %d",
240 			    i_min, abs(i) < abs(hi) ? i : hi, i_max);
241 		return (hi);
242 	}
243 	return (i);
244 }
245 
246 /*
247  * Read integer from stream.
248  *
249  * Convert network byte order to host byte order if necessary.
250  * So we can read on FreeBSD/i386 (little endian) a locate database
251  * which was built on SunOS/sparc (big endian).
252  */
253 
254 int
255 getwf(fp)
256 	FILE *fp;
257 {
258 	int word, hword;
259         int i_max = LOCATE_PATH_MAX + OFFSET;
260         int i_min = -(LOCATE_PATH_MAX - OFFSET);
261 
262 	word = getw(fp);
263 
264 	if (word >= i_max || word <= i_min) {
265 		hword = ntohl(word);
266 		if (hword >= i_max || hword <= i_min)
267 			errx(1, "integer out of range: %d < %d < %d",
268 			    i_min, abs(word) < abs(hword) ? word : hword, i_max);
269 		return (hword);
270 	}
271 	return (word);
272 }
273 
274 void
275 rebuild_message(char *db)
276 {
277 	/* only for the default locate database */
278 	if (strcmp(_PATH_FCODES, db) == 0) {
279 		fprintf(stderr, "\nTo create a new database, please run the following command as root:\n\n");
280 		fprintf(stderr, "  /etc/periodic/weekly/310.locate\n\n");
281 	}
282 }
283 
284 int
285 check_size(char *db)
286 {
287         struct stat sb;
288         off_t len;
289 
290 	if (stat(db, &sb) == -1) {
291 		warnx("the locate database '%s' does not exist.", db);
292 		rebuild_message(db);
293 		return (0);
294 	}
295 	len = sb.st_size;
296 
297 	if (len < (2 * NBG)) {
298 		warnx("the locate database '%s' is smaller than %d bytes large.", db, (2 * NBG));
299 		rebuild_message(db);
300 		return (0);
301 	}
302 
303 	return (1);
304 }
305