xref: /freebsd/usr.bin/locate/locate/util.c (revision 61e21613)
1 /*
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1995-2022 Wolfram Schneider <wosch@FreeBSD.org>
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * James A. Woods.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/param.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <err.h>
40 #include <arpa/inet.h>
41 #include <stdio.h>
42 #include <sys/stat.h>
43 
44 #include "locate.h"
45 #include "pathnames.h"
46 
47 char 	**colon(char **, char*, char*);
48 char 	*patprep(char *);
49 u_char 	*tolower_word(u_char *);
50 int 	getwm(caddr_t);
51 int 	getwf(FILE *);
52 int	check_bigram_char(int);
53 
54 /*
55  * Validate bigram chars. If the test failed the database is corrupt
56  * or the database is obviously not a locate database.
57  */
58 int
59 check_bigram_char(int ch)
60 {
61 	/* legal bigram: 0, ASCII_MIN ... ASCII_MAX */
62 	if (ch == 0 ||
63 	    (ch >= ASCII_MIN && ch <= ASCII_MAX))
64 		return (ch);
65 
66 	errx(1,
67 		"locate database header corrupt, bigram char outside 0, %d-%d: %d",
68 		ASCII_MIN, ASCII_MAX, ch);
69 	exit(1);
70 }
71 
/*
 * Split a colon separated string and append the pieces, in order, to a
 * NULL-terminated vector of strings.
 *
 * "bla:foo" -> {"bla", "foo"}
 * "bla:"    -> {"bla", dot}
 * ":"       -> {dot}
 * "bla"     -> {"bla"}
 * ""        -> warning is printed, vector is left unchanged
 *
 * dbv:  existing vector to extend, or NULL to start a new one.
 * path: the colon separated list; it is only read, never modified.
 * dot:  pointer stored (not copied) for every empty component.
 *
 * Returns the (possibly reallocated) vector.  Non-empty components are
 * stored as freshly malloc'ed copies.  Any allocation failure terminates
 * the program through err().
 */
char **
colon(char **dbv, char *path, char *dot)
{
	size_t vlen, slen;
	char *start, *ch, *elem;
	char **pv;

	if (dbv == NULL) {
		if ((dbv = malloc(sizeof(*dbv))) == NULL)
			err(1, "malloc");
		*dbv = NULL;
	}

	/* empty string */
	if (*path == '\0') {
		warnx("empty database name, ignored");
		return (dbv);
	}

	/* number of elements already stored in the vector */
	for (vlen = 0, pv = dbv; *pv != NULL; pv++)
		vlen++;

	for (ch = start = path; ; ch++) {
		/*
		 * A component ends at every ':' and at the end of the string.
		 * The one exception is the string ":" itself: its single dot
		 * was already emitted at the colon, so the end of the string
		 * must not emit a second one.
		 */
		if (*ch == ':' ||
		    (*ch == '\0' && !(ch == path + 1 && *path == ':'))) {
			if (ch == start) {
				/* empty component -> dot */
				elem = dot;
			} else {
				/* copy the component into its own string */
				slen = (size_t)(ch - start);
				if ((elem = malloc(slen + 1)) == NULL)
					err(1, "malloc");
				memcpy(elem, start, slen);
				elem[slen] = '\0';
			}

			/* grow the vector: new element plus NULL terminator */
			if ((dbv = realloc(dbv, sizeof(*dbv) * (vlen + 2)))
			    == NULL)
				err(1, "realloc");
			dbv[vlen++] = elem;
			dbv[vlen] = NULL;
			start = ch + 1;
		}
		if (*ch == '\0')
			break;
	}
	return (dbv);
}
130 
131 /*
132  * extract last glob-free subpattern in name for fast pre-match; prepend
133  * '\0' for backwards match; return end of new pattern
134  */
135 static char globfree[100];
136 
137 char *
138 patprep(char *name)
139 {
140 	char *endmark, *p, *subp;
141 
142 	subp = globfree;
143 	*subp++ = '\0';   /* set first element to '\0' */
144 	p = name + strlen(name) - 1;
145 
146 	/* skip trailing metacharacters */
147 	for (; p >= name; p--)
148 		if (strchr(LOCATE_REG, *p) == NULL)
149 			break;
150 
151 	/*
152 	 * check if maybe we are in a character class
153 	 *
154 	 * 'foo.[ch]'
155 	 *        |----< p
156 	 */
157 	if (p >= name &&
158 	    (strchr(p, '[') != NULL || strchr(p, ']') != NULL)) {
159 		for (p = name; *p != '\0'; p++)
160 			if (*p == ']' || *p == '[')
161 				break;
162 		p--;
163 
164 		/*
165 		 * cannot find a non-meta character, give up
166 		 * '*\*[a-z]'
167 		 *    |-------< p
168 		 */
169 		if (p >= name && strchr(LOCATE_REG, *p) != NULL)
170 			p = name - 1;
171 	}
172 
173 	if (p < name)
174 		/* only meta chars: "???", force '/' search */
175 		*subp++ = '/';
176 
177 	else {
178 		for (endmark = p; p >= name; p--)
179 			if (strchr(LOCATE_REG, *p) != NULL)
180 				break;
181 		for (++p;
182 		    (p <= endmark) && subp < (globfree + sizeof(globfree));)
183 			*subp++ = *p++;
184 	}
185 	*subp = '\0';
186 	return (--subp);
187 }
188 
189 /* tolower word */
190 u_char *
191 tolower_word(u_char *word)
192 {
193 	u_char *p;
194 
195 	for(p = word; *p != '\0'; p++)
196 		*p = TOLOWER(*p);
197 
198 	return (word);
199 }
200 
201 
202 /*
203  * Read integer from mmap pointer.
204  * Essentially a simple ``return *(int *)p'' but avoids sigbus
205  * for integer alignment (SunOS 4.x, 5.x).
206  *
207  * Convert network byte order to host byte order if necessary.
208  * So we can read a locate database on FreeBSD/i386 (little endian)
209  * which was built on SunOS/sparc (big endian).
210  */
211 
212 int
213 getwm(caddr_t p)
214 {
215 	union {
216 		char buf[INTSIZE];
217 		int i;
218 	} u;
219 	int i, hi;
220 
221 	/* the integer is stored by an offset of 14 (!!!) */
222         int i_max = LOCATE_PATH_MAX + OFFSET;
223         int i_min = -(LOCATE_PATH_MAX - OFFSET);
224 
225 	for (i = 0; i < (int)INTSIZE; i++)
226 		u.buf[i] = *p++;
227 
228 	i = u.i;
229 
230 	if (i >= i_max || i <= i_min) {
231 		hi = ntohl(i);
232 		if (hi >= i_max || hi <= i_min)
233 			errx(1, "integer out of range: %d < %d < %d",
234 			    i_min, abs(i) < abs(hi) ? i : hi, i_max);
235 		return (hi);
236 	}
237 	return (i);
238 }
239 
240 /*
241  * Read integer from stream.
242  *
243  * Convert network byte order to host byte order if necessary.
244  * So we can read on FreeBSD/i386 (little endian) a locate database
245  * which was built on SunOS/sparc (big endian).
246  */
247 
248 int
249 getwf(FILE *fp)
250 {
251 	int word, hword;
252         int i_max = LOCATE_PATH_MAX + OFFSET;
253         int i_min = -(LOCATE_PATH_MAX - OFFSET);
254 
255 	word = getw(fp);
256 
257 	if (word >= i_max || word <= i_min) {
258 		hword = ntohl(word);
259 		if (hword >= i_max || hword <= i_min)
260 			errx(1, "integer out of range: %d < %d < %d",
261 			    i_min, abs(word) < abs(hword) ? word : hword, i_max);
262 		return (hword);
263 	}
264 	return (word);
265 }
266 
267 void
268 rebuild_message(char *db)
269 {
270 	/* only for the default locate database */
271 	if (strcmp(_PATH_FCODES, db) == 0) {
272 		fprintf(stderr, "\nTo create a new database, please run the following command as root:\n\n");
273 		fprintf(stderr, "  /etc/periodic/weekly/310.locate\n\n");
274 	}
275 }
276 
277 int
278 check_size(char *db)
279 {
280         struct stat sb;
281         off_t len;
282 
283 	if (stat(db, &sb) == -1) {
284 		warnx("the locate database '%s' does not exist.", db);
285 		rebuild_message(db);
286 		return (0);
287 	}
288 	len = sb.st_size;
289 
290 	if (len < (2 * NBG)) {
291 		warnx("the locate database '%s' is smaller than %d bytes large.", db, (2 * NBG));
292 		rebuild_message(db);
293 		return (0);
294 	}
295 
296 	return (1);
297 }
298