1 /*	$OpenBSD: look.c,v 1.11 2005/06/25 17:00:35 niallo Exp $	*/
2 /*	$NetBSD: look.c,v 1.7 1995/08/31 22:41:02 jtc Exp $	*/
3 /*-
4  * Copyright (c) 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * David Hitz of Auspex Systems, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <config.h>
36 #include "bsdlook.h"
37 
38 #ifndef lint
39 #if 0
40 static char copyright[] =
41 "@(#) Copyright (c) 1991, 1993\n	The Regents of the University of California.  All rights reserved.\n";
42 #endif
43 #endif /* not lint */
44 
45 #ifndef lint
46 #if 0
47 static char sccsid[] = "@(#)look.c	8.2 (Berkeley) 5/4/95";
48 static char rcsid[] = "$OpenBSD: look.c,v 1.11 2005/06/25 17:00:35 niallo Exp $";
49 #endif
50 #endif /* not lint */
51 
52 /*
53  * look -- find lines in a sorted list.
54  *
55  * The man page said that TABs and SPACEs participate in -d comparisons.
56  * In fact, they were ignored.  This implements historic practice, not
57  * the manual page.
58  */
59 
60 #include <sys/types.h>
61 #include <sys/mman.h>
62 #include <sys/stat.h>
63 
64 #include <ctype.h>
65 #include <errno.h>
66 #include <fcntl.h>
67 #include <limits.h>
68 #include <stdio.h>
69 #include <stdlib.h>
70 #include <string.h>
71 #include <unistd.h>
72 
73 /*
74  * FOLD and DICT convert characters to a normal form for comparison,
75  * according to the user specified flags.
76  *
77  * DICT expects integers because it uses a non-character value to
78  * indicate a character which should not participate in comparisons.
79  */
80 #define	EQUAL		0
81 #define	GREATER		1
82 #define	LESS		(-1)
83 #define NO_COMPARE	(-2)
84 
85 #define	FOLD(c)	(isascii(c) && isupper(c) ? tolower(c) : (c))
86 #define	DICT(c)	(isascii(c) && isalnum(c) ? (c) : NO_COMPARE)
87 
88 #ifndef SIZE_T_MAX
89 #define SIZE_T_MAX	ULONG_MAX
90 #endif
91 
92 struct uim_look_ctx {
93 	int fd;
94 	size_t len;
95 	char *front0, *back0;
96 	char *front, *back;
97 	int dflag, fflag;
98 	char *acc;
99 };
100 
101 static char	*binary_search(char *, uim_look_ctx *);
102 static int	 compare(char *, char *, uim_look_ctx *);
103 static char	*linear_search(char *, uim_look_ctx *);
104 
105 uim_look_ctx *
uim_look_init(void)106 uim_look_init(void)
107 {
108 	uim_look_ctx *ctx = calloc(1, sizeof(*ctx));
109 
110 	if (!ctx) {
111 		perror("uim_look_init");
112 		return NULL;
113 	}
114 
115 	/* set -df by default. */
116 	ctx->dflag = ctx->fflag = 1;
117 	return ctx;
118 }
119 
120 void
uim_look_set_option_dictionary_order(int dflag,uim_look_ctx * ctx)121 uim_look_set_option_dictionary_order(int dflag, uim_look_ctx *ctx)
122 {
123 	ctx->dflag = dflag;
124 }
125 
126 void
uim_look_set_option_ignore_case(int fflag,uim_look_ctx * ctx)127 uim_look_set_option_ignore_case(int fflag, uim_look_ctx *ctx)
128 {
129 	ctx->fflag = fflag;
130 }
131 
132 void
uim_look_reset(uim_look_ctx * ctx)133 uim_look_reset(uim_look_ctx *ctx)
134 {
135 	ctx->front = ctx->acc = ctx->front0;
136 	ctx->back = ctx->back0;
137 }
138 
139 void
uim_look_set(uim_look_ctx * ctx)140 uim_look_set(uim_look_ctx *ctx)
141 {
142 	ctx->acc = ctx->front;
143 }
144 
145 size_t
uim_look_get(char * string,char * dst,size_t len,uim_look_ctx * ctx)146 uim_look_get(char *string, char *dst, size_t len, uim_look_ctx *ctx)
147 {
148 	char *front = ctx->acc, *back = ctx->back;
149 	char *p = dst;
150 	size_t dst_len = 0;
151 
152 	if (front < back && compare(string, front, ctx) == EQUAL) {
153 		for (; dst_len < len - 1 && front < back && *front != '\n'; ++front, ++dst_len) {
154 			*p++ = *front;
155 		}
156 		ctx->acc = front + 1;
157 		*p = '\0';
158 		return dst_len;
159 	}
160 	return dst_len;
161 }
162 
163 void
uim_look_finish(uim_look_ctx * ctx)164 uim_look_finish(uim_look_ctx *ctx)
165 {
166 	if (!ctx)
167 		return;
168 
169 	if ((uintptr_t)ctx->front0 > 0 && munmap(ctx->front0, ctx->len) == -1)
170 		perror("uim_look_finish");
171 
172 	if (ctx->fd > 0)
173 		close(ctx->fd);
174 
175 	free(ctx);
176 	return;
177 }
178 
179 int
uim_look_open_dict(const char * dict,uim_look_ctx * ctx)180 uim_look_open_dict(const char *dict, uim_look_ctx *ctx)
181 {
182 	struct stat sb;
183 
184 	if ((ctx->fd = open(dict, O_RDONLY, 0)) < 0 || fstat(ctx->fd, &sb)) {
185 		perror("uim_look_open_dict");
186 		return 0;
187 	}
188 	if ((size_t)sb.st_size > SIZE_T_MAX) {
189 		perror("uim_look_open_dict");
190 		return 0;
191 	}
192 	if ((ctx->front0 = ctx->front = mmap(NULL,
193 		    (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, ctx->fd, (off_t)0)) == MAP_FAILED) {
194 		perror("uim_look_open_dict");
195 		ctx->front0 = ctx->front = 0;
196 	}
197 	ctx->len = (size_t)sb.st_size;
198 	ctx->back0 = ctx->back = ctx->front + sb.st_size;
199 
200 	return 1;
201 }
202 
203 int
uim_look(char * string,uim_look_ctx * ctx)204 uim_look(char *string, uim_look_ctx *ctx)
205 {
206 	int ch;
207 	unsigned char *readp, *writep;
208 	int fflag = ctx->fflag, dflag = ctx->dflag;
209 
210 	/* Reformat string to avoid doing it multiple times later. */
211 	for (readp = writep = (unsigned char *)string; (ch = *readp++) != '\0';) {
212 		if (fflag)
213 			ch = FOLD(ch);
214 		if (dflag)
215 			ch = DICT(ch);
216 		if (ch != NO_COMPARE)
217 			*(writep++) = (unsigned char)ch;
218 	}
219 	*writep = '\0';
220 
221 	ctx->front = binary_search(string, ctx);
222 	ctx->front = linear_search(string, ctx);
223 
224 	return (ctx->front ? 1 : 0);
225 }
226 
227 #if 0
228 int
229 main(int argc, char *argv[])
230 {
231 	int ch, termchar;
232 	char *file, *string = NULL, *p;
233 	int dflag = 0, fflag = 0;
234 	int ret;
235 	uim_look_ctx *ctx;
236 	char buf[BUFSIZ];
237 
238 	file = "/usr/share/dict/words";
239 	termchar = '\0';
240 	while ((ch = getopt(argc, argv, "dft:")) != -1)
241 		switch(ch) {
242 		case 'd':
243 			dflag = 1;
244 			break;
245 		case 'f':
246 			fflag = 1;
247 			break;
248 		case 't':
249 			termchar = *optarg;
250 			break;
251 		case '?':
252 		default:
253 			usage();
254 		}
255 	argc -= optind;
256 	argv += optind;
257 
258 	switch (argc) {
259 	case 2:				/* Don't set -df for user. */
260 		string = *argv++;
261 		file = *argv;
262 		break;
263 	case 1:				/* But set -df by default. */
264 		dflag = fflag = 1;
265 		string = *argv;
266 		break;
267 	default:
268 		usage();
269 	}
270 
271 	if (termchar != '\0' && (p = strchr(string, termchar)) != NULL)
272 		*++p = '\0';
273 
274 
275 	ctx = look_init();
276 
277 	if (!ctx)
278 		exit(1);
279 
280 	look_set_option_dictionary_order(dflag, ctx);
281 	look_set_option_ignore_case(fflag, ctx);
282 
283 	if (termchar != '\0' && (p = strchr(string, termchar)) != NULL)
284 		*++p = '\0';
285 
286 	if (!look_open_dict(file, ctx))
287 		exit(1);
288 
289 	if ((ret = look(string, ctx)) != 0) {
290 		look_set(ctx);
291 		while (look_get(string, buf, sizeof(buf), ctx) != 0)
292 			printf("%s\n", buf);
293 	}
294 
295 	look_finish(ctx);
296 
297 	return ret;
298 }
299 #endif
300 
301 /*
302  * Binary search for "string" in memory between "front" and "back".
303  *
304  * This routine is expected to return a pointer to the start of a line at
305  * *or before* the first word matching "string".  Relaxing the constraint
306  * this way simplifies the algorithm.
307  *
308  * Invariants:
309  * 	front points to the beginning of a line at or before the first
310  *	matching string.
311  *
312  * 	back points to the beginning of a line at or after the first
313  *	matching line.
314  *
315  * Base of the Invariants.
316  * 	front = NULL;
317  *	back = EOF;
318  *
319  * Advancing the Invariants:
320  *
321  * 	p = first newline after halfway point from front to back.
322  *
323  * 	If the string at "p" is not greater than the string to match,
324  *	p is the new front.  Otherwise it is the new back.
325  *
326  * Termination:
327  *
328  * 	The definition of the routine allows it return at any point,
329  *	since front is always at or before the line to print.
330  *
331  * 	In fact, it returns when the chosen "p" equals "back".  This
332  *	implies that there exists a string is least half as long as
333  *	(back - front), which in turn implies that a linear search will
334  *	be no more expensive than the cost of simply printing a string or two.
335  *
336  * 	Trying to continue with binary search at this point would be
337  *	more trouble than it's worth.
338  */
339 #define	SKIP_PAST_NEWLINE(p, back) \
340 	while (p < back && *p++ != '\n');
341 
342 static char *
binary_search(char * string,uim_look_ctx * ctx)343 binary_search(char *string, uim_look_ctx *ctx)
344 {
345 	char *p;
346 	char *front = ctx->front, *back = ctx->back;
347 
348 	p = front + (back - front) / 2;
349 	SKIP_PAST_NEWLINE(p, back);
350 
351 	/*
352 	 * If the file changes underneath us, make sure we don't
353 	 * infinitely loop.
354 	 */
355 	while (p < back && back > front) {
356 		if (compare(string, p, ctx) == GREATER)
357 			front = p;
358 		else
359 			back = p;
360 		p = front + (back - front) / 2;
361 		SKIP_PAST_NEWLINE(p, back);
362 	}
363 	return (front);
364 }
365 
366 /*
367  * Find the first line that starts with string, linearly searching from front
368  * to back.
369  *
370  * Return NULL for no such line.
371  *
372  * This routine assumes:
373  *
374  * 	o front points at the first character in a line.
375  *	o front is before or at the first line to be printed.
376  */
377 static char *
linear_search(char * string,uim_look_ctx * ctx)378 linear_search(char *string, uim_look_ctx *ctx)
379 {
380 	char *front = ctx->front, *back = ctx->back;
381 
382 	while (front < back) {
383 		switch (compare(string, front, ctx)) {
384 		case EQUAL:		/* Found it. */
385 			return (front);
386 			break;
387 		case LESS:		/* No such string. */
388 			return (NULL);
389 			break;
390 		case GREATER:		/* Keep going. */
391 			break;
392 		}
393 		SKIP_PAST_NEWLINE(front, back);
394 	}
395 	return (NULL);
396 }
397 
398 #if 0
399 /*
400  * Print as many lines as match string, starting at front.
401  */
402 void
403 look_print_from(char *string, uim_look_ctx *ctx)
404 {
405 	char *front = ctx->front, *back = ctx->back;
406 
407 	for (; front < back && compare(string, front, ctx) == EQUAL; ++front) {
408 		for (; front < back && *front != '\n'; ++front)
409 			if (putchar(*front) == EOF) {
410 				fprintf(stderr, "print_from: stdout");
411 				return;
412 			}
413 		if (putchar('\n') == EOF) {
414 			fprintf(stderr, "print_from: stdout");
415 			return;
416 		}
417 	}
418 }
419 #endif
420 
421 /*
422  * Return LESS, GREATER, or EQUAL depending on how the string1 compares with
423  * string2 (s1 ??? s2).
424  *
425  * 	o Matches up to len(s1) are EQUAL.
426  *	o Matches up to len(s2) are GREATER.
427  *
428  * Compare understands about the -f and -d flags, and treats comparisons
429  * appropriately.
430  *
431  * The string "s1" is null terminated.  The string s2 is '\n' terminated (or
432  * "back" terminated).
433  */
434 static int
compare(char * s1,char * s2,uim_look_ctx * ctx)435 compare(char *s1, char *s2, uim_look_ctx *ctx)
436 {
437 	int ch;
438 	unsigned char *back = (unsigned char *)ctx->back;
439 	int fflag = ctx->fflag, dflag = ctx->dflag;
440 
441 	for (; (unsigned char)*s1 && (unsigned char *)s2 < back && *s2 != '\n'; ++s1, ++s2) {
442 		ch = (unsigned char)*s2;
443 		if (fflag)
444 			ch = FOLD(ch);
445 		if (dflag)
446 			ch = DICT(ch);
447 
448 		if (ch == NO_COMPARE) {
449 			++s2;		/* Ignore character in comparison. */
450 			continue;
451 		}
452 		if ((unsigned char)*s1 != ch)
453 			return ((unsigned char)*s1 < ch ? LESS : GREATER);
454 	}
455 	return ((unsigned char)*s1 ? GREATER : EQUAL);
456 }
457 
458 #if 0
459 void
460 usage(void)
461 {
462 	(void)fprintf(stderr, "usage: look [-df] [-t char] string [file]\n");
463 	exit(2);
464 }
465 #endif
466 
467