1 /* $OpenBSD: look.c,v 1.11 2005/06/25 17:00:35 niallo Exp $ */
2 /* $NetBSD: look.c,v 1.7 1995/08/31 22:41:02 jtc Exp $ */
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * David Hitz of Auspex Systems, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include <config.h>
36 #include "bsdlook.h"
37
38 #ifndef lint
39 #if 0
40 static char copyright[] =
41 "@(#) Copyright (c) 1991, 1993\n The Regents of the University of California. All rights reserved.\n";
42 #endif
43 #endif /* not lint */
44
45 #ifndef lint
46 #if 0
47 static char sccsid[] = "@(#)look.c 8.2 (Berkeley) 5/4/95";
48 static char rcsid[] = "$OpenBSD: look.c,v 1.11 2005/06/25 17:00:35 niallo Exp $";
49 #endif
50 #endif /* not lint */
51
52 /*
53 * look -- find lines in a sorted list.
54 *
55 * The man page said that TABs and SPACEs participate in -d comparisons.
56 * In fact, they were ignored. This implements historic practice, not
57 * the manual page.
58 */
59
60 #include <sys/types.h>
61 #include <sys/mman.h>
62 #include <sys/stat.h>
63
64 #include <ctype.h>
65 #include <errno.h>
66 #include <fcntl.h>
67 #include <limits.h>
68 #include <stdio.h>
69 #include <stdlib.h>
70 #include <string.h>
71 #include <unistd.h>
72
73 /*
74 * FOLD and DICT convert characters to a normal form for comparison,
75 * according to the user specified flags.
76 *
77 * DICT expects integers because it uses a non-character value to
78 * indicate a character which should not participate in comparisons.
79 */
80 #define EQUAL 0
81 #define GREATER 1
82 #define LESS (-1)
83 #define NO_COMPARE (-2)
84
85 #define FOLD(c) (isascii(c) && isupper(c) ? tolower(c) : (c))
86 #define DICT(c) (isascii(c) && isalnum(c) ? (c) : NO_COMPARE)
87
88 #ifndef SIZE_T_MAX
89 #define SIZE_T_MAX ULONG_MAX
90 #endif
91
92 struct uim_look_ctx {
93 int fd;
94 size_t len;
95 char *front0, *back0;
96 char *front, *back;
97 int dflag, fflag;
98 char *acc;
99 };
100
101 static char *binary_search(char *, uim_look_ctx *);
102 static int compare(char *, char *, uim_look_ctx *);
103 static char *linear_search(char *, uim_look_ctx *);
104
105 uim_look_ctx *
uim_look_init(void)106 uim_look_init(void)
107 {
108 uim_look_ctx *ctx = calloc(1, sizeof(*ctx));
109
110 if (!ctx) {
111 perror("uim_look_init");
112 return NULL;
113 }
114
115 /* set -df by default. */
116 ctx->dflag = ctx->fflag = 1;
117 return ctx;
118 }
119
120 void
uim_look_set_option_dictionary_order(int dflag,uim_look_ctx * ctx)121 uim_look_set_option_dictionary_order(int dflag, uim_look_ctx *ctx)
122 {
123 ctx->dflag = dflag;
124 }
125
126 void
uim_look_set_option_ignore_case(int fflag,uim_look_ctx * ctx)127 uim_look_set_option_ignore_case(int fflag, uim_look_ctx *ctx)
128 {
129 ctx->fflag = fflag;
130 }
131
132 void
uim_look_reset(uim_look_ctx * ctx)133 uim_look_reset(uim_look_ctx *ctx)
134 {
135 ctx->front = ctx->acc = ctx->front0;
136 ctx->back = ctx->back0;
137 }
138
139 void
uim_look_set(uim_look_ctx * ctx)140 uim_look_set(uim_look_ctx *ctx)
141 {
142 ctx->acc = ctx->front;
143 }
144
145 size_t
uim_look_get(char * string,char * dst,size_t len,uim_look_ctx * ctx)146 uim_look_get(char *string, char *dst, size_t len, uim_look_ctx *ctx)
147 {
148 char *front = ctx->acc, *back = ctx->back;
149 char *p = dst;
150 size_t dst_len = 0;
151
152 if (front < back && compare(string, front, ctx) == EQUAL) {
153 for (; dst_len < len - 1 && front < back && *front != '\n'; ++front, ++dst_len) {
154 *p++ = *front;
155 }
156 ctx->acc = front + 1;
157 *p = '\0';
158 return dst_len;
159 }
160 return dst_len;
161 }
162
163 void
uim_look_finish(uim_look_ctx * ctx)164 uim_look_finish(uim_look_ctx *ctx)
165 {
166 if (!ctx)
167 return;
168
169 if ((uintptr_t)ctx->front0 > 0 && munmap(ctx->front0, ctx->len) == -1)
170 perror("uim_look_finish");
171
172 if (ctx->fd > 0)
173 close(ctx->fd);
174
175 free(ctx);
176 return;
177 }
178
179 int
uim_look_open_dict(const char * dict,uim_look_ctx * ctx)180 uim_look_open_dict(const char *dict, uim_look_ctx *ctx)
181 {
182 struct stat sb;
183
184 if ((ctx->fd = open(dict, O_RDONLY, 0)) < 0 || fstat(ctx->fd, &sb)) {
185 perror("uim_look_open_dict");
186 return 0;
187 }
188 if ((size_t)sb.st_size > SIZE_T_MAX) {
189 perror("uim_look_open_dict");
190 return 0;
191 }
192 if ((ctx->front0 = ctx->front = mmap(NULL,
193 (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, ctx->fd, (off_t)0)) == MAP_FAILED) {
194 perror("uim_look_open_dict");
195 ctx->front0 = ctx->front = 0;
196 }
197 ctx->len = (size_t)sb.st_size;
198 ctx->back0 = ctx->back = ctx->front + sb.st_size;
199
200 return 1;
201 }
202
203 int
uim_look(char * string,uim_look_ctx * ctx)204 uim_look(char *string, uim_look_ctx *ctx)
205 {
206 int ch;
207 unsigned char *readp, *writep;
208 int fflag = ctx->fflag, dflag = ctx->dflag;
209
210 /* Reformat string to avoid doing it multiple times later. */
211 for (readp = writep = (unsigned char *)string; (ch = *readp++) != '\0';) {
212 if (fflag)
213 ch = FOLD(ch);
214 if (dflag)
215 ch = DICT(ch);
216 if (ch != NO_COMPARE)
217 *(writep++) = (unsigned char)ch;
218 }
219 *writep = '\0';
220
221 ctx->front = binary_search(string, ctx);
222 ctx->front = linear_search(string, ctx);
223
224 return (ctx->front ? 1 : 0);
225 }
226
227 #if 0
228 int
229 main(int argc, char *argv[])
230 {
231 int ch, termchar;
232 char *file, *string = NULL, *p;
233 int dflag = 0, fflag = 0;
234 int ret;
235 uim_look_ctx *ctx;
236 char buf[BUFSIZ];
237
238 file = "/usr/share/dict/words";
239 termchar = '\0';
240 while ((ch = getopt(argc, argv, "dft:")) != -1)
241 switch(ch) {
242 case 'd':
243 dflag = 1;
244 break;
245 case 'f':
246 fflag = 1;
247 break;
248 case 't':
249 termchar = *optarg;
250 break;
251 case '?':
252 default:
253 usage();
254 }
255 argc -= optind;
256 argv += optind;
257
258 switch (argc) {
259 case 2: /* Don't set -df for user. */
260 string = *argv++;
261 file = *argv;
262 break;
263 case 1: /* But set -df by default. */
264 dflag = fflag = 1;
265 string = *argv;
266 break;
267 default:
268 usage();
269 }
270
271 if (termchar != '\0' && (p = strchr(string, termchar)) != NULL)
272 *++p = '\0';
273
274
275 ctx = look_init();
276
277 if (!ctx)
278 exit(1);
279
280 look_set_option_dictionary_order(dflag, ctx);
281 look_set_option_ignore_case(fflag, ctx);
282
283 if (termchar != '\0' && (p = strchr(string, termchar)) != NULL)
284 *++p = '\0';
285
286 if (!look_open_dict(file, ctx))
287 exit(1);
288
289 if ((ret = look(string, ctx)) != 0) {
290 look_set(ctx);
291 while (look_get(string, buf, sizeof(buf), ctx) != 0)
292 printf("%s\n", buf);
293 }
294
295 look_finish(ctx);
296
297 return ret;
298 }
299 #endif
300
301 /*
302 * Binary search for "string" in memory between "front" and "back".
303 *
304 * This routine is expected to return a pointer to the start of a line at
305 * *or before* the first word matching "string". Relaxing the constraint
306 * this way simplifies the algorithm.
307 *
308 * Invariants:
309 * front points to the beginning of a line at or before the first
310 * matching string.
311 *
312 * back points to the beginning of a line at or after the first
313 * matching line.
314 *
315 * Base of the Invariants.
316 * front = NULL;
317 * back = EOF;
318 *
319 * Advancing the Invariants:
320 *
321 * p = first newline after halfway point from front to back.
322 *
323 * If the string at "p" is not greater than the string to match,
324 * p is the new front. Otherwise it is the new back.
325 *
326 * Termination:
327 *
328 * The definition of the routine allows it return at any point,
329 * since front is always at or before the line to print.
330 *
331 * In fact, it returns when the chosen "p" equals "back". This
332 * implies that there exists a string is least half as long as
333 * (back - front), which in turn implies that a linear search will
334 * be no more expensive than the cost of simply printing a string or two.
335 *
336 * Trying to continue with binary search at this point would be
337 * more trouble than it's worth.
338 */
339 #define SKIP_PAST_NEWLINE(p, back) \
340 while (p < back && *p++ != '\n');
341
342 static char *
binary_search(char * string,uim_look_ctx * ctx)343 binary_search(char *string, uim_look_ctx *ctx)
344 {
345 char *p;
346 char *front = ctx->front, *back = ctx->back;
347
348 p = front + (back - front) / 2;
349 SKIP_PAST_NEWLINE(p, back);
350
351 /*
352 * If the file changes underneath us, make sure we don't
353 * infinitely loop.
354 */
355 while (p < back && back > front) {
356 if (compare(string, p, ctx) == GREATER)
357 front = p;
358 else
359 back = p;
360 p = front + (back - front) / 2;
361 SKIP_PAST_NEWLINE(p, back);
362 }
363 return (front);
364 }
365
366 /*
367 * Find the first line that starts with string, linearly searching from front
368 * to back.
369 *
370 * Return NULL for no such line.
371 *
372 * This routine assumes:
373 *
374 * o front points at the first character in a line.
375 * o front is before or at the first line to be printed.
376 */
377 static char *
linear_search(char * string,uim_look_ctx * ctx)378 linear_search(char *string, uim_look_ctx *ctx)
379 {
380 char *front = ctx->front, *back = ctx->back;
381
382 while (front < back) {
383 switch (compare(string, front, ctx)) {
384 case EQUAL: /* Found it. */
385 return (front);
386 break;
387 case LESS: /* No such string. */
388 return (NULL);
389 break;
390 case GREATER: /* Keep going. */
391 break;
392 }
393 SKIP_PAST_NEWLINE(front, back);
394 }
395 return (NULL);
396 }
397
398 #if 0
399 /*
400 * Print as many lines as match string, starting at front.
401 */
402 void
403 look_print_from(char *string, uim_look_ctx *ctx)
404 {
405 char *front = ctx->front, *back = ctx->back;
406
407 for (; front < back && compare(string, front, ctx) == EQUAL; ++front) {
408 for (; front < back && *front != '\n'; ++front)
409 if (putchar(*front) == EOF) {
410 fprintf(stderr, "print_from: stdout");
411 return;
412 }
413 if (putchar('\n') == EOF) {
414 fprintf(stderr, "print_from: stdout");
415 return;
416 }
417 }
418 }
419 #endif
420
421 /*
422 * Return LESS, GREATER, or EQUAL depending on how the string1 compares with
423 * string2 (s1 ??? s2).
424 *
425 * o Matches up to len(s1) are EQUAL.
426 * o Matches up to len(s2) are GREATER.
427 *
428 * Compare understands about the -f and -d flags, and treats comparisons
429 * appropriately.
430 *
431 * The string "s1" is null terminated. The string s2 is '\n' terminated (or
432 * "back" terminated).
433 */
434 static int
compare(char * s1,char * s2,uim_look_ctx * ctx)435 compare(char *s1, char *s2, uim_look_ctx *ctx)
436 {
437 int ch;
438 unsigned char *back = (unsigned char *)ctx->back;
439 int fflag = ctx->fflag, dflag = ctx->dflag;
440
441 for (; (unsigned char)*s1 && (unsigned char *)s2 < back && *s2 != '\n'; ++s1, ++s2) {
442 ch = (unsigned char)*s2;
443 if (fflag)
444 ch = FOLD(ch);
445 if (dflag)
446 ch = DICT(ch);
447
448 if (ch == NO_COMPARE) {
449 ++s2; /* Ignore character in comparison. */
450 continue;
451 }
452 if ((unsigned char)*s1 != ch)
453 return ((unsigned char)*s1 < ch ? LESS : GREATER);
454 }
455 return ((unsigned char)*s1 ? GREATER : EQUAL);
456 }
457
458 #if 0
459 void
460 usage(void)
461 {
462 (void)fprintf(stderr, "usage: look [-df] [-t char] string [file]\n");
463 exit(2);
464 }
465 #endif
466
467