1 /* $OpenBSD: look.c,v 1.18 2015/10/09 01:37:08 deraadt Exp $ */ 2 /* $NetBSD: look.c,v 1.7 1995/08/31 22:41:02 jtc Exp $ */ 3 4 /*- 5 * Copyright (c) 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * David Hitz of Auspex Systems, Inc. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 /* 37 * look -- find lines in a sorted list. 38 * 39 * The man page said that TABs and SPACEs participate in -d comparisons. 40 * In fact, they were ignored. This implements historic practice, not 41 * the manual page. 42 */ 43 44 #include <sys/types.h> 45 #include <sys/mman.h> 46 #include <sys/stat.h> 47 48 #include <ctype.h> 49 #include <errno.h> 50 #include <fcntl.h> 51 #include <stdint.h> 52 #include <stdio.h> 53 #include <stdlib.h> 54 #include <string.h> 55 #include <unistd.h> 56 #include <err.h> 57 58 #include "pathnames.h" 59 60 /* 61 * FOLD and DICT convert characters to a normal form for comparison, 62 * according to the user specified flags. 63 * 64 * DICT expects integers because it uses a non-character value to 65 * indicate a character which should not participate in comparisons. 66 */ 67 #define EQUAL 0 68 #define GREATER 1 69 #define LESS (-1) 70 #define NO_COMPARE (-2) 71 72 #define FOLD(c) (isascii(c) && isupper(c) ? tolower(c) : (c)) 73 #define DICT(c) (isascii(c) && isalnum(c) ? (c) : NO_COMPARE) 74 75 int dflag, fflag; 76 77 char *binary_search(char *, char *, char *); 78 int compare(char *, char *, char *); 79 char *linear_search(char *, char *, char *); 80 int look(char *, char *, char *); 81 void print_from(char *, char *, char *); 82 void usage(void); 83 84 int 85 main(int argc, char *argv[]) 86 { 87 struct stat sb; 88 int ch, fd, termchar; 89 char *back, *file, *front, *string, *p; 90 91 if (pledge("stdio rpath", NULL) == -1) 92 err(1, "pledge"); 93 94 file = _PATH_WORDS; 95 termchar = '\0'; 96 while ((ch = getopt(argc, argv, "dft:")) != -1) 97 switch(ch) { 98 case 'd': 99 dflag = 1; 100 break; 101 case 'f': 102 fflag = 1; 103 break; 104 case 't': 105 termchar = *optarg; 106 break; 107 case '?': 108 default: 109 usage(); 110 } 111 argc -= optind; 112 argv += optind; 113 114 switch (argc) { 115 case 2: /* Don't set -df for user. */ 116 string = *argv++; 117 file = *argv; 118 break; 119 case 1: /* But set -df by default. */ 120 dflag = fflag = 1; 121 string = *argv; 122 break; 123 default: 124 usage(); 125 } 126 127 if (termchar != '\0' && (p = strchr(string, termchar)) != NULL) 128 *++p = '\0'; 129 130 if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb)) 131 err(2, "%s", file); 132 if (sb.st_size > SIZE_MAX) 133 errc(2, EFBIG, "%s", file); 134 if ((front = mmap(NULL, 135 (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, (off_t)0)) == MAP_FAILED) 136 err(2, "%s", file); 137 back = front + sb.st_size; 138 exit(look(string, front, back)); 139 } 140 141 int 142 look(char *string, char *front, char *back) 143 { 144 int ch; 145 char *readp, *writep; 146 147 /* Reformat string to avoid doing it multiple times later. */ 148 for (readp = writep = string; ch = *readp++;) { 149 if (fflag) 150 ch = FOLD((unsigned char)ch); 151 if (dflag) 152 ch = DICT((unsigned char)ch); 153 if (ch != NO_COMPARE) 154 *(writep++) = ch; 155 } 156 *writep = '\0'; 157 158 front = binary_search(string, front, back); 159 front = linear_search(string, front, back); 160 161 if (front) 162 print_from(string, front, back); 163 return (front ? 0 : 1); 164 } 165 166 167 /* 168 * Binary search for "string" in memory between "front" and "back". 169 * 170 * This routine is expected to return a pointer to the start of a line at 171 * *or before* the first word matching "string". Relaxing the constraint 172 * this way simplifies the algorithm. 173 * 174 * Invariants: 175 * front points to the beginning of a line at or before the first 176 * matching string. 177 * 178 * back points to the beginning of a line at or after the first 179 * matching line. 180 * 181 * Base of the Invariants. 182 * front = NULL; 183 * back = EOF; 184 * 185 * Advancing the Invariants: 186 * 187 * p = first newline after halfway point from front to back. 188 * 189 * If the string at "p" is not greater than the string to match, 190 * p is the new front. Otherwise it is the new back. 191 * 192 * Termination: 193 * 194 * The definition of the routine allows it return at any point, 195 * since front is always at or before the line to print. 196 * 197 * In fact, it returns when the chosen "p" equals "back". This 198 * implies that there exists a string is least half as long as 199 * (back - front), which in turn implies that a linear search will 200 * be no more expensive than the cost of simply printing a string or two. 201 * 202 * Trying to continue with binary search at this point would be 203 * more trouble than it's worth. 204 */ 205 #define SKIP_PAST_NEWLINE(p, back) \ 206 while (p < back && *p++ != '\n'); 207 208 char * 209 binary_search(char *string, char *front, char *back) 210 { 211 char *p; 212 213 p = front + (back - front) / 2; 214 SKIP_PAST_NEWLINE(p, back); 215 216 /* 217 * If the file changes underneath us, make sure we don't 218 * infinitely loop. 219 */ 220 while (p < back && back > front) { 221 if (compare(string, p, back) == GREATER) 222 front = p; 223 else 224 back = p; 225 p = front + (back - front) / 2; 226 SKIP_PAST_NEWLINE(p, back); 227 } 228 return (front); 229 } 230 231 /* 232 * Find the first line that starts with string, linearly searching from front 233 * to back. 234 * 235 * Return NULL for no such line. 236 * 237 * This routine assumes: 238 * 239 * o front points at the first character in a line. 240 * o front is before or at the first line to be printed. 241 */ 242 char * 243 linear_search(char *string, char *front, char *back) 244 { 245 while (front < back) { 246 switch (compare(string, front, back)) { 247 case EQUAL: /* Found it. */ 248 return (front); 249 break; 250 case LESS: /* No such string. */ 251 return (NULL); 252 break; 253 case GREATER: /* Keep going. */ 254 break; 255 } 256 SKIP_PAST_NEWLINE(front, back); 257 } 258 return (NULL); 259 } 260 261 /* 262 * Print as many lines as match string, starting at front. 263 */ 264 void 265 print_from(char *string, char *front, char *back) 266 { 267 for (; front < back && compare(string, front, back) == EQUAL; ++front) { 268 for (; front < back && *front != '\n'; ++front) 269 if (putchar(*front) == EOF) 270 err(2, "stdout"); 271 if (putchar('\n') == EOF) 272 err(2, "stdout"); 273 } 274 } 275 276 /* 277 * Return LESS, GREATER, or EQUAL depending on how the string1 compares with 278 * string2 (s1 ??? s2). 279 * 280 * o Matches up to len(s1) are EQUAL. 281 * o Matches up to len(s2) are GREATER. 282 * 283 * Compare understands about the -f and -d flags, and treats comparisons 284 * appropriately. 285 * 286 * The string "s1" is null terminated. The string s2 is '\n' terminated (or 287 * "back" terminated). 288 */ 289 int 290 compare(char *s1, char *s2, char *back) 291 { 292 int ch; 293 294 for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) { 295 ch = *s2; 296 if (fflag) 297 ch = FOLD((unsigned char)ch); 298 if (dflag) 299 ch = DICT((unsigned char)ch); 300 301 if (ch == NO_COMPARE) { 302 ++s2; /* Ignore character in comparison. */ 303 continue; 304 } 305 if (*s1 != ch) 306 return (*s1 < ch ? LESS : GREATER); 307 } 308 return (*s1 ? GREATER : EQUAL); 309 } 310 311 void 312 usage(void) 313 { 314 (void)fprintf(stderr, 315 "usage: look [-df] [-t termchar] string [file]\n"); 316 exit(2); 317 } 318