1 /* $OpenBSD: word.c,v 1.8 2016/01/10 13:18:07 mestre Exp $ */ 2 /* $NetBSD: word.c,v 1.2 1995/03/21 12:14:45 cgd Exp $ */ 3 4 /*- 5 * Copyright (c) 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Barry Brachman. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <sys/stat.h> 37 38 #include <err.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 43 #include "bog.h" 44 #include "extern.h" 45 46 static char *dictspace, *dictend; 47 static char *sp; 48 49 static int first = 1, lastch = 0; 50 51 /* 52 * Return the next word in the compressed dictionary in 'buffer' or 53 * NULL on end-of-file 54 */ 55 char * 56 nextword(FILE *fp) 57 { 58 int ch, pcount; 59 char *p; 60 static char buf[MAXWORDLEN + 1]; 61 62 if (fp == NULL) { 63 if (sp == dictend) 64 return (NULL); 65 66 p = buf + (int) *sp++; 67 68 /* 69 * The dictionary ends with a null byte 70 */ 71 while (*sp >= 'a') 72 if ((*p++ = *sp++) == 'q') 73 *p++ = 'u'; 74 } else { 75 if (first) { 76 if ((pcount = getc(fp)) == EOF) 77 return (NULL); 78 first = 0; 79 } else if ((pcount = lastch) == EOF) 80 return (NULL); 81 82 p = buf + pcount; 83 84 while ((ch = getc(fp)) != EOF && ch >= 'a') 85 if ((*p++ = ch) == 'q') 86 *p++ = 'u'; 87 lastch = ch; 88 } 89 wordlen = (int) (p - buf); 90 *p = '\0'; 91 return (buf); 92 } 93 94 /* 95 * Reset the state of nextword() and do the fseek() 96 */ 97 long 98 dictseek(FILE *fp, long offset, int ptrname) 99 { 100 if (fp == NULL) { 101 if ((sp = dictspace + offset) >= dictend) 102 return (-1); 103 return (0); 104 } 105 106 first = 1; 107 return (fseek(fp, offset, ptrname)); 108 } 109 110 FILE * 111 opendict(const char *dict) 112 { 113 FILE *fp; 114 115 if ((fp = fopen(dict, "r")) == NULL) 116 return (NULL); 117 return (fp); 118 } 119 120 /* 121 * Load the given dictionary and initialize the pointers 122 */ 123 int 124 loaddict(FILE *fp) 125 { 126 struct stat statb; 127 long n; 128 int st; 129 char *p; 130 131 if (fstat(fileno(fp), &statb) < 0) { 132 fclose(fp); 133 return (-1); 134 } 135 136 /* 137 * An extra character (a sentinel) is allocated and set to null 138 * to improve the expansion loop in nextword(). 139 */ 140 if ((dictspace = malloc(statb.st_size + 1)) == NULL) { 141 fclose(fp); 142 return (-1); 143 } 144 n = (long)statb.st_size; 145 sp = dictspace; 146 dictend = dictspace + n; 147 148 p = dictspace; 149 st = -1; 150 while (n > 0 && (st = fread(p, 1, BUFSIZ, fp)) > 0) { 151 p += st; 152 n -= st; 153 } 154 if (st < 0) { 155 fclose(fp); 156 warnx("Error reading dictionary"); 157 return (-1); 158 } 159 *p = '\0'; 160 return (0); 161 } 162 163 /* 164 * Dependent on the exact format of the index file: 165 * Starting offset field begins in column 1 and length field in column 9 166 * Taking the easy way out, the input buffer is made "large" and a check 167 * is made for lines that are too long 168 */ 169 int 170 loadindex(const char *indexfile) 171 { 172 int i, j; 173 char buf[BUFSIZ]; 174 FILE *fp; 175 176 if ((fp = fopen(indexfile, "r")) == NULL) { 177 warnx("Can't open '%s'", indexfile); 178 return (-1); 179 } 180 i = 0; 181 while (fgets(buf, sizeof(buf), fp) != NULL) { 182 if (strchr(buf, '\n') == NULL) { 183 warnx("A line in the index file is too long"); 184 fclose(fp); 185 return(-1); 186 } 187 j = *buf - 'a'; 188 if (i != j) { 189 warnx("Bad index order"); 190 fclose(fp); 191 return(-1); 192 } 193 dictindex[j].start = atol(buf + 1); 194 dictindex[j].length = atol(buf + 9) - dictindex[j].start; 195 i++; 196 } 197 if (i != 26) { 198 warnx("Bad index length"); 199 fclose(fp); 200 return(-1); 201 } 202 fclose(fp); 203 return(0); 204 } 205