1 /* $NetBSD: word.c,v 1.6 2001/02/05 00:27:35 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Barry Brachman. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include <sys/cdefs.h> 40 #ifndef lint 41 #if 0 42 static char sccsid[] = "@(#)word.c 8.1 (Berkeley) 6/11/93"; 43 #else 44 __RCSID("$NetBSD: word.c,v 1.6 2001/02/05 00:27:35 christos Exp $"); 45 #endif 46 #endif /* not lint */ 47 48 #include <sys/types.h> 49 #include <sys/stat.h> 50 51 #include <err.h> 52 #include <stdio.h> 53 #include <stdlib.h> 54 #include <string.h> 55 56 #include "bog.h" 57 #include "extern.h" 58 59 static char *dictspace, *dictend; 60 static char *sp; 61 62 static int first = 1, lastch = 0; 63 64 extern struct dictindex dictindex[]; 65 extern int wordlen; 66 67 /* 68 * Return the next word in the compressed dictionary in 'buffer' or 69 * NULL on end-of-file 70 */ 71 char * 72 nextword(fp) 73 FILE *fp; 74 { 75 int ch, pcount; 76 char *p; 77 static char buf[MAXWORDLEN + 1]; 78 79 if (fp == NULL) { 80 if (sp == dictend) 81 return (NULL); 82 83 p = buf + (int) *sp++; 84 85 /* 86 * The dictionary ends with a null byte 87 */ 88 while (*sp >= 'a') 89 if ((*p++ = *sp++) == 'q') 90 *p++ = 'u'; 91 } else { 92 if (first) { 93 if ((pcount = getc(fp)) == EOF) 94 return (NULL); 95 first = 0; 96 } else if ((pcount = lastch) == EOF) 97 return (NULL); 98 99 p = buf + pcount; 100 101 while ((ch = getc(fp)) != EOF && ch >= 'a') 102 if ((*p++ = ch) == 'q') 103 *p++ = 'u'; 104 lastch = ch; 105 } 106 wordlen = (int) (p - buf); 107 *p = '\0'; 108 return (buf); 109 } 110 111 /* 112 * Reset the state of nextword() and do the fseek() 113 */ 114 long 115 dictseek(fp, offset, ptrname) 116 FILE *fp; 117 long offset; 118 int ptrname; 119 { 120 if (fp == NULL) { 121 if ((sp = dictspace + offset) >= dictend) 122 return (-1); 123 return (0); 124 } 125 126 first = 1; 127 return (fseek(fp, offset, ptrname)); 128 } 129 130 FILE * 131 opendict(dict) 132 const char *dict; 133 { 134 FILE *fp; 135 136 if ((fp = fopen(dict, "r")) == NULL) 137 return (NULL); 138 return (fp); 139 } 140 141 /* 142 * Load the given dictionary and initialize the pointers 143 */ 144 int 145 loaddict(fp) 146 FILE *fp; 147 { 148 struct stat statb; 149 long n; 150 int st; 151 char *p; 152 153 if (fstat(fileno(fp), &statb) < 0) { 154 (void)fclose(fp); 155 return (-1); 156 } 157 158 /* 159 * An extra character (a sentinel) is allocated and set to null 160 * to improve the expansion loop in nextword(). 161 */ 162 if ((dictspace = malloc(statb.st_size + 1)) == NULL) { 163 (void)fclose(fp); 164 return (-1); 165 } 166 n = (long)statb.st_size; 167 sp = dictspace; 168 dictend = dictspace + n; 169 170 p = dictspace; 171 st = -1; 172 while (n > 0 && (st = fread(p, 1, BUFSIZ, fp)) > 0) { 173 p += st; 174 n -= st; 175 } 176 if (st < 0) { 177 (void)fclose(fp); 178 warnx("Error reading dictionary"); 179 return (-1); 180 } 181 *p = '\0'; 182 return (0); 183 } 184 185 /* 186 * Dependent on the exact format of the index file: 187 * Starting offset field begins in column 1 and length field in column 9 188 * Taking the easy way out, the input buffer is made "large" and a check 189 * is made for lines that are too long 190 */ 191 int 192 loadindex(indexfile) 193 const char *indexfile; 194 { 195 int i, j; 196 char buf[BUFSIZ]; 197 FILE *fp; 198 199 if ((fp = fopen(indexfile, "r")) == NULL) { 200 warn("Can't open '%s'", indexfile); 201 return (-1); 202 } 203 i = 0; 204 while (fgets(buf, sizeof(buf), fp) != NULL) { 205 if (strchr(buf, '\n') == NULL) { 206 warnx("A line in the index file is too long"); 207 return(-1); 208 } 209 j = *buf - 'a'; 210 if (i != j) { 211 warnx("Bad index order"); 212 return(-1); 213 } 214 dictindex[j].start = atol(buf + 1); 215 dictindex[j].length = atol(buf + 9) - dictindex[j].start; 216 i++; 217 } 218 if (i != 26) { 219 warnx("Bad index length"); 220 return(-1); 221 } 222 (void) fclose(fp); 223 return(0); 224 } 225