1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Ken Arnold. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#) Copyright (c) 1989, 1993 The Regents of the University of California. All rights reserved. 33 * @(#)strfile.c 8.1 (Berkeley) 5/31/93 34 * $FreeBSD: head/usr.bin/fortune/strfile/strfile.c 316500 2017-04-04 19:46:23Z asomers $ 35 */ 36 37 #include <sys/param.h> 38 #include <sys/endian.h> 39 #include <ctype.h> 40 #include <locale.h> 41 #include <stdbool.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <string.h> 45 #include <time.h> 46 #include <unistd.h> 47 48 #include "strfile.h" 49 50 /* 51 * This program takes a file composed of strings separated by 52 * lines starting with two consecutive delimiting character (default 53 * character is '%') and creates another file which consists of a table 54 * describing the file (structure from "strfile.h"), a table of seek 55 * pointers to the start of the strings, and the strings, each terminated 56 * by a null byte. Usage: 57 * 58 * % strfile [-iorsx] [ -cC ] sourcefile [ datafile ] 59 * 60 * C - Allow comments marked by a double delimiter at line's beginning 61 * c - Change delimiting character from '%' to 'C' 62 * s - Silent. Give no summary of data processed at the end of 63 * the run. 64 * o - order the strings in alphabetic order 65 * i - if ordering, ignore case 66 * r - randomize the order of the strings 67 * x - set rotated bit 68 * 69 * Ken Arnold Sept. 7, 1978 -- 70 * 71 * Added ordering options. 72 */ 73 74 #define STORING_PTRS (Oflag || Rflag) 75 #define CHUNKSIZE 512 76 77 #define ALLOC(ptr, sz) do { \ 78 if (ptr == NULL) \ 79 ptr = malloc(CHUNKSIZE * sizeof(*ptr)); \ 80 else if (((sz) + 1) % CHUNKSIZE == 0) \ 81 ptr = realloc(ptr, ((sz) + CHUNKSIZE) * sizeof(*ptr)); \ 82 if (ptr == NULL) { \ 83 fprintf(stderr, "out of space\n"); \ 84 exit(1); \ 85 } \ 86 } while (0) 87 88 typedef struct { 89 int first; 90 off_t pos; 91 } STR; 92 93 static char *Infile = NULL; /* input file name */ 94 static char Outfile[MAXPATHLEN] = ""; /* output file name */ 95 static char Delimch = '%'; /* delimiting character */ 96 97 static int Cflag = false; /* embedded comments */ 98 static int Sflag = false; /* silent run flag */ 99 static int Oflag = false; /* ordering flag */ 100 static int Iflag = false; /* ignore case flag */ 101 static int Rflag = false; /* randomize order flag */ 102 static int Xflag = false; /* set rotated bit */ 103 static uint32_t Num_pts = 0; /* number of pointers/strings */ 104 105 static off_t *Seekpts; 106 107 static FILE *Sort_1, *Sort_2; /* pointers for sorting */ 108 109 static STRFILE Tbl; /* statistics table */ 110 111 static STR *Firstch; /* first chars of each string */ 112 113 static void add_offset(FILE *, off_t); 114 static int cmp_str(const void *, const void *); 115 static int collate_range_cmp(int, int); 116 static void do_order(void); 117 static void getargs(int, char **); 118 static void randomize(void); 119 static void usage(void); 120 121 /* 122 * main: 123 * Drive the sucker. There are two main modes -- either we store 124 * the seek pointers, if the table is to be sorted or randomized, 125 * or we write the pointer directly to the file, if we are to stay 126 * in file order. If the former, we allocate and re-allocate in 127 * CHUNKSIZE blocks; if the latter, we just write each pointer, 128 * and then seek back to the beginning to write in the table. 129 */ 130 int 131 main(int argc, char *argv[]) 132 { 133 char *sp, *nsp, dc; 134 FILE *inf, *outf; 135 off_t last_off, pos, *p; 136 size_t length; 137 int first; 138 uint32_t cnt; 139 STR *fp; 140 static char string[257]; 141 142 setlocale(LC_ALL, ""); 143 144 getargs(argc, argv); /* evalute arguments */ 145 dc = Delimch; 146 if ((inf = fopen(Infile, "r")) == NULL) { 147 perror(Infile); 148 exit(1); 149 } 150 151 if ((outf = fopen(Outfile, "w")) == NULL) { 152 perror(Outfile); 153 exit(1); 154 } 155 if (!STORING_PTRS) 156 fseek(outf, (long)sizeof(Tbl), SEEK_SET); 157 158 /* 159 * Write the strings onto the file 160 */ 161 162 Tbl.str_longlen = 0; 163 Tbl.str_shortlen = 0xffffffff; 164 Tbl.str_delim = dc; 165 Tbl.str_version = VERSION; 166 first = Oflag; 167 add_offset(outf, ftello(inf)); 168 last_off = 0; 169 do { 170 sp = fgets(string, 256, inf); 171 if (sp == NULL || (sp[0] == dc && sp[1] == '\n')) { 172 pos = ftello(inf); 173 length = (size_t)(pos - last_off) - 174 (sp != NULL ? strlen(sp) : 0); 175 last_off = pos; 176 if (length == 0) 177 continue; 178 add_offset(outf, pos); 179 if ((size_t)Tbl.str_longlen < length) 180 Tbl.str_longlen = length; 181 if ((size_t)Tbl.str_shortlen > length) 182 Tbl.str_shortlen = length; 183 first = Oflag; 184 } 185 else if (first) { 186 for (nsp = sp; !isalnum((unsigned char)*nsp); nsp++) 187 continue; 188 ALLOC(Firstch, Num_pts); 189 fp = &Firstch[Num_pts - 1]; 190 if (Iflag && isupper((unsigned char)*nsp)) 191 fp->first = tolower((unsigned char)*nsp); 192 else 193 fp->first = *nsp; 194 fp->pos = Seekpts[Num_pts - 1]; 195 first = false; 196 } 197 } while (sp != NULL); 198 199 /* 200 * write the tables in 201 */ 202 203 fclose(inf); 204 Tbl.str_numstr = Num_pts - 1; 205 206 if (Cflag) 207 Tbl.str_flags |= STR_COMMENTS; 208 209 if (Oflag) 210 do_order(); 211 else if (Rflag) 212 randomize(); 213 214 if (Xflag) 215 Tbl.str_flags |= STR_ROTATED; 216 217 if (!Sflag) { 218 printf("\"%s\" created\n", Outfile); 219 if (Num_pts == 2) 220 puts("There was 1 string"); 221 else 222 printf("There were %u strings\n", Num_pts - 1); 223 printf("Longest string: %u byte%s\n", Tbl.str_longlen, 224 Tbl.str_longlen == 1 ? "" : "s"); 225 printf("Shortest string: %u byte%s\n", Tbl.str_shortlen, 226 Tbl.str_shortlen == 1 ? "" : "s"); 227 } 228 229 rewind(outf); 230 Tbl.str_version = htobe32(Tbl.str_version); 231 Tbl.str_numstr = htobe32(Tbl.str_numstr); 232 Tbl.str_longlen = htobe32(Tbl.str_longlen); 233 Tbl.str_shortlen = htobe32(Tbl.str_shortlen); 234 Tbl.str_flags = htobe32(Tbl.str_flags); 235 fwrite((char *)&Tbl, sizeof(Tbl), 1, outf); 236 if (STORING_PTRS) { 237 for (p = Seekpts, cnt = Num_pts; cnt--; ++p) 238 *p = htobe64(*p); 239 fwrite(Seekpts, sizeof(*Seekpts), (size_t)Num_pts, outf); 240 } 241 fclose(outf); 242 exit(0); 243 } 244 245 /* 246 * This routine evaluates arguments from the command line 247 */ 248 void 249 getargs(int argc, char **argv) 250 { 251 int ch; 252 253 while ((ch = getopt(argc, argv, "Cc:iorsx")) != -1) 254 switch(ch) { 255 case 'C': /* embedded comments */ 256 Cflag++; 257 break; 258 case 'c': /* new delimiting char */ 259 Delimch = *optarg; 260 if (!isascii(Delimch)) { 261 printf("bad delimiting character: '\\%o\n'", 262 (unsigned char)Delimch); 263 } 264 break; 265 case 'i': /* ignore case in ordering */ 266 Iflag++; 267 break; 268 case 'o': /* order strings */ 269 Oflag++; 270 break; 271 case 'r': /* randomize pointers */ 272 Rflag++; 273 break; 274 case 's': /* silent */ 275 Sflag++; 276 break; 277 case 'x': /* set the rotated bit */ 278 Xflag++; 279 break; 280 case '?': 281 default: 282 usage(); 283 } 284 argv += optind; 285 286 if (*argv) { 287 Infile = *argv; 288 if (*++argv) 289 strcpy(Outfile, *argv); 290 } 291 if (!Infile) { 292 puts("No input file name"); 293 usage(); 294 } 295 if (*Outfile == '\0') { 296 strcpy(Outfile, Infile); 297 strcat(Outfile, ".dat"); 298 } 299 } 300 301 void 302 usage(void) 303 { 304 fprintf(stderr, 305 "strfile [-Ciorsx] [-c char] source_file [output_file]\n"); 306 exit(1); 307 } 308 309 /* 310 * add_offset: 311 * Add an offset to the list, or write it out, as appropriate. 312 */ 313 void 314 add_offset(FILE *fp, off_t off) 315 { 316 off_t beoff; 317 318 if (!STORING_PTRS) { 319 beoff = htobe64(off); 320 fwrite(&beoff, 1, sizeof(beoff), fp); 321 } else { 322 ALLOC(Seekpts, Num_pts + 1); 323 Seekpts[Num_pts] = off; 324 } 325 Num_pts++; 326 } 327 328 /* 329 * do_order: 330 * Order the strings alphabetically (possibly ignoring case). 331 */ 332 void 333 do_order(void) 334 { 335 uint32_t i; 336 off_t *lp; 337 STR *fp; 338 339 Sort_1 = fopen(Infile, "r"); 340 Sort_2 = fopen(Infile, "r"); 341 qsort(Firstch, (size_t)Tbl.str_numstr, sizeof(*Firstch), cmp_str); 342 i = Tbl.str_numstr; 343 lp = Seekpts; 344 fp = Firstch; 345 while (i--) 346 *lp++ = fp++->pos; 347 fclose(Sort_1); 348 fclose(Sort_2); 349 Tbl.str_flags |= STR_ORDERED; 350 } 351 352 static int 353 collate_range_cmp(int c1, int c2) 354 { 355 static char s1[2], s2[2]; 356 int ret; 357 358 s1[0] = c1; 359 s2[0] = c2; 360 if ((ret = strcoll(s1, s2)) != 0) 361 return (ret); 362 return (c1 - c2); 363 } 364 365 /* 366 * cmp_str: 367 * Compare two strings in the file 368 */ 369 int 370 cmp_str(const void *s1, const void *s2) 371 { 372 const STR *p1, *p2; 373 int c1, c2, n1, n2, r; 374 375 #define SET_N(nf,ch) (nf = (ch == '\n')) 376 #define IS_END(ch,nf) (ch == EOF || (ch == (unsigned char)Delimch && nf)) 377 378 p1 = (const STR *)s1; 379 p2 = (const STR *)s2; 380 381 c1 = (unsigned char)p1->first; 382 c2 = (unsigned char)p2->first; 383 if ((r = collate_range_cmp(c1, c2)) != 0) 384 return r; 385 386 fseeko(Sort_1, p1->pos, SEEK_SET); 387 fseeko(Sort_2, p2->pos, SEEK_SET); 388 389 n1 = false; 390 n2 = false; 391 while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0' && c1 != EOF) 392 SET_N(n1, c1); 393 while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0' && c2 != EOF) 394 SET_N(n2, c2); 395 396 while (!IS_END(c1, n1) && !IS_END(c2, n2)) { 397 if (Iflag) { 398 if (isupper(c1)) 399 c1 = tolower(c1); 400 if (isupper(c2)) 401 c2 = tolower(c2); 402 } 403 if ((r = collate_range_cmp(c1, c2)) != 0) 404 return r; 405 SET_N(n1, c1); 406 SET_N(n2, c2); 407 c1 = getc(Sort_1); 408 c2 = getc(Sort_2); 409 } 410 if (IS_END(c1, n1)) 411 c1 = 0; 412 if (IS_END(c2, n2)) 413 c2 = 0; 414 415 return (collate_range_cmp(c1, c2)); 416 } 417 418 /* 419 * randomize: 420 * Randomize the order of the string table. We must be careful 421 * not to randomize across delimiter boundaries. All 422 * randomization is done within each block. 423 */ 424 void 425 randomize(void) 426 { 427 uint32_t cnt, i; 428 off_t tmp; 429 off_t *sp; 430 431 Tbl.str_flags |= STR_RANDOM; 432 cnt = Tbl.str_numstr; 433 434 /* 435 * move things around randomly 436 */ 437 438 for (sp = Seekpts; cnt > 0; cnt--, sp++) { 439 i = arc4random_uniform(cnt); 440 tmp = sp[0]; 441 sp[0] = sp[i]; 442 sp[i] = tmp; 443 } 444 } 445