/*
 * Copyright (c) 2000-2004
 * Kevin Atkinson
 * Jose Da Silva
 *
 * Word-list-compress Version 0.2.1.
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without
 * fee, provided that the above copyright notice appear in all copies
 * and that both that copyright notice and this permission notice
 * appear in supporting documentation. Kevin Atkinson makes no
 * representations about the suitability of this software for any
 * purpose. It is provided "as is" without express or implied
 * warranty.
 *
 * Bug fixes and enhancements by Jose Da Silva, 2004.
 *
 */

#include <stdio.h>

/*
 * SETBIN(stream) puts a stdio stream into binary mode.
 *
 * On Windows and Cygwin it calls _setmode() on the stream's file
 * descriptor with _O_BINARY.  On every other platform it is a no-op that
 * still expands to a valid expression, so "SETBIN (stdout);" is always a
 * well-formed statement.
 */
#if defined(__CYGWIN__) || defined(_WIN32)

# include <io.h>
# include <fcntl.h>

# define SETBIN(fno) _setmode( _fileno( fno ), _O_BINARY )

#else

# define SETBIN(fno) ((void)0)

#endif

/*
 * Size in bytes of a word buffer: up to BUFSIZE - 1 characters plus the
 * terminating '\0'.
 */
#define BUFSIZE 256 /* BUFSIZE must be 256 */

/*
 * Print the program description, copyright line and command-line
 * synopsis to stderr.  Takes no arguments and returns nothing.
 */
static void usage (void)
{
  /* Adjacent string literals are concatenated by the compiler, so this
     emits exactly the same bytes as one fputs() call per line. */
  fputs("Compresses or uncompresses sorted word lists. Version 0.2.1\n"
        "For best result the locale should be set to C before sorting by\n"
        " setting the environmental variable LANG to \"C\" before sorting.\n"
        "Copyright 2000-2004 by Kevin Atkinson.\n"
        "Usage: word-list-compress c[ompress]|d[ecompress]\n",
        stderr);
}

/*
 * Read the next word from IN into W.
 *
 * Every byte <= 32 is treated as a separator and skipped; a word is the
 * following run of bytes > 32.  At most BUFSIZE - 1 characters are
 * stored, always followed by a terminating '\0', so W must point to at
 * least BUFSIZE bytes.  The byte that ended the word (a separator, or
 * the first character that did not fit) is pushed back onto IN.
 *
 * Returns:
 *   0  no word was found before getc() reported EOF; W is the empty string
 *   1  a word was stored in W, including a final word that is ended by
 *      EOF rather than by a separator
 *   2  the word is longer than BUFSIZE - 1 characters; W holds its
 *      first BUFSIZE - 1 characters
 *
 * PRECOND: BUFSIZE >= 2
 */
static int get_word(FILE * in, char * w)
{
  int room = BUFSIZE - 1;   /* characters that may still be stored */
  int c;

  /* Skip leading separators. */
  do {
    c = getc(in);
  } while (c != EOF && c <= 32);

  if (c == EOF) {
    *w = '\0';
    return 0;               /* done: nothing left to read */
  }

  /* EOF is negative, so "c > 32" also stops the loop at end of input.
     --room is evaluated only when another word character was read, so
     it reaches 0 exactly when the word does not fit. */
  do {
    *w++ = (char)c;
    c = getc(in);
  } while (c > 32 && --room);
  *w = '\0';

  if (c != EOF)
    ungetc(c, in);

  if (room == 0) return 2;  /* error, word larger than 255 chars */

  /* A word that runs up to EOF is still a word; the next call finds
     EOF immediately and returns 0. */
  return 1;
}

main(int argc,const char * argv[])66 int main (int argc, const char *argv[]) {
67
68 if (argc == 2) {
69 char c = argv[1][0];
70 if (c == '-') c = argv[1][1];
71
72 if (c == 'c') {
73
74 char s1[BUFSIZE];
75 char s2[BUFSIZE];
76 char * prev = s2;
77 char * cur = s1;
78 int errFlag;
79 *prev = '\0';
80
81 SETBIN (stdout);
82
83 while ((errFlag = get_word(stdin, cur)) == 1) {
84 int i = 0;
85 /* get the length of the prefix */
86 while (prev[i] != '\0' && prev[i] == cur[i])
87 ++i;
88 if (i > 31) {
89 if (putc('\0', stdout) < 0) goto error_out_c;
90 }
91 if (putc(i+1, stdout) < 0) goto error_out_c;
92 if (fputs(cur+i, stdout) < 0) goto error_out_c;
93
94 /* swap prev and next */
95 {
96 char * tmp = cur;
97 cur = prev;
98 prev = tmp;
99 }
100 }
101 if (fflush(stdout) < 0) goto error_out_c;
102 if (errFlag) goto error_in_c;
103 return 0;
104 }
105
106 if (c == 'd') {
107
108 char cur[BUFSIZE+1];
109 int i;
110 int c;
111 int last_max = 0;
112
113 SETBIN (stdin);
114
115 i = getc(stdin);
116 if (i != 1) goto error_in_d;
117 while (i != -1) {
118 if (i == 0)
119 i = getc(stdin);
120 --i;
121 if (i < 0 || i > last_max) goto error_in_d;
122 while ((c = getc(stdin)) > 32 && i < BUFSIZE)
123 cur[i++] = (char)c;
124 if (i >= BUFSIZE) goto error_in_d;
125 last_max = i;
126 cur[i] = '\n'; cur[++i] = '\0';
127 if (fputs(cur, stdout) < 0) goto error_out_d;
128 i = c;
129 }
130 return 0;
131
132 error_in_c:
133 error_in_d:
134 fputs("ERROR: Corrupt Input.\n", stderr);
135 return 2;
136
137 error_out_c:
138 error_out_d:
139 /* output space full or other output fault */
140 fputs("ERROR: Output Data Error.\n", stderr);
141 return 3;
142 }
143 }
144
145 usage();
146 return 1;
147 }
148