1 /*
2  * Copyright (c) 2000-2004
3  * Kevin Atkinson
4  * Jose Da Silva
5  *
6  * Word-list-compress Version 0.2.1.
7  *
8  * Permission to use, copy, modify, distribute and sell this software
9  * and its documentation for any purpose is hereby granted without
10  * fee, provided that the above copyright notice appear in all copies
11  * and that both that copyright notice and this permission notice
12  * appear in supporting documentation.  Kevin Atkinson makes no
13  * representations about the suitability of this software for any
14  * purpose.  It is provided "as is" without express or implied
15  * warranty.
16  *
17  * Bug fixes and enhancements by Jose Da Silva, 2004.
18  *
19  */
20 
21 #include <stdio.h>
22 
23 #if defined(__CYGWIN__) || defined (_WIN32)
24 
25 #  include <io.h>
26 #  include <fcntl.h>
27 
28 #  define SETBIN(fno)  _setmode( _fileno( fno ), _O_BINARY )
29 
30 #else
31 
32 #  define SETBIN(fno)
33 
34 #endif
35 
36 #define BUFSIZE 256	/* BUFSIZE must be 256 */
37 
/*
 * Print the program description, a sorting hint, the copyright line and
 * the command-line synopsis to stderr.
 */
static void usage(void)
{
  static const char *const lines[] = {
    "Compresses or uncompresses sorted word lists.  Version 0.2.1\n",
    "For best result the locale should be set to C before sorting by\n",
    "  setting the environmental variable LANG to \"C\" before sorting.\n",
    "Copyright 2000-2004 by Kevin Atkinson.\n",
    "Usage: word-list-compress c[ompress]|d[ecompress]\n"
  };
  size_t i;

  for (i = 0; i < sizeof lines / sizeof lines[0]; ++i) {
    fputs(lines[i], stderr);
  }
}
46 
47 /* PRECOND: bufsize >= 2 */
get_word(FILE * in,char * w)48 static int get_word(FILE * in, char * w)
49 {
50   int bufsize = BUFSIZE - 1;
51   register int c;
52 
53   while (c = getc(in), c <= 32 && c != EOF);
54   if (c != EOF) {
55     do {
56       *w++ = (char)(c);
57     } while (c = getc(in), c > 32 && c != EOF && --bufsize);
58   }
59   *w = '\0';
60   ungetc(c, in);
61   if (c == EOF) return 0; /* done */
62   if (bufsize)  return 1; /* normal return */
63   return 2;		  /* error, word larger than 255 chars */
64 }
65 
main(int argc,const char * argv[])66 int main (int argc, const char *argv[]) {
67 
68   if (argc == 2) {
69     char c = argv[1][0];
70     if (c == '-') c = argv[1][1];
71 
72     if (c == 'c') {
73 
74       char s1[BUFSIZE];
75       char s2[BUFSIZE];
76       char * prev = s2;
77       char * cur = s1;
78       int errFlag;
79       *prev = '\0';
80 
81       SETBIN (stdout);
82 
83       while ((errFlag = get_word(stdin, cur)) == 1) {
84         int i = 0;
85         /* get the length of the prefix */
86 	while (prev[i] != '\0' && prev[i] == cur[i])
87           ++i;
88         if (i > 31) {
89 	  if (putc('\0', stdout) < 0) goto error_out_c;
90         }
91 	if (putc(i+1, stdout) < 0) goto error_out_c;
92 	if (fputs(cur+i, stdout) < 0) goto error_out_c;
93 
94 	/* swap prev and next */
95 	{
96 	  char * tmp = cur;
97 	  cur = prev;
98 	  prev = tmp;
99         }
100       }
101       if (fflush(stdout) < 0) goto error_out_c;
102       if (errFlag) goto error_in_c;
103       return 0;
104     }
105 
106     if (c == 'd') {
107 
108       char cur[BUFSIZE+1];
109       int i;
110       int c;
111       int last_max = 0;
112 
113       SETBIN (stdin);
114 
115       i = getc(stdin);
116       if (i != 1) goto error_in_d;
117       while (i != -1) {
118         if (i == 0)
119           i = getc(stdin);
120         --i;
121         if (i < 0 || i > last_max) goto error_in_d;
122         while ((c = getc(stdin)) > 32 && i < BUFSIZE)
123           cur[i++] = (char)c;
124 	if (i >= BUFSIZE) goto error_in_d;
125         last_max = i;
126 	cur[i] = '\n'; cur[++i] = '\0';
127 	if (fputs(cur, stdout) < 0) goto error_out_d;
128         i = c;
129       }
130       return 0;
131 
132     error_in_c:
133     error_in_d:
134       fputs("ERROR: Corrupt Input.\n", stderr);
135       return 2;
136 
137     error_out_c:
138     error_out_d:
139       /* output space full or other output fault */
140       fputs("ERROR: Output Data Error.\n", stderr);
141       return 3;
142     }
143   }
144 
145   usage();
146   return 1;
147 }
148