1 /* gretlzip.c -- zipping and unzipping for gretl databases using
2    zlib routines.
3 
4    Allin Cottrell (cottrell@wfu.edu) November, 2000 (revised, October 2002)
5 
6    Further revised February 2003 to allow for inclusion of a database
   codebook. Then again in January 2018 to allow for the codebook to
8    be a PDF file.
9 */
10 
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <unistd.h>
15 #include <dirent.h>
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <fcntl.h>
19 #include <time.h>
20 #include <errno.h>
21 
22 #include <zlib.h>
23 
24 #define MAXLEN 255
25 #define BUFSIZE 8192
26 #define PATHSEP '/'
27 #define INFOLEN 100
28 
/* Append to @buf the ctime() representation of the time pointed to
   by @timep, with the trailing newline replaced by a space. @buf
   must already hold a NUL-terminated string and have room for the
   appended text (25 characters plus the terminating NUL for a
   four-digit year). If ctime() fails, @buf is left unchanged.
*/

static void print_time_long (char *buf, const time_t *timep)
{
    char *timebuf = ctime(timep);
    size_t n;

    if (timebuf == NULL) {
	/* ctime() returns NULL if the time cannot be represented */
	return;
    }

    n = strlen(timebuf);
    if (n > 0 && timebuf[n-1] == '\n') {
	timebuf[n-1] = ' ';
    }

    strcat(buf, timebuf);
}
36 
/* Append to @buf the local date for the time pointed to by @timep
   in the compact form YYYYMMDD (8 characters for a four-digit
   year). @buf must already hold a NUL-terminated string and have
   room for the appended text. If localtime() fails, @buf is left
   unchanged.
*/

static void print_time_short (char *buf, const time_t *timep)
{
    struct tm *ztime = localtime(timep);
    char timebuf[32];

    if (ztime == NULL) {
	/* localtime() returns NULL if the time cannot be converted */
	return;
    }

    snprintf(timebuf, sizeof timebuf, "%4d%02d%02d",
	     ztime->tm_year + 1900,
	     ztime->tm_mon + 1,
	     ztime->tm_mday);
    strcat(buf, timebuf);
}
49 
/* Set the first @len bytes of @buf to zero. */

static void clear (char *buf, size_t len)
{
    (void) memset(buf, '\0', len);
}
54 
strip_path(char * fname)55 static char *strip_path (char *fname)
56 {
57     char *p = strrchr(fname, PATHSEP);
58 
59     if (p != NULL && *(p + 1)) return p + 1;
60     else return fname;
61 }
62 
/* Replace the extension of @fname (the text from the last '.' of
   its final path component onward) with @ext; if there is no such
   extension, @ext is simply appended. Passing "" as @ext strips
   the extension. A '.' that belongs to a directory component (as
   in "./foo") or that starts the final component (as in ".foo")
   is not treated as introducing an extension.

   The edit is done in place: @fname must have room for the result.
   Returns @fname.
*/

static char *switch_ext (char *fname, char *ext)
{
    char *dot = strrchr(fname, '.');

    if (dot != NULL) {
	if (strchr(dot, '/') != NULL) {
	    /* the last dot precedes a path separator */
	    dot = NULL;
	} else if (dot == fname || *(dot - 1) == '/') {
	    /* leading dot of the final component */
	    dot = NULL;
	}
    }

    if (dot != NULL) {
	/* overwrite the old extension rather than appending to it */
	strcpy(dot, ext);
    } else {
	strcat(fname, ext);
    }

    return fname;
}
74 
/* Parse the byte counts out of the archive info header @buf, which
   consists of newline-terminated lines, each starting with an
   unsigned decimal number.

   Line 1 gives the length of the index file, written to @idxlen
   (required); line 2 the length of the data file, written to
   @datalen (required); line 3, if present and numeric, the length
   of the codebook, written to @cblen (otherwise @cblen is set
   to 0). If a codebook length is found and the text from line 3
   onward contains ".pdf", 1 is written to @pdfdoc; @pdfdoc is
   not touched otherwise, so the caller should initialize it.

   Returns 0 on success, 1 if either required count is missing.
*/

static int parse_db_header (const char *buf, size_t *idxlen,
			    size_t *datalen, size_t *cblen,
			    int *pdfdoc)
{
    const char *p;
    unsigned long ul;

    *cblen = 0;

    /* Scan into an unsigned long and then assign, since "%lu" does
       not match size_t on every platform.
    */

    /* length of index file (required) */
    if (sscanf(buf, "%lu", &ul) != 1) {
	return 1;
    }
    *idxlen = ul;

    /* length of data (required) */
    p = strchr(buf, '\n');
    if (p == NULL) {
	return 1;
    }
    p++;
    if (sscanf(p, "%lu", &ul) != 1) {
	return 1;
    }
    *datalen = ul;

    /* codebook info (optional) */
    p = strchr(p, '\n');
    if (p != NULL) {
	p++;
	if (sscanf(p, "%lu", &ul) == 1) {
	    *cblen = ul;
	    if (strstr(p, ".pdf") != NULL) {
		*pdfdoc = 1;
	    }
	}
    }

    return 0;
}
117 
118 /* Note: below -- the "info" string for the archive must be
119    exactly 100 bytes (or else the ggz reader must be changed).
120 */
121 
ggz_create(char * infobuf,char * fname,char * gzname)122 static int ggz_create (char *infobuf, char *fname, char *gzname)
123 {
124     int gotcb = 0;
125     int i, len, chk;
126     struct stat fbuf;
127     FILE *fidx, *fbin, *fcb;
128     char tmp[40];
129     char idxname[MAXLEN], binname[MAXLEN], cbname[MAXLEN];
130     char *readname = NULL;
131     char gzbuf[BUFSIZE];
132     gzFile fgz;
133 
134     sprintf(idxname, "%s.idx", fname);
135     sprintf(binname, "%s.bin", fname);
136     sprintf(cbname, "%s.cb", fname);
137     strcat(gzname, ".gz");
138 
139     fidx = fopen(idxname, "rb");
140     if (fidx == NULL) {
141 	sprintf(infobuf, "Couldn't open %s for reading\n", idxname);
142 	return 1;
143     }
144 
145     fbin = fopen(binname, "rb");
146     if (fbin == NULL) {
147 	sprintf(infobuf, "Couldn't open %s for reading\n", binname);
148 	fclose(fidx);
149 	return 1;
150     }
151 
152     fcb = fopen(cbname, "rb");
153     if (fcb != NULL) {
154 	/* plain text codebook */
155 	printf("Found codebook file %s\n", cbname);
156 	gotcb = 1;
157     } else {
158 	/* try for PDF? */
159 	sprintf(cbname, "%s.pdf", fname);
160 	fcb = fopen(cbname, "rb");
161 	if (fcb != NULL) {
162 	    printf("Found codebook file %s\n", cbname);
163 	    gotcb = 1;
164 	}
165     }
166 
167     fgz = gzopen(gzname, "wb");
168     if (fgz == NULL) {
169 	sprintf(infobuf, "Couldn't open %s for writing\n", gzname);
170 	fclose(fidx);
171 	fclose(fbin);
172 	if (fcb != NULL) {
173 	    fclose(fcb);
174 	}
175 	return 1;
176     }
177 
178     for (i=0; i<((gotcb)? 3 : 2); i++) {
179 	if (i == 0) readname = idxname;
180 	else if (i == 1) readname = binname;
181 	else if (i == 2) readname = cbname;
182 
183 	if (stat(readname, &fbuf)) {
184 	    sprintf(infobuf, "Error stat'ing %s\n", readname);
185 	    return 1;
186 	}
187 
188 	sprintf(tmp, "%8lu ", fbuf.st_size);
189 	strcat(infobuf, tmp);
190 	if (gotcb) {
191 	    print_time_short(infobuf, &(fbuf.st_mtime));
192 	} else {
193 	    print_time_long(infobuf, &(fbuf.st_mtime));
194 	}
195 	sprintf(tmp, "%15s", strip_path(readname));
196 	strcat(infobuf, tmp);
197 	strcat(infobuf, "\n");
198     }
199 
200     printf("infobuf: strlen = %d\n", (int) strlen(infobuf));
201     gzwrite(fgz, infobuf, INFOLEN);
202 
203     /* write compressed content of idx and bin files */
204     while ((len = fread(gzbuf, 1, BUFSIZE, fidx)) > 0) {
205 	chk = gzwrite(fgz, gzbuf, len);
206 	if (chk != len)
207 	    fprintf(stderr, "*** gzwrite: len = %d but chk = %d\n", len, chk);
208     }
209 
210     while ((len = fread(gzbuf, 1, BUFSIZE, fbin)) > 0) {
211 	chk = gzwrite(fgz, gzbuf, len);
212 	if (chk != len)
213 	    fprintf(stderr, "*** gzwrite: len = %d but chk = %d\n", len, chk);
214     }
215 
216     if (gotcb) {
217 	while ((len = fread(gzbuf, 1, BUFSIZE, fcb)) > 0) {
218 	    chk = gzwrite(fgz, gzbuf, len);
219 	    if (chk != len)
220 	        fprintf(stderr, "*** gzwrite: len = %d but chk = %d\n", len, chk);
221 	}
222     }
223 
224     fclose(fidx);
225     fclose(fbin);
226     if (gotcb) {
227 	fclose(fcb);
228     }
229 
230     gzclose(fgz);
231 
232     return 0;
233 }
234 
ggz_extract(char * infobuf,char * fname,char * outname)235 static int ggz_extract (char *infobuf, char *fname, char *outname)
236 {
237     int fidx, fbin, fcb = -1;
238     size_t idxlen, datalen, cblen, bytesleft;
239     int bgot, pdfdoc = 0;
240     char idxname[MAXLEN], binname[MAXLEN], cbname[MAXLEN];
241     char gzbuf[BUFSIZE];
242     gzFile fgz;
243     unsigned i;
244     int err = 0;
245 
246     strcat(fname, ".gz");
247     sprintf(idxname, "%s.idx", outname);
248     sprintf(binname, "%s.bin", outname);
249     cbname[0] = '\0';
250 
251     fgz = gzopen(fname, "rb");
252     if (fgz == NULL) {
253 	sprintf(infobuf, "Couldn't gzopen %s for reading\n", fname);
254 	return 1;
255     }
256 
257     fidx = creat(idxname, 00644);
258     if (fidx == -1) {
259 	gzclose(fgz);
260 	sprintf(infobuf, "Couldn't open %s for writing\n", idxname);
261 	return 1;
262     }
263 
264     fbin = creat(binname, 00644);
265     if (fbin == -1) {
266 	gzclose(fgz);
267 	close(fidx);
268 	sprintf(infobuf, "Couldn't open '%s' for writing\n"
269 		"Error: %s\n", binname, strerror(errno));
270 	return 1;
271     }
272 
273     clear(gzbuf, BUFSIZE);
274     gzread(fgz, gzbuf, INFOLEN);
275     strcpy(infobuf, gzbuf);
276 
277     if (parse_db_header(infobuf, &idxlen, &datalen, &cblen, &pdfdoc)) {
278 	fputs("Error reading info buffer: failed to get byte counts\n",
279 	      stderr);
280 	err = 1;
281 	goto bailout;
282     } else if (cblen > 0) {
283 	if (pdfdoc) {
284 	    fputs("Detected PDF codebook\n", stderr);
285 	    sprintf(cbname, "%s.pdf", outname);
286 	} else {
287 	    fputs("Detected plain text codebook\n", stderr);
288 	    sprintf(cbname, "%s.cb", outname);
289 	}
290 	fcb = creat(cbname, 00644);
291 	if (fcb == -1) {
292 	    sprintf(infobuf, "Couldn't open '%s' for writing\n"
293 		    "Error: %s\n", cbname, strerror(errno));
294 	    err = 1;
295 	    goto bailout;
296 	}
297     }
298 
299     for (i=0; i<1+idxlen/BUFSIZE; i++) {
300 	bytesleft = idxlen - BUFSIZE * i;
301 	if (bytesleft <= 0) break;
302 	bgot = gzread(fgz, gzbuf, (bytesleft > BUFSIZE)? BUFSIZE : bytesleft);
303 	write(fidx, gzbuf, bgot);
304     }
305 
306     for (i=0; i<1+datalen/BUFSIZE; i++) {
307 	bytesleft = datalen - BUFSIZE * i;
308 	if (bytesleft <= 0) break;
309 	bgot = gzread(fgz, gzbuf, (bytesleft > BUFSIZE)? BUFSIZE : bytesleft);
310 	write(fbin, gzbuf, bgot);
311     }
312 
313     if (cblen > 0) {
314 	for (i=0; i<1+cblen/BUFSIZE; i++) {
315 	    bytesleft = cblen - BUFSIZE * i;
316 	    if (bytesleft <= 0) break;
317 	    bgot = gzread(fgz, gzbuf, (bytesleft > BUFSIZE)? BUFSIZE : bytesleft);
318 	    write(fcb, gzbuf, bgot);
319 	}
320     }
321 
322  bailout:
323 
324     gzclose(fgz);
325     close(fidx);
326     close(fbin);
327     if (fcb != -1) {
328 	close(fcb);
329     }
330 
331     return err;
332 }
333 
/* Print a usage message to stderr, using @progname as the name
   of the program in the examples, then exit with EXIT_FAILURE.
   Does not return.
*/

static void usage (char *progname)
{
    fprintf(stderr, "Please supply a flag (-c for create, -x for extract), "
	    "followed by\nthe basename of a file or files to operate on.\n\n"
	    " %s -c foo creates foo.gz from foo.idx and foo.bin\n"
	    " %s -x foo extracts foo.idx and foo.bin from foo.gz\n\n",
	    progname, progname);
    fputs("Option: if a second basename is supplied, it is used "
	  "for the output file(s).\n", stderr);
    /* archive creation looks for a .cb codebook first, then .pdf */
    fputs("If a codebook file (.cb, or failing that .pdf) is found on "
	  "archive\ncreation, it is rolled into the archive.\n",
	  stderr);
    exit(EXIT_FAILURE);
}
348 
main(int argc,char * argv[])349 int main (int argc, char *argv[])
350 {
351     int err, create = 0;
352     char fname[MAXLEN], outname[MAXLEN], infobuf[INFOLEN];
353     char *callname;
354     int unzip = 0, filearg = 2;
355 
356     callname = strrchr(argv[0], '/');
357     if (callname != NULL && strlen(callname) > 1) {
358 	callname += 1;
359     } else {
360 	callname = argv[0];
361     }
362 
363     if (!strcmp(callname, "gretlunzip")) {
364         filearg--;
365         unzip = 1;
366     }
367 
368     if ((unzip && argc != 2) || (!unzip && argc < 3))
369 	usage(argv[0]);
370 
371     if (!strcmp(argv[1], "-c")) {
372 	create = 1;
373     } else if (!unzip && strcmp(argv[1], "-x")) {
374 	usage(argv[0]);
375     }
376 
377     strncpy(fname, argv[filearg], MAXLEN-1);
378     fname[MAXLEN-1] = 0;
379 
380     *outname = 0;
381     if (argc == 4) {
382 	strncat(outname, argv[filearg + 1], MAXLEN-1);
383     } else {
384 	strcpy(outname, fname);
385     }
386 
387     switch_ext(fname, "");
388     switch_ext(outname, "");
389     fprintf(stderr, "Taking input from %s%s\nWriting output to %s%s\n",
390 	    fname, (create)? " (.idx, .bin)": ".gz",
391 	    outname, (create)? ".gz" : " (.idx, .bin)");
392 
393     clear(infobuf, INFOLEN);
394 
395     if (create) {
396 	err = ggz_create(infobuf, fname, outname);
397     } else {
398 	err = ggz_extract(infobuf, fname, outname);
399     }
400 
401     if (err) {
402 	fprintf(stderr, "%s", infobuf);
403     } else if (create) {
404 	printf("Found and compressed:\n%s", infobuf);
405     } else {
406 	printf("Found and decompressed:\n%s", infobuf);
407     }
408 
409     return 0;
410 }
411 
412 
413 
414