1 /* gretlzip.c -- zipping and unzipping for gretl databases using
2 zlib routines.
3
4 Allin Cottrell (cottrell@wfu.edu) November, 2000 (revised, October 2002)
5
6 Further revised February 2003 to allow for inclusion of a database
   codebook. Then again in January 2018 to allow for the codebook to
8 be a PDF file.
9 */
10
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <unistd.h>
15 #include <dirent.h>
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <fcntl.h>
19 #include <time.h>
20 #include <errno.h>
21
22 #include <zlib.h>
23
24 #define MAXLEN 255
25 #define BUFSIZE 8192
26 #define PATHSEP '/'
27 #define INFOLEN 100
28
/* Append to @buf the ctime() rendering of the time pointed to by
   @timep, with the final character (the newline that ctime() adds)
   replaced by a space.

   @buf must already hold a NUL-terminated string and have room for
   the appended text. If ctime() cannot represent the time and
   returns NULL, @buf is left unchanged.
*/

static void print_time_long (char *buf, const time_t *timep)
{
    char *timebuf = ctime(timep);
    size_t n;

    if (timebuf == NULL) {
        /* nothing sensible to append */
        return;
    }

    n = strlen(timebuf);
    if (n > 0) {
        /* overwrite the trailing newline */
        timebuf[n - 1] = ' ';
    }
    strcat(buf, timebuf);
}
36
/* Append to @buf the local-time date of @timep in the compact form
   YYYYMMDD (format "%4d%02d%02d").

   @buf must already hold a NUL-terminated string and have room for
   the appended text. If localtime() fails and returns NULL, @buf is
   left unchanged.
*/

static void print_time_short (char *buf, const time_t *timep)
{
    struct tm *ztime = localtime(timep);
    char timebuf[32];

    if (ztime == NULL) {
        return;
    }

    /* bounded: extreme int values could otherwise exceed timebuf */
    snprintf(timebuf, sizeof timebuf, "%4d%02d%02d",
             ztime->tm_year + 1900,
             ztime->tm_mon + 1,
             ztime->tm_mday);
    strcat(buf, timebuf);
}
49
/* Set the first @len bytes of @buf to zero. */

static void clear (char *buf, size_t len)
{
    if (len == 0) {
        /* nothing to wipe */
        return;
    }

    memset(buf, '\0', len);
}
54
strip_path(char * fname)55 static char *strip_path (char *fname)
56 {
57 char *p = strrchr(fname, PATHSEP);
58
59 if (p != NULL && *(p + 1)) return p + 1;
60 else return fname;
61 }
62
/* Replace the extension of @fname with @ext, in place, and return
   @fname.

   The extension is taken to start at the last '.' in @fname,
   provided that dot lies in the final path component (no '/'
   follows it) and is not the first character of that component
   (so "./foo" and "dir/.hidden" are treated as having no
   extension). If there is no such dot, @ext is simply appended.
   Passing an empty @ext therefore strips any existing extension.

   The caller must ensure @fname has room for the result.
*/

static char *switch_ext (char *fname, char *ext)
{
    char *dot = strrchr(fname, '.');

    if (dot != NULL && dot != fname && *(dot - 1) != '/' &&
        strchr(dot, '/') == NULL) {
        /* overwrite the old extension rather than appending to it */
        memmove(dot, ext, strlen(ext) + 1);
    } else {
        strcat(fname, ext);
    }

    return fname;
}
74
/* Extract the member byte counts from the archive info string @buf.

   The info string consists of newline-separated lines, each of
   which starts with an unsigned decimal byte count:

     line 1: length of the index file (required) -> *idxlen
     line 2: length of the data file (required)  -> *datalen
     line 3: length of the codebook (optional)   -> *cblen

   *cblen is set to 0 if there is no third line or it does not start
   with a number. *pdfdoc is set to 1 if a codebook length was read
   and the text from the third line onward contains ".pdf";
   otherwise *pdfdoc is left untouched, so the caller should
   initialize it.

   Returns 0 on success, 1 if either required count is missing.
*/

static int parse_db_header (const char *buf, size_t *idxlen,
                            size_t *datalen, size_t *cblen,
                            int *pdfdoc)
{
    const char *p;
    unsigned long val;

    *cblen = 0;

    /* Scan into an unsigned long and then convert: "%lu" does not
       match size_t on every platform. */

    /* length of index file (required) */
    if (sscanf(buf, "%lu", &val) != 1) {
        return 1;
    }
    *idxlen = (size_t) val;

    /* length of data (required) */
    p = strchr(buf, '\n');
    if (p == NULL || sscanf(p + 1, "%lu", &val) != 1) {
        return 1;
    }
    *datalen = (size_t) val;

    /* codebook info (optional) */
    p = strchr(p + 1, '\n');
    if (p != NULL && sscanf(p + 1, "%lu", &val) == 1) {
        *cblen = (size_t) val;
        if (strstr(p + 1, ".pdf") != NULL) {
            *pdfdoc = 1;
        }
    }

    return 0;
}
117
118 /* Note: below -- the "info" string for the archive must be
119 exactly 100 bytes (or else the ggz reader must be changed).
120 */
121
ggz_create(char * infobuf,char * fname,char * gzname)122 static int ggz_create (char *infobuf, char *fname, char *gzname)
123 {
124 int gotcb = 0;
125 int i, len, chk;
126 struct stat fbuf;
127 FILE *fidx, *fbin, *fcb;
128 char tmp[40];
129 char idxname[MAXLEN], binname[MAXLEN], cbname[MAXLEN];
130 char *readname = NULL;
131 char gzbuf[BUFSIZE];
132 gzFile fgz;
133
134 sprintf(idxname, "%s.idx", fname);
135 sprintf(binname, "%s.bin", fname);
136 sprintf(cbname, "%s.cb", fname);
137 strcat(gzname, ".gz");
138
139 fidx = fopen(idxname, "rb");
140 if (fidx == NULL) {
141 sprintf(infobuf, "Couldn't open %s for reading\n", idxname);
142 return 1;
143 }
144
145 fbin = fopen(binname, "rb");
146 if (fbin == NULL) {
147 sprintf(infobuf, "Couldn't open %s for reading\n", binname);
148 fclose(fidx);
149 return 1;
150 }
151
152 fcb = fopen(cbname, "rb");
153 if (fcb != NULL) {
154 /* plain text codebook */
155 printf("Found codebook file %s\n", cbname);
156 gotcb = 1;
157 } else {
158 /* try for PDF? */
159 sprintf(cbname, "%s.pdf", fname);
160 fcb = fopen(cbname, "rb");
161 if (fcb != NULL) {
162 printf("Found codebook file %s\n", cbname);
163 gotcb = 1;
164 }
165 }
166
167 fgz = gzopen(gzname, "wb");
168 if (fgz == NULL) {
169 sprintf(infobuf, "Couldn't open %s for writing\n", gzname);
170 fclose(fidx);
171 fclose(fbin);
172 if (fcb != NULL) {
173 fclose(fcb);
174 }
175 return 1;
176 }
177
178 for (i=0; i<((gotcb)? 3 : 2); i++) {
179 if (i == 0) readname = idxname;
180 else if (i == 1) readname = binname;
181 else if (i == 2) readname = cbname;
182
183 if (stat(readname, &fbuf)) {
184 sprintf(infobuf, "Error stat'ing %s\n", readname);
185 return 1;
186 }
187
188 sprintf(tmp, "%8lu ", fbuf.st_size);
189 strcat(infobuf, tmp);
190 if (gotcb) {
191 print_time_short(infobuf, &(fbuf.st_mtime));
192 } else {
193 print_time_long(infobuf, &(fbuf.st_mtime));
194 }
195 sprintf(tmp, "%15s", strip_path(readname));
196 strcat(infobuf, tmp);
197 strcat(infobuf, "\n");
198 }
199
200 printf("infobuf: strlen = %d\n", (int) strlen(infobuf));
201 gzwrite(fgz, infobuf, INFOLEN);
202
203 /* write compressed content of idx and bin files */
204 while ((len = fread(gzbuf, 1, BUFSIZE, fidx)) > 0) {
205 chk = gzwrite(fgz, gzbuf, len);
206 if (chk != len)
207 fprintf(stderr, "*** gzwrite: len = %d but chk = %d\n", len, chk);
208 }
209
210 while ((len = fread(gzbuf, 1, BUFSIZE, fbin)) > 0) {
211 chk = gzwrite(fgz, gzbuf, len);
212 if (chk != len)
213 fprintf(stderr, "*** gzwrite: len = %d but chk = %d\n", len, chk);
214 }
215
216 if (gotcb) {
217 while ((len = fread(gzbuf, 1, BUFSIZE, fcb)) > 0) {
218 chk = gzwrite(fgz, gzbuf, len);
219 if (chk != len)
220 fprintf(stderr, "*** gzwrite: len = %d but chk = %d\n", len, chk);
221 }
222 }
223
224 fclose(fidx);
225 fclose(fbin);
226 if (gotcb) {
227 fclose(fcb);
228 }
229
230 gzclose(fgz);
231
232 return 0;
233 }
234
ggz_extract(char * infobuf,char * fname,char * outname)235 static int ggz_extract (char *infobuf, char *fname, char *outname)
236 {
237 int fidx, fbin, fcb = -1;
238 size_t idxlen, datalen, cblen, bytesleft;
239 int bgot, pdfdoc = 0;
240 char idxname[MAXLEN], binname[MAXLEN], cbname[MAXLEN];
241 char gzbuf[BUFSIZE];
242 gzFile fgz;
243 unsigned i;
244 int err = 0;
245
246 strcat(fname, ".gz");
247 sprintf(idxname, "%s.idx", outname);
248 sprintf(binname, "%s.bin", outname);
249 cbname[0] = '\0';
250
251 fgz = gzopen(fname, "rb");
252 if (fgz == NULL) {
253 sprintf(infobuf, "Couldn't gzopen %s for reading\n", fname);
254 return 1;
255 }
256
257 fidx = creat(idxname, 00644);
258 if (fidx == -1) {
259 gzclose(fgz);
260 sprintf(infobuf, "Couldn't open %s for writing\n", idxname);
261 return 1;
262 }
263
264 fbin = creat(binname, 00644);
265 if (fbin == -1) {
266 gzclose(fgz);
267 close(fidx);
268 sprintf(infobuf, "Couldn't open '%s' for writing\n"
269 "Error: %s\n", binname, strerror(errno));
270 return 1;
271 }
272
273 clear(gzbuf, BUFSIZE);
274 gzread(fgz, gzbuf, INFOLEN);
275 strcpy(infobuf, gzbuf);
276
277 if (parse_db_header(infobuf, &idxlen, &datalen, &cblen, &pdfdoc)) {
278 fputs("Error reading info buffer: failed to get byte counts\n",
279 stderr);
280 err = 1;
281 goto bailout;
282 } else if (cblen > 0) {
283 if (pdfdoc) {
284 fputs("Detected PDF codebook\n", stderr);
285 sprintf(cbname, "%s.pdf", outname);
286 } else {
287 fputs("Detected plain text codebook\n", stderr);
288 sprintf(cbname, "%s.cb", outname);
289 }
290 fcb = creat(cbname, 00644);
291 if (fcb == -1) {
292 sprintf(infobuf, "Couldn't open '%s' for writing\n"
293 "Error: %s\n", cbname, strerror(errno));
294 err = 1;
295 goto bailout;
296 }
297 }
298
299 for (i=0; i<1+idxlen/BUFSIZE; i++) {
300 bytesleft = idxlen - BUFSIZE * i;
301 if (bytesleft <= 0) break;
302 bgot = gzread(fgz, gzbuf, (bytesleft > BUFSIZE)? BUFSIZE : bytesleft);
303 write(fidx, gzbuf, bgot);
304 }
305
306 for (i=0; i<1+datalen/BUFSIZE; i++) {
307 bytesleft = datalen - BUFSIZE * i;
308 if (bytesleft <= 0) break;
309 bgot = gzread(fgz, gzbuf, (bytesleft > BUFSIZE)? BUFSIZE : bytesleft);
310 write(fbin, gzbuf, bgot);
311 }
312
313 if (cblen > 0) {
314 for (i=0; i<1+cblen/BUFSIZE; i++) {
315 bytesleft = cblen - BUFSIZE * i;
316 if (bytesleft <= 0) break;
317 bgot = gzread(fgz, gzbuf, (bytesleft > BUFSIZE)? BUFSIZE : bytesleft);
318 write(fcb, gzbuf, bgot);
319 }
320 }
321
322 bailout:
323
324 gzclose(fgz);
325 close(fidx);
326 close(fbin);
327 if (fcb != -1) {
328 close(fcb);
329 }
330
331 return err;
332 }
333
/* Print the command-line help to stderr, using @progname in the
   example invocations, then terminate with EXIT_FAILURE. Does not
   return.
*/

static void usage (char *progname)
{
    FILE *out = stderr;

    fprintf(out,
            "Please supply a flag (-c for create, -x for extract), "
            "followed by\nthe basename of a file or files to operate on.\n\n"
            " %s -c foo creates foo.gz from foo.idx and foo.bin\n"
            " %s -x foo extracts foo.idx and foo.bin from foo.gz\n\n",
            progname, progname);

    fputs("Option: if a second basename is supplied, it is used "
          "for the output file(s).\n"
          "If a codebook file (.cb) is found on archive creation, it is rolled\n"
          "into the archive.\n",
          out);

    exit(EXIT_FAILURE);
}
348
main(int argc,char * argv[])349 int main (int argc, char *argv[])
350 {
351 int err, create = 0;
352 char fname[MAXLEN], outname[MAXLEN], infobuf[INFOLEN];
353 char *callname;
354 int unzip = 0, filearg = 2;
355
356 callname = strrchr(argv[0], '/');
357 if (callname != NULL && strlen(callname) > 1) {
358 callname += 1;
359 } else {
360 callname = argv[0];
361 }
362
363 if (!strcmp(callname, "gretlunzip")) {
364 filearg--;
365 unzip = 1;
366 }
367
368 if ((unzip && argc != 2) || (!unzip && argc < 3))
369 usage(argv[0]);
370
371 if (!strcmp(argv[1], "-c")) {
372 create = 1;
373 } else if (!unzip && strcmp(argv[1], "-x")) {
374 usage(argv[0]);
375 }
376
377 strncpy(fname, argv[filearg], MAXLEN-1);
378 fname[MAXLEN-1] = 0;
379
380 *outname = 0;
381 if (argc == 4) {
382 strncat(outname, argv[filearg + 1], MAXLEN-1);
383 } else {
384 strcpy(outname, fname);
385 }
386
387 switch_ext(fname, "");
388 switch_ext(outname, "");
389 fprintf(stderr, "Taking input from %s%s\nWriting output to %s%s\n",
390 fname, (create)? " (.idx, .bin)": ".gz",
391 outname, (create)? ".gz" : " (.idx, .bin)");
392
393 clear(infobuf, INFOLEN);
394
395 if (create) {
396 err = ggz_create(infobuf, fname, outname);
397 } else {
398 err = ggz_extract(infobuf, fname, outname);
399 }
400
401 if (err) {
402 fprintf(stderr, "%s", infobuf);
403 } else if (create) {
404 printf("Found and compressed:\n%s", infobuf);
405 } else {
406 printf("Found and decompressed:\n%s", infobuf);
407 }
408
409 return 0;
410 }
411
412
413
414