1 /* cdb.c: cdb command line tool
2  *
3  * This file is a part of tinycdb package by Michael Tokarev, mjt@corpit.ru.
4  * Public domain.
5  */
6 
7 #define _GNU_SOURCE	/* #define this even on Windows */
8 
9 #ifdef _WIN32		/* by the way, how about win64? */
10 # include <io.h>
11 # include <malloc.h>
12 /* This pragma suppresses snippy VC warnings for POSIX functions like read() */
13 # pragma warning(disable: 4996)
14 #else
15 # include <unistd.h>
16 #endif
17 
18 #include <sys/types.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <stdarg.h>
23 #include <fcntl.h>
24 #include <errno.h>
25 #include <sys/stat.h>
26 #include "cdb.h"
27 
/* EPROTO is used below to report malformed database files; where <errno.h>
 * does not define it, fall back to EINVAL. */
#ifndef EPROTO
# define EPROTO EINVAL
#endif

/* With glibc the program name is taken from program_invocation_short_name. */
#ifdef __GLIBC__
# define HAVE_PROGRAM_INVOCATION_SHORT_NAME
#endif

/* progname is the name printed as the prefix of every diagnostic.  Without
 * program_invocation_short_name it is a file-local variable that main()
 * fills in from argv[0]. */
#ifdef HAVE_PROGRAM_INVOCATION_SHORT_NAME
# define progname program_invocation_short_name
#else
static char *progname;
#endif

/* O_NOFOLLOW is OR-ed into the open() flags in cmode(); defining it as 0
 * makes it a no-op where the platform lacks it. */
#ifndef O_NOFOLLOW
# define O_NOFOLLOW 0
#endif

/* Suffix appended to fopen() mode strings for database files:
 * "b" on Windows, nothing elsewhere. */
#ifdef _WIN32
# define FBINMODE "b"
#else
# define FBINMODE
#endif
51 
/* Bits of the `flags' word threaded through the mode functions. */
#define F_DUPMASK	0x000f	/* low bits: CDB_PUT_* mode handed to cdb_make_put() */
#define F_WARNDUP	0x0100	/* report duplicated keys on stderr */
#define F_ERRDUP	0x0200	/* exit(1) after reporting a duplicated key */
#define F_MAP		0x1000	/* map format (or else CDB native format) */
56 
/* Silly defines just to suppress silly compiler warnings.
 * The thing is, trivial routines like strlen(), fgets() etc. expect
 * a char* argument, and GCC>=4 complains about using unsigned char* here.
 * Silly silly silly.
 */
62 #ifdef __GNUC__
/* strlen() for unsigned char strings: returns the number of bytes that
 * precede the terminating NUL in s. */
static inline size_t ustrlen(const unsigned char *s) {
  const char *text = (const char *)s;
  return strlen(text);
}
/* fgets() for unsigned char buffers: reads at most size-1 bytes of one line
 * from f into s and returns s, or NULL when fgets() returns NULL. */
static inline unsigned char *ufgets(unsigned char *s, int size, FILE *f) {
  char *line = fgets((char *)s, size, f);
  return (unsigned char *)line;
}
69 #else
70 # define ustrlen strlen
71 # define ufgets fgets
72 #endif
73 
/* Scratch buffer shared by all modes.  allocbuf() grows it on demand and
 * blen records its current size in bytes. */
static unsigned char *buf;
static unsigned blen;
76 
77 static void
78 #ifdef __GNUC__
79 __attribute__((noreturn,format(printf,2,3)))
80 #endif
error(int errnum,const char * fmt,...)81 error(int errnum, const char *fmt, ...)
82 {
83   if (fmt) {
84     va_list ap;
85     fprintf(stderr, "%s: ", progname);
86     va_start(ap, fmt);
87     vfprintf(stderr, fmt, ap);
88     va_end(ap);
89   }
90   if (errnum)
91     fprintf(stderr, ": %s\n", strerror(errnum));
92   else {
93     if (fmt) putc('\n', stderr);
94     fprintf(stderr, "%s: try `%s -h' for help\n", progname, progname);
95   }
96   fflush(stderr);
97   exit(errnum ? 111 : 2);
98 }
99 
allocbuf(unsigned len)100 static void allocbuf(unsigned len) {
101   if (blen < len) {
102     buf = (unsigned char*)(buf ? realloc(buf, len) : malloc(len));
103     if (!buf)
104       error(ENOMEM, "unable to allocate %u bytes", len);
105     blen = len;
106   }
107 }
108 
qmode(char * dbname,const char * key,int num,int flags)109 static int qmode(char *dbname, const char *key, int num, int flags)
110 {
111   struct cdb c;
112   struct cdb_find cf;
113   int r;
114   int n, found;
115 
116   r = open(dbname, O_RDONLY);
117   if (r < 0 || cdb_init(&c, r) != 0)
118     error(errno, "unable to open database `%s'", dbname);
119 
120   r = cdb_findinit(&cf, &c, key, strlen(key));
121   if (!r)
122     return 100;
123   else if (r < 0)
124     error(errno, "%s", key);
125   n = 0; found = 0;
126   while((r = cdb_findnext(&cf)) > 0) {
127     ++n;
128     if (num && num != n) continue;
129     ++found;
130     allocbuf(cdb_datalen(&c));
131     if (cdb_read(&c, buf, cdb_datalen(&c), cdb_datapos(&c)) != 0)
132       error(errno, "unable to read value");
133     fwrite(buf, 1, cdb_datalen(&c), stdout);
134     if (flags & F_MAP) putchar('\n');
135     if (num)
136       break;
137   }
138   if (r < 0)
139     error(0, "%s", key);
140   return found ? 0 : 100;
141 }
142 
143 static void
fget(FILE * f,unsigned char * b,unsigned len,unsigned * posp,unsigned limit)144 fget(FILE *f, unsigned char *b, unsigned len, unsigned *posp, unsigned limit)
145 {
146   if (posp && limit - *posp < len)
147     error(EPROTO, "invalid database format");
148   if (fread(b, 1, len, f) != len) {
149     if (ferror(f)) error(errno, "unable to read");
150     fprintf(stderr, "%s: unable to read: short file\n", progname);
151     exit(2);
152   }
153   if (posp) *posp += len;
154 }
155 
156 static int
fcpy(FILE * fi,FILE * fo,unsigned len,unsigned * posp,unsigned limit)157 fcpy(FILE *fi, FILE *fo, unsigned len, unsigned *posp, unsigned limit)
158 {
159   while(len > blen) {
160     fget(fi, buf, blen, posp, limit);
161     if (fo && fwrite(buf, 1, blen, fo) != blen) return -1;
162     len -= blen;
163   }
164   if (len) {
165     fget(fi, buf, len, posp, limit);
166     if (fo && fwrite(buf, 1, len, fo) != len) return -1;
167   }
168   return 0;
169 }
170 
171 static int
dmode(char * dbname,char mode,int flags)172 dmode(char *dbname, char mode, int flags)
173 {
174   unsigned eod, klen, vlen;
175   unsigned pos = 0;
176   FILE *f;
177   if (strcmp(dbname, "-") == 0)
178     f = stdin;
179   else if ((f = fopen(dbname, "r" FBINMODE)) == NULL)
180     error(errno, "open %s", dbname);
181   allocbuf(2048);
182   fget(f, buf, 2048, &pos, 2048);
183   eod = cdb_unpack(buf);
184   while(pos < eod) {
185     fget(f, buf, 8, &pos, eod);
186     klen = cdb_unpack(buf);
187     vlen = cdb_unpack(buf + 4);
188     if (!(flags & F_MAP))
189       if (printf(mode == 'd' ? "+%u,%u:" : "+%u:", klen, vlen) < 0) return -1;
190     if (fcpy(f, stdout, klen, &pos, eod) != 0) return -1;
191     if (mode == 'd')
192       if (fputs(flags & F_MAP ? " " : "->", stdout) < 0)
193         return -1;
194     if (fcpy(f, mode == 'd' ? stdout : NULL, vlen, &pos, eod) != 0)
195       return -1;
196     if (putc('\n', stdout) < 0)
197       return -1;
198   }
199   if (pos != eod)
200     error(EPROTO, "invalid cdb file format");
201   if (!(flags & F_MAP))
202     if (putc('\n', stdout) < 0)
203       return -1;
204   return 0;
205 }
206 
smode(char * dbname)207 static int smode(char *dbname) {
208   FILE *f;
209   unsigned pos, eod;
210   unsigned cnt = 0;
211   unsigned kmin = 0, kmax = 0, ktot = 0;
212   unsigned vmin = 0, vmax = 0, vtot = 0;
213   unsigned hmin = 0, hmax = 0, htot = 0, hcnt = 0;
214 #define NDIST 11
215   unsigned dist[NDIST];
216   unsigned char toc[2048];
217   unsigned k;
218 
219   if (strcmp(dbname, "-") == 0)
220     f = stdin;
221   else if ((f = fopen(dbname, "r" FBINMODE)) == NULL)
222     error(errno, "open %s", dbname);
223 
224   pos = 0;
225   fget(f, toc, 2048, &pos, 2048);
226 
227   allocbuf(2048);
228 
229   eod = cdb_unpack(toc);
230   while(pos < eod) {
231     unsigned klen, vlen;
232     fget(f, buf, 8, &pos, eod);
233     klen = cdb_unpack(buf);
234     vlen = cdb_unpack(buf + 4);
235     fcpy(f, NULL, klen, &pos, eod);
236     fcpy(f, NULL, vlen, &pos, eod);
237     ++cnt;
238     ktot += klen;
239     if (!kmin || kmin > klen) kmin = klen;
240     if (kmax < klen) kmax = klen;
241     vtot += vlen;
242     if (!vmin || vmin > vlen) vmin = vlen;
243     if (vmax < vlen) vmax = vlen;
244     vlen += klen;
245   }
246   if (pos != eod) error(EPROTO, "invalid cdb file format");
247 
248   for (k = 0; k < NDIST; ++k)
249     dist[k] = 0;
250   for (k = 0; k < 256; ++k) {
251     unsigned i = cdb_unpack(toc + (k << 3));
252     unsigned hlen = cdb_unpack(toc + (k << 3) + 4);
253     if (i != pos) error(EPROTO, "invalid cdb hash table");
254     if (!hlen) continue;
255     for (i = 0; i < hlen; ++i) {
256       unsigned h;
257       fget(f, buf, 8, &pos, 0xffffffff);
258       if (!cdb_unpack(buf + 4)) continue;
259       h = (cdb_unpack(buf) >> 8) % hlen;
260       if (h == i) h = 0;
261       else {
262         if (h < i) h = i - h;
263         else h = hlen - h + i;
264         if (h >= NDIST) h = NDIST - 1;
265       }
266       ++dist[h];
267     }
268     if (!hmin || hmin > hlen) hmin = hlen;
269     if (hmax < hlen) hmax = hlen;
270     htot += hlen;
271     ++hcnt;
272   }
273   printf("number of records: %u\n", cnt);
274   printf("key min/avg/max length: %u/%u/%u\n",
275          kmin, cnt ? (ktot + cnt / 2) / cnt : 0, kmax);
276   printf("val min/avg/max length: %u/%u/%u\n",
277          vmin, cnt ? (vtot + cnt / 2) / cnt : 0, vmax);
278   printf("hash tables/entries/collisions: %u/%u/%u\n",
279          hcnt, htot, cnt - dist[0]);
280   printf("hash table min/avg/max length: %u/%u/%u\n",
281          hmin, hcnt ? (htot + hcnt / 2) / hcnt : 0, hmax);
282   printf("hash table distances:\n");
283   for(k = 0; k < NDIST; ++k)
284     printf(" %c%u: %6u %2u%%\n",
285            k == NDIST - 1 ? '>' : 'd', k == NDIST - 1 ? k - 1 : k,
286            dist[k], cnt ? dist[k] * 100 / cnt : 0);
287   return 0;
288 }
289 
badinput(const char * fn)290 static void badinput(const char *fn) {
291   fprintf(stderr, "%s: %s: bad format\n", progname, fn);
292   exit(2);
293 }
294 
/* Parse an unsigned decimal number from f.
 *
 * f:  input stream, positioned at the first digit.
 * np: receives the parsed value.
 * fn: input file name, used only for the diagnostic.
 *
 * Returns the first character after the digits (possibly EOF), which has
 * been consumed from the stream.  A missing first digit, or a value that
 * does not fit in 32 bits, is fatal via badinput().
 */
static int getnum(FILE *f, unsigned *np, const char *fn) {
  unsigned n;
  int c = getc(f);
  if (c < '0' || c > '9') badinput(fn);
  n = c - '0';
  while((c = getc(f)) >= '0' && c <= '9') {
    c -= '0';
    /* n * 10 + c <= 0xffffffff  <=>  n <= (0xffffffff - c) / 10.
     * This is the exact bound, so the largest representable values
     * (up to 4294967295) are accepted and nothing larger is. */
    if ((0xffffffffu - (unsigned)c) / 10 < n) badinput(fn);
    n = n * 10 + c;
  }
  *np = n;
  return c;
}
308 
309 static void
addrec(struct cdb_make * cdbmp,const unsigned char * key,unsigned klen,const unsigned char * val,unsigned vlen,int flags)310 addrec(struct cdb_make *cdbmp,
311        const unsigned char *key, unsigned klen,
312        const unsigned char *val, unsigned vlen,
313        int flags)
314 {
315   int r = cdb_make_put(cdbmp, key, klen, val, vlen, flags & F_DUPMASK);
316   if (r < 0)
317     error(errno, "cdb_make_put");
318   else if (r && (flags & F_WARNDUP)) {
319     fprintf(stderr, "%s: key `", progname);
320     fwrite(key, 1, klen, stderr);
321     fputs("' duplicated\n", stderr);
322     if (flags & F_ERRDUP)
323       exit(1);
324   }
325 }
326 
327 static void
dofile_cdb(struct cdb_make * cdbmp,FILE * f,const char * fn,int flags)328 dofile_cdb(struct cdb_make *cdbmp, FILE *f, const char *fn, int flags)
329 {
330   unsigned klen, vlen;
331   int c;
332   while((c = getc(f)) == '+') {
333     if ((c = getnum(f, &klen, fn)) != ',' ||
334         (c = getnum(f, &vlen, fn)) != ':' ||
335         0xffffffff - klen < vlen)
336       badinput(fn);
337     allocbuf(klen + vlen);
338     fget(f, buf, klen, NULL, 0);
339     if (getc(f) != '-' || getc(f) != '>') badinput(fn);
340     fget(f, buf + klen, vlen, NULL, 0);
341     if (getc(f) != '\n') badinput(fn);
342     addrec(cdbmp, buf, klen, buf + klen, vlen, flags);
343   }
344   if (c != '\n') badinput(fn);
345 }
346 
347 static void
dofile_ln(struct cdb_make * cdbmp,FILE * f,int flags)348 dofile_ln(struct cdb_make *cdbmp, FILE *f, int flags)
349 {
350   unsigned char *k, *v;
351   while(ufgets(buf, blen, f) != NULL) {
352     unsigned l = 0;
353     for (;;) {
354       l += ustrlen(buf + l);
355       v = buf + l;
356       if (v > buf && v[-1] == '\n') {
357         v[-1] = '\0';
358         break;
359       }
360       if (l < blen)
361         allocbuf(l + 512);
362       if (!ufgets(buf + l, blen - l, f))
363         break;
364     }
365     k = buf;
366     while(*k == ' ' || *k == '\t') ++k;
367     if (!*k || *k == '#')
368       continue;
369     v = k;
370     while(*v && *v != ' ' && *v != '\t') ++v;
371     if (*v) *v++ = '\0';
372     while(*v == ' ' || *v == '\t') ++v;
373     addrec(cdbmp, k, ustrlen(k), v, ustrlen(v), flags);
374   }
375 }
376 
377 static void
dofile(struct cdb_make * cdbmp,FILE * f,const char * fn,int flags)378 dofile(struct cdb_make *cdbmp, FILE *f, const char *fn, int flags)
379 {
380   if (flags & F_MAP)
381     dofile_ln(cdbmp, f, flags);
382   else
383     dofile_cdb(cdbmp, f, fn, flags);
384   if (ferror(f))
385     error(errno, "read error");
386 }
387 
388 static int
cmode(char * dbname,char * tmpname,int argc,char ** argv,int flags,int perms)389 cmode(char *dbname, char *tmpname, int argc, char **argv, int flags, int perms)
390 {
391   struct cdb_make cdb;
392   int fd;
393   if (!tmpname) {
394     tmpname = (char*)malloc(strlen(dbname) + 5);
395     if (!tmpname)
396       error(ENOMEM, "unable to allocate memory");
397     /* OpenBSD compiler complains about strcat() and strcpy() usage,
398      * and suggests to replace them with (non-standard) strlcat() and
399      * strlcpy().  This is silly, since it's obvious that usage of
400      * original str*() routines here is correct.
401      * This is compiler/environment bug, not tinycdb bug, so please
402      * fix it in proper place, and don't send patches to me.  Thank you.
403      */
404     strcat(strcpy(tmpname, dbname), ".tmp");
405   }
406   else if (strcmp(tmpname, "-") == 0 || strcmp(tmpname, dbname) == 0)
407     tmpname = dbname;
408   if (perms >= 0)
409     umask(0);
410   unlink(tmpname);
411   fd = open(tmpname, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW,
412             perms >= 0 ? perms : 0666);
413   if (fd < 0)
414     error(errno, "unable to create %s", tmpname);
415   cdb_make_start(&cdb, fd);
416   allocbuf(4096);
417   if (argc) {
418     int i;
419     for (i = 0; i < argc; ++i) {
420       if (strcmp(argv[i], "-") == 0)
421         dofile(&cdb, stdin, "(stdin)", flags);
422       else {
423         FILE *f = fopen(argv[i], "r");
424         if (!f)
425           error(errno, "%s", argv[i]);
426         dofile(&cdb, f, argv[i], flags);
427         fclose(f);
428       }
429     }
430   }
431   else
432     dofile(&cdb, stdin, "(stdin)", flags);
433   if (cdb_make_finish(&cdb) != 0)
434     error(errno, "cdb_make_finish");
435   close(fd);
436   if (tmpname != dbname)
437     if (rename(tmpname, dbname) != 0)
438       error(errno, "rename %s->%s", tmpname, dbname);
439   return 0;
440 }
441 
main(int argc,char ** argv)442 int main(int argc, char **argv)
443 {
444   int c;
445   char mode = 0;
446   char *tmpname = NULL;
447   int flags = 0;
448   int num = 0;
449   int r;
450   int perms = -1;
451   extern char *optarg;
452   extern int optind;
453 
454 #ifdef HAVE_PROGRAM_INVOCATION_SHORT_NAME
455   argv[0] = progname;
456 #else
457   if (argv[0] && (progname = strrchr(argv[0], '/')) != NULL)
458     argv[0] = ++progname;
459   else
460     progname = argv[0];
461 #endif
462 
463   if (argc <= 1)
464     error(0, "no arguments given");
465 
466   while((c = getopt(argc, argv, "qdlcsht:n:mwruep:0")) != EOF)
467     switch(c) {
468     case 'q': case 'd':  case 'l': case 'c': case 's':
469       if (mode && mode != c)
470         error(0, "different modes of operation requested");
471       mode = c;
472       break;
473     case 't': tmpname = optarg; break;
474     case 'w': flags |= F_WARNDUP; break;
475     case 'e': flags |= F_WARNDUP | F_ERRDUP; break;
476     case 'r': flags = (flags & ~F_DUPMASK) | CDB_PUT_REPLACE; break;
477     case 'u': flags = (flags & ~F_DUPMASK) | CDB_PUT_INSERT; break;
478     case '0': flags = (flags & ~F_DUPMASK) | CDB_PUT_REPLACE0; break;
479     case 'm': flags |= F_MAP; break;
480     case 'p': {
481       char *ep = NULL;
482       perms = strtol(optarg, &ep, 0);
483       if (perms < 0 || perms > 0777 || (ep && *ep))
484         error(0, "invalid permissions `%s'", optarg);
485       break;
486     }
487     case 'n': {
488       char *ep = NULL;
489       if ((num = strtol(optarg, &ep, 0)) <= 0 || (ep && *ep))
490         error(0, "invalid record number `%s'", optarg);
491       break;
492     }
493     case 'h':
494 #define strify(x) _strify(x)
495 #define _strify(x) #x
496       printf("\
497 %s: Constant DataBase (CDB) tool version " strify(TINYCDB_VERSION)
498 ". Usage is:\n\
499  query:  %s -q [-m] [-n recno|-a] cdbfile key\n\
500  dump:   %s -d [-m] [cdbfile|-]\n\
501  list:   %s -l [-m] [cdbfile|-]\n\
502  create: %s -c [-m] [-wrue0] [-t tempfile|-] [-p perms] cdbfile [infile...]\n\
503  stats:  %s -s [cdbfile|-]\n\
504  help:   %s -h\n\
505 ", progname, progname, progname, progname, progname, progname, progname);
506       return 0;
507 
508     default:
509       error(0, NULL);
510     }
511 
512   argv += optind;
513   argc -= optind;
514   switch(mode) {
515     case 'q':
516       if (argc < 2) error(0, "no database or key to query specified");
517       if (argc > 2) error(0, "extra arguments in command line");
518       r = qmode(argv[0], argv[1], num, flags);
519       break;
520     case 'c':
521       if (!argc) error(0, "no database name specified");
522       if ((flags & F_WARNDUP) && !(flags & F_DUPMASK))
523         flags |= CDB_PUT_WARN;
524       r = cmode(argv[0], tmpname, argc - 1, argv + 1, flags, perms);
525       break;
526     case 'd':
527     case 'l':
528       if (argc > 1) error(0, "extra arguments for dump/list");
529       r = dmode(argc ? argv[0] : "-", mode, flags);
530       break;
531     case 's':
532       if (argc > 1) error(0, "extra argument(s) for stats");
533       r = smode(argc ? argv[0] : "-");
534       break;
535     default:
536       error(0, "no -q, -c, -d, -l or -s option specified");
537   }
538   if (r < 0 || fflush(stdout) < 0)
539     error(errno, "unable to write: %d", c);
540   return r;
541 }
542 
543