1 /* cdb.c: cdb command line tool
2 *
3 * This file is a part of tinycdb package by Michael Tokarev, mjt@corpit.ru.
4 * Public domain.
5 */
6
7 #define _GNU_SOURCE /* #define this even on Windows */
8
9 #ifdef _WIN32 /* by the way, how about win64? */
10 # include <io.h>
11 # include <malloc.h>
12 /* This pragma suppresses snippy VC warnings for POSIX functions like read() */
13 # pragma warning(disable: 4996)
14 #else
15 # include <unistd.h>
16 #endif
17
18 #include <sys/types.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <stdarg.h>
23 #include <fcntl.h>
24 #include <errno.h>
25 #include <sys/stat.h>
26 #include "cdb.h"
27
28 #ifndef EPROTO
29 # define EPROTO EINVAL
30 #endif
31
32 #ifdef __GLIBC__
33 # define HAVE_PROGRAM_INVOCATION_SHORT_NAME
34 #endif
35
36 #ifdef HAVE_PROGRAM_INVOCATION_SHORT_NAME
37 # define progname program_invocation_short_name
38 #else
39 static char *progname;
40 #endif
41
42 #ifndef O_NOFOLLOW
43 # define O_NOFOLLOW 0
44 #endif
45
46 #ifdef _WIN32
47 # define FBINMODE "b"
48 #else
49 # define FBINMODE
50 #endif
51
52 #define F_DUPMASK 0x000f
53 #define F_WARNDUP 0x0100
54 #define F_ERRDUP 0x0200
55 #define F_MAP 0x1000 /* map format (or else CDB native format) */
56
/* Silly defines just to suppress silly compiler warnings.
 * The thing is, trivial routines like strlen(), fgets() etc. expect
 * a char* argument, and GCC>=4 complains about using unsigned char* here.
 * Silly silly silly.
 */
62 #ifdef __GNUC__
/* strlen() for unsigned char strings; hides the pointer-signedness cast. */
static inline size_t ustrlen(const unsigned char *s) {
  const char *str = (const char *)s;
  return strlen(str);
}
/* fgets() for unsigned char buffers; hides the pointer-signedness casts.
 * Returns whatever fgets() returns, converted back to unsigned char*.
 */
static inline unsigned char *ufgets(unsigned char *s, int size, FILE *f) {
  char *line = fgets((char *)s, size, f);
  return (unsigned char *)line;
}
69 #else
70 # define ustrlen strlen
71 # define ufgets fgets
72 #endif
73
74 static unsigned char *buf; /* shared scratch buffer; (re)allocated by allocbuf() */
75 static unsigned blen; /* current size of buf in bytes, as recorded by allocbuf() */
76
77 static void
78 #ifdef __GNUC__
79 __attribute__((noreturn,format(printf,2,3)))
80 #endif
error(int errnum,const char * fmt,...)81 error(int errnum, const char *fmt, ...)
82 {
83 if (fmt) {
84 va_list ap;
85 fprintf(stderr, "%s: ", progname);
86 va_start(ap, fmt);
87 vfprintf(stderr, fmt, ap);
88 va_end(ap);
89 }
90 if (errnum)
91 fprintf(stderr, ": %s\n", strerror(errnum));
92 else {
93 if (fmt) putc('\n', stderr);
94 fprintf(stderr, "%s: try `%s -h' for help\n", progname, progname);
95 }
96 fflush(stderr);
97 exit(errnum ? 111 : 2);
98 }
99
allocbuf(unsigned len)100 static void allocbuf(unsigned len) {
101 if (blen < len) {
102 buf = (unsigned char*)(buf ? realloc(buf, len) : malloc(len));
103 if (!buf)
104 error(ENOMEM, "unable to allocate %u bytes", len);
105 blen = len;
106 }
107 }
108
qmode(char * dbname,const char * key,int num,int flags)109 static int qmode(char *dbname, const char *key, int num, int flags)
110 {
111 struct cdb c;
112 struct cdb_find cf;
113 int r;
114 int n, found;
115
116 r = open(dbname, O_RDONLY);
117 if (r < 0 || cdb_init(&c, r) != 0)
118 error(errno, "unable to open database `%s'", dbname);
119
120 r = cdb_findinit(&cf, &c, key, strlen(key));
121 if (!r)
122 return 100;
123 else if (r < 0)
124 error(errno, "%s", key);
125 n = 0; found = 0;
126 while((r = cdb_findnext(&cf)) > 0) {
127 ++n;
128 if (num && num != n) continue;
129 ++found;
130 allocbuf(cdb_datalen(&c));
131 if (cdb_read(&c, buf, cdb_datalen(&c), cdb_datapos(&c)) != 0)
132 error(errno, "unable to read value");
133 fwrite(buf, 1, cdb_datalen(&c), stdout);
134 if (flags & F_MAP) putchar('\n');
135 if (num)
136 break;
137 }
138 if (r < 0)
139 error(0, "%s", key);
140 return found ? 0 : 100;
141 }
142
143 static void
fget(FILE * f,unsigned char * b,unsigned len,unsigned * posp,unsigned limit)144 fget(FILE *f, unsigned char *b, unsigned len, unsigned *posp, unsigned limit)
145 {
146 if (posp && limit - *posp < len)
147 error(EPROTO, "invalid database format");
148 if (fread(b, 1, len, f) != len) {
149 if (ferror(f)) error(errno, "unable to read");
150 fprintf(stderr, "%s: unable to read: short file\n", progname);
151 exit(2);
152 }
153 if (posp) *posp += len;
154 }
155
156 static int
fcpy(FILE * fi,FILE * fo,unsigned len,unsigned * posp,unsigned limit)157 fcpy(FILE *fi, FILE *fo, unsigned len, unsigned *posp, unsigned limit)
158 {
159 while(len > blen) {
160 fget(fi, buf, blen, posp, limit);
161 if (fo && fwrite(buf, 1, blen, fo) != blen) return -1;
162 len -= blen;
163 }
164 if (len) {
165 fget(fi, buf, len, posp, limit);
166 if (fo && fwrite(buf, 1, len, fo) != len) return -1;
167 }
168 return 0;
169 }
170
171 static int
dmode(char * dbname,char mode,int flags)172 dmode(char *dbname, char mode, int flags)
173 {
174 unsigned eod, klen, vlen;
175 unsigned pos = 0;
176 FILE *f;
177 if (strcmp(dbname, "-") == 0)
178 f = stdin;
179 else if ((f = fopen(dbname, "r" FBINMODE)) == NULL)
180 error(errno, "open %s", dbname);
181 allocbuf(2048);
182 fget(f, buf, 2048, &pos, 2048);
183 eod = cdb_unpack(buf);
184 while(pos < eod) {
185 fget(f, buf, 8, &pos, eod);
186 klen = cdb_unpack(buf);
187 vlen = cdb_unpack(buf + 4);
188 if (!(flags & F_MAP))
189 if (printf(mode == 'd' ? "+%u,%u:" : "+%u:", klen, vlen) < 0) return -1;
190 if (fcpy(f, stdout, klen, &pos, eod) != 0) return -1;
191 if (mode == 'd')
192 if (fputs(flags & F_MAP ? " " : "->", stdout) < 0)
193 return -1;
194 if (fcpy(f, mode == 'd' ? stdout : NULL, vlen, &pos, eod) != 0)
195 return -1;
196 if (putc('\n', stdout) < 0)
197 return -1;
198 }
199 if (pos != eod)
200 error(EPROTO, "invalid cdb file format");
201 if (!(flags & F_MAP))
202 if (putc('\n', stdout) < 0)
203 return -1;
204 return 0;
205 }
206
smode(char * dbname)207 static int smode(char *dbname) {
208 FILE *f;
209 unsigned pos, eod;
210 unsigned cnt = 0;
211 unsigned kmin = 0, kmax = 0, ktot = 0;
212 unsigned vmin = 0, vmax = 0, vtot = 0;
213 unsigned hmin = 0, hmax = 0, htot = 0, hcnt = 0;
214 #define NDIST 11
215 unsigned dist[NDIST];
216 unsigned char toc[2048];
217 unsigned k;
218
219 if (strcmp(dbname, "-") == 0)
220 f = stdin;
221 else if ((f = fopen(dbname, "r" FBINMODE)) == NULL)
222 error(errno, "open %s", dbname);
223
224 pos = 0;
225 fget(f, toc, 2048, &pos, 2048);
226
227 allocbuf(2048);
228
229 eod = cdb_unpack(toc);
230 while(pos < eod) {
231 unsigned klen, vlen;
232 fget(f, buf, 8, &pos, eod);
233 klen = cdb_unpack(buf);
234 vlen = cdb_unpack(buf + 4);
235 fcpy(f, NULL, klen, &pos, eod);
236 fcpy(f, NULL, vlen, &pos, eod);
237 ++cnt;
238 ktot += klen;
239 if (!kmin || kmin > klen) kmin = klen;
240 if (kmax < klen) kmax = klen;
241 vtot += vlen;
242 if (!vmin || vmin > vlen) vmin = vlen;
243 if (vmax < vlen) vmax = vlen;
244 vlen += klen;
245 }
246 if (pos != eod) error(EPROTO, "invalid cdb file format");
247
248 for (k = 0; k < NDIST; ++k)
249 dist[k] = 0;
250 for (k = 0; k < 256; ++k) {
251 unsigned i = cdb_unpack(toc + (k << 3));
252 unsigned hlen = cdb_unpack(toc + (k << 3) + 4);
253 if (i != pos) error(EPROTO, "invalid cdb hash table");
254 if (!hlen) continue;
255 for (i = 0; i < hlen; ++i) {
256 unsigned h;
257 fget(f, buf, 8, &pos, 0xffffffff);
258 if (!cdb_unpack(buf + 4)) continue;
259 h = (cdb_unpack(buf) >> 8) % hlen;
260 if (h == i) h = 0;
261 else {
262 if (h < i) h = i - h;
263 else h = hlen - h + i;
264 if (h >= NDIST) h = NDIST - 1;
265 }
266 ++dist[h];
267 }
268 if (!hmin || hmin > hlen) hmin = hlen;
269 if (hmax < hlen) hmax = hlen;
270 htot += hlen;
271 ++hcnt;
272 }
273 printf("number of records: %u\n", cnt);
274 printf("key min/avg/max length: %u/%u/%u\n",
275 kmin, cnt ? (ktot + cnt / 2) / cnt : 0, kmax);
276 printf("val min/avg/max length: %u/%u/%u\n",
277 vmin, cnt ? (vtot + cnt / 2) / cnt : 0, vmax);
278 printf("hash tables/entries/collisions: %u/%u/%u\n",
279 hcnt, htot, cnt - dist[0]);
280 printf("hash table min/avg/max length: %u/%u/%u\n",
281 hmin, hcnt ? (htot + hcnt / 2) / hcnt : 0, hmax);
282 printf("hash table distances:\n");
283 for(k = 0; k < NDIST; ++k)
284 printf(" %c%u: %6u %2u%%\n",
285 k == NDIST - 1 ? '>' : 'd', k == NDIST - 1 ? k - 1 : k,
286 dist[k], cnt ? dist[k] * 100 / cnt : 0);
287 return 0;
288 }
289
badinput(const char * fn)290 static void badinput(const char *fn) {
291 fprintf(stderr, "%s: %s: bad format\n", progname, fn);
292 exit(2);
293 }
294
/* Parse an unsigned decimal number from f.
 *
 * f  - input stream, positioned at the first digit.
 * np - receives the parsed value.
 * fn - input name, used only for the diagnostic.
 *
 * Returns the first character after the digits (possibly EOF).
 * If there is no digit at all, or the value does not fit into
 * 32 bits, the program is terminated through badinput().
 */
static int getnum(FILE *f, unsigned *np, const char *fn) {
  unsigned n;
  int c = getc(f);
  if (c < '0' || c > '9') badinput(fn);
  n = c - '0';
  while((c = getc(f)) >= '0' && c <= '9') {
    unsigned digit = c - '0';
    /* exact test for n * 10 + digit > 0xffffffff, done without overflowing */
    if (n > (0xffffffffu - digit) / 10) badinput(fn);
    n = n * 10 + digit;
  }
  *np = n;
  return c;
}
308
309 static void
addrec(struct cdb_make * cdbmp,const unsigned char * key,unsigned klen,const unsigned char * val,unsigned vlen,int flags)310 addrec(struct cdb_make *cdbmp,
311 const unsigned char *key, unsigned klen,
312 const unsigned char *val, unsigned vlen,
313 int flags)
314 {
315 int r = cdb_make_put(cdbmp, key, klen, val, vlen, flags & F_DUPMASK);
316 if (r < 0)
317 error(errno, "cdb_make_put");
318 else if (r && (flags & F_WARNDUP)) {
319 fprintf(stderr, "%s: key `", progname);
320 fwrite(key, 1, klen, stderr);
321 fputs("' duplicated\n", stderr);
322 if (flags & F_ERRDUP)
323 exit(1);
324 }
325 }
326
327 static void
dofile_cdb(struct cdb_make * cdbmp,FILE * f,const char * fn,int flags)328 dofile_cdb(struct cdb_make *cdbmp, FILE *f, const char *fn, int flags)
329 {
330 unsigned klen, vlen;
331 int c;
332 while((c = getc(f)) == '+') {
333 if ((c = getnum(f, &klen, fn)) != ',' ||
334 (c = getnum(f, &vlen, fn)) != ':' ||
335 0xffffffff - klen < vlen)
336 badinput(fn);
337 allocbuf(klen + vlen);
338 fget(f, buf, klen, NULL, 0);
339 if (getc(f) != '-' || getc(f) != '>') badinput(fn);
340 fget(f, buf + klen, vlen, NULL, 0);
341 if (getc(f) != '\n') badinput(fn);
342 addrec(cdbmp, buf, klen, buf + klen, vlen, flags);
343 }
344 if (c != '\n') badinput(fn);
345 }
346
347 static void
dofile_ln(struct cdb_make * cdbmp,FILE * f,int flags)348 dofile_ln(struct cdb_make *cdbmp, FILE *f, int flags)
349 {
350 unsigned char *k, *v;
351 while(ufgets(buf, blen, f) != NULL) {
352 unsigned l = 0;
353 for (;;) {
354 l += ustrlen(buf + l);
355 v = buf + l;
356 if (v > buf && v[-1] == '\n') {
357 v[-1] = '\0';
358 break;
359 }
360 if (l < blen)
361 allocbuf(l + 512);
362 if (!ufgets(buf + l, blen - l, f))
363 break;
364 }
365 k = buf;
366 while(*k == ' ' || *k == '\t') ++k;
367 if (!*k || *k == '#')
368 continue;
369 v = k;
370 while(*v && *v != ' ' && *v != '\t') ++v;
371 if (*v) *v++ = '\0';
372 while(*v == ' ' || *v == '\t') ++v;
373 addrec(cdbmp, k, ustrlen(k), v, ustrlen(v), flags);
374 }
375 }
376
377 static void
dofile(struct cdb_make * cdbmp,FILE * f,const char * fn,int flags)378 dofile(struct cdb_make *cdbmp, FILE *f, const char *fn, int flags)
379 {
380 if (flags & F_MAP)
381 dofile_ln(cdbmp, f, flags);
382 else
383 dofile_cdb(cdbmp, f, fn, flags);
384 if (ferror(f))
385 error(errno, "read error");
386 }
387
388 static int
cmode(char * dbname,char * tmpname,int argc,char ** argv,int flags,int perms)389 cmode(char *dbname, char *tmpname, int argc, char **argv, int flags, int perms)
390 {
391 struct cdb_make cdb;
392 int fd;
393 if (!tmpname) {
394 tmpname = (char*)malloc(strlen(dbname) + 5);
395 if (!tmpname)
396 error(ENOMEM, "unable to allocate memory");
397 /* OpenBSD compiler complains about strcat() and strcpy() usage,
398 * and suggests to replace them with (non-standard) strlcat() and
399 * strlcpy(). This is silly, since it's obvious that usage of
400 * original str*() routines here is correct.
401 * This is compiler/environment bug, not tinycdb bug, so please
402 * fix it in proper place, and don't send patches to me. Thank you.
403 */
404 strcat(strcpy(tmpname, dbname), ".tmp");
405 }
406 else if (strcmp(tmpname, "-") == 0 || strcmp(tmpname, dbname) == 0)
407 tmpname = dbname;
408 if (perms >= 0)
409 umask(0);
410 unlink(tmpname);
411 fd = open(tmpname, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW,
412 perms >= 0 ? perms : 0666);
413 if (fd < 0)
414 error(errno, "unable to create %s", tmpname);
415 cdb_make_start(&cdb, fd);
416 allocbuf(4096);
417 if (argc) {
418 int i;
419 for (i = 0; i < argc; ++i) {
420 if (strcmp(argv[i], "-") == 0)
421 dofile(&cdb, stdin, "(stdin)", flags);
422 else {
423 FILE *f = fopen(argv[i], "r");
424 if (!f)
425 error(errno, "%s", argv[i]);
426 dofile(&cdb, f, argv[i], flags);
427 fclose(f);
428 }
429 }
430 }
431 else
432 dofile(&cdb, stdin, "(stdin)", flags);
433 if (cdb_make_finish(&cdb) != 0)
434 error(errno, "cdb_make_finish");
435 close(fd);
436 if (tmpname != dbname)
437 if (rename(tmpname, dbname) != 0)
438 error(errno, "rename %s->%s", tmpname, dbname);
439 return 0;
440 }
441
main(int argc,char ** argv)442 int main(int argc, char **argv)
443 {
444 int c;
445 char mode = 0;
446 char *tmpname = NULL;
447 int flags = 0;
448 int num = 0;
449 int r;
450 int perms = -1;
451 extern char *optarg;
452 extern int optind;
453
454 #ifdef HAVE_PROGRAM_INVOCATION_SHORT_NAME
455 argv[0] = progname;
456 #else
457 if (argv[0] && (progname = strrchr(argv[0], '/')) != NULL)
458 argv[0] = ++progname;
459 else
460 progname = argv[0];
461 #endif
462
463 if (argc <= 1)
464 error(0, "no arguments given");
465
466 while((c = getopt(argc, argv, "qdlcsht:n:mwruep:0")) != EOF)
467 switch(c) {
468 case 'q': case 'd': case 'l': case 'c': case 's':
469 if (mode && mode != c)
470 error(0, "different modes of operation requested");
471 mode = c;
472 break;
473 case 't': tmpname = optarg; break;
474 case 'w': flags |= F_WARNDUP; break;
475 case 'e': flags |= F_WARNDUP | F_ERRDUP; break;
476 case 'r': flags = (flags & ~F_DUPMASK) | CDB_PUT_REPLACE; break;
477 case 'u': flags = (flags & ~F_DUPMASK) | CDB_PUT_INSERT; break;
478 case '0': flags = (flags & ~F_DUPMASK) | CDB_PUT_REPLACE0; break;
479 case 'm': flags |= F_MAP; break;
480 case 'p': {
481 char *ep = NULL;
482 perms = strtol(optarg, &ep, 0);
483 if (perms < 0 || perms > 0777 || (ep && *ep))
484 error(0, "invalid permissions `%s'", optarg);
485 break;
486 }
487 case 'n': {
488 char *ep = NULL;
489 if ((num = strtol(optarg, &ep, 0)) <= 0 || (ep && *ep))
490 error(0, "invalid record number `%s'", optarg);
491 break;
492 }
493 case 'h':
494 #define strify(x) _strify(x)
495 #define _strify(x) #x
496 printf("\
497 %s: Constant DataBase (CDB) tool version " strify(TINYCDB_VERSION)
498 ". Usage is:\n\
499 query: %s -q [-m] [-n recno|-a] cdbfile key\n\
500 dump: %s -d [-m] [cdbfile|-]\n\
501 list: %s -l [-m] [cdbfile|-]\n\
502 create: %s -c [-m] [-wrue0] [-t tempfile|-] [-p perms] cdbfile [infile...]\n\
503 stats: %s -s [cdbfile|-]\n\
504 help: %s -h\n\
505 ", progname, progname, progname, progname, progname, progname, progname);
506 return 0;
507
508 default:
509 error(0, NULL);
510 }
511
512 argv += optind;
513 argc -= optind;
514 switch(mode) {
515 case 'q':
516 if (argc < 2) error(0, "no database or key to query specified");
517 if (argc > 2) error(0, "extra arguments in command line");
518 r = qmode(argv[0], argv[1], num, flags);
519 break;
520 case 'c':
521 if (!argc) error(0, "no database name specified");
522 if ((flags & F_WARNDUP) && !(flags & F_DUPMASK))
523 flags |= CDB_PUT_WARN;
524 r = cmode(argv[0], tmpname, argc - 1, argv + 1, flags, perms);
525 break;
526 case 'd':
527 case 'l':
528 if (argc > 1) error(0, "extra arguments for dump/list");
529 r = dmode(argc ? argv[0] : "-", mode, flags);
530 break;
531 case 's':
532 if (argc > 1) error(0, "extra argument(s) for stats");
533 r = smode(argc ? argv[0] : "-");
534 break;
535 default:
536 error(0, "no -q, -c, -d, -l or -s option specified");
537 }
538 if (r < 0 || fflush(stdout) < 0)
539 error(errno, "unable to write: %d", c);
540 return r;
541 }
542
543