1 /* ncdu - NCurses Disk Usage
2
3 Copyright (c) 2007-2021 Yoran Heling
4
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
12
13 The above copyright notice and this permission notice shall be included
14 in all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 */
25
26 /* This JSON parser has the following limitations:
27 * - No support for character encodings incompatible with ASCII (e.g.
28 * UTF-16/32)
29 * - Doesn't validate UTF-8 correctness (in fact, besides the ASCII part this
30 * parser doesn't know anything about encoding).
31 * - Doesn't validate that there are no duplicate keys in JSON objects.
32 * - Isn't very strict with validating non-integer numbers.
33 */
34
35 #include "global.h"
36
37 #include <stdlib.h>
38 #include <stdio.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <limits.h>
42
43
44 /* Max. length of any JSON string we're interested in. A string may of course
45 * be larger, we're not going to read more than MAX_VAL in memory. If a string
46 * we're interested in (e.g. a file name) is longer than this, reading the
47 * import will results in an error. */
48 #define MAX_VAL (32*1024)
49
50 /* Minimum number of bytes we request from fread() */
51 #define MIN_READ_SIZE 1024
52
53 /* Read buffer size. Must be at least 2*MIN_READ_SIZE, everything larger
54 * improves performance. */
55 #define READ_BUF_SIZE (32*1024)
56
57
58 int dir_import_active = 0;
59
60
61 /* Use a struct for easy batch-allocation and deallocation of state data. */
62 static struct ctx {
63 FILE *stream;
64
65 int line;
66 int byte;
67 int eof;
68 int items;
69 char *buf; /* points into readbuf, always zero-terminated. */
70 char *lastfill; /* points into readbuf, location of the zero terminator. */
71
72 /* scratch space */
73 struct dir *buf_dir;
74 struct dir_ext buf_ext[1];
75 unsigned int nlink;
76
77 char buf_name[MAX_VAL];
78 char val[MAX_VAL];
79 char readbuf[READ_BUF_SIZE];
80 } *ctx;
81
82
83 /* Fills readbuf with data from the stream. *buf will have at least n (<
84 * READ_BUF_SIZE) bytes available, unless the stream reached EOF or an error
85 * occurred. If the file data contains a null-type, this is considered an error.
86 * Returns 0 on success, non-zero on error. */
fill(int n)87 static int fill(int n) {
88 int r;
89
90 if(ctx->eof)
91 return 0;
92
93 r = READ_BUF_SIZE-(ctx->lastfill - ctx->readbuf); /* number of bytes left in the buffer */
94 if(n < r)
95 n = r-1;
96 if(n < MIN_READ_SIZE) {
97 r = ctx->lastfill - ctx->buf; /* number of unread bytes left in the buffer */
98 memcpy(ctx->readbuf, ctx->buf, r);
99 ctx->lastfill = ctx->readbuf + r;
100 ctx->buf = ctx->readbuf;
101 n = READ_BUF_SIZE-r-1;
102 }
103
104 do {
105 r = fread(ctx->lastfill, 1, n, ctx->stream);
106 if(r != n) {
107 if(feof(ctx->stream))
108 ctx->eof = 1;
109 else if(ferror(ctx->stream) && errno != EINTR) {
110 dir_seterr("Read error: %s", strerror(errno));
111 return 1;
112 }
113 }
114
115 ctx->lastfill[r] = 0;
116 if(strlen(ctx->lastfill) != (size_t)r) {
117 dir_seterr("Zero-byte found in JSON stream");
118 return 1;
119 }
120 ctx->lastfill += r;
121 n -= r;
122 } while(!ctx->eof && n > MIN_READ_SIZE);
123
124 return 0;
125 }
126
127
128 /* Two macros that break function calling behaviour, but are damn convenient */
129 #define E(_x, _m) do {\
130 if(_x) {\
131 if(!dir_fatalerr)\
132 dir_seterr("Line %d byte %d: %s", ctx->line, ctx->byte, _m);\
133 return 1;\
134 }\
135 } while(0)
136
137 #define C(_x) do {\
138 if(_x)\
139 return 1;\
140 } while(0)
141
142
143 /* Require at least n bytes in the buffer, throw an error on early EOF.
144 * (Macro to quickly handle the common case) */
145 #define rfill1 (!*ctx->buf && _rfill(1))
146 #define rfill(_n) ((ctx->lastfill - ctx->buf < (_n)) && _rfill(_n))
147
_rfill(int n)148 static int _rfill(int n) {
149 C(fill(n));
150 E(ctx->lastfill - ctx->buf < n, "Unexpected EOF");
151 return 0;
152 }
153
154
155 /* Consumes n bytes from the buffer. */
con(int n)156 static inline void con(int n) {
157 ctx->buf += n;
158 ctx->byte += n;
159 }
160
161
162 /* Consumes any whitespace. If *ctx->buf == 0 after this function, we've reached EOF. */
cons(void)163 static int cons(void) {
164 while(1) {
165 C(!*ctx->buf && fill(1));
166
167 switch(*ctx->buf) {
168 case 0x0A:
169 /* Special-case the newline-character with respect to consuming stuff
170 * from the buffer. This is the only function which *can* consume the
171 * newline character, so it's more efficient to handle it in here rather
172 * than in the more general con(). */
173 ctx->buf++;
174 ctx->line++;
175 ctx->byte = 0;
176 break;
177 case 0x20:
178 case 0x09:
179 case 0x0D:
180 con(1);
181 break;
182 default:
183 return 0;
184 }
185 }
186 }
187
188
rstring_esc(char ** dest,int * destlen)189 static int rstring_esc(char **dest, int *destlen) {
190 unsigned int n;
191
192 C(rfill1);
193
194 #define ap(c) if(*destlen > 1) { *((*dest)++) = c; (*destlen)--; }
195 switch(*ctx->buf) {
196 case '"': ap('"'); con(1); break;
197 case '\\': ap('\\'); con(1); break;
198 case '/': ap('/'); con(1); break;
199 case 'b': ap(0x08); con(1); break;
200 case 'f': ap(0x0C); con(1); break;
201 case 'n': ap(0x0A); con(1); break;
202 case 'r': ap(0x0D); con(1); break;
203 case 't': ap(0x09); con(1); break;
204 case 'u':
205 C(rfill(5));
206 #define hn(n) (n >= '0' && n <= '9' ? n-'0' : n >= 'A' && n <= 'F' ? n-'A'+10 : n >= 'a' && n <= 'f' ? n-'a'+10 : 1<<16)
207 n = (hn(ctx->buf[1])<<12) + (hn(ctx->buf[2])<<8) + (hn(ctx->buf[3])<<4) + hn(ctx->buf[4]);
208 #undef hn
209 if(n <= 0x007F) {
210 ap(n);
211 } else if(n <= 0x07FF) {
212 ap(0xC0 | (n>>6));
213 ap(0x80 | (n & 0x3F));
214 } else if(n <= 0xFFFF) {
215 ap(0xE0 | (n>>12));
216 ap(0x80 | ((n>>6) & 0x3F));
217 ap(0x80 | (n & 0x3F));
218 } else /* this happens if there was an invalid character (n >= (1<<16)) */
219 E(1, "Invalid character in \\u escape");
220 con(5);
221 break;
222 default:
223 E(1, "Invalid escape sequence");
224 }
225 #undef ap
226 return 0;
227 }
228
229
230 /* Parse a JSON string and write it to *dest (max. destlen). Consumes but
231 * otherwise ignores any characters if the string is longer than destlen. *dest
232 * will be null-terminated, dest[destlen-1] = 0 if the string was cut just long
233 * enough of was cut off. That byte will be left untouched if the string is
234 * small enough. */
rstring(char * dest,int destlen)235 static int rstring(char *dest, int destlen) {
236 C(rfill1);
237 E(*ctx->buf != '"', "Expected string");
238 con(1);
239
240 while(1) {
241 C(rfill1);
242 if(*ctx->buf == '"')
243 break;
244 if(*ctx->buf == '\\') {
245 con(1);
246 C(rstring_esc(&dest, &destlen));
247 continue;
248 }
249 E((unsigned char)*ctx->buf <= 0x1F || (unsigned char)*ctx->buf == 0x7F, "Invalid character");
250 if(destlen > 1) {
251 *(dest++) = *ctx->buf;
252 destlen--;
253 }
254 con(1);
255 }
256 con(1);
257 if(destlen > 0)
258 *dest = 0;
259 return 0;
260 }
261
262
263 /* Parse and consume a JSON integer. Throws an error if the value does not fit
264 * in an uint64_t, is not an integer or is larger than 'max'. */
rint64(uint64_t * val,uint64_t max)265 static int rint64(uint64_t *val, uint64_t max) {
266 uint64_t v;
267 int haschar = 0;
268 *val = 0;
269 while(1) {
270 C(!*ctx->buf && fill(1));
271 if(*ctx->buf == '0' && !haschar) {
272 con(1);
273 break;
274 }
275 if(*ctx->buf >= '0' && *ctx->buf <= '9') {
276 haschar = 1;
277 v = (*val)*10 + (*ctx->buf-'0');
278 E(v < *val, "Invalid (positive) integer");
279 *val = v;
280 con(1);
281 continue;
282 }
283 E(!haschar, "Invalid (positive) integer");
284 break;
285 }
286 E(*val > max, "Integer out of range");
287 return 0;
288 }
289
290
291 /* Parse and consume a JSON number. The result is discarded.
292 * TODO: Improve validation. */
rnum(void)293 static int rnum(void) {
294 int haschar = 0;
295 C(rfill1);
296 while(1) {
297 C(!*ctx->buf && fill(1));
298 if(*ctx->buf == 'e' || *ctx->buf == 'E' || *ctx->buf == '-' || *ctx->buf == '+' || *ctx->buf == '.' || (*ctx->buf >= '0' && *ctx->buf <= '9')) {
299 haschar = 1;
300 con(1);
301 } else {
302 E(!haschar, "Invalid JSON value");
303 break;
304 }
305 }
306 return 0;
307 }
308
309
rlit(const char * v,int len)310 static int rlit(const char *v, int len) {
311 C(rfill(len));
312 E(strncmp(ctx->buf, v, len) != 0, "Invalid JSON value");
313 con(len);
314 return 0;
315 }
316
317
318 /* Parse the "<space> <string> <space> : <space>" part of an object key. */
rkey(char * dest,int destlen)319 static int rkey(char *dest, int destlen) {
320 C(cons() || rstring(dest, destlen) || cons());
321 E(*ctx->buf != ':', "Expected ':'");
322 con(1);
323 return cons();
324 }
325
326
327 /* (Recursively) parse and consume any JSON value. The result is discarded. */
rval(void)328 static int rval(void) {
329 C(rfill1);
330 switch(*ctx->buf) {
331 case 't': /* true */
332 C(rlit("true", 4));
333 break;
334 case 'f': /* false */
335 C(rlit("false", 5));
336 break;
337 case 'n': /* null */
338 C(rlit("null", 4));
339 break;
340 case '"': /* string */
341 C(rstring(NULL, 0));
342 break;
343 case '{': /* object */
344 con(1);
345 while(1) {
346 C(cons());
347 if(*ctx->buf == '}')
348 break;
349 C(rkey(NULL, 0) || rval() || cons());
350 if(*ctx->buf == '}')
351 break;
352 E(*ctx->buf != ',', "Expected ',' or '}'");
353 con(1);
354 }
355 con(1);
356 break;
357 case '[': /* array */
358 con(1);
359 while(1) {
360 C(cons());
361 if(*ctx->buf == ']')
362 break;
363 C(cons() || rval() || cons());
364 if(*ctx->buf == ']')
365 break;
366 E(*ctx->buf != ',', "Expected ',' or ']'");
367 con(1);
368 }
369 con(1);
370 break;
371 default: /* assume number */
372 C(rnum());
373 break;
374 }
375
376 return 0;
377 }
378
379
380 /* Consumes everything up to the root item, and checks that this item is a dir. */
header(void)381 static int header(void) {
382 uint64_t v;
383
384 C(cons());
385 E(*ctx->buf != '[', "Expected JSON array");
386 con(1);
387 C(cons() || rint64(&v, 10000) || cons());
388 E(v != 1, "Incompatible major format version");
389 E(*ctx->buf != ',', "Expected ','");
390 con(1);
391 C(cons() || rint64(&v, 10000) || cons()); /* Ignore the minor version for now */
392 E(*ctx->buf != ',', "Expected ','");
393 con(1);
394 /* Metadata block is currently ignored */
395 C(cons() || rval() || cons());
396 E(*ctx->buf != ',', "Expected ','");
397 con(1);
398
399 C(cons());
400 E(*ctx->buf != '[', "Top-level item must be a directory");
401
402 return 0;
403 }
404
405
406 static int item(uint64_t);
407
408 /* Read and add dir contents */
itemdir(uint64_t dev)409 static int itemdir(uint64_t dev) {
410 while(1) {
411 C(cons());
412 if(*ctx->buf == ']')
413 break;
414 E(*ctx->buf != ',', "Expected ',' or ']'");
415 con(1);
416 C(cons() || item(dev));
417 }
418 con(1);
419 C(cons());
420 return 0;
421 }
422
423
424 /* Reads a JSON object representing a struct dir/dir_ext item. Writes to
425 * ctx->buf_dir, ctx->buf_ext, ctx->buf_name and ctx->nlink. */
iteminfo(void)426 static int iteminfo(void) {
427 uint64_t iv;
428
429 E(*ctx->buf != '{', "Expected JSON object");
430 con(1);
431
432 while(1) {
433 C(rkey(ctx->val, MAX_VAL));
434 /* TODO: strcmp() in this fashion isn't very fast. */
435 if(strcmp(ctx->val, "name") == 0) { /* name */
436 ctx->val[MAX_VAL-1] = 1;
437 C(rstring(ctx->val, MAX_VAL));
438 E(ctx->val[MAX_VAL-1] != 1, "Too large string value");
439 strcpy(ctx->buf_name, ctx->val);
440 } else if(strcmp(ctx->val, "asize") == 0) { /* asize */
441 C(rint64(&iv, INT64_MAX));
442 ctx->buf_dir->asize = iv;
443 } else if(strcmp(ctx->val, "dsize") == 0) { /* dsize */
444 C(rint64(&iv, INT64_MAX));
445 ctx->buf_dir->size = iv;
446 } else if(strcmp(ctx->val, "dev") == 0) { /* dev */
447 C(rint64(&iv, UINT64_MAX));
448 ctx->buf_dir->dev = iv;
449 } else if(strcmp(ctx->val, "ino") == 0) { /* ino */
450 C(rint64(&iv, UINT64_MAX));
451 ctx->buf_dir->ino = iv;
452 } else if(strcmp(ctx->val, "uid") == 0) { /* uid */
453 C(rint64(&iv, INT32_MAX));
454 ctx->buf_dir->flags |= FF_EXT;
455 ctx->buf_ext->uid = iv;
456 } else if(strcmp(ctx->val, "gid") == 0) { /* gid */
457 C(rint64(&iv, INT32_MAX));
458 ctx->buf_dir->flags |= FF_EXT;
459 ctx->buf_ext->gid = iv;
460 } else if(strcmp(ctx->val, "mode") == 0) { /* mode */
461 C(rint64(&iv, UINT16_MAX));
462 ctx->buf_dir->flags |= FF_EXT;
463 ctx->buf_ext->mode = iv;
464 } else if(strcmp(ctx->val, "mtime") == 0) { /* mtime */
465 C(rint64(&iv, UINT64_MAX));
466 ctx->buf_dir->flags |= FF_EXT;
467 ctx->buf_ext->mtime = iv;
468 // Accept decimal numbers, but discard the fractional part because our data model doesn't support it.
469 if(*ctx->buf == '.') {
470 con(1);
471 while(*ctx->buf >= '0' && *ctx->buf <= '9')
472 con(1);
473 }
474 } else if(strcmp(ctx->val, "hlnkc") == 0) { /* hlnkc */
475 if(*ctx->buf == 't') {
476 C(rlit("true", 4));
477 ctx->buf_dir->flags |= FF_HLNKC;
478 } else
479 C(rlit("false", 5));
480 } else if(strcmp(ctx->val, "nlink") == 0) { /* nlink */
481 C(rint64(&iv, UINT32_MAX));
482 if(iv > 1)
483 ctx->buf_dir->flags |= FF_HLNKC;
484 ctx->nlink = iv;
485 } else if(strcmp(ctx->val, "read_error") == 0) { /* read_error */
486 if(*ctx->buf == 't') {
487 C(rlit("true", 4));
488 ctx->buf_dir->flags |= FF_ERR;
489 } else
490 C(rlit("false", 5));
491 } else if(strcmp(ctx->val, "excluded") == 0) { /* excluded */
492 C(rstring(ctx->val, 8));
493 if(strcmp(ctx->val, "otherfs") == 0)
494 ctx->buf_dir->flags |= FF_OTHFS;
495 else if(strcmp(ctx->val, "kernfs") == 0)
496 ctx->buf_dir->flags |= FF_KERNFS;
497 else if(strcmp(ctx->val, "frmlnk") == 0)
498 ctx->buf_dir->flags |= FF_FRMLNK;
499 else
500 ctx->buf_dir->flags |= FF_EXL;
501 } else if(strcmp(ctx->val, "notreg") == 0) { /* notreg */
502 if(*ctx->buf == 't') {
503 C(rlit("true", 4));
504 ctx->buf_dir->flags &= ~FF_FILE;
505 } else
506 C(rlit("false", 5));
507 } else
508 C(rval());
509
510 C(cons());
511 if(*ctx->buf == '}')
512 break;
513 E(*ctx->buf != ',', "Expected ',' or '}'");
514 con(1);
515 }
516 con(1);
517
518 E(!*ctx->buf_name, "No name field present in item information object");
519 ctx->items++;
520 /* Only call input_handle() once for every 32 items. Importing items is so
521 * fast that the time spent in input_handle() dominates when called every
522 * time. Don't set this value too high, either, as feedback should still be
523 * somewhat responsive when our import data comes from a slow-ish source. */
524 return !(ctx->items & 31) ? input_handle(1) : 0;
525 }
526
527
528 /* Recursively reads a file or directory item */
item(uint64_t dev)529 static int item(uint64_t dev) {
530 int isdir = 0;
531 int isroot = ctx->items == 0;
532
533 if(*ctx->buf == '[') {
534 isdir = 1;
535 con(1);
536 C(cons());
537 }
538
539 memset(ctx->buf_dir, 0, offsetof(struct dir, name));
540 memset(ctx->buf_ext, 0, sizeof(struct dir_ext));
541 ctx->nlink = 0;
542 *ctx->buf_name = 0;
543 ctx->buf_dir->flags |= isdir ? FF_DIR : FF_FILE;
544 ctx->buf_dir->dev = dev;
545
546 C(iteminfo());
547 dev = ctx->buf_dir->dev;
548
549 if(isroot)
550 dir_curpath_set(ctx->buf_name);
551 else
552 dir_curpath_enter(ctx->buf_name);
553
554 if(isdir) {
555 if(dir_output.item(ctx->buf_dir, ctx->buf_name, ctx->buf_ext, ctx->nlink)) {
556 dir_seterr("Output error: %s", strerror(errno));
557 return 1;
558 }
559 C(itemdir(dev));
560 if(dir_output.item(NULL, 0, NULL, 0)) {
561 dir_seterr("Output error: %s", strerror(errno));
562 return 1;
563 }
564 } else if(dir_output.item(ctx->buf_dir, ctx->buf_name, ctx->buf_ext, ctx->nlink)) {
565 dir_seterr("Output error: %s", strerror(errno));
566 return 1;
567 }
568
569 if(!isroot)
570 dir_curpath_leave();
571
572 return 0;
573 }
574
575
footer(void)576 static int footer(void) {
577 while(1) {
578 C(cons());
579 if(*ctx->buf == ']')
580 break;
581 E(*ctx->buf != ',', "Expected ',' or ']'");
582 con(1);
583 C(cons() || rval());
584 }
585 con(1);
586 C(cons());
587 E(*ctx->buf, "Trailing garbage");
588 return 0;
589 }
590
591
process(void)592 static int process(void) {
593 int fail = 0;
594
595 header();
596
597 if(!dir_fatalerr)
598 fail = item(0);
599
600 if(!dir_fatalerr && !fail)
601 footer();
602
603 if(fclose(ctx->stream) && !dir_fatalerr && !fail)
604 dir_seterr("Error closing file: %s", strerror(errno));
605 free(ctx->buf_dir);
606 free(ctx);
607
608 while(dir_fatalerr && !input_handle(0))
609 ;
610 return dir_output.final(dir_fatalerr || fail);
611 }
612
613
dir_import_init(const char * fn)614 int dir_import_init(const char *fn) {
615 FILE *stream;
616 if(strcmp(fn, "-") == 0)
617 stream = stdin;
618 else if((stream = fopen(fn, "r")) == NULL)
619 return 1;
620
621 ctx = xmalloc(sizeof(struct ctx));
622 ctx->stream = stream;
623 ctx->line = 1;
624 ctx->byte = ctx->eof = ctx->items = 0;
625 ctx->buf = ctx->lastfill = ctx->readbuf;
626 ctx->buf_dir = xmalloc(dir_memsize(""));
627 ctx->readbuf[0] = 0;
628
629 dir_curpath_set(fn);
630 dir_process = process;
631 dir_import_active = 1;
632 return 0;
633 }
634
635