xref: /freebsd/contrib/libxo/xolint/xolint.pl (revision 0957b409)
1#!/usr/bin/env perl
2#
3# Copyright (c) 2014, Juniper Networks, Inc.
4# All rights reserved.
5# This SOFTWARE is licensed under the LICENSE provided in the
6# ../Copyright file. By downloading, installing, copying, or otherwise
7# using the SOFTWARE, you agree to be bound by the terms of that
8# LICENSE.
9# Phil Shafer, August 2014
10#
11#
12# xolint -- a lint for inspecting xo_emit format strings
13#
14# Yes, that's a long way to go for a pun.
15
16%vocabulary = ();
17
18sub main {
19    while ($ARGV[0] =~ /^-/) {
20	$_ = shift @ARGV;
21	$opt_cpp = 1 if /^-c/;
22	$opt_cflags .= shift @ARGV if /^-C/;
23	$opt_debug = 1 if /^-d/;
24	extract_docs() if /^-D/;
25	$opt_info = $opt_vocabulary = 1 if /^-I/;
26	$opt_print = 1 if /^-p/;
27	$opt_vocabulary = 1 if /^-V/;
28	extract_samples() if /^-X/;
29    }
30
31    if ($#ARGV < 0) {
32	print STDERR "xolint [options] files ...\n";
33	print STDERR "    -c    invoke 'cpp' on input\n";
34	print STDERR "    -C flags   Pass flags to cpp\n";
35	print STDERR "    -d         Show debug output\n";
36	print STDERR "    -D         Extract xolint documentation\n";
37	print STDERR "    -I         Print xo_info_t data\n";
38	print STDERR "    -p         Print input data on errors\n";
39	print STDERR "    -V         Print vocabulary (list of tags)\n";
40	print STDERR "    -X         Print examples of invalid use\n";
41	exit(1);
42    }
43
44    for $file (@ARGV) {
45	parse_file($file);
46    }
47
48    if ($opt_info) {
49	print "static xo_info_t xo_info_table[] = {\n";
50	for $name (sort(keys(%vocabulary))) {
51	    print "    { \"", $name, "\", \"type\", \"desc\" },\n";
52	}
53	print "};\n";
54	print "static int xo_info_count = "
55	    . "(sizeof(xo_info_table) / sizeof(xo_info_table[0]));\n\n";
56	print "#define XO_SET_INFO() \\\n";
57	print "    xo_set_info(NULL, xo_info_table, xo_info_count)\n";
58    } elsif ($opt_vocabulary) {
59	for $name (sort(keys(%vocabulary))) {
60	    print $name, "\n";
61	}
62    }
63}
64
65sub extract_samples {
66    my $x = "\#" . "\@";
67    my $cmd = "grep -B1 -i '$x Should be' $0 | grep xo_emit | sed 's/.*\#*\@//'";
68    system($cmd);
69    exit(0);
70}
71
72sub extract_docs {
73    my $x = "\#" . "\@";
74    my $cmd = "grep -B1 '$x' $0";
75    open INPUT, "$cmd |";
76    local @input = <INPUT>;
77    close INPUT;
78    my $ln, $new = 0, $first = 1, $need_nl;
79
80    for ($ln = 0; $ln <= $#input; $ln++) {
81	chomp($_ = $input[$ln]);
82	if (/^--/) {
83	    $ln += 1;
84	    $new = 1;
85	    next;
86	}
87	if ($first) {
88	    $new = 1;
89	    $first = 0;
90	    next;
91	}
92
93	s/\s*\#\@\s*//;
94
95	if ($new) {
96	    if ($need_nl) {
97		print "\n\n";
98		$need_nl = 0;
99	    }
100
101	    print "*** '$_'\n\n";
102	    print "The message \"$_\" can be caused by code like:\n\n";
103	    $new = 0;
104
105	} elsif (/xo_emit\s*\(/) {
106	    s/^\s+//;
107	    print "    $_\n\n";
108
109	} elsif (/^Should be/i) {
110	    print "This code should be replaced with code like:\n\n";
111
112	} else {
113	    print "$_\n";
114	    $need_nl = 1;
115	}
116    }
117
118    exit(0);
119}
120
121sub parse_file {
122    local($file) = @_;
123    local($errors, $warnings, $info) = (0, 0, 0);
124    local $curfile = $file;
125    local $curln = 0;
126
127    if ($opt_cpp) {
128	die "no such file" unless -f $file;
129	open INPUT, "cpp $opt_cflags $file |";
130    } else {
131	open INPUT, $file || die "cannot open input file '$file'";
132    }
133    local @input = <INPUT>;
134    close INPUT;
135
136    local $ln, $rln, $line, $replay;
137
138    for ($ln = 0; $ln < $#input; $ln++) {
139	$line = $input[$ln];
140	$curln += 1;
141
142	if ($line =~ /^\#/) {
143	    my($num, $fn) = ($line =~ /\#\s*(\d+)\s+"(.+)"/);
144	    ($curfile, $curln) = ($fn, $num) if $num;
145	    next;
146	}
147
148	next unless $line =~ /xo_emit\(/;
149
150	@tokens = parse_tokens();
151	print "token:\n    '" . join("'\n    '", @tokens) . "'\n"
152	    if $opt_debug;
153	check_format($tokens[0]);
154    }
155
156    print $file . ": $errors errors, $warnings warnings, $info info\n"
157	unless $opt_vocabulary;
158}
159
160sub parse_tokens {
161    my $full = "$'";
162    my @tokens = ();
163    my %pairs = ( "{" => "}", "[" => "]", "(" => ")" );
164    my %quotes = ( "\"" => "\"", "'" => "'" );
165    local @data = split(//, $full);
166    local @open = ();
167    local $current = "";
168    my $quote = "";
169    local $off = 0;
170    my $ch;
171
172    $replay = $curln . "     " . $line;
173    $rln = $ln + 1;
174
175    for (;;) {
176	get_tokens() if $off > $#data;
177	die "out of data" if $off > $#data;
178	$ch = $data[$off++];
179
180	print "'$ch' ($quote) ($#open) [" . join("", @open) . "]\n"
181	    if $opt_debug;
182
183	last if $ch eq ";" && $#open < 0;
184
185	if ($ch eq "," && $quote eq "" && $#open < 0) {
186	    print "[$current]\n" if $opt_debug;
187	    push @tokens, $current;
188	    $current = "";
189	    next;
190	}
191
192	next if $ch =~ /[ \t\n\r]/ && $quote eq "" && $#open < 0;
193
194	$current .= $ch;
195
196	if ($quote) {
197	    if ($ch eq $quote) {
198		$quote = "";
199	    }
200	    next;
201	}
202	if ($quotes{$ch}) {
203	    $quote = $quotes{$ch};
204	    $current = substr($current, 0, -2) if $current =~ /""$/;
205	    next;
206	}
207
208	if ($pairs{$ch}) {
209	    push @open, $pairs{$ch};
210	    next;
211	}
212
213	if ($#open >= 0 && $ch eq $open[$#open]) {
214	    pop @open;
215	    next;
216	}
217    }
218
219    push @tokens, substr($current, 0, -1);
220    return @tokens;
221}
222
223sub get_tokens {
224    if ($ln + 1 < $#input) {
225	$line = $input[++$ln];
226	$curln += 1;
227	$replay .= $curln . "     " . $line;
228	@data = split(//, $line);
229	$off = 0;
230    }
231}
232
233sub check_format {
234    my($format) = @_;
235
236    return unless $format =~ /^".*"$/;
237
238    my @data = split(//, $format);
239    my $ch;
240    my $braces = 0;
241    local $count = 0;
242    my $content = "";
243    my $off;
244    my $phase = 0;
245    my @build = ();
246    local $last, $prev = "";
247
248    # Nukes quotes
249    pop @data;
250    shift @data;
251
252    for (;;) {
253	last if $off > $#data;
254	$ch = $data[$off++];
255
256	if ($ch eq "\\") {
257	    $ch = $data[$off++];
258	    $off += 1 if $ch eq "\\"; # double backslash: "\\/"
259	    next;
260	}
261
262	if ($braces) {
263	    if ($ch eq "}") {
264		check_field(@build);
265		$braces = 0;
266		@build = ();
267		$phase = 0;
268		next;
269	    } elsif ($phase == 0 && $ch eq ":") {
270		$phase += 1;
271		next;
272	    } elsif ($ch eq "/") {
273		$phase += 1;
274		next;
275	    }
276
277	} else {
278	    if ($ch eq "{") {
279		check_text($build[0]) if length($build[0]);
280		$braces = 1;
281		@build = ();
282		$last = $prev;
283		next;
284	    }
285	    $prev = $ch;
286	}
287
288	$build[$phase] .= $ch;
289    }
290
291    if ($braces) {
292	error("missing closing brace");
293	check_field(@build);
294    } else {
295	check_text($build[0]) if length($build[0]);
296    }
297}
298
299sub check_text {
300    my($text) = @_;
301
302    print "checking text: [$text]\n" if $opt_debug;
303
304    #@ A percent sign appearing in text is a literal
305    #@     xo_emit("cost: %d", cost);
306    #@ Should be:
307    #@     xo_emit("{L:cost}: {:cost/%d}", cost);
308    #@ This can be a bit surprising and could be a field that was not
309    #@ properly converted to a libxo-style format string.
310    info("a percent sign appearing in text is a literal") if $text =~ /%/;
311}
312
313%short = (
314    # Roles
315    "color" => "C",
316    "decoration" => "D",
317    "error" => "E",
318    "label" => "L",
319    "note" => "N",
320    "padding" => "P",
321    "title" => "T",
322    "units" => "U",
323    "value" => "V",
324    "warning" => "W",
325    "start-anchor" => "[",
326    "stop-anchor" => "]",
327    # Modifiers
328    "colon" => "c",
329    "display" => "d",
330    "encoding" => "e",
331    "hn" => "h",
332    "hn-decimal" => "@",
333    "hn-space" => "@",
334    "hn-1000" => "@",
335    "humanize" => "h",
336    "key" => "k",
337    "leaf-list" => "l",
338    "no-quotes" => "n",
339    "quotes" => "q",
340    "trim" => "t",
341    "white" => "w",
342 );
343
344sub check_field {
345    my(@field) = @_;
346    print "checking field: [" . join("][", @field) . "]\n" if $opt_debug;
347
348    if ($field[0] =~ /,/) {
349	# We have long names; deal with it by turning them into short names
350	my @parts = split(/,/, $field[0]);
351	my $new = "";
352	for (my $i = 1; $i <= $#parts; $i++) {
353	    my $v = $parts[$i];
354	    $v =~ s/^\s+//;
355	    $v =~ s/\s+$//;
356	    if ($short{$v} eq "@") {
357		# ignore; has no short version
358	    } elsif ($short{$v}) {
359		$new .= $short{$v};
360	    } else {
361		#@ Unknown long name for role/modifier
362		#@   xo_emit("{,humanization:value}", value);
363		#@ Should be:
364		#@   xo_emit("{,humanize:value}", value);
365		#@ The hn-* modifiers (hn-decimal, hn-space, hn-1000)
366		#@ are only valid for fields with the {h:} modifier.
367		error("Unknown long name for role/modifier ($v)");
368	    }
369	}
370
371	$field[4] = substr($field[0], index($field[0], ","));
372	$field[0] = $parts[0] . $new;
373    }
374
375    if ($opt_vocabulary) {
376	$vocabulary{$field[1]} = 1
377	    if $field[1] && $field[0] !~ /[DELNPTUW\[\]]/;
378	return;
379    }
380
381    #@ Last character before field definition is a field type
382    #@ A common typo:
383    #@     xo_emit("{T:Min} T{:Max}");
384    #@ Should be:
385    #@     xo_emit("{T:Min} {T:Max}");
386    #@ Twiddling the "{" and the field role is a common typo.
387    info("last character before field definition is a field type ($last)")
388	if $last =~ /[DELNPTUVW\[\]]/ && $field[0] !~ /[DELNPTUVW\[\]]/;
389
390    #@ Encoding format uses different number of arguments
391    #@     xo_emit("{:name/%6.6s %%04d/%s}", name, number);
392    #@ Should be:
393    #@     xo_emit("{:name/%6.6s %04d/%s-%d}", name, number);
394    #@ Both format should consume the same number of arguments off the stack
395    my $cf = count_args($field[2]);
396    my $ce = count_args($field[3]);
397    warn("encoding format uses different number of arguments ($cf/$ce)")
398	if $ce >= 0 && $cf >= 0 && $ce != $cf;
399
400    #@ Only one field role can be used
401    #@     xo_emit("{LT:Max}");
402    #@ Should be:
403    #@     xo_emit("{T:Max}");
404    my(@roles) = ($field[0] !~ /([DELNPTUVW\[\]]).*([DELNPTUVW\[\]])/);
405    error("only one field role can be used (" . join(", ", @roles) . ")")
406	if $#roles > 0;
407
408    # Field is a color, note, label, or title
409    if ($field[0] =~ /[CDLNT]/) {
410
411	#@ Potential missing slash after C, D, N, L, or T with format
412	#@     xo_emit("{T:%6.6s}\n", "Max");
413	#@ should be:
414	#@     xo_emit("{T:/%6.6s}\n", "Max");
415	#@ The "%6.6s" will be a literal, not a field format.  While
416	#@ it's possibly valid, it's likely a missing "/".
417	info("potential missing slash after C, D, N, L, or T with format")
418	    if $field[1] =~ /%/;
419
420	#@ An encoding format cannot be given (roles: DNLT)
421	#@    xo_emit("{T:Max//%s}", "Max");
422	#@ Fields with the C, D, N, L, and T roles are not emitted in
423	#@ the 'encoding' style (JSON, XML), so an encoding format
424	#@ would make no sense.
425	error("encoding format cannot be given when content is present")
426	    if $field[3];
427    }
428
429    # Field is a color, decoration, label, or title
430    if ($field[0] =~ /[CDLN]/) {
431	#@ Format cannot be given when content is present (roles: CDLN)
432	#@    xo_emit("{N:Max/%6.6s}", "Max");
433	#@ Fields with the C, D, L, or N roles can't have both
434	#@ static literal content ("{L:Label}") and a
435	#@ format ("{L:/%s}").
436	#@ This error will also occur when the content has a backslash
437	#@ in it, like "{N:Type of I/O}"; backslashes should be escaped,
438	#@ like "{N:Type of I\\/O}".  Note the double backslash, one for
439	#@ handling 'C' strings, and one for libxo.
440	error("format cannot be given when content is present")
441	    if $field[1] && $field[2];
442    }
443
444    # Field is a color/effect
445    if ($field[0] =~ /C/) {
446	if ($field[1]) {
447	    my $val;
448	    my @sub = split(/,/, $field[1]);
449	    grep { s/^\s*//; s/\s*$//; } @sub;
450
451	    for $val (@sub) {
452		if ($val =~ /^(default,black,red,green,yellow,blue,magenta,cyan,white)$/) {
453
454		    #@ Field has color without fg- or bg- (role: C)
455		    #@   xo_emit("{C:green}{:foo}{C:}", x);
456		    #@ Should be:
457		    #@   xo_emit("{C:fg-green}{:foo}{C:}", x);
458		    #@ Colors must be prefixed by either "fg-" or "bg-".
459		    error("Field has color without fg- or bg- (role: C)");
460
461		} elsif ($val =~ /^(fg|bg)-(default|black|red|green|yellow|blue|magenta|cyan|white)$/) {
462		    # color
463		} elsif ($val =~ /^(bold|underline)$/) {
464		} elsif ($val =~ /^(no-)?(bold|underline|inverse)$/) {
465		    # effect
466
467		} elsif ($val =~ /^(reset|normal)$/) {
468		    # effect also
469		} else {
470		    #@ Field has invalid color or effect (role: C)
471		    #@   xo_emit("{C:fg-purple,bold}{:foo}{C:gween}", x);
472		    #@ Should be:
473		    #@   xo_emit("{C:fg-red,bold}{:foo}{C:fg-green}", x);
474		    #@ The list of colors and effects are limited.  The
475		    #@ set of colors includes default, black, red, green,
476		    #@ yellow, blue, magenta, cyan, and white, which must
477		    #@ be prefixed by either "fg-" or "bg-".  Effects are
478		    #@ limited to bold, no-bold, underline, no-underline,
479		    #@ inverse, no-inverse, normal, and reset.  Values must
480		    #@ be separated by commas.
481		    error("Field has invalid color or effect (role: C) ($val)");
482		}
483	    }
484	}
485    }
486
487    # Humanized field
488    if ($field[0] =~ /h/) {
489	if (length($field[2]) == 0) {
490	    #@ Field has humanize modifier but no format string
491	    #@   xo_emit("{h:value}", value);
492	    #@ Should be:
493	    #@   xo_emit("{h:value/%d}", value);
494	    #@ Humanization is only value for numbers, which are not
495	    #@ likely to use the default format ("%s").
496	    error("Field has humanize modifier but no format string");
497	}
498    }
499
500    # hn-* on non-humanize field
501    if ($field[0] !~ /h/) {
502	if ($field[4] =~ /,hn-/) {
503	    #@ Field has hn-* modifier but not 'h' modifier
504	    #@   xo_emit("{,hn-1000:value}", value);
505	    #@ Should be:
506	    #@   xo_emit("{h,hn-1000:value}", value);
507	    #@ The hn-* modifiers (hn-decimal, hn-space, hn-1000)
508	    #@ are only valid for fields with the {h:} modifier.
509	    error("Field has hn-* modifier but not 'h' modifier");
510	}
511    }
512
513    # A value field
514    if (length($field[0]) == 0 || $field[0] =~ /V/) {
515
516	#@ Value field must have a name (as content)")
517	#@     xo_emit("{:/%s}", "value");
518	#@ Should be:
519	#@     xo_emit("{:tag-name/%s}", "value");
520	#@ The field name is used for XML and JSON encodings.  These
521	#@ tags names are static and must appear directly in the
522	#@ field descriptor.
523	error("value field must have a name (as content)")
524	    unless $field[1];
525
526	#@ Use hyphens, not underscores, for value field name
527	#@     xo_emit("{:no_under_scores}", "bad");
528	#@ Should be:
529	#@     xo_emit("{:no-under-scores}", "bad");
530	#@ Use of hyphens is traditional in XML, and the XOF_UNDERSCORES
531	#@ flag can be used to generate underscores in JSON, if desired.
532	#@ But the raw field name should use hyphens.
533	error("use hyphens, not underscores, for value field name")
534	    if $field[1] =~ /_/;
535
536	#@ Value field name cannot start with digit
537	#@     xo_emit("{:10-gig/}");
538	#@ Should be:
539	#@     xo_emit("{:ten-gig/}");
540	#@ XML element names cannot start with a digit.
541	error("value field name cannot start with digit")
542	    if $field[1] =~ /^[0-9]/;
543
544	#@ Value field name should be lower case
545	#@     xo_emit("{:WHY-ARE-YOU-SHOUTING}", "NO REASON");
546	#@ Should be:
547	#@     xo_emit("{:why-are-you-shouting}", "no reason");
548	#@ Lower case is more civilized.  Even TLAs should be lower case
549	#@ to avoid scenarios where the differences between "XPath" and
550	#@ "Xpath" drive your users crazy.  Lower case rules the seas.
551	error("value field name should be lower case")
552	    if $field[1] =~ /[A-Z]/;
553
554	#@ Value field name should be longer than two characters
555	#@     xo_emit("{:x}", "mumble");
556	#@ Should be:
557	#@     xo_emit("{:something-meaningful}", "mumble");
558	#@ Field names should be descriptive, and it's hard to
559	#@ be descriptive in less than two characters.  Consider
560	#@ your users and try to make something more useful.
561	#@ Note that this error often occurs when the field type
562	#@ is placed after the colon ("{:T/%20s}"), instead of before
563	#@ it ("{T:/20s}").
564	error("value field name should be longer than two characters")
565	    if $field[1] =~ /[A-Z]/;
566
567	#@ Value field name contains invalid character
568	#@     xo_emit("{:cost-in-$$/%u}", 15);
569	#@ Should be:
570	#@     xo_emit("{:cost-in-dollars/%u}", 15);
571	#@ An invalid character is often a sign of a typo, like "{:]}"
572	#@ instead of "{]:}".  Field names are restricted to lower-case
573	#@ characters, digits, and hyphens.
574	error("value field name contains invalid character (" . $field[1] . ")")
575	    unless $field[1] =~ /^[0-9a-z-]*$/;
576    }
577
578    # A decoration field
579    if ($field[0] =~ /D/) {
580
581	#@decoration field contains invalid character
582	#@     xo_emit("{D:not good}");
583	#@ Should be:
584	#@     xo_emit("{D:((}{:good}{D:))}", "yes");
585	#@ This is minor, but fields should use proper roles.  Decoration
586	#@ fields are meant to hold punctuation and other characters used
587	#@ to decorate the content, typically to make it more readable
588	#@ to human readers.
589	warn("decoration field contains invalid character")
590	    unless $field[1] =~ m:^[~!\@\#\$%^&\*\(\);\:\[\]\{\} ]+$:;
591    }
592
593    if ($field[0] =~ /[\[\]]/) {
594	#@ Anchor content should be decimal width
595	#@     xo_emit("{[:mumble}");
596	#@ Should be:
597	#@     xo_emit("{[:32}");
598	#@ Anchors need an integer value to specify the width of
599	#@ the set of anchored fields.  The value can be positive
600	#@ (for left padding/right justification) or negative (for
601	#@ right padding/left justification) and can appear in
602	#@ either the start or stop anchor field descriptor.
603	error("anchor content should be decimal width")
604	    if $field[1] && $field[1] !~ /^-?\d+$/ ;
605
606	#@ Anchor format should be "%d"
607	#@     xo_emit("{[:/%s}");
608	#@ Should be:
609	#@     xo_emit("{[:/%d}");
610	#@ Anchors only grok integer values, and if the value is not static,
611	#@ if must be in an 'int' argument, represented by the "%d" format.
612	#@ Anything else is an error.
613	error("anchor format should be \"%d\"")
614	    if $field[2] && $field[2] ne "%d";
615
616	#@ Anchor cannot have both format and encoding format")
617	#@     xo_emit("{[:32/%d}");
618	#@ Should be:
619	#@     xo_emit("{[:32}");
620	#@ Anchors can have a static value or argument for the width,
621	#@ but cannot have both.
622	error("anchor cannot have both format and encoding format")
623	    if $field[1] && $field[2];
624    }
625}
626
627sub count_args {
628    my($format) = @_;
629
630    return -1 unless $format;
631
632    my $in;
633    my($text, $ff, $fc, $rest);
634    for ($in = $format; $in; $in = $rest) {
635	($text, $ff, $fc, $rest) =
636	   ($in =~ /^([^%]*)(%[^%diouxXDOUeEfFgGaAcCsSp]*)([diouxXDOUeEfFgGaAcCsSp])(.*)$/);
637	unless ($ff) {
638	    # Might be a "%%"
639	    ($text, $ff, $rest) = ($in =~ /^([^%]*)(%%)(.*)$/);
640	    if ($ff) {
641		check_text($text);
642	    } else {
643		# Not sure what's going on here, but something's wrong...
644		error("invalid field format") if $in =~ /%/;
645	    }
646	    next;
647	}
648
649	check_text($text);
650	check_field_format($ff, $fc);
651    }
652
653    return 0;
654}
655
656sub check_field_format {
657    my($ff, $fc) = @_;
658
659    print "check_field_format: [$ff] [$fc]\n" if $opt_debug;
660
661    my(@chunks) = split(/\./, $ff);
662
663    #@ Max width only valid for strings
664    #@     xo_emit("{:tag/%2.4.6d}", 55);
665    #@ Should be:
666    #@     xo_emit("{:tag/%2.6d}", 55);
667    #@ libxo allows a true 'max width' in addition to the traditional
668    #@ printf-style 'max number of bytes to use for input'.  But this
669    #@ is supported only for string values, since it makes no sense
670    #@ for non-strings.  This error may occur from a typo,
671    #@ like "{:tag/%6..6d}" where only one period should be used.
672    error("max width only valid for strings")
673	if $#chunks >= 2 && $fc !~ /[sS]/;
674}
675
676sub error {
677    return if $opt_vocabulary;
678    print STDERR $curfile . ": " .$curln . ": error: " . join(" ", @_) . "\n";
679    print STDERR $replay . "\n" if $opt_print;
680    $errors += 1;
681}
682
683sub warn {
684    return if $opt_vocabulary;
685    print STDERR $curfile . ": " .$curln . ": warning: " . join(" ", @_) . "\n";
686    print STDERR $replay . "\n" if $opt_print;
687    $warnings += 1;
688}
689
690sub info {
691    return if $opt_vocabulary;
692    print STDERR $curfile . ": " .$curln . ": info: " . join(" ", @_) . "\n";
693    print STDERR $replay . "\n" if $opt_print;
694    $info += 1;
695}
696
697main: {
698    main();
699}
700