xref: /freebsd/contrib/libxo/xolint/xolint.pl (revision 34b867ca)
131337658SMarcel Moolenaar#!/usr/bin/env perl
231337658SMarcel Moolenaar#
331337658SMarcel Moolenaar# Copyright (c) 2014, Juniper Networks, Inc.
431337658SMarcel Moolenaar# All rights reserved.
531337658SMarcel Moolenaar# This SOFTWARE is licensed under the LICENSE provided in the
631337658SMarcel Moolenaar# ../Copyright file. By downloading, installing, copying, or otherwise
731337658SMarcel Moolenaar# using the SOFTWARE, you agree to be bound by the terms of that
831337658SMarcel Moolenaar# LICENSE.
931337658SMarcel Moolenaar# Phil Shafer, August 2014
1031337658SMarcel Moolenaar#
1131337658SMarcel Moolenaar#
1231337658SMarcel Moolenaar# xolint -- a lint for inspecting xo_emit format strings
1331337658SMarcel Moolenaar#
1431337658SMarcel Moolenaar# Yes, that's a long way to go for a pun.
1531337658SMarcel Moolenaar
1631337658SMarcel Moolenaar%vocabulary = ();
1731337658SMarcel Moolenaar
1831337658SMarcel Moolenaarsub main {
1931337658SMarcel Moolenaar    while ($ARGV[0] =~ /^-/) {
2031337658SMarcel Moolenaar	$_ = shift @ARGV;
2131337658SMarcel Moolenaar	$opt_cpp = 1 if /^-c/;
2231337658SMarcel Moolenaar	$opt_cflags .= shift @ARGV if /^-C/;
2331337658SMarcel Moolenaar	$opt_debug = 1 if /^-d/;
2431337658SMarcel Moolenaar	extract_docs() if /^-D/;
2531337658SMarcel Moolenaar	$opt_info = $opt_vocabulary = 1 if /^-I/;
2631337658SMarcel Moolenaar	$opt_print = 1 if /^-p/;
2731337658SMarcel Moolenaar	$opt_vocabulary = 1 if /^-V/;
2831337658SMarcel Moolenaar	extract_samples() if /^-X/;
2931337658SMarcel Moolenaar    }
3031337658SMarcel Moolenaar
31c600d307SMarcel Moolenaar    if ($#ARGV < 0) {
32c600d307SMarcel Moolenaar	print STDERR "xolint [options] files ...\n";
33c600d307SMarcel Moolenaar	print STDERR "    -c    invoke 'cpp' on input\n";
34c600d307SMarcel Moolenaar	print STDERR "    -C flags   Pass flags to cpp\n";
35c600d307SMarcel Moolenaar	print STDERR "    -d         Show debug output\n";
36c600d307SMarcel Moolenaar	print STDERR "    -D         Extract xolint documentation\n";
37c600d307SMarcel Moolenaar	print STDERR "    -I         Print xo_info_t data\n";
38c600d307SMarcel Moolenaar	print STDERR "    -p         Print input data on errors\n";
39c600d307SMarcel Moolenaar	print STDERR "    -V         Print vocabulary (list of tags)\n";
40c600d307SMarcel Moolenaar	print STDERR "    -X         Print examples of invalid use\n";
41c600d307SMarcel Moolenaar	exit(1);
42c600d307SMarcel Moolenaar    }
43c600d307SMarcel Moolenaar
4431337658SMarcel Moolenaar    for $file (@ARGV) {
4531337658SMarcel Moolenaar	parse_file($file);
4631337658SMarcel Moolenaar    }
4731337658SMarcel Moolenaar
4831337658SMarcel Moolenaar    if ($opt_info) {
4931337658SMarcel Moolenaar	print "static xo_info_t xo_info_table[] = {\n";
5031337658SMarcel Moolenaar	for $name (sort(keys(%vocabulary))) {
5131337658SMarcel Moolenaar	    print "    { \"", $name, "\", \"type\", \"desc\" },\n";
5231337658SMarcel Moolenaar	}
5331337658SMarcel Moolenaar	print "};\n";
5431337658SMarcel Moolenaar	print "static int xo_info_count = "
5531337658SMarcel Moolenaar	    . "(sizeof(xo_info_table) / sizeof(xo_info_table[0]));\n\n";
5631337658SMarcel Moolenaar	print "#define XO_SET_INFO() \\\n";
5731337658SMarcel Moolenaar	print "    xo_set_info(NULL, xo_info_table, xo_info_count)\n";
5831337658SMarcel Moolenaar    } elsif ($opt_vocabulary) {
5931337658SMarcel Moolenaar	for $name (sort(keys(%vocabulary))) {
6031337658SMarcel Moolenaar	    print $name, "\n";
6131337658SMarcel Moolenaar	}
6231337658SMarcel Moolenaar    }
6331337658SMarcel Moolenaar}
6431337658SMarcel Moolenaar
6531337658SMarcel Moolenaarsub extract_samples {
6631337658SMarcel Moolenaar    my $x = "\#" . "\@";
6731337658SMarcel Moolenaar    my $cmd = "grep -B1 -i '$x Should be' $0 | grep xo_emit | sed 's/.*\#*\@//'";
6831337658SMarcel Moolenaar    system($cmd);
6931337658SMarcel Moolenaar    exit(0);
7031337658SMarcel Moolenaar}
7131337658SMarcel Moolenaar
7231337658SMarcel Moolenaarsub extract_docs {
7331337658SMarcel Moolenaar    my $x = "\#" . "\@";
7431337658SMarcel Moolenaar    my $cmd = "grep -B1 '$x' $0";
7531337658SMarcel Moolenaar    open INPUT, "$cmd |";
7631337658SMarcel Moolenaar    local @input = <INPUT>;
7731337658SMarcel Moolenaar    close INPUT;
7831337658SMarcel Moolenaar    my $ln, $new = 0, $first = 1, $need_nl;
7931337658SMarcel Moolenaar
8031337658SMarcel Moolenaar    for ($ln = 0; $ln <= $#input; $ln++) {
8131337658SMarcel Moolenaar	chomp($_ = $input[$ln]);
8231337658SMarcel Moolenaar	if (/^--/) {
8331337658SMarcel Moolenaar	    $ln += 1;
8431337658SMarcel Moolenaar	    $new = 1;
8531337658SMarcel Moolenaar	    next;
8631337658SMarcel Moolenaar	}
8731337658SMarcel Moolenaar	if ($first) {
8831337658SMarcel Moolenaar	    $new = 1;
8931337658SMarcel Moolenaar	    $first = 0;
9031337658SMarcel Moolenaar	    next;
9131337658SMarcel Moolenaar	}
9231337658SMarcel Moolenaar
9331337658SMarcel Moolenaar	s/\s*\#\@\s*//;
9431337658SMarcel Moolenaar
9531337658SMarcel Moolenaar	if ($new) {
9631337658SMarcel Moolenaar	    if ($need_nl) {
9731337658SMarcel Moolenaar		print "\n\n";
9831337658SMarcel Moolenaar		$need_nl = 0;
9931337658SMarcel Moolenaar	    }
10031337658SMarcel Moolenaar
10176afb20cSPhil Shafer	    $under = "+" x (length($_) + 2);
10276afb20cSPhil Shafer
10376afb20cSPhil Shafer	    print "'$_'\n$under\n\n";
10476afb20cSPhil Shafer	    print "The message \"$_\" can be caused by code like:\n";
10531337658SMarcel Moolenaar	    $new = 0;
10631337658SMarcel Moolenaar
10731337658SMarcel Moolenaar	} elsif (/xo_emit\s*\(/) {
10831337658SMarcel Moolenaar	    s/^\s+//;
10976afb20cSPhil Shafer	    print "\n::\n\n    $_\n\n";
11031337658SMarcel Moolenaar
11131337658SMarcel Moolenaar	} elsif (/^Should be/i) {
11276afb20cSPhil Shafer	    print "This code should be replaced with code like:\n";
11331337658SMarcel Moolenaar
11431337658SMarcel Moolenaar	} else {
11531337658SMarcel Moolenaar	    print "$_\n";
11631337658SMarcel Moolenaar	    $need_nl = 1;
11731337658SMarcel Moolenaar	}
11831337658SMarcel Moolenaar    }
11931337658SMarcel Moolenaar
12031337658SMarcel Moolenaar    exit(0);
12131337658SMarcel Moolenaar}
12231337658SMarcel Moolenaar
12331337658SMarcel Moolenaarsub parse_file {
12431337658SMarcel Moolenaar    local($file) = @_;
12531337658SMarcel Moolenaar    local($errors, $warnings, $info) = (0, 0, 0);
12631337658SMarcel Moolenaar    local $curfile = $file;
12731337658SMarcel Moolenaar    local $curln = 0;
12831337658SMarcel Moolenaar
12931337658SMarcel Moolenaar    if ($opt_cpp) {
13031337658SMarcel Moolenaar	die "no such file" unless -f $file;
13131337658SMarcel Moolenaar	open INPUT, "cpp $opt_cflags $file |";
13231337658SMarcel Moolenaar    } else {
13331337658SMarcel Moolenaar	open INPUT, $file || die "cannot open input file '$file'";
13431337658SMarcel Moolenaar    }
13531337658SMarcel Moolenaar    local @input = <INPUT>;
13631337658SMarcel Moolenaar    close INPUT;
13731337658SMarcel Moolenaar
13831337658SMarcel Moolenaar    local $ln, $rln, $line, $replay;
13931337658SMarcel Moolenaar
14031337658SMarcel Moolenaar    for ($ln = 0; $ln < $#input; $ln++) {
14131337658SMarcel Moolenaar	$line = $input[$ln];
14231337658SMarcel Moolenaar	$curln += 1;
14331337658SMarcel Moolenaar
14431337658SMarcel Moolenaar	if ($line =~ /^\#/) {
14531337658SMarcel Moolenaar	    my($num, $fn) = ($line =~ /\#\s*(\d+)\s+"(.+)"/);
14631337658SMarcel Moolenaar	    ($curfile, $curln) = ($fn, $num) if $num;
14731337658SMarcel Moolenaar	    next;
14831337658SMarcel Moolenaar	}
14931337658SMarcel Moolenaar
15031337658SMarcel Moolenaar	next unless $line =~ /xo_emit\(/;
15131337658SMarcel Moolenaar
15231337658SMarcel Moolenaar	@tokens = parse_tokens();
15331337658SMarcel Moolenaar	print "token:\n    '" . join("'\n    '", @tokens) . "'\n"
15431337658SMarcel Moolenaar	    if $opt_debug;
15531337658SMarcel Moolenaar	check_format($tokens[0]);
15631337658SMarcel Moolenaar    }
15731337658SMarcel Moolenaar
15831337658SMarcel Moolenaar    print $file . ": $errors errors, $warnings warnings, $info info\n"
15931337658SMarcel Moolenaar	unless $opt_vocabulary;
16031337658SMarcel Moolenaar}
16131337658SMarcel Moolenaar
16231337658SMarcel Moolenaarsub parse_tokens {
16331337658SMarcel Moolenaar    my $full = "$'";
16431337658SMarcel Moolenaar    my @tokens = ();
16531337658SMarcel Moolenaar    my %pairs = ( "{" => "}", "[" => "]", "(" => ")" );
16631337658SMarcel Moolenaar    my %quotes = ( "\"" => "\"", "'" => "'" );
16731337658SMarcel Moolenaar    local @data = split(//, $full);
16831337658SMarcel Moolenaar    local @open = ();
16931337658SMarcel Moolenaar    local $current = "";
17031337658SMarcel Moolenaar    my $quote = "";
17131337658SMarcel Moolenaar    local $off = 0;
17231337658SMarcel Moolenaar    my $ch;
17331337658SMarcel Moolenaar
17431337658SMarcel Moolenaar    $replay = $curln . "     " . $line;
17531337658SMarcel Moolenaar    $rln = $ln + 1;
17631337658SMarcel Moolenaar
17731337658SMarcel Moolenaar    for (;;) {
17831337658SMarcel Moolenaar	get_tokens() if $off > $#data;
17931337658SMarcel Moolenaar	die "out of data" if $off > $#data;
18031337658SMarcel Moolenaar	$ch = $data[$off++];
18131337658SMarcel Moolenaar
18231337658SMarcel Moolenaar	print "'$ch' ($quote) ($#open) [" . join("", @open) . "]\n"
18331337658SMarcel Moolenaar	    if $opt_debug;
18431337658SMarcel Moolenaar
18531337658SMarcel Moolenaar	last if $ch eq ";" && $#open < 0;
18631337658SMarcel Moolenaar
18731337658SMarcel Moolenaar	if ($ch eq "," && $quote eq "" && $#open < 0) {
18831337658SMarcel Moolenaar	    print "[$current]\n" if $opt_debug;
18931337658SMarcel Moolenaar	    push @tokens, $current;
19031337658SMarcel Moolenaar	    $current = "";
19131337658SMarcel Moolenaar	    next;
19231337658SMarcel Moolenaar	}
19331337658SMarcel Moolenaar
19431337658SMarcel Moolenaar	next if $ch =~ /[ \t\n\r]/ && $quote eq "" && $#open < 0;
19531337658SMarcel Moolenaar
19631337658SMarcel Moolenaar	$current .= $ch;
19731337658SMarcel Moolenaar
19831337658SMarcel Moolenaar	if ($quote) {
19931337658SMarcel Moolenaar	    if ($ch eq $quote) {
20031337658SMarcel Moolenaar		$quote = "";
20131337658SMarcel Moolenaar	    }
20231337658SMarcel Moolenaar	    next;
20331337658SMarcel Moolenaar	}
20431337658SMarcel Moolenaar	if ($quotes{$ch}) {
20531337658SMarcel Moolenaar	    $quote = $quotes{$ch};
20631337658SMarcel Moolenaar	    $current = substr($current, 0, -2) if $current =~ /""$/;
20731337658SMarcel Moolenaar	    next;
20831337658SMarcel Moolenaar	}
20931337658SMarcel Moolenaar
21031337658SMarcel Moolenaar	if ($pairs{$ch}) {
21131337658SMarcel Moolenaar	    push @open, $pairs{$ch};
21231337658SMarcel Moolenaar	    next;
21331337658SMarcel Moolenaar	}
21431337658SMarcel Moolenaar
21531337658SMarcel Moolenaar	if ($#open >= 0 && $ch eq $open[$#open]) {
21631337658SMarcel Moolenaar	    pop @open;
21731337658SMarcel Moolenaar	    next;
21831337658SMarcel Moolenaar	}
21931337658SMarcel Moolenaar    }
22031337658SMarcel Moolenaar
22131337658SMarcel Moolenaar    push @tokens, substr($current, 0, -1);
22231337658SMarcel Moolenaar    return @tokens;
22331337658SMarcel Moolenaar}
22431337658SMarcel Moolenaar
22531337658SMarcel Moolenaarsub get_tokens {
22631337658SMarcel Moolenaar    if ($ln + 1 < $#input) {
22731337658SMarcel Moolenaar	$line = $input[++$ln];
22831337658SMarcel Moolenaar	$curln += 1;
22931337658SMarcel Moolenaar	$replay .= $curln . "     " . $line;
23031337658SMarcel Moolenaar	@data = split(//, $line);
23131337658SMarcel Moolenaar	$off = 0;
23231337658SMarcel Moolenaar    }
23331337658SMarcel Moolenaar}
23431337658SMarcel Moolenaar
23531337658SMarcel Moolenaarsub check_format {
23631337658SMarcel Moolenaar    my($format) = @_;
23731337658SMarcel Moolenaar
23831337658SMarcel Moolenaar    return unless $format =~ /^".*"$/;
23931337658SMarcel Moolenaar
24031337658SMarcel Moolenaar    my @data = split(//, $format);
24131337658SMarcel Moolenaar    my $ch;
24231337658SMarcel Moolenaar    my $braces = 0;
24331337658SMarcel Moolenaar    local $count = 0;
24431337658SMarcel Moolenaar    my $content = "";
24531337658SMarcel Moolenaar    my $off;
24631337658SMarcel Moolenaar    my $phase = 0;
24731337658SMarcel Moolenaar    my @build = ();
24831337658SMarcel Moolenaar    local $last, $prev = "";
24931337658SMarcel Moolenaar
25031337658SMarcel Moolenaar    # Nukes quotes
25131337658SMarcel Moolenaar    pop @data;
25231337658SMarcel Moolenaar    shift @data;
25331337658SMarcel Moolenaar
25431337658SMarcel Moolenaar    for (;;) {
25531337658SMarcel Moolenaar	last if $off > $#data;
25631337658SMarcel Moolenaar	$ch = $data[$off++];
25731337658SMarcel Moolenaar
25831337658SMarcel Moolenaar	if ($ch eq "\\") {
25931337658SMarcel Moolenaar	    $ch = $data[$off++];
26031337658SMarcel Moolenaar	    $off += 1 if $ch eq "\\"; # double backslash: "\\/"
26131337658SMarcel Moolenaar	    next;
26231337658SMarcel Moolenaar	}
26331337658SMarcel Moolenaar
26431337658SMarcel Moolenaar	if ($braces) {
26531337658SMarcel Moolenaar	    if ($ch eq "}") {
26631337658SMarcel Moolenaar		check_field(@build);
26731337658SMarcel Moolenaar		$braces = 0;
26831337658SMarcel Moolenaar		@build = ();
26931337658SMarcel Moolenaar		$phase = 0;
27031337658SMarcel Moolenaar		next;
27131337658SMarcel Moolenaar	    } elsif ($phase == 0 && $ch eq ":") {
27231337658SMarcel Moolenaar		$phase += 1;
27331337658SMarcel Moolenaar		next;
27431337658SMarcel Moolenaar	    } elsif ($ch eq "/") {
27531337658SMarcel Moolenaar		$phase += 1;
27631337658SMarcel Moolenaar		next;
27731337658SMarcel Moolenaar	    }
27831337658SMarcel Moolenaar
27931337658SMarcel Moolenaar	} else {
28031337658SMarcel Moolenaar	    if ($ch eq "{") {
28131337658SMarcel Moolenaar		check_text($build[0]) if length($build[0]);
28231337658SMarcel Moolenaar		$braces = 1;
28331337658SMarcel Moolenaar		@build = ();
28431337658SMarcel Moolenaar		$last = $prev;
28531337658SMarcel Moolenaar		next;
28631337658SMarcel Moolenaar	    }
287c600d307SMarcel Moolenaar	    $prev = $ch;
28831337658SMarcel Moolenaar	}
28931337658SMarcel Moolenaar
29031337658SMarcel Moolenaar	$build[$phase] .= $ch;
29131337658SMarcel Moolenaar    }
29231337658SMarcel Moolenaar
29331337658SMarcel Moolenaar    if ($braces) {
29431337658SMarcel Moolenaar	error("missing closing brace");
29531337658SMarcel Moolenaar	check_field(@build);
29631337658SMarcel Moolenaar    } else {
29731337658SMarcel Moolenaar	check_text($build[0]) if length($build[0]);
29831337658SMarcel Moolenaar    }
29931337658SMarcel Moolenaar}
30031337658SMarcel Moolenaar
30131337658SMarcel Moolenaarsub check_text {
30231337658SMarcel Moolenaar    my($text) = @_;
30331337658SMarcel Moolenaar
30431337658SMarcel Moolenaar    print "checking text: [$text]\n" if $opt_debug;
30531337658SMarcel Moolenaar
30631337658SMarcel Moolenaar    #@ A percent sign appearing in text is a literal
30731337658SMarcel Moolenaar    #@     xo_emit("cost: %d", cost);
30831337658SMarcel Moolenaar    #@ Should be:
30931337658SMarcel Moolenaar    #@     xo_emit("{L:cost}: {:cost/%d}", cost);
31031337658SMarcel Moolenaar    #@ This can be a bit surprising and could be a field that was not
31131337658SMarcel Moolenaar    #@ properly converted to a libxo-style format string.
31231337658SMarcel Moolenaar    info("a percent sign appearing in text is a literal") if $text =~ /%/;
31331337658SMarcel Moolenaar}
31431337658SMarcel Moolenaar
315d1a0d267SMarcel Moolenaar%short = (
316d1a0d267SMarcel Moolenaar    # Roles
317d1a0d267SMarcel Moolenaar    "color" => "C",
318d1a0d267SMarcel Moolenaar    "decoration" => "D",
319d1a0d267SMarcel Moolenaar    "error" => "E",
320d1a0d267SMarcel Moolenaar    "label" => "L",
321d1a0d267SMarcel Moolenaar    "note" => "N",
322d1a0d267SMarcel Moolenaar    "padding" => "P",
323d1a0d267SMarcel Moolenaar    "title" => "T",
324d1a0d267SMarcel Moolenaar    "units" => "U",
325d1a0d267SMarcel Moolenaar    "value" => "V",
326d1a0d267SMarcel Moolenaar    "warning" => "W",
327d1a0d267SMarcel Moolenaar    "start-anchor" => "[",
328d1a0d267SMarcel Moolenaar    "stop-anchor" => "]",
329d1a0d267SMarcel Moolenaar    # Modifiers
330d1a0d267SMarcel Moolenaar    "colon" => "c",
331d1a0d267SMarcel Moolenaar    "display" => "d",
332d1a0d267SMarcel Moolenaar    "encoding" => "e",
333d1a0d267SMarcel Moolenaar    "hn" => "h",
334d1a0d267SMarcel Moolenaar    "hn-decimal" => "@",
335d1a0d267SMarcel Moolenaar    "hn-space" => "@",
336d1a0d267SMarcel Moolenaar    "hn-1000" => "@",
337d1a0d267SMarcel Moolenaar    "humanize" => "h",
338d1a0d267SMarcel Moolenaar    "key" => "k",
339d1a0d267SMarcel Moolenaar    "leaf-list" => "l",
340d1a0d267SMarcel Moolenaar    "no-quotes" => "n",
341d1a0d267SMarcel Moolenaar    "quotes" => "q",
342d1a0d267SMarcel Moolenaar    "trim" => "t",
343d1a0d267SMarcel Moolenaar    "white" => "w",
344d1a0d267SMarcel Moolenaar );
345d1a0d267SMarcel Moolenaar
34631337658SMarcel Moolenaarsub check_field {
34731337658SMarcel Moolenaar    my(@field) = @_;
34831337658SMarcel Moolenaar    print "checking field: [" . join("][", @field) . "]\n" if $opt_debug;
34931337658SMarcel Moolenaar
350d1a0d267SMarcel Moolenaar    if ($field[0] =~ /,/) {
351d1a0d267SMarcel Moolenaar	# We have long names; deal with it by turning them into short names
352d1a0d267SMarcel Moolenaar	my @parts = split(/,/, $field[0]);
353d1a0d267SMarcel Moolenaar	my $new = "";
354d1a0d267SMarcel Moolenaar	for (my $i = 1; $i <= $#parts; $i++) {
355d1a0d267SMarcel Moolenaar	    my $v = $parts[$i];
356d1a0d267SMarcel Moolenaar	    $v =~ s/^\s+//;
357d1a0d267SMarcel Moolenaar	    $v =~ s/\s+$//;
358d1a0d267SMarcel Moolenaar	    if ($short{$v} eq "@") {
359d1a0d267SMarcel Moolenaar		# ignore; has no short version
360d1a0d267SMarcel Moolenaar	    } elsif ($short{$v}) {
361d1a0d267SMarcel Moolenaar		$new .= $short{$v};
362d1a0d267SMarcel Moolenaar	    } else {
363d1a0d267SMarcel Moolenaar		#@ Unknown long name for role/modifier
364d1a0d267SMarcel Moolenaar		#@   xo_emit("{,humanization:value}", value);
365d1a0d267SMarcel Moolenaar		#@ Should be:
366d1a0d267SMarcel Moolenaar		#@   xo_emit("{,humanize:value}", value);
367d1a0d267SMarcel Moolenaar		#@ The hn-* modifiers (hn-decimal, hn-space, hn-1000)
368d1a0d267SMarcel Moolenaar		#@ are only valid for fields with the {h:} modifier.
369d1a0d267SMarcel Moolenaar		error("Unknown long name for role/modifier ($v)");
370d1a0d267SMarcel Moolenaar	    }
371d1a0d267SMarcel Moolenaar	}
372d1a0d267SMarcel Moolenaar
373d1a0d267SMarcel Moolenaar	$field[4] = substr($field[0], index($field[0], ","));
374d1a0d267SMarcel Moolenaar	$field[0] = $parts[0] . $new;
375d1a0d267SMarcel Moolenaar    }
376d1a0d267SMarcel Moolenaar
37731337658SMarcel Moolenaar    if ($opt_vocabulary) {
37831337658SMarcel Moolenaar	$vocabulary{$field[1]} = 1
37931337658SMarcel Moolenaar	    if $field[1] && $field[0] !~ /[DELNPTUW\[\]]/;
38031337658SMarcel Moolenaar	return;
38131337658SMarcel Moolenaar    }
38231337658SMarcel Moolenaar
38331337658SMarcel Moolenaar    #@ Last character before field definition is a field type
38431337658SMarcel Moolenaar    #@ A common typo:
38531337658SMarcel Moolenaar    #@     xo_emit("{T:Min} T{:Max}");
38631337658SMarcel Moolenaar    #@ Should be:
38731337658SMarcel Moolenaar    #@     xo_emit("{T:Min} {T:Max}");
38831337658SMarcel Moolenaar    #@ Twiddling the "{" and the field role is a common typo.
38931337658SMarcel Moolenaar    info("last character before field definition is a field type ($last)")
39031337658SMarcel Moolenaar	if $last =~ /[DELNPTUVW\[\]]/ && $field[0] !~ /[DELNPTUVW\[\]]/;
39131337658SMarcel Moolenaar
39231337658SMarcel Moolenaar    #@ Encoding format uses different number of arguments
39331337658SMarcel Moolenaar    #@     xo_emit("{:name/%6.6s %%04d/%s}", name, number);
39431337658SMarcel Moolenaar    #@ Should be:
39531337658SMarcel Moolenaar    #@     xo_emit("{:name/%6.6s %04d/%s-%d}", name, number);
39631337658SMarcel Moolenaar    #@ Both format should consume the same number of arguments off the stack
39731337658SMarcel Moolenaar    my $cf = count_args($field[2]);
39831337658SMarcel Moolenaar    my $ce = count_args($field[3]);
39931337658SMarcel Moolenaar    warn("encoding format uses different number of arguments ($cf/$ce)")
40031337658SMarcel Moolenaar	if $ce >= 0 && $cf >= 0 && $ce != $cf;
40131337658SMarcel Moolenaar
40231337658SMarcel Moolenaar    #@ Only one field role can be used
40331337658SMarcel Moolenaar    #@     xo_emit("{LT:Max}");
40431337658SMarcel Moolenaar    #@ Should be:
40531337658SMarcel Moolenaar    #@     xo_emit("{T:Max}");
40631337658SMarcel Moolenaar    my(@roles) = ($field[0] !~ /([DELNPTUVW\[\]]).*([DELNPTUVW\[\]])/);
40731337658SMarcel Moolenaar    error("only one field role can be used (" . join(", ", @roles) . ")")
40831337658SMarcel Moolenaar	if $#roles > 0;
40931337658SMarcel Moolenaar
410788ca347SMarcel Moolenaar    # Field is a color, note, label, or title
411788ca347SMarcel Moolenaar    if ($field[0] =~ /[CDLNT]/) {
41231337658SMarcel Moolenaar
413788ca347SMarcel Moolenaar	#@ Potential missing slash after C, D, N, L, or T with format
41431337658SMarcel Moolenaar	#@     xo_emit("{T:%6.6s}\n", "Max");
41531337658SMarcel Moolenaar	#@ should be:
41631337658SMarcel Moolenaar	#@     xo_emit("{T:/%6.6s}\n", "Max");
41731337658SMarcel Moolenaar	#@ The "%6.6s" will be a literal, not a field format.  While
41831337658SMarcel Moolenaar	#@ it's possibly valid, it's likely a missing "/".
419788ca347SMarcel Moolenaar	info("potential missing slash after C, D, N, L, or T with format")
42031337658SMarcel Moolenaar	    if $field[1] =~ /%/;
42131337658SMarcel Moolenaar
42231337658SMarcel Moolenaar	#@ An encoding format cannot be given (roles: DNLT)
42331337658SMarcel Moolenaar	#@    xo_emit("{T:Max//%s}", "Max");
424788ca347SMarcel Moolenaar	#@ Fields with the C, D, N, L, and T roles are not emitted in
42531337658SMarcel Moolenaar	#@ the 'encoding' style (JSON, XML), so an encoding format
42631337658SMarcel Moolenaar	#@ would make no sense.
42731337658SMarcel Moolenaar	error("encoding format cannot be given when content is present")
42831337658SMarcel Moolenaar	    if $field[3];
42931337658SMarcel Moolenaar    }
43031337658SMarcel Moolenaar
431788ca347SMarcel Moolenaar    # Field is a color, decoration, label, or title
432788ca347SMarcel Moolenaar    if ($field[0] =~ /[CDLN]/) {
433788ca347SMarcel Moolenaar	#@ Format cannot be given when content is present (roles: CDLN)
434c600d307SMarcel Moolenaar	#@    xo_emit("{N:Max/%6.6s}", "Max");
435788ca347SMarcel Moolenaar	#@ Fields with the C, D, L, or N roles can't have both
436c600d307SMarcel Moolenaar	#@ static literal content ("{L:Label}") and a
437c600d307SMarcel Moolenaar	#@ format ("{L:/%s}").
438c600d307SMarcel Moolenaar	#@ This error will also occur when the content has a backslash
439c600d307SMarcel Moolenaar	#@ in it, like "{N:Type of I/O}"; backslashes should be escaped,
440c600d307SMarcel Moolenaar	#@ like "{N:Type of I\\/O}".  Note the double backslash, one for
441c600d307SMarcel Moolenaar	#@ handling 'C' strings, and one for libxo.
442c600d307SMarcel Moolenaar	error("format cannot be given when content is present")
443c600d307SMarcel Moolenaar	    if $field[1] && $field[2];
444c600d307SMarcel Moolenaar    }
445c600d307SMarcel Moolenaar
446788ca347SMarcel Moolenaar    # Field is a color/effect
447788ca347SMarcel Moolenaar    if ($field[0] =~ /C/) {
448788ca347SMarcel Moolenaar	if ($field[1]) {
449788ca347SMarcel Moolenaar	    my $val;
450788ca347SMarcel Moolenaar	    my @sub = split(/,/, $field[1]);
451788ca347SMarcel Moolenaar	    grep { s/^\s*//; s/\s*$//; } @sub;
452788ca347SMarcel Moolenaar
453788ca347SMarcel Moolenaar	    for $val (@sub) {
454788ca347SMarcel Moolenaar		if ($val =~ /^(default,black,red,green,yellow,blue,magenta,cyan,white)$/) {
455788ca347SMarcel Moolenaar
456788ca347SMarcel Moolenaar		    #@ Field has color without fg- or bg- (role: C)
457788ca347SMarcel Moolenaar		    #@   xo_emit("{C:green}{:foo}{C:}", x);
458788ca347SMarcel Moolenaar		    #@ Should be:
459788ca347SMarcel Moolenaar		    #@   xo_emit("{C:fg-green}{:foo}{C:}", x);
460788ca347SMarcel Moolenaar		    #@ Colors must be prefixed by either "fg-" or "bg-".
461788ca347SMarcel Moolenaar		    error("Field has color without fg- or bg- (role: C)");
462788ca347SMarcel Moolenaar
463788ca347SMarcel Moolenaar		} elsif ($val =~ /^(fg|bg)-(default|black|red|green|yellow|blue|magenta|cyan|white)$/) {
464788ca347SMarcel Moolenaar		    # color
465788ca347SMarcel Moolenaar		} elsif ($val =~ /^(bold|underline)$/) {
466788ca347SMarcel Moolenaar		} elsif ($val =~ /^(no-)?(bold|underline|inverse)$/) {
467788ca347SMarcel Moolenaar		    # effect
468788ca347SMarcel Moolenaar
469788ca347SMarcel Moolenaar		} elsif ($val =~ /^(reset|normal)$/) {
470788ca347SMarcel Moolenaar		    # effect also
471788ca347SMarcel Moolenaar		} else {
472788ca347SMarcel Moolenaar		    #@ Field has invalid color or effect (role: C)
473788ca347SMarcel Moolenaar		    #@   xo_emit("{C:fg-purple,bold}{:foo}{C:gween}", x);
474788ca347SMarcel Moolenaar		    #@ Should be:
475788ca347SMarcel Moolenaar		    #@   xo_emit("{C:fg-red,bold}{:foo}{C:fg-green}", x);
476788ca347SMarcel Moolenaar		    #@ The list of colors and effects are limited.  The
477788ca347SMarcel Moolenaar		    #@ set of colors includes default, black, red, green,
478788ca347SMarcel Moolenaar		    #@ yellow, blue, magenta, cyan, and white, which must
479788ca347SMarcel Moolenaar		    #@ be prefixed by either "fg-" or "bg-".  Effects are
480788ca347SMarcel Moolenaar		    #@ limited to bold, no-bold, underline, no-underline,
481788ca347SMarcel Moolenaar		    #@ inverse, no-inverse, normal, and reset.  Values must
482788ca347SMarcel Moolenaar		    #@ be separated by commas.
483788ca347SMarcel Moolenaar		    error("Field has invalid color or effect (role: C) ($val)");
484788ca347SMarcel Moolenaar		}
485788ca347SMarcel Moolenaar	    }
486788ca347SMarcel Moolenaar	}
487788ca347SMarcel Moolenaar    }
488788ca347SMarcel Moolenaar
489d1a0d267SMarcel Moolenaar    # Humanized field
490d1a0d267SMarcel Moolenaar    if ($field[0] =~ /h/) {
491d1a0d267SMarcel Moolenaar	if (length($field[2]) == 0) {
492d1a0d267SMarcel Moolenaar	    #@ Field has humanize modifier but no format string
493d1a0d267SMarcel Moolenaar	    #@   xo_emit("{h:value}", value);
494d1a0d267SMarcel Moolenaar	    #@ Should be:
495d1a0d267SMarcel Moolenaar	    #@   xo_emit("{h:value/%d}", value);
496d1a0d267SMarcel Moolenaar	    #@ Humanization is only value for numbers, which are not
497d1a0d267SMarcel Moolenaar	    #@ likely to use the default format ("%s").
498d1a0d267SMarcel Moolenaar	    error("Field has humanize modifier but no format string");
499d1a0d267SMarcel Moolenaar	}
500d1a0d267SMarcel Moolenaar    }
501d1a0d267SMarcel Moolenaar
502d1a0d267SMarcel Moolenaar    # hn-* on non-humanize field
503d1a0d267SMarcel Moolenaar    if ($field[0] !~ /h/) {
504d1a0d267SMarcel Moolenaar	if ($field[4] =~ /,hn-/) {
505d1a0d267SMarcel Moolenaar	    #@ Field has hn-* modifier but not 'h' modifier
506d1a0d267SMarcel Moolenaar	    #@   xo_emit("{,hn-1000:value}", value);
507d1a0d267SMarcel Moolenaar	    #@ Should be:
508d1a0d267SMarcel Moolenaar	    #@   xo_emit("{h,hn-1000:value}", value);
509d1a0d267SMarcel Moolenaar	    #@ The hn-* modifiers (hn-decimal, hn-space, hn-1000)
510d1a0d267SMarcel Moolenaar	    #@ are only valid for fields with the {h:} modifier.
511d1a0d267SMarcel Moolenaar	    error("Field has hn-* modifier but not 'h' modifier");
512d1a0d267SMarcel Moolenaar	}
513d1a0d267SMarcel Moolenaar    }
514d1a0d267SMarcel Moolenaar
51531337658SMarcel Moolenaar    # A value field
51631337658SMarcel Moolenaar    if (length($field[0]) == 0 || $field[0] =~ /V/) {
51731337658SMarcel Moolenaar
51831337658SMarcel Moolenaar	#@ Value field must have a name (as content)")
51931337658SMarcel Moolenaar	#@     xo_emit("{:/%s}", "value");
52031337658SMarcel Moolenaar	#@ Should be:
52131337658SMarcel Moolenaar	#@     xo_emit("{:tag-name/%s}", "value");
52231337658SMarcel Moolenaar	#@ The field name is used for XML and JSON encodings.  These
52331337658SMarcel Moolenaar	#@ tags names are static and must appear directly in the
52431337658SMarcel Moolenaar	#@ field descriptor.
52531337658SMarcel Moolenaar	error("value field must have a name (as content)")
52631337658SMarcel Moolenaar	    unless $field[1];
52731337658SMarcel Moolenaar
52831337658SMarcel Moolenaar	#@ Use hyphens, not underscores, for value field name
52931337658SMarcel Moolenaar	#@     xo_emit("{:no_under_scores}", "bad");
53031337658SMarcel Moolenaar	#@ Should be:
53131337658SMarcel Moolenaar	#@     xo_emit("{:no-under-scores}", "bad");
53231337658SMarcel Moolenaar	#@ Use of hyphens is traditional in XML, and the XOF_UNDERSCORES
53331337658SMarcel Moolenaar	#@ flag can be used to generate underscores in JSON, if desired.
53431337658SMarcel Moolenaar	#@ But the raw field name should use hyphens.
53531337658SMarcel Moolenaar	error("use hyphens, not underscores, for value field name")
53631337658SMarcel Moolenaar	    if $field[1] =~ /_/;
53731337658SMarcel Moolenaar
53831337658SMarcel Moolenaar	#@ Value field name cannot start with digit
53931337658SMarcel Moolenaar	#@     xo_emit("{:10-gig/}");
54031337658SMarcel Moolenaar	#@ Should be:
54131337658SMarcel Moolenaar	#@     xo_emit("{:ten-gig/}");
54231337658SMarcel Moolenaar	#@ XML element names cannot start with a digit.
54331337658SMarcel Moolenaar	error("value field name cannot start with digit")
54431337658SMarcel Moolenaar	    if $field[1] =~ /^[0-9]/;
54531337658SMarcel Moolenaar
54631337658SMarcel Moolenaar	#@ Value field name should be lower case
54731337658SMarcel Moolenaar	#@     xo_emit("{:WHY-ARE-YOU-SHOUTING}", "NO REASON");
54831337658SMarcel Moolenaar	#@ Should be:
54931337658SMarcel Moolenaar	#@     xo_emit("{:why-are-you-shouting}", "no reason");
55031337658SMarcel Moolenaar	#@ Lower case is more civilized.  Even TLAs should be lower case
55131337658SMarcel Moolenaar	#@ to avoid scenarios where the differences between "XPath" and
55231337658SMarcel Moolenaar	#@ "Xpath" drive your users crazy.  Lower case rules the seas.
55331337658SMarcel Moolenaar	error("value field name should be lower case")
55431337658SMarcel Moolenaar	    if $field[1] =~ /[A-Z]/;
55531337658SMarcel Moolenaar
55631337658SMarcel Moolenaar	#@ Value field name should be longer than two characters
55731337658SMarcel Moolenaar	#@     xo_emit("{:x}", "mumble");
55831337658SMarcel Moolenaar	#@ Should be:
55931337658SMarcel Moolenaar	#@     xo_emit("{:something-meaningful}", "mumble");
56031337658SMarcel Moolenaar	#@ Field names should be descriptive, and it's hard to
56131337658SMarcel Moolenaar	#@ be descriptive in less than two characters.  Consider
56231337658SMarcel Moolenaar	#@ your users and try to make something more useful.
56331337658SMarcel Moolenaar	#@ Note that this error often occurs when the field type
56431337658SMarcel Moolenaar	#@ is placed after the colon ("{:T/%20s}"), instead of before
56531337658SMarcel Moolenaar	#@ it ("{T:/20s}").
56631337658SMarcel Moolenaar	error("value field name should be longer than two characters")
56731337658SMarcel Moolenaar	    if $field[1] =~ /[A-Z]/;
56831337658SMarcel Moolenaar
56931337658SMarcel Moolenaar	#@ Value field name contains invalid character
57031337658SMarcel Moolenaar	#@     xo_emit("{:cost-in-$$/%u}", 15);
57131337658SMarcel Moolenaar	#@ Should be:
57231337658SMarcel Moolenaar	#@     xo_emit("{:cost-in-dollars/%u}", 15);
57331337658SMarcel Moolenaar	#@ An invalid character is often a sign of a typo, like "{:]}"
57431337658SMarcel Moolenaar	#@ instead of "{]:}".  Field names are restricted to lower-case
57531337658SMarcel Moolenaar	#@ characters, digits, and hyphens.
57631337658SMarcel Moolenaar	error("value field name contains invalid character (" . $field[1] . ")")
57731337658SMarcel Moolenaar	    unless $field[1] =~ /^[0-9a-z-]*$/;
57831337658SMarcel Moolenaar    }
57931337658SMarcel Moolenaar
58031337658SMarcel Moolenaar    # A decoration field
58131337658SMarcel Moolenaar    if ($field[0] =~ /D/) {
58231337658SMarcel Moolenaar
58331337658SMarcel Moolenaar	#@decoration field contains invalid character
58431337658SMarcel Moolenaar	#@     xo_emit("{D:not good}");
58531337658SMarcel Moolenaar	#@ Should be:
58631337658SMarcel Moolenaar	#@     xo_emit("{D:((}{:good}{D:))}", "yes");
58731337658SMarcel Moolenaar	#@ This is minor, but fields should use proper roles.  Decoration
588788ca347SMarcel Moolenaar	#@ fields are meant to hold punctuation and other characters used
58931337658SMarcel Moolenaar	#@ to decorate the content, typically to make it more readable
59031337658SMarcel Moolenaar	#@ to human readers.
59131337658SMarcel Moolenaar	warn("decoration field contains invalid character")
59231337658SMarcel Moolenaar	    unless $field[1] =~ m:^[~!\@\#\$%^&\*\(\);\:\[\]\{\} ]+$:;
59331337658SMarcel Moolenaar    }
59431337658SMarcel Moolenaar
59531337658SMarcel Moolenaar    if ($field[0] =~ /[\[\]]/) {
59631337658SMarcel Moolenaar	#@ Anchor content should be decimal width
59731337658SMarcel Moolenaar	#@     xo_emit("{[:mumble}");
59831337658SMarcel Moolenaar	#@ Should be:
59931337658SMarcel Moolenaar	#@     xo_emit("{[:32}");
60031337658SMarcel Moolenaar	#@ Anchors need an integer value to specify the width of
60131337658SMarcel Moolenaar	#@ the set of anchored fields.  The value can be positive
60231337658SMarcel Moolenaar	#@ (for left padding/right justification) or negative (for
60331337658SMarcel Moolenaar	#@ right padding/left justification) and can appear in
60431337658SMarcel Moolenaar	#@ either the start or stop anchor field descriptor.
60531337658SMarcel Moolenaar	error("anchor content should be decimal width")
60631337658SMarcel Moolenaar	    if $field[1] && $field[1] !~ /^-?\d+$/ ;
60731337658SMarcel Moolenaar
60831337658SMarcel Moolenaar	#@ Anchor format should be "%d"
60931337658SMarcel Moolenaar	#@     xo_emit("{[:/%s}");
61031337658SMarcel Moolenaar	#@ Should be:
61131337658SMarcel Moolenaar	#@     xo_emit("{[:/%d}");
61231337658SMarcel Moolenaar	#@ Anchors only grok integer values, and if the value is not static,
61331337658SMarcel Moolenaar	#@ if must be in an 'int' argument, represented by the "%d" format.
61431337658SMarcel Moolenaar	#@ Anything else is an error.
61531337658SMarcel Moolenaar	error("anchor format should be \"%d\"")
61631337658SMarcel Moolenaar	    if $field[2] && $field[2] ne "%d";
61731337658SMarcel Moolenaar
61831337658SMarcel Moolenaar	#@ Anchor cannot have both format and encoding format")
61931337658SMarcel Moolenaar	#@     xo_emit("{[:32/%d}");
62031337658SMarcel Moolenaar	#@ Should be:
62131337658SMarcel Moolenaar	#@     xo_emit("{[:32}");
62231337658SMarcel Moolenaar	#@ Anchors can have a static value or argument for the width,
62331337658SMarcel Moolenaar	#@ but cannot have both.
62431337658SMarcel Moolenaar	error("anchor cannot have both format and encoding format")
62531337658SMarcel Moolenaar	    if $field[1] && $field[2];
62631337658SMarcel Moolenaar    }
62731337658SMarcel Moolenaar}
62831337658SMarcel Moolenaar
62931337658SMarcel Moolenaarsub count_args {
63031337658SMarcel Moolenaar    my($format) = @_;
63131337658SMarcel Moolenaar
63231337658SMarcel Moolenaar    return -1 unless $format;
63331337658SMarcel Moolenaar
63431337658SMarcel Moolenaar    my $in;
63531337658SMarcel Moolenaar    my($text, $ff, $fc, $rest);
63631337658SMarcel Moolenaar    for ($in = $format; $in; $in = $rest) {
63731337658SMarcel Moolenaar	($text, $ff, $fc, $rest) =
63831337658SMarcel Moolenaar	   ($in =~ /^([^%]*)(%[^%diouxXDOUeEfFgGaAcCsSp]*)([diouxXDOUeEfFgGaAcCsSp])(.*)$/);
63931337658SMarcel Moolenaar	unless ($ff) {
64031337658SMarcel Moolenaar	    # Might be a "%%"
64131337658SMarcel Moolenaar	    ($text, $ff, $rest) = ($in =~ /^([^%]*)(%%)(.*)$/);
64231337658SMarcel Moolenaar	    if ($ff) {
64331337658SMarcel Moolenaar		check_text($text);
64431337658SMarcel Moolenaar	    } else {
64531337658SMarcel Moolenaar		# Not sure what's going on here, but something's wrong...
64631337658SMarcel Moolenaar		error("invalid field format") if $in =~ /%/;
64731337658SMarcel Moolenaar	    }
64831337658SMarcel Moolenaar	    next;
64931337658SMarcel Moolenaar	}
65031337658SMarcel Moolenaar
65131337658SMarcel Moolenaar	check_text($text);
65231337658SMarcel Moolenaar	check_field_format($ff, $fc);
65331337658SMarcel Moolenaar    }
65431337658SMarcel Moolenaar
65531337658SMarcel Moolenaar    return 0;
65631337658SMarcel Moolenaar}
65731337658SMarcel Moolenaar
65831337658SMarcel Moolenaarsub check_field_format {
65931337658SMarcel Moolenaar    my($ff, $fc) = @_;
66031337658SMarcel Moolenaar
66131337658SMarcel Moolenaar    print "check_field_format: [$ff] [$fc]\n" if $opt_debug;
66231337658SMarcel Moolenaar
66331337658SMarcel Moolenaar    my(@chunks) = split(/\./, $ff);
66431337658SMarcel Moolenaar
66531337658SMarcel Moolenaar    #@ Max width only valid for strings
66631337658SMarcel Moolenaar    #@     xo_emit("{:tag/%2.4.6d}", 55);
66731337658SMarcel Moolenaar    #@ Should be:
66831337658SMarcel Moolenaar    #@     xo_emit("{:tag/%2.6d}", 55);
66931337658SMarcel Moolenaar    #@ libxo allows a true 'max width' in addition to the traditional
67031337658SMarcel Moolenaar    #@ printf-style 'max number of bytes to use for input'.  But this
67131337658SMarcel Moolenaar    #@ is supported only for string values, since it makes no sense
67231337658SMarcel Moolenaar    #@ for non-strings.  This error may occur from a typo,
67331337658SMarcel Moolenaar    #@ like "{:tag/%6..6d}" where only one period should be used.
67431337658SMarcel Moolenaar    error("max width only valid for strings")
675c600d307SMarcel Moolenaar	if $#chunks >= 2 && $fc !~ /[sS]/;
67631337658SMarcel Moolenaar}
67731337658SMarcel Moolenaar
67831337658SMarcel Moolenaarsub error {
67931337658SMarcel Moolenaar    return if $opt_vocabulary;
68031337658SMarcel Moolenaar    print STDERR $curfile . ": " .$curln . ": error: " . join(" ", @_) . "\n";
68131337658SMarcel Moolenaar    print STDERR $replay . "\n" if $opt_print;
68231337658SMarcel Moolenaar    $errors += 1;
68331337658SMarcel Moolenaar}
68431337658SMarcel Moolenaar
68531337658SMarcel Moolenaarsub warn {
68631337658SMarcel Moolenaar    return if $opt_vocabulary;
68731337658SMarcel Moolenaar    print STDERR $curfile . ": " .$curln . ": warning: " . join(" ", @_) . "\n";
68831337658SMarcel Moolenaar    print STDERR $replay . "\n" if $opt_print;
68931337658SMarcel Moolenaar    $warnings += 1;
69031337658SMarcel Moolenaar}
69131337658SMarcel Moolenaar
69231337658SMarcel Moolenaarsub info {
69331337658SMarcel Moolenaar    return if $opt_vocabulary;
69431337658SMarcel Moolenaar    print STDERR $curfile . ": " .$curln . ": info: " . join(" ", @_) . "\n";
69531337658SMarcel Moolenaar    print STDERR $replay . "\n" if $opt_print;
69631337658SMarcel Moolenaar    $info += 1;
69731337658SMarcel Moolenaar}
69831337658SMarcel Moolenaar
69931337658SMarcel Moolenaarmain: {
70031337658SMarcel Moolenaar    main();
70131337658SMarcel Moolenaar}
702