#!/usr/bin/env perl
# ts=4
# Warren Block
# special thanks to Glen Barber for limitless
# patience and the use of his svn repository

# igor: check man pages and DocBook
# needs Perl 5.8 or higher

use strict;
use warnings;
use locale;

# Copyright (c) 2012, 2013, 2014 Warren Block
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

use Getopt::Std;
use File::Basename;
use POSIX qw/strftime/;

# external programs
my $file  = "/usr/bin/file";
my $gzcat = "/usr/bin/gzcat";
my $bzcat = "/usr/bin/bzcat";
my $man   = "/usr/bin/man";

# directory used for decompressed and stdin copies
my $tmpdir = "/tmp";

my $rev = '$Revision: 596 $';

my ($fh, $tmpfile, $stdinfile, $docdate);

my ($prevline, $prevnonblank, $origline) = ('', '');
my $ignoreblock;
my $titleblock = 0;
my $today;

my $linelensgml;
my ($startline, $stopline);
my $vid;
my ($ignoreblockstart, $ignoreblockend);
my %misspelled_words;
my @badphrases;
my @contractions;
my @freebsdobs;
my ($lc_regex, $uc_regex, $fixedcase_regex, $ignoreregex);
my ($indent_regex, $inline_regex);
my ($redundantword_regex, $redundanttagword_regex);
my (@straggler_tags, $literalblock_regex);
my $eos_regex;
my (@openclose_tags, $openclose_regex, %opentag, $list_regex, $parawrap_regex);

my ($bname, $type);

my $prog = basename($0);

# Print the option summary and exit successfully.
# The version shown is "1." followed by the number in $rev.
sub usage {
    # only use $1 when the match succeeded; otherwise it holds stale data
    my $version = ( $rev =~ /Revision: (\d+)/ ) ? "1.$1" : "1.unknown";
    print <<USAGE;
$prog $version
usage: $prog -h
       $prog [-abcdefgilmnoprstuvwxyzDERSWXZ] [-C range] [-L n] [-V vid] file [file ...]

    -h        show summary of command line options and exit

  Output options
    -R        ANSI highlights (use with 'less -R')
    -C range  Restrict output to a range of lines from the source file
    -v        Verbose output
    -V vid    Restrict output to a VID (or 'latest') in a VuXML file
    -X        XML output (overrides -R)

  Tests
    If individual test options are given, only those tests are done.

  Shortcuts
    -z  all standard non-whitespace tests
    -Z  all standard whitespace tests

  Tests for all files
    -a  abbreviations like "e.g.," and "i.e.,"
    -b  bad phrases
    -f  FreeBSD obsolete features
    -r  repeated words
    -s  spelling
    -u  contractions
    -w  whitespace
    -y  style suggestions (off by default)

  mdoc(7) tests
    -d  document date (.Dd)
    -e  sentences should begin on a new line
    -g  See Also xrefs are not duplicated
    -m  mdoc structure requirements
    -p  mdoc whitespace requirements
    -x  additional xref (.Xr) tests (off by default, implies -m)
    -D  all but document date (same as -abefgmprsuw)

  DocBook tests
    -c  title capitalization
    -i  indentation
    -l  long lines (see -L below)
    -n  sentences start with two spaces
    -o  open/close tags match
    -t  tag usage style
    -E  writing style
    -S  straggler tags with undesired content whitespace
    -W  whitespace on SGML indentation

  DocBook test options
    -L n  set line length used in long line test (default 70)

  EXAMPLES

    $prog -R gpart.8.gz | less -R -S
    $prog -R -D -y /usr/share/man/man7/tuning.7.gz | less -R -S
    cat /usr/share/man/man1/csh.1.gz | $prog -D
    $prog -Rz chapter.sgml | less -RS
    $prog -R `find /usr/doc/en_US.ISO8859-1/ -name "*.xml"` | less -RS
    $prog -RD /usr/share/man/man8/* | less -RS

  gzip and bzip2 files are automatically decompressed.
USAGE
    exit 0;
}

our ($opt_a, $opt_b, $opt_c, $opt_d, $opt_e, $opt_f, $opt_g, $opt_h,
     $opt_i, $opt_l, $opt_m, $opt_n, $opt_o, $opt_p, $opt_r, $opt_s,
     $opt_t, $opt_u, $opt_v, $opt_w, $opt_x, $opt_y, $opt_z, $opt_C,
     $opt_E, $opt_D, $opt_L, $opt_R, $opt_S, $opt_V, $opt_W, $opt_X,
     $opt_Z);

getopts('abcdefghilmnoprstuvwxyzC:DEL:RSV:WXZ');

usage() if $opt_h;

# "my $x = ... if COND" has undefined behavior (perlsyn); use a plain
# expression so $verbose is always initialized
my $verbose = $opt_v ? 1 : 0;

# ANSI color codes
my @colors = qw/ red green yellow blue magenta cyan /;
my %ansi;
my $inverse = "\033[7m";
my $reset   = "\033[0;24;27m";
# left/right delimiters placed around parts of each report line;
# initialize() swaps them for ANSI sequences when -R is given
my $lf = '';    # filename
my $rf = '';
my $ll = '';    # line number
my $lr = '';
my $lh = '[';   # highlight
my $rh = ']';
my $li = '[';   # whitespace
my $ri = ']';

# mdoc SEE ALSO section flag and xrefs
my $seealso = 0;
my %seealsoxrefs;

# mdoc macros
my @macros = (qw/ Dd Dt Os Sh_NAME Nm Nd Sh_SYNOPSIS Sh_DESCRIPTION /);
my %macroval;

# Signal handler for INT and PIPE: close the current input filehandle
# if one is open, remove temporary files, and exit.
sub INT_handler {
    # $fh is undefined until a file has been opened; guard before fileno()
    if ( defined $fh && defined fileno($fh) ) {
        close $fh or die "could not close filehandle:$!\n";
    }
    removetempfiles();
    exit 0;
}

# One-time setup: compute today's date, build the ANSI color table,
# interpret command line options, run the init_* routines for the
# individual tests, and install signal handlers.
sub initialize {
    $today = strftime("%B %e, %Y", localtime);
    # %e pads single-digit days with a space; squeeze the double space
    $today =~ s/  / /g;

    # ANSI color codes
    for my $i (0..@colors-1) {
        $ansi{"dark$colors[$i]"} = "\033[" . ($i+31) . "m";
        $ansi{"$colors[$i]"}     = "\033[1;" . ($i+31) . "m";
    }
    # minor hackery: darkblue is so dark it needs a white background
    $ansi{"darkblue"} .= "\033[47m";

    # use ANSI highlights
    if ( $opt_R ) {
        $lf = $ansi{darkyellow};    # filename
        $rf = $reset;
        $ll = $ansi{darkcyan};      # line number
        $lr = $reset;
        $lh = $ansi{darkgreen};     # highlight
        $rh = $reset;
        $li = $inverse;             # whitespace
        $ri = $reset;
    }

    # SGML line length
    $linelensgml = 70;
    if ( defined($opt_L) && ($opt_L =~ /(\d+)/) ) {
        $linelensgml = $1 if $1 > 0;
    }

    # -C start-end limits output to a range of lines
    if ( $opt_C ) {
        my $rangemsg = "-C option requires a line number range (start- | start-end | -end)\n";
        ($startline, $stopline) = split /[:-]/, $opt_C;
        die $rangemsg unless $startline || $stopline;
        # the values are later compared numerically, so reject non-numbers
        for my $limit ($startline, $stopline) {
            next unless defined $limit && length $limit;
            die $rangemsg unless $limit =~ /^\d+$/;
        }
    }

    # -V vid limits output to a range of lines
    if ( $opt_V ) {
        $vid = $opt_V;
        unless ($vid eq 'latest' || $vid =~/.*-.*-/) {
            die "-V requires vulnerability ID like 348bfa69-25a2-11e5-ade1-0011d823eebd\n";
        }
        # placeholder range beyond any expected line number
        $startline = 999999;
        $stopline  = $startline;
    }

    # -D equals -abefgmprsuw
    if ( $opt_D ) {
        $opt_a = $opt_b = $opt_e = $opt_f = $opt_g = $opt_m = $opt_p
               = $opt_r = $opt_s = $opt_u = $opt_w = 1;
    }

    if ( $opt_z ) {
        # all non-whitespace tests
        $opt_a = $opt_b = $opt_c = $opt_d = $opt_e = $opt_f = $opt_g
               = $opt_m = $opt_o = $opt_p = $opt_r = $opt_s = $opt_u
               = $opt_E = $opt_S = 1;
    }

    if ( $opt_Z ) {
        # all whitespace tests
        $opt_i = $opt_l = $opt_n = $opt_t = $opt_w = $opt_W = 1;
    }

    if ( $opt_x ) {
        # -x implies -m
        $opt_m = 1;
    }

    # if no tests are chosen, do them all
    unless (   $opt_a || $opt_b || $opt_c || $opt_d || $opt_e
            || $opt_f || $opt_g || $opt_i || $opt_l || $opt_m
            || $opt_n || $opt_o || $opt_p || $opt_r || $opt_s
            || $opt_t || $opt_u || $opt_w || $opt_x || $opt_y
            || $opt_E || $opt_S || $opt_W ) {
        $opt_a = $opt_b = $opt_c = $opt_d = $opt_e
               = $opt_f = $opt_g = $opt_i = $opt_l = $opt_m
               = $opt_n = $opt_o = $opt_p = $opt_r = $opt_s
               = $opt_t = $opt_u = $opt_w = $opt_E = $opt_S
               = $opt_W = 1;
        # these two are off by default
        $opt_x = $opt_y = 0;
    }

    init_ignoreblocks();
    init_spellingerrors();
    init_badphrases();
    init_contractions();
    init_freebsdobs();
    init_doc_titles();
    init_doc_indentation();
    init_doc_sentence();
    init_doc_openclose();
    init_literalblock_regex();
    init_doc_writestyle();
    init_doc_stragglers();

    # ctrl-c handler
    $SIG{'INT'} = \&INT_handler;
    # do the same thing if the pipe closes
    $SIG{'PIPE'} = \&INT_handler;

    # autoflush
    $| = 1;

    # allow stdin
    push @ARGV, "stdin" if $#ARGV < 0;
}

# Return the text between the first dot of the basename and the next
# dot (or the end), e.g. "8" for "gpart.8.gz"; '' when there is no dot.
sub firstext {
    my $fname = shift;
    my $ext = '';
    if ( basename($fname) =~ /\.(.*?)(?:\.|$)/ ) {
        $ext = $1;
    }
    return $ext;
}

# Return the text after the last dot of the basename, e.g. "gz" for
# "gpart.8.gz"; '' when there is no dot.
sub lastext {
    my $fname = shift;
    my $ext = '';
    if ( basename($fname) =~ /\.([^.]*?)$/ ) {
        $ext = $1;
    }
    return $ext;
}

# Return the basename with everything from the first dot removed.
sub baseonly {
    my $fname = shift;
    $fname = basename($fname);
    $fname =~ s/\..*$//;
    return $fname;
}

# Build the temporary file name used for a decompressed copy of $fname.
# The name includes the process ID, the base name, and the first extension.
sub tmpfilename {
    my $fname = shift;
    my $ext  = firstext($fname);
    my $name = baseonly($fname);
    return "$tmpdir/$prog-tmp-$$-$name.$ext";
}

# Classify a file as "troff", "bzip", "gzip", "sgml", "xml", or "unknown".
# The last filename extension decides when there is one; otherwise the
# first two words of file(1) output are used.
sub filetype {
    my $fname = shift;
    # detect type from extension if possible
    my $ext = lastext($fname);
    if ( $ext ) {
        print "detecting file type by extension: '$ext'\n" if $verbose;
        # compression extensions are tested before the digit test:
        # "bz2" contains a digit and would otherwise be taken for troff
        if    ( $ext =~ /bz2/i )  { return "bzip" }
        elsif ( $ext =~ /gz/i )   { return "gzip" }
        elsif ( $ext =~ /\d{1}/ ) { return "troff" }
        elsif ( $ext =~ /sgml/i ) { return "sgml" }
        elsif ( $ext =~ /xml/i )  { return "xml" }
        else                      { return "unknown" }
    }
    # fall back to file(1)
    print "detecting file type with file(1)\n" if $verbose;
    my $out = '';
    # list-form pipe open: the filename never passes through a shell
    if ( open(my $pipe, '-|', $file, '-b', '--', $fname) ) {
        local $/;
        $out = <$pipe>;
        $out = '' unless defined $out;
        # exit status of file(1) is not needed, only its output
        close $pipe;
    }
    # first two words
    if ( $out =~ /^(\S+\s+\S+)/ ) {
        my $id = $1;
        if    ( $id =~ /^troff/ )         { return "troff" }
        elsif ( $id =~ /^exported SGML/ ) { return "sgml" }
        # some DocBook documents are detected as "Lisp/Scheme"
        elsif ( $id =~ /^Lisp\/Scheme/ )  { return "sgml" }
        elsif ( $id =~ /^gzip/ )          { return "gzip" }
        elsif ( $id =~ /^bzip/ )          { return "bzip" }
        else                              { return "unknown" }
    }
    return "unknown";
}

# Decompress $fname (type "gzip" or "bzip") into a temporary file and
# return that file's name.  Dies on an unknown type or on any failure.
sub uncompress {
    my ($fname, $type) = @_;
    my $tmpfile = tmpfilename($fname);
    print "uncompressing '$fname' to '$tmpfile'\n" if $verbose;

    my $cat;
    if    ( $type =~ /gzip/ ) { $cat = $gzcat }
    elsif ( $type =~ /bzip/ ) { $cat = $bzcat }
    else                      { die "unknown compression type '$type'\n" }

    # list-form pipe open instead of a shell command line, so filenames
    # with spaces or shell metacharacters are passed through untouched
    open(my $in, '-|', $cat, '--', $fname)
        or die "could not create '$tmpfile':$!\n";
    open(my $out, '>', $tmpfile)
        or die "could not create '$tmpfile':$!\n";
    binmode $in;
    binmode $out;
    while ( my $chunk = <$in> ) {
        print {$out} $chunk or die "could not create '$tmpfile':$!\n";
    }
    close $out or die "could not create '$tmpfile':$!\n";
    # close on a pipe fails when the child exits non-zero; $? has the status
    close $in
        or die "could not create '$tmpfile':$cat exit status " . ($? >> 8) . "\n";
    return $tmpfile;
}

# Copy standard input to a temporary file and return its name.
# Sets the globals $stdinfile and $fh.
sub writestdinfile {
    $stdinfile = "$tmpdir/$prog-stdin.$$";
    open $fh, ">", $stdinfile or die "could not create '$stdinfile':$!\n";
    print $fh <STDIN> or die "could not write '$stdinfile':$!\n";
    close $fh or die "could not close '$stdinfile':$!\n";
    return $stdinfile;
}

# Delete the stdin copy and the decompressed copy if they exist.
sub removetempfiles {
    if ( $stdinfile && -f $stdinfile ) {
        print "deleting stdinfile '$stdinfile'\n" if $verbose;
        unlink $stdinfile or die "could not remove '$stdinfile':$!\n";
    }
    if ( $tmpfile && -f $tmpfile ) {
        print "deleting tmpfile '$tmpfile'\n" if $verbose;
        unlink $tmpfile or die "could not remove '$tmpfile':$!\n";
    }
}

# Escape text for use inside an XML attribute value.
sub xmlize {
    my $txt = shift;
    # ampersand first, so the entities added below are not escaped again
    $txt =~ s/&/&amp;/g;
    $txt =~ s/'/&apos;/g;
    $txt =~ s/"/&quot;/g;
    $txt =~ s/</&lt;/g;
    $txt =~ s/>/&gt;/g;
    return $txt;
}

# Print one report.  Plain output is "file:line:description:text";
# with -X an <error .../> element is printed instead and $txt is unused.
sub showline {
    my ($bname, $linenum, $color, $errordesc, $txt) = @_;
    # limit output to line number range
    # (tested against the current input line $., not $linenum)
    return if $startline && ($. < $startline);
    if ( !$opt_X ) {
        print "$lf$bname$rf:";
        print "$ll$linenum$lr:";
        print $color if $opt_R;
        print "$errordesc";
        print $reset if $opt_R;
        print ":$txt\n";
    } else {
        print " <error ";
        print "line=\"$linenum\" ";
        # these two are not presently implemented in igor
        print "column=\"1\" ";
        print "severity=\"warning\" ";
        #
        print "message=\"", xmlize($errordesc), "\" ";
        print "source=\"$prog\"";
        print "/>\n";
    }
}

# True when $word starts with a lowercase ASCII letter.
sub is_lowercase {
    my $word = shift;
    return $word =~ /^[a-z]{1}/;
}

# True when $word starts with an uppercase ASCII letter.
sub is_uppercase {
    my $word = shift;
    return $word =~ /^[A-Z]{1}/;
}

# Return $txt with every literal occurrence of $word highlighted.
sub highlight_word {
    my ($txt, $word) = @_;
    $txt =~ s/\Q$word\E/$lh$word$rh/g;
    return $txt;
}

# Return $txt wrapped in the highlight delimiters.
sub highlight_string {
    my $txt = shift;
    return "$lh$txt$rh";
}

# Replace each tab with spaces.
# NOTE(review): whitespace inside this literal was lost when the source
# was collapsed; eight spaces is assumed -- confirm against upstream.
sub expand_tabs {
    my $txt = shift;
    $txt =~ s/\t/        /g;
    return $txt;
}

# Return the leading whitespace of $txt with tabs expanded,
# or '' when the text does not start with whitespace.
sub leading_space {
    my $txt = shift;
    # test the match itself: after a failed match $1 still holds the
    # capture from an earlier, unrelated match
    my $leading = ( $txt =~ /^(\s+)/ ) ? $1 : '';
    $leading = expand_tabs($leading);
    return $leading;
}

# Split a line at ignore-block start and end markers, keeping the
# markers as separate elements and dropping whitespace-only pieces.
# A line without markers is returned as a single element.
sub splitter {
    my $txt = shift;
    return ($txt) unless ( $txt =~ /$ignoreblockstart|$ignoreblockend/ );
    my @split = split /($ignoreblockstart|$ignoreblockend)/, $txt;
    return grep { ! /^\s*$/ } @split;
}

# Build $ignoreblockstart and $ignoreblockend: regex source strings that
# match SGML comments, marked sections, and literal-content tags.
sub init_ignoreblocks {
    print "initializing ignoreblocks\n" if $verbose;
    # create regex for sgml block start and end
    my @ignoreblock_tags = qw/ literallayout screen programlisting /;
    $ignoreblockstart = '(?:<!--|<!\[';
    for my $tag (@ignoreblock_tags) {
        $ignoreblockstart .= "|<$tag.*?>";
    }
    $ignoreblockstart .= ')';
    $ignoreblockend = '(?:-->|\]\]>';
    for my $tag (@ignoreblock_tags) {
        $ignoreblockend .= "|<\/$tag>";
    }
    $ignoreblockend .= ')';
}

# Make tabs visible by replacing each with the text "{tab}".
sub showwhitespace {
    my $txt = shift;
    $txt =~ s/\t/{tab}/g;
    return $txt;
}

# global tests

# Report discouraged abbreviations: "c.f.", "e.g."/"i.e." without a
# following comma or colon, "a.k.a.", and "vs".
# Blank lines and lines inside an ignore block are skipped.
sub abbrevs {
    my ($bname, $line, $txt) = @_;
    return if $txt =~ /^\s*$/;
    return if $ignoreblock;
    my $txtbak = $txt;

    if ( $txt =~ /(?:\W|^)c\.f\./i ) {
        $txt =~ s/(c\.f\.)/$lh$1$rh/i;
        showline($bname, $line, $ansi{darkmagenta}, 'use "cf."', $txt);
    }

    $txt = $txtbak;
    if ( $txt =~ /(?:\W|^)e\.?g\.(?:[^,:]|$)/ ) {
        $txt =~ s/(e\.?g\.)/$lh$1$rh/;
        showline($bname, $line, $ansi{darkmagenta}, 'no comma after "e.g."', $txt);
    }

    $txt = $txtbak;
    if ( $txt =~ /(?:\W|^)i\.?e\.(?:[^,:]|$)/ ) {
        $txt =~ s/(i\.?e\.)/$lh$1$rh/;
        showline($bname, $line, $ansi{darkmagenta}, 'no comma after "i.e."', $txt);
    }

    $txt = $txtbak;
    if ( $txt =~ /(?:\W|^)a\.k\.a\./i ) {
        $txt =~ s/(a\.k\.a\.)/$lh$1$rh/i;
        showline($bname, $line, $ansi{darkmagenta}, 'use "aka" (AP style)', $txt);
    }

    $txt = $txtbak;
    if ( $txt =~ /(?:\W|^)v\.?s(?:\.|\s|$)/i ) {
        # highlight what the test above matched: the trailing period is
        # optional, and the match must start at a word start
        $txt =~ s/(\W|^)(v\.?s(?:\.|(?=\s)|$))/$1$lh$2$rh/i;
        showline($bname, $line, $ansi{darkmagenta}, '"versus" abbreviated', $txt);
    }
}

# Fill @badphrases with the phrases reported by badphrases().
sub init_badphrases {
    print "initializing badphrases\n" if $verbose;
    @badphrases = ('2nd', '3rd', '3way', '4th', '5th','allow to',
                   'allows to', 'become gain', 'be also', 'been also',
                   'being build', 'can not', "chroot'd", "compress'd",
                   'could might', 'could of', 'equally as', 'for to',
                   "ftp'd", 'get take', "gzip'd", 'in on', 'it self',
                   'may will', "mfc'ed", 'might could', 'often are'
                   ,"or'ing", 'that without', 'the a', 'the each',
                   'the to', 'this mean that', 'to can', 'to for',
                   'to of', 'to performs', 'will has', 'with to',
                   'would of',);
}

# Report bad phrases on the current line, and bad phrases that start at
# the end of the previous line ($prevline) and finish on this one.
sub badphrases {
    my ($bname, $line, $txt) = @_;
    my $txtbak = $txt;
    return if $txt =~ /^\s*$/;

    for my $bad (@badphrases) {
        $txt = $txtbak;
        # check for a loose but fast match first
        if ( $txt =~ /\Q$bad\E/i ) {
            if ( $txt =~ s/\b(\Q$bad\E)\b/$lh$1$rh/i ) {
                showline($bname, $line, $ansi{yellow}, 'bad phrase', $txt);
            }
        }

        # detect bad phrases wrapping over two lines
        # skip this test if the phrase was all on the previous line
        next if ( $prevline =~ /\Q$bad\E\b/i );

        $txt = "$prevline $txtbak";
        if ( $txt =~ /\Q$bad\E\b/i ) {
            # move words from @right to @left until @left is the part
            # of the phrase that ends the previous line
            my @right = split /\s/, $bad;
            my @left = ();
            my $leftstr = '';
            while ( @right ) {
                push @left, shift @right;
                $leftstr = join ' ',@left;
                last if ( $prevline =~ /(\Q$leftstr\E)\s*$/i );
            }
            # a left part equal to the whole phrase means no split was found
            unless ( $leftstr =~ /\Q$bad\E/ ) {
                showline($bname, $line - 1, $ansi{yellow}, 'bad phrase',
                    "... $lh$leftstr$rh");
                $txt = $txtbak;
                my $rightstr = join ' ', @right;
                $txt =~ s/(\Q$rightstr\E)/$lh$1$rh/i;
                showline($bname, $line, $ansi{yellow}, 'bad phrase', $txt);
            }
        }
    }
}

# Fill @contractions with the contractions reported by contractions().
sub init_contractions {
    print "initializing contractions\n" if $verbose;
    @contractions = ("aren't", "can't", "doesn't", "don't", "hasn't",
                     "i'll", "i'm", "isn't", "it's", "i've", "let's",
                     "shouldn't", "that's", "they'll", "you're",
                     "you've", "we'd", "we'll", "we're", "we've",
                     "won't", "would've");
}

# Report each listed contraction found on the line (case-insensitive).
sub contractions {
    my ($bname, $line, $txt) = @_;
    my $txtbak = $txt;
    return if $txt =~ /^\s*$/;

    for my $con (@contractions) {
        $txt = $txtbak;
        if ( $txt =~ /\Q$con\E/i ) {
            if ( $txt =~ s/\b(\Q$con\E)\b/$lh$1$rh/i ) {
                showline($bname, $line, $ansi{yellow}, 'contraction', $txt);
            }
        }
    }
}

# Fill @freebsdobs with the words reported by freebsdobsolete().
sub init_freebsdobs {
    print "initializing FreeBSDobs\n" if $verbose;
    @freebsdobs = qw/ cvsup /;
}

# Report obsolete FreeBSD features named on the line.
sub freebsdobsolete {
    my ($bname, $line, $txt) = @_;
    return if $txt =~ /^\s*$/;

    for my $word (@freebsdobs) {
        # close the highlight with $rh; $lr is the line number delimiter
        # and is empty in plain-text output
        if ( $txt =~ s/(\s+)($word)([^.]+.*)$/$1$lh$2$rh$3/ ) {
            showline($bname, $line, $ansi{darkgreen}, 'freebsd-obsolete', $txt);
        }
    }
}

# Report words repeated next to each other on the line, and a word that
# ends the previous line ($prevline) and also starts this one.
sub repeatedwords {
    my ($bname, $line, $txt) = @_;
    return if $txt =~ /^\s*$/;

    my $txtbak = $txt;
    my %count = ();
    my @words = grep(! /^\s*$/, split /\b/, $txt);
    for my $w (@words) {
        $count{$w}++;
    }
    my @multiples = grep { $count{$_} > 1 } keys %count;
    #for my $word (keys %count) {
    for my $word (@multiples) {
        # skip special cases
        # repeated numbers
        next if $word =~ /\d{1}/;
        # repeated slashes
        next if $word eq '/';
        # repeated rows of dashes
        next if $word =~ /-+/;
        # repeated rows of underscores
        next if $word =~ /_+/;
        # skip some mdoc commands
        next if $word =~ /Fl|Ns|Oc|Oo/;
        $txt = $txtbak;
        if ( $txt =~ s/\b(\Q$word\E\s+\Q$word\E)\b/$lh$1$rh/i ) {
            print "repeatedwords: repeat found:'$word'\n" if $verbose;
            showline($bname, $line, $ansi{darkred}, 'repeated', $txt);
        }
    }
    # check for repeated word from the end of the previous line
    # to the beginning of the current line
    # $prevline =~ m%(\w+\s+)*([^ *.#|+-]+\s*)$%;
    my ($cmd, $prevlastword) = ('', '');
    # only read the captures when the match succeeded; after a failed
    # match $1 and $2 still hold values from an earlier match
    if ( $prevline =~ m%(\w+\s+)*(\S+\s*)$% ) {
        $cmd          = $1 if $1;
        $prevlastword = $2 if $2;
    }
    # short-circuit when the previous line...
    # had no last word
    return unless $prevlastword;
    # didn't repeat any of the words on the current line
    $count{$prevlastword}++;
    return unless $count{$prevlastword} > 1;
    # was a groff(7) comment
    return if $prevlastword eq '.c';
    # was a groff(7) zero-space character for tables (\&.)
    return if $prevlastword eq '\&.';
    # was a single non-word character
    return if $prevlastword =~ /^\W{1}$/;
    # was an mdoc(7) or nroff(7) comment
    return if $prevlastword =~ /^\W{1}\\\"/;
    # was an mdoc command
    return if $prevlastword =~ /\.(?:Ar|Oo|Nm|Tp)/i;
    # when the next-to-last word was an mdoc command
    return if $cmd =~ /Ar |Cm |Fa |Em |Ic |Ip |It |Li |Pa |Ss /i;
    if ( $txt =~ s/^\s*(\Q$prevlastword\E)(\s+.*)$/$lh$1$rh$2/ ) {
        showline($bname, $line - 1, $ansi{darkred}, 'repeated',
            "... $cmd$lh$prevlastword$rh");
        showline($bname, $line, $ansi{darkred}, 'repeated', $txt);
    }
}

# read an external file of spelling errors
# the misspelled word is the first sequence of \w or ' characters
# up to a non-word character
# empty lines and lines starting with # are skipped
sub readspelling {
    my $spname = shift;
    my $added = 0;
    print "adding spelling file '$spname'\n" if $verbose;
    open my $sf, '<', $spname or die "cannot open '$spname':$!\n";
    while ( <$sf> ) {
        next if /^$/;
        next if /^\s*#/;
        if ( /^\s*((?:\w|\')+)\W+/ ) {
            $misspelled_words{$1} = 1;
            $added++;
        }
    }
    close $sf or die "could not close '$spname':$!\n";
    print "added misspellings: $added\n" if $verbose;
}

# list of common spellingwords
sub init_spellingerrors {
    print "initializing spellingerrors\n" if $verbose;
    for my $word (qw/ &nbps; aan abel abismal abjectely ablve abondan abotu abour abouy abscence absense
        absolue absolut absolutelly absolutly absoulte abuttes accelleration acceptible acces accesable
        accesed accesing accessable accidentaly accidently acclerate acclerating accomadate accomodate
        accoring accound accpeted accroding accross accuarate acculate acess achitecture achive
        acknowledgent acquisions acse actal actaully activly actuall actualy actyually acutally adavnce
        adddress adde addesses addiotional additionnal additonal additonally addres addreses addressess
        addresss addtions adecuate adhear adhearance adherance adiministration adjustement administator
        adminstrator adminstrators admited adress adressed adresses advence adventerous advertisment
        advetise advetised adviasory advices aer afterall afternoont agai agains ageing aggree
        aggregatable aggresive aggresively agian agregate agregation agressive agressively agrivating
        agument ahold ahte ahve akses aksually alaram albel albels alergic algoritm alignement allign
        alligned allmost alloacted allos allready allright allthough allways alot alreday
alredy alright 725 altenrative althought althougn altough amasing amke ammend ammount amn amybe analasys analyizing 726 ancestory ande anderstand andthe ane anf annonymous annotatation annotatations annoucement 727 annoucing announcment annoyting anonnyed anonymus anormalous ansamble answeres antiq anual anyay 728 anyhitng anyhoo anymore anyonw anyore anythign anyways anywere aobut apac apllay apllication 729 apparant apparantly apparentely apparentry apparnetly appart appartment appearence appearred 730 appendencies apperantly appercaite appers appicable appleances appleis appliabce applicatin 731 applieds applogize appraently appriciate appriciated appropiate approprate apreciate apreciated 732 apropriate aproval aptch aqueue arbitary arbitrafy arbritrary archiecture architectual arent 733 arguements arguemnt aritmetic aritmetics arledy arond aroudn aroung arrisen arround arrray 734 artikels aslo asoc asparin assigenments assocation assoicated assotiations assumtion aswers 735 asychronous asynchonously asynchroneous atack athalon athe athentication athough atleast atrget 736 atribute attachements attatude attemps attemts atuomatic atuomatically augus autentication 737 autheinticating authenticatication authention authetnication authoratative authorative authorty 738 automaticall automaticaly automaticly autonimous avaiable avaialble avaible availabe availabel 739 availablity availbility availible availiblity avaliable avalibale avalible avilable aweful 740 awhile awlso awsome axatly axcuse bannana bartition basec basicly basse bateria baybe beachmark 741 beacuse beated becasue becease beceause becuase becuse beeing beffer beggining begining beginnig 742 behaiver behauvier behvaiour beign beleive belive belived benefitial benfit benifit beoken 743 beowser ber berak bercause berkley beseuse besure beter bettr beurocratic beween beyonf bgack 744 bgiger bheve bikesheding bince bineary birght birt blatently bloatwed bloging bnechmark boostrap 745 boostrapping bootabe 
bootleneck bootlenecks bootsrap boradband bordism borken borre borred 746 borring boting bottem bottonm boundries boundry boxd bradband branche briner bringign brocessor 747 broked brokeness broser brower browesable browseable browswer btit buch bugzills buidl buildling 748 buildt buile buillt buld bulding bulds bultin burried bycicle bysect bysected byt cacheing 749 calatog calcualted cale calender calles caluclate caluclated camllia campatibility cange cannnot 750 cannonical cant capabilites capabilties capabiltiy capabily capitzliation captial captialism 751 caracteristics casse casues catagory catched cathegory ceep ceratin ceratinly cerificate 752 certaintly certian certifcate certifcates certificat certifictate certiin certiinly chace 753 chacing chaged challange challanging chane chang changable changess changs chaning chanse 754 charakteristic charakteristics cheapter chech checkng checksuming chek chekc cheked cheking 755 chhosing chian chipest choise choosed choosen choses chronologocal chunck chuncking cince 756 cirruption claimst clal clarifynig classifcation cleand cleandepened clearification clearl clen 757 cliens cluter cmmit cmopile cmopiles cmplain cmplaining cna cnanot cnditions cobsidered 758 cofiguration colision colisions colom comands comapred combersome comemnts comiling comit 759 comiters comitted comitter comlplex commandline commen commenly commer commerical commericial 760 comming commited commiter commiters commiting committment committs commnad commnads commnand 761 commnications communciation communciations comooil comooiled compability comparision 762 comparisions compatability compatabilty compatablity compatiable compatibilty compatiblity 763 compentens compiel compilcated compilling compiltaion complaing complainig comples complet 764 completly completness complie componet componetn compontens comprimise compromiseable 765 comptemporary comsume comsumed comsumption comunication comunity concatanated concensus 766 conctacted conect 
conected conection conerter conerters configrable configration configuation 767 configuraiton configurate confimation confiuration confiused confugure confussion congraturation 768 congraturations conitinue conjob conjuction connecion connecs connecter connecters connectin 769 connenctions connet conneting connnects conntact conntect conpact conputer conreoller consensu 770 consept consequtive conservatie considerd consistant consistentency consitute conslusion 771 construcgtor consuption contai containg contect conteins contens continously continu continus 772 contiune contol contrained contribuition contributer contributers controled controler controll 773 conujunction conut conuter conuters conveinently convelient conveniece convertion convesation 774 convienient convinience coordinatory coorparative copiedd copmiler copmilers coppied corectly 775 correced correctely correcture correleate corresponsding corrsponding cosnole costantly couldnt 776 cound cource courious courve coyping crach craching crahs crahsed crasch crasching crassing 777 crasy crazyness creapage creapt creat creatopm credentail credentails creeate crnuch crnuching 778 csvup cuase curcuit currenly currentlu currnetly currrently curser customaril custommer 779 custommers cuttoff cuty cvould cvs2vn damange damanged datas dayt dbout deactive deaemon 780 deaemons deafult dealocates deamon deamons deault debuf debuging decendant decentant decicission 781 decidely decission declerations decliens decompresssion decribed decriptor ded defalt defaut 782 defautl deffirent definate definately definiately definitiely definitly definitons defintion 783 degradate degugging dehaviour deicde deine deines deivce dekstop delcared delending deley 784 deliever delievers dellicious delste demnstrate depcreation depdendency depedancy depedencies 785 depedency depeding depednent depencdny dependacies dependancies dependancy dependancys dependant 786 dependding dependeancy dependeant dependecies dependecy dependend 
dependendencies dependiences 787 dependiency dependig depenesis deploies deprechated deprectated depricated derivats derrivates 788 desapointed desaster desasters descendand descendents desciptors descirption descrete describd 789 descrpition descrption desease deseases desing desireable desperatly despert desprate 790 destinatino destine destory detatched detec detecing detemine deterined devdeloper deveation 791 deveices develoeprs developement developeminet developped developper developpers developre 792 developvers devestate devestating devide devided devies devinces devisions devives devleop devot 793 diablog dictaded dictonary did'n didicated didnt didsk didunt dieing diferent diffence 794 differenciate differencies differenlty differents differnce differnces differnece differnetiates 795 differnt diffrent diffrently diffsof dificult dificulty diging dilema diliver dilligence dind't 796 dindt diphthongs dircet dirctory directorys diretly diretories diretory dirft dirver diry disabe 797 disappered disasterous disclamier discourraged discoverd discuessed dismouted dispair dispalay 798 dispaly dissable dissabled dissapeared dissapointment dissillusioned distain distiguish distord 799 distorded distribition distribitions distribtue distributted distribvution distrubute 800 distrubuted dnow docuentary documantation documenation documentaiton documentatino 801 documentiation documention documetation documtns doen't doesen doesent doesnot doesnt doest 802 domainmame domani donatiosn donde donn't donot dont donw dor dotally doues droped droping 803 drustrating ducplications duplictiy duratoin duratoins durign durning durring dwsktop dynaic ean 804 eanble earler easely eather ebeen ecah eceived ecourage ecouraged ect ecurrent effecive effetive 805 effetively efficancy efficency efficent efficently effor efford eficciently efter ehere elememt 806 elipsis elliminates emaling embaress embaressing eme emial emporer enabe enbale enchanced 807 enclousure enconter encrypion 
encyrpt encyrpted ende endianess endoresed endtdate enior 808 enivorement enoountering enought enourmous enow enscrambled ensute enteries enterprse 809 enthusiatic entierly entites enviorement enviornment enviornmental envirionment enviroment 810 enviroments environement environnement equipted equivalen equivilent erebuild erlier erliere 811 errore errorneusly erros escolated esier esiest esle esome essense estracting ethenret etherenet 812 ething ethings etnry evenning eventaul eventaully eventhough everthing everythign everytime 813 everyting evet eveyr evne evreyone ewhich exagerate examble exapnd excactly excat exceedes 814 excelent excellant excercize excersise excert excesive exclusivly execept execption execptions 815 exectable exectables exectuable execuation exellent exemple exemtion exeption exercice exibits 816 exisiting exisitng existance existsing exmaple expalin expecially experied experince expession 817 expiremental expirience expirt explaination explainations explaned explans explantation 818 explatnation explicitely exponentionally exquse exsits exstra extemely exteneded extenstions 819 extentensible extention extentions extranious extreemly extremly facilites facter faield failded 820 faile failes failur faimiliar faliure falsh familar farwarding fase faught feasable febuary 821 fecth feebsd feelt fgights fianlly fids fiel fiels fien fienw figureing fileame filewall filks 822 filname finaly firmwares firmwrae fisrt fitler fixe fixen fixztion flages flasg flexable focuss 823 folkz folllowed follwo follwoing follwong folow folowed folowing fomr forbiden forcable forece 824 foreignphrse forgoten formate formated formost fornated forsee forthermore forusers fot foto 825 fotos foudn foward fowarding fractoinal fraemwork fragemented fragmentated fragmentatio 826 frameowkr fransisco frebsd freedback freeed freezed freind frequence freze frezze frome fthernet 827 fucntion fuction fulfil funcational funcition functionmames functionnality functoin 
functuion 828 funtion furhter furthur fush futher futur fysical gaint garanties gatherd gauging gaurd geeting 829 generaly generat genertaes geniue geograhically gernal gernerates gettign ghostscrip giove 830 givent glas gnerated gnoime godo gohostscrip goiung gonna gonne goot gotta grafic grammer grap 831 grapics gratefull grately graub greaet greate greatful greatfully greif grpahs gruop gthe guage 832 guarateed guarentee guarenteed guarentees guarranteed guidence gurantees hackyness hade haed hai 833 haing halp hanbook handeling hapen hapilly happend happended happends happing happpens hardisk 834 hardwares hardwrae harmpless harrass harrassment harsch hashs hatered hav havent havfe havn't 835 headup healt heavly heirarchy hellon helpfuk helpfull hep hereon hessitate hessitation 836 hexadecimals hexidecimal hibarnate hibarnating hiearchy hierachy hierarchial hierarhy higest 837 hight higly hinderences hiuge hobbiest hodling homours honets honnest honnestly honnor honnorr 838 honnorred honnors honst hookled hopful hopfully horiztonal horiztonally hounderd hounderds 839 howeber howevrr hsotname hsotnames htat hte hter htere hthe htink htis hunderts hypens 840 hypervisior hypocracy ibn idee identially identifer identifers identifiy identiy idff 841 idosyncracies iea ifhghting im imagen imagening imatating imbeded imeplementation immanent 842 immediatly immenent immidiatly immitating impariment impedence impelment implemenation 843 implementaitons implementating implementng implemetation implemetn implentor implicitely 844 implicits impliment implimentation implmentation imporant imporvement imposable imposible 845 improbe improove improoved improvments imprted inacativity inaccesible inadvertant inadvertantly 846 incase incedent incldue incluseion incomming incompeents incomptaible inconsistancy inconsitent 847 inconvienent incopatible incrase incrimental incrment incrmental indefinately indefinitly 848 indended indendently indentical indentifier indentifiers 
indention indentions indepedently 849 independant independantly independendly independet indepth indestrcteble indiate indiciations 850 indicies indivual indivudual indstalled inetersting infact infavour infomation informations 851 informatoin infrastcture infrasture infromation inherity inital initalise initalization 852 initalize initalized initiatior initiliased initilize inititialization inport inpossible inpunt 853 inputed inquiery insall insatll insatlled insensivite instace instal instalation instaled 854 instaler installad installaed installaing installatio installtion installtions instanciation 855 insted insteresting instractions instructuions instuctions intall intallation integerate 856 integreated integrituy intendend intepretation interal interations interchangable interchangably 857 interconverts interes interesitng interesst interessting intereting interfactive interfer 858 interferring intergrated interist interisting intermal intermittant intermittantly internaly 859 internat interneal interogate interpretedt interpretted interpretter interpretting interressing 860 interrest interresting interrestingly interrim interrups intersting interupt intial 861 intialization intialize intolerate intregate intrest intresting introduceing intruction 862 invarients invicible invole involes involvemnt invoplved invovle irt isnt isntall isoltation 863 isonly issueing ist istead isuus isystem ita iteinerant itelf ith itnel itseld ive iwll jailes 864 joing jornal jounal jsut juged juste kenrel kerel kerenel kerenels kerenl kerle kernal kernell 865 kernl keybaord killled kno knowlegde knowlege knowlodgeable knwo kust kwyrod labes lable lables 866 laeyer lagacy lanaguage langage languge laods larged lastest laterly latley latre laught 867 laughted layed layput lazyness leasure leat leav legitimite lemme lenght leson leter lettesrs 868 lexicographal lgertimately libararies libary librairies libraray libraris libraru licencing 869 licene liek lien liesure lightnig 
ligned liinux likeing likly liks limtations lineair linerly 870 ling liniarly lised lisens listet listning lite literrally littel litteral litterally liviness 871 llow lniux loadeded loally locak localy loccked locically loder loged loggoued loggs loging 872 loink lok loke lokking loks looh lookig lookking looksy loopack loosing loosly losseless lpatop 873 lpdng lter ltieral mabe maby mabye macademia machien machiens machin machince machinew maching 874 machne macrow macrows maek mahually mailling maintainace maintainance maintaince maintanance 875 maintaned maintanence maintenable maintence maintened maintener mames manageement managemnet 876 managent managment mananged manangement manaul manditory mangagement manged mangment manpage 877 manpages manuallying manualy manuell manufactring manyally marcro marcros markkup maschine maske 878 mater mathced maun maxaximum maximium maximun mdorn meaninful meantine measusre mechanim 879 mechanims mechiansm mechnism mechnisms memeber memery memroy ment mentined mentionned menue meny 880 mergeing mericracy meriticracy merrits mes messege messgae messgaes metada methode mfcd 881 micrcontroller microbnechmark mininum minmum minumum minut minuts miror mis miscelleneous 882 miscellenious mising misprediced missign missinc missking misspeling missplelling misterious 883 mistery mistypted misunterstood mkaes mke moble modifing modifiy modifiyng modiying momment 884 monalithic moniter monolitic mont montherboard montor montoring monut monuted mooved moter 885 motercycle motercycles motiviation moudels mountign mpre mssing multile multipled multipy 886 mutiple mutualy mvoed mysefl myst myt namming natioal natsy ncessary ncie nderstand necassary 887 neccasary neccesary neccesery neccessary necesary necessairely necessarely needto neet neetwork 888 neglegt negociate negociated neightbor nemisis nescessarily nescessary nessesery nimber nintees 889 nobady noet noice noipe nomally nonexistant noone normanlly notaions notavailable 
notefection 890 nothern nothin noticable notied notofocations notquite nouvou numberic numer numner nusance 891 nutrual obejct obfascated objejcts obselete obsolote obsticles obvoius ocassionaly occassion 892 occassionally occassions occation occations occurance occured occurence occurences occuried 893 occuring ocure oether ofcourse offenseive offical ofr oftem okey om ommisions ommit ommited 894 ommitt ommitted omre omrning onfigured ongoin onl onle onlne onlt onsult onthe ontop onts onw 895 ony oparation operationg opertunity opion opperation oppertunity oppinion oppions oppisite 896 oprations oprion oprions optial optiion optionsal optoin ordenary orginal orginally originaes 897 origine orignal ot otehr otsuts ouf ouput ouputing outher outout outstaning outtage overhall 898 overidden overlaping overlayed overrided overriden overritten overwritting ovre owkr pacakge 899 pacakges pachae packge packges padd padds paert painfull panices parallell paramenter parametr 900 parametrs paramtere paramters paranthesis paremeter parenticies parhaps parition paritioning 901 paritions parntheses parrallel parrellel partameters partialy particualar particulary partion 902 partions partionting partiton partitoning partitons passprhase passtrough passwrd pasto patche 903 patchex pathalogical pathces pathes peaople peform peformance peformed peice peices pengiun 904 peopel pepetual pepetually perfecly perfom perfomance perfoms perfor perfored performace 905 performancing performence performend perhas periperal peripherial peripherials permanant 906 permantly peroid persisent persistant personnal personnally personnaly persoon pertubation 907 peticular pevious pfew pgk phabriator pheraps phisical phoneix phorase phyiscall physcal physial 908 physicaly piblic pitty placte plaing plateform platfrom platorms pleae plin plisss poatch poblem 909 poblematic poeple pofessional poinitng poirts poitn poitner politley poluting polution pople 910 popularuity pordriere porevious porject 
porrtability porst portes portupgrde posible positiv 911 positve possability possbile posseses possibillity possilbe possition postion postitions postive 912 postress poting potr potupgrade poud poudirere poudrier poudrierre pourdiere pourdriere 913 pouridere poweful powerfull poyrts prameter pratcice preatty preblem preceed preceeded 914 preceeding preceeds precice precidence predictibly preemtive prefere prefered prefering 915 preferrable preferrably preffer preffered prefferred preform preformance premissions 916 preoblematic prepair prepairing preperation preperations preprend preprietary preprocesor 917 presense presidence presonally presumeably prety pretyt preume prevelent previos previouse 918 previousely previus prevoius pricipal primative primatives princial principes priorisation 919 priotity prirority pritn pritnf pritnfs privelege priveleged priviledge priviledges privilige 920 privledged privleges probabilly probabyl probaly probbaly probblem probem problaly proble 921 problen problme problmes probobly proccess proccesses proceedure proces proceses procols 922 proctect proepr proeprly profesional profesionals proffesional profie profilier profissional 923 progam progams progess programable programatic programlistning programm programms progrtam 924 projcet projecte prolematic prolonges promiscous promiscuos promisive promissed promissing 925 prompot promt proove propaged proped propegation propigate propogate propogation propolsal 926 proporion proporty propper propreitary propreitery propsing prorammer prorgram prosessor prot 927 protcol protcols protec prots provde provent provice providre pseuuedo pshycial pssword psuedo 928 ptach ptiner pudate puncing puroses pursache pursached puting qeustion quandries quard quater 929 quaterly queestion querys quesston questionr questoin questsions queueing qui quickier quiety 930 quirck quire quitted quoteas rabase rabased rabmling rae rans rapidely rase rasing raspberri 931 rater reactoin readd readning 
realated realloacted realy realyl reaosn reasoably reasonnable 932 reassambled reate reboote rebove rebuilded rebuitling rebult reccomended receieve recevied 933 recieve recieved recinded recive recoide recomend recomendation recomended recommand recommanded 934 recommanding recommened recommented reconigize recrusively redable redering rediculous 935 redundantcy reeated reelvent reember refered referes refering refernce refernces refernece 936 refferance refreind refridgerator refulat regardes regened regularely regularlly regulat reguls 937 reivew reized relaly relase relases relavent releated relese relesed relevent reloation 938 reloations relply rember remdial remebered remebers rememver remmeber remobal remvoe remvoed 939 rendtion repare reparing repative repetion repitition repititions replaceing replacemnet 940 replases replce repleaced reponding reponse reponses reponsible reposotory repostory 941 reprecussion reprecussions reproducable reproducibily reproductible reprository repy requiment 942 requireing requiretd requirments requistes requred rerurn resampeling resaonnable resemblence 943 resently resetart resetted resiilver resilliancy resillience resilliency resillient resise 944 resistnace resitor resitors resivoir reslove resloving resolf resonable resonably resons resouce 945 respecitively responce respository respresentation resseler ressources restaring restartet 946 restaurnat restaurnats resuce resuerrect resuerrecting resurections resusccitate rethnik retnia 947 retreive retrive returs reuild revalent reveiw reversse revison revisons rewcursion rewite 948 rewriten rezervation riddens rigth riht rmeoval rmore rmove roken roling rott roughy rreally 949 rreplace rrquest rudamentary runing runinig runnig runnign runnnig runnning ruote ruter sacn 950 saerch safed sahred saif saior sais salavge satsify saturage scenartio sched scheduld 951 schedulling scritp scrubing scrupt seached secction secend secion secions secondes secttion 952 secturity secund 
securiy seemless seemlessly seens seether senarios sence sendt sepaking 953 separatly separe separtely sepcial sepcific sepcifies sepcify seperate seperated seperately 954 seperates seperating seperation seperator seprate sequencially serching sercurity serie 955 seriosuly serius serivce serveral servicability servise sesion setable setiing seting setings 956 settt sevice sexond sey shae shaer shaers sheding shepard shepards shephard shepharding shooping 957 shoping shoud shoudl shoudn't shoulld shrinked shuld shure shuting shyed siginificant 958 significnat signle siilar sile sime similat simillar simpel simpl simplfied simplier simpliifed 959 simplyfies simular simultanious simultaniously singel singeling singels singnificant sinificant 960 sinse sintax sistems sitll skiped sligh slighly slove sloved slpw slue smaler smebody smeone 961 snapshoted snoflake snopped soe soehow soemone soemones soemthing soething softaware softner 962 softwae sofware sohuld soif soley solition solusion someoene somes somethign somethng sometime 963 someting sometjhing someway somoene somthing somwhere sonud sonuds soo soruce sparce spearator 964 specfic specifes specifi specifiaction specificially specificly specifig specifing specifiy 965 specifiying specifyed spectacte speficy sperate spesific spindels spititng splic spliting 966 splitted spose spreadth srews srtuff srync ssorted sspares ssytem stabalization stadnard 967 stairing standart standerd stantdard startet starup staticlly statuc steller steping stilla 968 stiring stkicks stoll stollen stoped stoping stoppe stoppped straigh strang strangly strat 969 strategie strenght striaght stricktly strippped stroage structurees stucture stuf stystem subet 970 submited submiter submitt substaintally substition substract substraction subsytem subsytems 971 subverion succed succeded succeds succesful succesfully successfull successfuly suceeding 972 sucesfully sucess sucessful sucessfull sucessors suckser sucksers suddently sudirectories 
973 suffecient sufficent suficient sugesstion sugest sugested suggesiton suggestsions suggetion 974 suggetions sugroup suject sumbit sume superceed superiour supoose suposed suposedly suppor 975 suppotr suppotred supprts supress supressed supresses suprise suprised suprising surpise 976 surpised surpressed surprice surpriced surprize surprized surronded surroudn surroudning 977 susbtitute suspec suspection sutiable swape swepped swich switche swith swithc switich 978 switiching swop syas symetrical symtom symtoms synchronisaton syncrhonous syncrhonously 979 synonomous sysem sysetm syslodg systeam systme systmes sytem sytems sytsem taged taging taht 980 tahts talkes targer tat te teamm techer techical techincally techncially teh tehre tehse tehy 981 tempaltes temperatire templaitize temporarely tenticles tere terirrlbe termal termonology 982 termperature termporary tey th tha thaat thaknk thakns thank's thankje thansk thanx thatis thats 983 thay theese thefirst themeing thems themself theoraticly theorethically ther therads therefor 984 theres therory thets theyre theyve thie thier thign thigns thingking thinke thinkg thinkw 985 thinling thirs thnak thnig thnk tho thos thouch thoug thougt thouogh threated thremal throgh 986 throtteling throug throughly throught throuhg throwed thru thrugh tht thta thudner thwo thye ti 987 tiems tihngs tihs timestatmp tinket tinketing tipycal tirck tird titeled tlak tlaking tnan 988 todays todl togehter togethe tohers tols tomake tommorow toolcain toolchian topick totaly 989 tottaly tought tougue tpage tpye traafic tradeing traditoin traditoinal tradtional trafic 990 trailling tranalation tranalations tranfer tranfered tranfers transfered transfering 991 translateion translater translaters transltion transmision traslate traslation treatement trid 992 triede triewd trigonmetric tring tripple trival trnaslate trnaslated trofy troublehsooting 993 troubleshoute troughout trow trows trpi trrue trry trubolsome truely trully tryed tryied 
tryign 994 tsable tsart tsill tsrarted tthe tthis tu tunning tunr turend turnt tutoriales tye tyhrow 995 typicall typicaly udnerstand udnerstandable udpate udpates uesd uisng umounted uncapable 996 unchaged unchange uncoment unconsistent undefinied undefinitely undeflowed undersatnd understadn 997 understadning understandlable understandood understaning underway undescores undesireable 998 undestand undustrialized unecessary unecrypted unfortauntely unfortenately unfortuante 999 unfortunatelly unfortunatly unfortuntelly unfrastructure unfreezed uniion uniquily unitentinally 1000 unknwn unkown unlinke unmouting unnceccessary unneccessary unnecssary unprivilegded unrelevant 1001 unresolveable unreversable unsubstanciated unsuccesful unsucessfully unsutiable untill untis 1002 unuseable upate updaing updateing updte updtes upgade upgaded upgarde upo uppon uprade upsteam 1003 upstrewams upto ur usally useable useage usedul usefull useing usesd usign ussage usse ussually 1004 usully utilites utilties uttrerly vagrand varaible varanty varialbe varialbes varliable varois 1005 varoius vender vengeace veresion verion verison verry versionned versionning versoin verty 1006 veryify virtial virutal visable voa volenteer volenteers voltave vontinues votlage vulnability 1007 waas waht wahtever wakupe wantd warant waranted wass webupage wecam wehre wek wel wer wether 1008 whanever whats whcih whe whene whereever wheres whewn whhich whie whihc whilte whinning whish 1009 whit whith whne wht wich wierd wiht wihtout wilde wirh wirtten wistle wistles witdh withe 1010 withhin withing withme withough withouth witk witout witt wlll wnat wnats wnet wo wonderfull 1011 woner wont wor worflows workint workoad workoads workstion worng worrty woth woud woudl wouldbe 1012 wouldnt wouls wranty wraper wriatble writen writtend writting wroking wroute wsouse wuch 1013 xontains ycould yea yeild yeilds yesm yhe youd youi youll youre yu yuo yut /) { 1014 $misspelled_words{$word} = 1; 1015 } 1016 print 
"spellingerrors: ", scalar (keys %misspelled_words), " misspellings known\n" if $verbose; 1017 my @spellfiles; 1018 # IGORSPELLFILES environment variable is a whitespace-separated list of files 1019 push (@spellfiles, split /\s/, $ENV{'IGORSPELLFILES'}) if defined($ENV{'IGORSPELLFILES'}); 1020 # all files found in /usr/local/etc/igor/spelling 1021 push (@spellfiles, split /\s/, `ls /usr/local/etc/igor/spelling/*`) if -d '/usr/local/etc/igor/spelling'; 1022 for my $spellfile (@spellfiles) { 1023 readspelling($spellfile); 1024 } 1025} 1026 1027sub spellingerrors { 1028 my ($bname, $line, $txt) = @_; 1029 return if $txt =~ /^\s*$/; 1030 1031 my $txtbak = $txt; 1032 my @words = split /\W+/, $txt; 1033 for my $currentword (@words) { 1034 if ( $misspelled_words{lc($currentword)} ) { 1035 $txt = highlight_word($txt, $currentword); 1036 } 1037 } 1038 if ( $txt ne $txtbak ) { 1039 showline($bname, $line, $ansi{darkmagenta}, 'spelling', $txt); 1040 } 1041} 1042 1043sub whitespace { 1044 my ($bname, $line, $txt) = @_; 1045 return if $txt =~ /^$/; 1046 1047 my $txtbak = $txt; 1048 if ( $txt =~ s/^(\s+)$/$li$1$ri/ ) { 1049 showline($bname, $line, $ansi{darkblue}, 'blank line with whitespace', $txt); 1050 } 1051 $txt = $txtbak; 1052 if ( $txt =~ s/(\S+)(\s+)$/$1$li$2$ri/ ) { 1053 showline($bname, $line, $ansi{darkblue}, 'trailing whitespace', $txt); 1054 } 1055 $txt = $txtbak; 1056 if ( $txt =~ s/( +)\t+/$li$1$ri/ ) { 1057 showline($bname, $line, $ansi{darkmagenta}, 'tab after space', $txt); 1058 } 1059} 1060 1061 1062# global batch tests 1063sub style { 1064 my ($bname, $txt) = @_; 1065 print "$lf$bname style check:$rf\n"; 1066 1067 my $you = ($txt =~ s/you\b/you/gi); 1068 my $your = ($txt =~ s/your/your/gi); 1069 if ( $you || $your ) { 1070 print " $lh\"you\" used $you time", ($you==1 ? '':'s'), "$rh\n" if $you; 1071 print " $lh\"your\" used $your time", ($your==1 ? 
'':'s'), "$rh\n" if $your; 1072 print " \"You\" and \"your\" are informal and subjective.\n"; 1073 print " Attempt to be formal and objective: \"the file\" rather than \"your file\".\n"; 1074 } 1075 1076 my $should = ($txt =~ s/should/should/gi); 1077 if ( $should ) { 1078 print " $lh\"should\" used $should time", ($should==1 ? '':'s'), "$rh\n"; 1079 print " Use \"should\" sparingly, it is feeble and suggests unsureness.\n"; 1080 print " Attempt to be imperative: \"do this\" rather than \"you should do this\".\n"; 1081 } 1082 1083 my $obviously = ($txt =~ s/obviously/obviously/gi); 1084 if ( $obviously ) { 1085 print " $lh\"obviously\" used $obviously time", ($obviously==1 ? '':'s'), "$rh\n"; 1086 print " If it is really obvious, it does not need to be pointed out.\n"; 1087 } 1088 1089 my $needless = ($txt =~ s/needless to say/needless to say/gi); 1090 if ( $needless ) { 1091 print " $lh\"needless to say\" used $needless time", ($needless==1 ? '':'s'), "$rh\n"; 1092 print " If it doesn't need to be said, why say it?\n"; 1093 } 1094 1095 my $thefollowing = ($txt =~ s/the following/the following/gi); 1096 my $asfollows = ($txt =~ s/as follows/as follows/gi); 1097 if ( $thefollowing || $asfollows ) { 1098 print " $lh\"the following\" used $thefollowing time", ($thefollowing==1 ? '':'s'), "$rh\n" if $thefollowing; 1099 print " $lh\"as follows\" used $asfollows time", ($asfollows==1 ? '':'s'), "$rh\n" if $asfollows; 1100 print " If something is following, the reader can see it without being told.\n"; 1101 } 1102 1103 my $followingexample = ($txt =~ s/following example/following example/gi); 1104 if ( $followingexample ) { 1105 print " $lh\"following example\" used $followingexample time", ($followingexample==1 ? 
'':'s'), "$rh\n"; 1106 print " If an example is following, the reader can see it without being told.\n"; 1107 } 1108 1109 my $simply = ($txt =~ s/simply/simply/gi); 1110 my $basically = ($txt =~ s/basically/basically/gi); 1111 if ( $simply || $basically ) { 1112 print " $lh\"simply\" used $simply time", ($simply==1 ? '':'s'), "$rh\n" if $simply; 1113 print " Use \"simply\" to mean \"in a simple manner\", \"just\", or \"merely\", not the\n"; 1114 print " patronizing \"details omitted because they are not simple enough for you\".\n"; 1115 print " $lh\"basically\" used $basically time", ($basically==1 ? '':'s'), "$rh\n" if $basically; 1116 print " Use \"basically\" to mean \"essentially\" or \"fundamentally\", not \"only the\n"; 1117 print " basics are shown because anything more will be too complicated for you\".\n"; 1118 } 1119 1120 my $the = ($txt =~ s/(?:^the|\.\s+the)\b/the/gi); 1121 my $sent = ($txt =~ s/([^.]+\.\s+)/$1/gi); 1122 my $percent = ($sent > 0 ? int($the/$sent*100) : 0); 1123 if ( $the && ($percent > 19) ) { 1124 print " $lh\"The\" used to start a sentence $the time", ($the==1 ? '':'s'), " in $sent sentence", ($sent==1 ? '':'s'), " ($percent%)$rh\n"; 1125 print " Starting too many sentences with \"the\" can be repetitive\n"; 1126 print " and dull to read.\n"; 1127 } 1128 1129 my $cf = ($txt =~ s/\Wcf\./cf./gi); 1130 my $eg = ($txt =~ s/e\.g\./e.g./gi); 1131 my $ie = ($txt =~ s/i\.e\./i.e./gi); 1132 my $nb = ($txt =~ s/n\.b\./n.b./gi); 1133 if ( $cf ) { 1134 print " $lh\"cf.\" used $cf time", ($cf==1 ? '':'s'), "$rh\n"; 1135 print " \"Cf.\" (Latin \"confer\") means \"${lf}compare$rf\" and is mostly used in academic\n"; 1136 print " and scientific writing. Consider replacing with the more common English\n"; 1137 print " words.\n"; 1138 } 1139 if ( $eg ) { 1140 print " $lh\"e.g.\" used $eg time", ($eg==1 ? 
'':'s'), "$rh\n"; 1141 print " \"E.g.\" (Latin \"exempli gratia\") means \"${lf}for example$rf\" and is mostly\n"; 1142 print " used in academic and scientific writing. Consider replacing with the\n"; 1143 print " more common English words. Both forms are usually followed by a\n"; 1144 print " comma for a verbal pause: \"e.g., a b c\" or \"for example, a b c\"\n"; 1145 } 1146 if ( $ie ) { 1147 print " $lh\"i.e.\" used $ie time", ($ie==1 ? '':'s'), "$rh\n"; 1148 print " \"I.e.\" (Latin \"id est\") means \"${lf}that is$rf\" and is mostly used in academic\n"; 1149 print " and scientific writing. Consider replacing with the more common\n"; 1150 print " English words. Both forms are usually followed by a comma for\n"; 1151 print " a verbal pause: \"i.e., a b c\" or \"that is, a b c\"\n"; 1152 } 1153 if ( $nb ) { 1154 print " $lh\"n.b.\" used $nb time", ($nb==1 ? '':'s'), "$rh\n"; 1155 print " \"N.b.\" (Latin \"nota bene\") means \"${lf}note$rf\" or \"${lf}take notice${rf}\" and is mostly\n"; 1156 print " used in academic and scientific writing. Consider replacing with\n"; 1157 print " the more common English words.\n"; 1158 } 1159 1160 my $inorderto = ($txt =~ s/in order to/in order to/gi); 1161 if ( $inorderto ) { 1162 print " $lh\"in order to\" used $inorderto time", ($inorderto==1 ? '':'s'), "$rh\n"; 1163 print " Unless \"in order to\" has some special meaning here, \"to\" is simpler.\n"; 1164 } 1165 1166 my $invoke = ($txt =~ s/invoke/invoke/gi); 1167 if ( $invoke ) { 1168 print " $lh\"invoke\" used $invoke time", ($invoke==1 ? '':'s'), "$rh\n"; 1169 print " Unless \"invoke\" has some special meaning in context, \"run\" is simpler.\n"; 1170 } 1171 1172 my $parenplural = ($txt =~ s/\(s\)/\(s\)/gi); 1173 if ( $parenplural ) { 1174 print " $lh\"(s)\" used $parenplural time", ($parenplural==1 ? '':'s'), "$rh\n"; 1175 print " Please do not form plurals this way. 
It is a holdover from lazy\n"; 1176 print " programming practices, is difficult to read, and almost always\n"; 1177 print " unnecessary. A plural formed with a plain \"s\" is usually correct\n"; 1178 print " when speaking about numbers of one or more.\n"; 1179 } 1180 1181 # type-specific tests 1182 if ( $type eq "troff" ) { 1183 my $examples = ($txt =~ /\n\.\s*Sh\s+EXAMPLES/i); 1184 unless ( $examples ) { 1185 print " ${lh}no \"EXAMPLES\" section found$rh\n"; 1186 print " Even trivial examples can improve clarity.\n"; 1187 print " Common-use examples are better yet.\n"; 1188 } 1189 } 1190} 1191 1192# mdoc line-by-line tests 1193my @md_displays; 1194sub mdoc_whitespace { 1195 my ($bname, $line, $txt) = @_; 1196 1197 if ( $txt =~ /^\.\s*Bd\s/ ) { 1198 push @md_displays, ($txt =~ /-(?:literal|unfilled)/ || 0); 1199 } elsif ( $txt =~ /^\.\s*Ed\b/ ) { 1200 pop @md_displays; 1201 } elsif ( ! length $txt && ! grep $_, @md_displays ) { 1202 showline($bname, $line, $ansi{darkblue}, "blank line", $txt); 1203 } 1204} 1205 1206sub mdoc_date { 1207 my ($bname, $line, $txt) = @_; 1208 return if $txt =~ /^\s*$/; 1209 1210 if ( $txt =~ s/^(\.\s*Dd\s+)(.*)$/$1$lh$2$rh/ ) { 1211 $docdate = $2; 1212 showline($bname, $line, $ansi{darkyellow}, "date not today, $today", $txt) if $docdate ne $today; 1213 } 1214} 1215 1216sub mdoc_sentence { 1217 my ($bname, $line, $txt) = @_; 1218 return if $txt =~ /^\s*$/; 1219 1220 if ( $txt =~ s/^(\w{2,}.*?[^ .]{2,}\.\s+)(A |I |\w{2,})(.*)$/$1$lh$2$3$rh/ ) { 1221 showline($bname, $line, $ansi{darkcyan}, 'sentence not on new line', $txt); 1222 } 1223} 1224 1225sub init_mdoc_uniqxrefs { 1226 print "initializing mdoc_uniqxrefs\n" if $verbose; 1227 %seealsoxrefs = (); 1228} 1229 1230sub mdoc_uniqxrefs { 1231 my ($bname, $line, $txt) = @_; 1232 return if $txt =~ /^\s*$/; 1233 1234 # set a flag to indicate when a .Sh SEE ALSO section is found 1235 if ( $txt =~ /^\.Sh\s+(.*)/i ) { 1236 $seealso = ( $1 =~ /SEE ALSO/i ); 1237 print "mdoc_uniqxrefs: SEE ALSO 
# Report required macros that should already have been seen.
#   $lastmacro  the macro that was just found
#   $bname      name of the file being checked
#   $line       line number where $lastmacro was found
# Walks @macros in order up to (not including) $lastmacro and reports
# every macro that has no true value in %macroval.
sub showmacvals {
    my ($lastmacro, $bname, $line) = @_;
    for my $macro (@macros) {
        last if $macro eq $lastmacro;
        unless ( $macroval{$macro} ) {
            showline($bname, $line, $ansi{red}, ".$lastmacro used here", "but .$macro has not been defined");
        }
    }
}

# Reset the recorded value of every macro in @macros.
# Underscores in the macro names are replaced by spaces; because the
# loop variable aliases the array elements, @macros itself is changed.
sub init_mdoc_structure {
    print "initializing mdoc_structure\n" if $verbose;
    for my $macro (@macros) {
        $macro =~ tr/_/ /;
        $macroval{$macro} = '';
    }
}

# Return true when "man -w [section] name" succeeds.
# The command is run without a shell, with its output discarded, because
# both arguments are taken from the document being checked.  When man
# cannot be started the page is treated as not found.
sub _man_page_exists {
    my ($section, $name) = @_;
    my @cmd = ($man, '-w');
    push @cmd, $section if length $section;
    push @cmd, $name;

    my $pid = fork();
    return 0 unless defined $pid;
    if ( $pid == 0 ) {
        # child: silence man, then replace this process with it
        open(STDOUT, '>', '/dev/null');
        open(STDERR, '>', '/dev/null');
        exec { $cmd[0] } @cmd or POSIX::_exit(127);
    }
    waitpid($pid, 0);
    return $? == 0;
}

# Check the structure of an mdoc page, one macro line at a time.
#   $bname  name of the file being checked
#   $line   line number of $txt
#   $txt    the line of text to check
# Two things are checked:
#  - the macros listed in @macros are recorded in %macroval as they are
#    found, and showmacvals() reports the ones that were skipped;
#  - external references (.Xr) are checked for a missing name, a section
#    number inside the name, a bad section, a man page that does not
#    exist (only with $opt_x), and a reference to the page itself.
sub mdoc_structure {
    my ($bname, $line, $txt) = @_;
    return if $txt =~ /^\s*$/;

    # skip unless the line starts with an mdoc macro
    # technically, whitespace is allowed before macros
    return unless $txt =~ /^\s*\./;

    # check for required minimum macros
    for my $macro (@macros) {
        $macro =~ tr/_/ /;
        next if $macroval{$macro};
        if ( $txt =~ /^\.\s*\Q$macro\E\s*(.*)/i ) {
            my $parm = $1;
            # provide a blank parameter for macros with optional parameters
            $parm = ' ' if ($macro =~ /^Os|Sh NAME|Sh SYNOPSIS|Sh DESCRIPTION/) && (!$parm);
            $macroval{$macro} = $parm;
            showmacvals($macro, $bname, $line);
            last;
        }
    }

    # check external refs (.Xr)
    # suggested by Glen Barber
    return unless $txt =~ /^\.Xr/;

    # characters to treat as whitespace in an Xr macro
    my $wspace = '[ (),.:]';
    # character class for section numbers
    # an initial number possibly followed by a letter
    my $sect = '\d{1}[A-Za-z]?';

    my $xname = '';
    $xname = $1 if $txt =~ /^\.Xr$wspace+(\S+)/;
    my $xsection = '';
    $xsection = $1 if $txt =~ /^\.Xr$wspace+\S+$wspace+($sect)/;

    if ( ! $xname ) {
        showline($bname, $line, $ansi{yellow}, 'xref name missing', $txt);
        return;
    }

    if ( $xname =~ /\($sect\)/ ) {
        # the name contains parentheses here, so it has to be matched
        # literally or it would never be found and highlighted
        $txt =~ s/(\Q$xname\E)/$lh$1$rh/;
        showline($bname, $line, $ansi{yellow}, 'section number in name', $txt);
        return;
    }

    # NOTE(review): this is a string comparison and $sect captures a
    # single digit, so it is only true for "9" followed by a letter
    if ( $xsection && ($xsection gt "9") ) {
        $txt =~ s/^(\.Xr$wspace+\S+$wspace+)($sect)/$1$lh$2$rh/;
        showline($bname, $line, $ansi{yellow}, 'section higher than 9', $txt);
        # no point in checking for sections higher than 9
        return;
    }

    if ( $opt_x ) {
        unless ( _man_page_exists($xsection, $xname) ) {
            if ( $xsection ) {
                $txt =~ s/^(\.Xr$wspace+)(\S+$wspace+$sect)/$1$lh$2$rh/;
            } else {
                $txt =~ s/^(\.Xr$wspace+)(\S+)/$1$lh$2$rh/;
            }
            showline($bname, $line, $ansi{darkmagenta}, 'external man page not found', $txt);
            # not found, no point in checking if it's this one
            return;
        }
    }

    # is this external reference referring to itself?
    # an empty .Nm value never equals the (non-empty) xref name
    return if $macroval{'Nm'} ne $xname;
    my $currsection = '';
    if ( $macroval{'Dt'} =~ /^\S+\s+($sect)/ ) {
        $currsection = $1;
    }
    return if $xsection ne $currsection;
    if ( $xsection && $currsection ) {
        $txt =~ s/^(\.Xr$wspace+)(\S+$wspace+$sect)/$1$lh$2$rh/;
    } else {
        $txt =~ s/^(\.Xr$wspace+)(\S+)/$1$lh$2$rh/;
    }
    showline($bname, $line, $ansi{darkmagenta}, 'xref refers to *this* page (use .Nm)', $txt);
}


# DocBook line-by-line tests

# Build the regexes used by doc_titles():
#   $lc_regex         words that should be lowercase in titles
#   $uc_regex         short words that should still be capitalized
#   $fixedcase_regex  words whose capitalization must not be changed
#   $ignoreregex      DocBook tags (with their content) to leave alone
sub init_doc_titles {
    print "initializing doc_titles\n" if $verbose;
    # build regex of words that should be lowercase in titles
    my @lc_words = qw/ a an and at by down for from in into like near
                       nor of off on onto or over past the to upon with /;
    $lc_regex = '(?:' . join('|', @lc_words) . ')';
    my @uc_words = qw/ about are how log new not set tag use
                       one two three four five six seven eight nine /;
    $uc_regex = '(?:' . join('|', @uc_words) . ')';
    my @fixedcase_words = qw/ amd64 i386 iSCSI x86 /;
    $fixedcase_regex = '(?:' . join('|', @fixedcase_words) . ')';

    # build regex for ignoring DocBook tagged words in titles
    # like <command>ls</command>
    my @ignoretags = qw/ acronym application command filename function
                         link literal varname replaceable systemitem tag /;
    # each tag name is replaced in place by a pattern for the whole element
    for my $tag (@ignoretags) {
        $tag = "<$tag.*?>.*?<\/$tag>";
    }
    $ignoreregex = '<anchor.*?>|' . join('|', @ignoretags);
}
# Check the capitalization of words in DocBook titles.
#   $bname  name of the file being checked
#   $line   line number of $txt
#   $txt    the line of text to check
# $titleblock is switched on by a line containing "<title" and off again
# by a line containing "</title>", so titles spread over several lines
# are followed.  Words that look wrongly capitalized are replaced by
# highlight_string() output and the rebuilt line is shown when it
# differs from the original.
sub doc_titles {
    my ($bname, $line, $txt) = @_;
    return if $txt =~ /^\s*$/;

    my $original = $txt;

    return if $ignoreblock;
    $titleblock = 1 if $txt =~ /<title/;
    return unless $titleblock;

    print "doc_titles: '$txt'\n" if $verbose;

    # take the text from between title tags, or the
    # whole line if a title tag is not present
    my $titletext = $txt;
    if ( $txt =~ /<title.*?>(.*?)(?:<\/title>|$)/ ) {
        $titletext = $1;
    } elsif ( $txt =~ /(.*)(?:<\/title>)/ ) {
        $titletext = $1;
    }

    # split the result into an array of words, keeping
    # ignorable DocBook tags wrapped around text; the separators
    # are captured so the line can be put back together later
    my @words = split /($ignoreregex|\s+)/, $titletext;

    # use AP style: capitalize words longer than three letters; see also
    # http://www.freebsd.org/cgi/cvsweb.cgi/doc/en_US.ISO8859-1/books/handbook/linuxemu/chapter.sgml#rev1.48
    WORD: for my $idx (0 .. $#words) {
        my $token = $words[$idx];

        print "doc_titles: analyzing '$token'\n" if $verbose;

        # whitespace and ignorable tags are left alone
        next WORD if $token =~ /\s+/;
        next WORD if $token =~ /$ignoreregex/;

        # special case: skip the contents of some unfinished tags
        #   <title>Configuring <acronym role="Domain Name
        #   System">DNS</acronym></title>
        next WORD if $token =~ /(?:role)=/;

        # special case: allow single lowercase "s" for plurals
        next WORD if $token eq 's';

        # special case words that should not be capitalized
        next WORD if $token =~ /^$fixedcase_regex$/;

        # first word should be capitalized
        # first word is special, skip other tests
        if ( $idx == 0 && $txt =~ /<title/ ) {
            $words[$idx] = highlight_string($token) if is_lowercase($token);
            next WORD;
        }

        # last word should be capitalized
        # last word is special, skip other tests
        if ( $idx == $#words && $txt =~ /<\/title/ ) {
            $words[$idx] = highlight_string($token) if is_lowercase($token);
            last WORD;
        }

        # words that should be lower case
        if ( is_uppercase($token) && $token =~ /^$lc_regex$/i ) {
            $words[$idx] = highlight_string($token);
            next WORD;
        }

        # words that should be upper case: anything longer than three
        # letters that is not on the lowercase list, and the short
        # words on the uppercase list
        next WORD unless is_lowercase($token);
        if ( $token !~ /^$lc_regex$/i && length($token) > 3 ) {
            $words[$idx] = highlight_string($token);
        } elsif ( $token =~ /^$uc_regex$/i ) {
            $words[$idx] = highlight_string($token);
        }
    }

    # reconstruct the now-highlighted title
    my $rebuilt = '';
    $rebuilt = $1 if $original =~ /^(.*<title.*?>)/;
    $rebuilt .= join('', @words);
    $rebuilt .= $1 if $original =~ /(<\/title.*?>)/;

    if ( $rebuilt ne $original ) {
        print "doc_titles:\n original='$original'\n highlighted='$rebuilt'\n" if $verbose;
        showline($bname, $line, $ansi{blue}, 'capitalization', $rebuilt);
    }

    $titleblock = 0 if $rebuilt =~ /<\/title>/;
}

# Build the regexes used by the DocBook indentation test:
#   $indent_regex  tags that begin or end an indented section
#   $inline_regex  tags that are used inline, like <filename>blah</filename>
# Both tag lists are ordered longest name first before they are joined
# into an alternation.
sub init_doc_indentation {
    print "initializing doc_indentation\n" if $verbose;

    # DocBook tags that begin or end an indented section
    my @indent_tags = qw/
        abstract answer appendix article articleinfo
        author authorgroup biblioentry bibliography
        biblioset blockquote book bookinfo callout
        calloutlist category chapter chapterinfo colophon
        caution contrib date day entry event example
        figure formalpara funcdef funcsynopsis
        funcprototype glossary glossdef glossdiv
        glossentry glossterm important imageobject
        imageobjectco info informaltable
        informalexample itemizedlist legalnotice
        listitem mediaobject mediaobjectco month name
        note orderedlist para paramdef partintro
        personname preface procedure qandadiv
        qandaentry qandaset question row screenco
        sect1 sect2 sect3 sect4 sect5 section
        seglistitem segmentedlist sidebar step
        stepalternatives surname table tbody tgroup
        thead tip title variablelist varlistentry
        warning year
    /;
    # add VuXML tags
    push @indent_tags, qw/
        affects body cvename dates
        description discovery head html li name p range
        references topic ul vuln vuxml
    /;
    @indent_tags = sort { length($b) <=> length($a) } @indent_tags;
    print "indentation tags: @indent_tags\n" if $verbose;
    $indent_regex = '(?:' . join('|', @indent_tags) . ')';
    print "indentation regex: $indent_regex\n" if $verbose;

    # inline tags like <filename>blah</filename>
    my @inline_tags = qw/
        a acronym application citetitle command
        computeroutput devicename emphasis envar
        errorname filename firstterm footnote function
        guimenu guimenuitem hostid imagedata indexterm
        keycap keycombo link literal makevar option
        optional package parameter primary quote
        remark replaceable secondary see seg sgmltag
        simpara strong structname systemitem term tt
        ulink uri varname
    /;
    # add VuXML tags
    push @inline_tags, qw/ ge gt le lt url /;
    @inline_tags = sort { length($b) <=> length($a) } @inline_tags;
    print "inline tags: @inline_tags\n" if $verbose;
    $inline_regex = '(?:' . join('|', @inline_tags) . ')';
    print "inline regex: $inline_regex\n" if $verbose;
}
stepalternatives surname table tbody tgroup 1518 thead tip title variablelist varlistentry 1519 warning year /; 1520 # add VuXML tags 1521 @indent_tags = (@indent_tags, qw/ affects body cvename dates 1522 description discovery head html li name p range 1523 references topic ul vuln vuxml /); 1524 @indent_tags = (sort {length($b) <=> length($a)} @indent_tags); 1525 print "indentation tags: @indent_tags\n" if $verbose; 1526 $indent_regex = '(?:' . join('|', @indent_tags) . ')'; 1527 print "indentation regex: $indent_regex\n" if $verbose; 1528 # build regex for inline tags like 1529 # <filename>blah</filename> 1530 my @inline_tags = qw/ a acronym application citetitle command 1531 computeroutput devicename emphasis envar 1532 errorname filename firstterm footnote function 1533 guimenu guimenuitem hostid imagedata indexterm 1534 keycap keycombo link literal makevar option 1535 optional package parameter primary quote 1536 remark replaceable secondary see seg sgmltag 1537 simpara strong structname systemitem term tt 1538 ulink uri varname /; 1539 # add VuXML tags 1540 @inline_tags = (@inline_tags, qw/ ge gt le lt url /); 1541 @inline_tags = (sort {length($b) <=> length($a)} @inline_tags); 1542 print "inline tags: @inline_tags\n" if $verbose; 1543 $inline_regex = '(?:' . join('|', @inline_tags) . ')'; 1544 print "inline regex: $inline_regex\n" if $verbose; 1545} 1546 1547sub doc_indentation { 1548 my ($bname, $line, $currline) = @_; 1549 my ($init_prev_indent, $init_curr_indent); 1550 return if $currline =~ /^\s*$/; 1551 1552 # indents are not significant inside ignorable SGML blocks. 
1553 return if $ignoreblock; 1554 1555 return if $currline =~ /^\s*<!--.*-->\s*$/; 1556 1557 # \b is needed here to prevent <parameter> being detected as <para> 1558 return unless $prevnonblank =~ /<\/*$indent_regex\b.*?>/; 1559 1560 my $prev_indent = length(leading_space($prevnonblank)); 1561 my $curr_indent = length(leading_space($currline)); 1562 if ( $verbose ) { 1563 # save initial values for later verbose reporting 1564 $init_prev_indent = $prev_indent; 1565 $init_curr_indent = $curr_indent; 1566 } 1567 1568 # indent once for open tag on previous line 1569 $prev_indent += 2 if $prevnonblank =~ /<$indent_regex\b/; 1570 1571 # allow for inline tag indenting, like 1572 # <link 1573 # url= 1574 # or 1575 # <makevar>xyz 1576 # abc</makevar> 1577 my $count = 0; 1578 $count += ($prevnonblank =~ s/(<$inline_regex)\b/$1/g); 1579 $count -= ($prevnonblank =~ s/(<\/$inline_regex)\b/$1/g); 1580 $prev_indent += (2 * $count); 1581 1582 # if previous line ends in an open xref, indent 1583 $prev_indent += 2 if ($prevnonblank =~ /<xref\s*$/); 1584 1585 # <xref> has no close tag, but uses "linkend=" the same as <link> 1586 # which *does* have a close tag... 
so if there's a linkend= on 1587 # previous line but no </ulink> or </link> on either previous 1588 # or current lines, assume it's an xref and outdent 1589 my $broken_regex = '(?:(?:linkend|url)=)'; 1590 if ( $prevnonblank =~ /^\s*$broken_regex/ ) { 1591 if ($prevnonblank !~ /<\/(?:link|ulink)/) { 1592 if ($currline !~ /<\/(?:link|ulink)/) { 1593 $prev_indent -= 2; 1594 } 1595 } 1596 } 1597 1598 # outdent for close tag at end of previous line 1599 $prev_indent -= 2 if ($prevnonblank =~ /\S+.*<\/$indent_regex>\s*$/); 1600 1601 # outdent for close tag at the start of this line 1602 $prev_indent -= 2 if ($currline =~ /^\s*<\/$indent_regex/); 1603 1604 # outdent after footnote 1605 $prev_indent -=2 if $prevnonblank =~ /<\/para><\/footnote>/; 1606 1607 # singleton tags like <entry/> are really just an empty 1608 # open/close tag, <entry></entry>, allow for them 1609 $prev_indent -=2 if $prevnonblank =~ /\/>$/; 1610 1611 # close tags after long sections of nonindented blocks, 1612 # like the end of a programlisting, cannot be correctly 1613 # checked for indentation in this hacky way, so ignore them 1614 if ( ($prevnonblank =~ /$ignoreblockstart|$ignoreblockend/) 1615 || ($currline =~ /$ignoreblockend/) ) { 1616 $curr_indent = $prev_indent; 1617 } 1618 1619 if ( $curr_indent != $prev_indent ) { 1620 if ( $verbose ) { 1621 print "doc_indentation:\n"; 1622 my $vprev = showwhitespace($prevnonblank); 1623 my $vcurr = showwhitespace($currline); 1624 print "previous nonblank line: '$vprev\'\n"; 1625 print " current line: '$vcurr\'\n"; 1626 print "\t\t\t\tinitial\tfinal\n"; 1627 print "previous nonblank indent:\t$init_prev_indent\t$prev_indent\n"; 1628 print " current indent:\t$init_curr_indent\t$curr_indent\n"; 1629 } 1630 my $out = $origline; 1631 $out =~ s/(^\s+)/$li$1$ri/; 1632 showline($bname, $line, $ansi{darkred}, 'bad tag indent', $out); 1633 } 1634} 1635 1636# split and return leading space and content 1637sub splitleading { 1638 my $txt = shift; 1639 my $inspace = 
''; 1640 my $content = $txt; 1641 if ( $txt =~ /^(\s*)(.*)/ ) { 1642 $inspace = $1 if $1; 1643 $content = $2 if $2; 1644 } 1645 return ($inspace, $content); 1646} 1647 1648sub doc_longlines { 1649 my ($bname, $line, $txt) = @_; 1650 return if $txt =~ /^\s*$/; 1651 return if $ignoreblock; 1652 # if line is longer than $linelensgml (normally 70) chars 1653 # and the part after the indent has spaces 1654 # this should be smarter, like seeing if the part before the space 1655 # will benefit from wrapping 1656 1657 # ignore long lines with these tags 1658 return if $txt =~ /<(?:!DOCTYPE|!ENTITY|pubdate|releaseinfo)/; 1659 1660 $txt = expand_tabs($txt); 1661 1662 if ( length($txt) > $linelensgml ) { 1663 my ($inspace, $content) = splitleading($txt); 1664 my $currline = substr($content, 0, $linelensgml - length($inspace)); 1665 my $nextline = substr($content, length($currline)); 1666 if ( $currline =~ / / ) { 1667 $currline =~ s/^(.*)? (.*)$/$1$li $ri$2/; 1668 showline($bname, $line, $ansi{green}, 'wrap long line', "$inspace$currline$nextline"); 1669 } elsif ( $nextline =~ s/ /$li $ri/ ) { 1670 showline($bname, $line, $ansi{green}, 'wrap long line', "$inspace$currline$nextline"); 1671 } 1672 } 1673} 1674 1675sub init_doc_sentence { 1676 print "initializing doc_sentence\n" if $verbose; 1677 # end of sentence characters: literal dot, question mark, exclamation point 1678 $eos_regex = '\.|\?\!'; 1679} 1680 1681sub doc_sentence { 1682 my ($bname, $line, $txt) = @_; 1683 1684 return if $txt =~ /^\s*$/; 1685 return if $ignoreblock; 1686 1687 # skip if there is no end-of-sentence character 1688 return unless $txt =~ /(?:$eos_regex)/; 1689 1690 my $errcount = 0; 1691 my ($inspace, $content) = splitleading($txt); 1692 my @sentences = grep (! /^$/, split /((?:.*?(?:$eos_regex)+\s+)|(?:<.*?>))/, $content); 1693 1694 for my $s (@sentences) { 1695 # skip unless it has a one-space possible sentence start 1696 next unless $s =~ /\. 
$/; 1697 1698 # SGML markup, like "<emphasis>bold</emphasis>." 1699 #next if $s =~ />\. $/; 1700 1701 # single dots, like from "find . -name '*.sgml'" 1702 next if $s =~ / \. $/; 1703 1704 # initials 1705 next if $s =~ /[A-Z]{1}\. $/; 1706 1707 # common abbreviations 1708 next if $s =~ /(?:Ave|Dr|Ed|etc|Inc|Jr|Mass|Pub|Sp|St|Str|str|o\.o)\. $/; 1709 1710 # ignore misuse of cf., e.g., i.e., and v.s., they are not 1711 # end of sentence errors 1712 next if $s =~ /(?:cf|e(?:\.)*g|i\.e|v\.s)\. $/i; 1713 1714 # months 1715 next if $s =~ /(?:Jan|Feb|Mar|Apr|May|Jul|Aug|Sep|Oct|Nov|Dec)\. $/; 1716 1717 # numbers, like "... and 1997." 1718 next if $s =~ /\d+\. $/; 1719 1720 # ellipsis 1721 next if $s =~ /\.\.\. $/; 1722 1723 # it must be a single-space sentence start 1724 $s =~ s/ $/$li $ri/; 1725 $errcount++; 1726 } 1727 1728 if ( $errcount ) { 1729 # reassemble the now-highlighted string 1730 $txt = $inspace . join('', @sentences); 1731 showline($bname, $line, $ansi{darkblue}, 'use two spaces at sentence start', $txt); 1732 } 1733} 1734 1735sub init_doc_openclose { 1736 print "initializing doc_openclose\n" if $verbose; 1737 @openclose_tags = qw/ callout entry filename footnote li listitem literal p para row step /; 1738 for my $tag (@openclose_tags) { 1739 $opentag{$tag} = 0; 1740 } 1741 $openclose_regex = join('|', @openclose_tags); 1742 my @list_tags = qw/ itemizedlist orderedlist variablelist /; 1743 $list_regex = join('|', @list_tags); 1744 my @parawrap_tags = qw/ footnote listitem /; 1745 $parawrap_regex = join('|', @parawrap_tags); 1746} 1747 1748sub doc_openclose { 1749 my ($bname, $line, $txt) = @_; 1750 return if $txt =~ /^\s*$/; 1751 return if $ignoreblock; 1752 return unless $txt =~ /</; 1753 1754 my $errcount = 0; 1755 my ($inspace, $content) = splitleading($txt); 1756 my @chunks = split(/(<.*?(?:>|$))/, $content); 1757 @chunks = grep (! 
/^\s*$/, @chunks); 1758 1759 for my $chunk (@chunks) { 1760 next unless $chunk =~ /</; 1761 1762 for my $tag (@openclose_tags) { 1763 next unless $chunk =~ /(?:$openclose_regex)/; 1764 if ( $chunk =~ /$tag/ ) { 1765 # check for open without close 1766 if ( $opentag{$tag} && $chunk =~ /<$tag\b/ ) { 1767 $chunk =~ s/(<$tag\b)/$lh$1$rh/; 1768 showline($bname, $line, $ansi{red}, "open <$tag> without closing", $inspace . join('', @chunks)); 1769 } 1770 1771 # check for close without open 1772 if ( ! $opentag{$tag} && $chunk =~ /<\/$tag>/ ) { 1773 $chunk =~ s/(<\/$tag\W)/$lh$1$rh/; 1774 showline($bname, $line, $ansi{red}, "close </$tag> without opening", $inspace . join('', @chunks)); 1775 } 1776 1777 # evaluate closes 1778 $opentag{$tag} = 0 if $chunk =~ /<\/$tag>/; 1779 # evaluate opens 1780 $opentag{$tag} = 1 if $chunk =~ /<$tag\b/; 1781 } 1782 } 1783 1784 # special-case closes 1785 # <para> can be inside footnotes or lists 1786 $opentag{'para'} = 0 if $chunk =~ /<(?:$parawrap_regex)\b/; 1787 $opentag{'para'} = 0 if $chunk =~ /<\/(?:$list_regex)>/; 1788 1789 # list tags like <itemizedlist> start a new list 1790 # so 'listitem' is no longer open 1791 $opentag{'listitem'} = 0 if $chunk =~ /<(?:$list_regex)\b/; 1792 1793 # procedures can be nested, so <procedure> closes <step> 1794 $opentag{'step'} = 0 if $chunk =~ /<procedure\b/; 1795 1796 1797 # special-case opens 1798 $opentag{'para'} = 1 if $chunk =~ /<\/(?:$parawrap_regex)>/; 1799 $opentag{'para'} = 1 if $chunk =~ /<(?:$list_regex)\b/; 1800 1801 # list tags like </itemizedlist> end a list 1802 # so 'listitem' is open again 1803 $opentag{'listitem'} = 1 if $chunk =~ /<\/(?:$list_regex)>/; 1804 1805 # procedures can be nested, so </procedure> opens <step> 1806 $opentag{'step'} = 1 if $chunk =~ /<\/procedure\b/; 1807 } 1808} 1809 1810sub init_literalblock_regex { 1811 print "initializing literalblock_regex\n" if $verbose; 1812 # used by multiple tests 1813 $literalblock_regex = 'literallayout|programlisting|screen'; 
1814} 1815 1816sub doc_tagstyle_whitespace { 1817 my ($bname, $line, $currline) = @_; 1818 return if $ignoreblock; 1819 1820 my $currlinebak = $currline; 1821 1822 # <title> 1823 if ( $currline =~ s/^(\s*\S+.*?)(<title)/$1$lh$2$rh/ ) { 1824 showline($bname, $line, $ansi{darkcyan}, 'put <title> on new line', $currline); 1825 $currline = $currlinebak; 1826 } 1827 1828 # <para> 1829 if ( $currline =~ s/(<para>)\s*$/$1/ ) { 1830 showline($bname, $line, $ansi{red}, 'start <para> content on same line', $currline); 1831 } 1832 if ( $currline =~ s/(<\/para>)([^< ]+)$/$1$lh$2$rh/ ) { 1833 showline($bname, $line, $ansi{red}, 'character data is not allowed here', $currline); 1834 $currline = $currlinebak; 1835 } 1836 1837 # (programlisting> 1838 if ( $currline =~ /<programlisting/ ) { 1839 # <programlisting> should not be used as an inline tag 1840 if ( $currline =~ s/(\S+\s*<programlisting.*?>)/$lh$1$rh/ ) { 1841 showline($bname, $line, $ansi{red}, 'do not use <programlisting> inline in other elements', $currline); 1842 $currline = $currlinebak; 1843 } elsif ( ($currline =~ /\s*<programlisting/) 1844 && ($prevnonblank !~ /<\/(?:entry|formalpara|indexterm|note|para|programlisting|screen|title)>\s*$/) ) { 1845 # <programlisting> allowed inside these elements 1846 return if $prevnonblank =~ /<(?:example|informalexample)>/; 1847 $currline =~ s/(<programlisting.*?>)/$lh$1$rh/; 1848 showline($bname, $line, $ansi{red}, 'do not use <programlisting> inside other elements', $currline); 1849 $currline = $currlinebak; 1850 } 1851 } 1852 1853 # elements that should be preceded by a blank line 1854 if ( $prevline =~ /\S+/ ) { 1855 # an open tag like <informalexample> is okay, otherwise 1856 # there should be a blank line before these tags 1857 if ( ($prevline !~ /<.*?>\s*$/) && ($currline =~ s/(<(?:$literalblock_regex).*?(?:>|$))/$lh$1$rh/) ) { 1858 showline($bname, $line, $ansi{darkcyan}, "precede $1 with a blank line", $currline); 1859 $currline = $currlinebak; 1860 } 1861 } 1862 1863 # 
elements that should be followed by a blank line 1864 if ( $currline =~ /\S+/ ) { 1865 # a close tag like </note> is okay, otherwise there 1866 # should be a blank line after these tags 1867 # unless they are followed by another close tag on the same line 1868 # example: </literallayout></entry> 1869 # if ( ($currline !~ /^\s*<\//) && ($prevline =~ /(<\/(?:$literalblock_regex|row|step|title)>)/) ) { 1870 if ( ($currline !~ /^\s*<\//) && ($prevline =~ /(<\/(?:$literalblock_regex|row|step|title)>)/) && ($prevline !~ /<\/entry>$/) ) { 1871 showline($bname, $line, $ansi{darkcyan}, "add blank line after $1 on previous line", "$lh$currline$rh"); 1872 } 1873 } 1874} 1875 1876sub init_doc_writestyle { 1877 print "initializing doc_writestyle\n" if $verbose; 1878 $redundantword_regex = 'command|filename|keycap|option'; 1879 $redundanttagword_regex = '(<\/(?:command> command|filename> file|keycap> key|option> option))\b'; 1880} 1881 1882sub doc_writestyle { 1883 my ($bname, $line, $currline) = @_; 1884 return if $ignoreblock; 1885 1886 my $currlinebak = $currline; 1887 1888 # test for redundant markup and words starting on the previous line 1889 if ( $prevline =~ /(<\/(?:$redundantword_regex)>*\s*$)/ ) { 1890 my $prevend = $1; 1891 for my $word (split('|', $redundantword_regex)) { 1892 next unless $prevend =~ /$word/; 1893 next unless $currline =~ /^\s*>*\s*(\w+)\s*(?:\W+|$)/; 1894 my $firstword = $1; 1895 if ( "$prevend $firstword" =~ /$redundanttagword_regex/ ) { 1896 $currline =~ s/^(\s*)($firstword)\b/$1$lh$2$rh/; 1897 showline($bname, $line-1, $ansi{darkmagenta}, 'redundant markup and word', "... 
$lh$prevend$rh"); 1898 showline($bname, $line, $ansi{darkmagenta}, 'redundant markup and word', $currline); 1899 $currline = $currlinebak; 1900 last; 1901 } 1902 } 1903 } 1904 1905 # test for redundant markup and words on the current line 1906 if ( $currline =~ /$redundantword_regex/ ) { 1907 if ( $currline =~ s/$redundanttagword_regex/$lh$1$rh/ ) { 1908 showline($bname, $line, $ansi{darkmagenta}, 'redundant markup and word', $currline); 1909 $currline = $currlinebak; 1910 } 1911 } 1912} 1913 1914sub init_doc_stragglers { 1915 print "initializing doc_stragglers\n" if $verbose; 1916 @straggler_tags = qw/ application command entry filename 1917 guibutton guimenu keycap link literal para 1918 title ulink uri varname /; 1919} 1920 1921sub doc_stragglers { 1922 my ($bname, $line, $txt) = @_; 1923 return if $txt =~ /^\s*$/; 1924 1925 # check for spaces after open tags or before close tags 1926 # like <title> Something</title> 1927 # or <filename>/etc/rc.conf </filename> 1928 1929 # these tags should not have spaces or tabs around content 1930 # opening tags (this will not catch link tags with attributes) 1931 for my $tag (@straggler_tags) { 1932 next if $tag eq 'entry'; 1933 if ( $txt =~ /(<$tag>\s+)/ ) { 1934 print "doc_stragglers opening tags: tag='$tag', found='$1'\n" if $verbose; 1935 $txt = highlight_word($txt, $1); 1936 showline($bname, $line, $ansi{yellow}, "space before content", $txt); 1937 } 1938 } 1939 # closing tags 1940 for my $tag (@straggler_tags) { 1941 next if $tag eq 'entry'; 1942 if ( $txt =~ /(\s+<\/$tag>)/ ) { 1943 print "doc_stragglers closing tags: tag='$tag', found='$1'\n" if $verbose; 1944 $txt = highlight_word($txt, $1); 1945 showline($bname, $line, $ansi{yellow}, "space after content", $txt); 1946 } 1947 } 1948 # special case: link tags 1949 # like <link xlink:href="&url.articles.gjournal-desktop;"> 1950 # ignore the opening < and just key off of xlink:href 1951 if ( $txt =~ /(xlink:href\S+?>)(.)/ ) { 1952 my $lastchar = $2; 1953 if ( $lastchar 
eq ' ' || $lastchar eq "\t" ) { 1954 print "doc_stragglers xlink:href, found='$1$lastchar'\n" if $verbose; 1955 $txt = highlight_word($txt, $1); 1956 showline($bname, $line, $ansi{yellow}, "space before content", $txt); 1957 } 1958 } 1959 1960 # check for literal start tags without listing on the same line 1961 my $tag; 1962 if ( $txt =~ />\s*$/ ) { 1963 if ( $txt =~ /<($literalblock_regex)[^<]?>$/ ) { 1964 $tag = $1; 1965 $txt =~ s/(<$tag[^<]?>)$/$lh$1$rh/; 1966 showline($bname, $line, $ansi{yellow}, "put <$tag> listing on same line", $txt); 1967 return; 1968 } elsif ( $txt =~ /^\s*<\/($literalblock_regex)[^<]?>/ ) { 1969 $tag = $1; 1970 $txt =~ s/(<\/$tag[^<]?>)$/$lh$1$rh/; 1971 showline($bname, $line, $ansi{yellow}, "straggling </$tag>", $txt); 1972 return; 1973 } 1974 } 1975 1976 # the following tests are only for close tags at the start of a line 1977 return unless $txt =~ /^\s*<\//; 1978 1979 return if $ignoreblock; 1980 1981 # stragglers can't be detected when coming out of an ignore block 1982 return if ( $prevline =~ /$ignoreblockstart|$ignoreblockend/ ); 1983 1984 # more special-case hackery to handle 1985 # </table> 1986 # </para> 1987 if ( ($prevline =~ /<\/table>\s*$/) 1988 && ($txt =~ /^\s*<\/para>\s*$/) ) { 1989 return; 1990 } 1991 1992 # even more special-case hackery to handle 1993 # <para>...</para> 1994 # <note>...</note> 1995 # </entry> 1996 if ( ($prevline =~ /<\/para>|<\/note>\s*$/) 1997 && ($txt =~ /^\s*<\/entry>\s*$/) ) { 1998 return; 1999 } 2000 2001 for my $tag (@straggler_tags) { 2002 if ( $txt =~ /^\s*(<\/$tag>)\s*$/ ) { 2003 $txt = highlight_word($txt, $1); 2004 showline($bname, $line, $ansi{yellow}, "straggling </$tag>", $txt); 2005 } 2006 } 2007} 2008 2009sub doc_whitespace { 2010 my ($bname, $line, $txt) = @_; 2011 my $txtbak = $txt; 2012 2013 # indents and tabs/spaces are not significant inside 2014 # ignorable SGML blocks 2015 return if $ignoreblock; 2016 2017 # multiples of eight spaces at the start a line 2018 # (after zero or 
more tabs) should be a tab 2019 if ( $txt =~ s/^(\t* {8})+/$li$1$ri/g ) { 2020 showline($bname, $line, $ansi{darkmagenta}, 'use tabs instead of spaces', $txt); 2021 } 2022 2023 # tabs hidden in paragraphs is also bad 2024 $txt = $txtbak; 2025 if ( $txt =~ s/^(\s*\S+)(.*)(\t)/$1$2$li$3$ri/ ) { 2026 showline($bname, $line, $ansi{darkmagenta}, 'tab in content', $txt); 2027 } 2028 2029 # if coming out of an ignoreblock, odd spaces are 2030 # an artifact of splitting the line and can't be checked 2031 return if ( $prevline =~ /$ignoreblockstart|$ignoreblockend/ ); 2032 2033 # one or more occurrences of single tabs or double spaces, 2034 # followed by a single space, is a bad indent 2035 # if ( $txt =~ s/^((?:(?: )+|(?:\t+))* )\b/$li$1$ri/ ) { 2036 2037 # but simpler just to expand tabs to 8 spaces 2038 # and check for an odd number of spaces 2039 $txt = $txtbak; 2040 $txt = expand_tabs($txt); 2041 if ( $txt =~ s/^((?: )* )\b/$li$1$ri/ ) { 2042 showline($bname, $line, $ansi{darkred}, 'bad indent', $txt); 2043 } 2044} 2045 2046 2047# DocBook batch tests 2048 2049 2050 2051# remember previous line for comparison 2052sub saveprevline { 2053 my $pline = shift; 2054 $prevline = $pline; 2055 if ( $pline =~ /\S+/ ) { 2056 # treat comments as blank lines 2057 return if $pline =~ /\s*<!--/; 2058 return if $pline =~ /-->\s*$/; 2059 $prevnonblank = $pline; 2060 } 2061} 2062 2063 2064initialize(); 2065 2066if ( $opt_X ) { 2067 print "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; 2068 print "<checkstyle version=\"7.1\">\n"; 2069} 2070 2071# main loop 2072foreach my $fname (@ARGV) { 2073 if ( $fname ne 'stdin' ) { 2074 next if -d $fname; 2075 unless ( -f $fname ) { 2076 print "$fname: not found\n"; 2077 next; 2078 } 2079 unless ( -r $fname ) { 2080 print "$fname: not readable\n"; 2081 next; 2082 } 2083 } 2084 2085 unless ( $opt_X ) { 2086 print "$fname:\n" if $#ARGV > 0; 2087 } else { 2088 print " <file name=\"", xmlize($fname), "\">\n"; 2089 } 2090 $fname = writestdinfile() if 
$fname eq "stdin"; 2091 2092 $bname = basename($fname); 2093 $tmpfile = ''; 2094 $type = filetype($fname); 2095 2096 if ( $type =~ /gzip|bzip/ ) { 2097 $tmpfile = uncompress($fname, $type); 2098 $type = filetype($tmpfile); 2099 } 2100 2101 print "detected file type:$type\n" if $verbose; 2102 2103 open $fh, '<', ($tmpfile ? $tmpfile : $fname) or die "cannot open '$tmpfile':$!\n"; 2104 2105 # reset for each new document 2106 init_mdoc_uniqxrefs() if $opt_g; # mdoc see also xrefs 2107 init_mdoc_structure() if $opt_m; # mdoc tag presence 2108 $ignoreblock = 0; # ignore SGML block 2109 my $saveindent = ''; # SGML indent level 2110 2111 # line-by-line tests 2112 while (<$fh>) { 2113 # limit output to one vulnerability ID 2114 if ( $vid ) { 2115 if ( $_ =~ /<vuln/ ) { 2116 print "checking for VID in '$_'\n" if $verbose; 2117 if ( $vid eq 'latest' ) { 2118 $startline = $. if $_ =~ /<vuln vid=/; 2119 } else { 2120 $startline = $. if $_ =~ /<vuln vid=\"$vid\"/; 2121 } 2122 $stopline = $. if $_ =~ /<\/vuln/; 2123 print "VID: startline=$startline, stopline=$stopline\n" if $verbose; 2124 } 2125 } 2126 2127 # end if past specified ending line number 2128 last if $stopline && ($. 
> $stopline); 2129 2130 chomp; 2131 2132 # global tests 2133 abbrevs($bname, $., $_) if $opt_a; 2134 badphrases($bname, $., $_) if $opt_b; 2135 contractions($bname, $., $_) if $opt_u; 2136 freebsdobsolete($bname, $., $_) if $opt_f; 2137 repeatedwords($bname, $., $_) if $opt_r; 2138 spellingerrors($bname, $., $_) if $opt_s; 2139 whitespace($bname, $., $_) if $opt_w; 2140 2141 # mdoc line tests 2142 if ( $type eq "troff" ) { 2143 next if /^\.\\\"/; # ignore comments for these tests 2144 2145 mdoc_whitespace($bname, $., $_) if $opt_p; 2146 mdoc_date($bname, $., $_) if $opt_d; 2147 mdoc_sentence($bname, $., $_) if $opt_e; 2148 mdoc_uniqxrefs($bname, $., $_) if $opt_g; 2149 mdoc_structure($bname, $., $_) if $opt_m; 2150 } 2151 2152 # DocBook line tests 2153 if ( $type =~ /sgml|xml/ ) { 2154 $origline = $_; 2155 doc_stragglers($bname, $., $_) if $opt_S; 2156 doc_tagstyle_whitespace($bname, $., $_) if $opt_t; 2157 for my $segment (splitter($_)) { 2158 if ( $segment =~ /($ignoreblockstart)/ ) { 2159 # when entering an ignore block, test the full 2160 # line for indentation unless it is a comment 2161 unless ( $origline =~ /^\s*<!--/ ) { 2162 doc_indentation($bname, $., $origline) if $opt_i; 2163 # test just the indent for whitespace 2164 my ($origindent, undef) = splitleading($origline); 2165 doc_whitespace($bname, $., $origindent) if $opt_W; 2166 $saveindent = leading_space($origline); 2167 # save the same state information as the main loop would 2168 saveprevline($saveindent . $1); 2169 # test just the leading whitespace 2170 } 2171 $ignoreblock++; 2172 next; 2173 } elsif ( $segment =~ /($ignoreblockend)/ ) { 2174 # restore the indent level at the end of an ignore block 2175 $ignoreblock--; 2176 $prevline = substr($saveindent,0,length($saveindent)-2) . 
$1; 2177 next; 2178 } 2179 doc_titles($bname, $., $segment) if $opt_c; 2180 doc_indentation($bname, $., $segment) if $opt_i; 2181 doc_longlines($bname, $., $segment) if $opt_l; 2182 doc_sentence($bname, $., $segment) if $opt_n; 2183 doc_openclose($bname, $., $segment) if $opt_o; 2184 doc_writestyle($bname, $., $segment) if $opt_E; 2185 doc_whitespace($bname, $., $segment) if $opt_W; 2186 } 2187 } 2188 saveprevline($_); 2189 } 2190 2191 close $fh or die "could not close file:$!\n"; 2192 2193 if ( $opt_d || $opt_y ) { 2194 # skip batch tests if a line range is set 2195 last if $opt_C; 2196 2197 # slurp the whole file 2198 open $fh, '<', ($tmpfile ? $tmpfile : $fname) or die "cannot open '$tmpfile':$!\n"; 2199 my $fulltext = do { local($/); <$fh> }; 2200 close $fh or die "could not close file:$!\n"; 2201 2202 # global batch tests 2203 style($bname, $fulltext) if $opt_y; 2204 2205 # mdoc batch tests 2206 if ( ($type eq "troff") && ($opt_d) && (!$docdate) ) { 2207 showline($bname, '-', '.Dd date not set', '', ''); 2208 } 2209 } 2210 2211 if ( $opt_X ) { 2212 print " </file>\n"; 2213 } 2214 2215 removetempfiles(); 2216} 2217 2218if ( $opt_X ) { 2219 print "</checkstyle>\n"; 2220} 2221