1# trim.tcl -- 2# 3# Various ways of trimming a string. 4# 5# Copyright (c) 2000 by Ajuba Solutions. 6# Copyright (c) 2000 by Eric Melski <ericm@ajubasolutions.com> 7# Copyright (c) 2002-2004 by Johannes-Heinrich Vogeler <vogeler@users.sourceforge.net> 8# Copyright (c) 2001-2006 by Andreas Kupries <andreas_kupries@users.sourceforge.net> 9# 10# See the file "license.terms" for information on usage and redistribution 11# of this file, and for a DISCLAIMER OF ALL WARRANTIES. 12# 13# RCS: @(#) $Id: adjust.tcl,v 1.16 2011/12/13 18:12:56 andreas_kupries Exp $ 14 15# ### ### ### ######### ######### ######### 16## Requirements 17 18package require Tcl 8.2 19package require textutil::repeat 20package require textutil::string 21 22namespace eval ::textutil::adjust {} 23 24# ### ### ### ######### ######### ######### 25## API implementation 26 27namespace eval ::textutil::adjust { 28 namespace import -force ::textutil::repeat::strRepeat 29} 30 31proc ::textutil::adjust::adjust {text args} { 32 if {[string length [string trim $text]] == 0} { 33 return "" 34 } 35 36 Configure $args 37 Adjust text newtext 38 39 return $newtext 40} 41 42proc ::textutil::adjust::Configure {args} { 43 variable Justify left 44 variable Length 72 45 variable FullLine 0 46 variable StrictLength 0 47 variable Hyphenate 0 48 variable HyphPatterns ; # hyphenation patterns (TeX) 49 50 set args [ lindex $args 0 ] 51 foreach { option value } $args { 52 switch -exact -- $option { 53 -full { 54 if { ![ string is boolean -strict $value ] } then { 55 error "expected boolean but got \"$value\"" 56 } 57 set FullLine [ string is true $value ] 58 } 59 -hyphenate { 60 # the word exceeding the length of line is tried to be 61 # hyphenated; if a word cannot be hyphenated to fit into 62 # the line processing stops! The length of the line should 63 # be set to a reasonable value! 64 65 if { ![ string is boolean -strict $value ] } then { 66 error "expected boolean but got \"$value\"" 67 } 68 set Hyphenate [string is true $value] 69 if { $Hyphenate && ![info exists HyphPatterns(_LOADED_)]} { 70 error "hyphenation patterns not loaded!" 71 } 72 } 73 -justify { 74 set lovalue [ string tolower $value ] 75 switch -exact -- $lovalue { 76 left - 77 right - 78 center - 79 plain { 80 set Justify $lovalue 81 } 82 default { 83 error "bad value \"$value\": should be center, left, plain or right" 84 } 85 } 86 } 87 -length { 88 if { ![ string is integer $value ] } then { 89 error "expected positive integer but got \"$value\"" 90 } 91 if { $value < 1 } then { 92 error "expected positive integer but got \"$value\"" 93 } 94 set Length $value 95 } 96 -strictlength { 97 # the word exceeding the length of line is moved to the 98 # next line without hyphenation; words longer than given 99 # line length are cut into smaller pieces 100 101 if { ![ string is boolean -strict $value ] } then { 102 error "expected boolean but got \"$value\"" 103 } 104 set StrictLength [ string is true $value ] 105 } 106 default { 107 error "bad option \"$option\": must be -full, -hyphenate, \ 108 -justify, -length, or -strictlength" 109 } 110 } 111 } 112 113 return "" 114} 115 116# ::textutil::adjust::Adjust 117# 118# History: 119# rewritten on 2004-04-13 for bugfix tcllib-bugs-882402 (jhv) 120 121proc ::textutil::adjust::Adjust { varOrigName varNewName } { 122 variable Length 123 variable FullLine 124 variable StrictLength 125 variable Hyphenate 126 127 upvar $varOrigName orig 128 upvar $varNewName text 129 130 set pos 0; # Cursor after writing 131 set line "" 132 set text "" 133 134 135 if {!$FullLine} { 136 regsub -all -- "(\n)|(\t)" $orig " " orig 137 regsub -all -- " +" $orig " " orig 138 regsub -all -- "(^ *)|( *\$)" $orig "" orig 139 } 140 141 set words [split $orig] 142 set numWords [llength $words] 143 set numline 0 144 145 for {set cnt 0} {$cnt < $numWords} {incr cnt} { 146 147 set w [lindex $words $cnt] 148 set wLen [string length $w] 149 150 # the word $w doesn't fit into the present line 151 # case #1: we try to hyphenate 152 153 if {$Hyphenate && ($pos+$wLen >= $Length)} { 154 # Hyphenation instructions 155 set w2 [textutil::adjust::Hyphenation $w] 156 157 set iMax [llength $w2] 158 if {$iMax == 1 && [string length $w] > $Length} { 159 # word cannot be hyphenated and exceeds linesize 160 161 error "Word \"$w2\" can\'t be hyphenated\ 162 and exceeds linesize $Length!" 163 } else { 164 # hyphenating of $w was successfull, but we have to look 165 # that every sylable would fit into the line 166 167 foreach x $w2 { 168 if {[string length $x] >= $Length} { 169 error "Word \"$w\" can\'t be hyphenated\ 170 to fit into linesize $Length!" 171 } 172 } 173 } 174 175 for {set i 0; set w3 ""} {$i < $iMax} {incr i} { 176 set syl [lindex $w2 $i] 177 if {($pos+[string length " $w3$syl-"]) > $Length} {break} 178 append w3 $syl 179 } 180 for {set w4 ""} {$i < $iMax} {incr i} { 181 set syl [lindex $w2 $i] 182 append w4 $syl 183 } 184 185 if {[string length $w3] && [string length $w4]} { 186 # hyphenation was successfull: redefine 187 # list of words w => {"$w3-" "$w4"} 188 189 set x [lreplace $words $cnt $cnt "$w4"] 190 set words [linsert $x $cnt "$w3-"] 191 set w [lindex $words $cnt] 192 set wLen [string length $w] 193 incr numWords 194 } 195 } 196 197 # the word $w doesn't fit into the present line 198 # case #2: we try to cut the word into pieces 199 200 if {$StrictLength && ([string length $w] > $Length)} { 201 # cut word into two pieces 202 set w2 $w 203 204 set over [expr {$pos+2+$wLen-$Length}] 205 206 incr Length -1 207 set w3 [string range $w2 0 $Length] 208 incr Length 209 set w4 [string range $w2 $Length end] 210 211 set x [lreplace $words $cnt $cnt $w4] 212 set words [linsert $x $cnt $w3 ] 213 set w [lindex $words $cnt] 214 set wLen [string length $w] 215 incr numWords 216 } 217 218 # continuing with the normal procedure 219 220 if {($pos+$wLen < $Length)} { 221 # append word to current line 222 223 if {$pos} {append line " "; incr pos} 224 append line $w 225 incr pos $wLen 226 } else { 227 # line full => write buffer and begin a new line 228 229 if {[string length $text]} {append text "\n"} 230 append text [Justification $line [incr numline]] 231 set line $w 232 set pos $wLen 233 } 234 } 235 236 # write buffer and return! 237 238 if {[string length $text]} {append text "\n"} 239 append text [Justification $line end] 240 return $text 241} 242 243# ::textutil::adjust::Justification 244# 245# justify a given line 246# 247# Parameters: 248# line text for justification 249# index index for line in text 250# 251# Returns: 252# the justified line 253# 254# Remarks: 255# Only lines with size not exceeding the max. linesize provided 256# for text formatting are justified!!! 257 258proc ::textutil::adjust::Justification { line index } { 259 variable Justify 260 variable Length 261 variable FullLine 262 263 set len [string length $line]; # length of current line 264 265 if { $Length <= $len } then { 266 # the length of current line ($len) is equal as or greater than 267 # the value provided for text formatting ($Length) => to avoid 268 # inifinite loops we leave $line unchanged and return! 269 270 return $line 271 } 272 273 # Special case: 274 # for the last line, and if the justification is set to 'plain' 275 # the real justification is 'left' if the length of the line 276 # is less than 90% (rounded) of the max length allowed. This is 277 # to avoid expansion of this line when it is too small: without 278 # it, the added spaces will 'unbeautify' the result. 279 # 280 281 set justify $Justify 282 if { ( "$index" == "end" ) && \ 283 ( "$Justify" == "plain" ) && \ 284 ( $len < round($Length * 0.90) ) } then { 285 set justify left 286 } 287 288 # For a left justification, nothing to do, but to 289 # add some spaces at the end of the line if requested 290 291 if { "$justify" == "left" } then { 292 set jus "" 293 if { $FullLine } then { 294 set jus [strRepeat " " [ expr { $Length - $len } ]] 295 } 296 return "${line}${jus}" 297 } 298 299 # For a right justification, just add enough spaces 300 # at the beginning of the line 301 302 if { "$justify" == "right" } then { 303 set jus [strRepeat " " [ expr { $Length - $len } ]] 304 return "${jus}${line}" 305 } 306 307 # For a center justification, add half of the needed spaces 308 # at the beginning of the line, and the rest at the end 309 # only if needed. 310 311 if { "$justify" == "center" } then { 312 set mr [ expr { ( $Length - $len ) / 2 } ] 313 set ml [ expr { $Length - $len - $mr } ] 314 set jusl [strRepeat " " $ml] 315 set jusr [strRepeat " " $mr] 316 if { $FullLine } then { 317 return "${jusl}${line}${jusr}" 318 } else { 319 return "${jusl}${line}" 320 } 321 } 322 323 # For a plain justification, it's a little bit complex: 324 # 325 # if some spaces are missing, then 326 # 327 # 1) sort the list of words in the current line by decreasing size 328 # 2) foreach word, add one space before it, except if it's the 329 # first word, until enough spaces are added 330 # 3) rebuild the line 331 332 if { "$justify" == "plain" } then { 333 set miss [ expr { $Length - [ string length $line ] } ] 334 335 # Bugfix tcllib-bugs-860753 (jhv) 336 337 set words [split $line] 338 set numWords [llength $words] 339 340 if {$numWords < 2} { 341 # current line consists of less than two words - we can't 342 # insert blanks to achieve a plain justification => leave 343 # $line unchanged and return! 344 345 return $line 346 } 347 348 for {set i 0; set totalLen 0} {$i < $numWords} {incr i} { 349 set w($i) [lindex $words $i] 350 if {$i > 0} {set w($i) " $w($i)"} 351 set wLen($i) [string length $w($i)] 352 set totalLen [expr {$totalLen+$wLen($i)}] 353 } 354 355 set miss [expr {$Length - $totalLen}] 356 357 # len walks through all lengths of words of the line under 358 # consideration 359 360 for {set len 1} {$miss > 0} {incr len} { 361 for {set i 1} {($i < $numWords) && ($miss > 0)} {incr i} { 362 if {$wLen($i) == $len} { 363 set w($i) " $w($i)" 364 incr wLen($i) 365 incr miss -1 366 } 367 } 368 } 369 370 set line "" 371 for {set i 0} {$i < $numWords} {incr i} { 372 set line "$line$w($i)" 373 } 374 375 # End of bugfix 376 377 return "${line}" 378 } 379 380 error "Illegal justification key \"$justify\"" 381} 382 383proc ::textutil::adjust::SortList { list dir index } { 384 385 if { [ catch { lsort -integer -$dir -index $index $list } sl ] != 0 } then { 386 error "$sl" 387 } 388 389 return $sl 390} 391 392# Hyphenation utilities based on Knuth's algorithm 393# 394# Copyright (C) 2001-2003 by Dr.Johannes-Heinrich Vogeler (jhv) 395# These procedures may be used as part of the tcllib 396 397# textutil::adjust::Hyphenation 398# 399# Hyphenate a string using Knuth's algorithm 400# 401# Parameters: 402# str string to be hyphenated 403# 404# Returns: 405# the hyphenated string 406 407proc ::textutil::adjust::Hyphenation { str } { 408 409 # if there are manual set hyphenation marks e.g. "Recht\-schrei\-bung" 410 # use these for hyphenation and return 411 412 if {[regexp {[^\\-]*[\\-][.]*} $str]} { 413 regsub -all {(\\)(-)} $str {-} tmp 414 return [split $tmp -] 415 } 416 417 # Don't hyphenate very short words! Minimum length for hyphenation 418 # is set to 3 characters! 419 420 if { [string length $str] < 4 } then { return $str } 421 422 # otherwise follow Knuth's algorithm 423 424 variable HyphPatterns; # hyphenation patterns (TeX) 425 426 set w ".[string tolower $str]."; # transform to lower case 427 set wLen [string length $w]; # and add delimiters 428 429 # Initialize hyphenation weights 430 431 set s {} 432 for {set i 0} {$i < $wLen} {incr i} { 433 lappend s 0 434 } 435 436 for {set i 0} {$i < $wLen} {incr i} { 437 set kmax [expr {$wLen-$i}] 438 for {set k 1} {$k < $kmax} {incr k} { 439 set sw [string range $w $i [expr {$i+$k}]] 440 if {[info exists HyphPatterns($sw)]} { 441 set hw $HyphPatterns($sw) 442 set hwLen [string length $hw] 443 for {set l1 0; set l2 0} {$l1 < $hwLen} {incr l1} { 444 set c [string index $hw $l1] 445 if {[string is digit $c]} { 446 set sPos [expr {$i+$l2}] 447 if {$c > [lindex $s $sPos]} { 448 set s [lreplace $s $sPos $sPos $c] 449 } 450 } else { 451 incr l2 452 } 453 } 454 } 455 } 456 } 457 458 # Replace all even hyphenation weigths by zero 459 460 for {set i 0} {$i < [llength $s]} {incr i} { 461 set c [lindex $s $i] 462 if {!($c%2)} { set s [lreplace $s $i $i 0] } 463 } 464 465 # Don't start with a hyphen! Take also care of words enclosed in quotes 466 # or that someone has forgotten to put a blank between a punctuation 467 # character and the following word etc. 468 469 for {set i 1} {$i < ($wLen-1)} {incr i} { 470 set c [string range $w $i end] 471 if {[regexp {^[:alpha:][.]*} $c]} { 472 for {set k 1} {$k < ($i+1)} {incr k} { 473 set s [lreplace $s $k $k 0] 474 } 475 break 476 } 477 } 478 479 # Don't separate the last character of a word with a hyphen 480 481 set max [expr {[llength $s]-2}] 482 if {$max} {set s [lreplace $s $max end 0]} 483 484 # return the syllabels of the hyphenated word as a list! 485 486 set ret "" 487 set w ".$str." 488 for {set i 1} {$i < ($wLen-1)} {incr i} { 489 if {[lindex $s $i]} { append ret - } 490 append ret [string index $w $i] 491 } 492 return [split $ret -] 493} 494 495# textutil::adjust::listPredefined 496# 497# Return the names of the hyphenation files coming with the package. 498# 499# Parameters: 500# None. 501# 502# Result: 503# List of filenames (without directory) 504 505proc ::textutil::adjust::listPredefined {} { 506 variable here 507 return [glob -type f -directory $here -tails *.tex] 508} 509 510# textutil::adjust::getPredefined 511# 512# Retrieve the full path for a predefined hyphenation file 513# coming with the package. 514# 515# Parameters: 516# name Name of the predefined file. 517# 518# Results: 519# Full path to the file, or an error if it doesn't 520# exist or is matching the pattern *.tex. 521 522proc ::textutil::adjust::getPredefined {name} { 523 variable here 524 525 if {![string match *.tex $name]} { 526 return -code error \ 527 "Illegal hyphenation file \"$name\"" 528 } 529 set path [file join $here $name] 530 if {![file exists $path]} { 531 return -code error \ 532 "Unknown hyphenation file \"$path\"" 533 } 534 return $path 535} 536 537# textutil::adjust::readPatterns 538# 539# Read hyphenation patterns from a file and store them in an array 540# 541# Parameters: 542# filNam name of the file containing the patterns 543 544proc ::textutil::adjust::readPatterns { filNam } { 545 546 variable HyphPatterns; # hyphenation patterns (TeX) 547 548 # HyphPatterns(_LOADED_) is used as flag for having loaded 549 # hyphenation patterns from the respective file (TeX format) 550 551 if {[info exists HyphPatterns(_LOADED_)]} { 552 unset HyphPatterns(_LOADED_) 553 } 554 555 # the array xlat provides translation from TeX encoded characters 556 # to those of the ISO-8859-1 character set 557 558 set xlat(\"s) \337; # 223 := sharp s " 559 set xlat(\`a) \340; # 224 := a, grave 560 set xlat(\'a) \341; # 225 := a, acute 561 set xlat(\^a) \342; # 226 := a, circumflex 562 set xlat(\"a) \344; # 228 := a, diaeresis " 563 set xlat(\`e) \350; # 232 := e, grave 564 set xlat(\'e) \351; # 233 := e, acute 565 set xlat(\^e) \352; # 234 := e, circumflex 566 set xlat(\`i) \354; # 236 := i, grave 567 set xlat(\'i) \355; # 237 := i, acute 568 set xlat(\^i) \356; # 238 := i, circumflex 569 set xlat(\~n) \361; # 241 := n, tilde 570 set xlat(\`o) \362; # 242 := o, grave 571 set xlat(\'o) \363; # 243 := o, acute 572 set xlat(\^o) \364; # 244 := o, circumflex 573 set xlat(\"o) \366; # 246 := o, diaeresis " 574 set xlat(\`u) \371; # 249 := u, grave 575 set xlat(\'u) \372; # 250 := u, acute 576 set xlat(\^u) \373; # 251 := u, circumflex 577 set xlat(\"u) \374; # 252 := u, diaeresis " 578 579 set fd [open $filNam RDONLY] 580 set status 0 581 582 while {[gets $fd line] >= 0} { 583 584 switch -exact $status { 585 PATTERNS { 586 if {[regexp {^\}[.]*} $line]} { 587 # End of patterns encountered: set status 588 # and ignore that line 589 set status 0 590 continue 591 } else { 592 # This seems to be pattern definition line; to process it 593 # we have first to do some editing 594 # 595 # 1) eat comments in a pattern definition line 596 # 2) eat braces and coded linefeeds 597 598 set z [string first "%" $line] 599 if {$z > 0} { set line [string range $line 0 [expr {$z-1}]] } 600 601 regsub -all {(\\n|\{|\})} $line {} tmp 602 set line $tmp 603 604 # Now $line should consist only of hyphenation patterns 605 # separated by white space 606 607 # Translate TeX encoded characters to ISO-8859-1 characters 608 # using the array xlat defined above 609 610 foreach x [array names xlat] { 611 regsub -all {$x} $line $xlat($x) tmp 612 set line $tmp 613 } 614 615 # split the line and create a lookup array for 616 # the repective hyphenation patterns 617 618 foreach item [split $line] { 619 if {[string length $item]} { 620 if {![string match {\\} $item]} { 621 # create index for hyphenation patterns 622 623 set var $item 624 regsub -all {[0-9]} $var {} idx 625 # store hyphenation patterns as elements of an array 626 627 set HyphPatterns($idx) $item 628 } 629 } 630 } 631 } 632 } 633 EXCEPTIONS { 634 if {[regexp {^\}[.]*} $line]} { 635 # End of patterns encountered: set status 636 # and ignore that line 637 set status 0 638 continue 639 } else { 640 # to be done in the future 641 } 642 } 643 default { 644 if {[regexp {^\\endinput[.]*} $line]} { 645 # end of data encountered, stop processing and 646 # ignore all the following text .. 647 break 648 } elseif {[regexp {^\\patterns[.]*} $line]} { 649 # begin of patterns encountered: set status 650 # and ignore that line 651 set status PATTERNS 652 continue 653 } elseif {[regexp {^\\hyphenation[.]*} $line]} { 654 # some particular cases to be treated separately 655 set status EXCEPTIONS 656 continue 657 } else { 658 set status 0 659 } 660 } 661 } 662 } 663 664 close $fd 665 set HyphPatterns(_LOADED_) 1 666 667 return 668} 669 670####################################################### 671 672# @c The specified <a text>block is indented 673# @c by <a prefix>ing each line. The first 674# @c <a hang> lines ares skipped. 675# 676# @a text: The paragraph to indent. 677# @a prefix: The string to use as prefix for each line 678# @a prefix: of <a text> with. 679# @a skip: The number of lines at the beginning to leave untouched. 680# 681# @r Basically <a text>, but indented a certain amount. 682# 683# @i indent 684# @n This procedure is not checked by the testsuite. 685 686proc ::textutil::adjust::indent {text prefix {skip 0}} { 687 set text [string trimright $text] 688 689 set res [list] 690 foreach line [split $text \n] { 691 if {[string compare "" [string trim $line]] == 0} { 692 lappend res {} 693 } else { 694 set line [string trimright $line] 695 if {$skip <= 0} { 696 lappend res $prefix$line 697 } else { 698 lappend res $line 699 } 700 } 701 if {$skip > 0} {incr skip -1} 702 } 703 return [join $res \n] 704} 705 706# Undent the block of text: Compute LCP (restricted to whitespace!) 707# and remove that from each line. Note that this preverses the 708# shaping of the paragraph (i.e. hanging indent are _not_ flattened) 709# We ignore empty lines !! 710 711proc ::textutil::adjust::undent {text} { 712 713 if {$text == {}} {return {}} 714 715 set lines [split $text \n] 716 set ne [list] 717 foreach l $lines { 718 if {[string length [string trim $l]] == 0} continue 719 lappend ne $l 720 } 721 set lcp [::textutil::string::longestCommonPrefixList $ne] 722 723 if {[string length $lcp] == 0} {return $text} 724 725 regexp "^(\[\t \]*)" $lcp -> lcp 726 727 if {[string length $lcp] == 0} {return $text} 728 729 set len [string length $lcp] 730 731 set res [list] 732 foreach l $lines { 733 if {[string length [string trim $l]] == 0} { 734 lappend res {} 735 } else { 736 lappend res [string range $l $len end] 737 } 738 } 739 return [join $res \n] 740} 741 742# ### ### ### ######### ######### ######### 743## Data structures 744 745namespace eval ::textutil::adjust { 746 variable here [file dirname [info script]] 747 748 variable Justify left 749 variable Length 72 750 variable FullLine 0 751 variable StrictLength 0 752 variable Hyphenate 0 753 variable HyphPatterns 754 755 namespace export adjust indent undent 756} 757 758# ### ### ### ######### ######### ######### 759## Ready 760 761package provide textutil::adjust 0.7.3 762