1# trim.tcl --
2#
3#	Various ways of trimming a string.
4#
5# Copyright (c) 2000      by Ajuba Solutions.
6# Copyright (c) 2000      by Eric Melski <ericm@ajubasolutions.com>
7# Copyright (c) 2002-2004 by Johannes-Heinrich Vogeler <vogeler@users.sourceforge.net>
8# Copyright (c) 2001-2006 by Andreas Kupries <andreas_kupries@users.sourceforge.net>
9#
10# See the file "license.terms" for information on usage and redistribution
11# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
12#
13# RCS: @(#) $Id: adjust.tcl,v 1.16 2011/12/13 18:12:56 andreas_kupries Exp $
14
15# ### ### ### ######### ######### #########
16## Requirements
17
18package require Tcl 8.2
19package require textutil::repeat
20package require textutil::string
21
22namespace eval ::textutil::adjust {}
23
24# ### ### ### ######### ######### #########
25## API implementation
26
27namespace eval ::textutil::adjust {
28    namespace import -force ::textutil::repeat::strRepeat
29}
30
31proc ::textutil::adjust::adjust {text args} {
32    if {[string length [string trim $text]] == 0} {
33        return ""
34    }
35
36    Configure $args
37    Adjust text newtext
38
39    return $newtext
40}
41
42proc ::textutil::adjust::Configure {args} {
43    variable Justify      left
44    variable Length       72
45    variable FullLine     0
46    variable StrictLength 0
47    variable Hyphenate    0
48    variable HyphPatterns    ; # hyphenation patterns (TeX)
49
50    set args [ lindex $args 0 ]
51    foreach { option value } $args {
52	switch -exact -- $option {
53	    -full {
54		if { ![ string is boolean -strict $value ] } then {
55		    error "expected boolean but got \"$value\""
56		}
57		set FullLine [ string is true $value ]
58	    }
59	    -hyphenate {
60		# the word exceeding the length of line is tried to be
61		# hyphenated; if a word cannot be hyphenated to fit into
62		# the line processing stops! The length of the line should
63		# be set to a reasonable value!
64
65		if { ![ string is boolean -strict $value ] } then {
66		    error "expected boolean but got \"$value\""
67		}
68		set Hyphenate [string is true $value]
69		if { $Hyphenate && ![info exists HyphPatterns(_LOADED_)]} {
70		    error "hyphenation patterns not loaded!"
71		}
72	    }
73	    -justify {
74		set lovalue [ string tolower $value ]
75		switch -exact -- $lovalue {
76		    left -
77		    right -
78		    center -
79		    plain {
80			set Justify $lovalue
81		    }
82		    default {
83			error "bad value \"$value\": should be center, left, plain or right"
84		    }
85		}
86	    }
87	    -length {
88		if { ![ string is integer $value ] } then {
89		    error "expected positive integer but got \"$value\""
90		}
91		if { $value < 1 } then {
92		    error "expected positive integer but got \"$value\""
93		}
94		set Length $value
95	    }
96	    -strictlength {
97		# the word exceeding the length of line is moved to the
98		# next line without hyphenation; words longer than given
99		# line length are cut into smaller pieces
100
101		if { ![ string is boolean -strict $value ] } then {
102		    error "expected boolean but got \"$value\""
103		}
104		set StrictLength [ string is true $value ]
105	    }
106	    default {
107		error "bad option \"$option\": must be -full, -hyphenate, \
108			-justify, -length, or -strictlength"
109	    }
110	}
111    }
112
113    return ""
114}
115
116# ::textutil::adjust::Adjust
117#
118# History:
119#      rewritten on 2004-04-13 for bugfix tcllib-bugs-882402 (jhv)
120
121proc ::textutil::adjust::Adjust { varOrigName varNewName } {
122    variable Length
123    variable FullLine
124    variable StrictLength
125    variable Hyphenate
126
127    upvar $varOrigName orig
128    upvar $varNewName  text
129
130    set pos 0;                                   # Cursor after writing
131    set line ""
132    set text ""
133
134
135    if {!$FullLine} {
136	regsub -all -- "(\n)|(\t)"     $orig   " "  orig
137	regsub -all -- " +"            $orig  " "   orig
138	regsub -all -- "(^ *)|( *\$)"  $orig  ""    orig
139    }
140
141    set words [split $orig]
142    set numWords [llength $words]
143    set numline 0
144
145    for {set cnt 0} {$cnt < $numWords} {incr cnt} {
146
147	set w [lindex $words $cnt]
148	set wLen [string length $w]
149
150	# the word $w doesn't fit into the present line
151	# case #1: we try to hyphenate
152
153	if {$Hyphenate && ($pos+$wLen >= $Length)} {
154	    # Hyphenation instructions
155	    set w2 [textutil::adjust::Hyphenation $w]
156
157	    set iMax [llength $w2]
158	    if {$iMax == 1 && [string length $w] > $Length} {
159		# word cannot be hyphenated and exceeds linesize
160
161		error "Word \"$w2\" can\'t be hyphenated\
162			and exceeds linesize $Length!"
163	    } else {
164		# hyphenating of $w was successfull, but we have to look
165		# that every sylable would fit into the line
166
167		foreach x $w2 {
168		    if {[string length $x] >= $Length} {
169			error "Word \"$w\" can\'t be hyphenated\
170				to fit into linesize $Length!"
171		    }
172		}
173	    }
174
175	    for {set i 0; set w3 ""} {$i < $iMax} {incr i} {
176		set syl [lindex $w2 $i]
177		if {($pos+[string length " $w3$syl-"]) > $Length} {break}
178		append w3 $syl
179	    }
180	    for {set w4 ""} {$i < $iMax} {incr i} {
181		set syl [lindex $w2 $i]
182		append w4 $syl
183	    }
184
185	    if {[string length $w3] && [string length $w4]} {
186		# hyphenation was successfull: redefine
187		# list of words w => {"$w3-" "$w4"}
188
189		set x [lreplace $words $cnt $cnt "$w4"]
190		set words [linsert $x $cnt "$w3-"]
191		set w [lindex $words $cnt]
192		set wLen [string length $w]
193		incr numWords
194	    }
195	}
196
197	# the word $w doesn't fit into the present line
198	# case #2: we try to cut the word into pieces
199
200	if {$StrictLength && ([string length $w] > $Length)} {
201	    # cut word into two pieces
202	    set w2 $w
203
204	    set over [expr {$pos+2+$wLen-$Length}]
205
206	    incr Length -1
207	    set w3   [string range $w2 0 $Length]
208	    incr Length
209	    set w4   [string range $w2 $Length end]
210
211	    set x [lreplace $words $cnt $cnt $w4]
212	    set words [linsert $x $cnt $w3 ]
213	    set w [lindex $words $cnt]
214	    set wLen [string length $w]
215	    incr numWords
216	}
217
218	# continuing with the normal procedure
219
220	if {($pos+$wLen < $Length)} {
221	    # append word to current line
222
223	    if {$pos} {append line " "; incr pos}
224	    append line $w
225	    incr pos $wLen
226	} else {
227	    # line full => write buffer and  begin a new line
228
229	    if {[string length $text]} {append text "\n"}
230	    append text [Justification $line [incr numline]]
231	    set line $w
232	    set pos $wLen
233	}
234    }
235
236    # write buffer and return!
237
238    if {[string length $text]} {append text "\n"}
239    append text [Justification $line end]
240    return $text
241}
242
243# ::textutil::adjust::Justification
244#
245# justify a given line
246#
247# Parameters:
248#      line    text for justification
249#      index   index for line in text
250#
251# Returns:
252#      the justified line
253#
254# Remarks:
255#      Only lines with size not exceeding the max. linesize provided
256#      for text formatting are justified!!!
257
258proc ::textutil::adjust::Justification { line index } {
259    variable Justify
260    variable Length
261    variable FullLine
262
263    set len [string length $line];               # length of current line
264
265    if { $Length <= $len } then {
266	# the length of current line ($len) is equal as or greater than
267	# the value provided for text formatting ($Length) => to avoid
268	# inifinite loops we leave $line unchanged and return!
269
270	return $line
271    }
272
273    # Special case:
274    # for the last line, and if the justification is set to 'plain'
275    # the real justification is 'left' if the length of the line
276    # is less than 90% (rounded) of the max length allowed. This is
277    # to avoid expansion of this line when it is too small: without
278    # it, the added spaces will 'unbeautify' the result.
279    #
280
281    set justify $Justify
282    if { ( "$index" == "end" ) && \
283	    ( "$Justify" == "plain" ) && \
284	    ( $len < round($Length * 0.90) ) } then {
285	set justify left
286    }
287
288    # For a left justification, nothing to do, but to
289    # add some spaces at the end of the line if requested
290
291    if { "$justify" == "left" } then {
292	set jus ""
293	if { $FullLine } then {
294	    set jus [strRepeat " " [ expr { $Length - $len } ]]
295	}
296	return "${line}${jus}"
297    }
298
299    # For a right justification, just add enough spaces
300    # at the beginning of the line
301
302    if { "$justify" == "right" } then {
303	set jus [strRepeat " " [ expr { $Length - $len } ]]
304	return "${jus}${line}"
305    }
306
307    # For a center justification, add half of the needed spaces
308    # at the beginning of the line, and the rest at the end
309    # only if needed.
310
311    if { "$justify" == "center" } then {
312	set mr [ expr { ( $Length - $len ) / 2 } ]
313	set ml [ expr { $Length - $len - $mr } ]
314	set jusl [strRepeat " " $ml]
315	set jusr [strRepeat " " $mr]
316	if { $FullLine } then {
317	    return "${jusl}${line}${jusr}"
318	} else {
319	    return "${jusl}${line}"
320	}
321    }
322
323    # For a plain justification, it's a little bit complex:
324    #
325    # if some spaces are missing, then
326    #
327    # 1) sort the list of words in the current line by decreasing size
328    # 2) foreach word, add one space before it, except if it's the
329    #    first word, until enough spaces are added
330    # 3) rebuild the line
331
332    if { "$justify" == "plain" } then {
333	set miss [ expr { $Length - [ string length $line ] } ]
334
335	# Bugfix tcllib-bugs-860753 (jhv)
336
337	set words [split $line]
338	set numWords [llength $words]
339
340	if {$numWords < 2} {
341	    # current line consists of less than two words - we can't
342	    # insert blanks to achieve a plain justification => leave
343	    # $line unchanged and return!
344
345	    return $line
346	}
347
348	for {set i 0; set totalLen 0} {$i < $numWords} {incr i} {
349	    set w($i) [lindex $words $i]
350	    if {$i > 0} {set w($i) " $w($i)"}
351	    set wLen($i) [string length $w($i)]
352	    set totalLen [expr {$totalLen+$wLen($i)}]
353	}
354
355	set miss [expr {$Length - $totalLen}]
356
357	# len walks through all lengths of words of the line under
358	# consideration
359
360	for {set len 1} {$miss > 0} {incr len} {
361	    for {set i 1} {($i < $numWords) && ($miss > 0)} {incr i} {
362		if {$wLen($i) == $len} {
363		    set w($i) " $w($i)"
364		    incr wLen($i)
365		    incr miss -1
366		}
367	    }
368	}
369
370	set line ""
371	for {set i 0} {$i < $numWords} {incr i} {
372	    set line "$line$w($i)"
373	}
374
375	# End of bugfix
376
377	return "${line}"
378    }
379
380    error "Illegal justification key \"$justify\""
381}
382
383proc ::textutil::adjust::SortList { list dir index } {
384
385    if { [ catch { lsort -integer -$dir -index $index $list } sl ] != 0 } then {
386        error "$sl"
387    }
388
389    return $sl
390}
391
392# Hyphenation utilities based on Knuth's algorithm
393#
394# Copyright (C) 2001-2003 by Dr.Johannes-Heinrich Vogeler (jhv)
395# These procedures may be used as part of the tcllib
396
397# textutil::adjust::Hyphenation
398#
399#      Hyphenate a string using Knuth's algorithm
400#
401# Parameters:
402#      str     string to be hyphenated
403#
404# Returns:
405#      the hyphenated string
406
407proc ::textutil::adjust::Hyphenation { str } {
408
409    # if there are manual set hyphenation marks e.g. "Recht\-schrei\-bung"
410    # use these for hyphenation and return
411
412    if {[regexp {[^\\-]*[\\-][.]*} $str]} {
413	regsub -all {(\\)(-)} $str {-} tmp
414	return [split $tmp -]
415    }
416
417    # Don't hyphenate very short words! Minimum length for hyphenation
418    # is set to 3 characters!
419
420    if { [string length $str] < 4 } then { return $str }
421
422    # otherwise follow Knuth's algorithm
423
424    variable HyphPatterns;                       # hyphenation patterns (TeX)
425
426    set w ".[string tolower $str].";             # transform to lower case
427    set wLen [string length $w];                 # and add delimiters
428
429    # Initialize hyphenation weights
430
431    set s {}
432    for {set i 0} {$i < $wLen} {incr i} {
433	lappend s 0
434    }
435
436    for {set i 0} {$i < $wLen} {incr i} {
437	set kmax [expr {$wLen-$i}]
438	for {set k 1} {$k < $kmax} {incr k} {
439	    set sw [string range $w $i [expr {$i+$k}]]
440	    if {[info exists HyphPatterns($sw)]} {
441		set hw $HyphPatterns($sw)
442		set hwLen [string length $hw]
443		for {set l1 0; set l2 0} {$l1 < $hwLen} {incr l1} {
444		    set c [string index $hw $l1]
445		    if {[string is digit $c]} {
446			set sPos [expr {$i+$l2}]
447			if {$c > [lindex $s $sPos]} {
448			    set s [lreplace $s $sPos $sPos $c]
449			}
450		    } else {
451			incr l2
452		    }
453		}
454	    }
455	}
456    }
457
458    # Replace all even hyphenation weigths by zero
459
460    for {set i 0} {$i < [llength $s]} {incr i} {
461	set c [lindex $s $i]
462	if {!($c%2)} { set s [lreplace $s $i $i 0] }
463    }
464
465    # Don't start with a hyphen! Take also care of words enclosed in quotes
466    # or that someone has forgotten to put a blank between a punctuation
467    # character and the following word etc.
468
469    for {set i 1} {$i < ($wLen-1)} {incr i} {
470	set c [string range $w $i end]
471	if {[regexp {^[:alpha:][.]*} $c]} {
472	    for {set k 1} {$k < ($i+1)} {incr k} {
473		set s [lreplace $s $k $k 0]
474	    }
475	    break
476	}
477    }
478
479    # Don't separate the last character of a word with a hyphen
480
481    set max [expr {[llength $s]-2}]
482    if {$max} {set s [lreplace $s $max end 0]}
483
484    # return the syllabels of the hyphenated word as a list!
485
486    set ret ""
487    set w ".$str."
488    for {set i 1} {$i < ($wLen-1)} {incr i} {
489	if {[lindex $s $i]} { append ret - }
490	append ret [string index $w $i]
491    }
492    return [split $ret -]
493}
494
495# textutil::adjust::listPredefined
496#
497#      Return the names of the hyphenation files coming with the package.
498#
499# Parameters:
500#      None.
501#
502# Result:
503#       List of filenames (without directory)
504
505proc ::textutil::adjust::listPredefined {} {
506    variable here
507    return [glob -type f -directory $here -tails *.tex]
508}
509
510# textutil::adjust::getPredefined
511#
512#      Retrieve the full path for a predefined hyphenation file
513#       coming with the package.
514#
515# Parameters:
516#      name     Name of the predefined file.
517#
518# Results:
519#       Full path to the file, or an error if it doesn't
520#       exist or is matching the pattern *.tex.
521
522proc ::textutil::adjust::getPredefined {name} {
523    variable here
524
525    if {![string match *.tex $name]} {
526        return -code error \
527                "Illegal hyphenation file \"$name\""
528    }
529    set path [file join $here $name]
530    if {![file exists $path]} {
531        return -code error \
532                "Unknown hyphenation file \"$path\""
533    }
534    return $path
535}
536
537# textutil::adjust::readPatterns
538#
539#      Read hyphenation patterns from a file and store them in an array
540#
541# Parameters:
542#      filNam  name of the file containing the patterns
543
544proc ::textutil::adjust::readPatterns { filNam } {
545
546    variable HyphPatterns;                       # hyphenation patterns (TeX)
547
548    # HyphPatterns(_LOADED_) is used as flag for having loaded
549    # hyphenation patterns from the respective file (TeX format)
550
551    if {[info exists HyphPatterns(_LOADED_)]} {
552	unset HyphPatterns(_LOADED_)
553    }
554
555    # the array xlat provides translation from TeX encoded characters
556    # to those of the ISO-8859-1 character set
557
558    set xlat(\"s) \337;  # 223 := sharp s    "
559    set xlat(\`a) \340;  # 224 := a, grave
560    set xlat(\'a) \341;  # 225 := a, acute
561    set xlat(\^a) \342;  # 226 := a, circumflex
562    set xlat(\"a) \344;  # 228 := a, diaeresis "
563    set xlat(\`e) \350;  # 232 := e, grave
564    set xlat(\'e) \351;  # 233 := e, acute
565    set xlat(\^e) \352;  # 234 := e, circumflex
566    set xlat(\`i) \354;  # 236 := i, grave
567    set xlat(\'i) \355;  # 237 := i, acute
568    set xlat(\^i) \356;  # 238 := i, circumflex
569    set xlat(\~n) \361;  # 241 := n, tilde
570    set xlat(\`o) \362;  # 242 := o, grave
571    set xlat(\'o) \363;  # 243 := o, acute
572    set xlat(\^o) \364;  # 244 := o, circumflex
573    set xlat(\"o) \366;  # 246 := o, diaeresis "
574    set xlat(\`u) \371;  # 249 := u, grave
575    set xlat(\'u) \372;  # 250 := u, acute
576    set xlat(\^u) \373;  # 251 := u, circumflex
577    set xlat(\"u) \374;  # 252 := u, diaeresis "
578
579    set fd [open $filNam RDONLY]
580    set status 0
581
582    while {[gets $fd line] >= 0} {
583
584	switch -exact $status {
585	    PATTERNS {
586		if {[regexp {^\}[.]*} $line]} {
587		    # End of patterns encountered: set status
588		    # and ignore that line
589		    set status 0
590		    continue
591		} else {
592		    # This seems to be pattern definition line; to process it
593		    # we have first to do some editing
594		    #
595		    # 1) eat comments in a pattern definition line
596		    # 2) eat braces and coded linefeeds
597
598		    set z [string first "%" $line]
599		    if {$z > 0} { set line [string range $line 0 [expr {$z-1}]] }
600
601		    regsub -all {(\\n|\{|\})} $line {} tmp
602		    set line $tmp
603
604		    # Now $line should consist only of hyphenation patterns
605		    # separated by white space
606
607		    # Translate TeX encoded characters to ISO-8859-1 characters
608		    # using the array xlat defined above
609
610		    foreach x [array names xlat] {
611			regsub -all {$x} $line $xlat($x) tmp
612			set line $tmp
613		    }
614
615		    # split the line and create a lookup array for
616		    # the repective hyphenation patterns
617
618		    foreach item [split $line] {
619			if {[string length $item]} {
620			    if {![string match {\\} $item]} {
621				# create index for hyphenation patterns
622
623				set var $item
624				regsub -all {[0-9]} $var {} idx
625				# store hyphenation patterns as elements of an array
626
627				set HyphPatterns($idx) $item
628			    }
629			}
630		    }
631		}
632	    }
633	    EXCEPTIONS {
634		if {[regexp {^\}[.]*} $line]} {
635		    # End of patterns encountered: set status
636		    # and ignore that line
637		    set status 0
638		    continue
639		} else {
640		    # to be done in the future
641		}
642	    }
643	    default {
644		if {[regexp {^\\endinput[.]*} $line]} {
645		    # end of data encountered, stop processing and
646		    # ignore all the following text ..
647		    break
648		} elseif {[regexp {^\\patterns[.]*} $line]} {
649		    # begin of patterns encountered: set status
650		    # and ignore that line
651		    set status PATTERNS
652		    continue
653		} elseif {[regexp {^\\hyphenation[.]*} $line]} {
654		    # some particular cases to be treated separately
655		    set status EXCEPTIONS
656		    continue
657		} else {
658		    set status 0
659		}
660	    }
661	}
662    }
663
664    close $fd
665    set HyphPatterns(_LOADED_) 1
666
667    return
668}
669
670#######################################################
671
672# @c The specified <a text>block is indented
673# @c by <a prefix>ing each line. The first
674# @c <a hang> lines ares skipped.
675#
676# @a text:   The paragraph to indent.
677# @a prefix: The string to use as prefix for each line
678# @a prefix: of <a text> with.
679# @a skip:   The number of lines at the beginning to leave untouched.
680#
681# @r Basically <a text>, but indented a certain amount.
682#
683# @i indent
684# @n This procedure is not checked by the testsuite.
685
686proc ::textutil::adjust::indent {text prefix {skip 0}} {
687    set text [string trimright $text]
688
689    set res [list]
690    foreach line [split $text \n] {
691	if {[string compare "" [string trim $line]] == 0} {
692	    lappend res {}
693	} else {
694	    set line [string trimright $line]
695	    if {$skip <= 0} {
696		lappend res $prefix$line
697	    } else {
698		lappend res $line
699	    }
700	}
701	if {$skip > 0} {incr skip -1}
702    }
703    return [join $res \n]
704}
705
706# Undent the block of text: Compute LCP (restricted to whitespace!)
707# and remove that from each line. Note that this preverses the
708# shaping of the paragraph (i.e. hanging indent are _not_ flattened)
709# We ignore empty lines !!
710
711proc ::textutil::adjust::undent {text} {
712
713    if {$text == {}} {return {}}
714
715    set lines [split $text \n]
716    set ne [list]
717    foreach l $lines {
718	if {[string length [string trim $l]] == 0} continue
719	lappend ne $l
720    }
721    set lcp [::textutil::string::longestCommonPrefixList $ne]
722
723    if {[string length $lcp] == 0} {return $text}
724
725    regexp "^(\[\t \]*)" $lcp -> lcp
726
727    if {[string length $lcp] == 0} {return $text}
728
729    set len [string length $lcp]
730
731    set res [list]
732    foreach l $lines {
733	if {[string length [string trim $l]] == 0} {
734	    lappend res {}
735	} else {
736	    lappend res [string range $l $len end]
737	}
738    }
739    return [join $res \n]
740}
741
742# ### ### ### ######### ######### #########
743## Data structures
744
745namespace eval ::textutil::adjust {
746    variable here [file dirname [info script]]
747
748    variable Justify      left
749    variable Length       72
750    variable FullLine     0
751    variable StrictLength 0
752    variable Hyphenate    0
753    variable HyphPatterns
754
755    namespace export adjust indent undent
756}
757
758# ### ### ### ######### ######### #########
759## Ready
760
761package provide textutil::adjust 0.7.3
762