1#
2# As the author of the procs 'tabify2' and 'untabify2' I suggest that the
3# comments explaining their behaviour be kept in this file.
4# 1) Beginners in any programming language (I am new to Tcl so I know what I
5#    am talking about) can profit enormously from studying 'correct' code.
6#    Of course comments will help a lot in this regard.
7# 2) Many problems newbies face can be solved by directing them towards
8#    available libraries - after all, libraries have been written to solve
9#    recurring problems. Then they can just use them, or have a closer look
10#    to see and to discover how things are done the 'Tcl way'.
11# 3) And if ever a proc from a library should be less than perfect, having
12#    comments explaining the behaviour of the code will surely help.
13#
14# This said, I will welcome any error reports or suggestions for improvements
15# (especially on the 'doing things the Tcl way' aspect).
16#
17# Use of these sources is licensed under the same conditions as is Tcl.
18#
19# June 2001, Helmut Giese (hgiese@ratiosoft.com)
20#
21# ----------------------------------------------------------------------------
22#
23# The original procs 'tabify' and 'untabify' each work with complete blocks
24# of $num spaces ('num' holding the tab size). While this is certainly useful
25# in some circumstances, it does not reflect the way an editor works:
26# 	Counting columns from 1, assuming a tab size of 8 and entering '12345'
27#   followed by a tab, you expect to advance to column 9. Your editor might
28#   put a tab into the file or 3 spaces, depending on its configuration.
29#	Now, on 'tabifying' you will expect to see those 3 spaces converted to a
30#	tab (and on the other hand expect the tab *at this position* to be
31#	converted to 3 spaces).
32#
33#	This behaviour is mimicked by the new procs 'tabify2' and 'untabify2'.
34#   Both have one feature in common: They accept multi-line strings (a whole
35#   file if you want to) but in order to make life simpler for the programmer,
36#   they split the incoming string into individual lines and hand each line to
37#   a proc that does the real work.
38#
39#   One design decision worth mentioning here:
40#      A single space is never converted to a tab even if its position would
41#      allow to do so.
42#   Single spaces occur very often, say in arithmetic expressions like
43#   [expr (($a + $b) * $c) < $d]. If we didn't follow the above rule we might
44#   need to replace one or more of them to tabs. However if the tab size gets
45#   changed, this expression would be formatted quite differently - which is
46#   probably not a good idea.
47#
48#   'untabifying' on the other hand might need to replace a tab with a single
49#   space: If the current position requires it, what else to do?
50#   As a consequence those two procs are unsymmetric in this aspect, but I
51#   couldn't think of a better solution. Could you?
52#
53# ----------------------------------------------------------------------------
54#
55
56# ### ### ### ######### ######### #########
57## Requirements
58
59package require Tcl 8.2
60package require textutil::repeat
61
62namespace eval ::textutil::tabify {}
63
64# ### ### ### ######### ######### #########
65## API implementation
66
67namespace eval ::textutil::tabify {
68    namespace import -force ::textutil::repeat::strRepeat
69}
70
71proc ::textutil::tabify::tabify { string { num 8 } } {
72    return [string map [list [MakeTabStr $num] \t] $string]
73}
74
75proc ::textutil::tabify::untabify { string { num 8 } } {
76    return [string map [list \t [MakeTabStr $num]] $string]
77}
78
79proc ::textutil::tabify::MakeTabStr { num } {
80    variable TabStr
81    variable TabLen
82
83    if { $TabLen != $num } then {
84	set TabLen $num
85	set TabStr [strRepeat " " $num]
86    }
87
88    return $TabStr
89}
90
91# ----------------------------------------------------------------------------
92#
93# tabifyLine: Works on a single line of text, replacing 'spaces at correct
94# 		positions' with tabs. $num is the requested tab size.
95#		Returns the (possibly modified) line.
96#
97# 'spaces at correct positions': Only spaces which 'fill the space' between
98# an arbitrary position and the next tab stop can be replaced.
99# Example: With tab size 8, spaces at positions 11 - 13 will *not* be replaced,
100#          because an expansion of a tab at position 11 will jump up to 16.
101# See also the comment at the beginning of this file why single spaces are
102# *never* replaced by a tab.
103#
104# The proc works backwards, from the end of the string up to the beginning:
105#	- Set the position to start the search from ('lastPos') to 'end'.
106#	- Find the last occurrence of ' ' in 'line' with respect to 'lastPos'
107#         ('currPos' below). This is a candidate for replacement.
108#       - Find to 'currPos' the following tab stop using the expression
109#           set nextTab [expr ($currPos + $num) - ($currPos % $num)]
110#         and get the previous tab stop as well (this will be the starting
111#         point for the next iteration).
112#	- The ' ' at 'currPos' is only a candidate for replacement if
113#	  1) it is just one position before a tab stop *and*
114#	  2) there is at least one space at its left (see comment above on not
115#	     touching an isolated space).
116#	  Continue, if any of these conditions is not met.
117#	- Determine where to put the tab (that is: how many spaces to replace?)
118#	  by stepping up to the beginning until
119#		-- you hit a non-space or
120#		-- you are at the previous tab position
121#	- Do the replacement and continue.
122#
123# This algorithm only works, if $line does not contain tabs. Otherwise our
124# interpretation of any position beyond the tab will be wrong. (Imagine you
125# find a ' ' at position 4 in $line. If you got 3 leading tabs, your *real*
126# position might be 25 (tab size of 8). Since in real life some strings might
127# already contain tabs, we test for it (and eventually call untabifyLine).
128#
129
130proc ::textutil::tabify::tabifyLine { line num } {
131    if { [string first \t $line] != -1 } {
132	# assure array 'Spaces' is set up 'comme il faut'
133	checkArr $num
134	# remove existing tabs
135	set line [untabifyLine $line $num]
136    }
137
138    set lastPos end
139
140    while { $lastPos > 0 } {
141	set currPos [string last " " $line $lastPos]
142	if { $currPos == -1 } {
143	    # no more spaces
144	    break;
145	}
146
147	set nextTab [expr {($currPos + $num) - ($currPos % $num)}]
148	set prevTab [expr {$nextTab - $num}]
149
150	# prepare for next round: continue at 'previous tab stop - 1'
151	set lastPos [expr {$prevTab - 1}]
152
153	if { ($currPos + 1) != $nextTab } {
154	    continue			;# crit. (1)
155	}
156
157	if { [string index $line [expr {$currPos - 1}]] != " " } {
158	    continue			;# crit. (2)
159	}
160
161	# now step backwards while there are spaces
162	for {set pos [expr {$currPos - 2}]} {$pos >= $prevTab} {incr pos -1} {
163	    if { [string index $line $pos] != " " } {
164		break;
165	    }
166	}
167
168	# ... and replace them
169	set line [string replace $line [expr {$pos + 1}] $currPos \t]
170    }
171    return $line
172}
173
174#
175# Helper proc for 'untabifyLine': Checks if all needed elements of array
176# 'Spaces' exist and creates the missing ones if needed.
177#
178
179proc ::textutil::tabify::checkArr { num } {
180    variable TabLen2
181    variable Spaces
182
183    if { $num > $TabLen2 } {
184	for { set i [expr {$TabLen2 + 1}] } { $i <= $num } { incr i } {
185	    set Spaces($i) [strRepeat " " $i]
186	}
187	set TabLen2 $num
188    }
189}
190
191
192# untabifyLine: Works on a single line of text, replacing tabs with enough
193#		spaces to get to the next tab position.
194#		Returns the (possibly modified) line.
195#
196# The procedure is straight forward:
197#	- Find the next tab.
198#	- Calculate the next tab position following it.
199#	- Delete the tab and insert as many spaces as needed to get there.
200#
201
202proc ::textutil::tabify::untabifyLine { line num } {
203    variable Spaces
204
205    set currPos 0
206    while { 1 } {
207	set currPos [string first \t $line $currPos]
208	if { $currPos == -1 } {
209	    # no more tabs
210	    break
211	}
212
213	# how far is the next tab position ?
214	set dist [expr {$num - ($currPos % $num)}]
215	# replace '\t' at $currPos with $dist spaces
216	set line [string replace $line $currPos $currPos $Spaces($dist)]
217
218	# set up for next round (not absolutely necessary but maybe a trifle
219	# more efficient)
220	incr currPos $dist
221    }
222    return $line
223}
224
225# tabify2: Replace all 'appropriate' spaces as discussed above with tabs.
226#	'string' might hold any number of lines, 'num' is the requested tab size.
227#	Returns (possibly modified) 'string'.
228#
229proc ::textutil::tabify::tabify2 { string { num 8 } } {
230
231    # split string into individual lines
232    set inLst [split $string \n]
233
234    # now work on each line
235    set outLst [list]
236    foreach line $inLst {
237	lappend outLst [tabifyLine $line $num]
238    }
239
240    # return all as one string
241    return [join $outLst \n]
242}
243
244
245# untabify2: Replace all tabs with the appropriate number of spaces.
246#	'string' might hold any number of lines, 'num' is the requested tab size.
247#	Returns (possibly modified) 'string'.
248#
249proc ::textutil::tabify::untabify2 { string { num 8 } } {
250
251    # assure array 'Spaces' is set up 'comme il faut'
252    checkArr $num
253
254    set inLst [split $string \n]
255
256    set outLst [list]
257    foreach line $inLst {
258	lappend outLst [untabifyLine $line $num]
259    }
260
261    return [join $outLst \n]
262}
263
264
265
266# ### ### ### ######### ######### #########
267## Data structures
268
269namespace eval ::textutil::tabify {
270    variable TabLen  8
271    variable TabStr  [strRepeat " " $TabLen]
272
273    namespace export tabify untabify tabify2 untabify2
274
275    # The proc 'untabify2' uses the following variables for efficiency.
276    # Since a tab can be replaced by one up to 'tab size' spaces, it is handy
277    # to have the appropriate 'space strings' available. This is the use of
278    # the array 'Spaces', where 'Spaces(n)' contains just 'n' spaces.
279    # The variable 'TabLen2' remembers the biggest tab size used.
280
281    variable  TabLen2 0
282    variable  Spaces
283    array set Spaces {0 ""}
284}
285
286# ### ### ### ######### ######### #########
287## Ready
288
289package provide textutil::tabify 0.7
290