1# -*- tcl -*-
2#
3# Copyright (c) 2009 by Andreas Kupries <andreas_kupries@users.sourceforge.net>
4# Operations with characters: (Un)quoting.
5
6# ### ### ### ######### ######### #########
7## Requisites
8
9package require Tcl 8.5
10
11namespace eval char {
12    namespace export unquote quote
13    namespace ensemble create
14    namespace eval quote {
15	namespace export tcl string comment cstring
16	namespace ensemble create
17    }
18}
19
20# ### ### ### ######### ######### #########
21## API
22
23proc ::char::unquote {args} {
24    if {1 == [llength $args]} { return [Unquote {*}$args] }
25    set res {}
26    foreach ch $args { lappend res [Unquote $ch] }
27    return $res
28}
29
30proc ::char::Unquote {ch} {
31
32    # A character, stored in quoted form is transformed back into a
33    # proper Tcl character (i.e. the internal representation).
34
35    switch -exact -- $ch {
36	"\\n"  {return \n}
37	"\\t"  {return \t}
38	"\\r"  {return \r}
39	"\\["  {return \[}
40	"\\]"  {return \]}
41	"\\'"  {return '}
42	"\\\"" {return "\""}
43	"\\\\" {return \\}
44    }
45
46    if {[regexp {^\\([0-2][0-7][0-7])$} $ch -> ocode]} {
47	return [format %c $ocode]
48
49    } elseif {[regexp {^\\([0-7][0-7]?)$} $ch -> ocode]} {
50	return [format %c 0$ocode]
51
52    } elseif {[regexp {^\\u([[:xdigit:]][[:xdigit:]]?[[:xdigit:]]?[[:xdigit:]]?)$} $ch -> hcode]} {
53	return [format %c 0x$hcode]
54
55    }
56
57    return $ch
58}
59
60# ### ### ### ######### ######### #########
61
62proc ::char::quote::tcl {ch args} {
63    Arg Tcl $ch {*}$args
64}
65
66proc ::char::quote::Tcl {ch} {
67    # Input:  A single character
68    # Output: A string representing the input.
69    # Properties of the output:
70    # (1) Contains only ASCII characters (7bit Unicode subset).
71    # (2) When embedded in a ""-quoted Tcl string in a piece of Tcl
72    #     code the Tcl parser will regenerate the input character.
73
74    # Special character?
75    switch -exact -- $ch {
76	"\n" {return "\\n"}
77	"\r" {return "\\r"}
78	"\t" {return "\\t"}
79	"\\" - "\;" -
80	" "  - "\"" -
81	"("  - ")"  -
82	"\{" - "\}" -
83	"\[" - "\]" {
84	    # Quote space and all the brackets as well, using octal,
85	    # for easy impure list-ness.
86
87	    scan $ch %c chcode
88	    return \\[format %o $chcode]
89	}
90    }
91
92    scan $ch %c chcode
93
94    # Control character?
95    if {[::string is control -strict $ch]} {
96	return \\[format %o $chcode]
97    }
98
99    # Unicode beyond 7bit ASCII?
100    if {$chcode > 127} {
101	return \\u[format %04x $chcode]
102    }
103
104    # Regular character: Is its own representation.
105    return $ch
106}
107
108# ### ### ### ######### ######### #########
109
110proc ::char::quote::string {ch args} {
111    Arg String $ch {*}$args
112}
113
114proc ::char::quote::String {ch} {
115    # Input:  A single character
116    # Output: A string representing the input
117    # Properties of the output
118    # (1) Human-readable, for use in error messages, or comments.
119    # (1a) Uses only printable characters.
120    # (2) NO particular properties with regard to C or Tcl parsers.
121
122    scan $ch %c chcode
123
124    # Map the ascii control characters to proper names.
125    if {($chcode <= 32) || ($chcode == 127)} {
126	variable strmap
127	return [dict get $strmap $chcode]
128    }
129
130    # Printable ascii characters represent themselves.
131    if {$chcode < 128} {
132	return $ch
133    }
134
135    # Unicode characters. Mostly represent themselves, except if
136    # control or not printable. Then they are represented by their
137    # codepoint.
138
139    # Control characters: Octal
140    if {[::string is control -strict $ch] ||
141	![::string is print -strict $ch]} {
142	return <U+[format %04x $chcode]>
143    }
144
145    return $ch
146}
147
148namespace eval ::char::quote {
149    variable strmap {
150	0 <NUL>  8 <BS>   16 <DLE> 24 <CAN>  32 <SPACE>
151	1 <SOH>  9 <TAB>  17 <DC1> 25 <EM>  127 <DEL>
152	2 <STX> 10 <LF>   18 <DC2> 26 <SUB>
153	3 <ETX> 11 <VTAB> 19 <DC3> 27 <ESC>
154	4 <EOT> 12 <FF>   20 <DC4> 28 <FS>
155	5 <ENQ> 13 <CR>   21 <NAK> 29 <GS>
156	6 <ACK> 14 <SO>   22 <SYN> 30 <RS>
157	7 <BEL> 15 <SI>   23 <ETB> 31 <US>
158    }
159}
160
161# ### ### ### ######### ######### #########
162
163proc ::char::quote::cstring {ch args} {
164    Arg CString $ch {*}$args
165}
166
167proc ::char::quote::CString {ch} {
168    # Input:  A single character
169    # Output: A string representing the input.
170    # Properties of the output:
171    # (1) Contains only ASCII characters (7bit Unicode subset).
172    # (2) When embedded in a ""-quoted C string in a piece of
173    #     C code the C parser will regenerate the input character
174    #     in UTF-8 encoding.
175
176    # Special characters (named).
177    switch -exact -- $ch {
178	"\n" {return "\\n"}
179	"\r" {return "\\r"}
180	"\t" {return "\\t"}
181	"\"" - "\\" {
182	    return \\$ch
183	}
184	"\{" - "\}" {
185	    # The generated C code containing the result of this
186	    # transform may be embedded in Tcl code (Brace-quoted),
187	    # i.e. like for a critcl-based package. To avoid tripping
188	    # the Tcl parser with unbalanced braces we sacrifice
189	    # readability of the generated code a bit and insert
190	    # braces in their octal form.
191	    scan $ch %c chcode
192	    return \\[format %o $chcode]
193	}
194    }
195
196    scan $ch %c chcode
197
198    # Control characters: Octal
199    if {[::string is control -strict $ch]} {
200	return \\[format %o $chcode]
201    }
202
203    # Beyond 7-bit ASCII: Unicode
204    if {$chcode > 127} {
205	# Recode the character into the sequence of utf-8 bytes and
206	# convert each to octal.
207	foreach x [split [encoding convertto utf-8 $ch] {}] {
208	    scan $x %c x
209	    append res \\[format %o $x]
210	}
211	return $res
212    }
213
214    # Regular character: Is its own representation.
215
216    return $ch
217}
218
219# ### ### ### ######### ######### #########
220
221proc ::char::quote::comment {ch args} {
222    Arg Comment $ch {*}$args
223}
224
225proc ::char::quote::Comment {ch} {
226    # Converts a Tcl character (internal representation) into a string
227    # which is accepted by the Tcl parser when used within a Tcl
228    # comment.
229
230    # Special characters
231
232    switch -exact -- $ch {
233	" "  {return "<blank>"}
234	"\n" {return "\\n"}
235	"\r" {return "\\r"}
236	"\t" {return "\\t"}
237	"\"" -
238	"\{" - "\}" -
239	"("  - ")"  {
240	    return \\$ch
241	}
242    }
243
244    scan $ch %c chcode
245
246    # Control characters: Octal
247    if {[::string is control -strict $ch]} {
248	return \\[format %o $chcode]
249    }
250
251    # Beyond 7-bit ASCII: Unicode
252
253    if {$chcode > 127} {
254	return \\u[format %04x $chcode]
255    }
256
257    # Regular character: Is its own representation.
258
259    return $ch
260}
261
262# ### ### ### ######### ######### #########
263## Internal. Argument processing helper
264
265proc ::char::quote::Arg {cmdpfx str args} {
266    # single argument => treat as string,
267    # process all characters separately.
268    # return transformed string.
269    if {![llength $args]} {
270	set r {}
271	foreach c [split $str {}] {
272	    append r [uplevel 1 [linsert $cmdpfx end $c]]
273	}
274	return $r
275    }
276
277    # multiple arguments => process each like a single argument, and
278    # return list of transform results.
279    set args [linsert $args 0 $str]
280    foreach str $args {
281	lappend res [uplevel 1 [list Arg $cmdpfx $str]]
282    }
283    return $res
284}
285
286# ### ### ### ######### ######### #########
287## Ready
288
289package provide char 1.0.1
290