1# -*- tcl -*- 2# 3# Copyright (c) 2009 by Andreas Kupries <andreas_kupries@users.sourceforge.net> 4# Operations with characters: (Un)quoting. 5 6# ### ### ### ######### ######### ######### 7## Requisites 8 9package require Tcl 8.5 10 11namespace eval char { 12 namespace export unquote quote 13 namespace ensemble create 14 namespace eval quote { 15 namespace export tcl string comment cstring 16 namespace ensemble create 17 } 18} 19 20# ### ### ### ######### ######### ######### 21## API 22 23proc ::char::unquote {args} { 24 if {1 == [llength $args]} { return [Unquote {*}$args] } 25 set res {} 26 foreach ch $args { lappend res [Unquote $ch] } 27 return $res 28} 29 30proc ::char::Unquote {ch} { 31 32 # A character, stored in quoted form is transformed back into a 33 # proper Tcl character (i.e. the internal representation). 34 35 switch -exact -- $ch { 36 "\\n" {return \n} 37 "\\t" {return \t} 38 "\\r" {return \r} 39 "\\[" {return \[} 40 "\\]" {return \]} 41 "\\'" {return '} 42 "\\\"" {return "\""} 43 "\\\\" {return \\} 44 } 45 46 if {[regexp {^\\([0-2][0-7][0-7])$} $ch -> ocode]} { 47 return [format %c $ocode] 48 49 } elseif {[regexp {^\\([0-7][0-7]?)$} $ch -> ocode]} { 50 return [format %c 0$ocode] 51 52 } elseif {[regexp {^\\u([[:xdigit:]][[:xdigit:]]?[[:xdigit:]]?[[:xdigit:]]?)$} $ch -> hcode]} { 53 return [format %c 0x$hcode] 54 55 } 56 57 return $ch 58} 59 60# ### ### ### ######### ######### ######### 61 62proc ::char::quote::tcl {ch args} { 63 Arg Tcl $ch {*}$args 64} 65 66proc ::char::quote::Tcl {ch} { 67 # Input: A single character 68 # Output: A string representing the input. 69 # Properties of the output: 70 # (1) Contains only ASCII characters (7bit Unicode subset). 71 # (2) When embedded in a ""-quoted Tcl string in a piece of Tcl 72 # code the Tcl parser will regenerate the input character. 73 74 # Special character? 75 switch -exact -- $ch { 76 "\n" {return "\\n"} 77 "\r" {return "\\r"} 78 "\t" {return "\\t"} 79 "\\" - "\;" - 80 " " - "\"" - 81 "(" - ")" - 82 "\{" - "\}" - 83 "\[" - "\]" { 84 # Quote space and all the brackets as well, using octal, 85 # for easy impure list-ness. 86 87 scan $ch %c chcode 88 return \\[format %o $chcode] 89 } 90 } 91 92 scan $ch %c chcode 93 94 # Control character? 95 if {[::string is control -strict $ch]} { 96 return \\[format %o $chcode] 97 } 98 99 # Unicode beyond 7bit ASCII? 100 if {$chcode > 127} { 101 return \\u[format %04x $chcode] 102 } 103 104 # Regular character: Is its own representation. 105 return $ch 106} 107 108# ### ### ### ######### ######### ######### 109 110proc ::char::quote::string {ch args} { 111 Arg String $ch {*}$args 112} 113 114proc ::char::quote::String {ch} { 115 # Input: A single character 116 # Output: A string representing the input 117 # Properties of the output 118 # (1) Human-readable, for use in error messages, or comments. 119 # (1a) Uses only printable characters. 120 # (2) NO particular properties with regard to C or Tcl parsers. 121 122 scan $ch %c chcode 123 124 # Map the ascii control characters to proper names. 125 if {($chcode <= 32) || ($chcode == 127)} { 126 variable strmap 127 return [dict get $strmap $chcode] 128 } 129 130 # Printable ascii characters represent themselves. 131 if {$chcode < 128} { 132 return $ch 133 } 134 135 # Unicode characters. Mostly represent themselves, except if 136 # control or not printable. Then they are represented by their 137 # codepoint. 138 139 # Control characters: Octal 140 if {[::string is control -strict $ch] || 141 ![::string is print -strict $ch]} { 142 return <U+[format %04x $chcode]> 143 } 144 145 return $ch 146} 147 148namespace eval ::char::quote { 149 variable strmap { 150 0 <NUL> 8 <BS> 16 <DLE> 24 <CAN> 32 <SPACE> 151 1 <SOH> 9 <TAB> 17 <DC1> 25 <EM> 127 <DEL> 152 2 <STX> 10 <LF> 18 <DC2> 26 <SUB> 153 3 <ETX> 11 <VTAB> 19 <DC3> 27 <ESC> 154 4 <EOT> 12 <FF> 20 <DC4> 28 <FS> 155 5 <ENQ> 13 <CR> 21 <NAK> 29 <GS> 156 6 <ACK> 14 <SO> 22 <SYN> 30 <RS> 157 7 <BEL> 15 <SI> 23 <ETB> 31 <US> 158 } 159} 160 161# ### ### ### ######### ######### ######### 162 163proc ::char::quote::cstring {ch args} { 164 Arg CString $ch {*}$args 165} 166 167proc ::char::quote::CString {ch} { 168 # Input: A single character 169 # Output: A string representing the input. 170 # Properties of the output: 171 # (1) Contains only ASCII characters (7bit Unicode subset). 172 # (2) When embedded in a ""-quoted C string in a piece of 173 # C code the C parser will regenerate the input character 174 # in UTF-8 encoding. 175 176 # Special characters (named). 177 switch -exact -- $ch { 178 "\n" {return "\\n"} 179 "\r" {return "\\r"} 180 "\t" {return "\\t"} 181 "\"" - "\\" { 182 return \\$ch 183 } 184 "\{" - "\}" { 185 # The generated C code containing the result of this 186 # transform may be embedded in Tcl code (Brace-quoted), 187 # i.e. like for a critcl-based package. To avoid tripping 188 # the Tcl parser with unbalanced braces we sacrifice 189 # readability of the generated code a bit and insert 190 # braces in their octal form. 191 scan $ch %c chcode 192 return \\[format %o $chcode] 193 } 194 } 195 196 scan $ch %c chcode 197 198 # Control characters: Octal 199 if {[::string is control -strict $ch]} { 200 return \\[format %o $chcode] 201 } 202 203 # Beyond 7-bit ASCII: Unicode 204 if {$chcode > 127} { 205 # Recode the character into the sequence of utf-8 bytes and 206 # convert each to octal. 207 foreach x [split [encoding convertto utf-8 $ch] {}] { 208 scan $x %c x 209 append res \\[format %o $x] 210 } 211 return $res 212 } 213 214 # Regular character: Is its own representation. 215 216 return $ch 217} 218 219# ### ### ### ######### ######### ######### 220 221proc ::char::quote::comment {ch args} { 222 Arg Comment $ch {*}$args 223} 224 225proc ::char::quote::Comment {ch} { 226 # Converts a Tcl character (internal representation) into a string 227 # which is accepted by the Tcl parser when used within a Tcl 228 # comment. 229 230 # Special characters 231 232 switch -exact -- $ch { 233 " " {return "<blank>"} 234 "\n" {return "\\n"} 235 "\r" {return "\\r"} 236 "\t" {return "\\t"} 237 "\"" - 238 "\{" - "\}" - 239 "(" - ")" { 240 return \\$ch 241 } 242 } 243 244 scan $ch %c chcode 245 246 # Control characters: Octal 247 if {[::string is control -strict $ch]} { 248 return \\[format %o $chcode] 249 } 250 251 # Beyond 7-bit ASCII: Unicode 252 253 if {$chcode > 127} { 254 return \\u[format %04x $chcode] 255 } 256 257 # Regular character: Is its own representation. 258 259 return $ch 260} 261 262# ### ### ### ######### ######### ######### 263## Internal. Argument processing helper 264 265proc ::char::quote::Arg {cmdpfx str args} { 266 # single argument => treat as string, 267 # process all characters separately. 268 # return transformed string. 269 if {![llength $args]} { 270 set r {} 271 foreach c [split $str {}] { 272 append r [uplevel 1 [linsert $cmdpfx end $c]] 273 } 274 return $r 275 } 276 277 # multiple arguments => process each like a single argument, and 278 # return list of transform results. 279 set args [linsert $args 0 $str] 280 foreach str $args { 281 lappend res [uplevel 1 [list Arg $cmdpfx $str]] 282 } 283 return $res 284} 285 286# ### ### ### ######### ######### ######### 287## Ready 288 289package provide char 1.0.1 290