1# # ## ### ##### ######## ############# ##################### 2## Copyright (c) 2013 Andreas Kupries, BSD licensed 3 4# # ## ### ##### ######## ############# ##################### 5## Requisites 6 7package require Tcl 8.5 8package require string::token 9 10# # ## ### ##### ######## ############# ##################### 11## API setup 12 13namespace eval ::string::token { 14 # Note: string::token claims the "text" and "file" commands. 15 namespace export shell 16 namespace ensemble create 17} 18 19proc ::string::token::shell {args} { 20 # result = list (word) 21 22 set partial 0 23 set indices 0 24 while {[llength $args]} { 25 switch -glob -- [set o [lindex $args 0]] { 26 -partial { set partial 1 } 27 -indices { set indices 1 } 28 -- { 29 set args [lrange $args 1 end] 30 break 31 } 32 -* { 33 # Unknown option. 34 return -code error \ 35 -errorcode {STRING TOKEN SHELL BAD OPTION} \ 36 "Bad option $o, expected one of -indices, or -partial" 37 } 38 * { 39 # Non-option, stop option processing. 40 break 41 } 42 } 43 set args [lrange $args 1 end] 44 } 45 if {[llength $args] != 1} { 46 return -code error \ 47 -errorcode {STRING TOKEN WRONG ARGS} \ 48 "wrong \# args: should be \"[lindex [info level 0] 0] ?-indices? ?-partial? ?--? text\"" 49 } else { 50 set text [lindex $args 0] 51 } 52 53 set space \\s 54 set lexer {} 55 lappend lexer ${space}+ WSPACE 56 lappend lexer {'[^']*'} S:QUOTED 57 lappend lexer "\"(\[^\"\]|(\\\\\")|(\\\\\\\\))*\"" D:QUOTED 58 lappend lexer "((\[^ $space'\"\])|(\\\\\")|(\\\\\\\\))+" PLAIN 59 60 if {$partial} { 61 lappend lexer {'[^']*$} S:QUOTED:PART 62 lappend lexer "\"(\[^\"\]|(\\\\\")|(\\\\\\\\))*$" D:QUOTED:PART 63 } 64 65 lappend lexer {.*} ERROR 66 67 set dequote [list \\" \" \\\\ \\ ] ; #" 68 69 set result {} 70 71 # Parsing of a shell line is a simple grammar, RE-equivalent 72 # actually, thus tractable with a plain finite state machine. 73 # 74 # States: 75 # - WS-WORD : Expected whitespace or word. 76 # - WS : Expected whitespace 77 # - WORD : Expected word. 78 79 # We may have leading whitespace. 80 set state WS-WORD 81 foreach token [text $lexer $text] { 82 lassign $token type start end 83 84 #puts "[format %7s $state] + ($token) = <<[string range $text $start $end]>>" 85 86 set changed 0 87 switch -glob -- ${type}/$state { 88 ERROR/* { 89 return -code error \ 90 -errorcode {STRING TOKEN SHELL BAD SYNTAX CHAR} \ 91 "Unexpected character '[string index $text $start]' at offset $start" 92 } 93 WSPACE/WORD { 94 # Impossible 95 return -code error \ 96 -errorcode {STRING TOKEN SHELL BAD SYNTAX WHITESPACE} \ 97 "Expected start of word, got whitespace at offset $start." 98 } 99 PLAIN/WS - 100 *:QUOTED*/WS { 101 return -code error \ 102 -errorcode {STRING TOKEN SHELL BAD SYNTAX WORD} \ 103 "Expected whitespace, got start of word at offset $start" 104 } 105 WSPACE/WS* { 106 # Ignore leading, inter-word, and trailing whitespace 107 # Must be followed by a word 108 set state WORD 109 } 110 S:QUOTED/*WORD { 111 # Quoted word, single, extract it, ignore delimiters. 112 # Must be followed by whitespace. 113 incr start 114 incr end -1 115 lappend result [string range $text $start $end] 116 set state WS 117 set changed 1 118 } 119 S:QUOTED:PART/*WORD { 120 # Quoted partial word (at end), single, extract it, ignore delimiter at start, none at end. 121 # Must be followed by nothing. 122 incr start 123 lappend result [string range $text $start $end] 124 set state WS 125 set changed 1 126 } 127 D:QUOTED/*WORD { 128 # Quoted word, double, extract it, ignore delimiters. 129 # Have to check for and reduce escaped double quotes and backslashes. 130 # Must be followed by whitespace. 131 incr start 132 incr end -1 133 lappend result [string map $dequote [string range $text $start $end]] 134 set state WS 135 set changed 1 136 } 137 D:QUOTED:PART/*WORD { 138 # Quoted word, double, extract it, ignore delimiter at start, none at end. 139 # Have to check for and reduce escaped double quotes and backslashes. 140 # Must be followed by nothing. 141 incr start 142 lappend result [string map $dequote [string range $text $start $end]] 143 set state WS 144 set changed 1 145 } 146 PLAIN/*WORD { 147 # Unquoted word. extract. 148 # Have to check for and reduce escaped double quotes and backslashes. 149 # Must be followed by whitespace. 150 lappend result [string map $dequote [string range $text $start $end]] 151 set state WS 152 set changed 1 153 } 154 * { 155 return -code error \ 156 -errorcode {STRING TOKEN SHELL INTERNAL} \ 157 "Illegal token/state combination $type/$state" 158 } 159 } 160 if {$indices && $changed} { 161 set last [lindex $result end] 162 set result [lreplace $result end end [list {*}$token $last]] 163 } 164 } 165 return $result 166} 167 168# # ## ### ##### ######## ############# ##################### 169## Ready 170 171package provide string::token::shell 1.2 172return 173