1# # ## ### ##### ######## ############# #####################
2## Copyright (c) 2013 Andreas Kupries, BSD licensed
3
4# # ## ### ##### ######## ############# #####################
5## Requisites
6
7package require Tcl 8.5
8package require string::token
9
10# # ## ### ##### ######## ############# #####################
11## API setup
12
13namespace eval ::string::token {
14    # Note: string::token claims the "text" and "file" commands.
15    namespace export shell
16    namespace ensemble create
17}
18
19proc ::string::token::shell {args} {
20    # result = list (word)
21
22    set partial 0
23    set indices 0
24    while {[llength $args]} {
25	switch -glob -- [set o [lindex $args 0]] {
26	    -partial { set partial 1 }
27	    -indices { set indices 1 }
28	    -- {
29		set args [lrange $args 1 end]
30		break
31	    }
32	    -* {
33		# Unknown option.
34		return -code error \
35		    -errorcode {STRING TOKEN SHELL BAD OPTION} \
36		    "Bad option $o, expected one of -indices, or -partial"
37	    }
38	    * {
39		# Non-option, stop option processing.
40		break
41	    }
42	}
43	set args [lrange $args 1 end]
44    }
45    if {[llength $args] != 1} {
46	return -code error \
47	    -errorcode {STRING TOKEN WRONG ARGS} \
48	    "wrong \# args: should be \"[lindex [info level 0] 0] ?-indices? ?-partial? ?--? text\""
49    } else {
50	set text [lindex $args 0]
51    }
52
53    set space    \\s
54    set     lexer {}
55    lappend lexer ${space}+                                  WSPACE
56    lappend lexer {'[^']*'}                                  S:QUOTED
57    lappend lexer "\"(\[^\"\]|(\\\\\")|(\\\\\\\\))*\""       D:QUOTED
58    lappend lexer "((\[^ $space'\"\])|(\\\\\")|(\\\\\\\\))+" PLAIN
59
60    if {$partial} {
61	lappend lexer {'[^']*$}                             S:QUOTED:PART
62	lappend lexer "\"(\[^\"\]|(\\\\\")|(\\\\\\\\))*$"   D:QUOTED:PART
63    }
64
65    lappend lexer {.*}                                       ERROR
66
67    set dequote [list \\" \" \\\\ \\ ] ; #"
68
69    set result {}
70
71    # Parsing of a shell line is a simple grammar, RE-equivalent
72    # actually, thus tractable with a plain finite state machine.
73    #
74    # States:
75    # - WS-WORD : Expected whitespace or word.
76    # - WS      : Expected whitespace
77    # - WORD    : Expected word.
78
79    # We may have leading whitespace.
80    set state WS-WORD
81    foreach token [text $lexer $text] {
82	lassign $token type start end
83
84	#puts "[format %7s $state] + ($token) = <<[string range $text $start $end]>>"
85
86	set changed 0
87	switch -glob -- ${type}/$state {
88	    ERROR/* {
89		return -code error \
90		    -errorcode {STRING TOKEN SHELL BAD SYNTAX CHAR} \
91		    "Unexpected character '[string index $text $start]' at offset $start"
92	    }
93	    WSPACE/WORD {
94		# Impossible
95		return -code error \
96		    -errorcode {STRING TOKEN SHELL BAD SYNTAX WHITESPACE} \
97		    "Expected start of word, got whitespace at offset $start."
98	    }
99	    PLAIN/WS -
100	    *:QUOTED*/WS {
101		return -code error \
102		    -errorcode {STRING TOKEN SHELL BAD SYNTAX WORD} \
103		    "Expected whitespace, got start of word at offset $start"
104	    }
105            WSPACE/WS* {
106		# Ignore leading, inter-word, and trailing whitespace
107		# Must be followed by a word
108		set state WORD
109	    }
110	    S:QUOTED/*WORD {
111		# Quoted word, single, extract it, ignore delimiters.
112		# Must be followed by whitespace.
113		incr start
114		incr end -1
115		lappend result [string range $text $start $end]
116		set state WS
117		set changed 1
118	    }
119	    S:QUOTED:PART/*WORD {
120		# Quoted partial word (at end), single, extract it, ignore delimiter at start, none at end.
121		# Must be followed by nothing.
122		incr start
123		lappend result [string range $text $start $end]
124		set state WS
125		set changed 1
126	    }
127	    D:QUOTED/*WORD {
128		# Quoted word, double, extract it, ignore delimiters.
129		# Have to check for and reduce escaped double quotes and backslashes.
130		# Must be followed by whitespace.
131		incr start
132		incr end -1
133		lappend result [string map $dequote [string range $text $start $end]]
134		set state WS
135		set changed 1
136	    }
137	    D:QUOTED:PART/*WORD {
138		# Quoted word, double, extract it, ignore delimiter at start, none at end.
139		# Have to check for and reduce escaped double quotes and backslashes.
140		# Must be followed by nothing.
141		incr start
142		lappend result [string map $dequote [string range $text $start $end]]
143		set state WS
144		set changed 1
145	    }
146	    PLAIN/*WORD {
147		# Unquoted word. extract.
148		# Have to check for and reduce escaped double quotes and backslashes.
149		# Must be followed by whitespace.
150		lappend result [string map $dequote [string range $text $start $end]]
151		set state WS
152		set changed 1
153	    }
154	    * {
155		return -code error \
156		    -errorcode {STRING TOKEN SHELL INTERNAL} \
157		    "Illegal token/state combination $type/$state"
158	    }
159        }
160	if {$indices && $changed} {
161	    set last [lindex $result end]
162	    set result [lreplace $result end end [list {*}$token $last]]
163	}
164    }
165    return $result
166}
167
168# # ## ### ##### ######## ############# #####################
169## Ready
170
171package provide string::token::shell 1.2
172return
173