1<?php
2
3
4namespace Stecman\Component\Symfony\Console\BashCompletion;
5
/**
 * Command line context for completion
 *
 * Represents the current state of the command line that is being completed:
 * the raw line, the cursor position, and the words derived from the two.
 *
 * The word list is computed lazily on first access and is discarded whenever
 * the command line, cursor position or word break characters are changed.
 */
class CompletionContext
{
    /**
     * The current contents of the command line as a single string
     *
     * Bash equivalent: COMP_LINE
     *
     * @var string
     */
    protected $commandLine;

    /**
     * The index of the user's cursor relative to the start of the command line.
     *
     * If the current cursor position is at the end of the current command,
     * the value of this variable is equal to the length of $this->commandLine
     *
     * Bash equivalent: COMP_POINT
     *
     * @var int
     */
    protected $charIndex = 0;

    /**
     * An array of the individual words in the current command line.
     *
     * This is not set until $this->splitCommand() is called, when it is populated by
     * $commandLine exploded by $wordBreaks, with quotes and escaping removed.
     *
     * Bash equivalent: COMP_WORDS
     *
     * @var string[]|null
     */
    protected $words = null;

    /**
     * Words from the current command line before quotes and escaping are processed
     *
     * This is indexed the same as $this->words, but each entry is kept in its input form,
     * including quotes and escaping.
     *
     * @var string[]|null
     */
    protected $rawWords = null;

    /**
     * The index in $this->words containing the word at the current cursor position.
     *
     * This is not set until $this->splitCommand() is called.
     *
     * Bash equivalent: COMP_CWORD
     *
     * @var int|null
     */
    protected $wordIndex = null;

    /**
     * Characters that $this->commandLine should be split on to get a list of individual words
     *
     * Bash equivalent: COMP_WORDBREAKS
     *
     * @var string
     */
    protected $wordBreaks = "= \t\n";

    /**
     * Set the whole contents of the command line as a string
     *
     * Any previously computed words are discarded.
     *
     * @param string $commandLine
     */
    public function setCommandLine($commandLine)
    {
        $this->commandLine = $commandLine;
        $this->reset();
    }

    /**
     * Return the current command line verbatim as a string
     *
     * @return string
     */
    public function getCommandLine()
    {
        return $this->commandLine;
    }

    /**
     * Return the word from the command line that the cursor is currently in
     *
     * Most of the time this will be a partial word. If the cursor has a space before it,
     * this will return an empty string, indicating a new word.
     *
     * @return string
     */
    public function getCurrentWord()
    {
        // Go through the lazy getters so the command line is split on first use
        $words = $this->getWords();
        $index = $this->getWordIndex();

        if (isset($words[$index])) {
            return $words[$index];
        }

        return '';
    }

    /**
     * Return the unprocessed string for the word under the cursor
     *
     * This preserves any quotes and escaping that are present in the input command line.
     *
     * @return string
     */
    public function getRawCurrentWord()
    {
        // Go through the lazy getters so the command line is split on first use
        $rawWords = $this->getRawWords();
        $index = $this->getWordIndex();

        if (isset($rawWords[$index])) {
            return $rawWords[$index];
        }

        return '';
    }

    /**
     * Return a word by index from the command line
     *
     * @see $words, $wordBreaks
     * @param int $index
     * @return string - the word at $index, or an empty string if there is no word at that index
     */
    public function getWordAtIndex($index)
    {
        $words = $this->getWords();

        if (isset($words[$index])) {
            return $words[$index];
        }

        return '';
    }

    /**
     * Get the contents of the command line, exploded into words based on the configured word break characters
     *
     * @see $wordBreaks, setWordBreaks
     * @return array
     */
    public function getWords()
    {
        if ($this->words === null) {
            $this->splitCommand();
        }

        return $this->words;
    }

    /**
     * Get the unprocessed/literal words from the command line
     *
     * This is indexed the same as getWords(), but preserves any quoting and escaping from the command line
     *
     * @return string[]
     */
    public function getRawWords()
    {
        if ($this->rawWords === null) {
            $this->splitCommand();
        }

        return $this->rawWords;
    }

    /**
     * Get the index of the word the cursor is currently in
     *
     * @see getWords, getCurrentWord
     * @return int
     */
    public function getWordIndex()
    {
        if ($this->wordIndex === null) {
            $this->splitCommand();
        }

        return $this->wordIndex;
    }

    /**
     * Get the character index of the user's cursor on the command line
     *
     * This is in the context of the full command line string, so includes word break characters.
     * Note that some shells can only provide an approximation for character index. Under ZSH for
     * example, this will always be the character at the start of the current word.
     *
     * @return int
     */
    public function getCharIndex()
    {
        return $this->charIndex;
    }

    /**
     * Set the cursor position as a character index relative to the start of the command line
     *
     * Any previously computed words are discarded.
     *
     * @param int $index
     */
    public function setCharIndex($index)
    {
        $this->charIndex = $index;
        $this->reset();
    }

    /**
     * Set characters to use as split points when breaking the command line into words
     *
     * This defaults to a sane value based on BASH's word break characters and shouldn't
     * need to be changed unless your completions contain the default word break characters.
     *
     * @deprecated This is becoming an internal setting that doesn't make sense to expose publicly.
     *
     * @see wordBreaks
     * @param string $charList - a single string containing all of the characters to break words on
     */
    public function setWordBreaks($charList)
    {
        // Drop quotes from break characters - strings are handled separately to word breaks now
        $this->wordBreaks = str_replace(array('"', '\''), '', $charList);
        $this->reset();
    }

    /**
     * Split the command line into words using the configured word break characters
     *
     * Populates $this->words, $this->rawWords and $this->wordIndex. All three are rebuilt
     * from scratch on every call, so calling this more than once is safe.
     *
     * @return void
     */
    protected function splitCommand()
    {
        // Start from empty arrays: this keeps words/rawWords aligned when re-splitting and
        // avoids calling count() on null when the line is empty or starts with a break
        $this->words = array();
        $this->rawWords = array();
        $this->wordIndex = null;

        // Cast so that a command line which was never set is treated as an empty string
        $tokens = $this->tokenizeString((string) $this->commandLine);

        foreach ($tokens as $token) {
            $isBreak = ($token['type'] === 'break');

            if (!$isBreak) {
                $this->words[] = $this->getTokenValue($token);
                $this->rawWords[] = $token['value'];
            }

            // Only the first token whose end is at or after the cursor decides the word index
            if ($this->wordIndex !== null || $this->charIndex > $token['offsetEnd']) {
                continue;
            }

            $this->wordIndex = count($this->words) - 1;

            if ($isBreak) {
                // Cursor is in the break-space after a word
                // Push an empty word at the cursor to allow completion of new terms at the cursor, ignoring words ahead
                $this->wordIndex++;
                $this->words[] = '';
                $this->rawWords[] = '';
                continue;
            }

            if ($this->charIndex < $token['offsetEnd']) {
                // Cursor is inside the current word - truncate the word at the cursor to complete on
                // This emulates BASH completion's behaviour with COMP_CWORD

                // Create a copy of the token with its value truncated
                $truncatedToken = $token;
                $relativeOffset = $this->charIndex - $token['offset'];
                $truncatedToken['value'] = substr($token['value'], 0, $relativeOffset);

                // Replace the current word with the truncated value
                $this->words[$this->wordIndex] = $this->getTokenValue($truncatedToken);
                $this->rawWords[$this->wordIndex] = $truncatedToken['value'];
            }
        }

        // Cursor position is past the end of the command line string - consider it a new word
        if ($this->wordIndex === null) {
            $this->wordIndex = count($this->words);
            $this->words[] = '';
            $this->rawWords[] = '';
        }
    }

    /**
     * Return a token's value with escaping and quotes removed
     *
     * @see self::tokenizeString()
     * @param array $token - a token as returned by tokenizeString(); only 'type' and 'value' are read
     * @return string
     */
    protected function getTokenValue($token)
    {
        $value = $token['value'];

        // Remove outer quote characters (or first quote if unclosed)
        if ($token['type'] == 'quoted') {
            $value = preg_replace('/^(?:[\'"])(.*?)(?:[\'"])?$/', '$1', $value);
        }

        // Remove escape characters: each backslash-escaped character is replaced by the character itself
        $value = preg_replace('/\\\\(.)/', '$1', $value);

        return $value;
    }

    /**
     * Break a string into words, quoted strings and non-words (breaks)
     *
     * Returns an array of unmodified segments of $string with offset and type information.
     * The type of each token is one of 'word', 'quoted' or 'break'.
     *
     * @param string $string
     * @return array as [ [type => string, value => string, offset => int, offsetEnd => int], ... ]
     */
    protected function tokenizeString($string)
    {
        // Map capture groups to returned token type
        $typeMap = array(
            'double_quote_string' => 'quoted',
            'single_quote_string' => 'quoted',
            'word' => 'word',
            'break' => 'break',
        );

        // Escape every word break character including whitespace
        // preg_quote won't work here as it doesn't understand the ignore whitespace flag ("x")
        $breaks = preg_replace('/(.)/', '\\\$1', $this->wordBreaks);

        $pattern = <<<"REGEX"
            /(?:
                (?P<double_quote_string>
                    "(\\\\.|[^\"\\\\])*(?:"|$)
                ) |
                (?P<single_quote_string>
                    '(\\\\.|[^'\\\\])*(?:'|$)
                ) |
                (?P<word>
                    (?:\\\\.|[^$breaks])+
                ) |
                (?P<break>
                     [$breaks]+
                )
            )/x
REGEX;

        $tokens = array();

        if (!preg_match_all($pattern, $string, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
            return $tokens;
        }

        foreach ($matches as $set) {
            foreach ($set as $groupName => $match) {

                // Ignore integer indices preg_match outputs (duplicates of named groups)
                if (is_integer($groupName)) {
                    continue;
                }

                // Skip if the offset indicates this group didn't match
                if ($match[1] === -1) {
                    continue;
                }

                $tokens[] = array(
                    'type' => $typeMap[$groupName],
                    'value' => $match[0],
                    'offset' => $match[1],
                    'offsetEnd' => $match[1] + strlen($match[0])
                );

                // Move to the next set (only one group should match per set)
                break;
            }
        }

        return $tokens;
    }

    /**
     * Reset the computed words so that $this->splitCommand() is forced to run again
     *
     * Clears the raw words as well, so that they never go out of step with $this->words.
     */
    protected function reset()
    {
        $this->words = null;
        $this->rawWords = null;
        $this->wordIndex = null;
    }
}
391