1<?php
2
3/**
4 * PHP Command Line Tools
5 *
6 * This source file is subject to the MIT license that is bundled
7 * with this package in the file LICENSE.
8 *
9 * @author    James Logsdon <dwarf@girsbrain.org>
 * @copyright 2010 James Logsdon (http://girsbrain.org)
11 * @license   http://www.opensource.org/licenses/mit-license.php The MIT License
12 */
13
14namespace cli;
15
/**
 * Renders a message string. When extra scalar arguments follow `$msg`, the
 * message is rendered with `sprintf`. When the second argument is an `array`,
 * each of its keys acts as a placeholder name; placeholders are written in
 * the form {:key}.
 *
 * @param string   $msg  The message to render.
 * @param mixed    ...   Either scalar arguments or a single array argument.
 * @return string  The rendered string.
 */
function render( $msg ) {
	// Forward every argument that was actually passed, not just `$msg`.
	$args = func_get_args();

	return Streams::_call( 'render', $args );
}
29
/**
 * Shortcut for writing to `STDOUT`. The message and its parameters go
 * through `sprintf` before being output.
 *
 * @param string  $msg  The message to output in `printf` format.
 * @param mixed   ...   Either scalar arguments or a single array argument.
 * @return void
 * @see \cli\render()
 */
function out( $msg ) {
	// Forward every argument that was actually passed, not just `$msg`.
	$args = func_get_args();

	Streams::_call( 'out', $args );
}
42
/**
 * Pads `$msg` out to the width of the shell, then hands it to `cli\out`.
 *
 * @param string  $msg  The message to pad and pass on.
 * @param mixed   ...   Either scalar arguments or a single array argument.
 * @return void
 * @see cli\out()
 */
function out_padded( $msg ) {
	// Forward every argument that was actually passed, not just `$msg`.
	$args = func_get_args();

	Streams::_call( 'out_padded', $args );
}
54
/**
 * Writes a message to `STDOUT` followed by a newline. See `\cli\out` for
 * more documentation.
 *
 * @param string  $msg  Optional. The message to output. Default empty string.
 * @param mixed   ...   Either scalar arguments or a single array argument.
 * @return void
 * @see cli\out()
 */
function line( $msg = '' ) {
	// Only the arguments actually passed are forwarded; a defaulted `$msg` is not among them.
	$args = func_get_args();

	Streams::_call( 'line', $args );
}
64
/**
 * Shortcut for writing to `STDERR`. The message and its parameters go
 * through `sprintf` before being output.
 *
 * @param string  $msg  The message to output in `printf` format. With no string,
 *                      a newline is printed.
 * @param mixed   ...   Either scalar arguments or a single array argument.
 * @return void
 */
function err( $msg = '' ) {
	// Only the arguments actually passed are forwarded; a defaulted `$msg` is not among them.
	$args = func_get_args();

	Streams::_call( 'err', $args );
}
77
/**
 * Reads input from `STDIN` in the given format. An exception is thrown if an
 * end of transmission character (^D) is sent.
 *
 * @param string  $format  A valid input format. See `fscanf` for documentation.
 *                         If none is given, all input up to the first newline
 *                         is accepted.
 * @return string  The input with whitespace trimmed.
 * @throws \Exception  Thrown if ctrl-D (EOT) is sent as input.
 */
function input( $format = null ) {
	$received = Streams::input( $format );

	return $received;
}
91
/**
 * Shows an input prompt. When no default value is supplied, the prompt keeps
 * being displayed until input is received.
 *
 * @param string      $question The question to ask the user.
 * @param string|bool $default  A default value if the user provides no input. Default false (no default value).
 * @param string      $marker   A string to append to the question and default value on display.
 * @param boolean     $hide     If the user input should be hidden
 * @return string  The users input.
 * @see cli\input()
 */
function prompt( $question, $default = false, $marker = ': ', $hide = false ) {
	$answer = Streams::prompt( $question, $default, $marker, $hide );

	return $answer;
}
106
/**
 * Asks the user a multiple choice question. Handy for 'yes/no' type
 * questions, which is what this function defaults to.
 *
 * @param string      $question   The question to ask the user.
 * @param string      $choice     Presumably the characters allowed as a response, case
 *                                ignored (the previous documentation described this under
 *                                the name `$valid`). Default 'yn'.
 * @param string|null $default    The default choice. NULL if a default is not allowed.
 * @return string  The users choice.
 * @see      cli\prompt()
 */
function choose( $question, $choice = 'yn', $default = 'n' ) {
	$picked = Streams::choose( $question, $choice, $default );

	return $picked;
}
122
/**
 * Like {@see choose()}, except the question is always yes/no and the answer
 * comes back as a boolean.
 *
 * @param string    $question  The question to ask the user.
 * @param bool|null $default   The default choice, in a boolean format. A non-boolean
 *                             value is handed to `choose()` unchanged.
 * @return bool  True if the choice was 'y', otherwise false.
 */
function confirm( $question, $default = false ) {
	// Translate a boolean default into the matching choice character.
	if ( true === $default ) {
		$default = 'y';
	} elseif ( false === $default ) {
		$default = 'n';
	}

	return choose( $question, 'yn', $default ) == 'y';
}
137
/**
 * Shows an array of strings as a menu from which the user picks an option by
 * entering its number. The array must be one-dimensional and hold either
 * strings or objects with a `__toString()` method.
 *
 * @param array  $items   The list of items the user can choose from.
 * @param string $default The index of the default item.
 * @param string $title   The message displayed to the user when prompted.
 * @return string  The index of the chosen item.
 * @see cli\line()
 * @see cli\input()
 * @see cli\err()
 */
function menu( $items, $default = null, $title = 'Choose an item' ) {
	$chosen = Streams::menu( $items, $default, $title );

	return $chosen;
}
154
/**
 * Tries to determine a string's length in an encoding-safe way. The intl extension is tried first, then PCRE with '\X',
 * then the mb_string extension; if none of them can be used, plain strlen() is the fallback.
 *
 * @param  string      $str      The string to check.
 * @param  string|bool $encoding Optional. The encoding of the string. Default false.
 * @return int  Numeric value that represents the string's length
 */
function safe_strlen( $str, $encoding = false ) {
	// Selective testing hook - "1" bit set tests grapheme_strlen(), "2" preg_match_all( '/\X/u' ), "4" mb_strlen(), "other" strlen().
	$test_bits = getenv( 'PHP_CLI_TOOLS_TEST_SAFE_STRLEN' );

	// UTF-8 is assumed when no encoding is given.
	$treat_as_utf8 = ! $encoding || 'UTF-8' === $encoding;

	// `grapheme_strlen()` will return null if given non-UTF-8 string.
	if ( $treat_as_utf8 && can_use_icu() ) {
		$graphemes = grapheme_strlen( $str );
		if ( null !== $graphemes && ( ! $test_bits || ( $test_bits & 1 ) ) ) {
			return $graphemes;
		}
	}

	// `preg_match_all()` will return false if given non-UTF-8 string.
	if ( $treat_as_utf8 && can_use_pcre_x() ) {
		$clusters = preg_match_all( '/\X/u', $str, $dummy /*needed for PHP 5.3*/ );
		if ( false !== $clusters && ( ! $test_bits || ( $test_bits & 2 ) ) ) {
			return $clusters;
		}
	}

	// Legacy encodings and old PHPs will reach here.
	$mb_usable = function_exists( 'mb_strlen' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) );
	if ( $mb_usable ) {
		if ( ! $encoding ) {
			$encoding = mb_detect_encoding( $str, null, true /*strict*/ );
		}
		// mbstring funcs can fail if given `$encoding` arg that evals to false.
		if ( $encoding ) {
			$mb_length = mb_strlen( $str, $encoding );
		} else {
			$mb_length = mb_strlen( $str );
		}
		if ( 'UTF-8' === $encoding ) {
			// Subtract combining characters.
			$mb_length -= preg_match_all( get_unicode_regexs( 'm' ), $str, $dummy /*needed for PHP 5.3*/ );
		}
		if ( ! $test_bits || ( $test_bits & 4 ) ) {
			return $mb_length;
		}
	}

	return strlen( $str );
}
195
/**
 * Tries to extract a substring in an encoding-safe way. The intl extension is tried first, then PCRE with '\X', then the
 * mb_string extension; if none of them can be used, plain substr() is the fallback.
 *
 * @param  string        $str      The input string.
 * @param  int           $start    The starting position of the substring.
 * @param  int|bool|null $length   Optional, unless $is_width is set. Maximum length of the substring. Default false. Negative not supported.
 * @param  int|bool      $is_width Optional. If set and encoding is UTF-8, $length (which must be specified) is interpreted as spacing width. Default false.
 * @param  string|bool   $encoding Optional. The encoding of the string. Default false.
 * @return bool|string  False if given unsupported args, otherwise substring of string specified by start and length parameters
 */
function safe_substr( $str, $start, $length = false, $is_width = false, $encoding = false ) {
	$length_omitted = null === $length || false === $length;

	// Unsupported: a negative $length, or $is_width without a specified $length.
	if ( $length < 0 || ( $is_width && $length_omitted ) ) {
		return false;
	}

	// Needed for the normalization below and for the PCRE split limit.
	$total = safe_strlen( $str, $encoding );

	// Normalize an unspecified `$length` - PHP 5.3 substr takes false as full length, PHP > 5.3 takes null.
	if ( $length_omitted ) {
		$length = $total;
	}

	// Normalize `$start` - various methods treat this differently.
	if ( $start > $total ) {
		return '';
	}
	if ( $start < 0 && -$start > $total ) {
		$start = 0;
	}

	// Selective testing hook - "1" bit set tests grapheme_substr(), "2" preg_split( '/\X/' ), "4" mb_substr(), "8" substr().
	$test_bits = getenv( 'PHP_CLI_TOOLS_TEST_SAFE_SUBSTR' );

	// UTF-8 is assumed when no encoding is given.
	$treat_as_utf8 = ! $encoding || 'UTF-8' === $encoding;

	// `grapheme_substr()` will return false (not null like `grapheme_strlen()`) if given non-UTF-8 string.
	if ( $treat_as_utf8 && can_use_icu() ) {
		$piece = grapheme_substr( $str, $start, $length );
		if ( false !== $piece && ( ! $test_bits || ( $test_bits & 1 ) ) ) {
			return $is_width ? _safe_substr_eaw( $piece, $length ) : $piece;
		}
	}

	// `preg_split()` returns a one element array if given non-UTF-8 string (PHP bug) so `preg_last_error()` needs checking too.
	if ( $treat_as_utf8 && can_use_pcre_x() ) {
		$clusters = preg_split( '/(\X)/u', $str, $total + 1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
		if ( false !== $clusters && ! preg_last_error() ) {
			$piece = implode( '', array_slice( $clusters, $start, $length ) );
			if ( ! $test_bits || ( $test_bits & 2 ) ) {
				return $is_width ? _safe_substr_eaw( $piece, $length ) : $piece;
			}
		}
	}

	// Legacy encodings and old PHPs will reach here.
	$mb_usable = function_exists( 'mb_substr' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) );
	if ( $mb_usable ) {
		if ( ! $encoding ) {
			$encoding = mb_detect_encoding( $str, null, true /*strict*/ );
		}
		// Bug: not adjusting for combining chars.
		// mbstring funcs can fail if given `$encoding` arg that evals to false.
		if ( $encoding ) {
			$piece = mb_substr( $str, $start, $length, $encoding );
		} else {
			$piece = mb_substr( $str, $start, $length );
		}
		if ( 'UTF-8' === $encoding && $is_width ) {
			$piece = _safe_substr_eaw( $piece, $length );
		}
		if ( ! $test_bits || ( $test_bits & 4 ) ) {
			return $piece;
		}
	}

	return substr( $str, $start, $length );
}
261
/**
 * Internal helper for `safe_substr()` that adjusts a substring for East Asian double-width chars.
 *
 * @param  string $str    The substring already cut by `safe_substr()`.
 * @param  int    $length The length passed to `safe_substr()`, treated here as a spacing width.
 * @return string  `$str` unchanged if it has no East Asian double-width chars, otherwise `$str` shortened to fit the width.
 */
function _safe_substr_eaw( $str, $length ) {
	// Get the East Asian Width regex.
	$eaw_regex = get_unicode_regexs( 'eaw' );

	// Nothing to adjust unless there's at least one East Asian double-width char.
	if ( ! preg_match( $eaw_regex, $str ) ) {
		return $str;
	}

	// Note that if the length ends in the middle of a double-width char, the char is excluded, not included.

	// See if it's all EAW.
	if ( function_exists( 'mb_substr' ) && preg_match_all( $eaw_regex, $str, $dummy /*needed for PHP 5.3*/ ) === $length ) {
		// Just halve the length (rounded down to a minimum of 1).
		$half = max( (int) ( $length / 2 ), 1 );

		return mb_substr( $str, 0, $half, 'UTF-8' );
	}

	// Explode string into an array of UTF-8 chars. Based on core `_mb_substr()` in "wp-includes/compat.php".
	$chars = preg_split( '/([\x00-\x7f\xc2-\xf4][^\x00-\x7f\xc2-\xf4]*)/', $str, $length + 1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
	$limit = min( count( $chars ), $length );

	// Take chars while width remains; a double-width char uses up 2, any other char 1.
	$remaining = $length;
	$take = 0;
	while ( $take < $limit && $remaining > 0 ) {
		$remaining -= preg_match( $eaw_regex, $chars[ $take ] ) ? 2 : 1;
		$take++;
	}

	// The last char overshot the width, so drop it - but keep a minimum of 1 char.
	if ( $remaining < 0 && $take > 1 ) {
		$take--;
	}

	return join( '', array_slice( $chars, 0, $take ) );
}
297
/**
 * Pads a string for display in an encoding-safe way.
 *
 * @param  string      $string   The string to pad.
 * @param  int         $length   The length to pad it to.
 * @param  string|bool $encoding Optional. The encoding of the string. Default false.
 * @return string
 */
function safe_str_pad( $string, $length, $encoding = false ) {
	// `str_pad()` counts bytes, so widen the target by however many bytes the string has beyond its display width.
	$byte_surplus = strlen( $string ) - strwidth( $string, $encoding );

	return str_pad( $string, $length + $byte_surplus );
}
313
/**
 * Gets the width of a string, ie its length in characters, taking into account multi-byte and mark characters for UTF-8, and multi-byte for non-UTF-8.
 *
 * @param  string      $string   The string to check.
 * @param  string|bool $encoding Optional. The encoding of the string. Default false.
 * @return int  The string's width.
 */
function strwidth( $string, $encoding = false ) {
	// Fetch the East Asian Width and Mark regexs.
	$regexs = get_unicode_regexs();
	$wide_regex = $regexs[0];
	$mark_regex = $regexs[1];

	// Selective testing hook - "1" bit set tests grapheme_strlen(), "2" preg_match_all( '/\X/u' ), "4" mb_strwidth(), "other" safe_strlen().
	$test_bits = getenv( 'PHP_CLI_TOOLS_TEST_STRWIDTH' );

	// UTF-8 is assumed when no encoding is given.
	$treat_as_utf8 = ! $encoding || 'UTF-8' === $encoding;

	// `grapheme_strlen()` will return null if given non-UTF-8 string.
	if ( $treat_as_utf8 && can_use_icu() ) {
		$graphemes = grapheme_strlen( $string );
		if ( null !== $graphemes && ( ! $test_bits || ( $test_bits & 1 ) ) ) {
			// Each East Asian double-width char adds one more column.
			return $graphemes + preg_match_all( $wide_regex, $string, $dummy /*needed for PHP 5.3*/ );
		}
	}

	// `preg_match_all()` will return false if given non-UTF-8 string.
	if ( $treat_as_utf8 && can_use_pcre_x() ) {
		$clusters = preg_match_all( '/\X/u', $string, $dummy /*needed for PHP 5.3*/ );
		if ( false !== $clusters && ( ! $test_bits || ( $test_bits & 2 ) ) ) {
			// Each East Asian double-width char adds one more column.
			return $clusters + preg_match_all( $wide_regex, $string, $dummy /*needed for PHP 5.3*/ );
		}
	}

	// Legacy encodings and old PHPs will reach here.
	$mb_usable = function_exists( 'mb_strwidth' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) );
	if ( $mb_usable ) {
		if ( ! $encoding ) {
			$encoding = mb_detect_encoding( $string, null, true /*strict*/ );
		}
		// mbstring funcs can fail if given `$encoding` arg that evals to false.
		if ( $encoding ) {
			$mb_width = mb_strwidth( $string, $encoding );
		} else {
			$mb_width = mb_strwidth( $string );
		}
		if ( 'UTF-8' === $encoding ) {
			// Subtract combining characters.
			$mb_width -= preg_match_all( $mark_regex, $string, $dummy /*needed for PHP 5.3*/ );
		}
		if ( ! $test_bits || ( $test_bits & 4 ) ) {
			return $mb_width;
		}
	}

	return safe_strlen( $string, $encoding );
}
356
/**
 * Tells whether ICU is modern enough not to flake out. The answer is worked out once and then remembered.
 *
 * @return bool
 */
function can_use_icu() {
	static $usable = null;

	if ( null !== $usable ) {
		return $usable;
	}

	$usable = false;
	if ( defined( 'INTL_ICU_VERSION' ) && function_exists( 'grapheme_strlen' ) && function_exists( 'grapheme_substr' ) ) {
		// Choosing ICU 54, Unicode 7.0.
		$usable = version_compare( INTL_ICU_VERSION, '54.1', '>=' );
	}

	return $usable;
}
372
/**
 * Tells whether PCRE Unicode extended grapheme cluster '\X' is available for use. The answer is worked out once and then remembered.
 *
 * @return bool
 */
function can_use_pcre_x() {
	static $available = null;

	if ( null !== $available ) {
		return $available;
	}

	// '\X' introduced (as Unicode extended grapheme cluster) in PCRE 8.32 - see https://vcs.pcre.org/pcre/code/tags/pcre-8.32/ChangeLog?view=markup line 53.
	// Older versions of PCRE were bundled with PHP <= 5.3.23 & <= 5.4.13.
	$numeric_len = strspn( PCRE_VERSION, '0123456789.' );
	$pcre_version = substr( PCRE_VERSION, 0, $numeric_len ); // Keep only the leading version number, dropping any trailing date stuff.

	$available = false;
	if ( version_compare( $pcre_version, '8.32', '>=' ) ) {
		// Also check that a '\X' pattern in UTF-8 mode actually works.
		$available = false !== @preg_match( '/\X/u', '' );
	}

	return $available;
}
390
/**
 * Gets the regexs generated from Unicode data.
 *
 * @param string $idx Optional. Return a specific regex only - 'eaw' for the East Asian Width regex, 'm' for the Mark regex. Default null.
 * @return array|string  A two-element array (East Asian Width regex first, Mark regex second) if not given $idx or $idx isn't
 *                       recognized, otherwise the specific regex string.
 */
function get_unicode_regexs( $idx = null ) {
	// NOTE(review): keep these two variable names as they are - presumably the required file assigns to them; confirm against "unicode/regex.php".
	static $eaw_regex; // East Asian Width regex. Characters that count as 2 characters as they're "wide" or "fullwidth". See http://www.unicode.org/reports/tr11/tr11-19.html
	static $m_regex; // Mark characters regex (Unicode property "M") - mark combining "Mc", mark enclosing "Me" and mark non-spacing "Mn" chars that should be ignored for spacing purposes.

	// Not loaded yet, so pull in both regexs generated from Unicode data.
	if ( null === $eaw_regex ) {
		require __DIR__ . '/unicode/regex.php';
	}

	if ( 'eaw' === $idx ) {
		return $eaw_regex;
	}
	if ( 'm' === $idx ) {
		return $m_regex;
	}

	// No `$idx`, or one that isn't recognized: return both regexs.
	return array( $eaw_regex, $m_regex );
}
416