<?php

/**
 * PHP Command Line Tools
 *
 * This source file is subject to the MIT license that is bundled
 * with this package in the file LICENSE.
 *
 * @author    James Logsdon <dwarf@girsbrain.org>
 * @copyright 2010 James Logsdom (http://girsbrain.org)
 * @license   http://www.opensource.org/licenses/mit-license.php The MIT License
 */

namespace cli;

/**
 * Handles rendering strings. If extra scalar arguments are given after the `$msg`
 * the string will be rendered with `sprintf`. If the second argument is an `array`
 * then each key in the array will be the placeholder name. Placeholders are of the
 * format {:key}.
 *
 * All arguments are forwarded unchanged to `Streams::_call( 'render', ... )`.
 *
 * @param string   $msg  The message to render.
 * @param mixed    ...   Either scalar arguments or a single array argument.
 * @return string  The rendered string.
 */
function render( $msg ) {
	return Streams::_call( 'render', func_get_args() );
}

/**
 * Shortcut for printing to `STDOUT`. The message and parameters are passed
 * through `sprintf` before output.
 *
 * All arguments are forwarded unchanged to `Streams::_call( 'out', ... )`.
 *
 * @param string  $msg  The message to output in `printf` format.
 * @param mixed   ...   Either scalar arguments or a single array argument.
 * @return void
 * @see \cli\render()
 */
function out( $msg ) {
	Streams::_call( 'out', func_get_args() );
}

/**
 * Pads `$msg` to the width of the shell before passing to `cli\out`.
 *
 * All arguments are forwarded unchanged to `Streams::_call( 'out_padded', ... )`.
 *
 * @param string  $msg  The message to pad and pass on.
 * @param mixed   ...   Either scalar arguments or a single array argument.
 * @return void
 * @see \cli\out()
 */
function out_padded( $msg ) {
	Streams::_call( 'out_padded', func_get_args() );
}

/**
 * Prints a message to `STDOUT` with a newline appended. See `\cli\out` for
 * more documentation.
 *
 * @param string  $msg  Optional. The message to output. Default empty string.
 * @param mixed   ...   Either scalar arguments or a single array argument.
 * @return void
 * @see \cli\out()
 */
function line( $msg = '' ) {
	Streams::_call( 'line', func_get_args() );
}

/**
 * Shortcut for printing to `STDERR`. The message and parameters are passed
 * through `sprintf` before output.
 *
 * @param string  $msg  The message to output in `printf` format. With no string,
 *                      a newline is printed.
 * @param mixed   ...   Either scalar arguments or a single array argument.
 * @return void
 */
function err( $msg = '' ) {
	Streams::_call( 'err', func_get_args() );
}

/**
 * Takes input from `STDIN` in the given format. If an end of transmission
 * character is sent (^D), an exception is thrown.
 *
 * @param string  $format  A valid input format. See `fscanf` for documentation.
 *                         If none is given, all input up to the first newline
 *                         is accepted.
 * @return string  The input with whitespace trimmed.
 * @throws \Exception  Thrown if ctrl-D (EOT) is sent as input.
 */
function input( $format = null ) {
	return Streams::input( $format );
}

/**
 * Displays an input prompt. If no default value is provided the prompt will
 * continue displaying until input is received.
 *
 * @param string       $question  The question to ask the user.
 * @param string|bool  $default   A default value if the user provides no input. Default false (no default).
 * @param string       $marker    A string to append to the question and default value on display.
 * @param boolean      $hide      If the user input should be hidden.
 * @return string  The user's input.
 * @see \cli\input()
 */
function prompt( $question, $default = false, $marker = ': ', $hide = false ) {
	return Streams::prompt( $question, $default, $marker, $hide );
}

/**
 * Presents a user with a multiple choice question, useful for 'yes/no' type
 * questions (which this function defaults to).
 *
 * @param string       $question  The question to ask the user.
 * @param string       $choice    A string of characters allowed as a response. Case
 *                                is ignored.
 * @param string|null  $default   The default choice. NULL if a default is not allowed.
 * @return string  The user's choice.
 * @see \cli\prompt()
 */
function choose( $question, $choice = 'yn', $default = 'n' ) {
	return Streams::choose( $question, $choice, $default );
}

/**
 * Does the same as {@see choose()}, but always asks yes/no and returns a boolean.
 *
 * @param string     $question  The question to ask the user.
 * @param bool|null  $default   The default choice, in a boolean format.
 * @return bool  True if the answer was 'y', false otherwise.
 */
function confirm( $question, $default = false ) {
	// Translate a boolean default into the 'y'/'n' character `choose()` expects; anything else is passed through.
	if ( is_bool( $default ) ) {
		$default = $default ? 'y' : 'n';
	}
	$result = choose( $question, 'yn', $default );
	return 'y' === $result;
}

/**
 * Displays an array of strings as a menu where a user can enter a number to
 * choose an option. The array must be a single dimension with either strings
 * or objects with a `__toString()` method.
 *
 * @param array        $items    The list of items the user can choose from.
 * @param string|null  $default  The index of the default item. Default null (no default).
 * @param string       $title    The message displayed to the user when prompted.
 * @return string  The index of the chosen item.
 * @see \cli\line()
 * @see \cli\input()
 * @see \cli\err()
 */
function menu( $items, $default = null, $title = 'Choose an item' ) {
	return Streams::menu( $items, $default, $title );
}

/**
 * Attempts an encoding-safe way of getting string length. If intl extension or PCRE with '\X' or mb_string extension aren't
 * available, falls back to basic strlen.
 *
 * The methods are tried in order: `grapheme_strlen()`, `preg_match_all( '/\X/u' )`, `mb_strlen()`, `strlen()`.
 * The `PHP_CLI_TOOLS_TEST_SAFE_STRLEN` environment variable can be used to force a particular method for testing.
 *
 * @param string       $str       The string to check.
 * @param string|bool  $encoding  Optional. The encoding of the string. Default false.
 * @return int  Numeric value that represents the string's length
 */
function safe_strlen( $str, $encoding = false ) {
	// Allow for selective testings - "1" bit set tests grapheme_strlen(), "2" preg_match_all( '/\X/u' ), "4" mb_strlen(), "other" strlen().
	$test_safe_strlen = getenv( 'PHP_CLI_TOOLS_TEST_SAFE_STRLEN' );

	// Assume UTF-8 if no encoding given - `grapheme_strlen()` will return null if given non-UTF-8 string.
	if ( ( ! $encoding || 'UTF-8' === $encoding ) && can_use_icu() && null !== ( $length = grapheme_strlen( $str ) ) ) {
		if ( ! $test_safe_strlen || ( $test_safe_strlen & 1 ) ) {
			return $length;
		}
	}
	// Assume UTF-8 if no encoding given - `preg_match_all()` will return false if given non-UTF-8 string.
	if ( ( ! $encoding || 'UTF-8' === $encoding ) && can_use_pcre_x() && false !== ( $length = preg_match_all( '/\X/u', $str, $dummy /*needed for PHP 5.3*/ ) ) ) {
		if ( ! $test_safe_strlen || ( $test_safe_strlen & 2 ) ) {
			return $length;
		}
	}
	// Legacy encodings and old PHPs will reach here.
	if ( function_exists( 'mb_strlen' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) ) ) {
		if ( ! $encoding ) {
			$encoding = mb_detect_encoding( $str, null, true /*strict*/ );
		}
		$length = $encoding ? mb_strlen( $str, $encoding ) : mb_strlen( $str ); // mbstring funcs can fail if given `$encoding` arg that evals to false.
		if ( 'UTF-8' === $encoding ) {
			// Subtract combining characters.
			$length -= preg_match_all( get_unicode_regexs( 'm' ), $str, $dummy /*needed for PHP 5.3*/ );
		}
		if ( ! $test_safe_strlen || ( $test_safe_strlen & 4 ) ) {
			return $length;
		}
	}
	return strlen( $str );
}

/**
 * Attempts an encoding-safe way of getting a substring. If intl extension or PCRE with '\X' or mb_string extension aren't
 * available, falls back to substr().
 *
 * The methods are tried in order: `grapheme_substr()`, `preg_split( '/(\X)/u' )`, `mb_substr()`, `substr()`.
 * The `PHP_CLI_TOOLS_TEST_SAFE_SUBSTR` environment variable can be used to force a particular method for testing.
 *
 * @param string         $str       The input string.
 * @param int            $start     The starting position of the substring.
 * @param int|bool|null  $length    Optional, unless $is_width is set. Maximum length of the substring. Default false. Negative not supported.
 * @param int|bool       $is_width  Optional. If set and encoding is UTF-8, $length (which must be specified) is interpreted as spacing width. Default false.
 * @param string|bool    $encoding  Optional. The encoding of the string. Default false.
 * @return bool|string  False if given unsupported args, otherwise substring of string specified by start and length parameters
 */
function safe_substr( $str, $start, $length = false, $is_width = false, $encoding = false ) {
	// Negative $length or $is_width and $length not specified not supported.
	if ( $length < 0 || ( $is_width && ( null === $length || false === $length ) ) ) {
		return false;
	}
	// Need this for normalization below and other uses.
	$safe_strlen = safe_strlen( $str, $encoding );

	// Normalize `$length` when not specified - PHP 5.3 substr takes false as full length, PHP > 5.3 takes null.
	if ( null === $length || false === $length ) {
		$length = $safe_strlen;
	}
	// Normalize `$start` - various methods treat this differently.
	if ( $start > $safe_strlen ) {
		return '';
	}
	if ( $start < 0 && -$start > $safe_strlen ) {
		$start = 0;
	}

	// Allow for selective testings - "1" bit set tests grapheme_substr(), "2" preg_split( '/\X/' ), "4" mb_substr(), "8" substr().
	$test_safe_substr = getenv( 'PHP_CLI_TOOLS_TEST_SAFE_SUBSTR' );

	// Assume UTF-8 if no encoding given - `grapheme_substr()` will return false (not null like `grapheme_strlen()`) if given non-UTF-8 string.
	if ( ( ! $encoding || 'UTF-8' === $encoding ) && can_use_icu() && false !== ( $try = grapheme_substr( $str, $start, $length ) ) ) {
		if ( ! $test_safe_substr || ( $test_safe_substr & 1 ) ) {
			return $is_width ? _safe_substr_eaw( $try, $length ) : $try;
		}
	}
	// Assume UTF-8 if no encoding given - `preg_split()` returns a one element array if given non-UTF-8 string (PHP bug) so need to check `preg_last_error()`.
	if ( ( ! $encoding || 'UTF-8' === $encoding ) && can_use_pcre_x() ) {
		if ( false !== ( $try = preg_split( '/(\X)/u', $str, $safe_strlen + 1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ) ) && ! preg_last_error() ) {
			$try = implode( '', array_slice( $try, $start, $length ) );
			if ( ! $test_safe_substr || ( $test_safe_substr & 2 ) ) {
				return $is_width ? _safe_substr_eaw( $try, $length ) : $try;
			}
		}
	}
	// Legacy encodings and old PHPs will reach here.
	if ( function_exists( 'mb_substr' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) ) ) {
		if ( ! $encoding ) {
			$encoding = mb_detect_encoding( $str, null, true /*strict*/ );
		}
		// Bug: not adjusting for combining chars.
		$try = $encoding ? mb_substr( $str, $start, $length, $encoding ) : mb_substr( $str, $start, $length ); // mbstring funcs can fail if given `$encoding` arg that evals to false.
		if ( 'UTF-8' === $encoding && $is_width ) {
			$try = _safe_substr_eaw( $try, $length );
		}
		if ( ! $test_safe_substr || ( $test_safe_substr & 4 ) ) {
			return $try;
		}
	}
	return substr( $str, $start, $length );
}

/**
 * Internal function used by `safe_substr()` to adjust for East Asian double-width chars.
 *
 * Each character matching the East Asian Width regex counts as 2 columns, any other character as 1.
 *
 * @param string  $str     The (already length-truncated) UTF-8 string to adjust.
 * @param int     $length  The maximum spacing width the returned string should occupy.
 * @return string  The string truncated to fit the width; at least one character is kept if the string has any.
 */
function _safe_substr_eaw( $str, $length ) {
	// Set the East Asian Width regex.
	$eaw_regex = get_unicode_regexs( 'eaw' );

	// If there's any East Asian double-width chars...
	if ( preg_match( $eaw_regex, $str ) ) {
		// Note that if the length ends in the middle of a double-width char, the char is excluded, not included.

		// See if it's all EAW.
		if ( function_exists( 'mb_substr' ) && preg_match_all( $eaw_regex, $str, $dummy /*needed for PHP 5.3*/ ) === $length ) {
			// Just halve the length (rounded down to a minimum of 1).
			$str = mb_substr( $str, 0, max( (int) ( $length / 2 ), 1 ), 'UTF-8' );
		} else {
			// Explode string into an array of UTF-8 chars. Based on core `_mb_substr()` in "wp-includes/compat.php".
			$chars = preg_split( '/([\x00-\x7f\xc2-\xf4][^\x00-\x7f\xc2-\xf4]*)/', $str, $length + 1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
			if ( false === $chars ) {
				// `preg_split()` failed - return the string unadjusted rather than passing false to `count()` (a TypeError on PHP 8).
				return $str;
			}
			$cnt = min( count( $chars ), $length );
			$width = $length;

			// `$length` is reused as the character counter; `$width` is the remaining column budget.
			for ( $length = 0; $length < $cnt && $width > 0; $length++ ) {
				$width -= preg_match( $eaw_regex, $chars[ $length ] ) ? 2 : 1;
			}
			// Round down to a minimum of 1.
			if ( $width < 0 && $length > 1 ) {
				$length--;
			}
			return join( '', array_slice( $chars, 0, $length ) );
		}
	}
	return $str;
}

/**
 * An encoding-safe way of padding string length for display
 *
 * @param string       $string    The string to pad.
 * @param int          $length    The length to pad it to.
 * @param string|bool  $encoding  Optional. The encoding of the string. Default false.
 * @return string
 */
function safe_str_pad( $string, $length, $encoding = false ) {
	$real_length = strwidth( $string, $encoding );
	// `str_pad()` counts bytes, so widen the target by the difference between byte length and display width.
	$diff = strlen( $string ) - $real_length;
	$length += $diff;

	return str_pad( $string, $length );
}

/**
 * Get width of string, ie length in characters, taking into account multi-byte and mark characters for UTF-8, and multi-byte for non-UTF-8.
 *
 * The methods are tried in order: `grapheme_strlen()`, `preg_match_all( '/\X/u' )`, `mb_strwidth()`, `safe_strlen()`.
 * The `PHP_CLI_TOOLS_TEST_STRWIDTH` environment variable can be used to force a particular method for testing.
 *
 * @param string       $string    The string to check.
 * @param string|bool  $encoding  Optional. The encoding of the string. Default false.
 * @return int  The string's width.
 */
function strwidth( $string, $encoding = false ) {
	// Set the East Asian Width and Mark regexs.
	list( $eaw_regex, $m_regex ) = get_unicode_regexs();

	// Allow for selective testings - "1" bit set tests grapheme_strlen(), "2" preg_match_all( '/\X/u' ), "4" mb_strwidth(), "other" safe_strlen().
	$test_strwidth = getenv( 'PHP_CLI_TOOLS_TEST_STRWIDTH' );

	// Assume UTF-8 if no encoding given - `grapheme_strlen()` will return null if given non-UTF-8 string.
	if ( ( ! $encoding || 'UTF-8' === $encoding ) && can_use_icu() && null !== ( $width = grapheme_strlen( $string ) ) ) {
		if ( ! $test_strwidth || ( $test_strwidth & 1 ) ) {
			// Each East Asian wide char takes one extra column on top of its grapheme count.
			return $width + preg_match_all( $eaw_regex, $string, $dummy /*needed for PHP 5.3*/ );
		}
	}
	// Assume UTF-8 if no encoding given - `preg_match_all()` will return false if given non-UTF-8 string.
	if ( ( ! $encoding || 'UTF-8' === $encoding ) && can_use_pcre_x() && false !== ( $width = preg_match_all( '/\X/u', $string, $dummy /*needed for PHP 5.3*/ ) ) ) {
		if ( ! $test_strwidth || ( $test_strwidth & 2 ) ) {
			return $width + preg_match_all( $eaw_regex, $string, $dummy /*needed for PHP 5.3*/ );
		}
	}
	// Legacy encodings and old PHPs will reach here.
	if ( function_exists( 'mb_strwidth' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) ) ) {
		if ( ! $encoding ) {
			$encoding = mb_detect_encoding( $string, null, true /*strict*/ );
		}
		$width = $encoding ? mb_strwidth( $string, $encoding ) : mb_strwidth( $string ); // mbstring funcs can fail if given `$encoding` arg that evals to false.
		if ( 'UTF-8' === $encoding ) {
			// Subtract combining characters.
			$width -= preg_match_all( $m_regex, $string, $dummy /*needed for PHP 5.3*/ );
		}
		if ( ! $test_strwidth || ( $test_strwidth & 4 ) ) {
			return $width;
		}
	}
	return safe_strlen( $string, $encoding );
}

/**
 * Returns whether ICU is modern enough not to flake out.
 *
 * The result is computed once and cached in a static variable.
 *
 * @return bool
 */
function can_use_icu() {
	static $can_use_icu = null;

	if ( null === $can_use_icu ) {
		// Choosing ICU 54, Unicode 7.0.
		$can_use_icu = defined( 'INTL_ICU_VERSION' ) && version_compare( INTL_ICU_VERSION, '54.1', '>=' ) && function_exists( 'grapheme_strlen' ) && function_exists( 'grapheme_substr' );
	}

	return $can_use_icu;
}

/**
 * Returns whether PCRE Unicode extended grapheme cluster '\X' is available for use.
 *
 * The result is computed once and cached in a static variable.
 *
 * @return bool
 */
function can_use_pcre_x() {
	static $can_use_pcre_x = null;

	if ( null === $can_use_pcre_x ) {
		// '\X' introduced (as Unicode extended grapheme cluster) in PCRE 8.32 - see https://vcs.pcre.org/pcre/code/tags/pcre-8.32/ChangeLog?view=markup line 53.
		// Older versions of PCRE were bundled with PHP <= 5.3.23 & <= 5.4.13.
		$pcre_version = substr( PCRE_VERSION, 0, strspn( PCRE_VERSION, '0123456789.' ) ); // Remove any trailing date stuff.
		// The `@` keeps the probe quiet if this PCRE build rejects the pattern.
		$can_use_pcre_x = version_compare( $pcre_version, '8.32', '>=' ) && false !== @preg_match( '/\X/u', '' );
	}

	return $can_use_pcre_x;
}

/**
 * Get the regexs generated from Unicode data.
 *
 * The regexs are loaded from "unicode/regex.php" (relative to this file) on first use and kept in static variables.
 *
 * @param string  $idx  Optional. Return a specific regex only: 'eaw' (East Asian Width) or 'm' (Mark). Default null.
 * @return array|string  Returns the two-element list `array( $eaw_regex, $m_regex )` if not given $idx or $idx doesn't exist, otherwise the specific regex string.
 */
function get_unicode_regexs( $idx = null ) {
	static $eaw_regex; // East Asian Width regex. Characters that count as 2 characters as they're "wide" or "fullwidth". See http://www.unicode.org/reports/tr11/tr11-19.html
	static $m_regex; // Mark characters regex (Unicode property "M") - mark combining "Mc", mark enclosing "Me" and mark non-spacing "Mn" chars that should be ignored for spacing purposes.
	if ( null === $eaw_regex ) {
		// Load both regexs generated from Unicode data.
		require __DIR__ . '/unicode/regex.php';
	}

	if ( null !== $idx ) {
		if ( 'eaw' === $idx ) {
			return $eaw_regex;
		}
		if ( 'm' === $idx ) {
			return $m_regex;
		}
	}

	return array( $eaw_regex, $m_regex, );
}