<?php
/**
 * String handling methods.
 *
 * CakePHP(tm) : Rapid Development Framework (https://cakephp.org)
 * Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
 *
 * Licensed under The MIT License
 * For full copyright and license information, please see the LICENSE.txt
 * Redistributions of files must retain the above copyright notice.
 *
 * @copyright     Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
 * @link          https://cakephp.org CakePHP(tm) Project
 * @package       Cake.Utility
 * @since         CakePHP(tm) v 1.2.0.5551
 * @license       https://opensource.org/licenses/mit-license.php MIT License
 */

/**
 * String handling methods.
 *
 * @package       Cake.Utility
 */
class CakeText {

/**
 * Generate a random (version 4) UUID.
 *
 * Uses `random_int()` when available and falls back to `mt_rand()` otherwise.
 *
 * @see http://www.ietf.org/rfc/rfc4122.txt
 * @return string RFC 4122 UUID
 */
	public static function uuid() {
		$random = function_exists('random_int') ? 'random_int' : 'mt_rand';
		return sprintf(
			'%04x%04x-%04x-%04x-%04x-%04x%04x%04x',
			// 32 bits for "time_low"
			$random(0, 65535),
			$random(0, 65535),
			// 16 bits for "time_mid"
			$random(0, 65535),
			// 12 bits before the 0100 of (version) 4 for "time_hi_and_version"
			$random(0, 4095) | 0x4000,
			// 16 bits, 8 bits for "clk_seq_hi_res",
			// 8 bits for "clk_seq_low",
			// two most significant bits holds zero and one for variant DCE1.1
			$random(0, 0x3fff) | 0x8000,
			// 48 bits for "node"
			$random(0, 65535),
			$random(0, 65535),
			$random(0, 65535)
		);
	}

/**
 * Tokenizes a string using $separator, ignoring any instance of $separator that appears between
 * $leftBound and $rightBound.
 *
 * When $leftBound and $rightBound are the same character (e.g. a quote), occurrences are
 * treated as alternating open/close markers.
 *
 * @param string $data The data to tokenize.
 * @param string $separator The token to split the data on.
 * @param string $leftBound The left boundary to ignore separators in.
 * @param string $rightBound The right boundary to ignore separators in.
 * @return array Array of trimmed tokens in $data, or an empty array if $data is empty.
 */
	public static function tokenize($data, $separator = ',', $leftBound = '(', $rightBound = ')') {
		if (empty($data)) {
			return array();
		}

		$depth = 0;
		$offset = 0;
		$buffer = '';
		$results = array();
		$length = mb_strlen($data);
		$open = false;

		while ($offset <= $length) {
			// Find the nearest interesting character (separator or either bound).
			$tmpOffset = -1;
			$offsets = array(
				mb_strpos($data, $separator, $offset),
				mb_strpos($data, $leftBound, $offset),
				mb_strpos($data, $rightBound, $offset)
			);
			for ($i = 0; $i < 3; $i++) {
				if ($offsets[$i] !== false && ($offsets[$i] < $tmpOffset || $tmpOffset == -1)) {
					$tmpOffset = $offsets[$i];
				}
			}
			if ($tmpOffset !== -1) {
				$buffer .= mb_substr($data, $offset, ($tmpOffset - $offset));
				$char = mb_substr($data, $tmpOffset, 1);
				if (!$depth && $char === $separator) {
					$results[] = $buffer;
					$buffer = '';
				} else {
					$buffer .= $char;
				}
				if ($leftBound !== $rightBound) {
					if ($char === $leftBound) {
						$depth++;
					}
					if ($char === $rightBound) {
						$depth--;
					}
				} else {
					if ($char === $leftBound) {
						if (!$open) {
							$depth++;
							$open = true;
						} else {
							$depth--;
							// Reset the toggle so the next bound opens a new group
							// instead of driving $depth negative.
							$open = false;
						}
					}
				}
				$offset = ++$tmpOffset;
			} else {
				// Nothing left to split on: flush the remainder and leave the loop.
				$results[] = $buffer . mb_substr($data, $offset);
				$offset = $length + 1;
			}
		}
		if (empty($results) && !empty($buffer)) {
			$results[] = $buffer;
		}

		if (!empty($results)) {
			return array_map('trim', $results);
		}

		return array();
	}

/**
 * Replaces variable placeholders inside a $str with any given $data. Each key in the $data array
 * corresponds to a variable placeholder name in $str.
 * Example: `CakeText::insert(':name is :age years old.', array('name' => 'Bob', 'age' => '65'));`
 * Returns: Bob is 65 years old.
 *
 * If $str contains `?` and the first key of $data is numeric, each `?` is replaced
 * positionally with the next value of $data instead.
 *
 * Available $options are:
 *
 * - before: The character or string in front of the name of the variable placeholder (Defaults to `:`)
 * - after: The character or string after the name of the variable placeholder (Defaults to null)
 * - escape: The character or string used to escape the before character / string (Defaults to `\`)
 * - format: A regex to use for matching variable placeholders. Default is: `/(?<!\\)\:%s/`
 *   (Overwrites before, after, breaks escape / clean)
 * - clean: A boolean or array with instructions for CakeText::cleanInsert
 *
 * @param string $str A string containing variable placeholders
 * @param array $data A key => val array where each key stands for a placeholder variable name
 *     to be replaced with val
 * @param array $options An array of options, see description above
 * @return string
 */
	public static function insert($str, $data, $options = array()) {
		$defaults = array(
			'before' => ':', 'after' => null, 'escape' => '\\', 'format' => null, 'clean' => false
		);
		$options += $defaults;
		$format = $options['format'];
		$data = (array)$data;
		if (empty($data)) {
			return ($options['clean']) ? static::cleanInsert($str, $options) : $str;
		}

		if (!isset($format)) {
			// String casts avoid passing null to preg_quote() (deprecated since PHP 8.1).
			$format = sprintf(
				'/(?<!%s)%s%%s%s/',
				preg_quote((string)$options['escape'], '/'),
				str_replace('%', '%%', preg_quote((string)$options['before'], '/')),
				str_replace('%', '%%', preg_quote((string)$options['after'], '/'))
			);
		}

		if (strpos($str, '?') !== false && is_numeric(key($data))) {
			$offset = 0;
			while (($pos = strpos($str, '?', $offset)) !== false) {
				$val = array_shift($data);
				if ($val === null) {
					// More placeholders than values: replace with an empty string.
					$val = '';
				}
				$offset = $pos + strlen($val);
				$str = substr_replace($str, $val, $pos, 1);
			}
			return ($options['clean']) ? static::cleanInsert($str, $options) : $str;
		}

		asort($data);

		// Placeholders are first swapped for crc32 hashes of their names and only then for
		// the real values, so that a value containing another placeholder is not re-expanded.
		$dataKeys = array_keys($data);
		$hashKeys = array_map('crc32', $dataKeys);
		$tempData = array_combine($dataKeys, $hashKeys);
		krsort($tempData);

		foreach ($tempData as $key => $hashVal) {
			$key = sprintf($format, preg_quote($key, '/'));
			$str = preg_replace($key, $hashVal, $str);
		}
		$dataReplacements = array_combine($hashKeys, array_values($data));
		foreach ($dataReplacements as $tmpHash => $tmpValue) {
			$tmpValue = (is_array($tmpValue)) ? '' : $tmpValue;
			$str = str_replace($tmpHash, $tmpValue, $str);
		}

		if (!isset($options['format']) && isset($options['before'])) {
			// Un-escape escaped placeholders, e.g. `\:name` becomes `:name`.
			$str = str_replace($options['escape'] . $options['before'], $options['before'], $str);
		}
		return ($options['clean']) ? static::cleanInsert($str, $options) : $str;
	}

/**
 * Cleans up a CakeText::insert() formatted string with given $options depending on the 'clean' key in
 * $options. The default method used is text but html is also available. The goal of this function
 * is to replace all whitespace and unneeded markup around placeholders that did not get replaced
 * by CakeText::insert().
 *
 * @param string $str String to clean.
 * @param array $options Options list; the `clean`, `before` and `after` keys are read.
 * @return string
 * @see CakeText::insert()
 */
	public static function cleanInsert($str, $options) {
		$clean = $options['clean'];
		if (!$clean) {
			return $str;
		}
		if ($clean === true) {
			$clean = array('method' => 'text');
		}
		if (!is_array($clean)) {
			$clean = array('method' => $options['clean']);
		}
		// String casts avoid passing null to preg_quote() (deprecated since PHP 8.1).
		$before = preg_quote((string)$options['before'], '/');
		$after = preg_quote((string)$options['after'], '/');
		switch ($clean['method']) {
			case 'html':
				$clean = array_merge(array(
					'word' => '[\w,.]+',
					'andText' => true,
					'replacement' => '',
				), $clean);
				// Matches whole attributes whose value consists only of placeholders.
				$kleenex = sprintf(
					'/[\s]*[a-z]+=(")(%s%s%s[\s]*)+\\1/i',
					$before,
					$clean['word'],
					$after
				);
				$str = preg_replace($kleenex, $clean['replacement'], $str);
				if ($clean['andText']) {
					$options['clean'] = array('method' => 'text');
					$str = static::cleanInsert($str, $options);
				}
				break;
			case 'text':
				$clean = array_merge(array(
					'word' => '[\w,.]+',
					'gap' => '[\s]*(?:(?:and|or)[\s]*)?',
					'replacement' => '',
				), $clean);

				// Matches a placeholder together with the gap after it, or before it.
				$kleenex = sprintf(
					'/(%s%s%s%s|%s%s%s%s)/',
					$before,
					$clean['word'],
					$after,
					$clean['gap'],
					$clean['gap'],
					$before,
					$clean['word'],
					$after
				);
				$str = preg_replace($kleenex, $clean['replacement'], $str);
				break;
		}
		return $str;
	}

/**
 * Wraps text to a specific width, can optionally wrap at word breaks.
 *
 * ### Options
 *
 * - `width` The width to wrap to. Defaults to 72.
 * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
 * - `indent` String to indent with. Defaults to null.
 * - `indentAt` 0 based index to start indenting at. Defaults to 0.
 *
 * @param string $text The text to format.
 * @param array|int $options Array of options to use, or an integer to wrap the text to.
 * @return string Formatted text.
 */
	public static function wrap($text, $options = array()) {
		if (is_numeric($options)) {
			$options = array('width' => $options);
		}
		$options += array('width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0);
		if ($options['wordWrap']) {
			$wrapped = static::wordWrap($text, $options['width'], "\n");
		} else {
			$wrapped = trim(chunk_split($text, $options['width'] - 1, "\n"));
		}
		if (!empty($options['indent'])) {
			$chunks = explode("\n", $wrapped);
			for ($i = $options['indentAt'], $len = count($chunks); $i < $len; $i++) {
				$chunks[$i] = $options['indent'] . $chunks[$i];
			}
			$wrapped = implode("\n", $chunks);
		}
		return $wrapped;
	}

/**
 * Unicode aware version of wordwrap.
 *
 * The text is split on $break first, so existing line breaks are preserved and each
 * paragraph is wrapped independently.
 *
 * @param string $text The text to format.
 * @param int $width The width to wrap to. Defaults to 72.
 * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
 * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
 * @return string Formatted text.
 */
	public static function wordWrap($text, $width = 72, $break = "\n", $cut = false) {
		$paragraphs = explode($break, $text);
		foreach ($paragraphs as $index => $paragraph) {
			$paragraphs[$index] = static::_wordWrap($paragraph, $width, $break, $cut);
		}
		return implode($break, $paragraphs);
	}

/**
 * Helper method for wordWrap().
 *
 * @param string $text The text to format.
 * @param int $width The width to wrap to. Defaults to 72.
 * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
 * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
 * @return string Formatted text.
 */
	protected static function _wordWrap($text, $width = 72, $break = "\n", $cut = false) {
		if ($cut) {
			$parts = array();
			while (mb_strlen($text) > 0) {
				$part = mb_substr($text, 0, $width);
				$parts[] = trim($part);
				$text = trim(mb_substr($text, mb_strlen($part)));
			}
			return implode($break, $parts);
		}

		$parts = array();
		while (mb_strlen($text) > 0) {
			if ($width >= mb_strlen($text)) {
				$parts[] = trim($text);
				break;
			}

			$part = mb_substr($text, 0, $width);
			$nextChar = mb_substr($text, $width, 1);
			if ($nextChar !== ' ') {
				// The cut would land mid-word: break at the last space inside the window,
				// or failing that at the first space after it.
				$breakAt = mb_strrpos($part, ' ');
				if ($breakAt === false) {
					$breakAt = mb_strpos($text, ' ', $width);
				}
				if ($breakAt === false) {
					$parts[] = trim($text);
					break;
				}
				$part = mb_substr($text, 0, $breakAt);
			}

			// Measure what was consumed BEFORE trimming; otherwise leading whitespace
			// makes the next slice start too early and repeats characters.
			$consumed = mb_strlen($part);
			$parts[] = trim($part);
			$text = trim(mb_substr($text, $consumed));
		}

		return implode($break, $parts);
	}

/**
 * Highlights a given phrase in a text. You can specify any expression in highlighter that
 * may include the \1 expression to include the $phrase found.
 *
 * ### Options:
 *
 * - `format` The piece of html with that the phrase will be highlighted
 * - `html` If true, will ignore any HTML tags, ensuring that only the correct text is highlighted
 * - `regex` a custom regex rule that is used to match words, default is '|$tag|iu'
 *
 * @param string $text Text to search the phrase in.
 * @param string|array $phrase The phrase or phrases that will be searched.
 * @param array $options An array of html attributes and options.
 * @return string The highlighted text
 * @link https://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::highlight
 */
	public static function highlight($text, $phrase, $options = array()) {
		if (empty($phrase)) {
			return $text;
		}

		$defaults = array(
			'format' => '<span class="highlight">\1</span>',
			'html' => false,
			'regex' => "|%s|iu"
		);
		$options += $defaults;
		// Read options explicitly; extract() would let option keys such as 'text'
		// or 'phrase' overwrite the method arguments.
		$format = $options['format'];
		$html = $options['html'];

		if (is_array($phrase)) {
			$replace = array();
			$with = array();

			foreach ($phrase as $key => $segment) {
				$segment = '(' . preg_quote($segment, '|') . ')';
				if ($html) {
					$segment = "(?![^<]+>)$segment(?![^<]+>)";
				}

				$with[] = (is_array($format)) ? $format[$key] : $format;
				$replace[] = sprintf($options['regex'], $segment);
			}

			return preg_replace($replace, $with, $text);
		}

		$phrase = '(' . preg_quote($phrase, '|') . ')';
		if ($html) {
			// Lookaheads prevent matches inside an HTML tag.
			$phrase = "(?![^<]+>)$phrase(?![^<]+>)";
		}

		return preg_replace(sprintf($options['regex'], $phrase), $format, $text);
	}

/**
 * Strips given text of all links (<a href=....).
 *
 * Only the opening and closing anchor tags are removed; the link text is kept.
 *
 * @param string $text Text
 * @return string The text without links
 * @link https://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::stripLinks
 */
	public static function stripLinks($text) {
		return preg_replace('|<a\s+[^>]+>|im', '', preg_replace('|<\/a>|im', '', $text));
	}

/**
 * Truncates text starting from the end.
 *
 * Cuts a string to the length of $length and replaces the first characters
 * with the ellipsis if the text is longer than length.
 *
 * ### Options:
 *
 * - `ellipsis` Will be used as Beginning and prepended to the trimmed string
 * - `exact` If false, $text will not be cut mid-word
 *
 * @param string $text String to truncate.
 * @param int $length Length of returned string, including ellipsis.
 * @param array $options An array of options.
 * @return string Trimmed string.
 */
	public static function tail($text, $length = 100, $options = array()) {
		$defaults = array(
			'ellipsis' => '...', 'exact' => true
		);
		$options += $defaults;
		// Explicit reads instead of extract(), which could overwrite $text / $length.
		$ellipsis = $options['ellipsis'];
		$exact = $options['exact'];

		if (!function_exists('mb_strlen')) {
			// Triggers autoloading of the Multibyte class when mbstring is unavailable.
			class_exists('Multibyte');
		}

		if (mb_strlen($text) <= $length) {
			return $text;
		}

		$truncate = mb_substr($text, mb_strlen($text) - $length + mb_strlen($ellipsis));
		if (!$exact) {
			$spacepos = mb_strpos($truncate, ' ');
			$truncate = $spacepos === false ? '' : trim(mb_substr($truncate, $spacepos));
		}

		return $ellipsis . $truncate;
	}

/**
 * Truncates text.
 *
 * Cuts a string to the length of $length and replaces the last characters
 * with the ellipsis if the text is longer than length.
 *
 * ### Options:
 *
 * - `ellipsis` Will be used as Ending and appended to the trimmed string (`ending` is deprecated)
 * - `exact` If false, $text will not be cut mid-word
 * - `html` If true, HTML tags would be handled correctly
 *
 * @param string $text String to truncate.
 * @param int $length Length of returned string, including ellipsis.
 * @param array $options An array of html attributes and options.
 * @return string Trimmed string.
 * @link https://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::truncate
 */
	public static function truncate($text, $length = 100, $options = array()) {
		$defaults = array(
			'ellipsis' => '...', 'exact' => true, 'html' => false
		);
		if (isset($options['ending'])) {
			$defaults['ellipsis'] = $options['ending'];
		} elseif (!empty($options['html']) && Configure::read('App.encoding') === 'UTF-8') {
			// Single-character horizontal ellipsis (U+2026) for UTF-8 HTML output.
			$defaults['ellipsis'] = "\xe2\x80\xa6";
		}
		$options += $defaults;
		// Explicit reads instead of extract(), which could overwrite $text / $length.
		$ellipsis = $options['ellipsis'];
		$exact = $options['exact'];
		$html = $options['html'];

		if (!function_exists('mb_strlen')) {
			// Triggers autoloading of the Multibyte class when mbstring is unavailable.
			class_exists('Multibyte');
		}

		if ($html) {
			if (mb_strlen(preg_replace('/<.*?>/', '', $text)) <= $length) {
				return $text;
			}
			$totalLength = mb_strlen(strip_tags($ellipsis));
			$openTags = array();
			$truncate = '';

			preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);
			foreach ($tags as $tag) {
				// Void elements never need a closing tag. The pattern is anchored and
				// case-insensitive so that e.g. <abbr> or <iframe> are not mistaken
				// for <br> / <frame>, and <BR> is recognised as void.
				if (!preg_match('/^(?:img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)$/i', $tag[2])) {
					if (preg_match('/<[\w]+[^>]*>/s', $tag[0])) {
						array_unshift($openTags, $tag[2]);
					} elseif (preg_match('/<\/([\w]+)[^>]*>/s', $tag[0], $closeTag)) {
						$pos = array_search($closeTag[1], $openTags);
						if ($pos !== false) {
							array_splice($openTags, $pos, 1);
						}
					}
				}
				$truncate .= $tag[1];

				// Each HTML entity counts as a single character.
				$contentLength = mb_strlen(preg_replace('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', ' ', $tag[3]));
				if ($contentLength + $totalLength > $length) {
					$left = $length - $totalLength;
					$entitiesLength = 0;
					if (preg_match_all('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', $tag[3], $entities, PREG_OFFSET_CAPTURE)) {
						foreach ($entities[0] as $entity) {
							if ($entity[1] + 1 - $entitiesLength <= $left) {
								$left--;
								$entitiesLength += mb_strlen($entity[0]);
							} else {
								break;
							}
						}
					}

					$truncate .= mb_substr($tag[3], 0, $left + $entitiesLength);
					break;
				} else {
					$truncate .= $tag[3];
					$totalLength += $contentLength;
				}
				if ($totalLength >= $length) {
					break;
				}
			}
		} else {
			if (mb_strlen($text) <= $length) {
				return $text;
			}
			$truncate = mb_substr($text, 0, $length - mb_strlen($ellipsis));
		}
		if (!$exact) {
			$spacepos = mb_strrpos($truncate, ' ');
			if ($html) {
				$truncateCheck = mb_substr($truncate, 0, $spacepos);
				$lastOpenTag = mb_strrpos($truncateCheck, '<');
				$lastCloseTag = mb_strrpos($truncateCheck, '>');
				if ($lastOpenTag > $lastCloseTag) {
					// The last space sits inside a tag: cut right after that tag instead.
					preg_match_all('/<[\w]+[^>]*>/s', $truncate, $lastTagMatches);
					$lastTag = array_pop($lastTagMatches[0]);
					$spacepos = mb_strrpos($truncate, $lastTag) + mb_strlen($lastTag);
				}
				// Closing tags that get cut off must be re-appended at the end.
				$bits = mb_substr($truncate, $spacepos);
				preg_match_all('/<\/([a-z]+)>/', $bits, $droppedTags, PREG_SET_ORDER);
				if (!empty($droppedTags)) {
					if (!empty($openTags)) {
						foreach ($droppedTags as $closingTag) {
							if (!in_array($closingTag[1], $openTags)) {
								array_unshift($openTags, $closingTag[1]);
							}
						}
					} else {
						foreach ($droppedTags as $closingTag) {
							$openTags[] = $closingTag[1];
						}
					}
				}
			}
			$truncate = mb_substr($truncate, 0, $spacepos);
		}
		$truncate .= $ellipsis;

		if ($html) {
			foreach ($openTags as $tag) {
				$truncate .= '</' . $tag . '>';
			}
		}

		return $truncate;
	}

/**
 * Extracts an excerpt from the text surrounding the phrase with a number of characters on each side
 * determined by radius.
 *
 * The phrase is matched case-insensitively. If it is not found, the first $radius
 * characters of $text followed by $ellipsis are returned.
 *
 * @param string $text String to search the phrase in
 * @param string $phrase Phrase that will be searched for
 * @param int $radius The amount of characters that will be returned on each side of the founded phrase
 * @param string $ellipsis Ending that will be appended
 * @return string Modified string
 * @link https://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::excerpt
 */
	public static function excerpt($text, $phrase, $radius = 100, $ellipsis = '...') {
		if (empty($text) || empty($phrase)) {
			return static::truncate($text, $radius * 2, array('ellipsis' => $ellipsis));
		}

		$append = $prepend = $ellipsis;

		$phraseLen = mb_strlen($phrase);
		$textLen = mb_strlen($text);

		$pos = mb_strpos(mb_strtolower($text), mb_strtolower($phrase));
		if ($pos === false) {
			return mb_substr($text, 0, $radius) . $ellipsis;
		}

		$startPos = $pos - $radius;
		if ($startPos <= 0) {
			// Excerpt starts at the beginning of the text: no leading ellipsis.
			$startPos = 0;
			$prepend = '';
		}

		$endPos = $pos + $phraseLen + $radius;
		if ($endPos >= $textLen) {
			// Excerpt reaches the end of the text: no trailing ellipsis.
			$endPos = $textLen;
			$append = '';
		}

		$excerpt = mb_substr($text, $startPos, $endPos - $startPos);
		$excerpt = $prepend . $excerpt . $append;

		return $excerpt;
	}

/**
 * Creates a comma separated list where the last two items are joined with 'and', forming natural language.
 *
 * @param array $list The list to be joined.
 * @param string $and The word used to join the last and second last items together with. Defaults to 'and'.
 * @param string $separator The separator used to join all the other items together. Defaults to ', '.
 * @return string The glued together string.
 * @link https://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::toList
 */
	public static function toList($list, $and = null, $separator = ', ') {
		if ($and === null) {
			$and = __d('cake', 'and');
		}
		if (count($list) > 1) {
			// Offset must be an int; passing null is deprecated since PHP 8.1.
			return implode($separator, array_slice($list, 0, -1)) . ' ' . $and . ' ' . array_pop($list);
		}

		return array_pop($list);
	}
}