1<?php
2/**
3 * String handling methods.
4 *
5 * CakePHP(tm) : Rapid Development Framework (https://cakephp.org)
6 * Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
7 *
8 * Licensed under The MIT License
9 * For full copyright and license information, please see the LICENSE.txt
10 * Redistributions of files must retain the above copyright notice.
11 *
12 * @copyright     Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
13 * @link          https://cakephp.org CakePHP(tm) Project
14 * @package       Cake.Utility
15 * @since         CakePHP(tm) v 1.2.0.5551
16 * @license       https://opensource.org/licenses/mit-license.php MIT License
17 */
18
19/**
20 * String handling methods.
21 *
22 * @package       Cake.Utility
23 */
24class CakeText {
25
/**
 * Builds a random version 4 UUID.
 *
 * The random source is random_int() when that function exists, otherwise mt_rand().
 *
 * @see http://www.ietf.org/rfc/rfc4122.txt
 * @return string RFC 4122 UUID
 */
	public static function uuid() {
		$generator = 'mt_rand';
		if (function_exists('random_int')) {
			$generator = 'random_int';
		}

		// 32 bits for "time_low"
		$timeLow = sprintf('%04x%04x', $generator(0, 65535), $generator(0, 65535));
		// 16 bits for "time_mid"
		$timeMid = sprintf('%04x', $generator(0, 65535));
		// 12 random bits below the 0100 nibble that marks version 4 ("time_hi_and_version")
		$timeHiAndVersion = sprintf('%04x', $generator(0, 4095) | 0x4000);
		// 14 random bits below the "10" pair that marks the DCE1.1 variant
		// ("clk_seq_hi_res" followed by "clk_seq_low")
		$clockSequence = sprintf('%04x', $generator(0, 0x3fff) | 0x8000);
		// 48 bits for "node"
		$node = sprintf(
			'%04x%04x%04x',
			$generator(0, 65535),
			$generator(0, 65535),
			$generator(0, 65535)
		);

		return implode('-', array($timeLow, $timeMid, $timeHiAndVersion, $clockSequence, $node));
	}
53
/**
 * Tokenizes a string using $separator, ignoring any instance of $separator that appears between
 * $leftBound and $rightBound.
 *
 * Every token is passed through trim() before it is returned. When $leftBound and $rightBound are
 * the same string (for example a quote character), each occurrence toggles between being inside
 * and outside of the protected region.
 *
 * Only the single character found at a match position is compared with $separator and the bounds,
 * so multi-character separators or bounds are never treated as such.
 *
 * @param string $data The data to tokenize.
 * @param string $separator The token to split the data on.
 * @param string $leftBound The left boundary to ignore separators in.
 * @param string $rightBound The right boundary to ignore separators in.
 * @return array Array of trimmed tokens in $data, or an empty array when $data is empty()
 *     (which includes the string '0').
 */
	public static function tokenize($data, $separator = ',', $leftBound = '(', $rightBound = ')') {
		if (empty($data)) {
			return array();
		}

		$depth = 0;
		$offset = 0;
		$buffer = '';
		$results = array();
		$length = mb_strlen($data);
		$open = false;

		while ($offset <= $length) {
			// Locate the nearest upcoming separator or boundary, -1 meaning "none left".
			$tmpOffset = -1;
			$offsets = array(
				mb_strpos($data, $separator, $offset),
				mb_strpos($data, $leftBound, $offset),
				mb_strpos($data, $rightBound, $offset)
			);
			for ($i = 0; $i < 3; $i++) {
				if ($offsets[$i] !== false && ($offsets[$i] < $tmpOffset || $tmpOffset == -1)) {
					$tmpOffset = $offsets[$i];
				}
			}
			if ($tmpOffset !== -1) {
				$buffer .= mb_substr($data, $offset, ($tmpOffset - $offset));
				$char = mb_substr($data, $tmpOffset, 1);
				if (!$depth && $char === $separator) {
					// A separator outside of any boundary finishes the current token.
					$results[] = $buffer;
					$buffer = '';
				} else {
					$buffer .= $char;
				}
				if ($leftBound !== $rightBound) {
					if ($char === $leftBound) {
						$depth++;
					}
					if ($char === $rightBound) {
						$depth--;
					}
				} elseif ($char === $leftBound) {
					if (!$open) {
						$depth++;
						$open = true;
					} else {
						$depth--;
						// Reset the toggle so the next occurrence opens a new region
						// instead of driving the depth below zero.
						$open = false;
					}
				}
				$offset = $tmpOffset + 1;
			} else {
				// Nothing left to match: the remainder belongs to the last token.
				$results[] = $buffer . mb_substr($data, $offset);
				$offset = $length + 1;
			}
		}
		if (empty($results) && !empty($buffer)) {
			$results[] = $buffer;
		}

		if (!empty($results)) {
			return array_map('trim', $results);
		}

		return array();
	}
130
/**
 * Replaces variable placeholders inside a $str with any given $data. Each key in the $data array
 * corresponds to a variable placeholder name in $str.
 * Example: `CakeText::insert(':name is :age years old.', array('name' => 'Bob', 'age' => 65));`
 * Returns: Bob is 65 years old.
 *
 * When $str contains `?` and the first key of $data is numeric, each `?` is instead replaced, in
 * order, by the next value of $data. Array values are inserted as an empty string.
 *
 * Available $options are:
 *
 * - before: The character or string in front of the name of the variable placeholder (Defaults to `:`)
 * - after: The character or string after the name of the variable placeholder (Defaults to null)
 * - escape: The character or string used to escape the before character / string (Defaults to `\`)
 * - format: A regex to use for matching variable placeholders. Default is: `/(?<!\\)\:%s/`
 *   (Overwrites before, after, breaks escape / clean)
 * - clean: A boolean or array with instructions for CakeText::cleanInsert
 *
 * @param string $str A string containing variable placeholders
 * @param array $data A key => val array where each key stands for a placeholder variable name
 *     to be replaced with val
 * @param array $options An array of options, see description above
 * @return string
 */
	public static function insert($str, $data, $options = array()) {
		$defaults = array(
			'before' => ':', 'after' => null, 'escape' => '\\', 'format' => null, 'clean' => false
		);
		$options += $defaults;
		$format = $options['format'];
		$data = (array)$data;
		if (empty($data)) {
			return ($options['clean']) ? CakeText::cleanInsert($str, $options) : $str;
		}

		if (!isset($format)) {
			// The result is used as a sprintf() template for each key below, so literal
			// percent signs in any of the three pieces have to be doubled. The string
			// casts keep null options (the default for "after") away from preg_quote().
			$format = sprintf(
				'/(?<!%s)%s%%s%s/',
				str_replace('%', '%%', preg_quote((string)$options['escape'], '/')),
				str_replace('%', '%%', preg_quote((string)$options['before'], '/')),
				str_replace('%', '%%', preg_quote((string)$options['after'], '/'))
			);
		}

		if (strpos($str, '?') !== false && is_numeric(key($data))) {
			$offset = 0;
			while (($pos = strpos($str, '?', $offset)) !== false) {
				$val = array_shift($data);
				// Once $data is exhausted array_shift() yields null; arrays cannot be
				// inserted either. Both become an empty replacement.
				$val = is_array($val) ? '' : (string)$val;
				// Continue searching after the inserted value so a "?" inside it is kept.
				$offset = $pos + strlen($val);
				$str = substr_replace($str, $val, $pos, 1);
			}
			return ($options['clean']) ? CakeText::cleanInsert($str, $options) : $str;
		}

		asort($data);

		// Placeholders are first swapped for the crc32 of their key and only then for the
		// real value, so an inserted value is never scanned for further placeholders.
		$dataKeys = array_keys($data);
		$hashKeys = array_map('crc32', $dataKeys);
		$tempData = array_combine($dataKeys, $hashKeys);
		krsort($tempData);

		foreach ($tempData as $key => $hashVal) {
			$key = sprintf($format, preg_quote($key, '/'));
			$str = preg_replace($key, $hashVal, $str);
		}
		$dataReplacements = array_combine($hashKeys, array_values($data));
		foreach ($dataReplacements as $tmpHash => $tmpValue) {
			$tmpValue = (is_array($tmpValue)) ? '' : $tmpValue;
			$str = str_replace($tmpHash, $tmpValue, $str);
		}

		if (!isset($options['format']) && isset($options['before'])) {
			// Turn escaped placeholder markers back into the plain marker.
			$str = str_replace($options['escape'] . $options['before'], $options['before'], $str);
		}
		return ($options['clean']) ? CakeText::cleanInsert($str, $options) : $str;
	}
204
/**
 * Cleans up a CakeText::insert() formatted string with given $options depending on the 'clean' key in
 * $options. The default method used is text but html is also available. The goal of this function
 * is to replace all whitespace and unneeded markup around placeholders that did not get replaced
 * by CakeText::insert().
 *
 * Recognized $options keys:
 *
 * - clean: false to return $str untouched, true for the 'text' method, a method name, or an array
 *   with a 'method' key plus optional 'word', 'gap' (text only), 'andText' (html only) and
 *   'replacement' overrides.
 * - before: The placeholder prefix (Defaults to `:`)
 * - after: The placeholder suffix (Defaults to null)
 *
 * @param string $str CakeText to clean.
 * @param array $options Options list.
 * @return string
 * @see CakeText::insert()
 */
	public static function cleanInsert($str, $options) {
		// The method is public, so do not rely on insert() having filled in its defaults.
		$options += array('clean' => false, 'before' => ':', 'after' => null);

		$clean = $options['clean'];
		if (!$clean) {
			return $str;
		}
		if ($clean === true) {
			$clean = array('method' => 'text');
		}
		if (!is_array($clean)) {
			$clean = array('method' => $options['clean']);
		}

		// Cast before quoting: "after" defaults to null, which preg_quote() does not accept
		// without a deprecation notice on current PHP versions.
		$before = preg_quote((string)$options['before'], '/');
		$after = preg_quote((string)$options['after'], '/');

		switch ($clean['method']) {
			case 'html':
				$clean = array_merge(array(
					'word' => '[\w,.]+',
					'andText' => true,
					'replacement' => '',
				), $clean);
				// Matches a double-quoted attribute whose value is made up of placeholders only.
				$kleenex = sprintf(
					'/[\s]*[a-z]+=(")(%s%s%s[\s]*)+\\1/i',
					$before,
					$clean['word'],
					$after
				);
				$str = preg_replace($kleenex, $clean['replacement'], $str);
				if ($clean['andText']) {
					$options['clean'] = array('method' => 'text');
					$str = CakeText::cleanInsert($str, $options);
				}
				break;
			case 'text':
				$clean = array_merge(array(
					'word' => '[\w,.]+',
					'gap' => '[\s]*(?:(?:and|or)[\s]*)?',
					'replacement' => '',
				), $clean);

				// Matches a placeholder together with the gap that follows or precedes it.
				$kleenex = sprintf(
					'/(%s%s%s%s|%s%s%s%s)/',
					$before,
					$clean['word'],
					$after,
					$clean['gap'],
					$clean['gap'],
					$before,
					$clean['word'],
					$after
				);
				$str = preg_replace($kleenex, $clean['replacement'], $str);
				break;
		}
		return $str;
	}
269
/**
 * Wraps text to a given width, optionally breaking only between words, and optionally
 * prefixing the resulting lines with an indent string.
 *
 * ### Options
 *
 * - `width` The width to wrap to. Defaults to 72.
 * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true. When false the text is
 *   split into fixed chunks of `width - 1` bytes.
 * - `indent` CakeText to indent with. Defaults to null.
 * - `indentAt` 0 based index of the first line to indent. Defaults to 0.
 *
 * @param string $text The text to format.
 * @param array|int $options Array of options to use, or an integer to wrap the text to.
 * @return string Formatted text.
 */
	public static function wrap($text, $options = array()) {
		$settings = is_numeric($options) ? array('width' => $options) : $options;
		$settings += array('width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0);

		$wrapped = $settings['wordWrap']
			? static::wordWrap($text, $settings['width'], "\n")
			: trim(chunk_split($text, $settings['width'] - 1, "\n"));

		if (empty($settings['indent'])) {
			return $wrapped;
		}

		// Prefix every line from `indentAt` onwards.
		$lines = explode("\n", $wrapped);
		$total = count($lines);
		$index = $settings['indentAt'];
		while ($index < $total) {
			$lines[$index] = $settings['indent'] . $lines[$index];
			$index++;
		}

		return implode("\n", $lines);
	}
303
/**
 * Unicode aware version of wordwrap.
 *
 * The text is split on $break first; every resulting paragraph is wrapped on its own and the
 * pieces are joined with $break again.
 *
 * @param string $text The text to format.
 * @param int $width The width to wrap to. Defaults to 72.
 * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
 * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
 * @return string Formatted text.
 */
	public static function wordWrap($text, $width = 72, $break = "\n", $cut = false) {
		$wrapped = array();
		foreach (explode($break, $text) as $paragraph) {
			$wrapped[] = static::_wordWrap($paragraph, $width, $break, $cut);
		}

		return implode($break, $wrapped);
	}
320
/**
 * Helper method for wordWrap(): wraps a single paragraph.
 *
 * In cut mode the text is sliced into pieces of at most $width characters. Otherwise lines are
 * broken at the last space that fits within $width, or at the first space after it when a word
 * is longer than $width. Every emitted line is trimmed.
 *
 * @param string $text The text to format.
 * @param int $width The width to wrap to. Defaults to 72. In cut mode values below 1 are
 *     treated as 1.
 * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
 * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
 * @return string Formatted text.
 */
	protected static function _wordWrap($text, $width = 72, $break = "\n", $cut = false) {
		$parts = array();

		if ($cut) {
			// A width below 1 would never consume a character, so the loop could not end.
			$width = max(1, (int)$width);
			while (mb_strlen($text) > 0) {
				$part = mb_substr($text, 0, $width);
				$parts[] = trim($part);
				$text = trim(mb_substr($text, mb_strlen($part)));
			}
			return implode($break, $parts);
		}

		while (mb_strlen($text) > 0) {
			if ($width >= mb_strlen($text)) {
				$parts[] = trim($text);
				break;
			}

			$part = mb_substr($text, 0, $width);
			$nextChar = mb_substr($text, $width, 1);
			if ($nextChar !== ' ') {
				// The width falls inside a word: break at the last space that fits,
				// or failing that at the first space after the overlong word.
				$breakAt = mb_strrpos($part, ' ');
				if ($breakAt === false) {
					$breakAt = mb_strpos($text, ' ', $width);
				}
				if ($breakAt === false) {
					$parts[] = trim($text);
					break;
				}
				$part = mb_substr($text, 0, $breakAt);
			}

			// Measure the slice before trimming it. Advancing by the trimmed length
			// would re-read characters whenever the slice starts with whitespace.
			$consumed = mb_strlen($part);
			$parts[] = trim($part);
			$text = trim(mb_substr($text, $consumed));
		}

		return implode($break, $parts);
	}
369
/**
 * Highlights a given phrase in a text. You can specify any expression in highlighter that
 * may include the \1 expression to include the $phrase found.
 *
 * An empty() $phrase returns $text unchanged.
 *
 * ### Options:
 *
 * - `format` The piece of html with that the phrase will be highlighted. When $phrase is an
 *   array this may be an array too, holding one format per phrase under the same key.
 * - `html` If true, will ignore any HTML tags, ensuring that only the correct text is highlighted
 * - `regex` a custom regex rule that is used to match words, default is '|$tag|iu'
 *
 * @param string $text Text to search the phrase in.
 * @param string|array $phrase The phrase or phrases that will be searched.
 * @param array $options An array of html attributes and options.
 * @return string The highlighted text
 * @link https://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::highlight
 */
	public static function highlight($text, $phrase, $options = array()) {
		if (empty($phrase)) {
			return $text;
		}

		$options += array(
			'format' => '<span class="highlight">\1</span>',
			'html' => false,
			'regex' => "|%s|iu"
		);
		// Read the options explicitly: extract() would let keys such as "text" or
		// "phrase" overwrite this method's own arguments.
		$format = $options['format'];
		$html = $options['html'];
		$regex = $options['regex'];

		if (is_array($phrase)) {
			$replace = array();
			$with = array();

			foreach ($phrase as $key => $segment) {
				$segment = '(' . preg_quote($segment, '|') . ')';
				if ($html) {
					// Reject matches that sit inside of a tag.
					$segment = "(?![^<]+>)$segment(?![^<]+>)";
				}

				$with[] = (is_array($format)) ? $format[$key] : $format;
				$replace[] = sprintf($regex, $segment);
			}

			return preg_replace($replace, $with, $text);
		}

		$phrase = '(' . preg_quote($phrase, '|') . ')';
		if ($html) {
			// Reject matches that sit inside of a tag.
			$phrase = "(?![^<]+>)$phrase(?![^<]+>)";
		}

		return preg_replace(sprintf($regex, $phrase), $format, $text);
	}
423
/**
 * Strips given text of all links (<a href=....).
 *
 * Both opening tags (with or without attributes) and closing tags are removed; the link text
 * is kept. Stripping is repeated until nothing more is removed, so that fragments left behind
 * by one pass cannot join up into a new tag.
 *
 * This is plain pattern matching and must not be relied upon as a defense against
 * malicious markup.
 *
 * @param string $text Text
 * @return string The text without links
 * @link https://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::stripLinks
 */
	public static function stripLinks($text) {
		do {
			// "<a" or "</a" must be followed by "/", whitespace or ">" so that other
			// tags starting with the letter a (abbr, area, ...) are left alone.
			$text = preg_replace('#</?a(?:[/\s][^>]*)?>#i', '', $text, -1, $count);
		} while ($count);

		return $text;
	}
434
/**
 * Truncates text starting from the end.
 *
 * Cuts a string to the length of $length and replaces the first characters
 * with the ellipsis if the text is longer than length.
 *
 * ### Options:
 *
 * - `ellipsis` Will be used as Beginning and prepended to the trimmed string
 * - `exact` If false, $text will not be cut mid-word
 *
 * @param string $text CakeText to truncate.
 * @param int $length Length of returned string, including ellipsis.
 * @param array $options An array of options.
 * @return string Trimmed string.
 */
	public static function tail($text, $length = 100, $options = array()) {
		$options += array(
			'ellipsis' => '...', 'exact' => true
		);
		// Read the options explicitly: extract() would let keys such as "text" or
		// "length" overwrite this method's own arguments.
		$ellipsis = $options['ellipsis'];
		$exact = $options['exact'];

		if (!function_exists('mb_strlen')) {
			// NOTE(review): presumably triggers autoloading of a Multibyte class that
			// supplies mb_* fallbacks - confirm against the class loader.
			class_exists('Multibyte');
		}

		if (mb_strlen($text) <= $length) {
			return $text;
		}

		// Keep the last characters, leaving room for the ellipsis within $length.
		$truncate = mb_substr($text, mb_strlen($text) - $length + mb_strlen($ellipsis));
		if (!$exact) {
			// Drop the partial word at the start; without any space nothing is kept.
			$spacepos = mb_strpos($truncate, ' ');
			$truncate = $spacepos === false ? '' : trim(mb_substr($truncate, $spacepos));
		}

		return $ellipsis . $truncate;
	}
474
/**
 * Truncates text.
 *
 * Cuts a string to the length of $length and replaces the last characters
 * with the ellipsis if the text is longer than length.
 *
 * ### Options:
 *
 * - `ellipsis` Will be used as Ending and appended to the trimmed string (`ending` is deprecated).
 *   Defaults to `...`, or to the single ellipsis character when `html` is set and the
 *   `App.encoding` configuration value is UTF-8.
 * - `exact` If false, $text will not be cut mid-word
 * - `html` If true, HTML tags would be handled correctly: tags do not count towards $length and
 *   elements still open at the cut are closed after the ellipsis.
 *
 * @param string $text CakeText to truncate.
 * @param int $length Length of returned string, including ellipsis.
 * @param array $options An array of html attributes and options.
 * @return string Trimmed string.
 * @link https://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::truncate
 */
	public static function truncate($text, $length = 100, $options = array()) {
		$defaults = array(
			'ellipsis' => '...', 'exact' => true, 'html' => false
		);
		if (isset($options['ending'])) {
			$defaults['ellipsis'] = $options['ending'];
		} elseif (!empty($options['html']) && Configure::read('App.encoding') === 'UTF-8') {
			$defaults['ellipsis'] = "\xe2\x80\xa6";
		}
		$options += $defaults;
		// Read the options explicitly: extract() would let keys such as "text" or
		// "length" overwrite this method's own arguments.
		$ellipsis = $options['ellipsis'];
		$exact = $options['exact'];
		$html = $options['html'];

		if (!function_exists('mb_strlen')) {
			// NOTE(review): presumably triggers autoloading of a Multibyte class that
			// supplies mb_* fallbacks - confirm against the class loader.
			class_exists('Multibyte');
		}

		if ($html) {
			if (mb_strlen(preg_replace('/<.*?>/', '', $text)) <= $length) {
				return $text;
			}
			$totalLength = mb_strlen(strip_tags($ellipsis));
			$openTags = array();
			$truncate = '';

			// Elements without a closing tag. The name has to match as a whole, otherwise
			// e.g. "abbr" (contains "br") or "textarea" (contains "area") would be skipped
			// and never closed again.
			$voidElements = '/^(?:img|br|input|hr|area|base|basefont|col|embed|frame|isindex|link|meta|param|source|track|wbr)$/i';
			// Each HTML entity counts as one visible character.
			$entityPattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';

			// Every match is an optional tag (1), its name (2) and the text following it (3).
			preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);
			foreach ($tags as $tag) {
				if (!preg_match($voidElements, $tag[2])) {
					if (preg_match('/<[\w]+[^>]*>/s', $tag[0])) {
						array_unshift($openTags, $tag[2]);
					} elseif (preg_match('/<\/([\w]+)[^>]*>/s', $tag[0], $closeTag)) {
						$pos = array_search($closeTag[1], $openTags);
						if ($pos !== false) {
							array_splice($openTags, $pos, 1);
						}
					}
				}
				$truncate .= $tag[1];

				$contentLength = mb_strlen(preg_replace($entityPattern, ' ', $tag[3]));
				if ($contentLength + $totalLength > $length) {
					$left = $length - $totalLength;
					$entitiesLength = 0;
					if (preg_match_all($entityPattern, $tag[3], $entities, PREG_OFFSET_CAPTURE)) {
						// Entities that start within the remaining room are kept whole.
						foreach ($entities[0] as $entity) {
							if ($entity[1] + 1 - $entitiesLength <= $left) {
								$left--;
								$entitiesLength += mb_strlen($entity[0]);
							} else {
								break;
							}
						}
					}

					$truncate .= mb_substr($tag[3], 0, $left + $entitiesLength);
					break;
				} else {
					$truncate .= $tag[3];
					$totalLength += $contentLength;
				}
				if ($totalLength >= $length) {
					break;
				}
			}
		} else {
			if (mb_strlen($text) <= $length) {
				return $text;
			}
			$truncate = mb_substr($text, 0, $length - mb_strlen($ellipsis));
		}
		if (!$exact) {
			$spacepos = mb_strrpos($truncate, ' ');
			if ($html) {
				$truncateCheck = mb_substr($truncate, 0, $spacepos);
				$lastOpenTag = mb_strrpos($truncateCheck, '<');
				$lastCloseTag = mb_strrpos($truncateCheck, '>');
				if ($lastOpenTag > $lastCloseTag) {
					// The last space lies inside of a tag: cut right after that tag instead.
					preg_match_all('/<[\w]+[^>]*>/s', $truncate, $lastTagMatches);
					$lastTag = array_pop($lastTagMatches[0]);
					$spacepos = mb_strrpos($truncate, $lastTag) + mb_strlen($lastTag);
				}
				// Closing tags that get cut off have to be appended again at the end.
				$bits = mb_substr($truncate, $spacepos);
				preg_match_all('/<\/([a-z]+)>/', $bits, $droppedTags, PREG_SET_ORDER);
				if (!empty($droppedTags)) {
					if (!empty($openTags)) {
						foreach ($droppedTags as $closingTag) {
							if (!in_array($closingTag[1], $openTags)) {
								array_unshift($openTags, $closingTag[1]);
							}
						}
					} else {
						foreach ($droppedTags as $closingTag) {
							$openTags[] = $closingTag[1];
						}
					}
				}
			}
			$truncate = mb_substr($truncate, 0, $spacepos);
		}
		$truncate .= $ellipsis;

		if ($html) {
			foreach ($openTags as $tag) {
				$truncate .= '</' . $tag . '>';
			}
		}

		return $truncate;
	}
601
/**
 * Extracts an excerpt from the text surrounding the phrase with a number of characters on each side
 * determined by radius.
 *
 * The phrase is located case-insensitively. The ellipsis is added only on a side where text was
 * actually cut off. When the phrase does not occur, the first $radius characters followed by the
 * ellipsis are returned; when $text or $phrase is empty, the text truncated to twice the radius.
 *
 * @param string $text CakeText to search the phrase in
 * @param string $phrase Phrase that will be searched for
 * @param int $radius The amount of characters that will be returned on each side of the founded phrase
 * @param string $ellipsis Ending that will be appended
 * @return string Modified string
 * @link https://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::excerpt
 */
	public static function excerpt($text, $phrase, $radius = 100, $ellipsis = '...') {
		if (empty($text) || empty($phrase)) {
			return static::truncate($text, $radius * 2, array('ellipsis' => $ellipsis));
		}

		$matchAt = mb_strpos(mb_strtolower($text), mb_strtolower($phrase));
		if ($matchAt === false) {
			return mb_substr($text, 0, $radius) . $ellipsis;
		}

		$textLength = mb_strlen($text);
		$from = $matchAt - $radius;
		$to = $matchAt + mb_strlen($phrase) + $radius;

		// A side only gets the ellipsis when the window does not reach that end of the text.
		$leading = ($from > 0) ? $ellipsis : '';
		$trailing = ($to < $textLength) ? $ellipsis : '';

		$from = ($from > 0) ? $from : 0;
		$to = ($to < $textLength) ? $to : $textLength;

		return $leading . mb_substr($text, $from, $to - $from) . $trailing;
	}
645
/**
 * Creates a comma separated list where the last two items are joined with 'and', forming natural language.
 *
 * A list with a single item returns that item as is.
 *
 * @param array $list The list to be joined.
 * @param string $and The word used to join the last and second last items together with. Defaults to 'and'.
 * @param string $separator The separator used to join all the other items together. Defaults to ', '.
 * @return string|null The glued together string, or null when $list is empty.
 * @link https://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::toList
 */
	public static function toList($list, $and = null, $separator = ', ') {
		if ($and === null) {
			$and = __d('cake', 'and');
		}
		if (count($list) > 1) {
			// Take the last item off first; what remains is joined with the separator.
			$last = array_pop($list);
			return implode($separator, $list) . ' ' . $and . ' ' . $last;
		}

		return array_pop($list);
	}
665}
666