1<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Component\String;
13
14use Symfony\Component\String\Exception\ExceptionInterface;
15use Symfony\Component\String\Exception\InvalidArgumentException;
16use Symfony\Component\String\Exception\RuntimeException;
17
18/**
19 * Represents a binary-safe string of bytes.
20 *
21 * @author Nicolas Grekas <p@tchwork.com>
22 * @author Hugo Hamon <hugohamon@neuf.fr>
23 *
24 * @throws ExceptionInterface
25 */
26class ByteString extends AbstractString
27{
28    private const ALPHABET_ALPHANUMERIC = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz';
29
    /**
     * Stores the given string as the value wrapped by this instance.
     *
     * @param string $string The raw bytes to wrap; defaults to the empty string
     */
    public function __construct(string $string = '')
    {
        $this->string = $string;
    }
34
35    /*
36     * The following method was derived from code of the Hack Standard Library (v4.40 - 2020-05-03)
37     *
38     * https://github.com/hhvm/hsl/blob/80a42c02f036f72a42f0415e80d6b847f4bf62d5/src/random/private.php#L16
39     *
40     * Code subject to the MIT license (https://github.com/hhvm/hsl/blob/master/LICENSE).
41     *
42     * Copyright (c) 2004-2020, Facebook, Inc. (https://www.facebook.com/)
43     */
44
45    public static function fromRandom(int $length = 16, string $alphabet = null): self
46    {
47        if ($length <= 0) {
48            throw new InvalidArgumentException(sprintf('A strictly positive length is expected, "%d" given.', $length));
49        }
50
51        $alphabet = $alphabet ?? self::ALPHABET_ALPHANUMERIC;
52        $alphabetSize = \strlen($alphabet);
53        $bits = (int) ceil(log($alphabetSize, 2.0));
54        if ($bits <= 0 || $bits > 56) {
55            throw new InvalidArgumentException('The length of the alphabet must in the [2^1, 2^56] range.');
56        }
57
58        $ret = '';
59        while ($length > 0) {
60            $urandomLength = (int) ceil(2 * $length * $bits / 8.0);
61            $data = random_bytes($urandomLength);
62            $unpackedData = 0;
63            $unpackedBits = 0;
64            for ($i = 0; $i < $urandomLength && $length > 0; ++$i) {
65                // Unpack 8 bits
66                $unpackedData = ($unpackedData << 8) | \ord($data[$i]);
67                $unpackedBits += 8;
68
69                // While we have enough bits to select a character from the alphabet, keep
70                // consuming the random data
71                for (; $unpackedBits >= $bits && $length > 0; $unpackedBits -= $bits) {
72                    $index = ($unpackedData & ((1 << $bits) - 1));
73                    $unpackedData >>= $bits;
74                    // Unfortunately, the alphabet size is not necessarily a power of two.
75                    // Worst case, it is 2^k + 1, which means we need (k+1) bits and we
76                    // have around a 50% chance of missing as k gets larger
77                    if ($index < $alphabetSize) {
78                        $ret .= $alphabet[$index];
79                        --$length;
80                    }
81                }
82            }
83        }
84
85        return new static($ret);
86    }
87
88    public function bytesAt(int $offset): array
89    {
90        $str = $this->string[$offset] ?? '';
91
92        return '' === $str ? [] : [\ord($str)];
93    }
94
95    public function append(string ...$suffix): parent
96    {
97        $str = clone $this;
98        $str->string .= 1 >= \count($suffix) ? ($suffix[0] ?? '') : implode('', $suffix);
99
100        return $str;
101    }
102
103    public function camel(): parent
104    {
105        $str = clone $this;
106        $str->string = lcfirst(str_replace(' ', '', ucwords(preg_replace('/[^a-zA-Z0-9\x7f-\xff]++/', ' ', $this->string))));
107
108        return $str;
109    }
110
111    public function chunk(int $length = 1): array
112    {
113        if (1 > $length) {
114            throw new InvalidArgumentException('The chunk length must be greater than zero.');
115        }
116
117        if ('' === $this->string) {
118            return [];
119        }
120
121        $str = clone $this;
122        $chunks = [];
123
124        foreach (str_split($this->string, $length) as $chunk) {
125            $str->string = $chunk;
126            $chunks[] = clone $str;
127        }
128
129        return $chunks;
130    }
131
132    public function endsWith($suffix): bool
133    {
134        if ($suffix instanceof parent) {
135            $suffix = $suffix->string;
136        } elseif (\is_array($suffix) || $suffix instanceof \Traversable) {
137            return parent::endsWith($suffix);
138        } else {
139            $suffix = (string) $suffix;
140        }
141
142        return '' !== $suffix && \strlen($this->string) >= \strlen($suffix) && 0 === substr_compare($this->string, $suffix, -\strlen($suffix), null, $this->ignoreCase);
143    }
144
145    public function equalsTo($string): bool
146    {
147        if ($string instanceof parent) {
148            $string = $string->string;
149        } elseif (\is_array($string) || $string instanceof \Traversable) {
150            return parent::equalsTo($string);
151        } else {
152            $string = (string) $string;
153        }
154
155        if ('' !== $string && $this->ignoreCase) {
156            return 0 === strcasecmp($string, $this->string);
157        }
158
159        return $string === $this->string;
160    }
161
162    public function folded(): parent
163    {
164        $str = clone $this;
165        $str->string = strtolower($str->string);
166
167        return $str;
168    }
169
170    public function indexOf($needle, int $offset = 0): ?int
171    {
172        if ($needle instanceof parent) {
173            $needle = $needle->string;
174        } elseif (\is_array($needle) || $needle instanceof \Traversable) {
175            return parent::indexOf($needle, $offset);
176        } else {
177            $needle = (string) $needle;
178        }
179
180        if ('' === $needle) {
181            return null;
182        }
183
184        $i = $this->ignoreCase ? stripos($this->string, $needle, $offset) : strpos($this->string, $needle, $offset);
185
186        return false === $i ? null : $i;
187    }
188
189    public function indexOfLast($needle, int $offset = 0): ?int
190    {
191        if ($needle instanceof parent) {
192            $needle = $needle->string;
193        } elseif (\is_array($needle) || $needle instanceof \Traversable) {
194            return parent::indexOfLast($needle, $offset);
195        } else {
196            $needle = (string) $needle;
197        }
198
199        if ('' === $needle) {
200            return null;
201        }
202
203        $i = $this->ignoreCase ? strripos($this->string, $needle, $offset) : strrpos($this->string, $needle, $offset);
204
205        return false === $i ? null : $i;
206    }
207
208    public function isUtf8(): bool
209    {
210        return '' === $this->string || preg_match('//u', $this->string);
211    }
212
213    public function join(array $strings, string $lastGlue = null): parent
214    {
215        $str = clone $this;
216
217        $tail = null !== $lastGlue && 1 < \count($strings) ? $lastGlue.array_pop($strings) : '';
218        $str->string = implode($this->string, $strings).$tail;
219
220        return $str;
221    }
222
223    public function length(): int
224    {
225        return \strlen($this->string);
226    }
227
228    public function lower(): parent
229    {
230        $str = clone $this;
231        $str->string = strtolower($str->string);
232
233        return $str;
234    }
235
236    public function match(string $regexp, int $flags = 0, int $offset = 0): array
237    {
238        $match = ((\PREG_PATTERN_ORDER | \PREG_SET_ORDER) & $flags) ? 'preg_match_all' : 'preg_match';
239
240        if ($this->ignoreCase) {
241            $regexp .= 'i';
242        }
243
244        set_error_handler(static function ($t, $m) { throw new InvalidArgumentException($m); });
245
246        try {
247            if (false === $match($regexp, $this->string, $matches, $flags | \PREG_UNMATCHED_AS_NULL, $offset)) {
248                $lastError = preg_last_error();
249
250                foreach (get_defined_constants(true)['pcre'] as $k => $v) {
251                    if ($lastError === $v && '_ERROR' === substr($k, -6)) {
252                        throw new RuntimeException('Matching failed with '.$k.'.');
253                    }
254                }
255
256                throw new RuntimeException('Matching failed with unknown error code.');
257            }
258        } finally {
259            restore_error_handler();
260        }
261
262        return $matches;
263    }
264
265    public function padBoth(int $length, string $padStr = ' '): parent
266    {
267        $str = clone $this;
268        $str->string = str_pad($this->string, $length, $padStr, \STR_PAD_BOTH);
269
270        return $str;
271    }
272
273    public function padEnd(int $length, string $padStr = ' '): parent
274    {
275        $str = clone $this;
276        $str->string = str_pad($this->string, $length, $padStr, \STR_PAD_RIGHT);
277
278        return $str;
279    }
280
281    public function padStart(int $length, string $padStr = ' '): parent
282    {
283        $str = clone $this;
284        $str->string = str_pad($this->string, $length, $padStr, \STR_PAD_LEFT);
285
286        return $str;
287    }
288
289    public function prepend(string ...$prefix): parent
290    {
291        $str = clone $this;
292        $str->string = (1 >= \count($prefix) ? ($prefix[0] ?? '') : implode('', $prefix)).$str->string;
293
294        return $str;
295    }
296
297    public function replace(string $from, string $to): parent
298    {
299        $str = clone $this;
300
301        if ('' !== $from) {
302            $str->string = $this->ignoreCase ? str_ireplace($from, $to, $this->string) : str_replace($from, $to, $this->string);
303        }
304
305        return $str;
306    }
307
308    public function replaceMatches(string $fromRegexp, $to): parent
309    {
310        if ($this->ignoreCase) {
311            $fromRegexp .= 'i';
312        }
313
314        if (\is_array($to)) {
315            if (!\is_callable($to)) {
316                throw new \TypeError(sprintf('Argument 2 passed to "%s::replaceMatches()" must be callable, array given.', static::class));
317            }
318
319            $replace = 'preg_replace_callback';
320        } else {
321            $replace = $to instanceof \Closure ? 'preg_replace_callback' : 'preg_replace';
322        }
323
324        set_error_handler(static function ($t, $m) { throw new InvalidArgumentException($m); });
325
326        try {
327            if (null === $string = $replace($fromRegexp, $to, $this->string)) {
328                $lastError = preg_last_error();
329
330                foreach (get_defined_constants(true)['pcre'] as $k => $v) {
331                    if ($lastError === $v && '_ERROR' === substr($k, -6)) {
332                        throw new RuntimeException('Matching failed with '.$k.'.');
333                    }
334                }
335
336                throw new RuntimeException('Matching failed with unknown error code.');
337            }
338        } finally {
339            restore_error_handler();
340        }
341
342        $str = clone $this;
343        $str->string = $string;
344
345        return $str;
346    }
347
348    public function reverse(): parent
349    {
350        $str = clone $this;
351        $str->string = strrev($str->string);
352
353        return $str;
354    }
355
356    public function slice(int $start = 0, int $length = null): parent
357    {
358        $str = clone $this;
359        $str->string = (string) substr($this->string, $start, $length ?? \PHP_INT_MAX);
360
361        return $str;
362    }
363
364    public function snake(): parent
365    {
366        $str = $this->camel()->title();
367        $str->string = strtolower(preg_replace(['/([A-Z]+)([A-Z][a-z])/', '/([a-z\d])([A-Z])/'], '\1_\2', $str->string));
368
369        return $str;
370    }
371
372    public function splice(string $replacement, int $start = 0, int $length = null): parent
373    {
374        $str = clone $this;
375        $str->string = substr_replace($this->string, $replacement, $start, $length ?? \PHP_INT_MAX);
376
377        return $str;
378    }
379
380    public function split(string $delimiter, int $limit = null, int $flags = null): array
381    {
382        if (1 > $limit = $limit ?? \PHP_INT_MAX) {
383            throw new InvalidArgumentException('Split limit must be a positive integer.');
384        }
385
386        if ('' === $delimiter) {
387            throw new InvalidArgumentException('Split delimiter is empty.');
388        }
389
390        if (null !== $flags) {
391            return parent::split($delimiter, $limit, $flags);
392        }
393
394        $str = clone $this;
395        $chunks = $this->ignoreCase
396            ? preg_split('{'.preg_quote($delimiter).'}iD', $this->string, $limit)
397            : explode($delimiter, $this->string, $limit);
398
399        foreach ($chunks as &$chunk) {
400            $str->string = $chunk;
401            $chunk = clone $str;
402        }
403
404        return $chunks;
405    }
406
407    public function startsWith($prefix): bool
408    {
409        if ($prefix instanceof parent) {
410            $prefix = $prefix->string;
411        } elseif (!\is_string($prefix)) {
412            return parent::startsWith($prefix);
413        }
414
415        return '' !== $prefix && 0 === ($this->ignoreCase ? strncasecmp($this->string, $prefix, \strlen($prefix)) : strncmp($this->string, $prefix, \strlen($prefix)));
416    }
417
418    public function title(bool $allWords = false): parent
419    {
420        $str = clone $this;
421        $str->string = $allWords ? ucwords($str->string) : ucfirst($str->string);
422
423        return $str;
424    }
425
426    public function toUnicodeString(string $fromEncoding = null): UnicodeString
427    {
428        return new UnicodeString($this->toCodePointString($fromEncoding)->string);
429    }
430
431    public function toCodePointString(string $fromEncoding = null): CodePointString
432    {
433        $u = new CodePointString();
434
435        if (\in_array($fromEncoding, [null, 'utf8', 'utf-8', 'UTF8', 'UTF-8'], true) && preg_match('//u', $this->string)) {
436            $u->string = $this->string;
437
438            return $u;
439        }
440
441        set_error_handler(static function ($t, $m) { throw new InvalidArgumentException($m); });
442
443        try {
444            try {
445                $validEncoding = false !== mb_detect_encoding($this->string, $fromEncoding ?? 'Windows-1252', true);
446            } catch (InvalidArgumentException $e) {
447                if (!\function_exists('iconv')) {
448                    throw $e;
449                }
450
451                $u->string = iconv($fromEncoding ?? 'Windows-1252', 'UTF-8', $this->string);
452
453                return $u;
454            }
455        } finally {
456            restore_error_handler();
457        }
458
459        if (!$validEncoding) {
460            throw new InvalidArgumentException(sprintf('Invalid "%s" string.', $fromEncoding ?? 'Windows-1252'));
461        }
462
463        $u->string = mb_convert_encoding($this->string, 'UTF-8', $fromEncoding ?? 'Windows-1252');
464
465        return $u;
466    }
467
468    public function trim(string $chars = " \t\n\r\0\x0B\x0C"): parent
469    {
470        $str = clone $this;
471        $str->string = trim($str->string, $chars);
472
473        return $str;
474    }
475
476    public function trimEnd(string $chars = " \t\n\r\0\x0B\x0C"): parent
477    {
478        $str = clone $this;
479        $str->string = rtrim($str->string, $chars);
480
481        return $str;
482    }
483
484    public function trimStart(string $chars = " \t\n\r\0\x0B\x0C"): parent
485    {
486        $str = clone $this;
487        $str->string = ltrim($str->string, $chars);
488
489        return $str;
490    }
491
492    public function upper(): parent
493    {
494        $str = clone $this;
495        $str->string = strtoupper($str->string);
496
497        return $str;
498    }
499
500    public function width(bool $ignoreAnsiDecoration = true): int
501    {
502        $string = preg_match('//u', $this->string) ? $this->string : preg_replace('/[\x80-\xFF]/', '?', $this->string);
503
504        return (new CodePointString($string))->width($ignoreAnsiDecoration);
505    }
506}
507