1<?php
2
3declare(strict_types=1);
4
5namespace Doctrine\Inflector;
6
7use RuntimeException;
8
9use function chr;
10use function function_exists;
11use function lcfirst;
12use function mb_strtolower;
13use function ord;
14use function preg_match;
15use function preg_replace;
16use function sprintf;
17use function str_replace;
18use function strlen;
19use function strtolower;
20use function strtr;
21use function trim;
22use function ucwords;
23
/**
 * String inflection helpers: case conversion (tableize/classify/camelize),
 * accent removal, URL slug generation, and singular/plural conversion which is
 * delegated to the two injected WordInflector instances.
 */
class Inflector
{
    /**
     * UTF-8 transliteration table used by unaccent(): accented or special
     * character => ASCII replacement.
     *
     * NOTE(review): 'æ' maps to 'a' whereas 'Æ' maps to 'Ae' and the single-byte
     * branch of unaccent() maps chr(230) to 'ae'. This looks inconsistent but is
     * left untouched because it changes the output of urlize() — confirm before
     * changing.
     */
    private const ACCENTED_CHARACTERS = [
        'À' => 'A',
        'Á' => 'A',
        'Â' => 'A',
        'Ã' => 'A',
        'Ä' => 'Ae',
        'Æ' => 'Ae',
        'Å' => 'Aa',
        'æ' => 'a',
        'Ç' => 'C',
        'È' => 'E',
        'É' => 'E',
        'Ê' => 'E',
        'Ë' => 'E',
        'Ì' => 'I',
        'Í' => 'I',
        'Î' => 'I',
        'Ï' => 'I',
        'Ñ' => 'N',
        'Ò' => 'O',
        'Ó' => 'O',
        'Ô' => 'O',
        'Õ' => 'O',
        'Ö' => 'Oe',
        'Ù' => 'U',
        'Ú' => 'U',
        'Û' => 'U',
        'Ü' => 'Ue',
        'Ý' => 'Y',
        'ß' => 'ss',
        'à' => 'a',
        'á' => 'a',
        'â' => 'a',
        'ã' => 'a',
        'ä' => 'ae',
        'å' => 'aa',
        'ç' => 'c',
        'è' => 'e',
        'é' => 'e',
        'ê' => 'e',
        'ë' => 'e',
        'ì' => 'i',
        'í' => 'i',
        'î' => 'i',
        'ï' => 'i',
        'ñ' => 'n',
        'ò' => 'o',
        'ó' => 'o',
        'ô' => 'o',
        'õ' => 'o',
        'ö' => 'oe',
        'ù' => 'u',
        'ú' => 'u',
        'û' => 'u',
        'ü' => 'ue',
        'ý' => 'y',
        'ÿ' => 'y',
        'Ā' => 'A',
        'ā' => 'a',
        'Ă' => 'A',
        'ă' => 'a',
        'Ą' => 'A',
        'ą' => 'a',
        'Ć' => 'C',
        'ć' => 'c',
        'Ĉ' => 'C',
        'ĉ' => 'c',
        'Ċ' => 'C',
        'ċ' => 'c',
        'Č' => 'C',
        'č' => 'c',
        'Ď' => 'D',
        'ď' => 'd',
        'Đ' => 'D',
        'đ' => 'd',
        'Ē' => 'E',
        'ē' => 'e',
        'Ĕ' => 'E',
        'ĕ' => 'e',
        'Ė' => 'E',
        'ė' => 'e',
        'Ę' => 'E',
        'ę' => 'e',
        'Ě' => 'E',
        'ě' => 'e',
        'Ĝ' => 'G',
        'ĝ' => 'g',
        'Ğ' => 'G',
        'ğ' => 'g',
        'Ġ' => 'G',
        'ġ' => 'g',
        'Ģ' => 'G',
        'ģ' => 'g',
        'Ĥ' => 'H',
        'ĥ' => 'h',
        'Ħ' => 'H',
        'ħ' => 'h',
        'Ĩ' => 'I',
        'ĩ' => 'i',
        'Ī' => 'I',
        'ī' => 'i',
        'Ĭ' => 'I',
        'ĭ' => 'i',
        'Į' => 'I',
        'į' => 'i',
        'İ' => 'I',
        'ı' => 'i',
        'IJ' => 'IJ',
        'ij' => 'ij',
        'Ĵ' => 'J',
        'ĵ' => 'j',
        'Ķ' => 'K',
        'ķ' => 'k',
        'ĸ' => 'k',
        'Ĺ' => 'L',
        'ĺ' => 'l',
        'Ļ' => 'L',
        'ļ' => 'l',
        'Ľ' => 'L',
        'ľ' => 'l',
        'Ŀ' => 'L',
        'ŀ' => 'l',
        'Ł' => 'L',
        'ł' => 'l',
        'Ń' => 'N',
        'ń' => 'n',
        'Ņ' => 'N',
        'ņ' => 'n',
        'Ň' => 'N',
        'ň' => 'n',
        'ʼn' => 'N',
        // Capital Eng (U+014A) and small Eng (U+014B): the replacement keeps the
        // case of the source letter, like every other pair in this table.
        'Ŋ' => 'N',
        'ŋ' => 'n',
        'Ō' => 'O',
        'ō' => 'o',
        'Ŏ' => 'O',
        'ŏ' => 'o',
        'Ő' => 'O',
        'ő' => 'o',
        'Œ' => 'OE',
        'œ' => 'oe',
        'Ø' => 'O',
        'ø' => 'o',
        'Ŕ' => 'R',
        'ŕ' => 'r',
        'Ŗ' => 'R',
        'ŗ' => 'r',
        'Ř' => 'R',
        'ř' => 'r',
        'Ś' => 'S',
        'ś' => 's',
        'Ŝ' => 'S',
        'ŝ' => 's',
        'Ş' => 'S',
        'ş' => 's',
        'Š' => 'S',
        'š' => 's',
        'Ţ' => 'T',
        'ţ' => 't',
        'Ť' => 'T',
        'ť' => 't',
        'Ŧ' => 'T',
        'ŧ' => 't',
        'Ũ' => 'U',
        'ũ' => 'u',
        'Ū' => 'U',
        'ū' => 'u',
        'Ŭ' => 'U',
        'ŭ' => 'u',
        'Ů' => 'U',
        'ů' => 'u',
        'Ű' => 'U',
        'ű' => 'u',
        'Ų' => 'U',
        'ų' => 'u',
        'Ŵ' => 'W',
        'ŵ' => 'w',
        'Ŷ' => 'Y',
        'ŷ' => 'y',
        'Ÿ' => 'Y',
        'Ź' => 'Z',
        'ź' => 'z',
        'Ż' => 'Z',
        'ż' => 'z',
        'Ž' => 'Z',
        'ž' => 'z',
        'ſ' => 's',
        '€' => 'E',
        '£' => '',
    ];

    /** @var WordInflector Used by singularize(). */
    private $singularizer;

    /** @var WordInflector Used by pluralize(). */
    private $pluralizer;

    /**
     * @param WordInflector $singularizer Inflector that turns plural words into singular ones.
     * @param WordInflector $pluralizer   Inflector that turns singular words into plural ones.
     */
    public function __construct(WordInflector $singularizer, WordInflector $pluralizer)
    {
        $this->singularizer = $singularizer;
        $this->pluralizer   = $pluralizer;
    }

    /**
     * Converts a word into the format for a Doctrine table name. Converts 'ModelName' to 'model_name'.
     *
     * An underscore is inserted before every ASCII capital that directly follows a
     * word character, then the whole string is lower-cased.
     *
     * @throws RuntimeException When preg_replace() fails (returns null) for the given word.
     */
    public function tableize(string $word): string
    {
        $tableized = preg_replace('~(?<=\\w)([A-Z])~u', '_$1', $word);

        if ($tableized === null) {
            throw new RuntimeException(sprintf(
                'preg_replace returned null for value "%s"',
                $word
            ));
        }

        return mb_strtolower($tableized);
    }

    /**
     * Converts a word into the format for a Doctrine class name. Converts 'table_name' to 'TableName'.
     *
     * Every segment delimited by a space, underscore or hyphen gets its first
     * character upper-cased, then the delimiters themselves are removed.
     */
    public function classify(string $word): string
    {
        return str_replace([' ', '_', '-'], '', ucwords($word, ' _-'));
    }

    /**
     * Camelizes a word. This uses the classify() method and turns the first character to lowercase.
     */
    public function camelize(string $word): string
    {
        return lcfirst($this->classify($word));
    }

    /**
     * Uppercases words with configurable delimiters between words.
     *
     * Takes a string and capitalizes all of the words, like PHP's built-in
     * ucwords function. This extends that behavior, however, by allowing the
     * word delimiters to be configured, rather than only separating on
     * whitespace.
     *
     * Here is an example:
     * <code>
     * <?php
     * $string = 'top-o-the-morning to all_of_you!';
     * echo $inflector->capitalize($string);
     * // Top-O-The-Morning To All_of_you!
     *
     * echo $inflector->capitalize($string, '-_ ');
     * // Top-O-The-Morning To All_Of_You!
     * ?>
     * </code>
     *
     * @param string $string     The string to operate on.
     * @param string $delimiters A list of word separators.
     *
     * @return string The string with all delimiter-separated words capitalized.
     */
    public function capitalize(string $string, string $delimiters = " \n\t\r\0\x0B-"): string
    {
        return ucwords($string, $delimiters);
    }

    /**
     * Checks if the given string seems like it has utf8 characters in it.
     *
     * This is a structural check only: every lead byte must be followed by the
     * number of 10bbbbbb continuation bytes that its bit pattern announces. Lead
     * bytes announcing 4 or 5 continuation bytes are accepted as well.
     *
     * @param string $string The string to check for utf8 characters in.
     *
     * @return bool False as soon as a byte does not fit the expected pattern, true otherwise
     *              (including for the empty string and for pure ASCII).
     */
    public function seemsUtf8(string $string): bool
    {
        // The byte length never changes inside the loop, so compute it once.
        $length = strlen($string);

        for ($i = 0; $i < $length; $i++) {
            $byte = ord($string[$i]);

            if ($byte < 0x80) {
                continue; // 0bbbbbbb
            }

            if (($byte & 0xE0) === 0xC0) {
                $n = 1; // 110bbbbb
            } elseif (($byte & 0xF0) === 0xE0) {
                $n = 2; // 1110bbbb
            } elseif (($byte & 0xF8) === 0xF0) {
                $n = 3; // 11110bbb
            } elseif (($byte & 0xFC) === 0xF8) {
                $n = 4; // 111110bb
            } elseif (($byte & 0xFE) === 0xFC) {
                $n = 5; // 1111110b
            } else {
                return false; // Does not match any model
            }

            for ($j = 0; $j < $n; $j++) { // n bytes matching 10bbbbbb follow ?
                // Running off the end of the string means the sequence is truncated.
                if (++$i === $length || ((ord($string[$i]) & 0xC0) !== 0x80)) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Replaces accented and special characters with ASCII equivalents.
     *
     * Strings that pass seemsUtf8() are transliterated with the
     * ACCENTED_CHARACTERS table. Any other string is treated as a single-byte
     * encoding and translated byte by byte. Characters that appear in neither
     * table are left as they are.
     *
     * @param  string $string String to unaccent
     *
     * @return string Unaccented string
     */
    public function unaccent(string $string): string
    {
        // Fast path: without any high-bit byte there is nothing to transliterate.
        // preg_match() returns 0 for "no match" (false only on error), so the result
        // must be compared against 1 rather than against false.
        if (preg_match('/[\x80-\xff]/', $string) !== 1) {
            return $string;
        }

        if ($this->seemsUtf8($string)) {
            return strtr($string, self::ACCENTED_CHARACTERS);
        }

        // Assume ISO-8859-1 if not UTF-8
        $singleBytesIn =
              chr(128)
            . chr(131)
            . chr(138)
            . chr(142)
            . chr(154)
            . chr(158)
            . chr(159)
            . chr(162)
            . chr(165)
            . chr(181)
            . chr(192)
            . chr(193)
            . chr(194)
            . chr(195)
            . chr(196)
            . chr(197)
            . chr(199)
            . chr(200)
            . chr(201)
            . chr(202)
            . chr(203)
            . chr(204)
            . chr(205)
            . chr(206)
            . chr(207)
            . chr(209)
            . chr(210)
            . chr(211)
            . chr(212)
            . chr(213)
            . chr(214)
            . chr(216)
            . chr(217)
            . chr(218)
            . chr(219)
            . chr(220)
            . chr(221)
            . chr(224)
            . chr(225)
            . chr(226)
            . chr(227)
            . chr(228)
            . chr(229)
            . chr(231)
            . chr(232)
            . chr(233)
            . chr(234)
            . chr(235)
            . chr(236)
            . chr(237)
            . chr(238)
            . chr(239)
            . chr(241)
            . chr(242)
            . chr(243)
            . chr(244)
            . chr(245)
            . chr(246)
            . chr(248)
            . chr(249)
            . chr(250)
            . chr(251)
            . chr(252)
            . chr(253)
            . chr(255);

        // Position i of this string is the replacement for byte i of $singleBytesIn.
        $singleBytesOut = 'EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy';

        $string = strtr($string, $singleBytesIn, $singleBytesOut);

        // Bytes whose replacement is two characters long cannot go through the
        // one-to-one strtr() call above, so they are handled separately.
        $doubleCharsIn = [
            chr(140),
            chr(156),
            chr(198),
            chr(208),
            chr(222),
            chr(223),
            chr(230),
            chr(240),
            chr(254),
        ];

        $doubleCharsOut = ['OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th'];

        return str_replace($doubleCharsIn, $doubleCharsOut, $string);
    }

    /**
     * Convert any passed string to a url friendly string.
     * Converts 'My first blog post' to 'my-first-blog-post'
     *
     * The string is unaccented, lower-cased, and then passed through a fixed list
     * of regular-expression replacements; leading and trailing hyphens are trimmed.
     *
     * @param  string $string String to urlize.
     *
     * @return string Urlized string.
     *
     * @throws RuntimeException When preg_replace() fails (returns null) for one of the patterns.
     */
    public function urlize(string $string): string
    {
        // Remove all non url friendly characters with the unaccent function
        $unaccented = $this->unaccent($string);

        if (function_exists('mb_strtolower')) {
            $lowered = mb_strtolower($unaccented);
        } else {
            $lowered = strtolower($unaccented);
        }

        // Applied in order, each on the result of the previous one.
        // NOTE(review): the input is already lower-cased here, so the two patterns
        // that require [A-Z] appear unable to match; they are kept unchanged.
        $replacements = [
            '/\W/' => ' ',
            '/([A-Z]+)([A-Z][a-z])/' => '\1_\2',
            '/([a-z\d])([A-Z])/' => '\1_\2',
            '/[^A-Z^a-z^0-9^\/]+/' => '-',
        ];

        $urlized = $lowered;

        foreach ($replacements as $pattern => $replacement) {
            $replaced = preg_replace($pattern, $replacement, $urlized);

            if ($replaced === null) {
                throw new RuntimeException(sprintf(
                    'preg_replace returned null for value "%s"',
                    $urlized
                ));
            }

            $urlized = $replaced;
        }

        return trim($urlized, '-');
    }

    /**
     * Returns a word in singular form.
     *
     * @param string $word The word in plural form.
     *
     * @return string The word in singular form.
     */
    public function singularize(string $word): string
    {
        return $this->singularizer->inflect($word);
    }

    /**
     * Returns a word in plural form.
     *
     * @param string $word The word in singular form.
     *
     * @return string The word in plural form.
     */
    public function pluralize(string $word): string
    {
        return $this->pluralizer->inflect($word);
    }
}
508