1<?php
2
3declare(strict_types=1);
4
5namespace Doctrine\Inflector;
6
7use RuntimeException;
8use function chr;
9use function function_exists;
10use function lcfirst;
11use function mb_strtolower;
12use function ord;
13use function preg_match;
14use function preg_replace;
15use function sprintf;
16use function str_replace;
17use function strlen;
18use function strtolower;
19use function strtr;
20use function trim;
21use function ucwords;
22
/**
 * String inflection helper.
 *
 * Converts identifiers between naming conventions (table, class, camel case),
 * capitalizes words, strips accents, builds URL slugs and delegates
 * singular/plural conversion to the two injected word inflectors.
 */
class Inflector
{
    /**
     * Transliteration table applied by unaccent() to UTF-8 input: each key is
     * a single non-ASCII character, each value its ASCII replacement.
     *
     * NOTE(review): 'æ' maps to 'a' although 'Æ' maps to 'Ae'. It is left
     * as-is because changing it would alter existing urlize() output;
     * confirm the intent before touching it.
     */
    private const ACCENTED_CHARACTERS = [
        'À' => 'A',
        'Á' => 'A',
        'Â' => 'A',
        'Ã' => 'A',
        'Ä' => 'Ae',
        'Æ' => 'Ae',
        'Å' => 'Aa',
        'æ' => 'a',
        'Ç' => 'C',
        'È' => 'E',
        'É' => 'E',
        'Ê' => 'E',
        'Ë' => 'E',
        'Ì' => 'I',
        'Í' => 'I',
        'Î' => 'I',
        'Ï' => 'I',
        'Ñ' => 'N',
        'Ò' => 'O',
        'Ó' => 'O',
        'Ô' => 'O',
        'Õ' => 'O',
        'Ö' => 'Oe',
        'Ù' => 'U',
        'Ú' => 'U',
        'Û' => 'U',
        'Ü' => 'Ue',
        'Ý' => 'Y',
        'ß' => 'ss',
        'à' => 'a',
        'á' => 'a',
        'â' => 'a',
        'ã' => 'a',
        'ä' => 'ae',
        'å' => 'aa',
        'ç' => 'c',
        'è' => 'e',
        'é' => 'e',
        'ê' => 'e',
        'ë' => 'e',
        'ì' => 'i',
        'í' => 'i',
        'î' => 'i',
        'ï' => 'i',
        'ñ' => 'n',
        'ò' => 'o',
        'ó' => 'o',
        'ô' => 'o',
        'õ' => 'o',
        'ö' => 'oe',
        'ù' => 'u',
        'ú' => 'u',
        'û' => 'u',
        'ü' => 'ue',
        'ý' => 'y',
        'ÿ' => 'y',
        'Ā' => 'A',
        'ā' => 'a',
        'Ă' => 'A',
        'ă' => 'a',
        'Ą' => 'A',
        'ą' => 'a',
        'Ć' => 'C',
        'ć' => 'c',
        'Ĉ' => 'C',
        'ĉ' => 'c',
        'Ċ' => 'C',
        'ċ' => 'c',
        'Č' => 'C',
        'č' => 'c',
        'Ď' => 'D',
        'ď' => 'd',
        'Đ' => 'D',
        'đ' => 'd',
        'Ē' => 'E',
        'ē' => 'e',
        'Ĕ' => 'E',
        'ĕ' => 'e',
        'Ė' => 'E',
        'ė' => 'e',
        'Ę' => 'E',
        'ę' => 'e',
        'Ě' => 'E',
        'ě' => 'e',
        'Ĝ' => 'G',
        'ĝ' => 'g',
        'Ğ' => 'G',
        'ğ' => 'g',
        'Ġ' => 'G',
        'ġ' => 'g',
        'Ģ' => 'G',
        'ģ' => 'g',
        'Ĥ' => 'H',
        'ĥ' => 'h',
        'Ħ' => 'H',
        'ħ' => 'h',
        'Ĩ' => 'I',
        'ĩ' => 'i',
        'Ī' => 'I',
        'ī' => 'i',
        'Ĭ' => 'I',
        'ĭ' => 'i',
        'Į' => 'I',
        'į' => 'i',
        'İ' => 'I',
        'ı' => 'i',
        // U+0132 / U+0133 (IJ ligatures) and U+0149 below are spelled as
        // escapes so the single-code-point keys survive any text
        // normalization of this file; a decomposed key would never match.
        "\u{0132}" => 'IJ',
        "\u{0133}" => 'ij',
        'Ĵ' => 'J',
        'ĵ' => 'j',
        'Ķ' => 'K',
        'ķ' => 'k',
        'ĸ' => 'k',
        'Ĺ' => 'L',
        'ĺ' => 'l',
        'Ļ' => 'L',
        'ļ' => 'l',
        'Ľ' => 'L',
        'ľ' => 'l',
        'Ŀ' => 'L',
        'ŀ' => 'l',
        'Ł' => 'L',
        'ł' => 'l',
        'Ń' => 'N',
        'ń' => 'n',
        'Ņ' => 'N',
        'ņ' => 'n',
        'Ň' => 'N',
        'ň' => 'n',
        "\u{0149}" => 'n', // lowercase n preceded by apostrophe
        'Ŋ' => 'N',
        'ŋ' => 'n',
        'Ō' => 'O',
        'ō' => 'o',
        'Ŏ' => 'O',
        'ŏ' => 'o',
        'Ő' => 'O',
        'ő' => 'o',
        'Œ' => 'OE',
        'œ' => 'oe',
        'Ø' => 'O',
        'ø' => 'o',
        'Ŕ' => 'R',
        'ŕ' => 'r',
        'Ŗ' => 'R',
        'ŗ' => 'r',
        'Ř' => 'R',
        'ř' => 'r',
        'Ś' => 'S',
        'ś' => 's',
        'Ŝ' => 'S',
        'ŝ' => 's',
        'Ş' => 'S',
        'ş' => 's',
        'Š' => 'S',
        'š' => 's',
        'Ţ' => 'T',
        'ţ' => 't',
        'Ť' => 'T',
        'ť' => 't',
        'Ŧ' => 'T',
        'ŧ' => 't',
        'Ũ' => 'U',
        'ũ' => 'u',
        'Ū' => 'U',
        'ū' => 'u',
        'Ŭ' => 'U',
        'ŭ' => 'u',
        'Ů' => 'U',
        'ů' => 'u',
        'Ű' => 'U',
        'ű' => 'u',
        'Ų' => 'U',
        'ų' => 'u',
        'Ŵ' => 'W',
        'ŵ' => 'w',
        'Ŷ' => 'Y',
        'ŷ' => 'y',
        'Ÿ' => 'Y',
        'Ź' => 'Z',
        'ź' => 'z',
        'Ż' => 'Z',
        'ż' => 'z',
        'Ž' => 'Z',
        'ž' => 'z',
        'ſ' => 's',
        '€' => 'E',
        '£' => '',
    ];

    /**
     * Single-byte characters replaced one-for-one when the input is not
     * UTF-8. Byte N of this string is replaced by byte N of
     * LATIN1_SINGLE_TO, so both strings must stay the same length (65).
     */
    private const LATIN1_SINGLE_FROM =
          "\x80\x83\x8A\x8E\x9A\x9E\x9F\xA2\xA5\xB5" // EfSZszYcYu
        . "\xC0\xC1\xC2\xC3\xC4\xC5\xC7"             // AAAAAAC
        . "\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"         // EEEEIIII
        . "\xD1\xD2\xD3\xD4\xD5\xD6\xD8"             // NOOOOOO
        . "\xD9\xDA\xDB\xDC\xDD"                     // UUUUY
        . "\xE0\xE1\xE2\xE3\xE4\xE5\xE7"             // aaaaaac
        . "\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"         // eeeeiiii
        . "\xF1\xF2\xF3\xF4\xF5\xF6\xF8"             // noooooo
        . "\xF9\xFA\xFB\xFC\xFD\xFF";                // uuuuyy

    /** ASCII replacements, position-aligned with LATIN1_SINGLE_FROM. */
    private const LATIN1_SINGLE_TO = 'EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy';

    /** Single-byte characters that expand to two ASCII letters. */
    private const LATIN1_DOUBLE = [
        "\x8C" => 'OE',
        "\x9C" => 'oe',
        "\xC6" => 'AE',
        "\xD0" => 'DH',
        "\xDE" => 'TH',
        "\xDF" => 'ss',
        "\xE6" => 'ae',
        "\xF0" => 'dh',
        "\xFE" => 'th',
    ];

    /** @var WordInflector */
    private $singularizer;

    /** @var WordInflector */
    private $pluralizer;

    /**
     * @param WordInflector $singularizer Used by singularize().
     * @param WordInflector $pluralizer   Used by pluralize().
     */
    public function __construct(WordInflector $singularizer, WordInflector $pluralizer)
    {
        $this->singularizer = $singularizer;
        $this->pluralizer   = $pluralizer;
    }

    /**
     * Converts a word into the format for a Doctrine table name. Converts 'ModelName' to 'model_name'.
     *
     * @throws RuntimeException When preg_replace() fails (it runs in UTF-8
     *                          mode, so malformed UTF-8 input is one cause).
     */
    public function tableize(string $word) : string
    {
        // Put an underscore in front of every ASCII capital that follows a word character.
        $tableized = preg_replace('~(?<=\\w)([A-Z])~u', '_$1', $word);

        if ($tableized === null) {
            throw new RuntimeException(sprintf(
                'preg_replace returned null for value "%s"',
                $word
            ));
        }

        return mb_strtolower($tableized);
    }

    /**
     * Converts a word into the format for a Doctrine class name. Converts 'table_name' to 'TableName'.
     */
    public function classify(string $word) : string
    {
        // Capitalize after each space, underscore or hyphen, then drop those separators.
        return str_replace([' ', '_', '-'], '', ucwords($word, ' _-'));
    }

    /**
     * Camelizes a word. This uses the classify() method and turns the first character to lowercase.
     */
    public function camelize(string $word) : string
    {
        return lcfirst($this->classify($word));
    }

    /**
     * Uppercases words with configurable delimiters between words.
     *
     * Takes a string and capitalizes all of the words, like PHP's built-in
     * ucwords function. This extends that behavior, however, by allowing the
     * word delimiters to be configured, rather than only separating on
     * whitespace.
     *
     * Here is an example:
     * <code>
     * <?php
     * $string = 'top-o-the-morning to all_of_you!';
     * echo $inflector->capitalize($string);
     * // Top-O-The-Morning To All_of_you!
     *
     * echo $inflector->capitalize($string, '-_ ');
     * // Top-O-The-Morning To All_Of_You!
     * ?>
     * </code>
     *
     * @param string $string     The string to operate on.
     * @param string $delimiters A list of word separators.
     *
     * @return string The string with all delimiter-separated words capitalized.
     */
    public function capitalize(string $string, string $delimiters = " \n\t\r\0\x0B-") : string
    {
        return ucwords($string, $delimiters);
    }

    /**
     * Checks if the given string seems like it has utf8 characters in it.
     *
     * Walks the bytes and verifies that every lead byte is followed by the
     * number of 10bbbbbb continuation bytes its bit pattern announces. Lead
     * bytes announcing up to five continuation bytes are accepted. Strings
     * made only of bytes below 0x80 (including the empty string) pass.
     *
     * @param string $string The string to check for utf8 characters in.
     *
     * @return bool False as soon as a byte breaks the pattern or a sequence is truncated.
     */
    public function seemsUtf8(string $string) : bool
    {
        $length = strlen($string);

        for ($i = 0; $i < $length; $i++) {
            $byte = ord($string[$i]);

            if ($byte < 0x80) {
                continue; // 0bbbbbbb
            }

            if (($byte & 0xE0) === 0xC0) {
                $continuationBytes = 1; // 110bbbbb
            } elseif (($byte & 0xF0) === 0xE0) {
                $continuationBytes = 2; // 1110bbbb
            } elseif (($byte & 0xF8) === 0xF0) {
                $continuationBytes = 3; // 11110bbb
            } elseif (($byte & 0xFC) === 0xF8) {
                $continuationBytes = 4; // 111110bb
            } elseif (($byte & 0xFE) === 0xFC) {
                $continuationBytes = 5; // 1111110b
            } else {
                return false; // Does not match any model
            }

            // The announced number of 10bbbbbb bytes must follow.
            for ($j = 0; $j < $continuationBytes; $j++) {
                $i++;

                if ($i === $length || (ord($string[$i]) & 0xC0) !== 0x80) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Remove any illegal characters, accents, etc.
     *
     * Input that seemsUtf8() accepts is transliterated with
     * ACCENTED_CHARACTERS; anything else is treated as a single-byte
     * encoding and mapped byte by byte.
     *
     * @param  string $string String to unaccent
     *
     * @return string Unaccented string
     */
    public function unaccent(string $string) : string
    {
        // preg_match() yields 1 on a match, 0 on no match and false on error.
        // Anything but 1 means there is no high-bit byte to replace.
        if (preg_match('/[\x80-\xff]/', $string) !== 1) {
            return $string;
        }

        if ($this->seemsUtf8($string)) {
            return strtr($string, self::ACCENTED_CHARACTERS);
        }

        return $this->unaccentSingleByte($string);
    }

    /**
     * Transliterates a non-UTF-8 string, assuming ISO-8859-1.
     *
     * NOTE(review): the 0x80-0x9F entries look like Windows-1252 code points
     * rather than ISO-8859-1 ones; confirm which encoding is intended.
     */
    private function unaccentSingleByte(string $string) : string
    {
        // One-for-one byte replacements first, then the bytes that expand to two letters.
        $string = strtr($string, self::LATIN1_SINGLE_FROM, self::LATIN1_SINGLE_TO);

        return strtr($string, self::LATIN1_DOUBLE);
    }

    /**
     * Convert any passed string to a url friendly string.
     * Converts 'My first blog post' to 'my-first-blog-post'
     *
     * @param  string $string String to urlize.
     *
     * @return string Urlized string.
     *
     * @throws RuntimeException When one of the preg_replace() calls fails.
     */
    public function urlize(string $string) : string
    {
        // Remove all non url friendly characters with the unaccent function
        $unaccented = $this->unaccent($string);

        // Prefer the multibyte-aware lowercase; fall back when mbstring is unavailable.
        $urlized = function_exists('mb_strtolower')
            ? mb_strtolower($unaccented)
            : strtolower($unaccented);

        // Applied in order. The input is already lowercased, so the two
        // [A-Z] rules are not expected to match; they are kept so the rule
        // set stays identical to the established one.
        $replacements = [
            '/\W/' => ' ',
            '/([A-Z]+)([A-Z][a-z])/' => '\1_\2',
            '/([a-z\d])([A-Z])/' => '\1_\2',
            '/[^A-Z^a-z^0-9^\/]+/' => '-',
        ];

        foreach ($replacements as $pattern => $replacement) {
            $replaced = preg_replace($pattern, $replacement, $urlized);

            if ($replaced === null) {
                throw new RuntimeException(sprintf(
                    'preg_replace returned null for value "%s"',
                    $urlized
                ));
            }

            $urlized = $replaced;
        }

        return trim($urlized, '-');
    }

    /**
     * Returns a word in singular form.
     *
     * @param string $word The word in plural form.
     *
     * @return string The word in singular form.
     */
    public function singularize(string $word) : string
    {
        return $this->singularizer->inflect($word);
    }

    /**
     * Returns a word in plural form.
     *
     * @param string $word The word in singular form.
     *
     * @return string The word in plural form.
     */
    public function pluralize(string $word) : string
    {
        return $this->pluralizer->inflect($word);
    }
}
507