<?php

declare(strict_types=1);

namespace Doctrine\Inflector;

use RuntimeException;
use function chr;
use function function_exists;
use function lcfirst;
use function mb_strtolower;
use function ord;
use function preg_match;
use function preg_replace;
use function sprintf;
use function str_replace;
use function strlen;
use function strtolower;
use function strtr;
use function trim;
use function ucwords;

/**
 * String inflection helpers: case conversion (tableize/classify/camelize/capitalize),
 * accent stripping and slug generation (unaccent/urlize), and singular/plural forms,
 * which are delegated to the two injected WordInflector instances.
 */
class Inflector
{
    /**
     * Replacement table applied by unaccent() when the input looks like UTF-8.
     *
     * NOTE(review): a few entries look inconsistent ('æ' maps to 'a' while 'Æ' maps to 'Ae';
     * 'Ŋ' and 'ŋ' map to the opposite letter case). They are kept exactly as they are because
     * changing them would change the output of unaccent() and urlize() for existing callers.
     */
    private const ACCENTED_CHARACTERS = [
        'À' => 'A',
        'Á' => 'A',
        'Â' => 'A',
        'Ã' => 'A',
        'Ä' => 'Ae',
        'Æ' => 'Ae',
        'Å' => 'Aa',
        'æ' => 'a',
        'Ç' => 'C',
        'È' => 'E',
        'É' => 'E',
        'Ê' => 'E',
        'Ë' => 'E',
        'Ì' => 'I',
        'Í' => 'I',
        'Î' => 'I',
        'Ï' => 'I',
        'Ñ' => 'N',
        'Ò' => 'O',
        'Ó' => 'O',
        'Ô' => 'O',
        'Õ' => 'O',
        'Ö' => 'Oe',
        'Ù' => 'U',
        'Ú' => 'U',
        'Û' => 'U',
        'Ü' => 'Ue',
        'Ý' => 'Y',
        'ß' => 'ss',
        'à' => 'a',
        'á' => 'a',
        'â' => 'a',
        'ã' => 'a',
        'ä' => 'ae',
        'å' => 'aa',
        'ç' => 'c',
        'è' => 'e',
        'é' => 'e',
        'ê' => 'e',
        'ë' => 'e',
        'ì' => 'i',
        'í' => 'i',
        'î' => 'i',
        'ï' => 'i',
        'ñ' => 'n',
        'ò' => 'o',
        'ó' => 'o',
        'ô' => 'o',
        'õ' => 'o',
        'ö' => 'oe',
        'ù' => 'u',
        'ú' => 'u',
        'û' => 'u',
        'ü' => 'ue',
        'ý' => 'y',
        'ÿ' => 'y',
        'Ā' => 'A',
        'ā' => 'a',
        'Ă' => 'A',
        'ă' => 'a',
        'Ą' => 'A',
        'ą' => 'a',
        'Ć' => 'C',
        'ć' => 'c',
        'Ĉ' => 'C',
        'ĉ' => 'c',
        'Ċ' => 'C',
        'ċ' => 'c',
        'Č' => 'C',
        'č' => 'c',
        'Ď' => 'D',
        'ď' => 'd',
        'Đ' => 'D',
        'đ' => 'd',
        'Ē' => 'E',
        'ē' => 'e',
        'Ĕ' => 'E',
        'ĕ' => 'e',
        'Ė' => 'E',
        'ė' => 'e',
        'Ę' => 'E',
        'ę' => 'e',
        'Ě' => 'E',
        'ě' => 'e',
        'Ĝ' => 'G',
        'ĝ' => 'g',
        'Ğ' => 'G',
        'ğ' => 'g',
        'Ġ' => 'G',
        'ġ' => 'g',
        'Ģ' => 'G',
        'ģ' => 'g',
        'Ĥ' => 'H',
        'ĥ' => 'h',
        'Ħ' => 'H',
        'ħ' => 'h',
        'Ĩ' => 'I',
        'ĩ' => 'i',
        'Ī' => 'I',
        'ī' => 'i',
        'Ĭ' => 'I',
        'ĭ' => 'i',
        'Į' => 'I',
        'į' => 'i',
        'İ' => 'I',
        'ı' => 'i',
        'IJ' => 'IJ',
        'ij' => 'ij',
        'Ĵ' => 'J',
        'ĵ' => 'j',
        'Ķ' => 'K',
        'ķ' => 'k',
        'ĸ' => 'k',
        'Ĺ' => 'L',
        'ĺ' => 'l',
        'Ļ' => 'L',
        'ļ' => 'l',
        'Ľ' => 'L',
        'ľ' => 'l',
        'Ŀ' => 'L',
        'ŀ' => 'l',
        'Ł' => 'L',
        'ł' => 'l',
        'Ń' => 'N',
        'ń' => 'n',
        'Ņ' => 'N',
        'ņ' => 'n',
        'Ň' => 'N',
        'ň' => 'n',
        'ʼn' => 'N',
        'Ŋ' => 'n',
        'ŋ' => 'N',
        'Ō' => 'O',
        'ō' => 'o',
        'Ŏ' => 'O',
        'ŏ' => 'o',
        'Ő' => 'O',
        'ő' => 'o',
        'Œ' => 'OE',
        'œ' => 'oe',
        'Ø' => 'O',
        'ø' => 'o',
        'Ŕ' => 'R',
        'ŕ' => 'r',
        'Ŗ' => 'R',
        'ŗ' => 'r',
        'Ř' => 'R',
        'ř' => 'r',
        'Ś' => 'S',
        'ś' => 's',
        'Ŝ' => 'S',
        'ŝ' => 's',
        'Ş' => 'S',
        'ş' => 's',
        'Š' => 'S',
        'š' => 's',
        'Ţ' => 'T',
        'ţ' => 't',
        'Ť' => 'T',
        'ť' => 't',
        'Ŧ' => 'T',
        'ŧ' => 't',
        'Ũ' => 'U',
        'ũ' => 'u',
        'Ū' => 'U',
        'ū' => 'u',
        'Ŭ' => 'U',
        'ŭ' => 'u',
        'Ů' => 'U',
        'ů' => 'u',
        'Ű' => 'U',
        'ű' => 'u',
        'Ų' => 'U',
        'ų' => 'u',
        'Ŵ' => 'W',
        'ŵ' => 'w',
        'Ŷ' => 'Y',
        'ŷ' => 'y',
        'Ÿ' => 'Y',
        'Ź' => 'Z',
        'ź' => 'z',
        'Ż' => 'Z',
        'ż' => 'z',
        'Ž' => 'Z',
        'ž' => 'z',
        'ſ' => 's',
        '€' => 'E',
        '£' => '',
    ];

    /** @var WordInflector */
    private $singularizer;

    /** @var WordInflector */
    private $pluralizer;

    /**
     * @param WordInflector $singularizer Used by singularize().
     * @param WordInflector $pluralizer   Used by pluralize().
     */
    public function __construct(WordInflector $singularizer, WordInflector $pluralizer)
    {
        $this->singularizer = $singularizer;
        $this->pluralizer   = $pluralizer;
    }

    /**
     * Converts a word into the format for a Doctrine table name. Converts 'ModelName' to 'model_name'.
     *
     * An underscore is inserted before every A-Z letter that directly follows a word
     * character, and the result is lowercased with mb_strtolower().
     *
     * @param string $word The word to tableize.
     *
     * @return string The tableized word.
     *
     * @throws RuntimeException When preg_replace() returns null.
     */
    public function tableize(string $word) : string
    {
        $tableized = preg_replace('~(?<=\\w)([A-Z])~u', '_$1', $word);

        if ($tableized === null) {
            throw new RuntimeException(sprintf(
                'preg_replace returned null for value "%s"',
                $word
            ));
        }

        return mb_strtolower($tableized);
    }

    /**
     * Converts a word into the format for a Doctrine class name. Converts 'table_name' to 'TableName'.
     *
     * Space, underscore and hyphen all act as word separators and are removed from the result.
     *
     * @param string $word The word to classify.
     *
     * @return string The classified word.
     */
    public function classify(string $word) : string
    {
        return str_replace([' ', '_', '-'], '', ucwords($word, ' _-'));
    }

    /**
     * Camelizes a word. This uses the classify() method and turns the first character to lowercase.
     *
     * @param string $word The word to camelize.
     *
     * @return string The camelized word.
     */
    public function camelize(string $word) : string
    {
        return lcfirst($this->classify($word));
    }

    /**
     * Uppercases words with configurable delimiters between words.
     *
     * Takes a string and capitalizes all of the words, like PHP's built-in
     * ucwords function. This extends that behavior, however, by allowing the
     * word delimiters to be configured, rather than only separating on
     * whitespace.
     *
     * Here is an example:
     * <code>
     * <?php
     * $string = 'top-o-the-morning to all_of_you!';
     * echo $inflector->capitalize($string);
     * // Top-O-The-Morning To All_of_you!
     *
     * echo $inflector->capitalize($string, '-_ ');
     * // Top-O-The-Morning To All_Of_You!
     * ?>
     * </code>
     *
     * @param string $string     The string to operate on.
     * @param string $delimiters A list of word separators.
     *
     * @return string The string with all delimiter-separated words capitalized.
     */
    public function capitalize(string $string, string $delimiters = " \n\t\r\0\x0B-") : string
    {
        return ucwords($string, $delimiters);
    }

    /**
     * Checks if the given string seems like it has utf8 characters in it.
     *
     * The check is purely structural: every lead byte must be followed by the number of
     * 10bbbbbb continuation bytes that its bit pattern announces. Lead bytes announcing
     * up to five continuation bytes are accepted. An empty string is reported as UTF-8.
     *
     * @param string $string The string to check for utf8 characters in.
     *
     * @return bool True when every byte sequence in the string is well-formed by the rule above.
     */
    public function seemsUtf8(string $string) : bool
    {
        // The length is loop-invariant, so it is computed once instead of on every iteration.
        $length = strlen($string);

        for ($i = 0; $i < $length; $i++) {
            $byte = ord($string[$i]);

            if ($byte < 0x80) {
                continue; // 0bbbbbbb
            }

            if (($byte & 0xE0) === 0xC0) {
                $n = 1; // 110bbbbb
            } elseif (($byte & 0xF0) === 0xE0) {
                $n = 2; // 1110bbbb
            } elseif (($byte & 0xF8) === 0xF0) {
                $n = 3; // 11110bbb
            } elseif (($byte & 0xFC) === 0xF8) {
                $n = 4; // 111110bb
            } elseif (($byte & 0xFE) === 0xFC) {
                $n = 5; // 1111110b
            } else {
                return false; // Does not match any model
            }

            // $n bytes matching 10bbbbbb must follow; running off the end of the string also fails.
            for ($j = 0; $j < $n; $j++) {
                if (++$i === $length || (ord($string[$i]) & 0xC0) !== 0x80) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Remove any illegal characters, accents, etc.
     *
     * Strings that seem to be UTF-8 are transliterated with the ACCENTED_CHARACTERS table;
     * anything else is assumed to be ISO-8859-1 and mapped byte by byte.
     *
     * @param string $string String to unaccent
     *
     * @return string Unaccented string
     */
    public function unaccent(string $string) : string
    {
        // preg_match() returns 0 (not false) when nothing matches, so comparing against false
        // never short-circuited. Anything other than a positive match (no high byte, or a PCRE
        // error) means there is nothing to transliterate.
        if (preg_match('/[\x80-\xff]/', $string) !== 1) {
            return $string;
        }

        if ($this->seemsUtf8($string)) {
            return strtr($string, self::ACCENTED_CHARACTERS);
        }

        // Assume ISO-8859-1 if not UTF-8
        // Single-byte replacements: the n-th byte of $singleIn maps to the n-th byte of $singleOut.
        $singleIn =
            chr(128)
            . chr(131)
            . chr(138)
            . chr(142)
            . chr(154)
            . chr(158)
            . chr(159)
            . chr(162)
            . chr(165)
            . chr(181)
            . chr(192)
            . chr(193)
            . chr(194)
            . chr(195)
            . chr(196)
            . chr(197)
            . chr(199)
            . chr(200)
            . chr(201)
            . chr(202)
            . chr(203)
            . chr(204)
            . chr(205)
            . chr(206)
            . chr(207)
            . chr(209)
            . chr(210)
            . chr(211)
            . chr(212)
            . chr(213)
            . chr(214)
            . chr(216)
            . chr(217)
            . chr(218)
            . chr(219)
            . chr(220)
            . chr(221)
            . chr(224)
            . chr(225)
            . chr(226)
            . chr(227)
            . chr(228)
            . chr(229)
            . chr(231)
            . chr(232)
            . chr(233)
            . chr(234)
            . chr(235)
            . chr(236)
            . chr(237)
            . chr(238)
            . chr(239)
            . chr(241)
            . chr(242)
            . chr(243)
            . chr(244)
            . chr(245)
            . chr(246)
            . chr(248)
            . chr(249)
            . chr(250)
            . chr(251)
            . chr(252)
            . chr(253)
            . chr(255);

        $singleOut = 'EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy';

        $string = strtr($string, $singleIn, $singleOut);

        // Bytes that expand to two ASCII characters.
        $doubleIn = [
            chr(140),
            chr(156),
            chr(198),
            chr(208),
            chr(222),
            chr(223),
            chr(230),
            chr(240),
            chr(254),
        ];

        $doubleOut = ['OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th'];

        return str_replace($doubleIn, $doubleOut, $string);
    }

    /**
     * Convert any passed string to a url friendly string.
     * Converts 'My first blog post' to 'my-first-blog-post'
     *
     * @param string $string String to urlize.
     *
     * @return string Urlized string.
     *
     * @throws RuntimeException When preg_replace() returns null.
     */
    public function urlize(string $string) : string
    {
        // Remove all non url friendly characters with the unaccent function
        $unaccented = $this->unaccent($string);

        if (function_exists('mb_strtolower')) {
            $lowered = mb_strtolower($unaccented);
        } else {
            $lowered = strtolower($unaccented);
        }

        // Applied in order.
        // NOTE(review): the input is already lowercased at this point, so the two patterns that
        // look for A-Z cannot match; they are kept so that the produced slugs stay unchanged.
        $replacements = [
            '/\W/' => ' ',
            '/([A-Z]+)([A-Z][a-z])/' => '\1_\2',
            '/([a-z\d])([A-Z])/' => '\1_\2',
            '/[^A-Z^a-z^0-9^\/]+/' => '-',
        ];

        $urlized = $lowered;

        foreach ($replacements as $pattern => $replacement) {
            $replaced = preg_replace($pattern, $replacement, $urlized);

            if ($replaced === null) {
                throw new RuntimeException(sprintf(
                    'preg_replace returned null for value "%s"',
                    $urlized
                ));
            }

            $urlized = $replaced;
        }

        return trim($urlized, '-');
    }

    /**
     * Returns a word in singular form.
     *
     * @param string $word The word in plural form.
     *
     * @return string The word in singular form.
     */
    public function singularize(string $word) : string
    {
        return $this->singularizer->inflect($word);
    }

    /**
     * Returns a word in plural form.
     *
     * @param string $word The word in singular form.
     *
     * @return string The word in plural form.
     */
    public function pluralize(string $word) : string
    {
        return $this->pluralizer->inflect($word);
    }
}