1<?php
2/**
3 * Copyright 2014-2017 Horde LLC (http://www.horde.org/)
4 *
5 * See the enclosed file LICENSE for license information (LGPL). If you
6 * did not receive this file, see http://www.horde.org/licenses/lgpl21.
7 *
8 * @category  Horde
9 * @copyright 2014-2017 Horde LLC
10 * @license   http://www.horde.org/licenses/lgpl21 LGPL 2.1
11 * @package   Util
12 */
13
14/**
15 * Provides utility methods used to transliterate a string.
16 *
17 * @author    Michael Slusarz <slusarz@horde.org>
18 * @author    Jan Schneider <jan@horde.org>
19 * @category  Horde
20 * @copyright 2014-2017 Horde LLC
21 * @license   http://www.horde.org/licenses/lgpl21 LGPL 2.1
22 * @package   Util
23 * @since     2.4.0
24 */
class Horde_String_Transliterate
{
    /**
     * Transliterate mapping cache (UTF-8 character => ASCII replacement),
     * built lazily on the first call to _fallbackToAscii().
     *
     * @var array
     */
    protected static $_map;

    /**
     * Transliterator instance.
     *
     * Null until the first creation attempt; false if that attempt failed
     * (so that the failing creation is not retried on every call).
     *
     * @var Transliterator|false|null
     */
    protected static $_transliterator;

    /**
     * Transliterates a UTF-8 string to ASCII, replacing non-English
     * characters with their English equivalents.
     *
     * The available backends are tried in order (intl Transliterator, iconv,
     * built-in mapping table); the first one that does not report an error
     * provides the result.
     *
     * Note: there is no guarantee that the output string will be ASCII-only,
     * since any non-ASCII character not in the transliteration list will
     * be ignored.
     *
     * @param string $str  Input string (UTF-8).
     *
     * @return string  Transliterated string (UTF-8).
     */
    public static function toAscii($str)
    {
        $methods = array(
            '_intlToAscii',
            '_iconvToAscii',
            '_fallbackToAscii'
        );

        foreach ($methods as $val) {
            /* Each backend returns false to signal "unavailable or failed";
             * any other value (including an empty string) is a result. */
            $out = call_user_func(array(__CLASS__, $val), $str);
            if ($out !== false) {
                return $out;
            }
        }

        /* Only reached if every backend reported failure. */
        return $str;
    }

    /**
     * Transliterate using the Transliterator class of the intl extension.
     *
     * @param string $str  Input string (UTF-8).
     *
     * @return mixed  Transliterated string (UTF-8), or false on error.
     */
    protected static function _intlToAscii($str)
    {
        if (!class_exists('Transliterator')) {
            return false;
        }

        if (!isset(self::$_transliterator)) {
            $instance = Transliterator::create('Any-Latin; Latin-ASCII');

            /* create() yields null on failure. Store false instead so that
             * isset() above sees the attempt as done and does not repeat
             * it on every call. */
            self::$_transliterator = ($instance instanceof Transliterator)
                ? $instance
                : false;
        }

        if (!(self::$_transliterator instanceof Transliterator)) {
            return false;
        }

        /* Returns false on error. */
        return self::$_transliterator->transliterate($str);
    }

    /**
     * Transliterate using the iconv extension.
     *
     * @param string $str  Input string (UTF-8).
     *
     * @return mixed  Transliterated string (UTF-8), or false on error.
     */
    protected static function _iconvToAscii($str)
    {
        if (!extension_loaded('iconv')) {
            return false;
        }

        /* Returns false on error. */
        return iconv('UTF-8', 'ASCII//TRANSLIT', $str);
    }

    /**
     * Transliterate using a built-in ASCII mapping.
     *
     * Characters that are not part of the mapping are left untouched.
     *
     * @param string $str  Input string (UTF-8).
     *
     * @return string  Transliterated string (UTF-8).
     */
    protected static function _fallbackToAscii($str)
    {
        if (!isset(self::$_map)) {
            self::$_map = array(
                'À' => 'A',
                'Á' => 'A',
                'Â' => 'A',
                'Ã' => 'A',
                'Ä' => 'A',
                'Å' => 'A',
                'Æ' => 'AE',
                'à' => 'a',
                'á' => 'a',
                'â' => 'a',
                'ã' => 'a',
                'ä' => 'a',
                'å' => 'a',
                'æ' => 'ae',
                'Þ' => 'TH',
                'þ' => 'th',
                'Ç' => 'C',
                'ç' => 'c',
                'Ð' => 'D',
                'ð' => 'd',
                'È' => 'E',
                'É' => 'E',
                'Ê' => 'E',
                'Ë' => 'E',
                'è' => 'e',
                'é' => 'e',
                'ê' => 'e',
                'ë' => 'e',
                'ƒ' => 'f',
                'Ì' => 'I',
                'Í' => 'I',
                'Î' => 'I',
                'Ï' => 'I',
                'ì' => 'i',
                'í' => 'i',
                'î' => 'i',
                'ï' => 'i',
                'Ñ' => 'N',
                'ñ' => 'n',
                'Ò' => 'O',
                'Ó' => 'O',
                'Ô' => 'O',
                'Õ' => 'O',
                'Ö' => 'O',
                'Ø' => 'O',
                'ò' => 'o',
                'ó' => 'o',
                'ô' => 'o',
                'õ' => 'o',
                'ö' => 'o',
                'ø' => 'o',
                'Š' => 'S',
                'Ś' => 'S',
                'ẞ' => 'SS',
                'ß' => 'ss',
                'š' => 's',
                'ś' => 's',
                'Ù' => 'U',
                'Ú' => 'U',
                'Û' => 'U',
                'Ü' => 'U',
                'ù' => 'u',
                'ú' => 'u',
                'û' => 'u',
                'ü' => 'u',
                'Ý' => 'Y',
                'Ÿ' => 'Y',
                'ý' => 'y',
                'ÿ' => 'y',
                'Ž' => 'Z',
                'ž' => 'z'
            );
        }

        /* strtr() with a replacement array always yields a string, so this
         * should never return false. strval() guards against non-string
         * input. */
        return strtr(strval($str), self::$_map);
    }
}
196