<?php
//============================================================+
// File name   : html_entity_decode_php4.php
// Begin       : 2006-06-22
// Last Update : 2008-04-01
// Author      : Nicola Asuni
// Version     : 1.0.000
// License     : GNU LGPL (http://www.gnu.org/copyleft/lesser.html)
// ----------------------------------------------------------------------------
// Copyright (C) 2002-2008 Nicola Asuni - Tecnick.com S.r.l.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// See LICENSE.TXT file for more information.
// ----------------------------------------------------------------------------
//
//
// Description : This is a PHP4 function that redefines the
//               standard html_entity_decode function to support
//               UTF-8 encoding.
//
//
// Author: Nicola Asuni
//
// (c) Copyright:
//               Nicola Asuni
//               Tecnick.com S.r.l.
//               Via della Pace, 11
//               09044 Quartucciu (CA)
//               ITALY
//               www.tecnick.com
//               info@tecnick.com
//============================================================+

/**
 * This is a PHP4 function that redefines the standard
 * html_entity_decode function to support UTF-8 encoding.
49 * @package com.tecnick.tcpdf 50 * @author Nicola Asuni 51 * @copyright 2004-2008 Nicola Asuni - Tecnick.com S.r.l (www.tecnick.com) Via Della Pace, 11 - 09044 - Quartucciu (CA) - ITALY - www.tecnick.com - info@tecnick.com 52 * @link http://www.tcpdf.org 53 * @license http://www.gnu.org/copyleft/lesser.html LGPL 54 */ 55 56/** 57 * Returns the UTF-8 string corresponding to unicode value. 58 * @param $num unicode value to convert. 59 * @return string converted 60 */ 61function code_to_utf8($num) { 62 if ($num <= 0x7F) { 63 return chr($num); 64 } elseif ($num <= 0x7FF) { 65 return chr(($num >> 0x06) + 0xC0).chr(($num & 0x3F) + 128); 66 } elseif ($num <= 0xFFFF) { 67 return chr(($num >> 0x0C) + 0xE0).chr((($num >> 0x06) & 0x3F) + 0x80).chr(($num & 0x3F) + 0x80); 68 } elseif ($num <= 0x1FFFFF) { 69 return chr(($num >> 0x12) + 0xF0).chr((($num >> 0x0C) & 0x3F) + 0x80).chr((($num >> 0x06) & 0x3F) + 0x80).chr(($num & 0x3F) + 0x80); 70 } 71 return ' '; // default value 72} 73 74/** 75 * Reverse function for htmlentities. 76 * Convert entities in UTF-8. 77 * @param $text_to_convert Text to convert. 
78 * @return string converted 79 */ 80function html_entity_decode_php4($text_to_convert) { 81 $htmlentities_table = array ( 82 "Á" => "".chr(195).chr(129)."", 83 "á" => "".chr(195).chr(161)."", 84 "Â" => "".chr(195).chr(130)."", 85 "â" => "".chr(195).chr(162)."", 86 "´" => "".chr(194).chr(180)."", 87 "Æ" => "".chr(195).chr(134)."", 88 "æ" => "".chr(195).chr(166)."", 89 "À" => "".chr(195).chr(128)."", 90 "à" => "".chr(195).chr(160)."", 91 "ℵ" => "".chr(226).chr(132).chr(181)."", 92 "Α" => "".chr(206).chr(145)."", 93 "α" => "".chr(206).chr(177)."", 94 "&" => "".chr(38)."", 95 "∧" => "".chr(226).chr(136).chr(167)."", 96 "∠" => "".chr(226).chr(136).chr(160)."", 97 "Å" => "".chr(195).chr(133)."", 98 "å" => "".chr(195).chr(165)."", 99 "≈" => "".chr(226).chr(137).chr(136)."", 100 "Ã" => "".chr(195).chr(131)."", 101 "ã" => "".chr(195).chr(163)."", 102 "Ä" => "".chr(195).chr(132)."", 103 "ä" => "".chr(195).chr(164)."", 104 "„" => "".chr(226).chr(128).chr(158)."", 105 "Β" => "".chr(206).chr(146)."", 106 "β" => "".chr(206).chr(178)."", 107 "¦" => "".chr(194).chr(166)."", 108 "•" => "".chr(226).chr(128).chr(162)."", 109 "∩" => "".chr(226).chr(136).chr(169)."", 110 "Ç" => "".chr(195).chr(135)."", 111 "ç" => "".chr(195).chr(167)."", 112 "¸" => "".chr(194).chr(184)."", 113 "¢" => "".chr(194).chr(162)."", 114 "Χ" => "".chr(206).chr(167)."", 115 "χ" => "".chr(207).chr(135)."", 116 "ˆ" => "".chr(203).chr(134)."", 117 "♣" => "".chr(226).chr(153).chr(163)."", 118 "≅" => "".chr(226).chr(137).chr(133)."", 119 "©" => "".chr(194).chr(169)."", 120 "↵" => "".chr(226).chr(134).chr(181)."", 121 "∪" => "".chr(226).chr(136).chr(170)."", 122 "¤" => "".chr(194).chr(164)."", 123 "†" => "".chr(226).chr(128).chr(160)."", 124 "‡" => "".chr(226).chr(128).chr(161)."", 125 "↓" => "".chr(226).chr(134).chr(147)."", 126 "⇓" => "".chr(226).chr(135).chr(147)."", 127 "°" => "".chr(194).chr(176)."", 128 "Δ" => "".chr(206).chr(148)."", 129 "δ" => "".chr(206).chr(180)."", 130 "♦" => 
"".chr(226).chr(153).chr(166)."", 131 "÷" => "".chr(195).chr(183)."", 132 "É" => "".chr(195).chr(137)."", 133 "é" => "".chr(195).chr(169)."", 134 "Ê" => "".chr(195).chr(138)."", 135 "ê" => "".chr(195).chr(170)."", 136 "È" => "".chr(195).chr(136)."", 137 "è" => "".chr(195).chr(168)."", 138 "∅" => "".chr(226).chr(136).chr(133)."", 139 " " => "".chr(226).chr(128).chr(131)."", 140 " " => "".chr(226).chr(128).chr(130)."", 141 "Ε" => "".chr(206).chr(149)."", 142 "ε" => "".chr(206).chr(181)."", 143 "≡" => "".chr(226).chr(137).chr(161)."", 144 "Η" => "".chr(206).chr(151)."", 145 "η" => "".chr(206).chr(183)."", 146 "Ð" => "".chr(195).chr(144)."", 147 "ð" => "".chr(195).chr(176)."", 148 "Ë" => "".chr(195).chr(139)."", 149 "ë" => "".chr(195).chr(171)."", 150 "€" => "".chr(226).chr(130).chr(172)."", 151 "∃" => "".chr(226).chr(136).chr(131)."", 152 "ƒ" => "".chr(198).chr(146)."", 153 "∀" => "".chr(226).chr(136).chr(128)."", 154 "½" => "".chr(194).chr(189)."", 155 "¼" => "".chr(194).chr(188)."", 156 "¾" => "".chr(194).chr(190)."", 157 "⁄" => "".chr(226).chr(129).chr(132)."", 158 "Γ" => "".chr(206).chr(147)."", 159 "γ" => "".chr(206).chr(179)."", 160 "≥" => "".chr(226).chr(137).chr(165)."", 161 "↔" => "".chr(226).chr(134).chr(148)."", 162 "⇔" => "".chr(226).chr(135).chr(148)."", 163 "♥" => "".chr(226).chr(153).chr(165)."", 164 "…" => "".chr(226).chr(128).chr(166)."", 165 "Í" => "".chr(195).chr(141)."", 166 "í" => "".chr(195).chr(173)."", 167 "Î" => "".chr(195).chr(142)."", 168 "î" => "".chr(195).chr(174)."", 169 "¡" => "".chr(194).chr(161)."", 170 "Ì" => "".chr(195).chr(140)."", 171 "ì" => "".chr(195).chr(172)."", 172 "ℑ" => "".chr(226).chr(132).chr(145)."", 173 "∞" => "".chr(226).chr(136).chr(158)."", 174 "∫" => "".chr(226).chr(136).chr(171)."", 175 "Ι" => "".chr(206).chr(153)."", 176 "ι" => "".chr(206).chr(185)."", 177 "¿" => "".chr(194).chr(191)."", 178 "∈" => "".chr(226).chr(136).chr(136)."", 179 "Ï" => "".chr(195).chr(143)."", 180 "ï" => "".chr(195).chr(175)."", 181 "Κ" => 
"".chr(206).chr(154)."", 182 "κ" => "".chr(206).chr(186)."", 183 "Λ" => "".chr(206).chr(155)."", 184 "λ" => "".chr(206).chr(187)."", 185 "⟨" => "".chr(226).chr(140).chr(169)."", 186 "«" => "".chr(194).chr(171)."", 187 "←" => "".chr(226).chr(134).chr(144)."", 188 "⇐" => "".chr(226).chr(135).chr(144)."", 189 "⌈" => "".chr(226).chr(140).chr(136)."", 190 "“" => "".chr(226).chr(128).chr(156)."", 191 "≤" => "".chr(226).chr(137).chr(164)."", 192 "⌊" => "".chr(226).chr(140).chr(138)."", 193 "∗" => "".chr(226).chr(136).chr(151)."", 194 "◊" => "".chr(226).chr(151).chr(138)."", 195 "‎" => "".chr(226).chr(128).chr(142)."", 196 "‹" => "".chr(226).chr(128).chr(185)."", 197 "‘" => "".chr(226).chr(128).chr(152)."", 198 "¯" => "".chr(194).chr(175)."", 199 "—" => "".chr(226).chr(128).chr(148)."", 200 "µ" => "".chr(194).chr(181)."", 201 "·" => "".chr(194).chr(183)."", 202 "−" => "".chr(226).chr(136).chr(146)."", 203 "Μ" => "".chr(206).chr(156)."", 204 "μ" => "".chr(206).chr(188)."", 205 "∇" => "".chr(226).chr(136).chr(135)."", 206 " " => "".chr(194).chr(160)."", 207 "–" => "".chr(226).chr(128).chr(147)."", 208 "≠" => "".chr(226).chr(137).chr(160)."", 209 "∋" => "".chr(226).chr(136).chr(139)."", 210 "¬" => "".chr(194).chr(172)."", 211 "∉" => "".chr(226).chr(136).chr(137)."", 212 "⊄" => "".chr(226).chr(138).chr(132)."", 213 "Ñ" => "".chr(195).chr(145)."", 214 "ñ" => "".chr(195).chr(177)."", 215 "Ν" => "".chr(206).chr(157)."", 216 "ν" => "".chr(206).chr(189)."", 217 "Ó" => "".chr(195).chr(147)."", 218 "ó" => "".chr(195).chr(179)."", 219 "Ô" => "".chr(195).chr(148)."", 220 "ô" => "".chr(195).chr(180)."", 221 "Œ" => "".chr(197).chr(146)."", 222 "œ" => "".chr(197).chr(147)."", 223 "Ò" => "".chr(195).chr(146)."", 224 "ò" => "".chr(195).chr(178)."", 225 "‾" => "".chr(226).chr(128).chr(190)."", 226 "Ω" => "".chr(206).chr(169)."", 227 "ω" => "".chr(207).chr(137)."", 228 "Ο" => "".chr(206).chr(159)."", 229 "ο" => "".chr(206).chr(191)."", 230 "⊕" => "".chr(226).chr(138).chr(149)."", 231 "∨" => 
"".chr(226).chr(136).chr(168)."", 232 "ª" => "".chr(194).chr(170)."", 233 "º" => "".chr(194).chr(186)."", 234 "Ø" => "".chr(195).chr(152)."", 235 "ø" => "".chr(195).chr(184)."", 236 "Õ" => "".chr(195).chr(149)."", 237 "õ" => "".chr(195).chr(181)."", 238 "⊗" => "".chr(226).chr(138).chr(151)."", 239 "Ö" => "".chr(195).chr(150)."", 240 "ö" => "".chr(195).chr(182)."", 241 "¶" => "".chr(194).chr(182)."", 242 "∂" => "".chr(226).chr(136).chr(130)."", 243 "‰" => "".chr(226).chr(128).chr(176)."", 244 "⊥" => "".chr(226).chr(138).chr(165)."", 245 "Φ" => "".chr(206).chr(166)."", 246 "φ" => "".chr(207).chr(134)."", 247 "Π" => "".chr(206).chr(160)."", 248 "π" => "".chr(207).chr(128)."", 249 "ϖ" => "".chr(207).chr(150)."", 250 "±" => "".chr(194).chr(177)."", 251 "£" => "".chr(194).chr(163)."", 252 "′" => "".chr(226).chr(128).chr(178)."", 253 "″" => "".chr(226).chr(128).chr(179)."", 254 "∏" => "".chr(226).chr(136).chr(143)."", 255 "∝" => "".chr(226).chr(136).chr(157)."", 256 "Ψ" => "".chr(206).chr(168)."", 257 "ψ" => "".chr(207).chr(136)."", 258 "√" => "".chr(226).chr(136).chr(154)."", 259 "⟩" => "".chr(226).chr(140).chr(170)."", 260 "»" => "".chr(194).chr(187)."", 261 "→" => "".chr(226).chr(134).chr(146)."", 262 "⇒" => "".chr(226).chr(135).chr(146)."", 263 "⌉" => "".chr(226).chr(140).chr(137)."", 264 "”" => "".chr(226).chr(128).chr(157)."", 265 "ℜ" => "".chr(226).chr(132).chr(156)."", 266 "®" => "".chr(194).chr(174)."", 267 "⌋" => "".chr(226).chr(140).chr(139)."", 268 "Ρ" => "".chr(206).chr(161)."", 269 "ρ" => "".chr(207).chr(129)."", 270 "‏" => "".chr(226).chr(128).chr(143)."", 271 "›" => "".chr(226).chr(128).chr(186)."", 272 "’" => "".chr(226).chr(128).chr(153)."", 273 "‚" => "".chr(226).chr(128).chr(154)."", 274 "Š" => "".chr(197).chr(160)."", 275 "š" => "".chr(197).chr(161)."", 276 "⋅" => "".chr(226).chr(139).chr(133)."", 277 "§" => "".chr(194).chr(167)."", 278 "­" => "".chr(194).chr(173)."", 279 "Σ" => "".chr(206).chr(163)."", 280 "σ" => "".chr(207).chr(131)."", 281 "ς" => 
"".chr(207).chr(130)."", 282 "∼" => "".chr(226).chr(136).chr(188)."", 283 "♠" => "".chr(226).chr(153).chr(160)."", 284 "⊂" => "".chr(226).chr(138).chr(130)."", 285 "⊆" => "".chr(226).chr(138).chr(134)."", 286 "∑" => "".chr(226).chr(136).chr(145)."", 287 "¹" => "".chr(194).chr(185)."", 288 "²" => "".chr(194).chr(178)."", 289 "³" => "".chr(194).chr(179)."", 290 "⊃" => "".chr(226).chr(138).chr(131)."", 291 "⊇" => "".chr(226).chr(138).chr(135)."", 292 "ß" => "".chr(195).chr(159)."", 293 "Τ" => "".chr(206).chr(164)."", 294 "τ" => "".chr(207).chr(132)."", 295 "∴" => "".chr(226).chr(136).chr(180)."", 296 "Θ" => "".chr(206).chr(152)."", 297 "θ" => "".chr(206).chr(184)."", 298 "ϑ" => "".chr(207).chr(145)."", 299 " " => "".chr(226).chr(128).chr(137)."", 300 "Þ" => "".chr(195).chr(158)."", 301 "þ" => "".chr(195).chr(190)."", 302 "˜" => "".chr(203).chr(156)."", 303 "×" => "".chr(195).chr(151)."", 304 "™" => "".chr(226).chr(132).chr(162)."", 305 "Ú" => "".chr(195).chr(154)."", 306 "ú" => "".chr(195).chr(186)."", 307 "↑" => "".chr(226).chr(134).chr(145)."", 308 "⇑" => "".chr(226).chr(135).chr(145)."", 309 "Û" => "".chr(195).chr(155)."", 310 "û" => "".chr(195).chr(187)."", 311 "Ù" => "".chr(195).chr(153)."", 312 "ù" => "".chr(195).chr(185)."", 313 "¨" => "".chr(194).chr(168)."", 314 "ϒ" => "".chr(207).chr(146)."", 315 "Υ" => "".chr(206).chr(165)."", 316 "υ" => "".chr(207).chr(133)."", 317 "Ü" => "".chr(195).chr(156)."", 318 "ü" => "".chr(195).chr(188)."", 319 "℘" => "".chr(226).chr(132).chr(152)."", 320 "Ξ" => "".chr(206).chr(158)."", 321 "ξ" => "".chr(206).chr(190)."", 322 "Ý" => "".chr(195).chr(157)."", 323 "ý" => "".chr(195).chr(189)."", 324 "¥" => "".chr(194).chr(165)."", 325 "ÿ" => "".chr(195).chr(191)."", 326 "Ÿ" => "".chr(197).chr(184)."", 327 "Ζ" => "".chr(206).chr(150)."", 328 "ζ" => "".chr(206).chr(182)."", 329 "‍" => "".chr(226).chr(128).chr(141)."", 330 "‌" => "".chr(226).chr(128).chr(140)."", 331 ">" => ">", 332 "<" => "<" 333 ); 334 $return_text = 
strtr($text_to_convert, $htmlentities_table); 335 336 // 07.11.2014, from php 5.3.0 fixed deprecated preg_replace with the /e flag. Joe 337 if (version_compare(PHP_VERSION, '5.3.0') >= 0 && function_exists("preg_replace_callback")) 338 { 339 $return_text = preg_replace_callback('~&#x([0-9a-f]+);~i', function ($m){ return chr(hexdec($m[1]));}, $return_text); 340 $return_text = preg_replace_callback('~&#([0-9]+);~', function ($m){ return chr($m[1]);}, $return_text); 341 } 342 else 343 { 344 $return_text = preg_replace('~&#x([0-9a-f]+);~ei', 'code_to_utf8(hexdec("\\1"))', $return_text); 345 $return_text = preg_replace('~&#([0-9]+);~e', 'code_to_utf8(\\1)', $return_text); 346 } 347 return $return_text; 348} 349 350//============================================================+ 351// END OF FILE 352//============================================================+ 353?>