1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
16 | Rui Hirokawa <hirokawa@php.net> |
17 | Hironori Sato <satoh@jpnnet.com> |
18 | Shigeru Kanemoto <sgk@happysize.co.jp> |
19 +----------------------------------------------------------------------+
20 */
21
22 /* {{{ includes */
23 #ifdef HAVE_CONFIG_H
24 #include "config.h"
25 #endif
26
27 #include "php.h"
28 #include "php_ini.h"
29 #include "php_variables.h"
30 #include "mbstring.h"
31 #include "ext/standard/php_string.h"
32 #include "ext/standard/php_mail.h"
33 #include "ext/standard/exec.h"
34 #include "ext/standard/url.h"
35 #include "main/php_output.h"
36 #include "ext/standard/info.h"
37
38 #include "libmbfl/mbfl/mbfl_allocators.h"
39 #include "libmbfl/mbfl/mbfilter_8bit.h"
40 #include "libmbfl/mbfl/mbfilter_pass.h"
41 #include "libmbfl/mbfl/mbfilter_wchar.h"
42 #include "libmbfl/filters/mbfilter_ascii.h"
43 #include "libmbfl/filters/mbfilter_base64.h"
44 #include "libmbfl/filters/mbfilter_qprint.h"
45 #include "libmbfl/filters/mbfilter_ucs4.h"
46 #include "libmbfl/filters/mbfilter_utf8.h"
47
48 #include "php_variables.h"
49 #include "php_globals.h"
50 #include "rfc1867.h"
51 #include "php_content_types.h"
52 #include "SAPI.h"
53 #include "php_unicode.h"
54 #include "TSRM.h"
55
56 #include "mb_gpc.h"
57
58 #if HAVE_MBREGEX
59 # include "php_mbregex.h"
60 # include "php_onig_compat.h"
61 # include <oniguruma.h>
62 # undef UChar
63 #if ONIGURUMA_VERSION_INT < 60800
64 typedef void OnigMatchParam;
65 #define onig_new_match_param() (NULL)
66 #define onig_initialize_match_param(x) (void)(x)
67 #define onig_set_match_stack_limit_size_of_match_param(x, y)
68 #define onig_set_retry_limit_in_match_of_match_param(x, y)
69 #define onig_free_match_param(x)
70 #define onig_search_with_param(reg, str, end, start, range, region, option, mp) \
71 onig_search(reg, str, end, start, range, region, option)
72 #define onig_match_with_param(re, str, end, at, region, option, mp) \
73 onig_match(re, str, end, at, region, option)
74 #endif
75 #else
76 # include "ext/pcre/php_pcre.h"
77 #endif
78
79 #include "zend_multibyte.h"
80 /* }}} */
81
82 #if HAVE_MBSTRING
83
84 /* {{{ prototypes */
85 ZEND_DECLARE_MODULE_GLOBALS(mbstring)
86
87 static PHP_GINIT_FUNCTION(mbstring);
88 static PHP_GSHUTDOWN_FUNCTION(mbstring);
89
90 static void php_mb_populate_current_detect_order_list(void);
91
92 static int php_mb_encoding_translation(void);
93
94 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);
95
96 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
97
98 static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
99
100 static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
101 /* }}} */
102
103 /* {{{ php_mb_default_identify_list */
104 typedef struct _php_mb_nls_ident_list {
105 enum mbfl_no_language lang;
106 const enum mbfl_no_encoding *list;
107 size_t list_size;
108 } php_mb_nls_ident_list;
109
110 static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
111 mbfl_no_encoding_ascii,
112 mbfl_no_encoding_jis,
113 mbfl_no_encoding_utf8,
114 mbfl_no_encoding_euc_jp,
115 mbfl_no_encoding_sjis
116 };
117
118 static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
119 mbfl_no_encoding_ascii,
120 mbfl_no_encoding_utf8,
121 mbfl_no_encoding_euc_cn,
122 mbfl_no_encoding_cp936
123 };
124
125 static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
126 mbfl_no_encoding_ascii,
127 mbfl_no_encoding_utf8,
128 mbfl_no_encoding_euc_tw,
129 mbfl_no_encoding_big5
130 };
131
132 static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
133 mbfl_no_encoding_ascii,
134 mbfl_no_encoding_utf8,
135 mbfl_no_encoding_euc_kr,
136 mbfl_no_encoding_uhc
137 };
138
139 static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
140 mbfl_no_encoding_ascii,
141 mbfl_no_encoding_utf8,
142 mbfl_no_encoding_koi8r,
143 mbfl_no_encoding_cp1251,
144 mbfl_no_encoding_cp866
145 };
146
147 static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
148 mbfl_no_encoding_ascii,
149 mbfl_no_encoding_utf8,
150 mbfl_no_encoding_armscii8
151 };
152
153 static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
154 mbfl_no_encoding_ascii,
155 mbfl_no_encoding_utf8,
156 mbfl_no_encoding_cp1254,
157 mbfl_no_encoding_8859_9
158 };
159
160 static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
161 mbfl_no_encoding_ascii,
162 mbfl_no_encoding_utf8,
163 mbfl_no_encoding_koi8u
164 };
165
166 static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
167 mbfl_no_encoding_ascii,
168 mbfl_no_encoding_utf8
169 };
170
171
172 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
173 { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
174 { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
175 { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
176 { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
177 { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
178 { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
179 { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
180 { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
181 { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
182 };
183
184 /* }}} */
185
186 /* {{{ mb_overload_def mb_ovld[] */
187 static const struct mb_overload_def mb_ovld[] = {
188 {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
189 {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
190 {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
191 {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
192 {MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
193 {MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
194 {MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
195 {MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
196 {MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
197 {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
198 {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
199 {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
200 {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
201 {0, NULL, NULL, NULL}
202 };
203 /* }}} */
204
205 /* {{{ arginfo */
206 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
207 ZEND_ARG_INFO(0, language)
208 ZEND_END_ARG_INFO()
209
210 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
211 ZEND_ARG_INFO(0, encoding)
212 ZEND_END_ARG_INFO()
213
214 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
215 ZEND_ARG_INFO(0, type)
216 ZEND_END_ARG_INFO()
217
218 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
219 ZEND_ARG_INFO(0, encoding)
220 ZEND_END_ARG_INFO()
221
222 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
223 ZEND_ARG_INFO(0, encoding)
224 ZEND_END_ARG_INFO()
225
226 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
227 ZEND_ARG_INFO(0, substchar)
228 ZEND_END_ARG_INFO()
229
230 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
231 ZEND_ARG_INFO(0, encoding)
232 ZEND_END_ARG_INFO()
233
234 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
235 ZEND_ARG_INFO(0, encoded_string)
236 ZEND_ARG_INFO(1, result)
237 ZEND_END_ARG_INFO()
238
239 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
240 ZEND_ARG_INFO(0, contents)
241 ZEND_ARG_INFO(0, status)
242 ZEND_END_ARG_INFO()
243
244 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_str_split, 0, 0, 1)
245 ZEND_ARG_INFO(0, str)
246 ZEND_ARG_INFO(0, split_length)
247 ZEND_ARG_INFO(0, encoding)
248 ZEND_END_ARG_INFO()
249
250 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
251 ZEND_ARG_INFO(0, str)
252 ZEND_ARG_INFO(0, encoding)
253 ZEND_END_ARG_INFO()
254
255 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
256 ZEND_ARG_INFO(0, haystack)
257 ZEND_ARG_INFO(0, needle)
258 ZEND_ARG_INFO(0, offset)
259 ZEND_ARG_INFO(0, encoding)
260 ZEND_END_ARG_INFO()
261
262 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
263 ZEND_ARG_INFO(0, haystack)
264 ZEND_ARG_INFO(0, needle)
265 ZEND_ARG_INFO(0, offset)
266 ZEND_ARG_INFO(0, encoding)
267 ZEND_END_ARG_INFO()
268
269 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
270 ZEND_ARG_INFO(0, haystack)
271 ZEND_ARG_INFO(0, needle)
272 ZEND_ARG_INFO(0, offset)
273 ZEND_ARG_INFO(0, encoding)
274 ZEND_END_ARG_INFO()
275
276 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
277 ZEND_ARG_INFO(0, haystack)
278 ZEND_ARG_INFO(0, needle)
279 ZEND_ARG_INFO(0, offset)
280 ZEND_ARG_INFO(0, encoding)
281 ZEND_END_ARG_INFO()
282
283 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
284 ZEND_ARG_INFO(0, haystack)
285 ZEND_ARG_INFO(0, needle)
286 ZEND_ARG_INFO(0, part)
287 ZEND_ARG_INFO(0, encoding)
288 ZEND_END_ARG_INFO()
289
290 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
291 ZEND_ARG_INFO(0, haystack)
292 ZEND_ARG_INFO(0, needle)
293 ZEND_ARG_INFO(0, part)
294 ZEND_ARG_INFO(0, encoding)
295 ZEND_END_ARG_INFO()
296
297 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
298 ZEND_ARG_INFO(0, haystack)
299 ZEND_ARG_INFO(0, needle)
300 ZEND_ARG_INFO(0, part)
301 ZEND_ARG_INFO(0, encoding)
302 ZEND_END_ARG_INFO()
303
304 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
305 ZEND_ARG_INFO(0, haystack)
306 ZEND_ARG_INFO(0, needle)
307 ZEND_ARG_INFO(0, part)
308 ZEND_ARG_INFO(0, encoding)
309 ZEND_END_ARG_INFO()
310
311 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
312 ZEND_ARG_INFO(0, haystack)
313 ZEND_ARG_INFO(0, needle)
314 ZEND_ARG_INFO(0, encoding)
315 ZEND_END_ARG_INFO()
316
317 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
318 ZEND_ARG_INFO(0, str)
319 ZEND_ARG_INFO(0, start)
320 ZEND_ARG_INFO(0, length)
321 ZEND_ARG_INFO(0, encoding)
322 ZEND_END_ARG_INFO()
323
324 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
325 ZEND_ARG_INFO(0, str)
326 ZEND_ARG_INFO(0, start)
327 ZEND_ARG_INFO(0, length)
328 ZEND_ARG_INFO(0, encoding)
329 ZEND_END_ARG_INFO()
330
331 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
332 ZEND_ARG_INFO(0, str)
333 ZEND_ARG_INFO(0, encoding)
334 ZEND_END_ARG_INFO()
335
336 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
337 ZEND_ARG_INFO(0, str)
338 ZEND_ARG_INFO(0, start)
339 ZEND_ARG_INFO(0, width)
340 ZEND_ARG_INFO(0, trimmarker)
341 ZEND_ARG_INFO(0, encoding)
342 ZEND_END_ARG_INFO()
343
344 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
345 ZEND_ARG_INFO(0, str)
346 ZEND_ARG_INFO(0, to)
347 ZEND_ARG_INFO(0, from)
348 ZEND_END_ARG_INFO()
349
350 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
351 ZEND_ARG_INFO(0, sourcestring)
352 ZEND_ARG_INFO(0, mode)
353 ZEND_ARG_INFO(0, encoding)
354 ZEND_END_ARG_INFO()
355
356 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
357 ZEND_ARG_INFO(0, sourcestring)
358 ZEND_ARG_INFO(0, encoding)
359 ZEND_END_ARG_INFO()
360
361 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
362 ZEND_ARG_INFO(0, sourcestring)
363 ZEND_ARG_INFO(0, encoding)
364 ZEND_END_ARG_INFO()
365
366 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
367 ZEND_ARG_INFO(0, str)
368 ZEND_ARG_INFO(0, encoding_list)
369 ZEND_ARG_INFO(0, strict)
370 ZEND_END_ARG_INFO()
371
372 ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
373 ZEND_END_ARG_INFO()
374
375 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
376 ZEND_ARG_INFO(0, encoding)
377 ZEND_END_ARG_INFO()
378
379 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
380 ZEND_ARG_INFO(0, str)
381 ZEND_ARG_INFO(0, charset)
382 ZEND_ARG_INFO(0, transfer)
383 ZEND_ARG_INFO(0, linefeed)
384 ZEND_ARG_INFO(0, indent)
385 ZEND_END_ARG_INFO()
386
387 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
388 ZEND_ARG_INFO(0, string)
389 ZEND_END_ARG_INFO()
390
391 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
392 ZEND_ARG_INFO(0, str)
393 ZEND_ARG_INFO(0, option)
394 ZEND_ARG_INFO(0, encoding)
395 ZEND_END_ARG_INFO()
396
397 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 0, 0, 3)
398 ZEND_ARG_INFO(0, to)
399 ZEND_ARG_INFO(0, from)
400 ZEND_ARG_VARIADIC_INFO(1, vars)
401 ZEND_END_ARG_INFO()
402
403 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
404 ZEND_ARG_INFO(0, string)
405 ZEND_ARG_INFO(0, convmap)
406 ZEND_ARG_INFO(0, encoding)
407 ZEND_ARG_INFO(0, is_hex)
408 ZEND_END_ARG_INFO()
409
410 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
411 ZEND_ARG_INFO(0, string)
412 ZEND_ARG_INFO(0, convmap)
413 ZEND_ARG_INFO(0, encoding)
414 ZEND_ARG_INFO(0, is_hex)
415 ZEND_END_ARG_INFO()
416
417 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
418 ZEND_ARG_INFO(0, to)
419 ZEND_ARG_INFO(0, subject)
420 ZEND_ARG_INFO(0, message)
421 ZEND_ARG_INFO(0, additional_headers)
422 ZEND_ARG_INFO(0, additional_parameters)
423 ZEND_END_ARG_INFO()
424
425 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
426 ZEND_ARG_INFO(0, type)
427 ZEND_END_ARG_INFO()
428
429 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
430 ZEND_ARG_INFO(0, var)
431 ZEND_ARG_INFO(0, encoding)
432 ZEND_END_ARG_INFO()
433
434 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_scrub, 0, 0, 1)
435 ZEND_ARG_INFO(0, str)
436 ZEND_ARG_INFO(0, encoding)
437 ZEND_END_ARG_INFO()
438
439 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1)
440 ZEND_ARG_INFO(0, str)
441 ZEND_ARG_INFO(0, encoding)
442 ZEND_END_ARG_INFO()
443
444 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_chr, 0, 0, 1)
445 ZEND_ARG_INFO(0, cp)
446 ZEND_ARG_INFO(0, encoding)
447 ZEND_END_ARG_INFO()
448
#if HAVE_MBREGEX
/*
 * Argument descriptions for the mb_ereg and mb_regex function family; only
 * compiled when HAVE_MBREGEX is set. Identical signatures share one
 * definition and are exposed under the other names as aliases.
 */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(1, registers)
ZEND_END_ARG_INFO()

/* Same (pattern, string, &registers) signature as mb_ereg(). */
#define arginfo_mb_eregi arginfo_mb_ereg

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, replacement)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

/* Same (pattern, replacement, string, option) signature as mb_ereg_replace(). */
#define arginfo_mb_eregi_replace arginfo_mb_ereg_replace

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, callback)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, limit)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

/* Same optional (pattern, option) signature as mb_ereg_search(). */
#define arginfo_mb_ereg_search_pos  arginfo_mb_ereg_search
#define arginfo_mb_ereg_search_regs arginfo_mb_ereg_search

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
ZEND_END_ARG_INFO()

/* Takes no arguments, like mb_ereg_search_getregs(). */
#define arginfo_mb_ereg_search_getpos arginfo_mb_ereg_search_getregs

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
	ZEND_ARG_INFO(0, position)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
	ZEND_ARG_INFO(0, options)
ZEND_END_ARG_INFO()
#endif /* HAVE_MBREGEX */
534 /* }}} */
535
536 /* {{{ zend_function_entry mbstring_functions[] */
537 static const zend_function_entry mbstring_functions[] = {
538 PHP_FE(mb_convert_case, arginfo_mb_convert_case)
539 PHP_FE(mb_strtoupper, arginfo_mb_strtoupper)
540 PHP_FE(mb_strtolower, arginfo_mb_strtolower)
541 PHP_FE(mb_language, arginfo_mb_language)
542 PHP_FE(mb_internal_encoding, arginfo_mb_internal_encoding)
543 PHP_FE(mb_http_input, arginfo_mb_http_input)
544 PHP_FE(mb_http_output, arginfo_mb_http_output)
545 PHP_FE(mb_detect_order, arginfo_mb_detect_order)
546 PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
547 PHP_FE(mb_parse_str, arginfo_mb_parse_str)
548 PHP_FE(mb_output_handler, arginfo_mb_output_handler)
549 PHP_FE(mb_preferred_mime_name, arginfo_mb_preferred_mime_name)
550 PHP_FE(mb_str_split, arginfo_mb_str_split)
551 PHP_FE(mb_strlen, arginfo_mb_strlen)
552 PHP_FE(mb_strpos, arginfo_mb_strpos)
553 PHP_FE(mb_strrpos, arginfo_mb_strrpos)
554 PHP_FE(mb_stripos, arginfo_mb_stripos)
555 PHP_FE(mb_strripos, arginfo_mb_strripos)
556 PHP_FE(mb_strstr, arginfo_mb_strstr)
557 PHP_FE(mb_strrchr, arginfo_mb_strrchr)
558 PHP_FE(mb_stristr, arginfo_mb_stristr)
559 PHP_FE(mb_strrichr, arginfo_mb_strrichr)
560 PHP_FE(mb_substr_count, arginfo_mb_substr_count)
561 PHP_FE(mb_substr, arginfo_mb_substr)
562 PHP_FE(mb_strcut, arginfo_mb_strcut)
563 PHP_FE(mb_strwidth, arginfo_mb_strwidth)
564 PHP_FE(mb_strimwidth, arginfo_mb_strimwidth)
565 PHP_FE(mb_convert_encoding, arginfo_mb_convert_encoding)
566 PHP_FE(mb_detect_encoding, arginfo_mb_detect_encoding)
567 PHP_FE(mb_list_encodings, arginfo_mb_list_encodings)
568 PHP_FE(mb_encoding_aliases, arginfo_mb_encoding_aliases)
569 PHP_FE(mb_convert_kana, arginfo_mb_convert_kana)
570 PHP_FE(mb_encode_mimeheader, arginfo_mb_encode_mimeheader)
571 PHP_FE(mb_decode_mimeheader, arginfo_mb_decode_mimeheader)
572 PHP_FE(mb_convert_variables, arginfo_mb_convert_variables)
573 PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
574 PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
575 PHP_FE(mb_send_mail, arginfo_mb_send_mail)
576 PHP_FE(mb_get_info, arginfo_mb_get_info)
577 PHP_FE(mb_check_encoding, arginfo_mb_check_encoding)
578 PHP_FE(mb_ord, arginfo_mb_ord)
579 PHP_FE(mb_chr, arginfo_mb_chr)
580 PHP_FE(mb_scrub, arginfo_mb_scrub)
581 #if HAVE_MBREGEX
582 PHP_MBREGEX_FUNCTION_ENTRIES
583 #endif
584 PHP_FE_END
585 };
586 /* }}} */
587
588 /* {{{ zend_module_entry mbstring_module_entry */
589 zend_module_entry mbstring_module_entry = {
590 STANDARD_MODULE_HEADER,
591 "mbstring",
592 mbstring_functions,
593 PHP_MINIT(mbstring),
594 PHP_MSHUTDOWN(mbstring),
595 PHP_RINIT(mbstring),
596 PHP_RSHUTDOWN(mbstring),
597 PHP_MINFO(mbstring),
598 PHP_MBSTRING_VERSION,
599 PHP_MODULE_GLOBALS(mbstring),
600 PHP_GINIT(mbstring),
601 PHP_GSHUTDOWN(mbstring),
602 NULL,
603 STANDARD_MODULE_PROPERTIES_EX
604 };
605 /* }}} */
606
607 /* {{{ static sapi_post_entry php_post_entries[] */
608 static const sapi_post_entry php_post_entries[] = {
609 { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
610 { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
611 { NULL, 0, NULL, NULL }
612 };
613 /* }}} */
614
/* Emitted only when the extension is compiled as a loadable module. */
#if defined(COMPILE_DL_MBSTRING)
# if defined(ZTS)
ZEND_TSRMLS_CACHE_DEFINE()
# endif
ZEND_GET_MODULE(mbstring)
#endif
621
622 /* {{{ allocators */
/* malloc slot of _php_mb_allocators: forwards to emalloc(). */
static void *_php_mb_allocators_malloc(size_t sz)
{
	void *block = emalloc(sz);

	return block;
}
627
/* realloc slot of _php_mb_allocators: forwards to erealloc(). */
static void *_php_mb_allocators_realloc(void *ptr, size_t sz)
{
	void *resized = erealloc(ptr, sz);

	return resized;
}
632
/* calloc slot of _php_mb_allocators: forwards to ecalloc(). */
static void *_php_mb_allocators_calloc(size_t nelems, size_t szelem)
{
	void *zeroed = ecalloc(nelems, szelem);

	return zeroed;
}
637
/* free slot of _php_mb_allocators: releases `ptr` with efree(). */
static void _php_mb_allocators_free(void *ptr)
{
	efree(ptr);
	return;
}
642
/* Persistent malloc slot: pemalloc() with the persistent flag set to 1. */
static void *_php_mb_allocators_pmalloc(size_t sz)
{
	void *block = pemalloc(sz, 1);

	return block;
}
647
/* Persistent realloc slot: perealloc() with the persistent flag set to 1. */
static void *_php_mb_allocators_prealloc(void *ptr, size_t sz)
{
	void *resized = perealloc(ptr, sz, 1);

	return resized;
}
652
/* Persistent free slot: pefree() with the persistent flag set to 1. */
static void _php_mb_allocators_pfree(void *ptr)
{
	pefree(ptr, 1);
	return;
}
657
658 static const mbfl_allocators _php_mb_allocators = {
659 _php_mb_allocators_malloc,
660 _php_mb_allocators_realloc,
661 _php_mb_allocators_calloc,
662 _php_mb_allocators_free,
663 _php_mb_allocators_pmalloc,
664 _php_mb_allocators_prealloc,
665 _php_mb_allocators_pfree
666 };
667 /* }}} */
668
669 /* {{{ static sapi_post_entry mbstr_post_entries[] */
670 static const sapi_post_entry mbstr_post_entries[] = {
671 { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
672 { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
673 { NULL, 0, NULL, NULL }
674 };
675 /* }}} */
676
php_mb_get_encoding(zend_string * encoding_name)677 static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name) {
678 if (encoding_name) {
679 const mbfl_encoding *encoding;
680 zend_string *last_encoding_name = MBSTRG(last_used_encoding_name);
681 if (last_encoding_name && (last_encoding_name == encoding_name
682 || !strcasecmp(ZSTR_VAL(encoding_name), ZSTR_VAL(last_encoding_name)))) {
683 return MBSTRG(last_used_encoding);
684 }
685
686 encoding = mbfl_name2encoding(ZSTR_VAL(encoding_name));
687 if (!encoding) {
688 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", ZSTR_VAL(encoding_name));
689 return NULL;
690 }
691
692 if (last_encoding_name) {
693 zend_string_release(last_encoding_name);
694 }
695 MBSTRG(last_used_encoding_name) = zend_string_copy(encoding_name);
696 MBSTRG(last_used_encoding) = encoding;
697 return encoding;
698 } else {
699 return MBSTRG(current_internal_encoding);
700 }
701 }
702
703 /* {{{ static int php_mb_parse_encoding_list()
704 * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
705 * Even if any illegal encoding is detected the result may contain a list
706 * of parsed encodings.
707 */
708 static int
php_mb_parse_encoding_list(const char * value,size_t value_length,const mbfl_encoding *** return_list,size_t * return_size,int persistent)709 php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
710 {
711 int bauto, ret = SUCCESS;
712 size_t n, size;
713 char *p, *p1, *p2, *endp, *tmpstr;
714 const mbfl_encoding **entry, **list;
715
716 list = NULL;
717 if (value == NULL || value_length == 0) {
718 if (return_list) {
719 *return_list = NULL;
720 }
721 if (return_size) {
722 *return_size = 0;
723 }
724 return FAILURE;
725 } else {
726 /* copy the value string for work */
727 if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
728 tmpstr = (char *)estrndup(value+1, value_length-2);
729 value_length -= 2;
730 }
731 else
732 tmpstr = (char *)estrndup(value, value_length);
733 /* count the number of listed encoding names */
734 endp = tmpstr + value_length;
735 n = 1;
736 p1 = tmpstr;
737 while ((p2 = (char*)php_memnstr(p1, ",", 1, endp)) != NULL) {
738 p1 = p2 + 1;
739 n++;
740 }
741 size = n + MBSTRG(default_detect_order_list_size);
742 /* make list */
743 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
744 entry = list;
745 n = 0;
746 bauto = 0;
747 p1 = tmpstr;
748 do {
749 p2 = p = (char*)php_memnstr(p1, ",", 1, endp);
750 if (p == NULL) {
751 p = endp;
752 }
753 *p = '\0';
754 /* trim spaces */
755 while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
756 p1++;
757 }
758 p--;
759 while (p > p1 && (*p == ' ' || *p == '\t')) {
760 *p = '\0';
761 p--;
762 }
763 /* convert to the encoding number and check encoding */
764 if (strcasecmp(p1, "auto") == 0) {
765 if (!bauto) {
766 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
767 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
768 size_t i;
769 bauto = 1;
770 for (i = 0; i < identify_list_size; i++) {
771 *entry++ = mbfl_no2encoding(*src++);
772 n++;
773 }
774 }
775 } else {
776 const mbfl_encoding *encoding = mbfl_name2encoding(p1);
777 if (encoding) {
778 *entry++ = encoding;
779 n++;
780 } else {
781 ret = FAILURE;
782 }
783 }
784 p1 = p2 + 1;
785 } while (n < size && p2 != NULL);
786 if (n > 0) {
787 if (return_list) {
788 *return_list = list;
789 } else {
790 pefree(list, persistent);
791 }
792 } else {
793 pefree(list, persistent);
794 if (return_list) {
795 *return_list = NULL;
796 }
797 ret = FAILURE;
798 }
799 if (return_size) {
800 *return_size = n;
801 }
802 efree(tmpstr);
803 }
804
805 return ret;
806 }
807 /* }}} */
808
809 /* {{{ static int php_mb_parse_encoding_array()
810 * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
811 * Even if any illegal encoding is detected the result may contain a list
812 * of parsed encodings.
813 */
814 static int
php_mb_parse_encoding_array(zval * array,const mbfl_encoding *** return_list,size_t * return_size,int persistent)815 php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
816 {
817 zval *hash_entry;
818 HashTable *target_hash;
819 int i, n, bauto, ret = SUCCESS;
820 const mbfl_encoding **list, **entry;
821 size_t size;
822
823 list = NULL;
824 if (Z_TYPE_P(array) == IS_ARRAY) {
825 target_hash = Z_ARRVAL_P(array);
826 i = zend_hash_num_elements(target_hash);
827 size = i + MBSTRG(default_detect_order_list_size);
828 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
829 entry = list;
830 bauto = 0;
831 n = 0;
832 ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
833 zend_string *encoding_str = zval_try_get_string(hash_entry);
834 if (UNEXPECTED(!encoding_str)) {
835 ret = FAILURE;
836 break;
837 }
838
839 if (strcasecmp(ZSTR_VAL(encoding_str), "auto") == 0) {
840 if (!bauto) {
841 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
842 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
843 size_t j;
844
845 bauto = 1;
846 for (j = 0; j < identify_list_size; j++) {
847 *entry++ = mbfl_no2encoding(*src++);
848 n++;
849 }
850 }
851 } else {
852 const mbfl_encoding *encoding = mbfl_name2encoding(ZSTR_VAL(encoding_str));
853 if (encoding) {
854 *entry++ = encoding;
855 n++;
856 } else {
857 ret = FAILURE;
858 }
859 }
860 i--;
861 zend_string_release(encoding_str);
862 } ZEND_HASH_FOREACH_END();
863 if (n > 0) {
864 if (return_list) {
865 *return_list = list;
866 } else {
867 pefree(list, persistent);
868 }
869 } else {
870 pefree(list, persistent);
871 if (return_list) {
872 *return_list = NULL;
873 }
874 ret = FAILURE;
875 }
876 if (return_size) {
877 *return_size = n;
878 }
879 }
880
881 return ret;
882 }
883 /* }}} */
884
885 /* {{{ zend_multibyte interface */
php_mb_zend_encoding_fetcher(const char * encoding_name)886 static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
887 {
888 return (const zend_encoding*)mbfl_name2encoding(encoding_name);
889 }
890
php_mb_zend_encoding_name_getter(const zend_encoding * encoding)891 static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
892 {
893 return ((const mbfl_encoding *)encoding)->name;
894 }
895
php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding * _encoding)896 static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
897 {
898 const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
899 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
900 return 1;
901 }
902 if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
903 return 1;
904 }
905 return 0;
906 }
907
php_mb_zend_encoding_detector(const unsigned char * arg_string,size_t arg_length,const zend_encoding ** list,size_t list_size)908 static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
909 {
910 mbfl_string string;
911
912 if (!list) {
913 list = (const zend_encoding **)MBSTRG(current_detect_order_list);
914 list_size = MBSTRG(current_detect_order_list_size);
915 }
916
917 mbfl_string_init(&string);
918 string.no_language = MBSTRG(language);
919 string.val = (unsigned char *)arg_string;
920 string.len = arg_length;
921 return (const zend_encoding *) mbfl_identify_encoding(&string, (const mbfl_encoding **)list, list_size, 0);
922 }
923
php_mb_zend_encoding_converter(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length,const zend_encoding * encoding_to,const zend_encoding * encoding_from)924 static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
925 {
926 mbfl_string string, result;
927 mbfl_buffer_converter *convd;
928 int status;
929 size_t loc;
930
931 /* new encoding */
932 /* initialize string */
933 string.encoding = (const mbfl_encoding*)encoding_from;
934 string.no_language = MBSTRG(language);
935 string.val = (unsigned char*)from;
936 string.len = from_length;
937
938 /* initialize converter */
939 convd = mbfl_buffer_converter_new((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
940 if (convd == NULL) {
941 return (size_t) -1;
942 }
943
944 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
945 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
946
947 /* do it */
948 status = mbfl_buffer_converter_feed2(convd, &string, &loc);
949 if (status) {
950 mbfl_buffer_converter_delete(convd);
951 return (size_t)-1;
952 }
953
954 mbfl_buffer_converter_flush(convd);
955 mbfl_string_init(&result);
956 if (!mbfl_buffer_converter_result(convd, &result)) {
957 mbfl_buffer_converter_delete(convd);
958 return (size_t)-1;
959 }
960
961 *to = result.val;
962 *to_length = result.len;
963
964 mbfl_buffer_converter_delete(convd);
965
966 return loc;
967 }
968
php_mb_zend_encoding_list_parser(const char * encoding_list,size_t encoding_list_len,const zend_encoding *** return_list,size_t * return_size,int persistent)969 static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent)
970 {
971 return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent);
972 }
973
php_mb_zend_internal_encoding_getter(void)974 static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
975 {
976 return (const zend_encoding *)MBSTRG(internal_encoding);
977 }
978
php_mb_zend_internal_encoding_setter(const zend_encoding * encoding)979 static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
980 {
981 MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
982 return SUCCESS;
983 }
984
/* Callback table handed to zend_multibyte_set_functions() during MINIT.
 * The first member is the provider name; the remaining members are the
 * php_mb_zend_* hook functions defined in this file. */
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
	"mbstring",
	php_mb_zend_encoding_fetcher,
	php_mb_zend_encoding_name_getter,
	php_mb_zend_encoding_lexer_compatibility_checker,
	php_mb_zend_encoding_detector,
	php_mb_zend_encoding_converter,
	php_mb_zend_encoding_list_parser,
	php_mb_zend_internal_encoding_getter,
	php_mb_zend_internal_encoding_setter
};
996 /* }}} */
997
/* Regex helpers used for the http_output_conv_mimetypes pattern.
 * They are implemented twice below: with Oniguruma when HAVE_MBREGEX is
 * set, and with PCRE2 otherwise. The compiled pattern is passed around as
 * an opaque pointer. */
static void *_php_mb_compile_regex(const char *pattern);
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
static void _php_mb_free_regex(void *opaque);
1001
1002 #if HAVE_MBREGEX
1003 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)1004 static void *_php_mb_compile_regex(const char *pattern)
1005 {
1006 php_mb_regex_t *retval;
1007 OnigErrorInfo err_info;
1008 int err_code;
1009
1010 if ((err_code = onig_new(&retval,
1011 (const OnigUChar *)pattern,
1012 (const OnigUChar *)pattern + strlen(pattern),
1013 ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
1014 ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
1015 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1016 onig_error_code_to_str(err_str, err_code, err_info);
1017 php_error_docref(NULL, E_WARNING, "%s: %s", pattern, err_str);
1018 retval = NULL;
1019 }
1020 return retval;
1021 }
1022 /* }}} */
1023
1024 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1025 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1026 {
1027 OnigMatchParam *mp = onig_new_match_param();
1028 int err;
1029 onig_initialize_match_param(mp);
1030 if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_stack_limit))) {
1031 onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit));
1032 }
1033 if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_retry_limit))) {
1034 onig_set_retry_limit_in_match_of_match_param(mp, (unsigned int)MBSTRG(regex_retry_limit));
1035 }
1036 /* search */
1037 err = onig_search_with_param((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1038 (const OnigUChar*)str + str_len, (const OnigUChar *)str,
1039 (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE, mp);
1040 onig_free_match_param(mp);
1041 return err >= 0;
1042 }
1043 /* }}} */
1044
1045 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)1046 static void _php_mb_free_regex(void *opaque)
1047 {
1048 onig_free((php_mb_regex_t *)opaque);
1049 }
1050 /* }}} */
1051 #else
1052 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)1053 static void *_php_mb_compile_regex(const char *pattern)
1054 {
1055 pcre2_code *retval;
1056 PCRE2_SIZE err_offset;
1057 int errnum;
1058
1059 if (!(retval = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
1060 PCRE2_CASELESS, &errnum, &err_offset, php_pcre_cctx()))) {
1061 PCRE2_UCHAR err_str[128];
1062 pcre2_get_error_message(errnum, err_str, sizeof(err_str));
1063 php_error_docref(NULL, E_WARNING, "%s (offset=%zu): %s", pattern, err_offset, err_str);
1064 }
1065 return retval;
1066 }
1067 /* }}} */
1068
1069 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1070 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1071 {
1072 int res;
1073
1074 pcre2_match_data *match_data = php_pcre_create_match_data(0, opaque);
1075 if (NULL == match_data) {
1076 pcre2_code_free(opaque);
1077 php_error_docref(NULL, E_WARNING, "Cannot allocate match data");
1078 return FAILURE;
1079 }
1080 res = pcre2_match(opaque, (PCRE2_SPTR)str, str_len, 0, 0, match_data, php_pcre_mctx()) >= 0;
1081 php_pcre_free_match_data(match_data);
1082
1083 return res;
1084 }
1085 /* }}} */
1086
1087 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)1088 static void _php_mb_free_regex(void *opaque)
1089 {
1090 pcre2_code_free(opaque);
1091 }
1092 /* }}} */
1093 #endif
1094
1095 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,size_t * plist_size)1096 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1097 {
1098 size_t i;
1099
1100 *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1101 *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1102
1103 for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1104 if (php_mb_default_identify_list[i].lang == lang) {
1105 *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1106 *plist_size = php_mb_default_identify_list[i].list_size;
1107 return 1;
1108 }
1109 }
1110 return 0;
1111 }
1112 /* }}} */
1113
php_mb_rfc1867_substring_conf(const zend_encoding * encoding,char * start,size_t len,char quote)1114 static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, size_t len, char quote)
1115 {
1116 char *result = emalloc(len + 2);
1117 char *resp = result;
1118 size_t i;
1119
1120 for (i = 0; i < len && start[i] != quote; ++i) {
1121 if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
1122 *resp++ = start[++i];
1123 } else {
1124 size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
1125
1126 while (j-- > 0 && i < len) {
1127 *resp++ = start[i++];
1128 }
1129 --i;
1130 }
1131 }
1132
1133 *resp = '\0';
1134 return result;
1135 }
1136
php_mb_rfc1867_getword(const zend_encoding * encoding,char ** line,char stop)1137 static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
1138 {
1139 char *pos = *line, quote;
1140 char *res;
1141
1142 while (*pos && *pos != stop) {
1143 if ((quote = *pos) == '"' || quote == '\'') {
1144 ++pos;
1145 while (*pos && *pos != quote) {
1146 if (*pos == '\\' && pos[1] && pos[1] == quote) {
1147 pos += 2;
1148 } else {
1149 ++pos;
1150 }
1151 }
1152 if (*pos) {
1153 ++pos;
1154 }
1155 } else {
1156 pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1157
1158 }
1159 }
1160 if (*pos == '\0') {
1161 res = estrdup(*line);
1162 *line += strlen(*line);
1163 return res;
1164 }
1165
1166 res = estrndup(*line, pos - *line);
1167
1168 while (*pos == stop) {
1169 pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1170 }
1171
1172 *line = pos;
1173 return res;
1174 }
1175 /* }}} */
1176
/* Extract one configuration word from `str` for rfc1867 parsing.
 *
 * Leading whitespace is skipped. A word starting with ' or " extends to the
 * matching unescaped quote; otherwise it extends to the next whitespace.
 * The result is always a freshly allocated string (empty if nothing is
 * left after the whitespace). */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
{
	char *cursor = str;
	char *word_end;

	for (; *cursor && isspace(*(unsigned char *)cursor); ++cursor) {
		/* skip leading whitespace */
	}

	if (*cursor == '\0') {
		return estrdup("");
	}

	if (*cursor == '"' || *cursor == '\'') {
		char quote = *cursor;

		++cursor;
		return php_mb_rfc1867_substring_conf(encoding, cursor, strlen(cursor), quote);
	}

	/* bare word: ends at the next whitespace or at end of string */
	for (word_end = cursor; *word_end && !isspace(*(unsigned char *)word_end); ++word_end) {
	}

	return php_mb_rfc1867_substring_conf(encoding, cursor, word_end - cursor, 0);
}
1201 /* }}} */
1202
php_mb_rfc1867_basename(const zend_encoding * encoding,char * filename)1203 static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
1204 {
1205 char *s, *s2;
1206 const size_t filename_len = strlen(filename);
1207
1208 /* The \ check should technically be needed for win32 systems only where
1209 * it is a valid path separator. However, IE in all it's wisdom always sends
1210 * the full path of the file on the user's filesystem, which means that unless
1211 * the user does basename() they get a bogus file name. Until IE's user base drops
1212 * to nill or problem is fixed this code must remain enabled for all systems. */
1213 s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
1214 s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
1215
1216 if (s && s2) {
1217 if (s > s2) {
1218 return ++s;
1219 } else {
1220 return ++s2;
1221 }
1222 } else if (s) {
1223 return ++s;
1224 } else if (s2) {
1225 return ++s2;
1226 } else {
1227 return filename;
1228 }
1229 }
1230 /* }}} */
1231
1232 /* {{{ php.ini directive handler */
1233 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)1234 static PHP_INI_MH(OnUpdate_mbstring_language)
1235 {
1236 enum mbfl_no_language no_language;
1237
1238 no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
1239 if (no_language == mbfl_no_language_invalid) {
1240 MBSTRG(language) = mbfl_no_language_neutral;
1241 return FAILURE;
1242 }
1243 MBSTRG(language) = no_language;
1244 php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1245 return SUCCESS;
1246 }
1247 /* }}} */
1248
1249 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)1250 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1251 {
1252 const mbfl_encoding **list;
1253 size_t size;
1254
1255 if (!new_value) {
1256 if (MBSTRG(detect_order_list)) {
1257 pefree(MBSTRG(detect_order_list), 1);
1258 }
1259 MBSTRG(detect_order_list) = NULL;
1260 MBSTRG(detect_order_list_size) = 0;
1261 return SUCCESS;
1262 }
1263
1264 if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
1265 return FAILURE;
1266 }
1267
1268 if (MBSTRG(detect_order_list)) {
1269 pefree(MBSTRG(detect_order_list), 1);
1270 }
1271 MBSTRG(detect_order_list) = list;
1272 MBSTRG(detect_order_list_size) = size;
1273 return SUCCESS;
1274 }
1275 /* }}} */
1276
_php_mb_ini_mbstring_http_input_set(const char * new_value,size_t new_value_length)1277 static int _php_mb_ini_mbstring_http_input_set(const char *new_value, size_t new_value_length) {
1278 const mbfl_encoding **list;
1279 size_t size;
1280 if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1)) {
1281 return FAILURE;
1282 }
1283 if (MBSTRG(http_input_list)) {
1284 pefree(MBSTRG(http_input_list), 1);
1285 }
1286 MBSTRG(http_input_list) = list;
1287 MBSTRG(http_input_list_size) = size;
1288 return SUCCESS;
1289 }
1290
1291 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)1292 static PHP_INI_MH(OnUpdate_mbstring_http_input)
1293 {
1294 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1295 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
1296 }
1297
1298 if (!new_value || !ZSTR_VAL(new_value)) {
1299 const char *encoding = php_get_input_encoding();
1300 MBSTRG(http_input_set) = 0;
1301 _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
1302 return SUCCESS;
1303 }
1304
1305 MBSTRG(http_input_set) = 1;
1306 return _php_mb_ini_mbstring_http_input_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
1307 }
1308 /* }}} */
1309
_php_mb_ini_mbstring_http_output_set(const char * new_value)1310 static int _php_mb_ini_mbstring_http_output_set(const char *new_value) {
1311 const mbfl_encoding *encoding = mbfl_name2encoding(new_value);
1312 if (!encoding) {
1313 return FAILURE;
1314 }
1315
1316 MBSTRG(http_output_encoding) = encoding;
1317 MBSTRG(current_http_output_encoding) = encoding;
1318 return SUCCESS;
1319 }
1320
1321 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)1322 static PHP_INI_MH(OnUpdate_mbstring_http_output)
1323 {
1324 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1325 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
1326 }
1327
1328 if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
1329 MBSTRG(http_output_set) = 0;
1330 _php_mb_ini_mbstring_http_output_set(php_get_output_encoding());
1331 return SUCCESS;
1332 }
1333
1334 MBSTRG(http_output_set) = 1;
1335 return _php_mb_ini_mbstring_http_output_set(ZSTR_VAL(new_value));
1336 }
1337 /* }}} */
1338
1339 /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
_php_mb_ini_mbstring_internal_encoding_set(const char * new_value,size_t new_value_length)1340 static int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length)
1341 {
1342 const mbfl_encoding *encoding;
1343
1344 if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
1345 /* falls back to UTF-8 if an unknown encoding name is given */
1346 encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
1347 }
1348 MBSTRG(internal_encoding) = encoding;
1349 MBSTRG(current_internal_encoding) = encoding;
1350 #if HAVE_MBREGEX
1351 {
1352 const char *enc_name = new_value;
1353 if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
1354 /* falls back to UTF-8 if an unknown encoding name is given */
1355 enc_name = "UTF-8";
1356 php_mb_regex_set_default_mbctype(enc_name);
1357 }
1358 php_mb_regex_set_mbctype(new_value);
1359 }
1360 #endif
1361 return SUCCESS;
1362 }
1363 /* }}} */
1364
1365 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)1366 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1367 {
1368 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1369 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
1370 }
1371
1372 if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
1373 return FAILURE;
1374 }
1375
1376 if (new_value && ZSTR_LEN(new_value)) {
1377 MBSTRG(internal_encoding_set) = 1;
1378 return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
1379 } else {
1380 const char *encoding = php_get_internal_encoding();
1381 MBSTRG(internal_encoding_set) = 0;
1382 return _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
1383 }
1384 }
1385 /* }}} */
1386
1387 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)1388 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1389 {
1390 int c;
1391 char *endptr = NULL;
1392
1393 if (new_value != NULL) {
1394 if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
1395 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1396 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1397 } else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
1398 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1399 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1400 } else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
1401 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1402 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1403 } else {
1404 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1405 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1406 if (ZSTR_LEN(new_value) > 0) {
1407 c = strtol(ZSTR_VAL(new_value), &endptr, 0);
1408 if (*endptr == '\0') {
1409 MBSTRG(filter_illegal_substchar) = c;
1410 MBSTRG(current_filter_illegal_substchar) = c;
1411 }
1412 }
1413 }
1414 } else {
1415 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1416 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1417 MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
1418 MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
1419 }
1420
1421 return SUCCESS;
1422 }
1423 /* }}} */
1424
1425 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)1426 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1427 {
1428 if (new_value == NULL) {
1429 return FAILURE;
1430 }
1431
1432 OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
1433
1434 if (MBSTRG(encoding_translation)) {
1435 sapi_unregister_post_entry(php_post_entries);
1436 sapi_register_post_entries(mbstr_post_entries);
1437 } else {
1438 sapi_unregister_post_entry(mbstr_post_entries);
1439 sapi_register_post_entries(php_post_entries);
1440 }
1441
1442 return SUCCESS;
1443 }
1444 /* }}} */
1445
1446 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)1447 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1448 {
1449 zend_string *tmp;
1450 void *re = NULL;
1451
1452 if (!new_value) {
1453 new_value = entry->orig_value;
1454 }
1455 tmp = php_trim(new_value, NULL, 0, 3);
1456
1457 if (ZSTR_LEN(tmp) > 0) {
1458 if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
1459 zend_string_release_ex(tmp, 0);
1460 return FAILURE;
1461 }
1462 }
1463
1464 if (MBSTRG(http_output_conv_mimetypes)) {
1465 _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1466 }
1467
1468 MBSTRG(http_output_conv_mimetypes) = re;
1469
1470 zend_string_release_ex(tmp, 0);
1471 return SUCCESS;
1472 }
1473 /* }}} */
1474 /* }}} */
1475
1476 /* {{{ php.ini directive registration */
/* Directive table for the mbstring.* ini settings. Each entry names the
 * directive, its default value, the stages at which it may be changed and
 * the update handler; STD_* entries additionally bind the value to a member
 * of zend_mbstring_globals. */
PHP_INI_BEGIN()
	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
	PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
	PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
	STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
	/* func_overload can only be set at system level */
	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
	PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)

	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
		PHP_INI_SYSTEM | PHP_INI_PERDIR,
		OnUpdate_mbstring_encoding_translation,
		encoding_translation, zend_mbstring_globals, mbstring_globals)
	/* default pattern matches text/... and application/xhtml+xml */
	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
		"^(text/|application/xhtml\\+xml)",
		PHP_INI_ALL,
		OnUpdate_mbstring_http_output_conv_mimetypes)

	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
		PHP_INI_ALL,
		OnUpdateBool,
		strict_detection, zend_mbstring_globals, mbstring_globals)
#if HAVE_MBREGEX
	/* limits applied to Oniguruma match parameters in _php_mb_match_regex() */
	STD_PHP_INI_ENTRY("mbstring.regex_stack_limit", "100000",PHP_INI_ALL, OnUpdateLong, regex_stack_limit, zend_mbstring_globals, mbstring_globals)
	STD_PHP_INI_ENTRY("mbstring.regex_retry_limit", "1000000",PHP_INI_ALL, OnUpdateLong, regex_retry_limit, zend_mbstring_globals, mbstring_globals)
#endif
PHP_INI_END()
1505 /* }}} */
1506
1507 static void mbstring_internal_encoding_changed_hook() {
1508 /* One of the internal_encoding / input_encoding / output_encoding ini settings changed. */
1509 if (!MBSTRG(internal_encoding_set)) {
1510 const char *encoding = php_get_internal_encoding();
1511 _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
1512 }
1513
1514 if (!MBSTRG(http_output_set)) {
1515 const char *encoding = php_get_output_encoding();
1516 _php_mb_ini_mbstring_http_output_set(encoding);
1517 }
1518
1519 if (!MBSTRG(http_input_set)) {
1520 const char *encoding = php_get_input_encoding();
1521 _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
1522 }
1523 }
1524
1525 /* {{{ module global initialize handler */
PHP_GINIT_FUNCTION(mbstring)1526 static PHP_GINIT_FUNCTION(mbstring)
1527 {
1528 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1529 ZEND_TSRMLS_CACHE_UPDATE();
1530 #endif
1531
1532 mbstring_globals->language = mbfl_no_language_uni;
1533 mbstring_globals->internal_encoding = NULL;
1534 mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1535 mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1536 mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1537 mbstring_globals->http_input_identify = NULL;
1538 mbstring_globals->http_input_identify_get = NULL;
1539 mbstring_globals->http_input_identify_post = NULL;
1540 mbstring_globals->http_input_identify_cookie = NULL;
1541 mbstring_globals->http_input_identify_string = NULL;
1542 mbstring_globals->http_input_list = NULL;
1543 mbstring_globals->http_input_list_size = 0;
1544 mbstring_globals->detect_order_list = NULL;
1545 mbstring_globals->detect_order_list_size = 0;
1546 mbstring_globals->current_detect_order_list = NULL;
1547 mbstring_globals->current_detect_order_list_size = 0;
1548 mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1549 mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1550 mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1551 mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
1552 mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1553 mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
1554 mbstring_globals->illegalchars = 0;
1555 mbstring_globals->func_overload = 0;
1556 mbstring_globals->encoding_translation = 0;
1557 mbstring_globals->strict_detection = 0;
1558 mbstring_globals->outconv = NULL;
1559 mbstring_globals->http_output_conv_mimetypes = NULL;
1560 #if HAVE_MBREGEX
1561 mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
1562 #endif
1563 mbstring_globals->last_used_encoding_name = NULL;
1564 mbstring_globals->last_used_encoding = NULL;
1565 mbstring_globals->internal_encoding_set = 0;
1566 mbstring_globals->http_output_set = 0;
1567 mbstring_globals->http_input_set = 0;
1568 }
1569 /* }}} */
1570
1571 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1572 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1573 {
1574 if (mbstring_globals->http_input_list) {
1575 free(mbstring_globals->http_input_list);
1576 }
1577 if (mbstring_globals->detect_order_list) {
1578 free(mbstring_globals->detect_order_list);
1579 }
1580 if (mbstring_globals->http_output_conv_mimetypes) {
1581 _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1582 }
1583 #if HAVE_MBREGEX
1584 php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
1585 #endif
1586 }
1587 /* }}} */
1588
1589 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
/* Module startup.
 * In order: installs the libmbfl allocators, registers the ini entries,
 * installs and runs the internal-encoding-changed hook, registers the
 * treat-data and (optionally) POST handlers, defines the MB_* constants,
 * starts the mb_regex sub-module, installs the Zend multibyte and rfc1867
 * callbacks and, when mbstring.func_overload is set, swaps the overloaded
 * functions into the function table.
 * Returns FAILURE if the Zend multibyte functions cannot be installed or an
 * original function to overload is missing. */
PHP_MINIT_FUNCTION(mbstring)
{
#if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
ZEND_TSRMLS_CACHE_UPDATE();
#endif
	__mbfl_allocators = (mbfl_allocators*)&_php_mb_allocators;

	REGISTER_INI_ENTRIES();

	/* We assume that we're the only user of the hook. */
	ZEND_ASSERT(php_internal_encoding_changed == NULL);
	php_internal_encoding_changed = mbstring_internal_encoding_changed_hook;
	/* run it once so settings without an explicit value get derived now */
	mbstring_internal_encoding_changed_hook();

	/* This is a global handler. Should not be set in a per-request handler. */
	sapi_register_treat_data(mbstr_treat_data);

	/* Post handlers are stored in the thread-local context. */
	if (MBSTRG(encoding_translation)) {
		sapi_register_post_entries(mbstr_post_entries);
	}

	REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);

	REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_FOLD", PHP_UNICODE_CASE_FOLD, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_UPPER_SIMPLE", PHP_UNICODE_CASE_UPPER_SIMPLE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_LOWER_SIMPLE", PHP_UNICODE_CASE_LOWER_SIMPLE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_TITLE_SIMPLE", PHP_UNICODE_CASE_TITLE_SIMPLE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_FOLD_SIMPLE", PHP_UNICODE_CASE_FOLD_SIMPLE, CONST_CS | CONST_PERSISTENT);

#if HAVE_MBREGEX
	PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

	if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
		return FAILURE;
	}

	php_rfc1867_set_multibyte_callbacks(
		php_mb_encoding_translation,
		php_mb_gpc_get_detect_order,
		php_mb_gpc_set_input_encoding,
		php_mb_rfc1867_getword,
		php_mb_rfc1867_getword_conf,
		php_mb_rfc1867_basename);

	/* override original function (deprecated). */
	if (MBSTRG(func_overload)){
		zend_function *func, *orig;
		const struct mb_overload_def *p;
		zend_string *str;

		/* the mb_ovld table ends at the first entry whose type is not > 0 */
		p = &(mb_ovld[0]);
		while (p->type > 0) {
			/* only entries enabled in func_overload and not yet saved */
			if ((MBSTRG(func_overload) & p->type) == p->type &&
				!zend_hash_str_exists(CG(function_table), p->save_func, strlen(p->save_func))
			) {
				func = zend_hash_str_find_ptr(CG(function_table), p->ovld_func, strlen(p->ovld_func));

				if ((orig = zend_hash_str_find_ptr(CG(function_table), p->orig_func, strlen(p->orig_func))) == NULL) {
					php_error_docref("ref.mbstring", E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
					return FAILURE;
				} else {
					ZEND_ASSERT(orig->type == ZEND_INTERNAL_FUNCTION);
					/* keep the original reachable under save_func ... */
					str = zend_string_init_interned(p->save_func, strlen(p->save_func), 1);
					zend_hash_add_mem(CG(function_table), str, orig, sizeof(zend_internal_function));
					zend_string_release_ex(str, 1);
					function_add_ref(orig);

					/* ... and put the overloading function under orig_func */
					str = zend_string_init_interned(p->orig_func, strlen(p->orig_func), 1);
					zend_hash_update_mem(CG(function_table), str, func, sizeof(zend_internal_function));
					zend_string_release_ex(str, 1);
					function_add_ref(func);
				}
			}
			p++;
		}
	}

	return SUCCESS;
}
1676 /* }}} */
1677
1678 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
PHP_MSHUTDOWN_FUNCTION(mbstring)1679 PHP_MSHUTDOWN_FUNCTION(mbstring)
1680 {
1681 /* clear overloaded function. */
1682 if (MBSTRG(func_overload)){
1683 const struct mb_overload_def *p;
1684 zend_function *orig;
1685
1686 p = &(mb_ovld[0]);
1687 while (p->type > 0) {
1688 if ((MBSTRG(func_overload) & p->type) == p->type &&
1689 (orig = zend_hash_str_find_ptr(CG(function_table), p->save_func, strlen(p->save_func)))) {
1690
1691 zend_hash_str_update_mem(CG(function_table), p->orig_func, strlen(p->orig_func), orig, sizeof(zend_internal_function));
1692 function_add_ref(orig);
1693 zend_hash_str_del(CG(function_table), p->save_func, strlen(p->save_func));
1694 }
1695 p++;
1696 }
1697 }
1698
1699 UNREGISTER_INI_ENTRIES();
1700
1701 zend_multibyte_restore_functions();
1702
1703 #if HAVE_MBREGEX
1704 PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1705 #endif
1706
1707 php_internal_encoding_changed = NULL;
1708
1709 return SUCCESS;
1710 }
1711 /* }}} */
1712
1713 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1714 PHP_RINIT_FUNCTION(mbstring)
1715 {
1716 MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1717 MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1718 MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1719 MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1720
1721 MBSTRG(illegalchars) = 0;
1722
1723 php_mb_populate_current_detect_order_list();
1724
1725 /* override original function. */
1726 if (MBSTRG(func_overload)){
1727 zend_error(E_DEPRECATED, "The mbstring.func_overload directive is deprecated");
1728
1729 CG(compiler_options) |= ZEND_COMPILE_NO_BUILTIN_STRLEN;
1730 }
1731 #if HAVE_MBREGEX
1732 PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1733 #endif
1734 zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
1735
1736 return SUCCESS;
1737 }
1738 /* }}} */
1739
1740 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
PHP_RSHUTDOWN_FUNCTION(mbstring)1741 PHP_RSHUTDOWN_FUNCTION(mbstring)
1742 {
1743 if (MBSTRG(current_detect_order_list) != NULL) {
1744 efree(MBSTRG(current_detect_order_list));
1745 MBSTRG(current_detect_order_list) = NULL;
1746 MBSTRG(current_detect_order_list_size) = 0;
1747 }
1748 if (MBSTRG(outconv) != NULL) {
1749 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1750 mbfl_buffer_converter_delete(MBSTRG(outconv));
1751 MBSTRG(outconv) = NULL;
1752 }
1753
1754 /* clear http input identification. */
1755 MBSTRG(http_input_identify) = NULL;
1756 MBSTRG(http_input_identify_post) = NULL;
1757 MBSTRG(http_input_identify_get) = NULL;
1758 MBSTRG(http_input_identify_cookie) = NULL;
1759 MBSTRG(http_input_identify_string) = NULL;
1760
1761 if (MBSTRG(last_used_encoding_name)) {
1762 zend_string_release(MBSTRG(last_used_encoding_name));
1763 MBSTRG(last_used_encoding_name) = NULL;
1764 }
1765
1766 MBSTRG(internal_encoding_set) = 0;
1767 MBSTRG(http_output_set) = 0;
1768 MBSTRG(http_input_set) = 0;
1769
1770 #if HAVE_MBREGEX
1771 PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1772 #endif
1773
1774 return SUCCESS;
1775 }
1776 /* }}} */
1777
1778 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
PHP_MINFO_FUNCTION(mbstring)1779 PHP_MINFO_FUNCTION(mbstring)
1780 {
1781 php_info_print_table_start();
1782 php_info_print_table_row(2, "Multibyte Support", "enabled");
1783 php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1784 php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1785 {
1786 char tmp[256];
1787 snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1788 php_info_print_table_row(2, "libmbfl version", tmp);
1789 }
1790 php_info_print_table_end();
1791
1792 php_info_print_table_start();
1793 php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1794 php_info_print_table_end();
1795
1796 #if HAVE_MBREGEX
1797 PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1798 #endif
1799
1800 DISPLAY_INI_ENTRIES();
1801 }
1802 /* }}} */
1803
1804 /* {{{ proto string mb_language([string language])
1805 Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1806 PHP_FUNCTION(mb_language)
1807 {
1808 zend_string *name = NULL;
1809
1810 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S", &name) == FAILURE) {
1811 return;
1812 }
1813 if (name == NULL) {
1814 RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
1815 } else {
1816 zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
1817 if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1818 php_error_docref(NULL, E_WARNING, "Unknown language \"%s\"", ZSTR_VAL(name));
1819 RETVAL_FALSE;
1820 } else {
1821 RETVAL_TRUE;
1822 }
1823 zend_string_release_ex(ini_name, 0);
1824 }
1825 }
1826 /* }}} */
1827
1828 /* {{{ proto string mb_internal_encoding([string encoding])
1829 Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1830 PHP_FUNCTION(mb_internal_encoding)
1831 {
1832 const char *name = NULL;
1833 size_t name_len;
1834 const mbfl_encoding *encoding;
1835
1836 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1837 return;
1838 }
1839 if (name == NULL) {
1840 name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1841 if (name != NULL) {
1842 RETURN_STRING(name);
1843 } else {
1844 RETURN_FALSE;
1845 }
1846 } else {
1847 encoding = mbfl_name2encoding(name);
1848 if (!encoding) {
1849 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1850 RETURN_FALSE;
1851 } else {
1852 MBSTRG(current_internal_encoding) = encoding;
1853 MBSTRG(internal_encoding_set) = 1;
1854 RETURN_TRUE;
1855 }
1856 }
1857 }
1858 /* }}} */
1859
1860 /* {{{ proto mixed mb_http_input([string type])
1861 Returns the input encoding */
PHP_FUNCTION(mb_http_input)1862 PHP_FUNCTION(mb_http_input)
1863 {
1864 char *typ = NULL;
1865 size_t typ_len;
1866 int retname;
1867 char *list, *temp;
1868 const mbfl_encoding *result = NULL;
1869
1870 retname = 1;
1871 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
1872 return;
1873 }
1874 if (typ == NULL) {
1875 result = MBSTRG(http_input_identify);
1876 } else {
1877 switch (*typ) {
1878 case 'G':
1879 case 'g':
1880 result = MBSTRG(http_input_identify_get);
1881 break;
1882 case 'P':
1883 case 'p':
1884 result = MBSTRG(http_input_identify_post);
1885 break;
1886 case 'C':
1887 case 'c':
1888 result = MBSTRG(http_input_identify_cookie);
1889 break;
1890 case 'S':
1891 case 's':
1892 result = MBSTRG(http_input_identify_string);
1893 break;
1894 case 'I':
1895 case 'i':
1896 {
1897 const mbfl_encoding **entry = MBSTRG(http_input_list);
1898 const size_t n = MBSTRG(http_input_list_size);
1899 size_t i;
1900 array_init(return_value);
1901 for (i = 0; i < n; i++) {
1902 add_next_index_string(return_value, (*entry)->name);
1903 entry++;
1904 }
1905 retname = 0;
1906 }
1907 break;
1908 case 'L':
1909 case 'l':
1910 {
1911 const mbfl_encoding **entry = MBSTRG(http_input_list);
1912 const size_t n = MBSTRG(http_input_list_size);
1913 size_t i;
1914 list = NULL;
1915 for (i = 0; i < n; i++) {
1916 if (list) {
1917 temp = list;
1918 spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1919 efree(temp);
1920 if (!list) {
1921 break;
1922 }
1923 } else {
1924 list = estrdup((*entry)->name);
1925 }
1926 entry++;
1927 }
1928 }
1929 if (!list) {
1930 RETURN_FALSE;
1931 }
1932 RETVAL_STRING(list);
1933 efree(list);
1934 retname = 0;
1935 break;
1936 default:
1937 result = MBSTRG(http_input_identify);
1938 break;
1939 }
1940 }
1941
1942 if (retname) {
1943 if (result) {
1944 RETVAL_STRING(result->name);
1945 } else {
1946 RETVAL_FALSE;
1947 }
1948 }
1949 }
1950 /* }}} */
1951
1952 /* {{{ proto string mb_http_output([string encoding])
1953 Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1954 PHP_FUNCTION(mb_http_output)
1955 {
1956 const char *name = NULL;
1957 size_t name_len;
1958 const mbfl_encoding *encoding;
1959
1960 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1961 return;
1962 }
1963
1964 if (name == NULL) {
1965 name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1966 if (name != NULL) {
1967 RETURN_STRING(name);
1968 } else {
1969 RETURN_FALSE;
1970 }
1971 } else {
1972 encoding = mbfl_name2encoding(name);
1973 if (!encoding) {
1974 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1975 RETURN_FALSE;
1976 } else {
1977 MBSTRG(http_output_set) = 1;
1978 MBSTRG(current_http_output_encoding) = encoding;
1979 RETURN_TRUE;
1980 }
1981 }
1982 }
1983 /* }}} */
1984
1985 /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
   Sets the current detect_order or returns the current detect_order as an array */
PHP_FUNCTION(mb_detect_order)1987 PHP_FUNCTION(mb_detect_order)
1988 {
1989 zval *arg1 = NULL;
1990
1991 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
1992 return;
1993 }
1994
1995 if (!arg1) {
1996 size_t i;
1997 size_t n = MBSTRG(current_detect_order_list_size);
1998 const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1999 array_init(return_value);
2000 for (i = 0; i < n; i++) {
2001 add_next_index_string(return_value, (*entry)->name);
2002 entry++;
2003 }
2004 } else {
2005 const mbfl_encoding **list = NULL;
2006 size_t size = 0;
2007 switch (Z_TYPE_P(arg1)) {
2008 case IS_ARRAY:
2009 if (FAILURE == php_mb_parse_encoding_array(arg1, &list, &size, 0)) {
2010 if (list) {
2011 efree(list);
2012 }
2013 RETURN_FALSE;
2014 }
2015 break;
2016 default:
2017 if (!try_convert_to_string(arg1)) {
2018 return;
2019 }
2020 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(arg1), Z_STRLEN_P(arg1), &list, &size, 0)) {
2021 if (list) {
2022 efree(list);
2023 }
2024 RETURN_FALSE;
2025 }
2026 break;
2027 }
2028
2029 if (list == NULL) {
2030 RETURN_FALSE;
2031 }
2032
2033 if (MBSTRG(current_detect_order_list)) {
2034 efree(MBSTRG(current_detect_order_list));
2035 }
2036 MBSTRG(current_detect_order_list) = list;
2037 MBSTRG(current_detect_order_list_size) = size;
2038 RETURN_TRUE;
2039 }
2040 }
2041 /* }}} */
2042
/* Decide whether `cp` may be used as a substitute character.
 *
 * Returns 1 when cp lies strictly between 0 and 0x110000 and is not in the
 * surrogate range 0xd800-0xdfff; returns 0 otherwise. Note that 0 itself is
 * rejected.
 *
 * The target encoding of the eventual conversion is not known here, so no
 * check is made that the code point is actually mapped in that encoding. */
static inline int php_mb_check_code_point(zend_long cp)
{
	/* Outside (0, 0x110000) means outside the accepted Unicode range. */
	const int in_range = (cp > 0 && cp < 0x110000);

	/* Surrogates are never valid on their own, and only a single
	 * substitute character is allowed. */
	const int is_surrogate = (cp >= 0xd800 && cp <= 0xdfff);

	return in_range && !is_surrogate;
}
2061
2062 /* {{{ proto mixed mb_substitute_character([mixed substchar])
2063 Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)2064 PHP_FUNCTION(mb_substitute_character)
2065 {
2066 zval *arg1 = NULL;
2067
2068 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
2069 return;
2070 }
2071
2072 if (!arg1) {
2073 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
2074 RETURN_STRING("none");
2075 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
2076 RETURN_STRING("long");
2077 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
2078 RETURN_STRING("entity");
2079 } else {
2080 RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
2081 }
2082 } else {
2083 RETVAL_TRUE;
2084
2085 switch (Z_TYPE_P(arg1)) {
2086 case IS_STRING:
2087 if (strncasecmp("none", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2088 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
2089 } else if (strncasecmp("long", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2090 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
2091 } else if (strncasecmp("entity", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2092 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
2093 } else {
2094 convert_to_long_ex(arg1);
2095
2096 if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
2097 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2098 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2099 } else {
2100 php_error_docref(NULL, E_WARNING, "Unknown character");
2101 RETURN_FALSE;
2102 }
2103 }
2104 break;
2105 default:
2106 convert_to_long_ex(arg1);
2107 if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
2108 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2109 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2110 } else {
2111 php_error_docref(NULL, E_WARNING, "Unknown character");
2112 RETURN_FALSE;
2113 }
2114 break;
2115 }
2116 }
2117 }
2118 /* }}} */
2119
2120 /* {{{ proto string mb_preferred_mime_name(string encoding)
2121 Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)2122 PHP_FUNCTION(mb_preferred_mime_name)
2123 {
2124 enum mbfl_no_encoding no_encoding;
2125 char *name = NULL;
2126 size_t name_len;
2127
2128 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
2129 return;
2130 } else {
2131 no_encoding = mbfl_name2no_encoding(name);
2132 if (no_encoding == mbfl_no_encoding_invalid) {
2133 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
2134 RETVAL_FALSE;
2135 } else {
2136 const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2137 if (preferred_name == NULL || *preferred_name == '\0') {
2138 php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2139 RETVAL_FALSE;
2140 } else {
2141 RETVAL_STRING((char *)preferred_name);
2142 }
2143 }
2144 }
2145 }
2146 /* }}} */
2147
/* Byte-range predicates; each evaluates to 1 or 0. The names suggest the
 * lead (IS_SJIS1) and trail (IS_SJIS2) bytes of a double-byte Shift_JIS
 * character. The argument is evaluated more than once, so it must be free
 * of side effects. */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
2150
2151 /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2152 Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)2153 PHP_FUNCTION(mb_parse_str)
2154 {
2155 zval *track_vars_array = NULL;
2156 char *encstr = NULL;
2157 size_t encstr_len;
2158 php_mb_encoding_handler_info_t info;
2159 const mbfl_encoding *detected;
2160
2161 track_vars_array = NULL;
2162 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2163 return;
2164 }
2165
2166 if (track_vars_array != NULL) {
2167 track_vars_array = zend_try_array_init(track_vars_array);
2168 if (!track_vars_array) {
2169 return;
2170 }
2171 }
2172
2173 encstr = estrndup(encstr, encstr_len);
2174
2175 info.data_type = PARSE_STRING;
2176 info.separator = PG(arg_separator).input;
2177 info.report_errors = 1;
2178 info.to_encoding = MBSTRG(current_internal_encoding);
2179 info.to_language = MBSTRG(language);
2180 info.from_encodings = MBSTRG(http_input_list);
2181 info.num_from_encodings = MBSTRG(http_input_list_size);
2182 info.from_language = MBSTRG(language);
2183
2184 if (track_vars_array != NULL) {
2185 detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
2186 } else {
2187 zval tmp;
2188 zend_array *symbol_table;
2189 if (zend_forbid_dynamic_call("mb_parse_str() with a single argument") == FAILURE) {
2190 efree(encstr);
2191 return;
2192 }
2193
2194 php_error_docref(NULL, E_DEPRECATED, "Calling mb_parse_str() without the result argument is deprecated");
2195
2196 symbol_table = zend_rebuild_symbol_table();
2197 ZVAL_ARR(&tmp, symbol_table);
2198 detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr);
2199 }
2200
2201 MBSTRG(http_input_identify) = detected;
2202
2203 RETVAL_BOOL(detected);
2204
2205 if (encstr != NULL) efree(encstr);
2206 }
2207 /* }}} */
2208
2209 /* {{{ proto string mb_output_handler(string contents, int status)
2210 Returns string in output buffer converted to the http_output encoding */
/* Output-buffer callback: converts each buffered chunk from the current
 * internal encoding to the current HTTP output encoding.
 *
 * Parameters (PHP level): the chunk contents and the output handler status
 * bitmask. Only the PHP_OUTPUT_HANDLER_START and PHP_OUTPUT_HANDLER_END bits
 * are inspected here.
 *
 * The converter lives in MBSTRG(outconv) across calls: it is created during
 * the START call and destroyed after the END call. Whenever no converter is
 * active, the chunk is returned unchanged. */
PHP_FUNCTION(mb_output_handler)
{
	char *arg_string;
	size_t arg_string_len;
	zend_long arg_status;
	mbfl_string string, result;
	const char *charset;
	char *p;
	const mbfl_encoding *encoding;
	int last_feed;
	size_t len;
	unsigned char send_text_mimetype = 0; /* 1 when `mimetype` is a heap copy owned by this call */
	char *s, *mimetype = NULL;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
		return;
	}

	encoding = MBSTRG(current_http_output_encoding);

	/* start phase only */
	if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
		/* delete the converter just in case. */
		if (MBSTRG(outconv)) {
			/* keep the running count of illegal characters before the converter goes away */
			MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
			mbfl_buffer_converter_delete(MBSTRG(outconv));
			MBSTRG(outconv) = NULL;
		}
		/* "pass" output encoding: nothing to convert, hand the chunk back as-is */
		if (encoding == &mbfl_encoding_pass) {
			RETURN_STRINGL(arg_string, arg_string_len);
		}

		/* analyze mime type */
		if (SG(sapi_headers).mimetype &&
			_php_mb_match_regex(
				MBSTRG(http_output_conv_mimetypes),
				SG(sapi_headers).mimetype,
				strlen(SG(sapi_headers).mimetype))) {
			/* the already-set mime type matches the configured pattern:
			 * copy it, dropping everything from the first ';' onwards */
			if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
				mimetype = estrdup(SG(sapi_headers).mimetype);
			} else {
				mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
			}
			send_text_mimetype = 1;
		} else if (SG(sapi_headers).send_default_content_type) {
			/* borrowed pointer (SAPI default or constant): must not be freed */
			mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
		}

		/* if content-type is not yet set, set it and activate the converter */
		if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
			charset = encoding->mime_name;
			if (charset) {
				len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
				/* NOTE(review): `p` is not freed here; presumably the 0 argument lets
				 * sapi_add_header() take ownership of the buffer -- confirm */
				if (sapi_add_header(p, len, 0) != FAILURE) {
					SG(sapi_headers).send_default_content_type = 0;
				}
			}
			/* activate the converter */
			MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
			/* only the heap copy made above is ours to release */
			if (send_text_mimetype){
				efree(mimetype);
			}
		}
	}

	/* just return if the converter is not activated. */
	if (MBSTRG(outconv) == NULL) {
		RETURN_STRINGL(arg_string, arg_string_len);
	}

	/* flag */
	last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
	/* mode */
	mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
	mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));

	/* feed the string */
	mbfl_string_init(&string);
	/* these are not needed. convd has encoding info.
	string.no_language = MBSTRG(language);
	string.encoding = MBSTRG(current_internal_encoding);
	*/
	string.val = (unsigned char *)arg_string;
	string.len = arg_string_len;
	mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
	/* the converter is flushed only on the final chunk */
	if (last_feed) {
		mbfl_buffer_converter_flush(MBSTRG(outconv));
	}
	/* get the converter output, and return it */
	mbfl_buffer_converter_result(MBSTRG(outconv), &result);
	// TODO: avoid reallocation ???
	RETVAL_STRINGL((char *)result.val, result.len); /* the string is already strdup()'ed */
	efree(result.val);

	/* delete the converter if it is the last feed. */
	if (last_feed) {
		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
		mbfl_buffer_converter_delete(MBSTRG(outconv));
		MBSTRG(outconv) = NULL;
	}
}
2312 /* }}} */
2313
2314 /* {{{ proto array mb_str_split(string str [, int split_length] [, string encoding])
2315 Convert a multibyte string to an array. If split_length is specified,
2316 break the string down into chunks each split_length characters long. */
2317
2318 /* structure to pass split params to the callback */
/* State handed to the mbfl_split_output() callback through its `data`
 * pointer while mb_str_split() streams a string through a conversion
 * filter. */
struct mbfl_split_params {
	zval *return_value; /* PHP array that receives each finished chunk */
	mbfl_string *result_string; /* scratch string the finished chunk is extracted into */
	size_t mb_chunk_length; /* characters accumulated in the current chunk so far */
	size_t split_length; /* number of characters at which a chunk is complete */
	mbfl_convert_filter *next_filter; /* filter every incoming character is forwarded to (wchar to target encoding) */
};
2326
2327 /* callback function to fill split array */
mbfl_split_output(int c,void * data)2328 static int mbfl_split_output(int c, void *data)
2329 {
2330 struct mbfl_split_params *params = (struct mbfl_split_params *)data; /* cast passed data */
2331
2332 (*params->next_filter->filter_function)(c, params->next_filter); /* decoder filter */
2333
2334 if(params->split_length == ++params->mb_chunk_length) { /* if current chunk size reached defined chunk size or last char reached */
2335 mbfl_convert_filter_flush(params->next_filter);/* concatenate separate decoded chars to the solid string */
2336 mbfl_memory_device *device = (mbfl_memory_device *)params->next_filter->data; /* chars container */
2337 mbfl_string *chunk = params->result_string;
2338 mbfl_memory_device_result(device, chunk); /* make chunk */
2339 add_next_index_stringl(params->return_value, (const char *)chunk->val, chunk->len); /* add chunk to the array */
2340 efree(chunk->val);
2341 params->mb_chunk_length = 0; /* reset mb_chunk size */
2342 }
2343 return 0;
2344 }
2345
PHP_FUNCTION(mb_str_split)2346 PHP_FUNCTION(mb_str_split)
2347 {
2348 zend_string *str, *encoding = NULL;
2349 size_t mb_len, chunks, chunk_len;
2350 const char *p, *last; /* pointer for the string cursor and last string char */
2351 mbfl_string string, result_string;
2352 const mbfl_encoding *mbfl_encoding;
2353 zend_long split_length = 1;
2354
2355 ZEND_PARSE_PARAMETERS_START(1, 3)
2356 Z_PARAM_STR(str)
2357 Z_PARAM_OPTIONAL
2358 Z_PARAM_LONG(split_length)
2359 Z_PARAM_STR(encoding)
2360 ZEND_PARSE_PARAMETERS_END();
2361
2362 if (split_length <= 0) {
2363 php_error_docref(NULL, E_WARNING, "The length of each segment must be greater than zero");
2364 RETURN_FALSE;
2365 }
2366
2367 /* fill mbfl_string structure */
2368 string.val = (unsigned char *) ZSTR_VAL(str);
2369 string.len = ZSTR_LEN(str);
2370 string.no_language = MBSTRG(language);
2371 string.encoding = php_mb_get_encoding(encoding);
2372 if (!string.encoding) {
2373 RETURN_FALSE;
2374 }
2375
2376 p = ZSTR_VAL(str); /* string cursor pointer */
2377 last = ZSTR_VAL(str) + ZSTR_LEN(str); /* last string char pointer */
2378
2379 mbfl_encoding = string.encoding;
2380
2381 /* first scenario: 1,2,4-bytes fixed width encodings (head part) */
2382 if (mbfl_encoding->flag & MBFL_ENCTYPE_SBCS) { /* 1 byte */
2383 mb_len = string.len;
2384 chunk_len = (size_t)split_length; /* chunk length in bytes */
2385 } else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { /* 2 bytes */
2386 mb_len = string.len / 2;
2387 chunk_len = split_length * 2;
2388 } else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { /* 4 bytes */
2389 mb_len = string.len / 4;
2390 chunk_len = split_length * 4;
2391 } else if (mbfl_encoding->mblen_table != NULL) {
2392 /* second scenario: variable width encodings with length table */
2393 char unsigned const *mbtab = mbfl_encoding->mblen_table;
2394
2395 /* assume that we have 1-bytes characters */
2396 array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
2397
2398 while (p < last) { /* split cycle work until the cursor has reached the last byte */
2399 char const *chunk_p = p; /* chunk first byte pointer */
2400 chunk_len = 0; /* chunk length in bytes */
2401 zend_long char_count;
2402
2403 for (char_count = 0; char_count < split_length && p < last; ++char_count) {
2404 char unsigned const m = mbtab[*(const unsigned char *)p]; /* single character length table */
2405 chunk_len += m;
2406 p += m;
2407 }
2408 if (p >= last) chunk_len -= p - last; /* check if chunk is in bounds */
2409 add_next_index_stringl(return_value, chunk_p, chunk_len);
2410 }
2411 return;
2412 } else {
2413 /* third scenario: other multibyte encodings */
2414 mbfl_convert_filter *filter, *decoder;
2415
2416 /* assume that we have 1-bytes characters */
2417 array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
2418
2419 /* decoder filter to decode wchar to encoding */
2420 mbfl_memory_device device;
2421 mbfl_memory_device_init(&device, split_length + 1, 0);
2422
2423 decoder = mbfl_convert_filter_new(
2424 &mbfl_encoding_wchar,
2425 string.encoding,
2426 mbfl_memory_device_output,
2427 NULL,
2428 &device);
2429 /* if something wrong with the decoded */
2430 if (decoder == NULL) {
2431 RETURN_FALSE;
2432 }
2433
2434 /* wchar filter */
2435 mbfl_string_init(&result_string); /* mbfl_string to store chunk in the callback */
2436 struct mbfl_split_params params = { /* init callback function params structure */
2437 .return_value = return_value,
2438 .result_string = &result_string,
2439 .mb_chunk_length = 0,
2440 .split_length = (size_t)split_length,
2441 .next_filter = decoder,
2442 };
2443
2444 filter = mbfl_convert_filter_new(
2445 string.encoding,
2446 &mbfl_encoding_wchar,
2447 mbfl_split_output,
2448 NULL,
2449 ¶ms);
2450 /* if something wrong with the filter */
2451 if (filter == NULL){
2452 mbfl_convert_filter_delete(decoder); /* this will free allocated memory for the decoded */
2453 RETURN_FALSE;
2454 }
2455
2456 while (p < last - 1) { /* cycle each byte except last with callback function */
2457 (*filter->filter_function)(*p++, filter);
2458 }
2459 params.mb_chunk_length = split_length - 1; /* force to finish current chunk */
2460 (*filter->filter_function)(*p++, filter); /*process last char */
2461
2462 mbfl_convert_filter_delete(decoder);
2463 mbfl_convert_filter_delete(filter);
2464 mbfl_memory_device_clear(&device);
2465 return;
2466 }
2467
2468 /* first scenario: 1,2,4-bytes fixed width encodings (tail part) */
2469 chunks = (mb_len + split_length - 1) / split_length; /* (round up idiom) */
2470 array_init_size(return_value, chunks);
2471 if (chunks != 0) {
2472 zend_long i;
2473
2474 for (i = 0; i < chunks - 1; p += chunk_len, ++i) {
2475 add_next_index_stringl(return_value, p, chunk_len);
2476 }
2477 add_next_index_stringl(return_value, p, last - p);
2478 }
2479 }
2480 /* }}} */
2481
2482 /* {{{ proto int mb_strlen(string str [, string encoding])
2483 Get character numbers of a string */
PHP_FUNCTION(mb_strlen)2484 PHP_FUNCTION(mb_strlen)
2485 {
2486 size_t n;
2487 mbfl_string string;
2488 char *str;
2489 size_t str_len;
2490 zend_string *enc_name = NULL;
2491
2492 ZEND_PARSE_PARAMETERS_START(1, 2)
2493 Z_PARAM_STRING(str, str_len)
2494 Z_PARAM_OPTIONAL
2495 Z_PARAM_STR(enc_name)
2496 ZEND_PARSE_PARAMETERS_END();
2497
2498 string.val = (unsigned char *) str;
2499 string.len = str_len;
2500 string.no_language = MBSTRG(language);
2501 string.encoding = php_mb_get_encoding(enc_name);
2502 if (!string.encoding) {
2503 RETURN_FALSE;
2504 }
2505
2506 n = mbfl_strlen(&string);
2507 if (!mbfl_is_error(n)) {
2508 RETVAL_LONG(n);
2509 } else {
2510 RETVAL_FALSE;
2511 }
2512 }
2513 /* }}} */
2514
2515 /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2516 Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)2517 PHP_FUNCTION(mb_strpos)
2518 {
2519 int reverse = 0;
2520 zend_long offset = 0;
2521 mbfl_string haystack, needle;
2522 zend_string *enc_name = NULL;
2523 size_t n;
2524
2525 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name) == FAILURE) {
2526 return;
2527 }
2528
2529 haystack.no_language = needle.no_language = MBSTRG(language);
2530 haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2531 if (!haystack.encoding) {
2532 RETURN_FALSE;
2533 }
2534
2535 if (offset != 0) {
2536 size_t slen = mbfl_strlen(&haystack);
2537 if (offset < 0) {
2538 offset += slen;
2539 }
2540 if (offset < 0 || offset > slen) {
2541 php_error_docref(NULL, E_WARNING, "Offset not contained in string");
2542 RETURN_FALSE;
2543 }
2544 }
2545
2546 if (needle.len == 0) {
2547 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2548 RETURN_FALSE;
2549 }
2550
2551 n = mbfl_strpos(&haystack, &needle, offset, reverse);
2552 if (!mbfl_is_error(n)) {
2553 RETVAL_LONG(n);
2554 } else {
2555 switch (-n) {
2556 case 1:
2557 break;
2558 case 2:
2559 php_error_docref(NULL, E_WARNING, "Needle has not positive length");
2560 break;
2561 case 4:
2562 php_error_docref(NULL, E_WARNING, "Unknown encoding or conversion error");
2563 break;
2564 case 8:
2565 php_error_docref(NULL, E_NOTICE, "Argument is empty");
2566 break;
2567 default:
2568 php_error_docref(NULL, E_WARNING, "Unknown error in mb_strpos");
2569 break;
2570 }
2571 RETVAL_FALSE;
2572 }
2573 }
2574 /* }}} */
2575
2576 /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2577 Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)2578 PHP_FUNCTION(mb_strrpos)
2579 {
2580 mbfl_string haystack, needle;
2581 zend_string *enc_name = NULL;
2582 zval *zoffset = NULL;
2583 zend_long offset = 0, n;
2584
2585 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|zS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name) == FAILURE) {
2586 return;
2587 }
2588
2589 if (zoffset) {
2590 if (Z_TYPE_P(zoffset) == IS_STRING) {
2591 switch (Z_STRVAL_P(zoffset)[0]) {
2592 case '0':
2593 case '1':
2594 case '2':
2595 case '3':
2596 case '4':
2597 case '5':
2598 case '6':
2599 case '7':
2600 case '8':
2601 case '9':
2602 case ' ':
2603 case '-':
2604 case '.':
2605 convert_to_long_ex(zoffset);
2606 offset = Z_LVAL_P(zoffset);
2607 break;
2608 default :
2609 enc_name = Z_STR_P(zoffset);
2610 php_error_docref(NULL, E_DEPRECATED,
2611 "Passing the encoding as third parameter is deprecated. "
2612 "Use an explicit zero offset");
2613 break;
2614 }
2615 } else {
2616 convert_to_long_ex(zoffset);
2617 offset = Z_LVAL_P(zoffset);
2618 }
2619 }
2620
2621 haystack.no_language = needle.no_language = MBSTRG(language);
2622 haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2623 if (!haystack.encoding) {
2624 RETURN_FALSE;
2625 }
2626
2627 if (offset != 0) {
2628 size_t haystack_char_len = mbfl_strlen(&haystack);
2629 if ((offset > 0 && offset > haystack_char_len) ||
2630 (offset < 0 && -offset > haystack_char_len)) {
2631 php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
2632 RETURN_FALSE;
2633 }
2634 }
2635
2636 n = mbfl_strpos(&haystack, &needle, offset, 1);
2637 if (!mbfl_is_error(n)) {
2638 RETVAL_LONG(n);
2639 } else {
2640 RETVAL_FALSE;
2641 }
2642 }
2643 /* }}} */
2644
2645 /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2646 Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)2647 PHP_FUNCTION(mb_stripos)
2648 {
2649 size_t n = (size_t) -1;
2650 zend_long offset = 0;
2651 mbfl_string haystack, needle;
2652 zend_string *from_encoding = NULL;
2653
2654 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding) == FAILURE) {
2655 return;
2656 }
2657
2658 if (needle.len == 0) {
2659 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2660 RETURN_FALSE;
2661 }
2662
2663 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2664
2665 if (!mbfl_is_error(n)) {
2666 RETVAL_LONG(n);
2667 } else {
2668 RETVAL_FALSE;
2669 }
2670 }
2671 /* }}} */
2672
2673 /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2674 Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)2675 PHP_FUNCTION(mb_strripos)
2676 {
2677 size_t n = (size_t) -1;
2678 zend_long offset = 0;
2679 mbfl_string haystack, needle;
2680 zend_string *from_encoding = NULL;
2681
2682 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding) == FAILURE) {
2683 return;
2684 }
2685
2686 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2687
2688 if (!mbfl_is_error(n)) {
2689 RETVAL_LONG(n);
2690 } else {
2691 RETVAL_FALSE;
2692 }
2693 }
2694 /* }}} */
2695
2696 /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2697 Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2698 PHP_FUNCTION(mb_strstr)
2699 {
2700 size_t n;
2701 mbfl_string haystack, needle, result, *ret = NULL;
2702 zend_string *enc_name = NULL;
2703 zend_bool part = 0;
2704
2705 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name) == FAILURE) {
2706 return;
2707 }
2708
2709 haystack.no_language = needle.no_language = MBSTRG(language);
2710 haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2711 if (!haystack.encoding) {
2712 RETURN_FALSE;
2713 }
2714
2715 if (needle.len == 0) {
2716 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2717 RETURN_FALSE;
2718 }
2719
2720 n = mbfl_strpos(&haystack, &needle, 0, 0);
2721 if (!mbfl_is_error(n)) {
2722 if (part) {
2723 ret = mbfl_substr(&haystack, &result, 0, n);
2724 if (ret != NULL) {
2725 // TODO: avoid reallocation ???
2726 RETVAL_STRINGL((char *)ret->val, ret->len);
2727 efree(ret->val);
2728 } else {
2729 RETVAL_FALSE;
2730 }
2731 } else {
2732 ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2733 if (ret != NULL) {
2734 // TODO: avoid reallocation ???
2735 RETVAL_STRINGL((char *)ret->val, ret->len);
2736 efree(ret->val);
2737 } else {
2738 RETVAL_FALSE;
2739 }
2740 }
2741 } else {
2742 RETVAL_FALSE;
2743 }
2744 }
2745 /* }}} */
2746
2747 /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2748 Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2749 PHP_FUNCTION(mb_strrchr)
2750 {
2751 size_t n;
2752 mbfl_string haystack, needle, result, *ret = NULL;
2753 zend_string *enc_name = NULL;
2754 zend_bool part = 0;
2755
2756 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name) == FAILURE) {
2757 return;
2758 }
2759
2760 haystack.no_language = needle.no_language = MBSTRG(language);
2761 haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2762 if (!haystack.encoding) {
2763 RETURN_FALSE;
2764 }
2765
2766 if (haystack.len == 0) {
2767 RETURN_FALSE;
2768 }
2769 if (needle.len == 0) {
2770 RETURN_FALSE;
2771 }
2772
2773 n = mbfl_strpos(&haystack, &needle, 0, 1);
2774 if (!mbfl_is_error(n)) {
2775 if (part) {
2776 ret = mbfl_substr(&haystack, &result, 0, n);
2777 if (ret != NULL) {
2778 // TODO: avoid reallocation ???
2779 RETVAL_STRINGL((char *)ret->val, ret->len);
2780 efree(ret->val);
2781 } else {
2782 RETVAL_FALSE;
2783 }
2784 } else {
2785 ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2786 if (ret != NULL) {
2787 // TODO: avoid reallocation ???
2788 RETVAL_STRINGL((char *)ret->val, ret->len);
2789 efree(ret->val);
2790 } else {
2791 RETVAL_FALSE;
2792 }
2793 }
2794 } else {
2795 RETVAL_FALSE;
2796 }
2797 }
2798 /* }}} */
2799
2800 /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2801 Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2802 PHP_FUNCTION(mb_stristr)
2803 {
2804 zend_bool part = 0;
2805 size_t n;
2806 mbfl_string haystack, needle, result, *ret = NULL;
2807 zend_string *from_encoding = NULL;
2808
2809 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding) == FAILURE) {
2810 return;
2811 }
2812
2813 haystack.no_language = needle.no_language = MBSTRG(language);
2814 haystack.encoding = needle.encoding = php_mb_get_encoding(from_encoding);
2815 if (!haystack.encoding) {
2816 RETURN_FALSE;
2817 }
2818
2819 if (!needle.len) {
2820 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2821 RETURN_FALSE;
2822 }
2823
2824 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2825 if (mbfl_is_error(n)) {
2826 RETURN_FALSE;
2827 }
2828
2829 if (part) {
2830 ret = mbfl_substr(&haystack, &result, 0, n);
2831 if (ret != NULL) {
2832 // TODO: avoid reallocation ???
2833 RETVAL_STRINGL((char *)ret->val, ret->len);
2834 efree(ret->val);
2835 } else {
2836 RETVAL_FALSE;
2837 }
2838 } else {
2839 ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2840 if (ret != NULL) {
2841 // TODO: avoid reallocaton ???
2842 RETVAL_STRINGL((char *)ret->val, ret->len);
2843 efree(ret->val);
2844 } else {
2845 RETVAL_FALSE;
2846 }
2847 }
2848 }
2849 /* }}} */
2850
2851 /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2852 Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2853 PHP_FUNCTION(mb_strrichr)
2854 {
2855 zend_bool part = 0;
2856 size_t n;
2857 mbfl_string haystack, needle, result, *ret = NULL;
2858 zend_string *from_encoding = NULL;
2859
2860 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding) == FAILURE) {
2861 return;
2862 }
2863
2864 haystack.no_language = needle.no_language = MBSTRG(language);
2865 haystack.encoding = needle.encoding = php_mb_get_encoding(from_encoding);
2866 if (!haystack.encoding) {
2867 RETURN_FALSE;
2868 }
2869
2870 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2871 if (mbfl_is_error(n)) {
2872 RETURN_FALSE;
2873 }
2874
2875 if (part) {
2876 ret = mbfl_substr(&haystack, &result, 0, n);
2877 if (ret != NULL) {
2878 // TODO: avoid reallocation ???
2879 RETVAL_STRINGL((char *)ret->val, ret->len);
2880 efree(ret->val);
2881 } else {
2882 RETVAL_FALSE;
2883 }
2884 } else {
2885 ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2886 if (ret != NULL) {
2887 // TODO: avoid reallocation ???
2888 RETVAL_STRINGL((char *)ret->val, ret->len);
2889 efree(ret->val);
2890 } else {
2891 RETVAL_FALSE;
2892 }
2893 }
2894 }
2895 /* }}} */
2896
2897 /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2898 Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2899 PHP_FUNCTION(mb_substr_count)
2900 {
2901 size_t n;
2902 mbfl_string haystack, needle;
2903 zend_string *enc_name = NULL;
2904
2905 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|S", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name) == FAILURE) {
2906 return;
2907 }
2908
2909 haystack.no_language = needle.no_language = MBSTRG(language);
2910 haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2911 if (!haystack.encoding) {
2912 RETURN_FALSE;
2913 }
2914
2915 if (needle.len == 0) {
2916 php_error_docref(NULL, E_WARNING, "Empty substring");
2917 RETURN_FALSE;
2918 }
2919
2920 n = mbfl_substr_count(&haystack, &needle);
2921 if (!mbfl_is_error(n)) {
2922 RETVAL_LONG(n);
2923 } else {
2924 RETVAL_FALSE;
2925 }
2926 }
2927 /* }}} */
2928
2929 /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2930 Returns part of a string */
PHP_FUNCTION(mb_substr)2931 PHP_FUNCTION(mb_substr)
2932 {
2933 char *str;
2934 zend_string *encoding = NULL;
2935 zend_long from, len;
2936 size_t mblen, real_from, real_len;
2937 size_t str_len;
2938 zend_bool len_is_null = 1;
2939 mbfl_string string, result, *ret;
2940
2941 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!S", &str, &str_len, &from, &len, &len_is_null, &encoding) == FAILURE) {
2942 return;
2943 }
2944
2945 string.no_language = MBSTRG(language);
2946 string.encoding = php_mb_get_encoding(encoding);
2947 if (!string.encoding) {
2948 RETURN_FALSE;
2949 }
2950
2951 string.val = (unsigned char *)str;
2952 string.len = str_len;
2953
2954 /* measures length */
2955 mblen = 0;
2956 if (from < 0 || (!len_is_null && len < 0)) {
2957 mblen = mbfl_strlen(&string);
2958 }
2959
2960 /* if "from" position is negative, count start position from the end
2961 * of the string
2962 */
2963 if (from >= 0) {
2964 real_from = (size_t) from;
2965 } else if (-from < mblen) {
2966 real_from = mblen + from;
2967 } else {
2968 real_from = 0;
2969 }
2970
2971 /* if "length" position is negative, set it to the length
2972 * needed to stop that many chars from the end of the string
2973 */
2974 if (len_is_null) {
2975 real_len = MBFL_SUBSTR_UNTIL_END;
2976 } else if (len >= 0) {
2977 real_len = (size_t) len;
2978 } else if (real_from < mblen && -len < mblen - real_from) {
2979 real_len = (mblen - real_from) + len;
2980 } else {
2981 real_len = 0;
2982 }
2983
2984 if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2985 && (real_from > mbfl_strlen(&string))) {
2986 RETURN_FALSE;
2987 }
2988
2989 ret = mbfl_substr(&string, &result, real_from, real_len);
2990 if (NULL == ret) {
2991 RETURN_FALSE;
2992 }
2993
2994 // TODO: avoid reallocation ???
2995 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2996 efree(ret->val);
2997 }
2998 /* }}} */
2999
3000 /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
3001 Returns part of a string */
PHP_FUNCTION(mb_strcut)
{
	/*
	 * Cuts a part out of the string via mbfl_strcut(). "from" and "len" are
	 * resolved against string.len (the length reported by parameter
	 * parsing), not against a character count. Returns FALSE on an unknown
	 * encoding, when the resolved start lies past string.len, or when
	 * mbfl_strcut() fails.
	 */
	zend_string *encoding = NULL;
	zend_long from, len;
	zend_bool len_is_null = 1;
	mbfl_string string, result, *ret;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!S", (char **)&string.val, &string.len, &from, &len, &len_is_null, &encoding) == FAILURE) {
		return;
	}

	string.no_language = MBSTRG(language);
	string.encoding = php_mb_get_encoding(encoding);
	if (!string.encoding) {
		RETURN_FALSE;
	}

	/* omitted/NULL length: use the whole string length */
	if (len_is_null) {
		len = string.len;
	}

	/* if "from" position is negative, count start position from the end
	 * of the string; clamp to 0 if it still ends up negative
	 */
	if (from < 0) {
		from = string.len + from;
		if (from < 0) {
			from = 0;
		}
	}

	/* if "length" position is negative, set it to the length
	 * needed to stop that many chars from the end of the string;
	 * clamp to 0 if nothing is left
	 */
	if (len < 0) {
		len = (string.len - from) + len;
		if (len < 0) {
			len = 0;
		}
	}

	/* "from" is non-negative here, so the comparison against size_t is safe */
	if (from > string.len) {
		RETURN_FALSE;
	}

	ret = mbfl_strcut(&string, &result, from, len);
	if (ret == NULL) {
		RETURN_FALSE;
	}

	// TODO: avoid reallocation ???
	/* copy into the return value, then release the buffer handed back by mbfl_strcut() */
	RETVAL_STRINGL((char *)ret->val, ret->len);
	efree(ret->val);
}
3056 /* }}} */
3057
3058 /* {{{ proto int mb_strwidth(string str [, string encoding])
3059 Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)3060 PHP_FUNCTION(mb_strwidth)
3061 {
3062 size_t n;
3063 mbfl_string string;
3064 zend_string *enc_name = NULL;
3065
3066 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S", (char **)&string.val, &string.len, &enc_name) == FAILURE) {
3067 return;
3068 }
3069
3070 string.no_language = MBSTRG(language);
3071 string.encoding = php_mb_get_encoding(enc_name);
3072 if (!string.encoding) {
3073 RETURN_FALSE;
3074 }
3075
3076 n = mbfl_strwidth(&string);
3077 if (!mbfl_is_error(n)) {
3078 RETVAL_LONG(n);
3079 } else {
3080 RETVAL_FALSE;
3081 }
3082 }
3083 /* }}} */
3084
3085 /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
3086 Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)3087 PHP_FUNCTION(mb_strimwidth)
3088 {
3089 char *str, *trimmarker = NULL;
3090 zend_string *encoding = NULL;
3091 zend_long from, width, swidth = 0;
3092 size_t str_len, trimmarker_len;
3093 mbfl_string string, result, marker, *ret;
3094
3095 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sll|sS", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding) == FAILURE) {
3096 return;
3097 }
3098
3099 string.no_language = marker.no_language = MBSTRG(language);
3100 string.encoding = marker.encoding = php_mb_get_encoding(encoding);
3101 if (!string.encoding) {
3102 RETURN_FALSE;
3103 }
3104
3105 string.val = (unsigned char *)str;
3106 string.len = str_len;
3107 marker.val = NULL;
3108 marker.len = 0;
3109
3110 if ((from < 0) || (width < 0)) {
3111 swidth = mbfl_strwidth(&string);
3112 }
3113
3114 if (from < 0) {
3115 from += swidth;
3116 }
3117
3118 if (from < 0 || (size_t)from > str_len) {
3119 php_error_docref(NULL, E_WARNING, "Start position is out of range");
3120 RETURN_FALSE;
3121 }
3122
3123 if (width < 0) {
3124 width = swidth + width - from;
3125 }
3126
3127 if (width < 0) {
3128 php_error_docref(NULL, E_WARNING, "Width is out of range");
3129 RETURN_FALSE;
3130 }
3131
3132 if (trimmarker) {
3133 marker.val = (unsigned char *)trimmarker;
3134 marker.len = trimmarker_len;
3135 }
3136
3137 ret = mbfl_strimwidth(&string, &marker, &result, from, width);
3138
3139 if (ret == NULL) {
3140 RETURN_FALSE;
3141 }
3142 // TODO: avoid reallocation ???
3143 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3144 efree(ret->val);
3145 }
3146 /* }}} */
3147
3148
3149 /* See mbfl_no_encoding definition for list of unsupported encodings */
php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)3150 static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
3151 {
3152 return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint)
3153 || (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap)
3154 || (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms)
3155 || (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222));
3156 }
3157
3158
3159 /* See mbfl_no_encoding definition for list of UTF-8 encodings */
php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)3160 static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
3161 {
3162 return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
3163 }
3164
php_mb_convert_encoding_ex(const char * input,size_t length,const mbfl_encoding * to_encoding,const mbfl_encoding * from_encoding,size_t * output_len)3165 MBSTRING_API char *php_mb_convert_encoding_ex(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding *from_encoding, size_t *output_len)
3166 {
3167 mbfl_string string, result, *ret;
3168 mbfl_buffer_converter *convd;
3169 char *output = NULL;
3170
3171 if (output_len) {
3172 *output_len = 0;
3173 }
3174
3175 /* initialize string */
3176 string.encoding = from_encoding;
3177 string.no_language = MBSTRG(language);
3178 string.val = (unsigned char *)input;
3179 string.len = length;
3180
3181 /* initialize converter */
3182 convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
3183 if (convd == NULL) {
3184 php_error_docref(NULL, E_WARNING, "Unable to create character encoding converter");
3185 return NULL;
3186 }
3187
3188 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3189 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3190
3191 /* do it */
3192 mbfl_string_init(&result);
3193 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3194 if (ret) {
3195 if (output_len) {
3196 *output_len = ret->len;
3197 }
3198 output = (char *)ret->val;
3199 }
3200
3201 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3202 mbfl_buffer_converter_delete(convd);
3203 return output;
3204 }
3205 /* }}} */
3206
3207 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const char * _to_encoding,const char * _from_encodings,size_t * output_len)3208 MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
3209 {
3210 const mbfl_encoding *from_encoding, *to_encoding;
3211
3212 if (output_len) {
3213 *output_len = 0;
3214 }
3215 if (!input) {
3216 return NULL;
3217 }
3218 /* new encoding */
3219 if (_to_encoding && strlen(_to_encoding)) {
3220 to_encoding = mbfl_name2encoding(_to_encoding);
3221 if (!to_encoding) {
3222 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
3223 return NULL;
3224 }
3225 } else {
3226 to_encoding = MBSTRG(current_internal_encoding);
3227 }
3228
3229 /* pre-conversion encoding */
3230 from_encoding = MBSTRG(current_internal_encoding);
3231 if (_from_encodings) {
3232 const mbfl_encoding **list = NULL;
3233 size_t size = 0;
3234 php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0);
3235 if (size == 1) {
3236 from_encoding = *list;
3237 } else if (size > 1) {
3238 /* auto detect */
3239 mbfl_string string;
3240 mbfl_string_init(&string);
3241 string.val = (unsigned char *)input;
3242 string.len = length;
3243 from_encoding = mbfl_identify_encoding(&string, list, size, MBSTRG(strict_detection));
3244 if (!from_encoding) {
3245 php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
3246 from_encoding = &mbfl_encoding_pass;
3247 }
3248 } else {
3249 php_error_docref(NULL, E_WARNING, "Illegal character encoding specified");
3250 }
3251 if (list != NULL) {
3252 efree((void *)list);
3253 }
3254 }
3255
3256 return php_mb_convert_encoding_ex(input, length, to_encoding, from_encoding, output_len);
3257 }
3258 /* }}} */
3259
php_mb_convert_encoding_recursive(HashTable * input,const char * _to_encoding,const char * _from_encodings)3260 MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const char *_to_encoding, const char *_from_encodings)
3261 {
3262 HashTable *output, *chash;
3263 zend_long idx;
3264 zend_string *key;
3265 zval *entry, entry_tmp;
3266 size_t ckey_len, cval_len;
3267 char *ckey, *cval;
3268
3269 if (!input) {
3270 return NULL;
3271 }
3272
3273 if (GC_IS_RECURSIVE(input)) {
3274 GC_UNPROTECT_RECURSION(input);
3275 php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values");
3276 return NULL;
3277 }
3278 GC_TRY_PROTECT_RECURSION(input);
3279 output = zend_new_array(zend_hash_num_elements(input));
3280 ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
3281 /* convert key */
3282 if (key) {
3283 ckey = php_mb_convert_encoding(ZSTR_VAL(key), ZSTR_LEN(key), _to_encoding, _from_encodings, &ckey_len);
3284 key = zend_string_init(ckey, ckey_len, 0);
3285 efree(ckey);
3286 }
3287 /* convert value */
3288 ZEND_ASSERT(entry);
3289 try_again:
3290 switch(Z_TYPE_P(entry)) {
3291 case IS_STRING:
3292 cval = php_mb_convert_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), _to_encoding, _from_encodings, &cval_len);
3293 ZVAL_STRINGL(&entry_tmp, cval, cval_len);
3294 efree(cval);
3295 break;
3296 case IS_NULL:
3297 case IS_TRUE:
3298 case IS_FALSE:
3299 case IS_LONG:
3300 case IS_DOUBLE:
3301 ZVAL_COPY(&entry_tmp, entry);
3302 break;
3303 case IS_ARRAY:
3304 chash = php_mb_convert_encoding_recursive(Z_ARRVAL_P(entry), _to_encoding, _from_encodings);
3305 if (chash) {
3306 ZVAL_ARR(&entry_tmp, chash);
3307 } else {
3308 ZVAL_EMPTY_ARRAY(&entry_tmp);
3309 }
3310 break;
3311 case IS_REFERENCE:
3312 entry = Z_REFVAL_P(entry);
3313 goto try_again;
3314 case IS_OBJECT:
3315 default:
3316 if (key) {
3317 zend_string_release(key);
3318 }
3319 php_error_docref(NULL, E_WARNING, "Object is not supported");
3320 continue;
3321 }
3322 if (key) {
3323 zend_hash_add(output, key, &entry_tmp);
3324 zend_string_release(key);
3325 } else {
3326 zend_hash_index_add(output, idx, &entry_tmp);
3327 }
3328 } ZEND_HASH_FOREACH_END();
3329 GC_TRY_UNPROTECT_RECURSION(input);
3330
3331 return output;
3332 }
3333 /* }}} */
3334
3335
3336 /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3337 Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)3338 PHP_FUNCTION(mb_convert_encoding)
3339 {
3340 zval *input;
3341 char *arg_new;
3342 size_t new_len;
3343 zval *arg_old = NULL;
3344 size_t size, l, n;
3345 char *_from_encodings = NULL, *ret, *s_free = NULL;
3346
3347 zval *hash_entry;
3348 HashTable *target_hash;
3349
3350 if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z", &input, &arg_new, &new_len, &arg_old) == FAILURE) {
3351 return;
3352 }
3353
3354 if (Z_TYPE_P(input) != IS_STRING && Z_TYPE_P(input) != IS_ARRAY) {
3355 if (!try_convert_to_string(input)) {
3356 return;
3357 }
3358 }
3359
3360 if (arg_old) {
3361 switch (Z_TYPE_P(arg_old)) {
3362 case IS_ARRAY:
3363 target_hash = Z_ARRVAL_P(arg_old);
3364 _from_encodings = NULL;
3365
3366 ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
3367 zend_string *encoding_str = zval_try_get_string(hash_entry);
3368 if (UNEXPECTED(!encoding_str)) {
3369 if (_from_encodings) {
3370 efree(_from_encodings);
3371 }
3372 return;
3373 }
3374
3375 if ( _from_encodings) {
3376 l = strlen(_from_encodings);
3377 n = ZSTR_LEN(encoding_str);
3378 _from_encodings = erealloc(_from_encodings, l+n+2);
3379 memcpy(_from_encodings + l, ",", 1);
3380 memcpy(_from_encodings + l + 1, ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str) + 1);
3381 } else {
3382 _from_encodings = estrdup(ZSTR_VAL(encoding_str));
3383 }
3384 zend_string_release(encoding_str);
3385 } ZEND_HASH_FOREACH_END();
3386
3387 if (_from_encodings != NULL && !strlen(_from_encodings)) {
3388 efree(_from_encodings);
3389 _from_encodings = NULL;
3390 }
3391 s_free = _from_encodings;
3392 break;
3393 default:
3394 if (!try_convert_to_string(arg_old)) {
3395 return;
3396 }
3397
3398 _from_encodings = Z_STRVAL_P(arg_old);
3399 break;
3400 }
3401 }
3402
3403 if (Z_TYPE_P(input) == IS_STRING) {
3404 /* new encoding */
3405 ret = php_mb_convert_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), arg_new, _from_encodings, &size);
3406 if (ret != NULL) {
3407 // TODO: avoid reallocation ???
3408 RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */
3409 efree(ret);
3410 } else {
3411 RETVAL_FALSE;
3412 }
3413 if (s_free) {
3414 efree(s_free);
3415 }
3416 } else {
3417 HashTable *tmp;
3418 tmp = php_mb_convert_encoding_recursive(Z_ARRVAL_P(input), arg_new, _from_encodings);
3419 RETURN_ARR(tmp);
3420 }
3421
3422 return;
3423 }
3424 /* }}} */
3425
/*
 * Thin wrapper around php_unicode_convert_case() that supplies the current
 * illegal-character mode and substitution character from the module globals.
 */
static char *mbstring_convert_case(
		int case_mode, const char *str, size_t str_len, size_t *ret_len,
		const mbfl_encoding *enc) {
	char *converted;

	converted = php_unicode_convert_case(
		case_mode, str, str_len, ret_len, enc,
		MBSTRG(current_filter_illegal_mode),
		MBSTRG(current_filter_illegal_substchar));

	return converted;
}
3433
3434 /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3435 Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)3436 PHP_FUNCTION(mb_convert_case)
3437 {
3438 zend_string *from_encoding = NULL;
3439 char *str;
3440 size_t str_len;
3441 zend_long case_mode = 0;
3442 char *newstr;
3443 size_t ret_len;
3444 const mbfl_encoding *enc;
3445
3446 RETVAL_FALSE;
3447 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|S!", &str, &str_len,
3448 &case_mode, &from_encoding) == FAILURE) {
3449 return;
3450 }
3451
3452 enc = php_mb_get_encoding(from_encoding);
3453 if (!enc) {
3454 return;
3455 }
3456
3457 if (case_mode < 0 || case_mode > PHP_UNICODE_CASE_MODE_MAX) {
3458 php_error_docref(NULL, E_WARNING, "Invalid case mode");
3459 return;
3460 }
3461
3462 newstr = mbstring_convert_case(case_mode, str, str_len, &ret_len, enc);
3463
3464 if (newstr) {
3465 // TODO: avoid reallocation ???
3466 RETVAL_STRINGL(newstr, ret_len);
3467 efree(newstr);
3468 }
3469 }
3470 /* }}} */
3471
3472 /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
 * Returns an uppercased version of sourcestring
3474 */
PHP_FUNCTION(mb_strtoupper)3475 PHP_FUNCTION(mb_strtoupper)
3476 {
3477 zend_string *from_encoding = NULL;
3478 char *str;
3479 size_t str_len;
3480 char *newstr;
3481 size_t ret_len;
3482 const mbfl_encoding *enc;
3483
3484 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S!", &str, &str_len,
3485 &from_encoding) == FAILURE) {
3486 return;
3487 }
3488
3489 enc = php_mb_get_encoding(from_encoding);
3490 if (!enc) {
3491 RETURN_FALSE;
3492 }
3493
3494 newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
3495
3496 if (newstr) {
3497 // TODO: avoid reallocation ???
3498 RETVAL_STRINGL(newstr, ret_len);
3499 efree(newstr);
3500 return;
3501 }
3502 RETURN_FALSE;
3503 }
3504 /* }}} */
3505
3506 /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3507 * Returns a lowercased version of sourcestring
3508 */
PHP_FUNCTION(mb_strtolower)3509 PHP_FUNCTION(mb_strtolower)
3510 {
3511 zend_string *from_encoding = NULL;
3512 char *str;
3513 size_t str_len;
3514 char *newstr;
3515 size_t ret_len;
3516 const mbfl_encoding *enc;
3517
3518 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S!", &str, &str_len,
3519 &from_encoding) == FAILURE) {
3520 return;
3521 }
3522
3523 enc = php_mb_get_encoding(from_encoding);
3524 if (!enc) {
3525 RETURN_FALSE;
3526 }
3527
3528 newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
3529
3530 if (newstr) {
3531 // TODO: avoid reallocation ???
3532 RETVAL_STRINGL(newstr, ret_len);
3533 efree(newstr);
3534 return;
3535 }
3536 RETURN_FALSE;
3537 }
3538 /* }}} */
3539
3540 /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
   Returns the detected encoding of the given string (as a string) */
PHP_FUNCTION(mb_detect_encoding)3542 PHP_FUNCTION(mb_detect_encoding)
3543 {
3544 char *str;
3545 size_t str_len;
3546 zend_bool strict=0;
3547 zval *encoding_list = NULL;
3548
3549 mbfl_string string;
3550 const mbfl_encoding *ret;
3551 const mbfl_encoding **elist, **list;
3552 size_t size;
3553
3554 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z!b", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3555 return;
3556 }
3557
3558 /* make encoding list */
3559 list = NULL;
3560 size = 0;
3561 if (encoding_list) {
3562 switch (Z_TYPE_P(encoding_list)) {
3563 case IS_ARRAY:
3564 if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0)) {
3565 if (list) {
3566 efree(list);
3567 list = NULL;
3568 size = 0;
3569 }
3570 }
3571 break;
3572 default:
3573 if (!try_convert_to_string(encoding_list)) {
3574 return;
3575 }
3576 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0)) {
3577 if (list) {
3578 efree(list);
3579 list = NULL;
3580 size = 0;
3581 }
3582 }
3583 break;
3584 }
3585 if (size == 0) {
3586 php_error_docref(NULL, E_WARNING, "Illegal argument");
3587 }
3588 }
3589
3590 if (ZEND_NUM_ARGS() < 3) {
3591 strict = MBSTRG(strict_detection);
3592 }
3593
3594 if (size > 0 && list != NULL) {
3595 elist = list;
3596 } else {
3597 elist = MBSTRG(current_detect_order_list);
3598 size = MBSTRG(current_detect_order_list_size);
3599 }
3600
3601 mbfl_string_init(&string);
3602 string.no_language = MBSTRG(language);
3603 string.val = (unsigned char *)str;
3604 string.len = str_len;
3605 ret = mbfl_identify_encoding(&string, elist, size, strict);
3606
3607 if (list != NULL) {
3608 efree((void *)list);
3609 }
3610
3611 if (ret == NULL) {
3612 RETURN_FALSE;
3613 }
3614
3615 RETVAL_STRING((char *)ret->name);
3616 }
3617 /* }}} */
3618
3619 /* {{{ proto mixed mb_list_encodings()
3620 Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)3621 PHP_FUNCTION(mb_list_encodings)
3622 {
3623 const mbfl_encoding **encodings;
3624 const mbfl_encoding *encoding;
3625 int i;
3626
3627 if (zend_parse_parameters_none() == FAILURE) {
3628 return;
3629 }
3630
3631 array_init(return_value);
3632 i = 0;
3633 encodings = mbfl_get_supported_encodings();
3634 while ((encoding = encodings[i++]) != NULL) {
3635 add_next_index_string(return_value, (char *) encoding->name);
3636 }
3637 }
3638 /* }}} */
3639
3640 /* {{{ proto array mb_encoding_aliases(string encoding)
3641 Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)3642 PHP_FUNCTION(mb_encoding_aliases)
3643 {
3644 const mbfl_encoding *encoding;
3645 char *name = NULL;
3646 size_t name_len;
3647
3648 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
3649 return;
3650 }
3651
3652 encoding = mbfl_name2encoding(name);
3653 if (!encoding) {
3654 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
3655 RETURN_FALSE;
3656 }
3657
3658 array_init(return_value);
3659 if (encoding->aliases != NULL) {
3660 const char **alias;
3661 for (alias = *encoding->aliases; *alias; ++alias) {
3662 add_next_index_string(return_value, (char *)*alias);
3663 }
3664 }
3665 }
3666 /* }}} */
3667
3668 /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3669 Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)3670 PHP_FUNCTION(mb_encode_mimeheader)
3671 {
3672 const mbfl_encoding *charset, *transenc;
3673 mbfl_string string, result, *ret;
3674 char *charset_name = NULL;
3675 size_t charset_name_len;
3676 char *trans_enc_name = NULL;
3677 size_t trans_enc_name_len;
3678 char *linefeed = "\r\n";
3679 size_t linefeed_len;
3680 zend_long indent = 0;
3681
3682 string.no_language = MBSTRG(language);
3683 string.encoding = MBSTRG(current_internal_encoding);
3684
3685 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3686 return;
3687 }
3688
3689 charset = &mbfl_encoding_pass;
3690 transenc = &mbfl_encoding_base64;
3691
3692 if (charset_name != NULL) {
3693 charset = mbfl_name2encoding(charset_name);
3694 if (!charset) {
3695 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3696 RETURN_FALSE;
3697 }
3698 } else {
3699 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3700 if (lang != NULL) {
3701 charset = mbfl_no2encoding(lang->mail_charset);
3702 transenc = mbfl_no2encoding(lang->mail_header_encoding);
3703 }
3704 }
3705
3706 if (trans_enc_name != NULL) {
3707 if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3708 transenc = &mbfl_encoding_base64;
3709 } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3710 transenc = &mbfl_encoding_qprint;
3711 }
3712 }
3713
3714 mbfl_string_init(&result);
3715 ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3716 if (ret != NULL) {
3717 // TODO: avoid reallocation ???
3718 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3719 efree(ret->val);
3720 } else {
3721 RETVAL_FALSE;
3722 }
3723 }
3724 /* }}} */
3725
3726 /* {{{ proto string mb_decode_mimeheader(string string)
3727 Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)3728 PHP_FUNCTION(mb_decode_mimeheader)
3729 {
3730 mbfl_string string, result, *ret;
3731
3732 string.no_language = MBSTRG(language);
3733 string.encoding = MBSTRG(current_internal_encoding);
3734
3735 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string.len) == FAILURE) {
3736 return;
3737 }
3738
3739 mbfl_string_init(&result);
3740 ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
3741 if (ret != NULL) {
3742 // TODO: avoid reallocation ???
3743 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3744 efree(ret->val);
3745 } else {
3746 RETVAL_FALSE;
3747 }
3748 }
3749 /* }}} */
3750
3751 /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3752 Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)3753 PHP_FUNCTION(mb_convert_kana)
3754 {
3755 int opt;
3756 mbfl_string string, result, *ret;
3757 char *optstr = NULL;
3758 size_t optstr_len;
3759 zend_string *encname = NULL;
3760
3761 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sS", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname) == FAILURE) {
3762 return;
3763 }
3764
3765 /* option */
3766 if (optstr != NULL) {
3767 char *p = optstr;
3768 size_t i = 0, n = optstr_len;
3769 opt = 0;
3770 while (i < n) {
3771 i++;
3772 switch (*p++) {
3773 case 'A':
3774 opt |= 0x1;
3775 break;
3776 case 'a':
3777 opt |= 0x10;
3778 break;
3779 case 'R':
3780 opt |= 0x2;
3781 break;
3782 case 'r':
3783 opt |= 0x20;
3784 break;
3785 case 'N':
3786 opt |= 0x4;
3787 break;
3788 case 'n':
3789 opt |= 0x40;
3790 break;
3791 case 'S':
3792 opt |= 0x8;
3793 break;
3794 case 's':
3795 opt |= 0x80;
3796 break;
3797 case 'K':
3798 opt |= 0x100;
3799 break;
3800 case 'k':
3801 opt |= 0x1000;
3802 break;
3803 case 'H':
3804 opt |= 0x200;
3805 break;
3806 case 'h':
3807 opt |= 0x2000;
3808 break;
3809 case 'V':
3810 opt |= 0x800;
3811 break;
3812 case 'C':
3813 opt |= 0x10000;
3814 break;
3815 case 'c':
3816 opt |= 0x20000;
3817 break;
3818 case 'M':
3819 opt |= 0x100000;
3820 break;
3821 case 'm':
3822 opt |= 0x200000;
3823 break;
3824 }
3825 }
3826 } else {
3827 opt = 0x900;
3828 }
3829
3830 /* encoding */
3831 string.no_language = MBSTRG(language);
3832 string.encoding = php_mb_get_encoding(encname);
3833 if (!string.encoding) {
3834 RETURN_FALSE;
3835 }
3836
3837 ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3838 if (ret != NULL) {
3839 // TODO: avoid reallocation ???
3840 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3841 efree(ret->val);
3842 } else {
3843 RETVAL_FALSE;
3844 }
3845 }
3846 /* }}} */
3847
/*
 * Feeds every string reachable from `var` (following references and walking
 * arrays/object property tables) into the encoding detector `identd`.
 *
 * Returns 1 as soon as mbfl_encoding_detector_feed() reports that detection
 * is complete, 0 otherwise. When a recursive array/object is encountered,
 * *recursion_error is set to 1 and 0 is returned; callers up the stack
 * unwind their recursion protection and propagate the 0.
 */
static int mb_recursive_encoder_detector_feed(mbfl_encoding_detector *identd, zval *var, int *recursion_error) /* {{{ */
{
	mbfl_string string;
	HashTable *ht;
	zval *entry;

	ZVAL_DEREF(var);
	if (Z_TYPE_P(var) == IS_STRING) {
		/* only val/len are set here; the other mbfl_string fields stay uninitialized */
		string.val = (unsigned char *)Z_STRVAL_P(var);
		string.len = Z_STRLEN_P(var);
		if (mbfl_encoding_detector_feed(identd, &string)) {
			return 1; /* complete detecting */
		}
	} else if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
		if (Z_REFCOUNTED_P(var)) {
			if (Z_IS_RECURSIVE_P(var)) {
				/* already being visited further up the call stack */
				*recursion_error = 1;
				return 0;
			}
			Z_PROTECT_RECURSION_P(var);
		}

		ht = HASH_OF(var);
		if (ht != NULL) {
			ZEND_HASH_FOREACH_VAL_IND(ht, entry) {
				if (mb_recursive_encoder_detector_feed(identd, entry, recursion_error)) {
					/* detection finished: drop our protection before leaving */
					if (Z_REFCOUNTED_P(var)) {
						Z_UNPROTECT_RECURSION_P(var);
					}
					return 1;
				} else if (*recursion_error) {
					/* recursion found below: drop our protection and propagate */
					if (Z_REFCOUNTED_P(var)) {
						Z_UNPROTECT_RECURSION_P(var);
					}
					return 0;
				}
			} ZEND_HASH_FOREACH_END();
		}

		if (Z_REFCOUNTED_P(var)) {
			Z_UNPROTECT_RECURSION_P(var);
		}
	}
	return 0;
} /* }}} */
3893
static int mb_recursive_convert_variable(mbfl_buffer_converter *convd, zval *var) /* {{{ */
{
	/*
	 * Converts, in place, every string reachable from `var` using `convd`.
	 * Arrays are separated before being walked; objects are walked through
	 * HASH_OF().
	 *
	 * Returns 1 when a container that is already being visited is met again
	 * (recursive structure), 0 otherwise.
	 */
	zval *slot = var; /* the zval as passed in, before dereferencing */
	zval *member;
	HashTable *members;
	int hit_cycle = 0;

	ZVAL_DEREF(var);

	if (Z_TYPE_P(var) == IS_STRING) {
		mbfl_string input, output, *converted;

		input.val = (unsigned char *)Z_STRVAL_P(var);
		input.len = Z_STRLEN_P(var);
		converted = mbfl_buffer_converter_feed_result(convd, &input, &output);
		if (converted != NULL) {
			/* the old value is dropped first; the converted buffer is copied, then freed */
			zval_ptr_dtor(slot);
			// TODO: avoid reallocation ???
			ZVAL_STRINGL(slot, (char *)converted->val, converted->len);
			efree(converted->val);
		}
		return 0;
	}

	if (Z_TYPE_P(var) != IS_ARRAY && Z_TYPE_P(var) != IS_OBJECT) {
		return 0;
	}

	if (Z_TYPE_P(var) == IS_ARRAY) {
		SEPARATE_ARRAY(var);
	}

	if (Z_REFCOUNTED_P(var)) {
		if (Z_IS_RECURSIVE_P(var)) {
			return 1;
		}
		Z_PROTECT_RECURSION_P(var);
	}

	members = HASH_OF(var);
	if (members != NULL) {
		ZEND_HASH_FOREACH_VAL_IND(members, member) {
			if (mb_recursive_convert_variable(convd, member)) {
				hit_cycle = 1;
				break;
			}
		} ZEND_HASH_FOREACH_END();
	}

	/* single exit point so the recursion guard is always lifted */
	if (Z_REFCOUNTED_P(var)) {
		Z_UNPROTECT_RECURSION_P(var);
	}
	return hit_cycle;
} /* }}} */
3941
3942 /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3943 Converts the string resource in variables to desired encoding */
PHP_FUNCTION(mb_convert_variables)3944 PHP_FUNCTION(mb_convert_variables)
3945 {
3946 zval *args, *zfrom_enc;
3947 mbfl_string string, result;
3948 const mbfl_encoding *from_encoding, *to_encoding;
3949 mbfl_encoding_detector *identd;
3950 mbfl_buffer_converter *convd;
3951 int n, argc;
3952 size_t to_enc_len;
3953 size_t elistsz;
3954 const mbfl_encoding **elist;
3955 char *to_enc;
3956 int recursion_error = 0;
3957
3958 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3959 return;
3960 }
3961
3962 /* new encoding */
3963 to_encoding = mbfl_name2encoding(to_enc);
3964 if (!to_encoding) {
3965 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3966 RETURN_FALSE;
3967 }
3968
3969 /* initialize string */
3970 mbfl_string_init(&string);
3971 mbfl_string_init(&result);
3972 from_encoding = MBSTRG(current_internal_encoding);
3973 string.encoding = from_encoding;
3974 string.no_language = MBSTRG(language);
3975
3976 /* pre-conversion encoding */
3977 elist = NULL;
3978 elistsz = 0;
3979 switch (Z_TYPE_P(zfrom_enc)) {
3980 case IS_ARRAY:
3981 php_mb_parse_encoding_array(zfrom_enc, &elist, &elistsz, 0);
3982 break;
3983 default:
3984 if (!try_convert_to_string(zfrom_enc)) {
3985 return;
3986 }
3987 php_mb_parse_encoding_list(Z_STRVAL_P(zfrom_enc), Z_STRLEN_P(zfrom_enc), &elist, &elistsz, 0);
3988 break;
3989 }
3990
3991 if (elistsz == 0) {
3992 from_encoding = &mbfl_encoding_pass;
3993 } else if (elistsz == 1) {
3994 from_encoding = *elist;
3995 } else {
3996 /* auto detect */
3997 from_encoding = NULL;
3998 identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
3999 if (identd != NULL) {
4000 n = 0;
4001 while (n < argc) {
4002 if (mb_recursive_encoder_detector_feed(identd, &args[n], &recursion_error)) {
4003 break;
4004 }
4005 n++;
4006 }
4007 from_encoding = mbfl_encoding_detector_judge(identd);
4008 mbfl_encoding_detector_delete(identd);
4009 if (recursion_error) {
4010 if (elist != NULL) {
4011 efree((void *)elist);
4012 }
4013 php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
4014 RETURN_FALSE;
4015 }
4016 }
4017
4018 if (!from_encoding) {
4019 php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
4020 from_encoding = &mbfl_encoding_pass;
4021 }
4022 }
4023 if (elist != NULL) {
4024 efree((void *)elist);
4025 }
4026 /* create converter */
4027 convd = NULL;
4028 if (from_encoding != &mbfl_encoding_pass) {
4029 convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
4030 if (convd == NULL) {
4031 php_error_docref(NULL, E_WARNING, "Unable to create converter");
4032 RETURN_FALSE;
4033 }
4034 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
4035 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
4036 }
4037
4038 /* convert */
4039 if (convd != NULL) {
4040 n = 0;
4041 while (n < argc) {
4042 zval *zv = &args[n];
4043
4044 ZVAL_DEREF(zv);
4045 recursion_error = mb_recursive_convert_variable(convd, zv);
4046 if (recursion_error) {
4047 break;
4048 }
4049 n++;
4050 }
4051
4052 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
4053 mbfl_buffer_converter_delete(convd);
4054
4055 if (recursion_error) {
4056 php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
4057 RETURN_FALSE;
4058 }
4059 }
4060
4061 if (from_encoding) {
4062 RETURN_STRING(from_encoding->name);
4063 } else {
4064 RETURN_FALSE;
4065 }
4066 }
4067 /* }}} */
4068
4069 /* {{{ HTML numeric entity */
4070 /* {{{ static void php_mb_numericentity_exec() */
4071 static void
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS,int type)4072 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
4073 {
4074 char *str, *encoding = NULL;
4075 size_t str_len, encoding_len;
4076 zval *zconvmap, *hash_entry;
4077 HashTable *target_hash;
4078 int i, *convmap, *mapelm, mapsize=0;
4079 zend_bool is_hex = 0;
4080 mbfl_string string, result, *ret;
4081
4082 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
4083 return;
4084 }
4085
4086 string.no_language = MBSTRG(language);
4087 string.encoding = MBSTRG(current_internal_encoding);
4088 string.val = (unsigned char *)str;
4089 string.len = str_len;
4090
4091 /* encoding */
4092 if (encoding && encoding_len > 0) {
4093 string.encoding = mbfl_name2encoding(encoding);
4094 if (!string.encoding) {
4095 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
4096 RETURN_FALSE;
4097 }
4098 }
4099
4100 if (type == 0 && is_hex) {
4101 type = 2; /* output in hex format */
4102 }
4103
4104 /* conversion map */
4105 convmap = NULL;
4106 if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
4107 target_hash = Z_ARRVAL_P(zconvmap);
4108 i = zend_hash_num_elements(target_hash);
4109 if (i > 0) {
4110 convmap = (int *)safe_emalloc(i, sizeof(int), 0);
4111 mapelm = convmap;
4112 mapsize = 0;
4113 ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
4114 *mapelm++ = zval_get_long(hash_entry);
4115 mapsize++;
4116 } ZEND_HASH_FOREACH_END();
4117 }
4118 }
4119 if (convmap == NULL) {
4120 RETURN_FALSE;
4121 }
4122 mapsize /= 4;
4123
4124 ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
4125 if (ret != NULL) {
4126 // TODO: avoid reallocation ???
4127 RETVAL_STRINGL((char *)ret->val, ret->len);
4128 efree(ret->val);
4129 } else {
4130 RETVAL_FALSE;
4131 }
4132 efree((void *)convmap);
4133 }
4134 /* }}} */
4135
4136 /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
4137 Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)4138 PHP_FUNCTION(mb_encode_numericentity)
4139 {
4140 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
4141 }
4142 /* }}} */
4143
4144 /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
4145 Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)4146 PHP_FUNCTION(mb_decode_numericentity)
4147 {
4148 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
4149 }
4150 /* }}} */
4151 /* }}} */
4152
4153 /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
4154 * Sends an email message with MIME scheme
4155 */
4156
/*
 * For use directly inside the body of a loop that walks `str` by index `pos`
 * (`pos` must be a modifiable lvalue).  When str[pos] starts a folded-header
 * separator -- CR LF followed by a space or tab -- `pos` is advanced to the
 * last whitespace character of the fold and the enclosing loop is continued.
 * Otherwise nothing happens and execution falls through to the next statement.
 *
 * The expansion deliberately is a bare `if` containing `continue`, so it
 * cannot be wrapped in do { } while (0); arguments are parenthesised so that
 * expressions can be passed safely.
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
	if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) { \
		(pos) += 2; \
		while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') { \
			(pos)++; \
		} \
		continue; \
	}
4165
/*
 * Replaces every NUL byte in the `len` bytes starting at `str` with a space.
 * Relies on two `char *` scratch variables named `pp` and `ee` being declared
 * in the calling scope.  Wrapped in do { } while (0) so it behaves as a single
 * statement; use it with a trailing semicolon.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) \
	do { \
		pp = (str); \
		ee = pp + (len); \
		while ((pp = memchr(pp, '\0', (ee - pp)))) { \
			*pp = ' '; \
		} \
	} while (0)
4172
_php_mbstr_parse_mail_headers(HashTable * ht,const char * str,size_t str_len)4173 static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
4174 {
4175 const char *ps;
4176 size_t icnt;
4177 int state = 0;
4178 int crlf_state = -1;
4179 char *token = NULL;
4180 size_t token_pos = 0;
4181 zend_string *fld_name, *fld_val;
4182
4183 ps = str;
4184 icnt = str_len;
4185 fld_name = fld_val = NULL;
4186
4187 /*
4188 * C o n t e n t - T y p e : t e x t / h t m l \r\n
4189 * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
4190 * state 0 1 2 3
4191 *
4192 * C o n t e n t - T y p e : t e x t / h t m l \r\n
4193 * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
4194 * crlf_state -1 0 1 -1
4195 *
4196 */
4197
4198 while (icnt > 0) {
4199 switch (*ps) {
4200 case ':':
4201 if (crlf_state == 1) {
4202 token_pos++;
4203 }
4204
4205 if (state == 0 || state == 1) {
4206 if(token && token_pos > 0) {
4207 fld_name = zend_string_init(token, token_pos, 0);
4208 }
4209 state = 2;
4210 } else {
4211 token_pos++;
4212 }
4213
4214 crlf_state = 0;
4215 break;
4216
4217 case '\n':
4218 if (crlf_state == -1) {
4219 goto out;
4220 }
4221 crlf_state = -1;
4222 break;
4223
4224 case '\r':
4225 if (crlf_state == 1) {
4226 token_pos++;
4227 } else {
4228 crlf_state = 1;
4229 }
4230 break;
4231
4232 case ' ': case '\t':
4233 if (crlf_state == -1) {
4234 if (state == 3) {
4235 /* continuing from the previous line */
4236 state = 4;
4237 } else {
4238 /* simply skipping this new line */
4239 state = 5;
4240 }
4241 } else {
4242 if (crlf_state == 1) {
4243 token_pos++;
4244 }
4245 if (state == 1 || state == 3) {
4246 token_pos++;
4247 }
4248 }
4249 crlf_state = 0;
4250 break;
4251
4252 default:
4253 switch (state) {
4254 case 0:
4255 token = (char*)ps;
4256 token_pos = 0;
4257 state = 1;
4258 break;
4259
4260 case 2:
4261 if (crlf_state != -1) {
4262 token = (char*)ps;
4263 token_pos = 0;
4264
4265 state = 3;
4266 break;
4267 }
4268 /* break is missing intentionally */
4269
4270 case 3:
4271 if (crlf_state == -1) {
4272 if(token && token_pos > 0) {
4273 fld_val = zend_string_init(token, token_pos, 0);
4274 }
4275
4276 if (fld_name != NULL && fld_val != NULL) {
4277 zval val;
4278 /* FIXME: some locale free implementation is
4279 * really required here,,, */
4280 php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
4281 ZVAL_STR(&val, fld_val);
4282
4283 zend_hash_update(ht, fld_name, &val);
4284
4285 zend_string_release_ex(fld_name, 0);
4286 }
4287
4288 fld_name = fld_val = NULL;
4289 token = (char*)ps;
4290 token_pos = 0;
4291
4292 state = 1;
4293 }
4294 break;
4295
4296 case 4:
4297 token_pos++;
4298 state = 3;
4299 break;
4300 }
4301
4302 if (crlf_state == 1) {
4303 token_pos++;
4304 }
4305
4306 token_pos++;
4307
4308 crlf_state = 0;
4309 break;
4310 }
4311 ps++, icnt--;
4312 }
4313 out:
4314 if (state == 2) {
4315 token = "";
4316 token_pos = 0;
4317
4318 state = 3;
4319 }
4320 if (state == 3) {
4321 if(token && token_pos > 0) {
4322 fld_val = zend_string_init(token, token_pos, 0);
4323 }
4324 if (fld_name != NULL && fld_val != NULL) {
4325 zval val;
4326 /* FIXME: some locale free implementation is
4327 * really required here,,, */
4328 php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
4329 ZVAL_STR(&val, fld_val);
4330
4331 zend_hash_update(ht, fld_name, &val);
4332
4333 zend_string_release_ex(fld_name, 0);
4334 }
4335 }
4336 return state;
4337 }
4338
PHP_FUNCTION(mb_send_mail)4339 PHP_FUNCTION(mb_send_mail)
4340 {
4341 char *to;
4342 size_t to_len;
4343 char *message;
4344 size_t message_len;
4345 char *subject;
4346 size_t subject_len;
4347 zval *headers = NULL;
4348 zend_string *extra_cmd = NULL;
4349 zend_string *str_headers = NULL, *tmp_headers;
4350 size_t n, i;
4351 char *to_r = NULL;
4352 char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
4353 struct {
4354 int cnt_type:1;
4355 int cnt_trans_enc:1;
4356 } suppressed_hdrs = { 0, 0 };
4357
4358 char *message_buf = NULL, *subject_buf = NULL, *p;
4359 mbfl_string orig_str, conv_str;
4360 mbfl_string *pstr; /* pointer to mbfl string for return value */
4361 enum mbfl_no_encoding;
4362 const mbfl_encoding *tran_cs, /* transfar text charset */
4363 *head_enc, /* header transfar encoding */
4364 *body_enc; /* body transfar encoding */
4365 mbfl_memory_device device; /* automatic allocateable buffer for additional header */
4366 const mbfl_language *lang;
4367 int err = 0;
4368 HashTable ht_headers;
4369 zval *s;
4370 extern void mbfl_memory_device_unput(mbfl_memory_device *device);
4371 char *pp, *ee;
4372
4373 /* initialize */
4374 mbfl_memory_device_init(&device, 0, 0);
4375 mbfl_string_init(&orig_str);
4376 mbfl_string_init(&conv_str);
4377
4378 /* character-set, transfer-encoding */
4379 tran_cs = &mbfl_encoding_utf8;
4380 head_enc = &mbfl_encoding_base64;
4381 body_enc = &mbfl_encoding_base64;
4382 lang = mbfl_no2language(MBSTRG(language));
4383 if (lang != NULL) {
4384 tran_cs = mbfl_no2encoding(lang->mail_charset);
4385 head_enc = mbfl_no2encoding(lang->mail_header_encoding);
4386 body_enc = mbfl_no2encoding(lang->mail_body_encoding);
4387 }
4388
4389 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|zS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &extra_cmd) == FAILURE) {
4390 return;
4391 }
4392
4393 /* ASCIIZ check */
4394 MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
4395 MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
4396 MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
4397 if (headers) {
4398 switch(Z_TYPE_P(headers)) {
4399 case IS_STRING:
4400 tmp_headers = zend_string_init(Z_STRVAL_P(headers), Z_STRLEN_P(headers), 0);
4401 MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(tmp_headers), ZSTR_LEN(tmp_headers));
4402 str_headers = php_trim(tmp_headers, NULL, 0, 2);
4403 zend_string_release_ex(tmp_headers, 0);
4404 break;
4405 case IS_ARRAY:
4406 str_headers = php_mail_build_headers(headers);
4407 break;
4408 default:
4409 php_error_docref(NULL, E_WARNING, "headers parameter must be string or array");
4410 RETURN_FALSE;
4411 }
4412 }
4413 if (extra_cmd) {
4414 MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd));
4415 }
4416
4417 zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
4418
4419 if (str_headers != NULL) {
4420 _php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers));
4421 }
4422
4423 if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
4424 char *tmp;
4425 char *param_name;
4426 char *charset = NULL;
4427
4428 ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
4429 p = strchr(Z_STRVAL_P(s), ';');
4430
4431 if (p != NULL) {
4432 /* skipping the padded spaces */
4433 do {
4434 ++p;
4435 } while (*p == ' ' || *p == '\t');
4436
4437 if (*p != '\0') {
4438 if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
4439 if (strcasecmp(param_name, "charset") == 0) {
4440 const mbfl_encoding *_tran_cs = tran_cs;
4441
4442 charset = php_strtok_r(NULL, "= \"", &tmp);
4443 if (charset != NULL) {
4444 _tran_cs = mbfl_name2encoding(charset);
4445 }
4446
4447 if (!_tran_cs) {
4448 php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
4449 _tran_cs = &mbfl_encoding_ascii;
4450 }
4451 tran_cs = _tran_cs;
4452 }
4453 }
4454 }
4455 }
4456 suppressed_hdrs.cnt_type = 1;
4457 }
4458
4459 if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
4460 const mbfl_encoding *_body_enc;
4461
4462 ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
4463 _body_enc = mbfl_name2encoding(Z_STRVAL_P(s));
4464 switch (_body_enc ? _body_enc->no_encoding : mbfl_no_encoding_invalid) {
4465 case mbfl_no_encoding_base64:
4466 case mbfl_no_encoding_7bit:
4467 case mbfl_no_encoding_8bit:
4468 body_enc = _body_enc;
4469 break;
4470
4471 default:
4472 php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
4473 body_enc = &mbfl_encoding_8bit;
4474 break;
4475 }
4476 suppressed_hdrs.cnt_trans_enc = 1;
4477 }
4478
4479 /* To: */
4480 if (to_len > 0) {
4481 to_r = estrndup(to, to_len);
4482 for (; to_len; to_len--) {
4483 if (!isspace((unsigned char) to_r[to_len - 1])) {
4484 break;
4485 }
4486 to_r[to_len - 1] = '\0';
4487 }
4488 for (i = 0; to_r[i]; i++) {
4489 if (iscntrl((unsigned char) to_r[i])) {
4490 /* According to RFC 822, section 3.1.1 long headers may be separated into
4491 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
4492 * To prevent these separators from being replaced with a space, we use the
4493 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
4494 */
4495 SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
4496 to_r[i] = ' ';
4497 }
4498 }
4499 } else {
4500 to_r = to;
4501 }
4502
4503 /* Subject: */
4504 orig_str.no_language = MBSTRG(language);
4505 orig_str.val = (unsigned char *)subject;
4506 orig_str.len = subject_len;
4507 orig_str.encoding = MBSTRG(current_internal_encoding);
4508 if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
4509 || orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
4510 orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4511 }
4512 pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4513 if (pstr != NULL) {
4514 subject_buf = subject = (char *)pstr->val;
4515 }
4516
4517 /* message body */
4518 orig_str.no_language = MBSTRG(language);
4519 orig_str.val = (unsigned char *)message;
4520 orig_str.len = message_len;
4521 orig_str.encoding = MBSTRG(current_internal_encoding);
4522
4523 if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
4524 || orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
4525 orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4526 }
4527
4528 pstr = NULL;
4529 {
4530 mbfl_string tmpstr;
4531
4532 if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4533 tmpstr.encoding = &mbfl_encoding_8bit;
4534 pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4535 efree(tmpstr.val);
4536 }
4537 }
4538 if (pstr != NULL) {
4539 message_buf = message = (char *)pstr->val;
4540 }
4541
4542 /* other headers */
4543 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4544 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4545 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4546 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4547 if (str_headers != NULL) {
4548 p = ZSTR_VAL(str_headers);
4549 n = ZSTR_LEN(str_headers);
4550 mbfl_memory_device_strncat(&device, p, n);
4551 if (n > 0 && p[n - 1] != '\n') {
4552 mbfl_memory_device_strncat(&device, "\n", 1);
4553 }
4554 zend_string_release_ex(str_headers, 0);
4555 }
4556
4557 if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4558 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4559 mbfl_memory_device_strncat(&device, "\n", 1);
4560 }
4561
4562 if (!suppressed_hdrs.cnt_type) {
4563 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4564
4565 p = (char *)mbfl_no2preferred_mime_name(tran_cs->no_encoding);
4566 if (p != NULL) {
4567 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4568 mbfl_memory_device_strcat(&device, p);
4569 }
4570 mbfl_memory_device_strncat(&device, "\n", 1);
4571 }
4572 if (!suppressed_hdrs.cnt_trans_enc) {
4573 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4574 p = (char *)mbfl_no2preferred_mime_name(body_enc->no_encoding);
4575 if (p == NULL) {
4576 p = "7bit";
4577 }
4578 mbfl_memory_device_strcat(&device, p);
4579 mbfl_memory_device_strncat(&device, "\n", 1);
4580 }
4581
4582 mbfl_memory_device_unput(&device);
4583 mbfl_memory_device_output('\0', &device);
4584 str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0);
4585
4586 if (force_extra_parameters) {
4587 extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4588 } else if (extra_cmd) {
4589 extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
4590 }
4591
4592 if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
4593 RETVAL_TRUE;
4594 } else {
4595 RETVAL_FALSE;
4596 }
4597
4598 if (extra_cmd) {
4599 zend_string_release_ex(extra_cmd, 0);
4600 }
4601
4602 if (to_r != to) {
4603 efree(to_r);
4604 }
4605 if (subject_buf) {
4606 efree((void *)subject_buf);
4607 }
4608 if (message_buf) {
4609 efree((void *)message_buf);
4610 }
4611 mbfl_memory_device_clear(&device);
4612 zend_hash_destroy(&ht_headers);
4613 if (str_headers) {
4614 zend_string_release_ex(str_headers, 0);
4615 }
4616 }
4617
4618 #undef SKIP_LONG_HEADER_SEP_MBSTRING
4619 #undef MAIL_ASCIIZ_CHECK_MBSTRING
4620 #undef PHP_MBSTR_MAIL_MIME_HEADER1
4621 #undef PHP_MBSTR_MAIL_MIME_HEADER2
4622 #undef PHP_MBSTR_MAIL_MIME_HEADER3
4623 #undef PHP_MBSTR_MAIL_MIME_HEADER4
4624 /* }}} */
4625
4626 /* {{{ proto mixed mb_get_info([string type])
4627 Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)4628 PHP_FUNCTION(mb_get_info)
4629 {
4630 char *typ = NULL;
4631 size_t typ_len;
4632 size_t n;
4633 char *name;
4634 const struct mb_overload_def *over_func;
4635 zval row1, row2;
4636 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4637 const mbfl_encoding **entry;
4638
4639 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
4640 return;
4641 }
4642
4643 if (!typ || !strcasecmp("all", typ)) {
4644 array_init(return_value);
4645 if (MBSTRG(current_internal_encoding)) {
4646 add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
4647 }
4648 if (MBSTRG(http_input_identify)) {
4649 add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
4650 }
4651 if (MBSTRG(current_http_output_encoding)) {
4652 add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
4653 }
4654 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4655 add_assoc_string(return_value, "http_output_conv_mimetypes", name);
4656 }
4657 add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4658 if (MBSTRG(func_overload)){
4659 over_func = &(mb_ovld[0]);
4660 array_init(&row1);
4661 while (over_func->type > 0) {
4662 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4663 add_assoc_string(&row1, over_func->orig_func, over_func->ovld_func);
4664 }
4665 over_func++;
4666 }
4667 add_assoc_zval(return_value, "func_overload_list", &row1);
4668 } else {
4669 add_assoc_string(return_value, "func_overload_list", "no overload");
4670 }
4671 if (lang != NULL) {
4672 if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4673 add_assoc_string(return_value, "mail_charset", name);
4674 }
4675 if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4676 add_assoc_string(return_value, "mail_header_encoding", name);
4677 }
4678 if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4679 add_assoc_string(return_value, "mail_body_encoding", name);
4680 }
4681 }
4682 add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4683 if (MBSTRG(encoding_translation)) {
4684 add_assoc_string(return_value, "encoding_translation", "On");
4685 } else {
4686 add_assoc_string(return_value, "encoding_translation", "Off");
4687 }
4688 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4689 add_assoc_string(return_value, "language", name);
4690 }
4691 n = MBSTRG(current_detect_order_list_size);
4692 entry = MBSTRG(current_detect_order_list);
4693 if (n > 0) {
4694 size_t i;
4695 array_init(&row2);
4696 for (i = 0; i < n; i++) {
4697 add_next_index_string(&row2, (*entry)->name);
4698 entry++;
4699 }
4700 add_assoc_zval(return_value, "detect_order", &row2);
4701 }
4702 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4703 add_assoc_string(return_value, "substitute_character", "none");
4704 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4705 add_assoc_string(return_value, "substitute_character", "long");
4706 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4707 add_assoc_string(return_value, "substitute_character", "entity");
4708 } else {
4709 add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4710 }
4711 if (MBSTRG(strict_detection)) {
4712 add_assoc_string(return_value, "strict_detection", "On");
4713 } else {
4714 add_assoc_string(return_value, "strict_detection", "Off");
4715 }
4716 } else if (!strcasecmp("internal_encoding", typ)) {
4717 if (MBSTRG(current_internal_encoding)) {
4718 RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
4719 }
4720 } else if (!strcasecmp("http_input", typ)) {
4721 if (MBSTRG(http_input_identify)) {
4722 RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
4723 }
4724 } else if (!strcasecmp("http_output", typ)) {
4725 if (MBSTRG(current_http_output_encoding)) {
4726 RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
4727 }
4728 } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4729 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4730 RETVAL_STRING(name);
4731 }
4732 } else if (!strcasecmp("func_overload", typ)) {
4733 RETVAL_LONG(MBSTRG(func_overload));
4734 } else if (!strcasecmp("func_overload_list", typ)) {
4735 if (MBSTRG(func_overload)){
4736 over_func = &(mb_ovld[0]);
4737 array_init(return_value);
4738 while (over_func->type > 0) {
4739 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4740 add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func);
4741 }
4742 over_func++;
4743 }
4744 } else {
4745 RETVAL_STRING("no overload");
4746 }
4747 } else if (!strcasecmp("mail_charset", typ)) {
4748 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4749 RETVAL_STRING(name);
4750 }
4751 } else if (!strcasecmp("mail_header_encoding", typ)) {
4752 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4753 RETVAL_STRING(name);
4754 }
4755 } else if (!strcasecmp("mail_body_encoding", typ)) {
4756 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4757 RETVAL_STRING(name);
4758 }
4759 } else if (!strcasecmp("illegal_chars", typ)) {
4760 RETVAL_LONG(MBSTRG(illegalchars));
4761 } else if (!strcasecmp("encoding_translation", typ)) {
4762 if (MBSTRG(encoding_translation)) {
4763 RETVAL_STRING("On");
4764 } else {
4765 RETVAL_STRING("Off");
4766 }
4767 } else if (!strcasecmp("language", typ)) {
4768 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4769 RETVAL_STRING(name);
4770 }
4771 } else if (!strcasecmp("detect_order", typ)) {
4772 n = MBSTRG(current_detect_order_list_size);
4773 entry = MBSTRG(current_detect_order_list);
4774 if (n > 0) {
4775 size_t i;
4776 array_init(return_value);
4777 for (i = 0; i < n; i++) {
4778 add_next_index_string(return_value, (*entry)->name);
4779 entry++;
4780 }
4781 }
4782 } else if (!strcasecmp("substitute_character", typ)) {
4783 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4784 RETVAL_STRING("none");
4785 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4786 RETVAL_STRING("long");
4787 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4788 RETVAL_STRING("entity");
4789 } else {
4790 RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4791 }
4792 } else if (!strcasecmp("strict_detection", typ)) {
4793 if (MBSTRG(strict_detection)) {
4794 RETVAL_STRING("On");
4795 } else {
4796 RETVAL_STRING("Off");
4797 }
4798 } else {
4799 RETURN_FALSE;
4800 }
4801 }
4802 /* }}} */
4803
4804
php_mb_init_convd(const mbfl_encoding * encoding)4805 static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding)
4806 {
4807 mbfl_buffer_converter *convd;
4808
4809 convd = mbfl_buffer_converter_new(encoding, encoding, 0);
4810 if (convd == NULL) {
4811 return NULL;
4812 }
4813 mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4814 mbfl_buffer_converter_illegal_substchar(convd, 0);
4815 return convd;
4816 }
4817
4818
/*
 * Runs `input` (`length` bytes) through `convd` and reports whether it came
 * out unchanged: returns 1 when the conversion succeeded, the converter
 * reports no illegal characters and the output is byte-identical to the
 * input; 0 otherwise.
 */
static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) {
	mbfl_string src, round_trip;
	mbfl_string *converted;
	size_t illegal;
	int intact;

	/* initialize string */
	mbfl_string_init_set(&src, mbfl_no_language_neutral, encoding);
	mbfl_string_init(&round_trip);

	src.val = (unsigned char *) input;
	src.len = length;

	converted = mbfl_buffer_converter_feed_result(convd, &src, &round_trip);
	illegal = mbfl_buffer_illegalchars(convd);

	if (converted == NULL) {
		return 0;
	}

	intact = (illegal == 0
		&& src.len == round_trip.len
		&& memcmp(src.val, round_trip.val, src.len) == 0) ? 1 : 0;

	mbfl_string_clear(&round_trip);
	return intact;
}
4842
4843
php_mb_check_encoding(const char * input,size_t length,const char * enc)4844 MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc)
4845 {
4846 const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4847 mbfl_buffer_converter *convd;
4848
4849 if (input == NULL) {
4850 return MBSTRG(illegalchars) == 0;
4851 }
4852
4853 if (enc != NULL) {
4854 encoding = mbfl_name2encoding(enc);
4855 if (!encoding || encoding == &mbfl_encoding_pass) {
4856 php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", enc);
4857 return 0;
4858 }
4859 }
4860
4861 convd = php_mb_init_convd(encoding);
4862 if (convd == NULL) {
4863 php_error_docref(NULL, E_WARNING, "Unable to create converter");
4864 return 0;
4865 }
4866
4867 if (php_mb_check_encoding_impl(convd, input, length, encoding)) {
4868 mbfl_buffer_converter_delete(convd);
4869 return 1;
4870 }
4871 mbfl_buffer_converter_delete(convd);
4872 return 0;
4873 }
4874
4875
php_mb_check_encoding_recursive(HashTable * vars,const zend_string * enc)4876 MBSTRING_API int php_mb_check_encoding_recursive(HashTable *vars, const zend_string *enc)
4877 {
4878 const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4879 mbfl_buffer_converter *convd;
4880 zend_long idx;
4881 zend_string *key;
4882 zval *entry;
4883 int valid = 1;
4884
4885 (void)(idx);
4886
4887 if (enc != NULL) {
4888 encoding = mbfl_name2encoding(ZSTR_VAL(enc));
4889 if (!encoding || encoding == &mbfl_encoding_pass) {
4890 php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", ZSTR_VAL(enc));
4891 return 0;
4892 }
4893 }
4894
4895 convd = php_mb_init_convd(encoding);
4896 if (convd == NULL) {
4897 php_error_docref(NULL, E_WARNING, "Unable to create converter");
4898 return 0;
4899 }
4900
4901 if (GC_IS_RECURSIVE(vars)) {
4902 mbfl_buffer_converter_delete(convd);
4903 php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
4904 return 0;
4905 }
4906 GC_TRY_PROTECT_RECURSION(vars);
4907 ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) {
4908 ZVAL_DEREF(entry);
4909 if (key) {
4910 if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
4911 valid = 0;
4912 break;
4913 }
4914 }
4915 switch (Z_TYPE_P(entry)) {
4916 case IS_STRING:
4917 if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) {
4918 valid = 0;
4919 break;
4920 }
4921 break;
4922 case IS_ARRAY:
4923 if (!php_mb_check_encoding_recursive(Z_ARRVAL_P(entry), enc)) {
4924 valid = 0;
4925 break;
4926 }
4927 break;
4928 case IS_LONG:
4929 case IS_DOUBLE:
4930 case IS_NULL:
4931 case IS_TRUE:
4932 case IS_FALSE:
4933 break;
4934 default:
4935 /* Other types are error. */
4936 valid = 0;
4937 break;
4938 }
4939 } ZEND_HASH_FOREACH_END();
4940 GC_TRY_UNPROTECT_RECURSION(vars);
4941 mbfl_buffer_converter_delete(convd);
4942 return valid;
4943 }
4944
4945
4946 /* {{{ proto bool mb_check_encoding([mixed var[, string encoding]])
4947 Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)4948 PHP_FUNCTION(mb_check_encoding)
4949 {
4950 zval *input = NULL;
4951 zend_string *enc = NULL;
4952
4953 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|zS", &input, &enc) == FAILURE) {
4954 return;
4955 }
4956
4957 /* FIXME: Actually check all inputs, except $_FILES file content. */
4958 if (input == NULL) {
4959 if (MBSTRG(illegalchars) == 0) {
4960 RETURN_TRUE;
4961 }
4962 RETURN_FALSE;
4963 }
4964
4965 if (Z_TYPE_P(input) == IS_ARRAY) {
4966 if (!php_mb_check_encoding_recursive(HASH_OF(input), enc)) {
4967 RETURN_FALSE;
4968 }
4969 } else {
4970 if (!try_convert_to_string(input)) {
4971 RETURN_FALSE;
4972 }
4973 if (!php_mb_check_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), enc ? ZSTR_VAL(enc): NULL)) {
4974 RETURN_FALSE;
4975 }
4976 }
4977 RETURN_TRUE;
4978 }
4979 /* }}} */
4980
4981
php_mb_ord(const char * str,size_t str_len,zend_string * enc_name)4982 static inline zend_long php_mb_ord(const char *str, size_t str_len, zend_string *enc_name)
4983 {
4984 const mbfl_encoding *enc;
4985 enum mbfl_no_encoding no_enc;
4986
4987 enc = php_mb_get_encoding(enc_name);
4988 if (!enc) {
4989 return -1;
4990 }
4991
4992 no_enc = enc->no_encoding;
4993 if (php_mb_is_unsupported_no_encoding(no_enc)) {
4994 php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc->name);
4995 return -1;
4996 }
4997
4998 if (str_len == 0) {
4999 php_error_docref(NULL, E_WARNING, "Empty string");
5000 return -1;
5001 }
5002
5003 {
5004 mbfl_wchar_device dev;
5005 mbfl_convert_filter *filter;
5006 zend_long cp;
5007
5008 mbfl_wchar_device_init(&dev);
5009 filter = mbfl_convert_filter_new(
5010 enc, &mbfl_encoding_wchar,
5011 mbfl_wchar_device_output, 0, &dev);
5012 if (!filter) {
5013 php_error_docref(NULL, E_WARNING, "Creation of filter failed");
5014 return -1;
5015 }
5016
5017 mbfl_convert_filter_feed_string(filter, (const unsigned char *) str, str_len);
5018 mbfl_convert_filter_flush(filter);
5019
5020 if (dev.pos < 1 || filter->num_illegalchar || dev.buffer[0] >= MBFL_WCSGROUP_UCS4MAX) {
5021 mbfl_convert_filter_delete(filter);
5022 mbfl_wchar_device_clear(&dev);
5023 return -1;
5024 }
5025
5026 cp = dev.buffer[0];
5027 mbfl_convert_filter_delete(filter);
5028 mbfl_wchar_device_clear(&dev);
5029 return cp;
5030 }
5031 }
5032
5033
5034 /* {{{ proto int|false mb_ord([string str[, string encoding]]) */
PHP_FUNCTION(mb_ord)5035 PHP_FUNCTION(mb_ord)
5036 {
5037 char *str;
5038 size_t str_len;
5039 zend_string *enc = NULL;
5040 zend_long cp;
5041
5042 ZEND_PARSE_PARAMETERS_START(1, 2)
5043 Z_PARAM_STRING(str, str_len)
5044 Z_PARAM_OPTIONAL
5045 Z_PARAM_STR(enc)
5046 ZEND_PARSE_PARAMETERS_END();
5047
5048 cp = php_mb_ord(str, str_len, enc);
5049
5050 if (0 > cp) {
5051 RETURN_FALSE;
5052 }
5053
5054 RETURN_LONG(cp);
5055 }
5056 /* }}} */
5057
5058
/* Encode the code point `cp` in the encoding resolved by
 * php_mb_get_encoding(enc_name).
 *
 * Returns a zend_string holding the encoded character, or NULL when:
 *  - the encoding cannot be resolved or is unsupported,
 *  - `cp` is outside 0..0x10ffff,
 *  - the target is UTF-8 and `cp` lies in the surrogate range 0xd800..0xdfff,
 *  - the conversion returns no result or reports illegal characters.
 *
 * MBSTRG(illegalchars) is temporarily reset around the conversion and always
 * restored to its previous value before returning. */
static inline zend_string *php_mb_chr(zend_long cp, zend_string *enc_name)
{
	const mbfl_encoding *enc;
	enum mbfl_no_encoding no_enc;
	zend_string *ret;
	char buf[5];
	char *ret_str;
	size_t ret_len;
	long orig_illegalchars;
	int failed;

	enc = php_mb_get_encoding(enc_name);
	if (!enc) {
		return NULL;
	}

	no_enc = enc->no_encoding;
	if (php_mb_is_unsupported_no_encoding(no_enc)) {
		php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc->name);
		return NULL;
	}

	if (cp < 0 || cp > 0x10ffff) {
		return NULL;
	}

	if (php_mb_is_no_encoding_utf8(no_enc)) {
		/* Surrogates are not encodable in UTF-8. */
		if (cp > 0xd7ff && 0xe000 > cp) {
			return NULL;
		}

		/* Hand-rolled UTF-8 encoder: 1 to 4 bytes depending on magnitude. */
		if (cp < 0x80) {
			ret = ZSTR_CHAR(cp);
		} else if (cp < 0x800) {
			ret = zend_string_alloc(2, 0);
			ZSTR_VAL(ret)[0] = 0xc0 | (cp >> 6);
			ZSTR_VAL(ret)[1] = 0x80 | (cp & 0x3f);
			ZSTR_VAL(ret)[2] = 0;
		} else if (cp < 0x10000) {
			ret = zend_string_alloc(3, 0);
			ZSTR_VAL(ret)[0] = 0xe0 | (cp >> 12);
			ZSTR_VAL(ret)[1] = 0x80 | ((cp >> 6) & 0x3f);
			ZSTR_VAL(ret)[2] = 0x80 | (cp & 0x3f);
			ZSTR_VAL(ret)[3] = 0;
		} else {
			ret = zend_string_alloc(4, 0);
			ZSTR_VAL(ret)[0] = 0xf0 | (cp >> 18);
			ZSTR_VAL(ret)[1] = 0x80 | ((cp >> 12) & 0x3f);
			ZSTR_VAL(ret)[2] = 0x80 | ((cp >> 6) & 0x3f);
			ZSTR_VAL(ret)[3] = 0x80 | (cp & 0x3f);
			ZSTR_VAL(ret)[4] = 0;
		}

		return ret;
	}

	/* Any other encoding: lay the code point out as four big-endian bytes and
	 * convert from UCS-4BE. A small stack buffer suffices, so there is nothing
	 * to free on the error paths. */
	buf[0] = (char) ((cp >> 24) & 0xff);
	buf[1] = (char) ((cp >> 16) & 0xff);
	buf[2] = (char) ((cp >> 8) & 0xff);
	buf[3] = (char) (cp & 0xff);
	buf[4] = 0;

	orig_illegalchars = MBSTRG(illegalchars);
	MBSTRG(illegalchars) = 0;
	ret_str = php_mb_convert_encoding_ex(buf, sizeof(buf) - 1, enc, &mbfl_encoding_ucs4be, &ret_len);
	/* A NULL result must be treated as failure: ret_len is not meaningful
	 * then and ret_str must not be copied from. */
	failed = (ret_str == NULL || MBSTRG(illegalchars) != 0);
	MBSTRG(illegalchars) = orig_illegalchars;

	if (failed) {
		if (ret_str != NULL) {
			efree(ret_str);
		}
		return NULL;
	}

	ret = zend_string_init(ret_str, ret_len, 0);
	efree(ret_str);

	return ret;
}
5141
5142
5143 /* {{{ proto string|false mb_chr([int cp[, string encoding]]) */
PHP_FUNCTION(mb_chr)5144 PHP_FUNCTION(mb_chr)
5145 {
5146 zend_long cp;
5147 zend_string *enc = NULL;
5148 zend_string* ret;
5149
5150 ZEND_PARSE_PARAMETERS_START(1, 2)
5151 Z_PARAM_LONG(cp)
5152 Z_PARAM_OPTIONAL
5153 Z_PARAM_STR(enc)
5154 ZEND_PARSE_PARAMETERS_END();
5155
5156 ret = php_mb_chr(cp, enc);
5157 if (ret == NULL) {
5158 RETURN_FALSE;
5159 }
5160
5161 RETURN_STR(ret);
5162 }
5163 /* }}} */
5164
5165
/* Run `str` through php_mb_convert_encoding_ex() with `enc` as both source and
 * target encoding, storing the output length in `*ret_len`.
 * NOTE(review): presumably this makes the converter substitute invalid byte
 * sequences; the substitution policy is not visible from here.
 * The result is whatever php_mb_convert_encoding_ex() returns; the caller in
 * this file treats NULL as failure and efree()s a non-NULL result. */
static inline char* php_mb_scrub(const char* str, size_t str_len, const mbfl_encoding *enc, size_t *ret_len)
{
	char *scrubbed = php_mb_convert_encoding_ex(str, str_len, enc, enc, ret_len);

	return scrubbed;
}
5170
5171
5172 /* {{{ proto string|false mb_scrub([string str[, string encoding]]) */
PHP_FUNCTION(mb_scrub)5173 PHP_FUNCTION(mb_scrub)
5174 {
5175 const mbfl_encoding *enc;
5176 char* str;
5177 size_t str_len;
5178 zend_string *enc_name = NULL;
5179 char *ret;
5180 size_t ret_len;
5181
5182 ZEND_PARSE_PARAMETERS_START(1, 2)
5183 Z_PARAM_STRING(str, str_len)
5184 Z_PARAM_OPTIONAL
5185 Z_PARAM_STR(enc_name)
5186 ZEND_PARSE_PARAMETERS_END();
5187
5188 enc = php_mb_get_encoding(enc_name);
5189 if (!enc) {
5190 RETURN_FALSE;
5191 }
5192
5193 ret = php_mb_scrub(str, str_len, enc, &ret_len);
5194
5195 if (ret == NULL) {
5196 RETURN_FALSE;
5197 }
5198
5199 RETVAL_STRINGL(ret, ret_len);
5200 efree(ret);
5201 }
5202 /* }}} */
5203
5204
5205 /* {{{ php_mb_populate_current_detect_order_list */
php_mb_populate_current_detect_order_list(void)5206 static void php_mb_populate_current_detect_order_list(void)
5207 {
5208 const mbfl_encoding **entry = 0;
5209 size_t nentries;
5210
5211 if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
5212 nentries = MBSTRG(detect_order_list_size);
5213 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
5214 memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
5215 } else {
5216 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
5217 size_t i;
5218 nentries = MBSTRG(default_detect_order_list_size);
5219 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
5220 for (i = 0; i < nentries; i++) {
5221 entry[i] = mbfl_no2encoding(src[i]);
5222 }
5223 }
5224 MBSTRG(current_detect_order_list) = entry;
5225 MBSTRG(current_detect_order_list_size) = nentries;
5226 }
5227 /* }}} */
5228
5229 /* {{{ static int php_mb_encoding_translation() */
php_mb_encoding_translation(void)5230 static int php_mb_encoding_translation(void)
5231 {
5232 return MBSTRG(encoding_translation);
5233 }
5234 /* }}} */
5235
5236 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)5237 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
5238 {
5239 if (enc != NULL) {
5240 if (enc->flag & MBFL_ENCTYPE_MBCS) {
5241 if (enc->mblen_table != NULL) {
5242 if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
5243 }
5244 } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
5245 return 2;
5246 } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
5247 return 4;
5248 }
5249 }
5250 return 1;
5251 }
5252 /* }}} */
5253
5254 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
php_mb_mbchar_bytes(const char * s)5255 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s)
5256 {
5257 return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
5258 }
5259 /* }}} */
5260
5261 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)5262 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
5263 {
5264 register const char *p = s;
5265 char *last=NULL;
5266
5267 if (nbytes == (size_t)-1) {
5268 size_t nb = 0;
5269
5270 while (*p != '\0') {
5271 if (nb == 0) {
5272 if ((unsigned char)*p == (unsigned char)c) {
5273 last = (char *)p;
5274 }
5275 nb = php_mb_mbchar_bytes_ex(p, enc);
5276 if (nb == 0) {
5277 return NULL; /* something is going wrong! */
5278 }
5279 }
5280 --nb;
5281 ++p;
5282 }
5283 } else {
5284 register size_t bcnt = nbytes;
5285 register size_t nbytes_char;
5286 while (bcnt > 0) {
5287 if ((unsigned char)*p == (unsigned char)c) {
5288 last = (char *)p;
5289 }
5290 nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
5291 if (bcnt < nbytes_char) {
5292 return NULL;
5293 }
5294 p += nbytes_char;
5295 bcnt -= nbytes_char;
5296 }
5297 }
5298 return last;
5299 }
5300 /* }}} */
5301
5302 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
php_mb_safe_strrchr(const char * s,unsigned int c,size_t nbytes)5303 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes)
5304 {
5305 return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
5306 }
5307 /* }}} */
5308
5309 /* {{{ MBSTRING_API int php_mb_stripos()
5310 */
php_mb_stripos(int mode,const char * old_haystack,size_t old_haystack_len,const char * old_needle,size_t old_needle_len,zend_long offset,zend_string * from_encoding)5311 MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, zend_string *from_encoding)
5312 {
5313 size_t n = (size_t) -1;
5314 mbfl_string haystack, needle;
5315 const mbfl_encoding *enc;
5316
5317 enc = php_mb_get_encoding(from_encoding);
5318 if (!enc) {
5319 return (size_t) -1;
5320 }
5321
5322 mbfl_string_init(&haystack);
5323 mbfl_string_init(&needle);
5324 haystack.no_language = MBSTRG(language);
5325 haystack.encoding = enc;
5326 needle.no_language = MBSTRG(language);
5327 needle.encoding = enc;
5328
5329 do {
5330 /* We're using simple case-folding here, because we'd have to deal with remapping of
5331 * offsets otherwise. */
5332
5333 size_t len = 0;
5334 haystack.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &len, enc);
5335 haystack.len = len;
5336
5337 if (!haystack.val) {
5338 break;
5339 }
5340
5341 if (haystack.len == 0) {
5342 break;
5343 }
5344
5345 needle.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &len, enc);
5346 needle.len = len;
5347
5348 if (!needle.val) {
5349 break;
5350 }
5351
5352 if (needle.len == 0) {
5353 break;
5354 }
5355
5356 if (offset != 0) {
5357 size_t haystack_char_len = mbfl_strlen(&haystack);
5358
5359 if (mode) {
5360 if ((offset > 0 && (size_t)offset > haystack_char_len) ||
5361 (offset < 0 && (size_t)(-offset) > haystack_char_len)) {
5362 php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
5363 break;
5364 }
5365 } else {
5366 if (offset < 0) {
5367 offset += (zend_long)haystack_char_len;
5368 }
5369 if (offset < 0 || (size_t)offset > haystack_char_len) {
5370 php_error_docref(NULL, E_WARNING, "Offset not contained in string");
5371 break;
5372 }
5373 }
5374 }
5375
5376 n = mbfl_strpos(&haystack, &needle, offset, mode);
5377 } while(0);
5378
5379 if (haystack.val) {
5380 efree(haystack.val);
5381 }
5382
5383 if (needle.val) {
5384 efree(needle.val);
5385 }
5386
5387 return n;
5388 }
5389 /* }}} */
5390
php_mb_gpc_get_detect_order(const zend_encoding *** list,size_t * list_size)5391 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */
5392 {
5393 *list = (const zend_encoding **)MBSTRG(http_input_list);
5394 *list_size = MBSTRG(http_input_list_size);
5395 }
5396 /* }}} */
5397
php_mb_gpc_set_input_encoding(const zend_encoding * encoding)5398 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */
5399 {
5400 MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
5401 }
5402 /* }}} */
5403
5404 #endif /* HAVE_MBSTRING */
5405