1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | http://www.php.net/license/3_01.txt                                  |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Vadim Savchuk <vsavchuk@productengine.com>                  |
12    |          Dmitry Lakhtyuk <dlakhtyuk@productengine.com>               |
13    +----------------------------------------------------------------------+
14  */
15 
16 #ifdef HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19 
20 #include "php_intl.h"
21 #include "collator.h"
22 #include "collator_class.h"
23 #include "collator_sort.h"
24 #include "collator_convert.h"
25 #include "intl_convert.h"
26 
27 #if !defined(HAVE_PTRDIFF_T) && !defined(_PTRDIFF_T_DEFINED)
28 typedef zend_long ptrdiff_t;
29 #endif
30 
31 /**
32  * Declare 'index' which will point to sort key in sort key
33  * buffer.
34  */
35 typedef struct _collator_sort_key_index {
36 	char* key;       /* pointer to sort key */
37 	zval* zstr;     /* pointer to original string(hash-item) */
38 } collator_sort_key_index_t;
39 
40 ZEND_EXTERN_MODULE_GLOBALS( intl )
41 
42 static const size_t DEF_SORT_KEYS_BUF_SIZE = 1048576;
43 static const size_t DEF_SORT_KEYS_BUF_INCREMENT = 1048576;
44 
45 static const size_t DEF_SORT_KEYS_INDX_BUF_SIZE = 1048576;
46 static const size_t DEF_SORT_KEYS_INDX_BUF_INCREMENT = 1048576;
47 
48 static const size_t DEF_UTF16_BUF_SIZE = 1024;
49 
50 /* {{{ collator_regular_compare_function */
collator_regular_compare_function(zval * result,zval * op1,zval * op2)51 static int collator_regular_compare_function(zval *result, zval *op1, zval *op2)
52 {
53 	int rc = SUCCESS;
54 	zval str1, str2;
55 	zval num1, num2;
56 	zval norm1, norm2;
57 	zval *num1_p = NULL, *num2_p = NULL;
58 	zval *norm1_p = NULL, *norm2_p = NULL;
59 	zval *str1_p, *str2_p;
60 
61 	ZVAL_NULL(&str1);
62 	str1_p  = collator_convert_object_to_string( op1, &str1 );
63 	ZVAL_NULL(&str2);
64 	str2_p  = collator_convert_object_to_string( op2, &str2 );
65 
66 	/* If both args are strings AND either of args is not numeric string
67 	 * then use ICU-compare. Otherwise PHP-compare. */
68 	if( Z_TYPE_P(str1_p) == IS_STRING && Z_TYPE_P(str2_p) == IS_STRING &&
69 		( str1_p == ( num1_p = collator_convert_string_to_number_if_possible( str1_p, &num1 ) ) ||
70 		  str2_p == ( num2_p = collator_convert_string_to_number_if_possible( str2_p, &num2 ) ) ) )
71 	{
72 		/* Compare the strings using ICU. */
73 		ZEND_ASSERT(INTL_G(current_collator) != NULL);
74 		ZVAL_LONG(result, ucol_strcoll(
75 					INTL_G(current_collator),
76 					INTL_Z_STRVAL_P(str1_p), INTL_Z_STRLEN_P(str1_p),
77 					INTL_Z_STRVAL_P(str2_p), INTL_Z_STRLEN_P(str2_p) ));
78 	}
79 	else
80 	{
81 		/* num1 is set if str1 and str2 are strings. */
82 		if( num1_p )
83 		{
84 			if( num1_p == str1_p )
85 			{
86 				/* str1 is string but not numeric string
87 				 * just convert it to utf8.
88 				 */
89 				norm1_p = collator_convert_zstr_utf16_to_utf8( str1_p, &norm1 );
90 
91 				/* num2 is not set but str2 is string => do normalization. */
92 				norm2_p = collator_normalize_sort_argument( str2_p, &norm2 );
93 			}
94 			else
95 			{
96 				/* str1 is numeric strings => passthru to PHP-compare. */
97 				Z_TRY_ADDREF_P(num1_p);
98 				norm1_p = num1_p;
99 
100 				/* str2 is numeric strings => passthru to PHP-compare. */
101 				Z_TRY_ADDREF_P(num2_p);
102 				norm2_p = num2_p;
103 			}
104 		}
105 		else
106 		{
107 			/* num1 is not set if str1 or str2 is not a string => do normalization. */
108 			norm1_p = collator_normalize_sort_argument( str1_p, &norm1 );
109 
110 			/* if num1 is not set then num2 is not set as well => do normalization. */
111 			norm2_p = collator_normalize_sort_argument( str2_p, &norm2 );
112 		}
113 
114 		rc = compare_function( result, norm1_p, norm2_p );
115 
116 		zval_ptr_dtor( norm1_p );
117 		zval_ptr_dtor( norm2_p );
118 	}
119 
120 	if( num1_p )
121 		zval_ptr_dtor( num1_p );
122 
123 	if( num2_p )
124 		zval_ptr_dtor( num2_p );
125 
126 	zval_ptr_dtor( str1_p );
127 	zval_ptr_dtor( str2_p );
128 
129 	return rc;
130 }
131 /* }}} */
132 
133 /* {{{ collator_numeric_compare_function
134  * Convert input args to double and compare it.
135  */
collator_numeric_compare_function(zval * result,zval * op1,zval * op2)136 static int collator_numeric_compare_function(zval *result, zval *op1, zval *op2)
137 {
138 	zval num1, num2;
139 	zval *num1_p = NULL;
140 	zval *num2_p = NULL;
141 
142 	if( Z_TYPE_P(op1) == IS_STRING )
143 	{
144 		num1_p = collator_convert_string_to_double( op1, &num1 );
145 		op1 = num1_p;
146 	}
147 
148 	if( Z_TYPE_P(op2) == IS_STRING )
149 	{
150 		num2_p = collator_convert_string_to_double( op2, &num2 );
151 		op2 = num2_p;
152 	}
153 
154 	ZVAL_LONG(result, numeric_compare_function(op1, op2));
155 
156 	if( num1_p )
157 		zval_ptr_dtor( num1_p );
158 	if( num2_p )
159 		zval_ptr_dtor( num2_p );
160 
161 	return SUCCESS;
162 }
163 /* }}} */
164 
165 /* {{{ collator_icu_compare_function
166  * Direct use of ucol_strcoll.
167 */
collator_icu_compare_function(zval * result,zval * op1,zval * op2)168 static int collator_icu_compare_function(zval *result, zval *op1, zval *op2)
169 {
170 	zval str1, str2;
171 	int rc              = SUCCESS;
172 	zval *str1_p        = NULL;
173 	zval *str2_p        = NULL;
174 
175 	str1_p = collator_make_printable_zval( op1, &str1);
176 	str2_p = collator_make_printable_zval( op2, &str2 );
177 
178 	/* Compare the strings using ICU. */
179 	ZEND_ASSERT(INTL_G(current_collator) != NULL);
180 	ZVAL_LONG(result, ucol_strcoll(
181 				INTL_G(current_collator),
182 				INTL_Z_STRVAL_P(str1_p), INTL_Z_STRLEN_P(str1_p),
183 				INTL_Z_STRVAL_P(str2_p), INTL_Z_STRLEN_P(str2_p) ));
184 
185 	zval_ptr_dtor( str1_p );
186 	zval_ptr_dtor( str2_p );
187 
188 	return rc;
189 }
190 /* }}} */
191 
192 /* {{{ collator_compare_func
193  * Taken from PHP7 source (array_data_compare).
194  */
collator_compare_func(Bucket * f,Bucket * s)195 static int collator_compare_func(Bucket *f, Bucket *s)
196 {
197 	zval result;
198 	zval *first = &f->val;
199 	zval *second = &s->val;
200 
201 	if( INTL_G(compare_func)( &result, first, second) == FAILURE )
202 		return 0;
203 
204 	if( Z_TYPE(result) == IS_DOUBLE )
205 	{
206 		if( Z_DVAL(result) < 0 )
207 			return -1;
208 		else if( Z_DVAL(result) > 0 )
209 			return 1;
210 		else
211 			return 0;
212 	}
213 
214 	convert_to_long(&result);
215 
216 	if( Z_LVAL(result) < 0 )
217 		return -1;
218 	else if( Z_LVAL(result) > 0 )
219 		return 1;
220 
221 	return 0;
222 }
223 /* }}} */
224 
225 /* {{{ Compare sort keys */
collator_cmp_sort_keys(const void * p1,const void * p2)226 static int collator_cmp_sort_keys( const void *p1, const void *p2 )
227 {
228 	char* key1 = ((collator_sort_key_index_t*)p1)->key;
229 	char* key2 = ((collator_sort_key_index_t*)p2)->key;
230 
231 	return strcmp( key1, key2 );
232 }
233 /* }}} */
234 
235 /* {{{ Choose compare function according to sort flags. */
collator_get_compare_function(const zend_long sort_flags)236 static collator_compare_func_t collator_get_compare_function( const zend_long sort_flags )
237 {
238 	collator_compare_func_t func;
239 
240 	switch( sort_flags )
241 	{
242 		case COLLATOR_SORT_NUMERIC:
243 			func = collator_numeric_compare_function;
244 			break;
245 
246 		case COLLATOR_SORT_STRING:
247 			func = collator_icu_compare_function;
248 			break;
249 
250 		case COLLATOR_SORT_REGULAR:
251 		default:
252 			func = collator_regular_compare_function;
253 			break;
254 	}
255 
256 	return func;
257 }
258 /* }}} */
259 
260 /* {{{ Common code shared by collator_sort() and collator_asort() API functions. */
collator_sort_internal(int renumber,INTERNAL_FUNCTION_PARAMETERS)261 static void collator_sort_internal( int renumber, INTERNAL_FUNCTION_PARAMETERS )
262 {
263 	UCollator*     saved_collator;
264 	zval*          array            = NULL;
265 	HashTable*     hash             = NULL;
266 	zend_long           sort_flags       = COLLATOR_SORT_REGULAR;
267 
268 	COLLATOR_METHOD_INIT_VARS
269 
270 	/* Parse parameters. */
271 	if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "Oa/|l",
272 		&object, Collator_ce_ptr, &array, &sort_flags ) == FAILURE )
273 	{
274 		RETURN_THROWS();
275 	}
276 
277 	/* Fetch the object. */
278 	COLLATOR_METHOD_FETCH_OBJECT;
279 
280 	if (!co->ucoll) {
281 		zend_throw_error(NULL, "Object not initialized");
282 		RETURN_THROWS();
283 	}
284 
285 	/* Set 'compare function' according to sort flags. */
286 	INTL_G(compare_func) = collator_get_compare_function( sort_flags );
287 
288 	hash = Z_ARRVAL_P( array );
289 
290 	/* Convert strings in the specified array from UTF-8 to UTF-16. */
291 	collator_convert_hash_from_utf8_to_utf16( hash, COLLATOR_ERROR_CODE_P( co ) );
292 	COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-8 to UTF-16" );
293 
294 	/* Save specified collator in the request-global (?) variable. */
295 	saved_collator = INTL_G( current_collator );
296 	INTL_G( current_collator ) = co->ucoll;
297 
298 	/* Sort specified array. */
299 	zend_hash_sort(hash, collator_compare_func, renumber);
300 
301 	/* Restore saved collator. */
302 	INTL_G( current_collator ) = saved_collator;
303 
304 	/* Convert strings in the specified array back to UTF-8. */
305 	collator_convert_hash_from_utf16_to_utf8( hash, COLLATOR_ERROR_CODE_P( co ) );
306 	COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-16 to UTF-8" );
307 
308 	RETURN_TRUE;
309 }
310 /* }}} */
311 
312 /* {{{ Sort array using specified collator. */
PHP_FUNCTION(collator_sort)313 PHP_FUNCTION( collator_sort )
314 {
315 	collator_sort_internal( true, INTERNAL_FUNCTION_PARAM_PASSTHRU );
316 }
317 /* }}} */
318 
collator_sortkey_swap(collator_sort_key_index_t * p,collator_sort_key_index_t * q)319 static void collator_sortkey_swap(collator_sort_key_index_t *p, collator_sort_key_index_t *q) /* {{{ */
320 {
321 	collator_sort_key_index_t t;
322 	t = *p;
323 	*p = *q;
324 	*q = t;
325 }
326 /* }}} */
327 
328 /* {{{ Equivalent to standard PHP sort using Collator.
329  * Uses ICU ucol_getSortKey for performance.
330  */
PHP_FUNCTION(collator_sort_with_sort_keys)331 PHP_FUNCTION( collator_sort_with_sort_keys )
332 {
333 	zval*       array                = NULL;
334 	zval        garbage;
335 	HashTable*  hash                 = NULL;
336 	zval*       hashData             = NULL;                     /* currently processed item of input hash */
337 
338 	char*       sortKeyBuf           = NULL;                     /* buffer to store sort keys */
339 	uint32_t    sortKeyBufSize       = DEF_SORT_KEYS_BUF_SIZE;   /* buffer size */
340 	ptrdiff_t   sortKeyBufOffset     = 0;                        /* pos in buffer to store sort key */
341 	uint32_t    sortKeyLen           = 0;                        /* the length of currently processing key */
342 	uint32_t    bufLeft              = 0;
343 	uint32_t    bufIncrement         = 0;
344 
345 	collator_sort_key_index_t* sortKeyIndxBuf = NULL;            /* buffer to store 'indexes' which will be passed to 'qsort' */
346 	uint32_t    sortKeyIndxBufSize   = DEF_SORT_KEYS_INDX_BUF_SIZE;
347 	uint32_t    sortKeyIndxSize      = sizeof( collator_sort_key_index_t );
348 
349 	uint32_t    sortKeyCount         = 0;
350 	uint32_t    j                    = 0;
351 
352 	UChar*      utf16_buf            = NULL;                     /* tmp buffer to hold current processing string in utf-16 */
353 	int         utf16_buf_size       = DEF_UTF16_BUF_SIZE;       /* the length of utf16_buf */
354 	int         utf16_len            = 0;                        /* length of converted string */
355 
356 	COLLATOR_METHOD_INIT_VARS
357 
358 	/* Parse parameters. */
359 	if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "Oa",
360 		&object, Collator_ce_ptr, &array ) == FAILURE )
361 	{
362 		RETURN_THROWS();
363 	}
364 
365 	/* Fetch the object. */
366 	COLLATOR_METHOD_FETCH_OBJECT;
367 
368 	if (!co || !co->ucoll) {
369 		intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) );
370 		intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ),
371 			"Object not initialized", 0 );
372 		zend_throw_error(NULL, "Object not initialized");
373 
374 		RETURN_THROWS();
375 	}
376 
377 	/*
378 	 * Sort specified array.
379 	 */
380 	hash = Z_ARRVAL_P( array );
381 
382 	if( !hash || zend_hash_num_elements( hash ) == 0 )
383 		RETURN_TRUE;
384 
385 	/* Create bufers */
386 	sortKeyBuf     = ecalloc( sortKeyBufSize,     sizeof( char    ) );
387 	sortKeyIndxBuf = ecalloc( sortKeyIndxBufSize, sizeof( uint8_t ) );
388 	utf16_buf      = eumalloc( utf16_buf_size );
389 
390 	/* Iterate through input hash and create a sort key for each value. */
391 	ZEND_HASH_FOREACH_VAL(hash, hashData) {
392 		/* Convert current hash item from UTF-8 to UTF-16LE and save the result to utf16_buf. */
393 
394 		utf16_len = utf16_buf_size;
395 
396 		/* Process string values only. */
397 		if( Z_TYPE_P( hashData ) == IS_STRING )
398 		{
399 			intl_convert_utf8_to_utf16( &utf16_buf, &utf16_len, Z_STRVAL_P( hashData ), Z_STRLEN_P( hashData ), COLLATOR_ERROR_CODE_P( co ) );
400 
401 			if( U_FAILURE( COLLATOR_ERROR_CODE( co ) ) )
402 			{
403 				intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) );
404 				intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), "Sort with sort keys failed", 0 );
405 
406 				if( utf16_buf )
407 					efree( utf16_buf );
408 
409 				efree( sortKeyIndxBuf );
410 				efree( sortKeyBuf );
411 
412 				RETURN_FALSE;
413 			}
414 		}
415 		else
416 		{
417 			/* Set empty string */
418 			utf16_len = 0;
419 			utf16_buf[utf16_len] = 0;
420 		}
421 
422 		if( (utf16_len + 1) > utf16_buf_size )
423 			utf16_buf_size = utf16_len + 1;
424 
425 		/* Get sort key, reallocating the buffer if needed. */
426 		bufLeft = sortKeyBufSize - sortKeyBufOffset;
427 
428 		sortKeyLen = ucol_getSortKey( co->ucoll,
429 									  utf16_buf,
430 									  utf16_len,
431 									  (uint8_t*)sortKeyBuf + sortKeyBufOffset,
432 									  bufLeft );
433 
434 		/* check for sortKeyBuf overflow, increasing its size of the buffer if needed */
435 		if( sortKeyLen > bufLeft )
436 		{
437 			bufIncrement = ( sortKeyLen > DEF_SORT_KEYS_BUF_INCREMENT ) ? sortKeyLen : DEF_SORT_KEYS_BUF_INCREMENT;
438 
439 			sortKeyBufSize += bufIncrement;
440 			bufLeft += bufIncrement;
441 
442 			sortKeyBuf = erealloc( sortKeyBuf, sortKeyBufSize );
443 
444 			sortKeyLen = ucol_getSortKey( co->ucoll, utf16_buf, utf16_len, (uint8_t*)sortKeyBuf + sortKeyBufOffset, bufLeft );
445 		}
446 
447 		/*  check sortKeyIndxBuf overflow, increasing its size of the buffer if needed */
448 		if( ( sortKeyCount + 1 ) * sortKeyIndxSize > sortKeyIndxBufSize )
449 		{
450 			bufIncrement = ( sortKeyIndxSize > DEF_SORT_KEYS_INDX_BUF_INCREMENT ) ? sortKeyIndxSize : DEF_SORT_KEYS_INDX_BUF_INCREMENT;
451 
452 			sortKeyIndxBufSize += bufIncrement;
453 
454 			sortKeyIndxBuf = erealloc( sortKeyIndxBuf, sortKeyIndxBufSize );
455 		}
456 
457 		sortKeyIndxBuf[sortKeyCount].key = (char*)sortKeyBufOffset;    /* remember just offset, cause address */
458 		                                                               /* of 'sortKeyBuf' may be changed due to realloc. */
459 		sortKeyIndxBuf[sortKeyCount].zstr = hashData;
460 
461 		sortKeyBufOffset += sortKeyLen;
462 		++sortKeyCount;
463 
464 	} ZEND_HASH_FOREACH_END();
465 
466 	/* update ptrs to point to valid keys. */
467 	for( j = 0; j < sortKeyCount; j++ )
468 		sortKeyIndxBuf[j].key = sortKeyBuf + (ptrdiff_t)sortKeyIndxBuf[j].key;
469 
470 	/* sort it */
471 	zend_sort( sortKeyIndxBuf, sortKeyCount,
472 			sortKeyIndxSize, collator_cmp_sort_keys, (swap_func_t)collator_sortkey_swap);
473 
474 	ZVAL_COPY_VALUE(&garbage, array);
475 	/* for resulting hash we'll assign new hash keys rather then reordering */
476 	array_init(array);
477 
478 	for( j = 0; j < sortKeyCount; j++ )
479 	{
480 		Z_TRY_ADDREF_P( sortKeyIndxBuf[j].zstr );
481 		zend_hash_next_index_insert( Z_ARRVAL_P(array), sortKeyIndxBuf[j].zstr);
482 	}
483 
484 	if( utf16_buf )
485 		efree( utf16_buf );
486 
487 	zval_ptr_dtor(&garbage);
488 	efree( sortKeyIndxBuf );
489 	efree( sortKeyBuf );
490 
491 	RETURN_TRUE;
492 }
493 /* }}} */
494 
495 /* {{{ Sort array using specified collator, maintaining index association. */
PHP_FUNCTION(collator_asort)496 PHP_FUNCTION( collator_asort )
497 {
498 	collator_sort_internal( false, INTERNAL_FUNCTION_PARAM_PASSTHRU );
499 }
500 /* }}} */
501 
502 /* {{{ Get a sort key for a string from a Collator. */
PHP_FUNCTION(collator_get_sort_key)503 PHP_FUNCTION( collator_get_sort_key )
504 {
505 	char*            str      = NULL;
506 	size_t           str_len  = 0;
507 	UChar*           ustr     = NULL;
508 	int32_t          ustr_len = 0;
509 	int              key_len = 0;
510 	zend_string*     key_str;
511 
512 	COLLATOR_METHOD_INIT_VARS
513 
514 	/* Parse parameters. */
515 	if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "Os",
516 		&object, Collator_ce_ptr, &str, &str_len ) == FAILURE )
517 	{
518 		RETURN_THROWS();
519 	}
520 
521 	/* Fetch the object. */
522 	COLLATOR_METHOD_FETCH_OBJECT;
523 
524 	if (!co || !co->ucoll) {
525 		intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) );
526 		intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ),
527 			"Object not initialized", 0 );
528 		zend_throw_error(NULL, "Object not initialized");
529 
530 		RETURN_THROWS();
531 	}
532 
533 	/*
534 	 * Compare given strings (converting them to UTF-16 first).
535 	 */
536 
537 	/* First convert the strings to UTF-16. */
538 	intl_convert_utf8_to_utf16(
539 		&ustr, &ustr_len, str, str_len, COLLATOR_ERROR_CODE_P( co ) );
540 	if( U_FAILURE( COLLATOR_ERROR_CODE( co ) ) )
541 	{
542 		/* Set global error code. */
543 		intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) );
544 
545 		/* Set error messages. */
546 		intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ),
547 			"Error converting first argument to UTF-16", 0 );
548 		efree( ustr );
549 		RETURN_FALSE;
550 	}
551 
552 	/* ucol_getSortKey is exception in that the key length includes the
553 	 * NUL terminator*/
554 	key_len = ucol_getSortKey(co->ucoll, ustr, ustr_len, NULL, 0);
555 	if(!key_len) {
556 		efree( ustr );
557 		RETURN_FALSE;
558 	}
559 	key_str = zend_string_alloc(key_len, 0);
560 	key_len = ucol_getSortKey(co->ucoll, ustr, ustr_len, (uint8_t*)ZSTR_VAL(key_str), key_len);
561 	efree( ustr );
562 	if(!key_len) {
563 		RETURN_FALSE;
564 	}
565 	ZSTR_LEN(key_str) = key_len - 1;
566 	RETVAL_NEW_STR(key_str);
567 }
568 /* }}} */
569