xref: /freebsd/lib/libc/tests/string/wcscoll_test.c (revision c697fb7f)
1 /*-
2  * Copyright (c) 2016 Baptiste Daroussin <bapt@FreeBSD.org>
3  * Copyright 2016 Tom Lane <tgl@sss.pgh.pa.us>
4  * Copyright 2017 Nexenta Systems, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
#include <errno.h>
#include <locale.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <wchar.h>
37 
38 #include <atf-c.h>
39 
/*
 * qsort(3) comparator for arrays of wchar_t.
 *
 * Each argument points at a single wide character.  The two characters
 * are compared by collating them as one-character wide strings with
 * wcscoll(3), whose result is returned unchanged.
 */
static int
cmp(const void *a, const void *b)
{
	const wchar_t *lhs = a;
	const wchar_t *rhs = b;
	wchar_t lstr[2], rstr[2];

	/* wcscoll() needs NUL-terminated strings, not bare characters. */
	lstr[0] = *lhs;
	lstr[1] = L'\0';
	rstr[0] = *rhs;
	rstr[1] = L'\0';

	return (wcscoll(lstr, rstr));
}
48 
49 ATF_TC_WITHOUT_HEAD(russian_collation);
50 ATF_TC_BODY(russian_collation, tc)
51 {
52 	wchar_t c[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё";
53 	wchar_t res[] = L"aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZаАбБвВгГдДеЕёЁжЖзЗиИйЙкКлЛмМнНоОпПрРсСтТуУфФхХцЦчЧшШщЩъЪыЫьЬэЭюЮяЯ";
54 
55 	ATF_CHECK_MSG(setlocale(LC_ALL, "ru_RU.UTF-8") != NULL,
56 	    "Fail to set locale to \"ru_RU.UTF-8\"");
57 	qsort(c, wcslen(c), sizeof(wchar_t), cmp);
58 	ATF_CHECK_MSG(wcscmp(c, res) == 0,
59 	    "Bad collation, expected: '%ls' got '%ls'", res, c);
60 }
61 
/*
 * Sizing for the strcoll()/strxfrm() comparison test:
 *   NSTRINGS   - number of random strings generated
 *   MAXSTRLEN  - size in bytes of each source string buffer
 *   MAXXFRMLEN - size in bytes of each strxfrm() output buffer
 */
enum {
	NSTRINGS = 2000,
	MAXSTRLEN = 20,
	MAXXFRMLEN = MAXSTRLEN * 20
};

/* One test string together with its strxfrm()-transformed form. */
typedef struct {
	char	sval[MAXSTRLEN];	/* source string */
	char	xval[MAXXFRMLEN];	/* strxfrm() output for sval */
} cstr;
70 
71 ATF_TC_WITHOUT_HEAD(strcoll_vs_strxfrm);
72 ATF_TC_BODY(strcoll_vs_strxfrm, tc)
73 {
74 	cstr	data[NSTRINGS];
75 	char	*curloc;
76 	int	i, j;
77 
78 	curloc = setlocale(LC_ALL, "en_US.UTF-8");
79 	ATF_CHECK_MSG(curloc != NULL, "Fail to set locale");
80 
81 	/* Ensure new random() values on every run */
82 	srandom((unsigned int) time(NULL));
83 
84 	/* Generate random UTF8 strings of length less than MAXSTRLEN bytes */
85 	for (i = 0; i < NSTRINGS; i++) {
86 		char	*p;
87 		int	len;
88 
89 again:
90 		p = data[i].sval;
91 		len = 1 + (random() % (MAXSTRLEN - 1));
92 		while (len > 0) {
93 			int c;
94 			/*
95 			 * Generate random printable char in ISO8859-1 range.
96 			 * Bias towards producing a lot of spaces.
97 			 */
98 
99 			if ((random() % 16) < 3) {
100 				c = ' ';
101 			} else {
102 				do {
103 					c = random() & 0xFF;
104 				} while (!((c >= ' ' && c <= 127) ||
105 				    (c >= 0xA0 && c <= 0xFF)));
106 			}
107 
108 			if (c <= 127) {
109 				*p++ = c;
110 				len--;
111 			} else {
112 				if (len < 2)
113 					break;
114 				/* Poor man's utf8-ification */
115 				*p++ = 0xC0 + (c >> 6);
116 				len--;
117 				*p++ = 0x80 + (c & 0x3F);
118 				len--;
119 			}
120 		}
121 		*p = '\0';
122 		/* strxfrm() each string as we produce it */
123 		errno = 0;
124 		ATF_CHECK_MSG(strxfrm(data[i].xval, data[i].sval,
125 		    MAXXFRMLEN) < MAXXFRMLEN, "strxfrm() result for %d-length "
126 		    " string exceeded %d bytes", (int)strlen(data[i].sval),
127 		    MAXXFRMLEN);
128 
129 		/*
130 		 * Amend strxfrm() failing on certain characters to be fixed and
131 		 * test later
132 		 */
133 		if (errno != 0)
134 			goto again;
135 	}
136 
137 	for (i = 0; i < NSTRINGS; i++) {
138 		for (j = 0; j < NSTRINGS; j++) {
139 			int sr = strcoll(data[i].sval, data[j].sval);
140 			int sx = strcmp(data[i].xval, data[j].xval);
141 
142 			ATF_CHECK_MSG(!((sr * sx < 0) ||
143 			    (sr * sx == 0 && sr + sx != 0)),
144 			    "%s: diff for \"%s\" and \"%s\"",
145 			    curloc, data[i].sval, data[j].sval);
146 		}
147 	}
148 }
149 
/*
 * Test program entry: adds the russian_collation and strcoll_vs_strxfrm
 * test cases to tp, in that order, and returns atf_no_error().
 */
ATF_TP_ADD_TCS(tp)
{
	ATF_TP_ADD_TC(tp, russian_collation);
	ATF_TP_ADD_TC(tp, strcoll_vs_strxfrm);

	return (atf_no_error());
}
157