xref: /freebsd/lib/libc/string/wcscoll.c (revision 535af610)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright 2017 Nexenta Systems, Inc.
5  * Copyright (c) 2002 Tim J. Robbins
6  * All rights reserved.
7  *
8  * Copyright (c) 2011 The FreeBSD Foundation
9  *
10  * Portions of this software were developed by David Chisnall
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 #include <errno.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <wchar.h>
42 #include "collate.h"
43 
44 int
45 wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
46 {
47 	int len1, len2, pri1, pri2;
48 	wchar_t *tr1 = NULL, *tr2 = NULL;
49 	int direc, pass;
50 	int ret = wcscmp(ws1, ws2);
51 
52 	FIX_LOCALE(locale);
53 	struct xlocale_collate *table =
54 		(struct xlocale_collate*)locale->components[XLC_COLLATE];
55 
56 	if (table->__collate_load_error || ret == 0)
57 		return (ret);
58 
59 	if (*ws1 == 0 && *ws2 != 0)
60 		return (-1);
61 	if (*ws1 != 0 && *ws2 == 0)
62 		return (1);
63 
64 	/*
65 	 * Once upon a time we had code to try to optimize this, but
66 	 * it turns out that you really can't make many assumptions
67 	 * safely.  You absolutely have to run this pass by pass,
68 	 * because some passes will be ignored for a given character,
69 	 * while others will not.  Simpler locales will benefit from
70 	 * having fewer passes, and most comparisons should resolve
71 	 * during the primary pass anyway.
72 	 *
73 	 * Note that we do one final extra pass at the end to pick
74 	 * up UNDEFINED elements.  There is special handling for them.
75 	 */
76 	for (pass = 0; pass <= table->info->directive_count; pass++) {
77 
78 		const int32_t *st1 = NULL;
79 		const int32_t *st2 = NULL;
80 		const wchar_t	*w1 = ws1;
81 		const wchar_t	*w2 = ws2;
82 
83 		/* special pass for UNDEFINED */
84 		if (pass == table->info->directive_count) {
85 			direc = DIRECTIVE_FORWARD;
86 		} else {
87 			direc = table->info->directive[pass];
88 		}
89 
90 		if (direc & DIRECTIVE_BACKWARD) {
91 			wchar_t *bp, *fp, c;
92 			free(tr1);
93 			if ((tr1 = wcsdup(w1)) == NULL)
94 				goto end;
95 			bp = tr1;
96 			fp = tr1 + wcslen(tr1) - 1;
97 			while (bp < fp) {
98 				c = *bp;
99 				*bp++ = *fp;
100 				*fp-- = c;
101 			}
102 			free(tr2);
103 			if ((tr2 = wcsdup(w2)) == NULL)
104 				goto end;
105 			bp = tr2;
106 			fp = tr2 + wcslen(tr2) - 1;
107 			while (bp < fp) {
108 				c = *bp;
109 				*bp++ = *fp;
110 				*fp-- = c;
111 			}
112 			w1 = tr1;
113 			w2 = tr2;
114 		}
115 
116 		if (direc & DIRECTIVE_POSITION) {
117 			int check1, check2;
118 			while (*w1 && *w2) {
119 				pri1 = pri2 = 0;
120 				check1 = check2 = 1;
121 				while ((pri1 == pri2) && (check1 || check2)) {
122 					if (check1) {
123 						_collate_lookup(table, w1, &len1,
124 						    &pri1, pass, &st1);
125 						if (pri1 < 0) {
126 							errno = EINVAL;
127 							goto end;
128 						}
129 						if (!pri1) {
130 							pri1 = COLLATE_MAX_PRIORITY;
131 							st1 = NULL;
132 						}
133 						check1 = (st1 != NULL);
134 					}
135 					if (check2) {
136 						_collate_lookup(table, w2, &len2,
137 						    &pri2, pass, &st2);
138 						if (pri2 < 0) {
139 							errno = EINVAL;
140 							goto end;
141 						}
142 						if (!pri2) {
143 							pri2 = COLLATE_MAX_PRIORITY;
144 							st2 = NULL;
145 						}
146 						check2 = (st2 != NULL);
147 					}
148 				}
149 				if (pri1 != pri2) {
150 					ret = pri1 - pri2;
151 					goto end;
152 				}
153 				w1 += len1;
154 				w2 += len2;
155 			}
156 			if (!*w1) {
157 				if (*w2) {
158 					ret = -(int)*w2;
159 					goto end;
160 				}
161 			} else {
162 				ret = *w1;
163 				goto end;
164 			}
165 		} else {
166 			int vpri1 = 0, vpri2 = 0;
167 			while (*w1 || *w2 || st1 || st2) {
168 				pri1 = 1;
169 				while (*w1 || st1) {
170 					_collate_lookup(table, w1, &len1, &pri1,
171 					    pass, &st1);
172 					w1 += len1;
173 					if (pri1 > 0) {
174 						vpri1++;
175 						break;
176 					}
177 
178 					if (pri1 < 0) {
179 						errno = EINVAL;
180 						goto end;
181 					}
182 					st1 = NULL;
183 				}
184 				pri2 = 1;
185 				while (*w2 || st2) {
186 					_collate_lookup(table, w2, &len2, &pri2,
187 					    pass, &st2);
188 					w2 += len2;
189 					if (pri2 > 0) {
190 						vpri2++;
191 						break;
192 					}
193 					if (pri2 < 0) {
194 						errno = EINVAL;
195 						goto end;
196 					}
197 					st2 = NULL;
198 				}
199 				if ((!pri1 || !pri2) && (vpri1 == vpri2))
200 					break;
201 				if (pri1 != pri2) {
202 					ret = pri1 - pri2;
203 					goto end;
204 				}
205 			}
206 			if (vpri1 && !vpri2) {
207 				ret = 1;
208 				goto end;
209 			}
210 			if (!vpri1 && vpri2) {
211 				ret = -1;
212 				goto end;
213 			}
214 		}
215 	}
216 	ret = 0;
217 
218 end:
219 	free(tr1);
220 	free(tr2);
221 
222 	return (ret);
223 }
224 
225 int
226 wcscoll(const wchar_t *ws1, const wchar_t *ws2)
227 {
228 	return wcscoll_l(ws1, ws2, __get_locale());
229 }
230