xref: /dragonfly/lib/libc/locale/collate.c (revision f746689a)
1 /*-
2  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
3  *		at Electronni Visti IA, Kiev, Ukraine.
4  *			All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD: src/lib/libc/locale/collate.c,v 1.21.2.4 2002/10/11 10:36:47 ache Exp $
28  * $DragonFly: src/lib/libc/locale/collate.c,v 1.7 2005/11/13 01:20:49 swildner Exp $
29  */
30 
31 #include "namespace.h"
32 #include <arpa/inet.h>
33 #include <rune.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <unistd.h>
39 #include <sysexits.h>
40 #include "un-namespace.h"
41 
42 #include "collate.h"
43 
44 #define	ENCODING_LEN	31
45 #define _LDP_LOADED 0
46 #define _LDP_ERROR  (-1)
47 #define _LDP_CACHE  1
48 
49 int __collate_load_error = 1;
50 int __collate_substitute_nontrivial;
51 
52 u_char __collate_substitute_table[UCHAR_MAX + 1][STR_LEN];
53 struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1];
54 struct __collate_st_chain_pri *__collate_chain_pri_table;
55 
56 void __collate_err(int ex, const char *f) __dead2;
57 
58 int
59 __collate_load_tables(const char *encoding)
60 {
61 	FILE *fp;
62 	int i, saverr, chains;
63 	uint32_t u32;
64 	char strbuf[STR_LEN], buf[PATH_MAX];
65 	void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table;
66 	static char collate_encoding[ENCODING_LEN + 1];
67 
68 	/* 'encoding' must be already checked. */
69 	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
70 		__collate_load_error = 1;
71 		return (_LDP_CACHE);
72 	}
73 
74 	/*
75 	 * If the locale name is the same as our cache, use the cache.
76 	 */
77 	if (strcmp(encoding, collate_encoding) == 0) {
78 		__collate_load_error = 0;
79 		return (_LDP_CACHE);
80 	}
81 
82 	/*
83 	 * Slurp the locale file into the cache.
84 	 */
85 
86 	/* 'PathLocale' must be already set & checked. */
87 	/* Range checking not needed, encoding has fixed size */
88 	strcpy(buf, _PathLocale);
89 	strcat(buf, "/");
90 	strcat(buf, encoding);
91 	strcat(buf, "/LC_COLLATE");
92 	if ((fp = fopen(buf, "r")) == NULL)
93 		return (_LDP_ERROR);
94 
95 	if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) {
96 		saverr = errno;
97 		fclose(fp);
98 		errno = saverr;
99 		return (_LDP_ERROR);
100 	}
101 	chains = -1;
102 	if (strcmp(strbuf, COLLATE_VERSION) == 0)
103 		chains = 0;
104 	else if (strcmp(strbuf, COLLATE_VERSION1_1) == 0)
105 		chains = 1;
106 	if (chains < 0) {
107 		fclose(fp);
108 		errno = EFTYPE;
109 		return (_LDP_ERROR);
110 	}
111 	if (chains) {
112 		if (fread(&u32, sizeof(u32), 1, fp) != 1) {
113 			saverr = errno;
114 			fclose(fp);
115 			errno = saverr;
116 			return (_LDP_ERROR);
117 		}
118 		if ((chains = (int)ntohl(u32)) < 1) {
119 			fclose(fp);
120 			errno = EFTYPE;
121 			return (_LDP_ERROR);
122 		}
123 	} else
124 		chains = TABLE_SIZE;
125 
126 	if ((TMP_substitute_table =
127 	     malloc(sizeof(__collate_substitute_table))) == NULL) {
128 		saverr = errno;
129 		fclose(fp);
130 		errno = saverr;
131 		return (_LDP_ERROR);
132 	}
133 	if ((TMP_char_pri_table =
134 	     malloc(sizeof(__collate_char_pri_table))) == NULL) {
135 		saverr = errno;
136 		free(TMP_substitute_table);
137 		fclose(fp);
138 		errno = saverr;
139 		return (_LDP_ERROR);
140 	}
141 	if ((TMP_chain_pri_table =
142 	     malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) {
143 		saverr = errno;
144 		free(TMP_substitute_table);
145 		free(TMP_char_pri_table);
146 		fclose(fp);
147 		errno = saverr;
148 		return (_LDP_ERROR);
149 	}
150 
151 #define FREAD(a, b, c, d) \
152 { \
153 	if ( fread(a, b, c, d) != c) { \
154 		saverr = errno; \
155 		free(TMP_substitute_table); \
156 		free(TMP_char_pri_table); \
157 		free(TMP_chain_pri_table); \
158 		fclose(d); \
159 		errno = saverr; \
160 		return (_LDP_ERROR); \
161 	} \
162 }
163 
164 	FREAD(TMP_substitute_table, sizeof(__collate_substitute_table), 1, fp);
165 	FREAD(TMP_char_pri_table, sizeof(__collate_char_pri_table), 1, fp);
166 	FREAD(TMP_chain_pri_table,
167 	      sizeof(*__collate_chain_pri_table), chains, fp);
168 	fclose(fp);
169 
170 	strcpy(collate_encoding, encoding);
171 	if (__collate_substitute_table_ptr != NULL)
172 		free(__collate_substitute_table_ptr);
173 	__collate_substitute_table_ptr = TMP_substitute_table;
174 	if (__collate_char_pri_table_ptr != NULL)
175 		free(__collate_char_pri_table_ptr);
176 	__collate_char_pri_table_ptr = TMP_char_pri_table;
177 	if (__collate_chain_pri_table != NULL)
178 		free(__collate_chain_pri_table);
179 	__collate_chain_pri_table = TMP_chain_pri_table;
180 
181 	__collate_substitute_nontrivial = 0;
182 	for (i = 0; i < UCHAR_MAX + 1; i++) {
183 		if (__collate_substitute_table[i][0] != i ||
184 		    __collate_substitute_table[i][1] != 0) {
185 			__collate_substitute_nontrivial = 1;
186 			break;
187 		}
188 	}
189 	__collate_load_error = 0;
190 
191 	return (_LDP_LOADED);
192 }
193 
194 u_char *
195 __collate_substitute(const u_char *s)
196 {
197 	int dest_len, len, nlen;
198 	int delta = strlen(s);
199 	u_char *dest_str = NULL;
200 
201 	if (s == NULL || *s == '\0')
202 		return (__collate_strdup(""));
203 	delta += delta / 8;
204 	dest_str = malloc(dest_len = delta);
205 	if (dest_str == NULL)
206 		__collate_err(EX_OSERR, __func__);
207 	len = 0;
208 	while (*s) {
209 		nlen = len + strlen(__collate_substitute_table[*s]);
210 		if (dest_len <= nlen) {
211 			dest_str = reallocf(dest_str, dest_len = nlen + delta);
212 			if (dest_str == NULL)
213 				__collate_err(EX_OSERR, __func__);
214 		}
215 		strcpy(dest_str + len, __collate_substitute_table[*s++]);
216 		len = nlen;
217 	}
218 	return (dest_str);
219 }
220 
221 void
222 __collate_lookup(const u_char *t, int *len, int *prim, int *sec)
223 {
224 	struct __collate_st_chain_pri *p2;
225 
226 	*len = 1;
227 	*prim = *sec = 0;
228 	for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) {
229 		if (*t == p2->str[0] &&
230 		    strncmp(t, p2->str, strlen(p2->str)) == 0) {
231 			*len = strlen(p2->str);
232 			*prim = p2->prim;
233 			*sec = p2->sec;
234 			return;
235 		}
236 	}
237 	*prim = __collate_char_pri_table[*t].prim;
238 	*sec = __collate_char_pri_table[*t].sec;
239 }
240 
241 u_char *
242 __collate_strdup(u_char *s)
243 {
244 	u_char *t = strdup(s);
245 
246 	if (t == NULL)
247 		__collate_err(EX_OSERR, __func__);
248 	return (t);
249 }
250 
/*
 * Write "<progname>: <f>: <strerror(errno)>\n" to standard error and exit
 * with status 'ex'.  'f' is the text of the second field; callers in this
 * file pass their own function name.
 */
void
__collate_err(int ex, const char *f)
{
	/* Capture errno first, before any call below can overwrite it. */
	const int saved_errno = errno;
	const char *name, *reason;

	name = getprogname();
	_write(STDERR_FILENO, name, strlen(name));
	_write(STDERR_FILENO, ": ", 2);
	_write(STDERR_FILENO, f, strlen(f));
	_write(STDERR_FILENO, ": ", 2);

	reason = strerror(saved_errno);
	_write(STDERR_FILENO, reason, strlen(reason));
	_write(STDERR_FILENO, "\n", 1);

	exit(ex);
}
267 
#ifdef COLLATE_DEBUG
/*
 * Debugging aid: print the three collation tables to standard output.
 * Substitute entries whose first byte equals their own index are skipped.
 */
void
__collate_print_tables(void)
{
	struct __collate_st_chain_pri *chain;
	int ch;

	printf("Substitute table:\n");
	for (ch = 0; ch <= UCHAR_MAX; ch++) {
		if (__collate_substitute_table[ch][0] == ch)
			continue;
		printf("\t'%c' --> \"%s\"\n", ch,
		       __collate_substitute_table[ch]);
	}

	printf("Chain priority table:\n");
	chain = __collate_chain_pri_table;
	while (chain->str[0] != '\0') {
		printf("\t\"%s\" : %d %d\n", chain->str, chain->prim,
		       chain->sec);
		chain++;
	}

	printf("Char priority table:\n");
	for (ch = 0; ch <= UCHAR_MAX; ch++) {
		printf("\t'%c' : %d %d\n", ch,
		       __collate_char_pri_table[ch].prim,
		       __collate_char_pri_table[ch].sec);
	}
}
#endif
289