xref: /illumos-gate/usr/src/lib/libc/port/locale/collate.c (revision 6ea3c060)
1 /*
2  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
3  *		at Electronni Visti IA, Kiev, Ukraine.
4  *			All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
30  */
31 
32 #include "lint.h"
33 #include "file64.h"
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <unistd.h>
39 #include <sysexits.h>
40 #include <netinet/in.h>
41 
42 #include "collate.h"
43 #include "setlocale.h"
44 #include "ldpart.h"
45 
/*
 * Non-zero while no collation tables are in effect.  __collate_load_tables()
 * sets it to 1 for the "C"/"POSIX" locales and clears it to 0 on a cache hit
 * or after a successful load.
 */
int __collate_load_error = 1;
/*
 * Set by __collate_load_tables(): 1 when at least one substitution table
 * entry is something other than the one-character identity mapping, else 0.
 */
int __collate_substitute_nontrivial;

/*
 * Collation tables filled in by __collate_load_tables().
 *
 * NOTE(review): the loader assigns to __collate_substitute_table_ptr and
 * __collate_char_pri_table_ptr, which are not declared in this file;
 * presumably collate.h maps the two array names below onto those pointers.
 * Confirm against collate.h before changing these declarations.
 */
/* Per-byte substitution strings, indexed by unsigned character value. */
char __collate_substitute_table[UCHAR_MAX + 1][STR_LEN];
/* Per-byte primary/secondary weights, indexed by unsigned character value. */
struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1];
/* Heap-allocated array of multi-character chain entries and their weights. */
struct __collate_st_chain_pri *__collate_chain_pri_table;
52 
53 int
54 __collate_load_tables(const char *encoding)
55 {
56 	FILE *fp;
57 	int i, saverr, chains;
58 	uint32_t u32;
59 	char strbuf[STR_LEN], buf[PATH_MAX];
60 	void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table;
61 	static char collate_encoding[ENCODING_LEN + 1];
62 
63 	/* 'encoding' must be already checked. */
64 	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
65 		__collate_load_error = 1;
66 		return (_LDP_CACHE);
67 	}
68 
69 	/*
70 	 * If the locale name is the same as our cache, use the cache.
71 	 */
72 	if (strcmp(encoding, collate_encoding) == 0) {
73 		__collate_load_error = 0;
74 		return (_LDP_CACHE);
75 	}
76 
77 	/*
78 	 * Slurp the locale file into the cache.
79 	 */
80 
81 	(void) snprintf(buf, sizeof (buf), "%s/%s/LC_COLLATE/LCL_DATA",
82 	    _PathLocale, encoding);
83 
84 	if ((fp = fopen(buf, "r")) == NULL)
85 		return (_LDP_ERROR);
86 
87 	if (fread(strbuf, sizeof (strbuf), 1, fp) != 1) {
88 		saverr = errno;
89 		(void) fclose(fp);
90 		errno = saverr;
91 		return (_LDP_ERROR);
92 	}
93 	chains = -1;
94 	if (strcmp(strbuf, COLLATE_VERSION) == 0)
95 		chains = 0;
96 	else if (strcmp(strbuf, COLLATE_VERSION1_2) == 0)
97 		chains = 1;
98 	if (chains < 0) {
99 		(void) fclose(fp);
100 		errno = EINVAL;
101 		return (_LDP_ERROR);
102 	}
103 	if (chains) {
104 		if (fread(&u32, sizeof (u32), 1, fp) != 1) {
105 			saverr = errno;
106 			(void) fclose(fp);
107 			errno = saverr;
108 			return (_LDP_ERROR);
109 		}
110 		if ((chains = (int)ntohl(u32)) < 1) {
111 			(void) fclose(fp);
112 			errno = EINVAL;
113 			return (_LDP_ERROR);
114 		}
115 	} else
116 		chains = TABLE_SIZE;
117 
118 	if ((TMP_substitute_table =
119 	    malloc(sizeof (__collate_substitute_table))) == NULL) {
120 		saverr = errno;
121 		(void) fclose(fp);
122 		errno = saverr;
123 		return (_LDP_ERROR);
124 	}
125 	if ((TMP_char_pri_table =
126 	    malloc(sizeof (__collate_char_pri_table))) == NULL) {
127 		saverr = errno;
128 		free(TMP_substitute_table);
129 		(void) fclose(fp);
130 		errno = saverr;
131 		return (_LDP_ERROR);
132 	}
133 	if ((TMP_chain_pri_table =
134 	    malloc(sizeof (*__collate_chain_pri_table) * chains)) == NULL) {
135 		saverr = errno;
136 		free(TMP_substitute_table);
137 		free(TMP_char_pri_table);
138 		(void) fclose(fp);
139 		errno = saverr;
140 		return (_LDP_ERROR);
141 	}
142 
143 #define	FREAD(a, b, c, d) \
144 { \
145 	if (fread(a, b, c, d) != c) { \
146 		saverr = errno; \
147 		free(TMP_substitute_table); \
148 		free(TMP_char_pri_table); \
149 		free(TMP_chain_pri_table); \
150 		(void) fclose(d); \
151 		errno = saverr; \
152 		return (_LDP_ERROR); \
153 	} \
154 }
155 
156 	FREAD(TMP_substitute_table, sizeof (__collate_substitute_table), 1, fp);
157 	FREAD(TMP_char_pri_table, sizeof (__collate_char_pri_table), 1, fp);
158 	FREAD(TMP_chain_pri_table,
159 	    sizeof (*__collate_chain_pri_table), chains, fp);
160 	(void) fclose(fp);
161 
162 	(void) strcpy(collate_encoding, encoding);
163 	if (__collate_substitute_table_ptr != NULL)
164 		free(__collate_substitute_table_ptr);
165 	__collate_substitute_table_ptr = TMP_substitute_table;
166 	if (__collate_char_pri_table_ptr != NULL)
167 		free(__collate_char_pri_table_ptr);
168 	__collate_char_pri_table_ptr = TMP_char_pri_table;
169 	for (i = 0; i < UCHAR_MAX + 1; i++) {
170 		__collate_char_pri_table[i].prim =
171 		    ntohl(__collate_char_pri_table[i].prim);
172 		__collate_char_pri_table[i].sec =
173 		    ntohl(__collate_char_pri_table[i].sec);
174 	}
175 	if (__collate_chain_pri_table != NULL)
176 		free(__collate_chain_pri_table);
177 	__collate_chain_pri_table = TMP_chain_pri_table;
178 	for (i = 0; i < chains; i++) {
179 		__collate_chain_pri_table[i].prim =
180 		    ntohl(__collate_chain_pri_table[i].prim);
181 		__collate_chain_pri_table[i].sec =
182 		    ntohl(__collate_chain_pri_table[i].sec);
183 	}
184 	__collate_substitute_nontrivial = 0;
185 	for (i = 0; i < UCHAR_MAX + 1; i++) {
186 		if (__collate_substitute_table[i][0] != i ||
187 		    __collate_substitute_table[i][1] != 0) {
188 			__collate_substitute_nontrivial = 1;
189 			break;
190 		}
191 	}
192 	__collate_load_error = 0;
193 
194 	return (_LDP_LOADED);
195 }
196 
197 char *
198 __collate_substitute(const char *str)
199 {
200 	int dest_len, len, nlen;
201 	int delta;
202 	char *dest_str = NULL;
203 	uchar_t *s = (uchar_t *)str;
204 
205 	if (s == NULL || *s == '\0') {
206 		return (strdup(""));
207 	}
208 
209 	delta = strlen(str);
210 	delta += delta / 8;
211 	dest_str = malloc(dest_len = delta);
212 	if (dest_str == NULL)
213 		return (NULL);
214 	len = 0;
215 	while (*s) {
216 		nlen = len + strlen(__collate_substitute_table[*s]);
217 		if (dest_len <= nlen) {
218 			char *new_str;
219 			new_str = realloc(dest_str, dest_len = nlen + delta);
220 			if (new_str == NULL) {
221 				free(dest_str);
222 				return (NULL);
223 			}
224 			dest_str = new_str;
225 		}
226 		(void) strcpy(dest_str + len,
227 		    (char *)__collate_substitute_table[*s++]);
228 		len = nlen;
229 	}
230 	return (dest_str);
231 }
232 
233 void
234 __collate_lookup(const char *t, int *len, int *prim, int *sec)
235 {
236 	struct __collate_st_chain_pri *p2;
237 
238 	*len = 1;
239 	*prim = *sec = 0;
240 	for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) {
241 		if (*t == p2->str[0] &&
242 		    strncmp(t, (char *)p2->str, strlen((char *)p2->str)) == 0) {
243 			*len = strlen((char *)p2->str);
244 			*prim = p2->prim;
245 			*sec = p2->sec;
246 			return;
247 		}
248 	}
249 	*prim = __collate_char_pri_table[(uchar_t)*t].prim;
250 	*sec = __collate_char_pri_table[(uchar_t)*t].sec;
251 }
252