1 /*
2
3 silcstringprep.c
4
5 Author: Pekka Riikonen <priikone@silcnet.org>
6
7 Copyright (C) 2004 - 2005 Pekka Riikonen
8
9 The contents of this file are subject to one of the Licenses specified
10 in the COPYING file; You may not use this file except in compliance
11 with the License.
12
13 The software distributed under the License is distributed on an "AS IS"
14 basis, in the hope that it will be useful, but WITHOUT WARRANTY OF ANY
15 KIND, either expressed or implied. See the COPYING file for more
16 information.
17
18 */
19
20 #include "silc.h"
21 #include "silcstringprep.h"
22 #include "stringprep.h"
23
/* We use GNU Libidn, which provides stringprep, to do the magic. The only
   bad thing is that its interface is idiotic. We have our own API here in
   case we implement it ourselves later. */
27
28 /* Prohibited characters as defined by the protocol in Appendix C */
29 const Stringprep_table_element silc_appendix_c[] =
30 {
31 {0x000021}, {0x00002A}, {0x00002C}, {0x00003F}, {0x000040},
32 {0}
33 };
34
35 /* Prohibited characters as defined by the protocol in Appendix D */
36 const Stringprep_table_element silc_appendix_d[] =
37 {
38 {0x0000A2, 0x0000A9},
39 {0x0000AC}, {0x0000AE}, {0x0000AF}, {0x0000B0}, {0x0000B1}, {0x0000B4},
40 {0x0000B6}, {0x0000B8}, {0x0000D7}, {0x0000F7},
41 {0x0002C2, 0x0002C5}, {0x0002D2, 0x0002FF},
42 {0x000374}, {0x000375}, {0x000384}, {0x000385}, {0x0003F6}, {0x000482},
43 {0x00060E}, {0x00060F}, {0x0006E9}, {0x0006FD}, {0x0006FE}, {0x0009F2},
44 {0x0009F3}, {0x0009FA}, {0x000AF1}, {0x000B70},
45 {0x000BF3, 0x000BFA}, {0x000E3F},
46 {0x000F01, 0x000F03}, {0x000F13, 0x000F17}, {0x000F1A, 0x000F1F},
47 {0x000F34}, {0x000F36}, {0x000F38}, {0x000FBE}, {0x000FBF},
48 {0x000FC0, 0x000FC5}, {0x000FC7, 0x000FCF}, {0x0017DB}, {0x001940},
49 {0x0019E0, 0x0019FF}, {0x001FBD}, {0x001FBF, 0x001FC1},
50 {0x001FCD, 0x001FCF}, {0x001FDD, 0x001FDF}, {0x001FED, 0x001FEF},
51 {0x001FFD}, {0x001FFE}, {0x002044}, {0x002052}, {0x00207A, 0x00207C},
52 {0x00208A, 0x00208C}, {0x0020A0, 0x0020B1}, {0x002100, 0x00214F},
53 {0x002150, 0x00218F}, {0x002190, 0x0021FF}, {0x002200, 0x0022FF},
54 {0x002300, 0x0023FF}, {0x002400, 0x00243F}, {0x002440, 0x00245F},
55 {0x002460, 0x0024FF}, {0x002500, 0x00257F}, {0x002580, 0x00259F},
56 {0x0025A0, 0x0025FF}, {0x002600, 0x0026FF}, {0x002700, 0x0027BF},
57 {0x0027C0, 0x0027EF}, {0x0027F0, 0x0027FF}, {0x002800, 0x0028FF},
58 {0x002900, 0x00297F}, {0x002980, 0x0029FF}, {0x002A00, 0x002AFF},
59 {0x002B00, 0x002BFF}, {0x002E9A}, {0x002EF4, 0x002EFF},
60 {0x002FF0, 0x002FFF}, {0x00303B, 0x00303D}, {0x003040},
61 {0x003095, 0x003098}, {0x00309F, 0x0030A0}, {0x0030FF, 0x003104},
62 {0x00312D, 0x003130}, {0x00318F}, {0x0031B8, 0x0031FF},
63 {0x00321D, 0x00321F}, {0x003244, 0x00325F}, {0x00327C, 0x00327E},
64 {0x0032B1, 0x0032BF}, {0x0032CC, 0x0032CF}, {0x0032FF},
65 {0x003377, 0x00337A}, {0x0033DE, 0x0033DF}, {0x0033FF},
66 {0x004DB6, 0x004DFF},
67 {0x009FA6, 0x009FFF}, {0x00A48D, 0x00A48F}, {0x00A4A2, 0x00A4A3},
68 {0x00A4B4}, {0x00A4C1}, {0x00A4C5}, {0x00A4C7, 0x00ABFF},
69 {0x00D7A4, 0x00D7FF}, {0x00FA2E, 0x00FAFF}, {0x00FFE0, 0x00FFEE},
70 {0x00FFFC}, {0x010000, 0x01007F}, {0x010080, 0x0100FF},
71 {0x010100, 0x01013F}, {0x01D000, 0x01D0FF}, {0x01D100, 0x01D1FF},
72 {0x01D300, 0x01D35F}, {0x01D400, 0x01D7FF},
73 {0x0E0100, 0x0E01EF},
74 {0}
75 };
76
77 /* Default SILC Identifier String profile defined by the protocol */
78 const Stringprep_profile stringprep_silc_identifier_prep[] =
79 {
80 {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1},
81 {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_2},
82 {STRINGPREP_NFKC, 0, 0},
83 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_1},
84 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2},
85 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1},
86 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2},
87 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3},
88 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4},
89 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5},
90 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6},
91 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7},
92 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
93 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
94 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
95 {STRINGPREP_PROHIBIT_TABLE, 0, silc_appendix_c},
96 {STRINGPREP_PROHIBIT_TABLE, 0, silc_appendix_d},
97 {STRINGPREP_UNASSIGNED_TABLE, 0, stringprep_rfc3454_A_1},
98 {0}
99 };
100
101 /* Default channel name string profile defined by the protocol */
102 const Stringprep_profile stringprep_silc_identifier_ch_prep[] =
103 {
104 {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1},
105 {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_2},
106 {STRINGPREP_NFKC, 0, 0},
107 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_1},
108 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2},
109 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1},
110 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2},
111 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3},
112 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4},
113 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5},
114 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6},
115 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7},
116 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
117 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
118 {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
119 {STRINGPREP_PROHIBIT_TABLE, 0, silc_appendix_d},
120 {STRINGPREP_UNASSIGNED_TABLE, 0, stringprep_rfc3454_A_1},
121 {0}
122 };
123
124 /* Identifier string case folding and normalizing */
125 const Stringprep_profile stringprep_silc_identifierc_prep[] =
126 {
127 {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1},
128 {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_2},
129 {STRINGPREP_NFKC, 0, 0},
130 {0}
131 };
132
133 /* Case folding and normalizing */
134 const Stringprep_profile stringprep_silc_casefold_prep[] =
135 {
136 {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_2},
137 {STRINGPREP_NFKC, 0, 0},
138 {0}
139 };
140
141
142 /* Prepares string according to the profile */
143
144 SilcStringprepStatus
silc_stringprep(const unsigned char * bin,SilcUInt32 bin_len,SilcStringEncoding bin_encoding,const char * profile_name,SilcStringprepFlags flags,unsigned char ** out,SilcUInt32 * out_len,SilcStringEncoding out_encoding)145 silc_stringprep(const unsigned char *bin, SilcUInt32 bin_len,
146 SilcStringEncoding bin_encoding,
147 const char *profile_name,
148 SilcStringprepFlags flags,
149 unsigned char **out, SilcUInt32 *out_len,
150 SilcStringEncoding out_encoding)
151 {
152 Stringprep_profile_flags f = 0;
153 const Stringprep_profile *profile;
154 unsigned char *utf8s;
155 SilcUInt32 utf8s_len;
156 int ret;
157
158 SILC_LOG_DEBUG(("Preparing string"));
159
160 if (!bin || !bin_len || !profile_name)
161 return SILC_STRINGPREP_ERR;
162
163 /* Convert string to UTF-8 */
164 utf8s_len = silc_utf8_encoded_len(bin, bin_len, bin_encoding);
165 if (!utf8s_len)
166 return SILC_STRINGPREP_ERR_ENCODING;
167 utf8s = silc_calloc(utf8s_len + 1, sizeof(*utf8s));
168 if (!utf8s)
169 return SILC_STRINGPREP_ERR_OUT_OF_MEMORY;
170 silc_utf8_encode(bin, bin_len, bin_encoding, utf8s, utf8s_len);
171
172 /* Check profile. */
173 if (!strcmp(profile_name, SILC_IDENTIFIER_PREP))
174 profile = stringprep_silc_identifier_prep;
175 else if (!strcmp(profile_name, SILC_IDENTIFIER_CH_PREP))
176 profile = stringprep_silc_identifier_ch_prep;
177 else if (!strcmp(profile_name, SILC_IDENTIFIERC_PREP))
178 profile = stringprep_silc_identifierc_prep;
179 else if (!strcmp(profile_name, SILC_CASEFOLD_PREP))
180 profile = stringprep_silc_casefold_prep;
181 else
182 return SILC_STRINGPREP_ERR_UNSUP_PROFILE;
183
184 /* Translate flags */
185 if (!(flags & SILC_STRINGPREP_ALLOW_UNASSIGNED))
186 f |= STRINGPREP_NO_UNASSIGNED;
187
188 /* Prepare */
189 ret = stringprep((char *)utf8s, utf8s_len, f, profile);
190 SILC_LOG_DEBUG(("stringprep() return %d", ret));
191
192 /* Since the stringprep() doesn't allocate returned buffer, and
193 stringprep_profile() doesn't do it correctly, we can't know how
194 much space we must have for the conversion. Allocate more if it
195 fails, and try again. */
196 if (ret == STRINGPREP_TOO_SMALL_BUFFER) {
197 utf8s = silc_realloc(utf8s, sizeof(*utf8s) * ((utf8s_len * 2) + 1));
198 if (!utf8s)
199 return SILC_STRINGPREP_ERR_OUT_OF_MEMORY;
200 memset(utf8s + utf8s_len + 1, 0, utf8s_len);
201 ret = stringprep((char *)utf8s, utf8s_len * 2, f, profile);
202 SILC_LOG_DEBUG(("stringprep() return %d", ret));
203 }
204
205 switch (ret) {
206 case STRINGPREP_OK:
207 ret = SILC_STRINGPREP_OK;
208 break;
209
210 case STRINGPREP_CONTAINS_UNASSIGNED:
211 ret = SILC_STRINGPREP_ERR_UNASSIGNED;
212 break;
213
214 case STRINGPREP_CONTAINS_PROHIBITED:
215 ret = SILC_STRINGPREP_ERR_PROHIBITED;
216 break;
217
218 case STRINGPREP_BIDI_BOTH_L_AND_RAL:
219 ret = SILC_STRINGPREP_ERR_BIDI_RAL_WITH_L;
220 break;
221
222 case STRINGPREP_BIDI_LEADTRAIL_NOT_RAL:
223 ret = SILC_STRINGPREP_ERR_BIDI_RAL;
224 break;
225
226 case STRINGPREP_BIDI_CONTAINS_PROHIBITED:
227 ret = SILC_STRINGPREP_ERR_BIDI_PROHIBITED;
228 break;
229
230 case STRINGPREP_UNKNOWN_PROFILE:
231 ret = SILC_STRINGPREP_ERR_UNSUP_PROFILE;
232 break;
233
234 case STRINGPREP_MALLOC_ERROR:
235 ret = SILC_STRINGPREP_ERR_OUT_OF_MEMORY;
236 break;
237
238 default:
239 ret = SILC_STRINGPREP_ERR;
240 break;
241 }
242
243 /* Convert to desired output character encoding */
244 if (ret == SILC_STRINGPREP_OK) {
245 if (out && out_len) {
246 if (out_encoding != SILC_STRING_UTF8) {
247 *out_len = silc_utf8_decoded_len(utf8s, strlen(utf8s), out_encoding);
248 if (*out_len) {
249 *out = silc_calloc(*out_len + 1, sizeof(**out));
250 if (*out) {
251 silc_utf8_decode(utf8s, strlen(utf8s), out_encoding, *out,
252 *out_len);
253 } else {
254 ret = SILC_STRINGPREP_ERR_OUT_OF_MEMORY;
255 }
256 } else {
257 ret = SILC_STRINGPREP_ERR_ENCODING;
258 }
259 } else {
260 *out_len = strlen(utf8s);
261 *out = silc_memdup(utf8s, *out_len);
262 }
263 }
264 }
265
266 silc_free(utf8s);
267
268 return (SilcStringprepStatus)ret;
269 }
270