1 /*
2 
3   silcstringprep.c
4 
5   Author: Pekka Riikonen <priikone@silcnet.org>
6 
7   Copyright (C) 2004 - 2005 Pekka Riikonen
8 
9   The contents of this file are subject to one of the Licenses specified
10   in the COPYING file;  You may not use this file except in compliance
11   with the License.
12 
13   The software distributed under the License is distributed on an "AS IS"
14   basis, in the hope that it will be useful, but WITHOUT WARRANTY OF ANY
15   KIND, either expressed or implied.  See the COPYING file for more
16   information.
17 
18 */
19 
20 #include "silc.h"
21 #include "silcstringprep.h"
22 #include "stringprep.h"
23 
/* We use GNU Libidn, which provides stringprep, to do the actual work.
   Its interface is awkward, so we wrap it in our own API here; that also
   leaves room for implementing stringprep ourselves later. */
27 
28 /* Prohibited characters as defined by the protocol in Appendix C */
29 const Stringprep_table_element silc_appendix_c[] =
30 {
31   {0x000021}, {0x00002A}, {0x00002C}, {0x00003F}, {0x000040},
32   {0}
33 };
34 
35 /* Prohibited characters as defined by the protocol in Appendix D */
36 const Stringprep_table_element silc_appendix_d[] =
37 {
38   {0x0000A2, 0x0000A9},
39   {0x0000AC}, {0x0000AE}, {0x0000AF}, {0x0000B0}, {0x0000B1}, {0x0000B4},
40   {0x0000B6}, {0x0000B8}, {0x0000D7}, {0x0000F7},
41   {0x0002C2, 0x0002C5}, {0x0002D2, 0x0002FF},
42   {0x000374}, {0x000375}, {0x000384}, {0x000385}, {0x0003F6}, {0x000482},
43   {0x00060E}, {0x00060F}, {0x0006E9}, {0x0006FD}, {0x0006FE}, {0x0009F2},
44   {0x0009F3}, {0x0009FA}, {0x000AF1}, {0x000B70},
45   {0x000BF3, 0x000BFA}, {0x000E3F},
46   {0x000F01, 0x000F03}, {0x000F13, 0x000F17}, {0x000F1A, 0x000F1F},
47   {0x000F34}, {0x000F36}, {0x000F38}, {0x000FBE}, {0x000FBF},
48   {0x000FC0, 0x000FC5}, {0x000FC7, 0x000FCF}, {0x0017DB}, {0x001940},
49   {0x0019E0, 0x0019FF}, {0x001FBD}, {0x001FBF, 0x001FC1},
50   {0x001FCD, 0x001FCF}, {0x001FDD, 0x001FDF}, {0x001FED, 0x001FEF},
51   {0x001FFD}, {0x001FFE}, {0x002044}, {0x002052}, {0x00207A, 0x00207C},
52   {0x00208A, 0x00208C}, {0x0020A0, 0x0020B1}, {0x002100, 0x00214F},
53   {0x002150, 0x00218F}, {0x002190, 0x0021FF}, {0x002200, 0x0022FF},
54   {0x002300, 0x0023FF}, {0x002400, 0x00243F}, {0x002440, 0x00245F},
55   {0x002460, 0x0024FF}, {0x002500, 0x00257F}, {0x002580, 0x00259F},
56   {0x0025A0, 0x0025FF}, {0x002600, 0x0026FF}, {0x002700, 0x0027BF},
57   {0x0027C0, 0x0027EF}, {0x0027F0, 0x0027FF}, {0x002800, 0x0028FF},
58   {0x002900, 0x00297F}, {0x002980, 0x0029FF}, {0x002A00, 0x002AFF},
59   {0x002B00, 0x002BFF}, {0x002E9A}, {0x002EF4, 0x002EFF},
60   {0x002FF0, 0x002FFF}, {0x00303B, 0x00303D}, {0x003040},
61   {0x003095, 0x003098}, {0x00309F, 0x0030A0}, {0x0030FF, 0x003104},
62   {0x00312D, 0x003130}, {0x00318F}, {0x0031B8, 0x0031FF},
63   {0x00321D, 0x00321F}, {0x003244, 0x00325F}, {0x00327C, 0x00327E},
64   {0x0032B1, 0x0032BF}, {0x0032CC, 0x0032CF}, {0x0032FF},
65   {0x003377, 0x00337A}, {0x0033DE, 0x0033DF}, {0x0033FF},
66   {0x004DB6, 0x004DFF},
67   {0x009FA6, 0x009FFF}, {0x00A48D, 0x00A48F}, {0x00A4A2, 0x00A4A3},
68   {0x00A4B4}, {0x00A4C1}, {0x00A4C5}, {0x00A4C7, 0x00ABFF},
69   {0x00D7A4, 0x00D7FF}, {0x00FA2E, 0x00FAFF}, {0x00FFE0, 0x00FFEE},
70   {0x00FFFC}, {0x010000, 0x01007F}, {0x010080, 0x0100FF},
71   {0x010100, 0x01013F}, {0x01D000, 0x01D0FF}, {0x01D100, 0x01D1FF},
72   {0x01D300, 0x01D35F}, {0x01D400, 0x01D7FF},
73   {0x0E0100, 0x0E01EF},
74   {0}
75 };
76 
77 /* Default SILC Identifier String profile defined by the protocol */
78 const Stringprep_profile stringprep_silc_identifier_prep[] =
79 {
80   {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1},
81   {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_2},
82   {STRINGPREP_NFKC, 0, 0},
83   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_1},
84   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2},
85   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1},
86   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2},
87   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3},
88   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4},
89   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5},
90   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6},
91   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7},
92   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
93   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
94   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
95   {STRINGPREP_PROHIBIT_TABLE, 0, silc_appendix_c},
96   {STRINGPREP_PROHIBIT_TABLE, 0, silc_appendix_d},
97   {STRINGPREP_UNASSIGNED_TABLE, 0, stringprep_rfc3454_A_1},
98   {0}
99 };
100 
101 /* Default channel name string profile defined by the protocol */
102 const Stringprep_profile stringprep_silc_identifier_ch_prep[] =
103 {
104   {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1},
105   {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_2},
106   {STRINGPREP_NFKC, 0, 0},
107   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_1},
108   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2},
109   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1},
110   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2},
111   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3},
112   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4},
113   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5},
114   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6},
115   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7},
116   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
117   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
118   {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
119   {STRINGPREP_PROHIBIT_TABLE, 0, silc_appendix_d},
120   {STRINGPREP_UNASSIGNED_TABLE, 0, stringprep_rfc3454_A_1},
121   {0}
122 };
123 
124 /* Identifier string case folding and normalizing */
125 const Stringprep_profile stringprep_silc_identifierc_prep[] =
126 {
127   {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1},
128   {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_2},
129   {STRINGPREP_NFKC, 0, 0},
130   {0}
131 };
132 
133 /* Case folding and normalizing */
134 const Stringprep_profile stringprep_silc_casefold_prep[] =
135 {
136   {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_2},
137   {STRINGPREP_NFKC, 0, 0},
138   {0}
139 };
140 
141 
142 /* Prepares string according to the profile */
143 
144 SilcStringprepStatus
silc_stringprep(const unsigned char * bin,SilcUInt32 bin_len,SilcStringEncoding bin_encoding,const char * profile_name,SilcStringprepFlags flags,unsigned char ** out,SilcUInt32 * out_len,SilcStringEncoding out_encoding)145 silc_stringprep(const unsigned char *bin, SilcUInt32 bin_len,
146 		SilcStringEncoding bin_encoding,
147 		const char *profile_name,
148 		SilcStringprepFlags flags,
149 		unsigned char **out, SilcUInt32 *out_len,
150 		SilcStringEncoding out_encoding)
151 {
152   Stringprep_profile_flags f = 0;
153   const Stringprep_profile *profile;
154   unsigned char *utf8s;
155   SilcUInt32 utf8s_len;
156   int ret;
157 
158   SILC_LOG_DEBUG(("Preparing string"));
159 
160   if (!bin || !bin_len || !profile_name)
161     return SILC_STRINGPREP_ERR;
162 
163   /* Convert string to UTF-8 */
164   utf8s_len = silc_utf8_encoded_len(bin, bin_len, bin_encoding);
165   if (!utf8s_len)
166     return SILC_STRINGPREP_ERR_ENCODING;
167   utf8s = silc_calloc(utf8s_len + 1, sizeof(*utf8s));
168   if (!utf8s)
169     return SILC_STRINGPREP_ERR_OUT_OF_MEMORY;
170   silc_utf8_encode(bin, bin_len, bin_encoding, utf8s, utf8s_len);
171 
172   /* Check profile. */
173   if (!strcmp(profile_name, SILC_IDENTIFIER_PREP))
174     profile = stringprep_silc_identifier_prep;
175   else if (!strcmp(profile_name, SILC_IDENTIFIER_CH_PREP))
176     profile = stringprep_silc_identifier_ch_prep;
177   else if (!strcmp(profile_name, SILC_IDENTIFIERC_PREP))
178     profile = stringprep_silc_identifierc_prep;
179   else if (!strcmp(profile_name, SILC_CASEFOLD_PREP))
180     profile = stringprep_silc_casefold_prep;
181   else
182     return SILC_STRINGPREP_ERR_UNSUP_PROFILE;
183 
184   /* Translate flags */
185   if (!(flags & SILC_STRINGPREP_ALLOW_UNASSIGNED))
186     f |= STRINGPREP_NO_UNASSIGNED;
187 
188   /* Prepare */
189   ret = stringprep((char *)utf8s, utf8s_len, f, profile);
190   SILC_LOG_DEBUG(("stringprep() return %d", ret));
191 
192   /* Since the stringprep() doesn't allocate returned buffer, and
193      stringprep_profile() doesn't do it correctly, we can't know how
194      much space we must have for the conversion.  Allocate more if it
195      fails, and try again. */
196   if (ret == STRINGPREP_TOO_SMALL_BUFFER) {
197     utf8s = silc_realloc(utf8s, sizeof(*utf8s) * ((utf8s_len * 2) + 1));
198     if (!utf8s)
199       return SILC_STRINGPREP_ERR_OUT_OF_MEMORY;
200     memset(utf8s + utf8s_len + 1, 0, utf8s_len);
201     ret = stringprep((char *)utf8s, utf8s_len * 2, f, profile);
202     SILC_LOG_DEBUG(("stringprep() return %d", ret));
203   }
204 
205   switch (ret) {
206   case STRINGPREP_OK:
207     ret = SILC_STRINGPREP_OK;
208     break;
209 
210   case STRINGPREP_CONTAINS_UNASSIGNED:
211     ret = SILC_STRINGPREP_ERR_UNASSIGNED;
212     break;
213 
214   case STRINGPREP_CONTAINS_PROHIBITED:
215     ret = SILC_STRINGPREP_ERR_PROHIBITED;
216     break;
217 
218   case STRINGPREP_BIDI_BOTH_L_AND_RAL:
219     ret = SILC_STRINGPREP_ERR_BIDI_RAL_WITH_L;
220     break;
221 
222   case STRINGPREP_BIDI_LEADTRAIL_NOT_RAL:
223     ret = SILC_STRINGPREP_ERR_BIDI_RAL;
224     break;
225 
226   case STRINGPREP_BIDI_CONTAINS_PROHIBITED:
227     ret = SILC_STRINGPREP_ERR_BIDI_PROHIBITED;
228     break;
229 
230   case STRINGPREP_UNKNOWN_PROFILE:
231     ret = SILC_STRINGPREP_ERR_UNSUP_PROFILE;
232     break;
233 
234   case STRINGPREP_MALLOC_ERROR:
235     ret = SILC_STRINGPREP_ERR_OUT_OF_MEMORY;
236     break;
237 
238   default:
239     ret = SILC_STRINGPREP_ERR;
240     break;
241   }
242 
243   /* Convert to desired output character encoding */
244   if (ret == SILC_STRINGPREP_OK) {
245     if (out && out_len) {
246       if (out_encoding != SILC_STRING_UTF8) {
247 	*out_len = silc_utf8_decoded_len(utf8s, strlen(utf8s), out_encoding);
248 	if (*out_len) {
249 	  *out = silc_calloc(*out_len + 1, sizeof(**out));
250 	  if (*out) {
251 	    silc_utf8_decode(utf8s, strlen(utf8s), out_encoding, *out,
252 			     *out_len);
253 	  } else {
254 	    ret = SILC_STRINGPREP_ERR_OUT_OF_MEMORY;
255 	  }
256 	} else {
257 	  ret = SILC_STRINGPREP_ERR_ENCODING;
258 	}
259       } else {
260 	*out_len = strlen(utf8s);
261 	*out = silc_memdup(utf8s, *out_len);
262       }
263     }
264   }
265 
266   silc_free(utf8s);
267 
268   return (SilcStringprepStatus)ret;
269 }
270