1 /*-------------------------------------------------------------------------
2 * saslprep.c
3 * SASLprep normalization, for SCRAM authentication
4 *
5 * The SASLprep algorithm is used to process a user-supplied password into
6 * canonical form. For more details, see:
7 *
8 * [RFC3454] Preparation of Internationalized Strings ("stringprep"),
9 * http://www.ietf.org/rfc/rfc3454.txt
10 *
11 * [RFC4013] SASLprep: Stringprep Profile for User Names and Passwords
12 * http://www.ietf.org/rfc/rfc4013.txt
13 *
14 *
15 * Portions Copyright (c) 2017-2020, PostgreSQL Global Development Group
16 *
17 * IDENTIFICATION
18 * src/common/saslprep.c
19 *
20 *-------------------------------------------------------------------------
21 */
22 #ifndef FRONTEND
23 #include "postgres.h"
24 #else
25 #include "postgres_fe.h"
26 #endif
27
28 #include "common/saslprep.h"
29 #include "common/unicode_norm.h"
30 #include "mb/pg_wchar.h"
31
/*
 * Upper bound, in bytes (as measured by strlen), on the passwords that
 * pg_saslprep() is willing to process.
 *
 * A longer password is rejected up front: backend code raises a
 * "password too long" error, while frontend code reports it the same way
 * as an out-of-memory condition (SASLPREP_OOM).
 */
#define MAX_PASSWORD_LENGTH		1024
37
/*
 * Memory management wrappers.
 *
 * Backend builds use palloc/pstrdup/pfree.  Frontend builds use the plain
 * C library allocator, and pg_saslprep() returns SASLPREP_OOM when one of
 * these allocations fails.
 *
 * Within this file FREE() is only ever handed a non-NULL pointer.
 */
#ifdef FRONTEND
#define STRDUP(s)		strdup(s)
#define ALLOC(size)		malloc(size)
#define FREE(ptr)		free(ptr)
#else
#define STRDUP(s)		pstrdup(s)
#define ALLOC(size)		palloc(size)
#define FREE(ptr)		pfree(ptr)
#endif
51
52 /* Prototypes for local functions */
53 static int codepoint_range_cmp(const void *a, const void *b);
54 static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize);
55 static int pg_utf8_string_len(const char *source);
56 static bool pg_is_ascii_string(const char *p);
57
/*
 * Stringprep Mapping Tables.
 *
 * The stringprep specification includes a number of tables of Unicode
 * codepoints, used in different parts of the algorithm.  They are below,
 * as arrays of codepoint ranges.  Each range is a pair of codepoints,
 * for the first and last codepoint included in the range (inclusive!).
 */
66
67 /*
68 * C.1.2 Non-ASCII space characters
69 *
70 * These are all mapped to the ASCII space character (U+00A0).
71 */
72 static const pg_wchar non_ascii_space_ranges[] =
73 {
74 0x00A0, 0x00A0,
75 0x1680, 0x1680,
76 0x2000, 0x200B,
77 0x202F, 0x202F,
78 0x205F, 0x205F,
79 0x3000, 0x3000
80 };
81
82 /*
83 * B.1 Commonly mapped to nothing
84 *
85 * If any of these appear in the input, they are removed.
86 */
87 static const pg_wchar commonly_mapped_to_nothing_ranges[] =
88 {
89 0x00AD, 0x00AD,
90 0x034F, 0x034F,
91 0x1806, 0x1806,
92 0x180B, 0x180D,
93 0x200B, 0x200D,
94 0x2060, 0x2060,
95 0xFE00, 0xFE0F,
96 0xFEFF, 0xFEFF
97 };
98
99 /*
100 * prohibited_output_ranges is a union of all the characters from
101 * the following tables:
102 *
103 * C.1.2 Non-ASCII space characters
104 * C.2.1 ASCII control characters
105 * C.2.2 Non-ASCII control characters
106 * C.3 Private Use characters
107 * C.4 Non-character code points
108 * C.5 Surrogate code points
109 * C.6 Inappropriate for plain text characters
110 * C.7 Inappropriate for canonical representation characters
111 * C.7 Change display properties or deprecated characters
112 * C.8 Tagging characters
113 *
114 * These are the tables that are listed as "prohibited output"
115 * characters in the SASLprep profile.
116 *
117 * The comment after each code range indicates which source table
118 * the code came from. Note that there is some overlap in the source
119 * tables, so one code might originate from multiple source tables.
120 * Adjacent ranges have also been merged together, to save space.
121 */
122 static const pg_wchar prohibited_output_ranges[] =
123 {
124 0x0000, 0x001F, /* C.2.1 */
125 0x007F, 0x00A0, /* C.1.2, C.2.1, C.2.2 */
126 0x0340, 0x0341, /* C.8 */
127 0x06DD, 0x06DD, /* C.2.2 */
128 0x070F, 0x070F, /* C.2.2 */
129 0x1680, 0x1680, /* C.1.2 */
130 0x180E, 0x180E, /* C.2.2 */
131 0x2000, 0x200F, /* C.1.2, C.2.2, C.8 */
132 0x2028, 0x202F, /* C.1.2, C.2.2, C.8 */
133 0x205F, 0x2063, /* C.1.2, C.2.2 */
134 0x206A, 0x206F, /* C.2.2, C.8 */
135 0x2FF0, 0x2FFB, /* C.7 */
136 0x3000, 0x3000, /* C.1.2 */
137 0xD800, 0xF8FF, /* C.3, C.5 */
138 0xFDD0, 0xFDEF, /* C.4 */
139 0xFEFF, 0xFEFF, /* C.2.2 */
140 0xFFF9, 0xFFFF, /* C.2.2, C.4, C.6 */
141 0x1D173, 0x1D17A, /* C.2.2 */
142 0x1FFFE, 0x1FFFF, /* C.4 */
143 0x2FFFE, 0x2FFFF, /* C.4 */
144 0x3FFFE, 0x3FFFF, /* C.4 */
145 0x4FFFE, 0x4FFFF, /* C.4 */
146 0x5FFFE, 0x5FFFF, /* C.4 */
147 0x6FFFE, 0x6FFFF, /* C.4 */
148 0x7FFFE, 0x7FFFF, /* C.4 */
149 0x8FFFE, 0x8FFFF, /* C.4 */
150 0x9FFFE, 0x9FFFF, /* C.4 */
151 0xAFFFE, 0xAFFFF, /* C.4 */
152 0xBFFFE, 0xBFFFF, /* C.4 */
153 0xCFFFE, 0xCFFFF, /* C.4 */
154 0xDFFFE, 0xDFFFF, /* C.4 */
155 0xE0001, 0xE0001, /* C.9 */
156 0xE0020, 0xE007F, /* C.9 */
157 0xEFFFE, 0xEFFFF, /* C.4 */
158 0xF0000, 0xFFFFF, /* C.3, C.4 */
159 0x100000, 0x10FFFF /* C.3, C.4 */
160 };
161
162 /* A.1 Unassigned code points in Unicode 3.2 */
163 static const pg_wchar unassigned_codepoint_ranges[] =
164 {
165 0x0221, 0x0221,
166 0x0234, 0x024F,
167 0x02AE, 0x02AF,
168 0x02EF, 0x02FF,
169 0x0350, 0x035F,
170 0x0370, 0x0373,
171 0x0376, 0x0379,
172 0x037B, 0x037D,
173 0x037F, 0x0383,
174 0x038B, 0x038B,
175 0x038D, 0x038D,
176 0x03A2, 0x03A2,
177 0x03CF, 0x03CF,
178 0x03F7, 0x03FF,
179 0x0487, 0x0487,
180 0x04CF, 0x04CF,
181 0x04F6, 0x04F7,
182 0x04FA, 0x04FF,
183 0x0510, 0x0530,
184 0x0557, 0x0558,
185 0x0560, 0x0560,
186 0x0588, 0x0588,
187 0x058B, 0x0590,
188 0x05A2, 0x05A2,
189 0x05BA, 0x05BA,
190 0x05C5, 0x05CF,
191 0x05EB, 0x05EF,
192 0x05F5, 0x060B,
193 0x060D, 0x061A,
194 0x061C, 0x061E,
195 0x0620, 0x0620,
196 0x063B, 0x063F,
197 0x0656, 0x065F,
198 0x06EE, 0x06EF,
199 0x06FF, 0x06FF,
200 0x070E, 0x070E,
201 0x072D, 0x072F,
202 0x074B, 0x077F,
203 0x07B2, 0x0900,
204 0x0904, 0x0904,
205 0x093A, 0x093B,
206 0x094E, 0x094F,
207 0x0955, 0x0957,
208 0x0971, 0x0980,
209 0x0984, 0x0984,
210 0x098D, 0x098E,
211 0x0991, 0x0992,
212 0x09A9, 0x09A9,
213 0x09B1, 0x09B1,
214 0x09B3, 0x09B5,
215 0x09BA, 0x09BB,
216 0x09BD, 0x09BD,
217 0x09C5, 0x09C6,
218 0x09C9, 0x09CA,
219 0x09CE, 0x09D6,
220 0x09D8, 0x09DB,
221 0x09DE, 0x09DE,
222 0x09E4, 0x09E5,
223 0x09FB, 0x0A01,
224 0x0A03, 0x0A04,
225 0x0A0B, 0x0A0E,
226 0x0A11, 0x0A12,
227 0x0A29, 0x0A29,
228 0x0A31, 0x0A31,
229 0x0A34, 0x0A34,
230 0x0A37, 0x0A37,
231 0x0A3A, 0x0A3B,
232 0x0A3D, 0x0A3D,
233 0x0A43, 0x0A46,
234 0x0A49, 0x0A4A,
235 0x0A4E, 0x0A58,
236 0x0A5D, 0x0A5D,
237 0x0A5F, 0x0A65,
238 0x0A75, 0x0A80,
239 0x0A84, 0x0A84,
240 0x0A8C, 0x0A8C,
241 0x0A8E, 0x0A8E,
242 0x0A92, 0x0A92,
243 0x0AA9, 0x0AA9,
244 0x0AB1, 0x0AB1,
245 0x0AB4, 0x0AB4,
246 0x0ABA, 0x0ABB,
247 0x0AC6, 0x0AC6,
248 0x0ACA, 0x0ACA,
249 0x0ACE, 0x0ACF,
250 0x0AD1, 0x0ADF,
251 0x0AE1, 0x0AE5,
252 0x0AF0, 0x0B00,
253 0x0B04, 0x0B04,
254 0x0B0D, 0x0B0E,
255 0x0B11, 0x0B12,
256 0x0B29, 0x0B29,
257 0x0B31, 0x0B31,
258 0x0B34, 0x0B35,
259 0x0B3A, 0x0B3B,
260 0x0B44, 0x0B46,
261 0x0B49, 0x0B4A,
262 0x0B4E, 0x0B55,
263 0x0B58, 0x0B5B,
264 0x0B5E, 0x0B5E,
265 0x0B62, 0x0B65,
266 0x0B71, 0x0B81,
267 0x0B84, 0x0B84,
268 0x0B8B, 0x0B8D,
269 0x0B91, 0x0B91,
270 0x0B96, 0x0B98,
271 0x0B9B, 0x0B9B,
272 0x0B9D, 0x0B9D,
273 0x0BA0, 0x0BA2,
274 0x0BA5, 0x0BA7,
275 0x0BAB, 0x0BAD,
276 0x0BB6, 0x0BB6,
277 0x0BBA, 0x0BBD,
278 0x0BC3, 0x0BC5,
279 0x0BC9, 0x0BC9,
280 0x0BCE, 0x0BD6,
281 0x0BD8, 0x0BE6,
282 0x0BF3, 0x0C00,
283 0x0C04, 0x0C04,
284 0x0C0D, 0x0C0D,
285 0x0C11, 0x0C11,
286 0x0C29, 0x0C29,
287 0x0C34, 0x0C34,
288 0x0C3A, 0x0C3D,
289 0x0C45, 0x0C45,
290 0x0C49, 0x0C49,
291 0x0C4E, 0x0C54,
292 0x0C57, 0x0C5F,
293 0x0C62, 0x0C65,
294 0x0C70, 0x0C81,
295 0x0C84, 0x0C84,
296 0x0C8D, 0x0C8D,
297 0x0C91, 0x0C91,
298 0x0CA9, 0x0CA9,
299 0x0CB4, 0x0CB4,
300 0x0CBA, 0x0CBD,
301 0x0CC5, 0x0CC5,
302 0x0CC9, 0x0CC9,
303 0x0CCE, 0x0CD4,
304 0x0CD7, 0x0CDD,
305 0x0CDF, 0x0CDF,
306 0x0CE2, 0x0CE5,
307 0x0CF0, 0x0D01,
308 0x0D04, 0x0D04,
309 0x0D0D, 0x0D0D,
310 0x0D11, 0x0D11,
311 0x0D29, 0x0D29,
312 0x0D3A, 0x0D3D,
313 0x0D44, 0x0D45,
314 0x0D49, 0x0D49,
315 0x0D4E, 0x0D56,
316 0x0D58, 0x0D5F,
317 0x0D62, 0x0D65,
318 0x0D70, 0x0D81,
319 0x0D84, 0x0D84,
320 0x0D97, 0x0D99,
321 0x0DB2, 0x0DB2,
322 0x0DBC, 0x0DBC,
323 0x0DBE, 0x0DBF,
324 0x0DC7, 0x0DC9,
325 0x0DCB, 0x0DCE,
326 0x0DD5, 0x0DD5,
327 0x0DD7, 0x0DD7,
328 0x0DE0, 0x0DF1,
329 0x0DF5, 0x0E00,
330 0x0E3B, 0x0E3E,
331 0x0E5C, 0x0E80,
332 0x0E83, 0x0E83,
333 0x0E85, 0x0E86,
334 0x0E89, 0x0E89,
335 0x0E8B, 0x0E8C,
336 0x0E8E, 0x0E93,
337 0x0E98, 0x0E98,
338 0x0EA0, 0x0EA0,
339 0x0EA4, 0x0EA4,
340 0x0EA6, 0x0EA6,
341 0x0EA8, 0x0EA9,
342 0x0EAC, 0x0EAC,
343 0x0EBA, 0x0EBA,
344 0x0EBE, 0x0EBF,
345 0x0EC5, 0x0EC5,
346 0x0EC7, 0x0EC7,
347 0x0ECE, 0x0ECF,
348 0x0EDA, 0x0EDB,
349 0x0EDE, 0x0EFF,
350 0x0F48, 0x0F48,
351 0x0F6B, 0x0F70,
352 0x0F8C, 0x0F8F,
353 0x0F98, 0x0F98,
354 0x0FBD, 0x0FBD,
355 0x0FCD, 0x0FCE,
356 0x0FD0, 0x0FFF,
357 0x1022, 0x1022,
358 0x1028, 0x1028,
359 0x102B, 0x102B,
360 0x1033, 0x1035,
361 0x103A, 0x103F,
362 0x105A, 0x109F,
363 0x10C6, 0x10CF,
364 0x10F9, 0x10FA,
365 0x10FC, 0x10FF,
366 0x115A, 0x115E,
367 0x11A3, 0x11A7,
368 0x11FA, 0x11FF,
369 0x1207, 0x1207,
370 0x1247, 0x1247,
371 0x1249, 0x1249,
372 0x124E, 0x124F,
373 0x1257, 0x1257,
374 0x1259, 0x1259,
375 0x125E, 0x125F,
376 0x1287, 0x1287,
377 0x1289, 0x1289,
378 0x128E, 0x128F,
379 0x12AF, 0x12AF,
380 0x12B1, 0x12B1,
381 0x12B6, 0x12B7,
382 0x12BF, 0x12BF,
383 0x12C1, 0x12C1,
384 0x12C6, 0x12C7,
385 0x12CF, 0x12CF,
386 0x12D7, 0x12D7,
387 0x12EF, 0x12EF,
388 0x130F, 0x130F,
389 0x1311, 0x1311,
390 0x1316, 0x1317,
391 0x131F, 0x131F,
392 0x1347, 0x1347,
393 0x135B, 0x1360,
394 0x137D, 0x139F,
395 0x13F5, 0x1400,
396 0x1677, 0x167F,
397 0x169D, 0x169F,
398 0x16F1, 0x16FF,
399 0x170D, 0x170D,
400 0x1715, 0x171F,
401 0x1737, 0x173F,
402 0x1754, 0x175F,
403 0x176D, 0x176D,
404 0x1771, 0x1771,
405 0x1774, 0x177F,
406 0x17DD, 0x17DF,
407 0x17EA, 0x17FF,
408 0x180F, 0x180F,
409 0x181A, 0x181F,
410 0x1878, 0x187F,
411 0x18AA, 0x1DFF,
412 0x1E9C, 0x1E9F,
413 0x1EFA, 0x1EFF,
414 0x1F16, 0x1F17,
415 0x1F1E, 0x1F1F,
416 0x1F46, 0x1F47,
417 0x1F4E, 0x1F4F,
418 0x1F58, 0x1F58,
419 0x1F5A, 0x1F5A,
420 0x1F5C, 0x1F5C,
421 0x1F5E, 0x1F5E,
422 0x1F7E, 0x1F7F,
423 0x1FB5, 0x1FB5,
424 0x1FC5, 0x1FC5,
425 0x1FD4, 0x1FD5,
426 0x1FDC, 0x1FDC,
427 0x1FF0, 0x1FF1,
428 0x1FF5, 0x1FF5,
429 0x1FFF, 0x1FFF,
430 0x2053, 0x2056,
431 0x2058, 0x205E,
432 0x2064, 0x2069,
433 0x2072, 0x2073,
434 0x208F, 0x209F,
435 0x20B2, 0x20CF,
436 0x20EB, 0x20FF,
437 0x213B, 0x213C,
438 0x214C, 0x2152,
439 0x2184, 0x218F,
440 0x23CF, 0x23FF,
441 0x2427, 0x243F,
442 0x244B, 0x245F,
443 0x24FF, 0x24FF,
444 0x2614, 0x2615,
445 0x2618, 0x2618,
446 0x267E, 0x267F,
447 0x268A, 0x2700,
448 0x2705, 0x2705,
449 0x270A, 0x270B,
450 0x2728, 0x2728,
451 0x274C, 0x274C,
452 0x274E, 0x274E,
453 0x2753, 0x2755,
454 0x2757, 0x2757,
455 0x275F, 0x2760,
456 0x2795, 0x2797,
457 0x27B0, 0x27B0,
458 0x27BF, 0x27CF,
459 0x27EC, 0x27EF,
460 0x2B00, 0x2E7F,
461 0x2E9A, 0x2E9A,
462 0x2EF4, 0x2EFF,
463 0x2FD6, 0x2FEF,
464 0x2FFC, 0x2FFF,
465 0x3040, 0x3040,
466 0x3097, 0x3098,
467 0x3100, 0x3104,
468 0x312D, 0x3130,
469 0x318F, 0x318F,
470 0x31B8, 0x31EF,
471 0x321D, 0x321F,
472 0x3244, 0x3250,
473 0x327C, 0x327E,
474 0x32CC, 0x32CF,
475 0x32FF, 0x32FF,
476 0x3377, 0x337A,
477 0x33DE, 0x33DF,
478 0x33FF, 0x33FF,
479 0x4DB6, 0x4DFF,
480 0x9FA6, 0x9FFF,
481 0xA48D, 0xA48F,
482 0xA4C7, 0xABFF,
483 0xD7A4, 0xD7FF,
484 0xFA2E, 0xFA2F,
485 0xFA6B, 0xFAFF,
486 0xFB07, 0xFB12,
487 0xFB18, 0xFB1C,
488 0xFB37, 0xFB37,
489 0xFB3D, 0xFB3D,
490 0xFB3F, 0xFB3F,
491 0xFB42, 0xFB42,
492 0xFB45, 0xFB45,
493 0xFBB2, 0xFBD2,
494 0xFD40, 0xFD4F,
495 0xFD90, 0xFD91,
496 0xFDC8, 0xFDCF,
497 0xFDFD, 0xFDFF,
498 0xFE10, 0xFE1F,
499 0xFE24, 0xFE2F,
500 0xFE47, 0xFE48,
501 0xFE53, 0xFE53,
502 0xFE67, 0xFE67,
503 0xFE6C, 0xFE6F,
504 0xFE75, 0xFE75,
505 0xFEFD, 0xFEFE,
506 0xFF00, 0xFF00,
507 0xFFBF, 0xFFC1,
508 0xFFC8, 0xFFC9,
509 0xFFD0, 0xFFD1,
510 0xFFD8, 0xFFD9,
511 0xFFDD, 0xFFDF,
512 0xFFE7, 0xFFE7,
513 0xFFEF, 0xFFF8,
514 0x10000, 0x102FF,
515 0x1031F, 0x1031F,
516 0x10324, 0x1032F,
517 0x1034B, 0x103FF,
518 0x10426, 0x10427,
519 0x1044E, 0x1CFFF,
520 0x1D0F6, 0x1D0FF,
521 0x1D127, 0x1D129,
522 0x1D1DE, 0x1D3FF,
523 0x1D455, 0x1D455,
524 0x1D49D, 0x1D49D,
525 0x1D4A0, 0x1D4A1,
526 0x1D4A3, 0x1D4A4,
527 0x1D4A7, 0x1D4A8,
528 0x1D4AD, 0x1D4AD,
529 0x1D4BA, 0x1D4BA,
530 0x1D4BC, 0x1D4BC,
531 0x1D4C1, 0x1D4C1,
532 0x1D4C4, 0x1D4C4,
533 0x1D506, 0x1D506,
534 0x1D50B, 0x1D50C,
535 0x1D515, 0x1D515,
536 0x1D51D, 0x1D51D,
537 0x1D53A, 0x1D53A,
538 0x1D53F, 0x1D53F,
539 0x1D545, 0x1D545,
540 0x1D547, 0x1D549,
541 0x1D551, 0x1D551,
542 0x1D6A4, 0x1D6A7,
543 0x1D7CA, 0x1D7CD,
544 0x1D800, 0x1FFFD,
545 0x2A6D7, 0x2F7FF,
546 0x2FA1E, 0x2FFFD,
547 0x30000, 0x3FFFD,
548 0x40000, 0x4FFFD,
549 0x50000, 0x5FFFD,
550 0x60000, 0x6FFFD,
551 0x70000, 0x7FFFD,
552 0x80000, 0x8FFFD,
553 0x90000, 0x9FFFD,
554 0xA0000, 0xAFFFD,
555 0xB0000, 0xBFFFD,
556 0xC0000, 0xCFFFD,
557 0xD0000, 0xDFFFD,
558 0xE0000, 0xE0000,
559 0xE0002, 0xE001F,
560 0xE0080, 0xEFFFD
561 };
562
563 /* D.1 Characters with bidirectional property "R" or "AL" */
564 static const pg_wchar RandALCat_codepoint_ranges[] =
565 {
566 0x05BE, 0x05BE,
567 0x05C0, 0x05C0,
568 0x05C3, 0x05C3,
569 0x05D0, 0x05EA,
570 0x05F0, 0x05F4,
571 0x061B, 0x061B,
572 0x061F, 0x061F,
573 0x0621, 0x063A,
574 0x0640, 0x064A,
575 0x066D, 0x066F,
576 0x0671, 0x06D5,
577 0x06DD, 0x06DD,
578 0x06E5, 0x06E6,
579 0x06FA, 0x06FE,
580 0x0700, 0x070D,
581 0x0710, 0x0710,
582 0x0712, 0x072C,
583 0x0780, 0x07A5,
584 0x07B1, 0x07B1,
585 0x200F, 0x200F,
586 0xFB1D, 0xFB1D,
587 0xFB1F, 0xFB28,
588 0xFB2A, 0xFB36,
589 0xFB38, 0xFB3C,
590 0xFB3E, 0xFB3E,
591 0xFB40, 0xFB41,
592 0xFB43, 0xFB44,
593 0xFB46, 0xFBB1,
594 0xFBD3, 0xFD3D,
595 0xFD50, 0xFD8F,
596 0xFD92, 0xFDC7,
597 0xFDF0, 0xFDFC,
598 0xFE70, 0xFE74,
599 0xFE76, 0xFEFC
600 };
601
602 /* D.2 Characters with bidirectional property "L" */
603 static const pg_wchar LCat_codepoint_ranges[] =
604 {
605 0x0041, 0x005A,
606 0x0061, 0x007A,
607 0x00AA, 0x00AA,
608 0x00B5, 0x00B5,
609 0x00BA, 0x00BA,
610 0x00C0, 0x00D6,
611 0x00D8, 0x00F6,
612 0x00F8, 0x0220,
613 0x0222, 0x0233,
614 0x0250, 0x02AD,
615 0x02B0, 0x02B8,
616 0x02BB, 0x02C1,
617 0x02D0, 0x02D1,
618 0x02E0, 0x02E4,
619 0x02EE, 0x02EE,
620 0x037A, 0x037A,
621 0x0386, 0x0386,
622 0x0388, 0x038A,
623 0x038C, 0x038C,
624 0x038E, 0x03A1,
625 0x03A3, 0x03CE,
626 0x03D0, 0x03F5,
627 0x0400, 0x0482,
628 0x048A, 0x04CE,
629 0x04D0, 0x04F5,
630 0x04F8, 0x04F9,
631 0x0500, 0x050F,
632 0x0531, 0x0556,
633 0x0559, 0x055F,
634 0x0561, 0x0587,
635 0x0589, 0x0589,
636 0x0903, 0x0903,
637 0x0905, 0x0939,
638 0x093D, 0x0940,
639 0x0949, 0x094C,
640 0x0950, 0x0950,
641 0x0958, 0x0961,
642 0x0964, 0x0970,
643 0x0982, 0x0983,
644 0x0985, 0x098C,
645 0x098F, 0x0990,
646 0x0993, 0x09A8,
647 0x09AA, 0x09B0,
648 0x09B2, 0x09B2,
649 0x09B6, 0x09B9,
650 0x09BE, 0x09C0,
651 0x09C7, 0x09C8,
652 0x09CB, 0x09CC,
653 0x09D7, 0x09D7,
654 0x09DC, 0x09DD,
655 0x09DF, 0x09E1,
656 0x09E6, 0x09F1,
657 0x09F4, 0x09FA,
658 0x0A05, 0x0A0A,
659 0x0A0F, 0x0A10,
660 0x0A13, 0x0A28,
661 0x0A2A, 0x0A30,
662 0x0A32, 0x0A33,
663 0x0A35, 0x0A36,
664 0x0A38, 0x0A39,
665 0x0A3E, 0x0A40,
666 0x0A59, 0x0A5C,
667 0x0A5E, 0x0A5E,
668 0x0A66, 0x0A6F,
669 0x0A72, 0x0A74,
670 0x0A83, 0x0A83,
671 0x0A85, 0x0A8B,
672 0x0A8D, 0x0A8D,
673 0x0A8F, 0x0A91,
674 0x0A93, 0x0AA8,
675 0x0AAA, 0x0AB0,
676 0x0AB2, 0x0AB3,
677 0x0AB5, 0x0AB9,
678 0x0ABD, 0x0AC0,
679 0x0AC9, 0x0AC9,
680 0x0ACB, 0x0ACC,
681 0x0AD0, 0x0AD0,
682 0x0AE0, 0x0AE0,
683 0x0AE6, 0x0AEF,
684 0x0B02, 0x0B03,
685 0x0B05, 0x0B0C,
686 0x0B0F, 0x0B10,
687 0x0B13, 0x0B28,
688 0x0B2A, 0x0B30,
689 0x0B32, 0x0B33,
690 0x0B36, 0x0B39,
691 0x0B3D, 0x0B3E,
692 0x0B40, 0x0B40,
693 0x0B47, 0x0B48,
694 0x0B4B, 0x0B4C,
695 0x0B57, 0x0B57,
696 0x0B5C, 0x0B5D,
697 0x0B5F, 0x0B61,
698 0x0B66, 0x0B70,
699 0x0B83, 0x0B83,
700 0x0B85, 0x0B8A,
701 0x0B8E, 0x0B90,
702 0x0B92, 0x0B95,
703 0x0B99, 0x0B9A,
704 0x0B9C, 0x0B9C,
705 0x0B9E, 0x0B9F,
706 0x0BA3, 0x0BA4,
707 0x0BA8, 0x0BAA,
708 0x0BAE, 0x0BB5,
709 0x0BB7, 0x0BB9,
710 0x0BBE, 0x0BBF,
711 0x0BC1, 0x0BC2,
712 0x0BC6, 0x0BC8,
713 0x0BCA, 0x0BCC,
714 0x0BD7, 0x0BD7,
715 0x0BE7, 0x0BF2,
716 0x0C01, 0x0C03,
717 0x0C05, 0x0C0C,
718 0x0C0E, 0x0C10,
719 0x0C12, 0x0C28,
720 0x0C2A, 0x0C33,
721 0x0C35, 0x0C39,
722 0x0C41, 0x0C44,
723 0x0C60, 0x0C61,
724 0x0C66, 0x0C6F,
725 0x0C82, 0x0C83,
726 0x0C85, 0x0C8C,
727 0x0C8E, 0x0C90,
728 0x0C92, 0x0CA8,
729 0x0CAA, 0x0CB3,
730 0x0CB5, 0x0CB9,
731 0x0CBE, 0x0CBE,
732 0x0CC0, 0x0CC4,
733 0x0CC7, 0x0CC8,
734 0x0CCA, 0x0CCB,
735 0x0CD5, 0x0CD6,
736 0x0CDE, 0x0CDE,
737 0x0CE0, 0x0CE1,
738 0x0CE6, 0x0CEF,
739 0x0D02, 0x0D03,
740 0x0D05, 0x0D0C,
741 0x0D0E, 0x0D10,
742 0x0D12, 0x0D28,
743 0x0D2A, 0x0D39,
744 0x0D3E, 0x0D40,
745 0x0D46, 0x0D48,
746 0x0D4A, 0x0D4C,
747 0x0D57, 0x0D57,
748 0x0D60, 0x0D61,
749 0x0D66, 0x0D6F,
750 0x0D82, 0x0D83,
751 0x0D85, 0x0D96,
752 0x0D9A, 0x0DB1,
753 0x0DB3, 0x0DBB,
754 0x0DBD, 0x0DBD,
755 0x0DC0, 0x0DC6,
756 0x0DCF, 0x0DD1,
757 0x0DD8, 0x0DDF,
758 0x0DF2, 0x0DF4,
759 0x0E01, 0x0E30,
760 0x0E32, 0x0E33,
761 0x0E40, 0x0E46,
762 0x0E4F, 0x0E5B,
763 0x0E81, 0x0E82,
764 0x0E84, 0x0E84,
765 0x0E87, 0x0E88,
766 0x0E8A, 0x0E8A,
767 0x0E8D, 0x0E8D,
768 0x0E94, 0x0E97,
769 0x0E99, 0x0E9F,
770 0x0EA1, 0x0EA3,
771 0x0EA5, 0x0EA5,
772 0x0EA7, 0x0EA7,
773 0x0EAA, 0x0EAB,
774 0x0EAD, 0x0EB0,
775 0x0EB2, 0x0EB3,
776 0x0EBD, 0x0EBD,
777 0x0EC0, 0x0EC4,
778 0x0EC6, 0x0EC6,
779 0x0ED0, 0x0ED9,
780 0x0EDC, 0x0EDD,
781 0x0F00, 0x0F17,
782 0x0F1A, 0x0F34,
783 0x0F36, 0x0F36,
784 0x0F38, 0x0F38,
785 0x0F3E, 0x0F47,
786 0x0F49, 0x0F6A,
787 0x0F7F, 0x0F7F,
788 0x0F85, 0x0F85,
789 0x0F88, 0x0F8B,
790 0x0FBE, 0x0FC5,
791 0x0FC7, 0x0FCC,
792 0x0FCF, 0x0FCF,
793 0x1000, 0x1021,
794 0x1023, 0x1027,
795 0x1029, 0x102A,
796 0x102C, 0x102C,
797 0x1031, 0x1031,
798 0x1038, 0x1038,
799 0x1040, 0x1057,
800 0x10A0, 0x10C5,
801 0x10D0, 0x10F8,
802 0x10FB, 0x10FB,
803 0x1100, 0x1159,
804 0x115F, 0x11A2,
805 0x11A8, 0x11F9,
806 0x1200, 0x1206,
807 0x1208, 0x1246,
808 0x1248, 0x1248,
809 0x124A, 0x124D,
810 0x1250, 0x1256,
811 0x1258, 0x1258,
812 0x125A, 0x125D,
813 0x1260, 0x1286,
814 0x1288, 0x1288,
815 0x128A, 0x128D,
816 0x1290, 0x12AE,
817 0x12B0, 0x12B0,
818 0x12B2, 0x12B5,
819 0x12B8, 0x12BE,
820 0x12C0, 0x12C0,
821 0x12C2, 0x12C5,
822 0x12C8, 0x12CE,
823 0x12D0, 0x12D6,
824 0x12D8, 0x12EE,
825 0x12F0, 0x130E,
826 0x1310, 0x1310,
827 0x1312, 0x1315,
828 0x1318, 0x131E,
829 0x1320, 0x1346,
830 0x1348, 0x135A,
831 0x1361, 0x137C,
832 0x13A0, 0x13F4,
833 0x1401, 0x1676,
834 0x1681, 0x169A,
835 0x16A0, 0x16F0,
836 0x1700, 0x170C,
837 0x170E, 0x1711,
838 0x1720, 0x1731,
839 0x1735, 0x1736,
840 0x1740, 0x1751,
841 0x1760, 0x176C,
842 0x176E, 0x1770,
843 0x1780, 0x17B6,
844 0x17BE, 0x17C5,
845 0x17C7, 0x17C8,
846 0x17D4, 0x17DA,
847 0x17DC, 0x17DC,
848 0x17E0, 0x17E9,
849 0x1810, 0x1819,
850 0x1820, 0x1877,
851 0x1880, 0x18A8,
852 0x1E00, 0x1E9B,
853 0x1EA0, 0x1EF9,
854 0x1F00, 0x1F15,
855 0x1F18, 0x1F1D,
856 0x1F20, 0x1F45,
857 0x1F48, 0x1F4D,
858 0x1F50, 0x1F57,
859 0x1F59, 0x1F59,
860 0x1F5B, 0x1F5B,
861 0x1F5D, 0x1F5D,
862 0x1F5F, 0x1F7D,
863 0x1F80, 0x1FB4,
864 0x1FB6, 0x1FBC,
865 0x1FBE, 0x1FBE,
866 0x1FC2, 0x1FC4,
867 0x1FC6, 0x1FCC,
868 0x1FD0, 0x1FD3,
869 0x1FD6, 0x1FDB,
870 0x1FE0, 0x1FEC,
871 0x1FF2, 0x1FF4,
872 0x1FF6, 0x1FFC,
873 0x200E, 0x200E,
874 0x2071, 0x2071,
875 0x207F, 0x207F,
876 0x2102, 0x2102,
877 0x2107, 0x2107,
878 0x210A, 0x2113,
879 0x2115, 0x2115,
880 0x2119, 0x211D,
881 0x2124, 0x2124,
882 0x2126, 0x2126,
883 0x2128, 0x2128,
884 0x212A, 0x212D,
885 0x212F, 0x2131,
886 0x2133, 0x2139,
887 0x213D, 0x213F,
888 0x2145, 0x2149,
889 0x2160, 0x2183,
890 0x2336, 0x237A,
891 0x2395, 0x2395,
892 0x249C, 0x24E9,
893 0x3005, 0x3007,
894 0x3021, 0x3029,
895 0x3031, 0x3035,
896 0x3038, 0x303C,
897 0x3041, 0x3096,
898 0x309D, 0x309F,
899 0x30A1, 0x30FA,
900 0x30FC, 0x30FF,
901 0x3105, 0x312C,
902 0x3131, 0x318E,
903 0x3190, 0x31B7,
904 0x31F0, 0x321C,
905 0x3220, 0x3243,
906 0x3260, 0x327B,
907 0x327F, 0x32B0,
908 0x32C0, 0x32CB,
909 0x32D0, 0x32FE,
910 0x3300, 0x3376,
911 0x337B, 0x33DD,
912 0x33E0, 0x33FE,
913 0x3400, 0x4DB5,
914 0x4E00, 0x9FA5,
915 0xA000, 0xA48C,
916 0xAC00, 0xD7A3,
917 0xD800, 0xFA2D,
918 0xFA30, 0xFA6A,
919 0xFB00, 0xFB06,
920 0xFB13, 0xFB17,
921 0xFF21, 0xFF3A,
922 0xFF41, 0xFF5A,
923 0xFF66, 0xFFBE,
924 0xFFC2, 0xFFC7,
925 0xFFCA, 0xFFCF,
926 0xFFD2, 0xFFD7,
927 0xFFDA, 0xFFDC,
928 0x10300, 0x1031E,
929 0x10320, 0x10323,
930 0x10330, 0x1034A,
931 0x10400, 0x10425,
932 0x10428, 0x1044D,
933 0x1D000, 0x1D0F5,
934 0x1D100, 0x1D126,
935 0x1D12A, 0x1D166,
936 0x1D16A, 0x1D172,
937 0x1D183, 0x1D184,
938 0x1D18C, 0x1D1A9,
939 0x1D1AE, 0x1D1DD,
940 0x1D400, 0x1D454,
941 0x1D456, 0x1D49C,
942 0x1D49E, 0x1D49F,
943 0x1D4A2, 0x1D4A2,
944 0x1D4A5, 0x1D4A6,
945 0x1D4A9, 0x1D4AC,
946 0x1D4AE, 0x1D4B9,
947 0x1D4BB, 0x1D4BB,
948 0x1D4BD, 0x1D4C0,
949 0x1D4C2, 0x1D4C3,
950 0x1D4C5, 0x1D505,
951 0x1D507, 0x1D50A,
952 0x1D50D, 0x1D514,
953 0x1D516, 0x1D51C,
954 0x1D51E, 0x1D539,
955 0x1D53B, 0x1D53E,
956 0x1D540, 0x1D544,
957 0x1D546, 0x1D546,
958 0x1D54A, 0x1D550,
959 0x1D552, 0x1D6A3,
960 0x1D6A8, 0x1D7C9,
961 0x20000, 0x2A6D6,
962 0x2F800, 0x2FA1D,
963 0xF0000, 0xFFFFD,
964 0x100000, 0x10FFFD
965 };
966
967 /* End of stringprep tables */
968
969
/*
 * Is the given Unicode codepoint covered by the given table of ranges?
 *
 * "map" must name one of the range arrays themselves, not a pointer to
 * one: the element count is taken with lengthof(map), which is presumably
 * sizeof-based and so only meaningful for a true array.
 */
#define IS_CODE_IN_TABLE(code, map) \
	is_code_in_table((code), (map), lengthof(map))
972
973 static int
codepoint_range_cmp(const void * a,const void * b)974 codepoint_range_cmp(const void *a, const void *b)
975 {
976 const pg_wchar *key = (const pg_wchar *) a;
977 const pg_wchar *range = (const pg_wchar *) b;
978
979 if (*key < range[0])
980 return -1; /* less than lower bound */
981 if (*key > range[1])
982 return 1; /* greater than upper bound */
983
984 return 0; /* within range */
985 }
986
987 static bool
is_code_in_table(pg_wchar code,const pg_wchar * map,int mapsize)988 is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize)
989 {
990 Assert(mapsize % 2 == 0);
991
992 if (code < map[0] || code > map[mapsize - 1])
993 return false;
994
995 if (bsearch(&code, map, mapsize / 2, sizeof(pg_wchar) * 2,
996 codepoint_range_cmp))
997 return true;
998 else
999 return false;
1000 }
1001
/*
 * Calculate the length in characters of a null-terminated UTF-8 string.
 *
 * Returns -1 if the input is not valid UTF-8.  That includes a string that
 * ends in the middle of a multibyte sequence, i.e. whose final lead byte
 * announces more bytes than remain before the terminator.
 */
static int
pg_utf8_string_len(const char *source)
{
	const unsigned char *p = (const unsigned char *) source;
	size_t		remaining = strlen(source);
	int			num_chars = 0;

	while (remaining > 0)
	{
		/* sequence length as announced by the lead byte */
		int			l = pg_utf_mblen(p);

		/*
		 * Make sure the whole sequence lies inside the string before asking
		 * pg_utf8_islegal() to validate "l" bytes.  Without this check a
		 * truncated trailing sequence would have the validator examine
		 * bytes beyond the terminating NUL.
		 */
		if (l <= 0 || (size_t) l > remaining)
			return -1;

		if (!pg_utf8_islegal(p, l))
			return -1;

		p += l;
		remaining -= (size_t) l;
		num_chars++;
	}

	return num_chars;
}
1027
/*
 * Returns true if no byte of the null-terminated input string has its
 * high bit set, i.e. the string is pure ASCII.  An empty string counts as
 * ASCII.
 */
static bool
pg_is_ascii_string(const char *p)
{
	const char *cursor;

	for (cursor = p; *cursor != '\0'; cursor++)
	{
		if (IS_HIGHBIT_SET(*cursor))
			return false;
	}

	return true;
}
1042
1043
1044 /*
1045 * pg_saslprep - Normalize a password with SASLprep.
1046 *
1047 * SASLprep requires the input to be in UTF-8 encoding, but PostgreSQL
1048 * supports many encodings, so we don't blindly assume that. pg_saslprep
1049 * will check if the input looks like valid UTF-8, and returns
1050 * SASLPREP_INVALID_UTF8 if not.
1051 *
1052 * If the string contains prohibited characters (or more precisely, if the
1053 * output string would contain prohibited characters after normalization),
1054 * returns SASLPREP_PROHIBITED.
1055 *
1056 * On success, returns SASLPREP_SUCCESS, and the normalized string in
1057 * *output.
1058 *
1059 * In frontend, the normalized string is malloc'd, and the caller is
1060 * responsible for freeing it. If an allocation fails, returns
1061 * SASLPREP_OOM. In backend, the normalized string is palloc'd instead,
1062 * and a failed allocation leads to ereport(ERROR).
1063 */
1064 pg_saslprep_rc
pg_saslprep(const char * input,char ** output)1065 pg_saslprep(const char *input, char **output)
1066 {
1067 pg_wchar *input_chars = NULL;
1068 pg_wchar *output_chars = NULL;
1069 int input_size;
1070 char *result;
1071 int result_size;
1072 int count;
1073 int i;
1074 bool contains_RandALCat;
1075 unsigned char *p;
1076 pg_wchar *wp;
1077
1078 /* Ensure we return *output as NULL on failure */
1079 *output = NULL;
1080
1081 /* Check that the password isn't stupendously long */
1082 if (strlen(input) > MAX_PASSWORD_LENGTH)
1083 {
1084 #ifndef FRONTEND
1085 ereport(ERROR,
1086 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1087 errmsg("password too long")));
1088 #else
1089 return SASLPREP_OOM;
1090 #endif
1091 }
1092
1093 /*
1094 * Quick check if the input is pure ASCII. An ASCII string requires no
1095 * further processing.
1096 */
1097 if (pg_is_ascii_string(input))
1098 {
1099 *output = STRDUP(input);
1100 if (!(*output))
1101 goto oom;
1102 return SASLPREP_SUCCESS;
1103 }
1104
1105 /*
1106 * Convert the input from UTF-8 to an array of Unicode codepoints.
1107 *
1108 * This also checks that the input is a legal UTF-8 string.
1109 */
1110 input_size = pg_utf8_string_len(input);
1111 if (input_size < 0)
1112 return SASLPREP_INVALID_UTF8;
1113
1114 input_chars = ALLOC((input_size + 1) * sizeof(pg_wchar));
1115 if (!input_chars)
1116 goto oom;
1117
1118 p = (unsigned char *) input;
1119 for (i = 0; i < input_size; i++)
1120 {
1121 input_chars[i] = utf8_to_unicode(p);
1122 p += pg_utf_mblen(p);
1123 }
1124 input_chars[i] = (pg_wchar) '\0';
1125
1126 /*
1127 * The steps below correspond to the steps listed in [RFC3454], Section
1128 * "2. Preparation Overview"
1129 */
1130
1131 /*
1132 * 1) Map -- For each character in the input, check if it has a mapping
1133 * and, if so, replace it with its mapping.
1134 */
1135 count = 0;
1136 for (i = 0; i < input_size; i++)
1137 {
1138 pg_wchar code = input_chars[i];
1139
1140 if (IS_CODE_IN_TABLE(code, non_ascii_space_ranges))
1141 input_chars[count++] = 0x0020;
1142 else if (IS_CODE_IN_TABLE(code, commonly_mapped_to_nothing_ranges))
1143 {
1144 /* map to nothing */
1145 }
1146 else
1147 input_chars[count++] = code;
1148 }
1149 input_chars[count] = (pg_wchar) '\0';
1150 input_size = count;
1151
1152 if (input_size == 0)
1153 goto prohibited; /* don't allow empty password */
1154
1155 /*
1156 * 2) Normalize -- Normalize the result of step 1 using Unicode
1157 * normalization.
1158 */
1159 output_chars = unicode_normalize(UNICODE_NFKC, input_chars);
1160 if (!output_chars)
1161 goto oom;
1162
1163 /*
1164 * 3) Prohibit -- Check for any characters that are not allowed in the
1165 * output. If any are found, return an error.
1166 */
1167 for (i = 0; i < input_size; i++)
1168 {
1169 pg_wchar code = input_chars[i];
1170
1171 if (IS_CODE_IN_TABLE(code, prohibited_output_ranges))
1172 goto prohibited;
1173 if (IS_CODE_IN_TABLE(code, unassigned_codepoint_ranges))
1174 goto prohibited;
1175 }
1176
1177 /*
1178 * 4) Check bidi -- Possibly check for right-to-left characters, and if
1179 * any are found, make sure that the whole string satisfies the
1180 * requirements for bidirectional strings. If the string does not satisfy
1181 * the requirements for bidirectional strings, return an error.
1182 *
1183 * [RFC3454], Section "6. Bidirectional Characters" explains in more
1184 * detail what that means:
1185 *
1186 * "In any profile that specifies bidirectional character handling, all
1187 * three of the following requirements MUST be met:
1188 *
1189 * 1) The characters in section 5.8 MUST be prohibited.
1190 *
1191 * 2) If a string contains any RandALCat character, the string MUST NOT
1192 * contain any LCat character.
1193 *
1194 * 3) If a string contains any RandALCat character, a RandALCat character
1195 * MUST be the first character of the string, and a RandALCat character
1196 * MUST be the last character of the string."
1197 */
1198 contains_RandALCat = false;
1199 for (i = 0; i < input_size; i++)
1200 {
1201 pg_wchar code = input_chars[i];
1202
1203 if (IS_CODE_IN_TABLE(code, RandALCat_codepoint_ranges))
1204 {
1205 contains_RandALCat = true;
1206 break;
1207 }
1208 }
1209
1210 if (contains_RandALCat)
1211 {
1212 pg_wchar first = input_chars[0];
1213 pg_wchar last = input_chars[input_size - 1];
1214
1215 for (i = 0; i < input_size; i++)
1216 {
1217 pg_wchar code = input_chars[i];
1218
1219 if (IS_CODE_IN_TABLE(code, LCat_codepoint_ranges))
1220 goto prohibited;
1221 }
1222
1223 if (!IS_CODE_IN_TABLE(first, RandALCat_codepoint_ranges) ||
1224 !IS_CODE_IN_TABLE(last, RandALCat_codepoint_ranges))
1225 goto prohibited;
1226 }
1227
1228 /*
1229 * Finally, convert the result back to UTF-8.
1230 */
1231 result_size = 0;
1232 for (wp = output_chars; *wp; wp++)
1233 {
1234 unsigned char buf[4];
1235
1236 unicode_to_utf8(*wp, buf);
1237 result_size += pg_utf_mblen(buf);
1238 }
1239
1240 result = ALLOC(result_size + 1);
1241 if (!result)
1242 goto oom;
1243
1244 /*
1245 * There are no error exits below here, so the error exit paths don't need
1246 * to worry about possibly freeing "result".
1247 */
1248 p = (unsigned char *) result;
1249 for (wp = output_chars; *wp; wp++)
1250 {
1251 unicode_to_utf8(*wp, p);
1252 p += pg_utf_mblen(p);
1253 }
1254 Assert((char *) p == result + result_size);
1255 *p = '\0';
1256
1257 FREE(input_chars);
1258 FREE(output_chars);
1259
1260 *output = result;
1261 return SASLPREP_SUCCESS;
1262
1263 prohibited:
1264 if (input_chars)
1265 FREE(input_chars);
1266 if (output_chars)
1267 FREE(output_chars);
1268
1269 return SASLPREP_PROHIBITED;
1270
1271 oom:
1272 if (input_chars)
1273 FREE(input_chars);
1274 if (output_chars)
1275 FREE(output_chars);
1276
1277 return SASLPREP_OOM;
1278 }
1279