1 #ifndef UNICODE_H
2 #define UNICODE_H
3 
4 #include <assert.h>
5 #include "adt/obst.h"
6 
/** Type used for a decoded character value; read_utf8_char() returns this type. */
typedef unsigned int utf32;
/** printf conversion specification for a utf32 value ("%u", as utf32 is an unsigned int). */
#define UTF32_PRINTF_FORMAT "%u"
9 
/**
 * Decode a single UTF-8 encoded character from a string and step past it.
 *
 * The lead byte selects a 1, 2, 3 or 4 byte sequence; the payload bits of the
 * lead byte and of each following byte are concatenated, six bits per
 * following byte, to form the result.
 *
 * Warning: the input must be valid UTF-8. Continuation bytes are not checked
 * and the behaviour is undefined for invalid input. The only check is an
 * assert() that a lead byte which is not a 1, 2 or 3 byte lead has the
 * 4 byte pattern 0b11110???.
 *
 * @param p  Address of a pointer into the string. On return the pointer has
 *           been advanced by the number of bytes consumed.
 * @return   The decoded character value.
 */
static inline utf32 read_utf8_char(const char **p)
{
	const unsigned char *c = (const unsigned char *) *p;
	unsigned             payload_mask;
	unsigned             trailing;

	/* Classify the lead byte: which of its bits are payload, and how many
	 * continuation bytes (0b10??????) follow it. */
	if ((*c & 0x80) == 0) {
		/* 0b0???????: one byte, the byte is the value itself */
		payload_mask = 0x7F;
		trailing     = 0;
	} else if ((*c & 0xE0) == 0xC0) {
		/* 0b110?????: two bytes */
		payload_mask = 0x1F;
		trailing     = 1;
	} else if ((*c & 0xF0) == 0xE0) {
		/* 0b1110????: three bytes */
		payload_mask = 0x0F;
		trailing     = 2;
	} else {
		/* 0b11110???: four bytes */
		assert((*c & 0xF8) == 0xF0);
		payload_mask = 0x07;
		trailing     = 3;
	}

	utf32 code_point = *c++ & payload_mask;

	/* Each continuation byte contributes its low six bits. */
	for (; trailing != 0; --trailing) {
		code_point = (code_point << 6) | (*c++ & 0x3F);
	}

	*p = (const char*) c;
	return code_point;
}
47 
/**
 * Append the UTF-8 encoding of a character value to an obstack, one byte at
 * a time via obstack_1grow().
 *
 * Values below 0x80 are written as a single byte, values below 0x800 as two
 * bytes, values below 0x10000 as three bytes and everything else as four
 * bytes. The value is not range checked.
 *
 * @param obst  The obstack to append to.
 * @param c     The character value to encode.
 */
static inline void obstack_grow_utf8(struct obstack *const obst, utf32 const c)
{
	unsigned trailing;

	/* Emit the lead byte and remember how many continuation bytes follow. */
	if (c < 0x80U) {
		obstack_1grow(obst, c);
		return;
	} else if (c < 0x800) {
		obstack_1grow(obst, 0xC0 |  (c >>  6));
		trailing = 1;
	} else if (c < 0x10000) {
		obstack_1grow(obst, 0xE0 |  (c >> 12));
		trailing = 2;
	} else {
		obstack_1grow(obst, 0xF0 |  (c >> 18));
		trailing = 3;
	}

	/* Continuation bytes, most significant six-bit group first:
	 * shifts of 12, 6, 0 for three trailing bytes; 6, 0 for two; 0 for one. */
	while (trailing-- > 0) {
		obstack_1grow(obst, 0x80 | ((c >> (6 * trailing)) & 0x3F));
	}
}
67 
68 #endif
69