1 /*
2  * The authors of this software are Rob Pike and Ken Thompson.
3  *              Copyright (c) 2002 by Lucent Technologies.
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose without fee is hereby granted, provided that this entire notice
6  * is included in all copies of any software which is or includes a copy
7  * or modification of this software and in all copies of the supporting
8  * documentation for such software.
9  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
10  * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
11  * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
12  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
13  */
14 #include <stdarg.h>
15 #include <string.h>
16 #include "plan9.h"
17 #include "utf.h"
18 
19 enum
20 {
21 	Bit1	= 7,
22 	Bitx	= 6,
23 	Bit2	= 5,
24 	Bit3	= 4,
25 	Bit4	= 3,
26 
27 	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
28 	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
29 	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
30 	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
31 	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
32 
33 	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0111 1111 */
34 	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0111 1111 1111 */
35 	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 1111 1111 1111 1111 */
36 
37 	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
38 	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */
39 
40 	Bad	= Runeerror,
41 };
42 
43 int
chartorune(Rune * rune,const char * str)44 chartorune(Rune *rune, const char *str)
45 {
46 	int c, c1, c2;
47 	long l;
48 
49 	/*
50 	 * one character sequence
51 	 *	00000-0007F => T1
52 	 */
53 	c = *(uchar*)str;
54 	if(c < Tx) {
55 		*rune = c;
56 		return 1;
57 	}
58 
59 	/*
60 	 * two character sequence
61 	 *	0080-07FF => T2 Tx
62 	 */
63 	c1 = *(uchar*)(str+1) ^ Tx;
64 	if(c1 & Testx)
65 		goto bad;
66 	if(c < T3) {
67 		if(c < T2)
68 			goto bad;
69 		l = ((c << Bitx) | c1) & Rune2;
70 		if(l <= Rune1)
71 			goto bad;
72 		*rune = l;
73 		return 2;
74 	}
75 
76 	/*
77 	 * three character sequence
78 	 *	0800-FFFF => T3 Tx Tx
79 	 */
80 	c2 = *(uchar*)(str+2) ^ Tx;
81 	if(c2 & Testx)
82 		goto bad;
83 	if(c < T4) {
84 		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
85 		if(l <= Rune2)
86 			goto bad;
87 		*rune = l;
88 		return 3;
89 	}
90 
91 	/*
92 	 * bad decoding
93 	 */
94 bad:
95 	*rune = Bad;
96 	return 1;
97 }
98 
99 int
runetochar(char * str,const Rune * rune)100 runetochar(char *str, const Rune *rune)
101 {
102 	long c;
103 
104 	/*
105 	 * one character sequence
106 	 *	00000-0007F => 00-7F
107 	 */
108 	c = *rune;
109 	if(c <= Rune1) {
110 		str[0] = c;
111 		return 1;
112 	}
113 
114 	/*
115 	 * two character sequence
116 	 *	0080-07FF => T2 Tx
117 	 */
118 	if(c <= Rune2) {
119 		str[0] = T2 | (c >> 1*Bitx);
120 		str[1] = Tx | (c & Maskx);
121 		return 2;
122 	}
123 
124 	/*
125 	 * three character sequence
126 	 *	0800-FFFF => T3 Tx Tx
127 	 */
128 	str[0] = T3 |  (c >> 2*Bitx);
129 	str[1] = Tx | ((c >> 1*Bitx) & Maskx);
130 	str[2] = Tx |  (c & Maskx);
131 	return 3;
132 }
133 
134 int
runelen(Rune c)135 runelen(Rune c)
136 {
137 	char str[10];
138 
139 	return runetochar(str, &c);
140 }
141 
142 int
runenlen(const Rune * r,int nrune)143 runenlen(const Rune *r, int nrune)
144 {
145 	int nb, c;
146 
147 	nb = 0;
148 	while(nrune--) {
149 		c = *r++;
150 		if(c <= Rune1)
151 			nb++;
152 		else
153 		if(c <= Rune2)
154 			nb += 2;
155 		else
156 			nb += 3;
157 	}
158 	return nb;
159 }
160 
161 int
fullrune(const char * str,int n)162 fullrune(const char *str, int n)
163 {
164 	int c;
165 
166 	if(n > 0) {
167 		c = *(uchar*)str;
168 		if(c < Tx)
169 			return 1;
170 		if(n > 1)
171 			if(c < T3 || n > 2)
172 				return 1;
173 	}
174 	return 0;
175 }
176