1 /** @file
2   Implementation of translation upon VT-UTF8.
3 
4 Copyright (c) 2006 - 2018, Intel Corporation. All rights reserved.<BR>
5 SPDX-License-Identifier: BSD-2-Clause-Patent
6 
7 **/
8 
9 #include "Terminal.h"
10 
11 /**
12   Translate all VT-UTF8 characters in the Raw FIFI into unicode characters,
13   and insert them into Unicode FIFO.
14 
15   @param TerminalDevice          The terminal device.
16 
17 **/
18 VOID
VTUTF8RawDataToUnicode(IN TERMINAL_DEV * TerminalDevice)19 VTUTF8RawDataToUnicode (
20   IN  TERMINAL_DEV    *TerminalDevice
21   )
22 {
23   UTF8_CHAR Utf8Char;
24   UINT8     ValidBytes;
25   UINT16    UnicodeChar;
26 
27   ValidBytes = 0;
28   //
29   // pop the raw data out from the raw fifo,
30   // and translate it into unicode, then push
31   // the unicode into unicode fifo, until the raw fifo is empty.
32   //
33   while (!IsRawFiFoEmpty (TerminalDevice) && !IsUnicodeFiFoFull (TerminalDevice)) {
34 
35     GetOneValidUtf8Char (TerminalDevice, &Utf8Char, &ValidBytes);
36 
37     if (ValidBytes < 1 || ValidBytes > 3) {
38       continue;
39     }
40 
41     Utf8ToUnicode (Utf8Char, ValidBytes, (CHAR16 *) &UnicodeChar);
42 
43     UnicodeFiFoInsertOneKey (TerminalDevice, UnicodeChar);
44   }
45 }
46 
47 /**
48   Get one valid VT-UTF8 characters set from Raw Data FIFO.
49 
50   @param  Utf8Device          The terminal device.
51   @param  Utf8Char            Returned valid VT-UTF8 characters set.
52   @param  ValidBytes          The count of returned VT-VTF8 characters.
53                               If ValidBytes is zero, no valid VT-UTF8 returned.
54 
55 **/
56 VOID
GetOneValidUtf8Char(IN TERMINAL_DEV * Utf8Device,OUT UTF8_CHAR * Utf8Char,OUT UINT8 * ValidBytes)57 GetOneValidUtf8Char (
58   IN  TERMINAL_DEV      *Utf8Device,
59   OUT UTF8_CHAR         *Utf8Char,
60   OUT UINT8             *ValidBytes
61   )
62 {
63   UINT8   Temp;
64   UINT8   Index;
65   BOOLEAN FetchFlag;
66 
67   Temp      = 0;
68   Index     = 0;
69   FetchFlag = TRUE;
70 
71   //
72   // if no valid Utf8 char is found in the RawFiFo,
73   // then *ValidBytes will be zero.
74   //
75   *ValidBytes = 0;
76 
77   while (!IsRawFiFoEmpty (Utf8Device)) {
78 
79     RawFiFoRemoveOneKey (Utf8Device, &Temp);
80 
81     switch (*ValidBytes) {
82 
83     case 0:
84       if ((Temp & 0x80) == 0) {
85         //
86         // one-byte utf8 char
87         //
88         *ValidBytes       = 1;
89 
90         Utf8Char->Utf8_1  = Temp;
91 
92         FetchFlag         = FALSE;
93 
94       } else if ((Temp & 0xe0) == 0xc0) {
95         //
96         // two-byte utf8 char
97         //
98         *ValidBytes         = 2;
99 
100         Utf8Char->Utf8_2[1] = Temp;
101 
102       } else if ((Temp & 0xf0) == 0xe0) {
103         //
104         // three-byte utf8 char
105         //
106         *ValidBytes         = 3;
107 
108         Utf8Char->Utf8_3[2] = Temp;
109 
110         Index++;
111 
112       } else {
113         //
114         // reset *ValidBytes to zero, let valid utf8 char search restart
115         //
116         *ValidBytes = 0;
117       }
118 
119       break;
120 
121     case 2:
122       //
123       // two-byte utf8 char go on
124       //
125       if ((Temp & 0xc0) == 0x80) {
126 
127         Utf8Char->Utf8_2[0] = Temp;
128 
129         FetchFlag           = FALSE;
130 
131       } else {
132 
133         *ValidBytes = 0;
134       }
135       break;
136 
137     case 3:
138       //
139       // three-byte utf8 char go on
140       //
141       if ((Temp & 0xc0) == 0x80) {
142         if (Index == 1) {
143           Utf8Char->Utf8_3[1] = Temp;
144           Index++;
145         } else {
146           Utf8Char->Utf8_3[0] = Temp;
147           FetchFlag = FALSE;
148         }
149       } else {
150         //
151         // reset *ValidBytes and Index to zero, let valid utf8 char search restart
152         //
153         *ValidBytes = 0;
154         Index       = 0;
155       }
156       break;
157 
158     default:
159       break;
160     }
161 
162     if (!FetchFlag) {
163       break;
164     }
165   }
166 
167   return ;
168 }
169 
170 /**
171   Translate VT-UTF8 characters into one Unicode character.
172 
173   UTF8 Encoding Table
174   Bits per Character | Unicode Character Range | Unicode Binary  Encoding |  UTF8 Binary Encoding
175         0-7           |     0x0000 - 0x007F     |     00000000 0xxxxxxx    |   0xxxxxxx
176         8-11          |     0x0080 - 0x07FF     |     00000xxx xxxxxxxx     |   110xxxxx 10xxxxxx
177        12-16         |     0x0800 - 0xFFFF     |     xxxxxxxx xxxxxxxx    |   1110xxxx 10xxxxxx 10xxxxxx
178 
179 
180   @param  Utf8Char         VT-UTF8 character set needs translating.
181   @param  ValidBytes       The count of valid VT-UTF8 characters.
182   @param  UnicodeChar      Returned unicode character.
183 
184 **/
185 VOID
Utf8ToUnicode(IN UTF8_CHAR Utf8Char,IN UINT8 ValidBytes,OUT CHAR16 * UnicodeChar)186 Utf8ToUnicode (
187   IN  UTF8_CHAR       Utf8Char,
188   IN  UINT8           ValidBytes,
189   OUT CHAR16          *UnicodeChar
190   )
191 {
192   UINT8 UnicodeByte0;
193   UINT8 UnicodeByte1;
194   UINT8 Byte0;
195   UINT8 Byte1;
196   UINT8 Byte2;
197 
198   *UnicodeChar = 0;
199 
200   //
201   // translate utf8 code to unicode, in terminal standard,
202   // up to 3 bytes utf8 code is supported.
203   //
204   switch (ValidBytes) {
205   case 1:
206     //
207     // one-byte utf8 code
208     //
209     *UnicodeChar = (UINT16) Utf8Char.Utf8_1;
210     break;
211 
212   case 2:
213     //
214     // two-byte utf8 code
215     //
216     Byte0         = Utf8Char.Utf8_2[0];
217     Byte1         = Utf8Char.Utf8_2[1];
218 
219     UnicodeByte0  = (UINT8) ((Byte1 << 6) | (Byte0 & 0x3f));
220     UnicodeByte1  = (UINT8) ((Byte1 >> 2) & 0x07);
221     *UnicodeChar  = (UINT16) (UnicodeByte0 | (UnicodeByte1 << 8));
222     break;
223 
224   case 3:
225     //
226     // three-byte utf8 code
227     //
228     Byte0         = Utf8Char.Utf8_3[0];
229     Byte1         = Utf8Char.Utf8_3[1];
230     Byte2         = Utf8Char.Utf8_3[2];
231 
232     UnicodeByte0  = (UINT8) ((Byte1 << 6) | (Byte0 & 0x3f));
233     UnicodeByte1  = (UINT8) ((Byte2 << 4) | ((Byte1 >> 2) & 0x0f));
234     *UnicodeChar  = (UINT16) (UnicodeByte0 | (UnicodeByte1 << 8));
235 
236   default:
237     break;
238   }
239 
240   return ;
241 }
242 
243 /**
244   Translate one Unicode character into VT-UTF8 characters.
245 
246   UTF8 Encoding Table
247   Bits per Character | Unicode Character Range | Unicode Binary  Encoding |  UTF8 Binary Encoding
248         0-7           |     0x0000 - 0x007F     |     00000000 0xxxxxxx    |   0xxxxxxx
249         8-11          |     0x0080 - 0x07FF     |     00000xxx xxxxxxxx     |   110xxxxx 10xxxxxx
250        12-16         |     0x0800 - 0xFFFF     |     xxxxxxxx xxxxxxxx    |   1110xxxx 10xxxxxx 10xxxxxx
251 
252 
253   @param  Unicode          Unicode character need translating.
254   @param  Utf8Char         Return VT-UTF8 character set.
255   @param  ValidBytes       The count of valid VT-UTF8 characters. If
256                            ValidBytes is zero, no valid VT-UTF8 returned.
257 
258 **/
259 VOID
UnicodeToUtf8(IN CHAR16 Unicode,OUT UTF8_CHAR * Utf8Char,OUT UINT8 * ValidBytes)260 UnicodeToUtf8 (
261   IN  CHAR16      Unicode,
262   OUT UTF8_CHAR   *Utf8Char,
263   OUT UINT8       *ValidBytes
264   )
265 {
266   UINT8 UnicodeByte0;
267   UINT8 UnicodeByte1;
268   //
269   // translate unicode to utf8 code
270   //
271   UnicodeByte0  = (UINT8) Unicode;
272   UnicodeByte1  = (UINT8) (Unicode >> 8);
273 
274   if (Unicode < 0x0080) {
275 
276     Utf8Char->Utf8_1  = (UINT8) (UnicodeByte0 & 0x7f);
277     *ValidBytes       = 1;
278 
279   } else if (Unicode < 0x0800) {
280     //
281     // byte sequence: high -> low
282     //                Utf8_2[0], Utf8_2[1]
283     //
284     Utf8Char->Utf8_2[1] = (UINT8) ((UnicodeByte0 & 0x3f) + 0x80);
285     Utf8Char->Utf8_2[0] = (UINT8) ((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x1f) + 0xc0);
286 
287     *ValidBytes         = 2;
288 
289   } else {
290     //
291     // byte sequence: high -> low
292     //                Utf8_3[0], Utf8_3[1], Utf8_3[2]
293     //
294     Utf8Char->Utf8_3[2] = (UINT8) ((UnicodeByte0 & 0x3f) + 0x80);
295     Utf8Char->Utf8_3[1] = (UINT8) ((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x3f) + 0x80);
296     Utf8Char->Utf8_3[0] = (UINT8) (((UnicodeByte1 >> 4) & 0x0f) + 0xe0);
297 
298     *ValidBytes         = 3;
299   }
300 }
301 
302 
303 /**
304   Check if input string is valid VT-UTF8 string.
305 
306   @param  TerminalDevice          The terminal device.
307   @param  WString                 The input string.
308 
309   @retval EFI_SUCCESS             If all input characters are valid.
310 
311 **/
312 EFI_STATUS
VTUTF8TestString(IN TERMINAL_DEV * TerminalDevice,IN CHAR16 * WString)313 VTUTF8TestString (
314   IN  TERMINAL_DEV    *TerminalDevice,
315   IN  CHAR16          *WString
316   )
317 {
318   //
319   // to utf8, all kind of characters are supported.
320   //
321   return EFI_SUCCESS;
322 }
323