xref: /freebsd/stand/efi/libefi/efichar.c (revision 069ac184)
1 /*-
2  * Copyright (c) 2010 Marcel Moolenaar
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/types.h>
28 #include <errno.h>
29 #ifdef _STANDALONE
30 #include <stand.h>
31 #else
32 #include <stddef.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 #include <sys/efi.h>
37 #include <machine/efi.h>
38 #endif
39 
40 #include "efichar.h"
41 
42 int
43 ucs2len(const efi_char *str)
44 {
45 	int i;
46 
47 	i = 0;
48 	while (*str++)
49 		i++;
50 	return (i);
51 }
52 
53 /*
54  * If nm were converted to utf8, what what would strlen
55  * return on the resulting string?
56  */
57 static size_t
58 utf8_len_of_ucs2(const efi_char *nm)
59 {
60 	size_t len;
61 	efi_char c;
62 
63 	len = 0;
64 	while (*nm) {
65 		c = *nm++;
66 		if (c > 0x7ff)
67 			len += 3;
68 		else if (c > 0x7f)
69 			len += 2;
70 		else
71 			len++;
72 	}
73 
74 	return (len);
75 }
76 
77 int
78 ucs2_to_utf8(const efi_char *nm, char **name)
79 {
80 	size_t len, sz;
81 	efi_char c;
82 	char *cp;
83 	int freeit = *name == NULL;
84 
85 	sz = utf8_len_of_ucs2(nm) + 1;
86 	len = 0;
87 	if (*name != NULL)
88 		cp = *name;
89 	else
90 		cp = *name = malloc(sz);
91 	if (*name == NULL)
92 		return (ENOMEM);
93 
94 	while (*nm) {
95 		c = *nm++;
96 		if (c > 0x7ff) {
97 			if (len++ < sz)
98 				*cp++ = (char)(0xE0 | (c >> 12));
99 			if (len++ < sz)
100 				*cp++ = (char)(0x80 | ((c >> 6) & 0x3f));
101 			if (len++ < sz)
102 				*cp++ = (char)(0x80 | (c & 0x3f));
103 		} else if (c > 0x7f) {
104 			if (len++ < sz)
105 				*cp++ = (char)(0xC0 | ((c >> 6) & 0x1f));
106 			if (len++ < sz)
107 				*cp++ = (char)(0x80 | (c & 0x3f));
108 		} else {
109 			if (len++ < sz)
110 				*cp++ = (char)(c & 0x7f);
111 		}
112 	}
113 
114 	if (len >= sz) {
115 		/* Absent bugs, we'll never return EOVERFLOW */
116 		if (freeit) {
117 			free(*name);
118 			*name = NULL;
119 		}
120 		return (EOVERFLOW);
121 	}
122 	*cp++ = '\0';
123 
124 	return (0);
125 }
126 
127 int
128 utf8_to_ucs2(const char *name, efi_char **nmp, size_t *len)
129 {
130 	efi_char *nm;
131 	size_t sz;
132 	uint32_t ucs4;
133 	int c, bytes;
134 	int freeit = *nmp == NULL;
135 
136 	sz = strlen(name) * 2 + 2;
137 	if (*nmp == NULL)
138 		*nmp = malloc(sz);
139 	if (*nmp == NULL)
140 		return (ENOMEM);
141 	nm = *nmp;
142 	*len = sz;
143 
144 	ucs4 = 0;
145 	bytes = 0;
146 	while (sz > 1 && *name != '\0') {
147 		c = *name++;
148 		/*
149 		 * Conditionalize on the two major character types:
150 		 * initial and followup characters.
151 		 */
152 		if ((c & 0xc0) != 0x80) {
153 			/* Initial characters. */
154 			if (bytes != 0)
155 				goto ilseq;
156 			if ((c & 0xf8) == 0xf0) {
157 				ucs4 = c & 0x07;
158 				bytes = 3;
159 			} else if ((c & 0xf0) == 0xe0) {
160 				ucs4 = c & 0x0f;
161 				bytes = 2;
162 			} else if ((c & 0xe0) == 0xc0) {
163 				ucs4 = c & 0x1f;
164 				bytes = 1;
165 			} else {
166 				ucs4 = c & 0x7f;
167 				bytes = 0;
168 			}
169 		} else {
170 			/* Followup characters. */
171 			if (bytes > 0) {
172 				ucs4 = (ucs4 << 6) + (c & 0x3f);
173 				bytes--;
174 			} else if (bytes == 0)
175 				goto ilseq;
176 		}
177 		if (bytes == 0) {
178 			if (ucs4 > 0xffff)
179 				goto ilseq;
180 			*nm++ = (efi_char)ucs4;
181 			sz -= 2;
182 		}
183 	}
184 	if (sz < 2) {
185 		if (freeit) {
186 			free(nm);
187 			*nmp = NULL;
188 		}
189 		return (EDOOFUS);
190 	}
191 	sz -= 2;
192 	*nm = 0;
193 	*len -= sz;
194 	return (0);
195 ilseq:
196 	if (freeit) {
197 		free(nm);
198 		*nmp = NULL;
199 	}
200 	return (EILSEQ);
201 }
202