1 #ifndef _WIMLIB_ENCODING_H
2 #define _WIMLIB_ENCODING_H
3 
4 #include <string.h>
5 
6 #include "wimlib/error.h"
7 #include "wimlib/util.h"
8 #include "wimlib/types.h"
9 
10 /* String conversion functions */
11 
/* Convert the @in_nbytes bytes of UTF-8 text at @in to UTF-16LE, delivering
 * the converted string through *@out_ret and a status code as the return
 * value.  The inline helpers in this header pass NULL for @out_nbytes_ret
 * when the length is not wanted, and release the returned string with FREE().
 *
 * NOTE(review): the definition is not in this header; presumably 0 means
 * success, a WIMLIB_ERR_* code means failure, and *@out_nbytes_ret receives
 * the output length in bytes -- confirm against the implementation.  */
extern int
utf8_to_utf16le(const char *in, size_t in_nbytes,
		utf16lechar **out_ret, size_t *out_nbytes_ret);
15 
/* Convert the @in_nbytes bytes of UTF-16LE text at @in to UTF-8, delivering
 * the converted string through *@out_ret and a status code as the return
 * value.  The inline helpers in this header release the returned string with
 * FREE().
 *
 * NOTE(review): the definition is not in this header; presumably 0 means
 * success, a WIMLIB_ERR_* code means failure, and *@out_nbytes_ret (if
 * non-NULL) receives the output length in bytes -- confirm against the
 * implementation.  */
extern int
utf16le_to_utf8(const utf16lechar *in, size_t in_nbytes,
		char **out_ret, size_t *out_nbytes_ret);
19 
20 /* Identity conversion: duplicate a 'tchar' string. */
21 static inline int
tstr_to_tstr(const tchar * in,size_t in_nbytes,tchar ** out_ret,size_t * out_nbytes_ret)22 tstr_to_tstr(const tchar *in, size_t in_nbytes,
23 	     tchar **out_ret, size_t *out_nbytes_ret)
24 {
25 	tchar *out = MALLOC(in_nbytes + sizeof(tchar));
26 	if (unlikely(!out))
27 		return WIMLIB_ERR_NOMEM;
28 	memcpy(out, in, in_nbytes);
29 	out[in_nbytes / sizeof(tchar)] = 0;
30 	*out_ret = out;
31 	if (out_nbytes_ret)
32 		*out_nbytes_ret = in_nbytes;
33 	return 0;
34 }
35 
/* Bind the tchar conversion names to concrete functions, depending on whether
 * TCHAR_IS_UTF16LE evaluates to nonzero.  A conversion between two identical
 * encodings is bound to tstr_to_tstr(), which only duplicates the string; a
 * conversion between different encodings is bound to the corresponding
 * utf8_to_utf16le() / utf16le_to_utf8() routine.  */
#if TCHAR_IS_UTF16LE

/* tstr(UTF-16LE) <=> UTF-16LE  */
#  define tstr_to_utf16le	tstr_to_tstr
#  define utf16le_to_tstr	tstr_to_tstr

/* tstr(UTF-16LE) <=> UTF-8  */
#  define tstr_to_utf8		utf16le_to_utf8
#  define utf8_to_tstr		utf8_to_utf16le

#else

/* tstr(UTF-8) <=> UTF-16LE  */
#  define tstr_to_utf16le	utf8_to_utf16le
#  define utf16le_to_tstr	utf16le_to_utf8

/* tstr(UTF-8) <=> UTF-8  */
#  define tstr_to_utf8		tstr_to_tstr
#  define utf8_to_tstr		tstr_to_tstr

#endif
57 
58 /* Convert a tchar string to UTF-16LE, but if both encodings are UTF-16LE, then
59  * simply re-use the string.  Release with tstr_put_utf16le() when done.  */
60 static inline int
tstr_get_utf16le_and_len(const tchar * in,const utf16lechar ** out_ret,size_t * out_nbytes_ret)61 tstr_get_utf16le_and_len(const tchar *in,
62 			 const utf16lechar **out_ret, size_t *out_nbytes_ret)
63 {
64 	size_t in_nbytes = tstrlen(in) * sizeof(tchar);
65 #if TCHAR_IS_UTF16LE
66 	*out_ret = in;
67 	if (out_nbytes_ret)
68 		*out_nbytes_ret = in_nbytes;
69 	return 0;
70 #else
71 	return tstr_to_utf16le(in, in_nbytes,
72 			       (utf16lechar **)out_ret, out_nbytes_ret);
73 #endif
74 }
75 
76 static inline int
tstr_get_utf16le(const tchar * in,const utf16lechar ** out_ret)77 tstr_get_utf16le(const tchar *in, const utf16lechar **out_ret)
78 {
79 	return tstr_get_utf16le_and_len(in, out_ret, NULL);
80 }
81 
82 /* Release a string acquired with tstr_get_utf16le() or
83  * tstr_get_utf16le_and_len().  */
84 static inline void
tstr_put_utf16le(const utf16lechar * s)85 tstr_put_utf16le(const utf16lechar *s)
86 {
87 #if !TCHAR_IS_UTF16LE
88 	FREE((void *)s);
89 #endif
90 }
91 
92 /* Convert a UTF-16LE string to a tchar string, but if both encodings are
93  * UTF-16LE, then simply re-use the string.  Release with utf16le_put_tstr()
94  * when done.  */
95 static inline int
utf16le_get_tstr(const utf16lechar * in,size_t in_nbytes,const tchar ** out_ret,size_t * out_nbytes_ret)96 utf16le_get_tstr(const utf16lechar *in, size_t in_nbytes,
97 		 const tchar **out_ret, size_t *out_nbytes_ret)
98 {
99 #if TCHAR_IS_UTF16LE
100 	*out_ret = in;
101 	if (out_nbytes_ret)
102 		*out_nbytes_ret = in_nbytes;
103 	return 0;
104 #else
105 	return utf16le_to_tstr(in, in_nbytes,
106 			       (tchar **)out_ret, out_nbytes_ret);
107 #endif
108 }
109 
110 /* Release a string acquired with utf16le_get_tstr().  */
111 static inline void
utf16le_put_tstr(const tchar * s)112 utf16le_put_tstr(const tchar *s)
113 {
114 #if !TCHAR_IS_UTF16LE
115 	FREE((void *)s);
116 #endif
117 }
118 
119 
/* UTF-16LE utilities */

/* Table holding one u16 entry for every possible 16-bit value.
 * NOTE(review): presumably maps each UTF-16 code unit to its upper-case form
 * and is populated by init_upcase() -- confirm against the definitions.  */
extern u16 upcase[65536];

extern void
init_upcase(void);

/* Compare two UTF-16LE strings, optionally ignoring case, and return an int.
 * NOTE(review): @n1 and @n2 are presumably the lengths of @s1 and @s2 (units
 * not visible here), and the result presumably follows the usual
 * negative/zero/positive ordering convention -- confirm.  */
extern int
cmp_utf16le_strings(const utf16lechar *s1, size_t n1,
		    const utf16lechar *s2, size_t n2,
		    bool ignore_case);

/* Variant of cmp_utf16le_strings() that takes no explicit lengths.
 * NOTE(review): presumably both strings must be null-terminated -- confirm.  */
extern int
cmp_utf16le_strings_z(const utf16lechar *s1, const utf16lechar *s2,
		      bool ignore_case);

/* NOTE(review): presumably duplicates the @size bytes at @s into a new
 * null-terminated UTF-16LE string; the allocation-failure result and the
 * owner of the returned memory are not visible here -- confirm.  */
extern utf16lechar *
utf16le_dupz(const void *s, size_t size);

/* NOTE(review): presumably duplicates the null-terminated UTF-16LE string @s
 * -- confirm.  */
extern utf16lechar *
utf16le_dup(const utf16lechar *s);

/* NOTE(review): presumably the length of the null-terminated string @s in
 * bytes, terminator excluded -- confirm.  */
extern size_t
utf16le_len_bytes(const utf16lechar *s);

/* NOTE(review): presumably the length of the null-terminated string @s in
 * 16-bit code units, terminator excluded -- confirm.  */
extern size_t
utf16le_len_chars(const utf16lechar *s);
147 
148 #endif /* _WIMLIB_ENCODING_H */
149