1 #ifndef _WIMLIB_ENCODING_H
2 #define _WIMLIB_ENCODING_H
3
4 #include <string.h>
5
6 #include "wimlib/error.h"
7 #include "wimlib/util.h"
8 #include "wimlib/types.h"
9
10 /* String conversion functions */
11
12 extern int
13 utf8_to_utf16le(const char *in, size_t in_nbytes,
14 utf16lechar **out_ret, size_t *out_nbytes_ret);
15
16 extern int
17 utf16le_to_utf8(const utf16lechar *in, size_t in_nbytes,
18 char **out_ret, size_t *out_nbytes_ret);
19
20 /* Identity conversion: duplicate a 'tchar' string. */
21 static inline int
tstr_to_tstr(const tchar * in,size_t in_nbytes,tchar ** out_ret,size_t * out_nbytes_ret)22 tstr_to_tstr(const tchar *in, size_t in_nbytes,
23 tchar **out_ret, size_t *out_nbytes_ret)
24 {
25 tchar *out = MALLOC(in_nbytes + sizeof(tchar));
26 if (unlikely(!out))
27 return WIMLIB_ERR_NOMEM;
28 memcpy(out, in, in_nbytes);
29 out[in_nbytes / sizeof(tchar)] = 0;
30 *out_ret = out;
31 if (out_nbytes_ret)
32 *out_nbytes_ret = in_nbytes;
33 return 0;
34 }
35
/*
 * Map the generic 'tstr' conversion names onto concrete routines, depending
 * on how 'tchar' strings are encoded in this build.
 */
#if TCHAR_IS_UTF16LE

/* tchar is UTF-16LE, so tstr <=> UTF-16LE is a plain duplication. */
#  define tstr_to_utf16le	tstr_to_tstr
#  define utf16le_to_tstr	tstr_to_tstr

/* tchar is UTF-16LE, so tstr <=> UTF-8 needs a real conversion. */
#  define tstr_to_utf8		utf16le_to_utf8
#  define utf8_to_tstr		utf8_to_utf16le

#else

/* tchar is UTF-8, so tstr <=> UTF-16LE needs a real conversion. */
#  define tstr_to_utf16le	utf8_to_utf16le
#  define utf16le_to_tstr	utf16le_to_utf8

/* tchar is UTF-8, so tstr <=> UTF-8 is a plain duplication. */
#  define tstr_to_utf8		tstr_to_tstr
#  define utf8_to_tstr		tstr_to_tstr

#endif
57
58 /* Convert a tchar string to UTF-16LE, but if both encodings are UTF-16LE, then
59 * simply re-use the string. Release with tstr_put_utf16le() when done. */
60 static inline int
tstr_get_utf16le_and_len(const tchar * in,const utf16lechar ** out_ret,size_t * out_nbytes_ret)61 tstr_get_utf16le_and_len(const tchar *in,
62 const utf16lechar **out_ret, size_t *out_nbytes_ret)
63 {
64 size_t in_nbytes = tstrlen(in) * sizeof(tchar);
65 #if TCHAR_IS_UTF16LE
66 *out_ret = in;
67 if (out_nbytes_ret)
68 *out_nbytes_ret = in_nbytes;
69 return 0;
70 #else
71 return tstr_to_utf16le(in, in_nbytes,
72 (utf16lechar **)out_ret, out_nbytes_ret);
73 #endif
74 }
75
76 static inline int
tstr_get_utf16le(const tchar * in,const utf16lechar ** out_ret)77 tstr_get_utf16le(const tchar *in, const utf16lechar **out_ret)
78 {
79 return tstr_get_utf16le_and_len(in, out_ret, NULL);
80 }
81
82 /* Release a string acquired with tstr_get_utf16le() or
83 * tstr_get_utf16le_and_len(). */
84 static inline void
tstr_put_utf16le(const utf16lechar * s)85 tstr_put_utf16le(const utf16lechar *s)
86 {
87 #if !TCHAR_IS_UTF16LE
88 FREE((void *)s);
89 #endif
90 }
91
92 /* Convert a UTF-16LE string to a tchar string, but if both encodings are
93 * UTF-16LE, then simply re-use the string. Release with utf16le_put_tstr()
94 * when done. */
95 static inline int
utf16le_get_tstr(const utf16lechar * in,size_t in_nbytes,const tchar ** out_ret,size_t * out_nbytes_ret)96 utf16le_get_tstr(const utf16lechar *in, size_t in_nbytes,
97 const tchar **out_ret, size_t *out_nbytes_ret)
98 {
99 #if TCHAR_IS_UTF16LE
100 *out_ret = in;
101 if (out_nbytes_ret)
102 *out_nbytes_ret = in_nbytes;
103 return 0;
104 #else
105 return utf16le_to_tstr(in, in_nbytes,
106 (tchar **)out_ret, out_nbytes_ret);
107 #endif
108 }
109
110 /* Release a string acquired with utf16le_get_tstr(). */
111 static inline void
utf16le_put_tstr(const tchar * s)112 utf16le_put_tstr(const tchar *s)
113 {
114 #if !TCHAR_IS_UTF16LE
115 FREE((void *)s);
116 #endif
117 }
118
119
120 /* UTF-16LE utilities */
121
122 extern u16 upcase[65536];
123
124 extern void
125 init_upcase(void);
126
127 extern int
128 cmp_utf16le_strings(const utf16lechar *s1, size_t n1,
129 const utf16lechar *s2, size_t n2,
130 bool ignore_case);
131
132 extern int
133 cmp_utf16le_strings_z(const utf16lechar *s1, const utf16lechar *s2,
134 bool ignore_case);
135
136 extern utf16lechar *
137 utf16le_dupz(const void *s, size_t size);
138
139 extern utf16lechar *
140 utf16le_dup(const utf16lechar *s);
141
142 extern size_t
143 utf16le_len_bytes(const utf16lechar *s);
144
145 extern size_t
146 utf16le_len_chars(const utf16lechar *s);
147
148 #endif /* _WIMLIB_ENCODING_H */
149