1 /*
2 Copyright (C) 2015-2021, Dirk Krause
3 SPDX-License-Identifier: BSD-3-Clause
4 */
5
6 /*
7 WARNING: This file was generated by the dkct program (see
8 http://dktools.sourceforge.net/ for details).
9 Changes you make here will be lost if dkct is run again!
10 You should modify the original source and run dkct on it.
11 Original source: dk4enc.ctr
12 */
13
14 /** @file dk4enc.c The dk4enc module.
15 */
16
17
18 #include "dk4conf.h"
19 #include <libdk4c/dk4enc.h>
20 #include <libdk4base/dk4mem.h>
21 #include <libdk4base/dk4strd.h>
22
23 #if DK4_HAVE_ASSERT_H
24 #ifndef ASSERT_H_INCLUDED
25 #include <assert.h>
26 #define ASSERT_H_INCLUDED 1
27 #endif
28 #endif
29
30
31 /** Encoding names in variations.
32 */
33 static const dkChar * const dk4enc_encoding_names[] = {
34 /* 0 */
35 dkT("plain"),
36
37 /* 1 */
38 dkT("ascii"),
39
40 /* 2 */
41 dkT("ansi"),
42
43 /* 3 */
44 dkT("utf-8"),
45
46 /* 4 */
47 dkT("utf8"),
48
49 /* 5 */
50 dkT("utf-16"),
51
52 /* 6 */
53 dkT("utf16"),
54
55 /* 7 */
56 dkT("utf-16-le"),
57
58 /* 8 */
59 dkT("utf-16le"),
60
61 /* 9 */
62 dkT("utf16le"),
63
64 /* 10 */
65 dkT("utf-16-lsb"),
66
67 /* 11 */
68 dkT("utf-16lsb"),
69
70 /* 12 */
71 dkT("utf16lsb"),
72
73 /* 13 */
74 dkT("utf-16-be"),
75
76 /* 14 */
77 dkT("utf-16be"),
78
79 /* 15 */
80 dkT("utf16be"),
81
82 /* 16 */
83 dkT("utf-16-msb"),
84
85 /* 17 */
86 dkT("utf-16msb"),
87
88 /* 18 */
89 dkT("utf16msb"),
90
91 /* 19 */
92 dkT("c32"),
93
94 /* 20 */
95 dkT("c32-le"),
96
97 /* 21 */
98 dkT("c32le"),
99
100 /* 22 */
101 dkT("c32-lsb"),
102
103 /* 23 */
104 dkT("c32lsb"),
105
106 /* 24 */
107 dkT("c32-be"),
108
109 /* 25 */
110 dkT("c32be"),
111
112 /* 26 */
113 dkT("c32-msb"),
114
115 /* 27 */
116 dkT("c32msb"),
117
118 /* 28 */
119 dkT("utf-16.msb"),
120
121 /* 29 */
122 dkT("utf-16.lsb"),
123
124 /* 30 */
125 dkT("uc32"),
126
127 /* 31 */
128 dkT("uc32.msb"),
129
130 /* 32 */
131 dkT("uc32.lsb"),
132
133 /* 33 */
134 dkT("iso-latin-1"),
135
136 /* 34 */
137 dkT("iso-8859-1"),
138
139 /* 35 */
140 dkT("win1252"),
141
142 /* 36 */
143 dkT("cp1252"),
144
145 NULL
146
147 };
148
149
150
151 /** Keywords for further options.
152 */
153 static const dkChar * const dk4enc_option_keywords[] = {
154 /* 0 */
155 dkT("le"),
156
157 /* 1 */
158 dkT("lsb"),
159
160 /* 2 */
161 dkT("be"),
162
163 /* 3 */
164 dkT("msb"),
165
166 /* 4 */
167 dkT("bom"),
168
169 /* 5 */
170 dkT("nobom"),
171
172 NULL
173
174 };
175
176
177
178 int
dk4enc_find(int * encptr,int * bomptr,const dkChar * src,dk4_er_t * erp)179 dk4enc_find(int *encptr, int *bomptr, const dkChar *src, dk4_er_t *erp)
180 {
181 dkChar buf[64]; /* Private copy for modification */
182 dkChar *p1; /* Start of text */
183 dkChar *p2; /* Start of options */
184 int res = 0; /* Array index */
185 int back = 0; /* Function result */
186 int ae = 0; /* Flag: Allow ending specification */
187 int enc = 0; /* Encoding found */
188 int bom = 0; /* Flag: BOM keyword found */
189 int bom_f = 0; /* Flag: BOM information found */
190 #if DK4_USE_ASSERT
191 assert(NULL != encptr);
192 assert(NULL != src);
193 #endif
194 if ((NULL != encptr) && (NULL != src)) {
195 if (0 != dk4str_cpy_s(buf, DK4_SIZEOF(buf,dkChar), src, erp)) {
196 p1 = dk4str_start(buf, NULL);
197 if (NULL != p1) {
198 p2 = dk4str_chr(buf, dkT(','));
199 if (NULL == p2) {
200 p2 = dk4str_chr(buf, dkT('.'));
201 }
202 if (NULL != p2) { *(p2++) = dkT('\0'); p2 = dk4str_start(p2, NULL); }
203 dk4str_normalize(p1, NULL);
204 switch (dk4str_array_index(dk4enc_encoding_names, p1, 0)) {
205 case 0: case 1: case 33: case 34: {
206 enc = DK4_FILE_ENCODING_PLAIN;
207 back = 1;
208 } break;
209 case 2: case 35: case 36: {
210 enc = DK4_FILE_ENCODING_WIN1252;
211 back = 1;
212 } break;
213 case 3: case 4: {
214 enc = DK4_FILE_ENCODING_UTF8;
215 back = 1;
216 } break;
217 case 5: case 6: {
218 enc = DK4_ENCODING_UTF16;
219 ae = 1;
220 back = 1;
221 bom = 1;
222 } break;
223 case 7: case 8: case 9: case 10: case 11: case 12: case 29: {
224 enc = DK4_FILE_ENCODING_UTF16_LE;
225 back = 1;
226 bom = 1;
227 } break;
228 case 13: case 14: case 15: case 16: case 17: case 18: case 28: {
229 enc = DK4_FILE_ENCODING_UTF16_BE;
230 back = 1;
231 bom = 1;
232 } break;
233 case 19: case 30: {
234 enc = DK4_ENCODING_32;
235 ae = 1;
236 back = 1;
237 bom = 1;
238 } break;
239 case 20: case 21: case 22: case 23: case 32: {
240 enc = DK4_FILE_ENCODING_32_LE;
241 back = 1;
242 bom = 1;
243 } break;
244 case 24: case 25: case 26: case 27: case 31: {
245 enc = DK4_FILE_ENCODING_32_BE;
246 back = 1;
247 bom = 1;
248 } break;
249 default: {
250 dk4error_set_simple_error_code(erp, DK4_E_SYNTAX);
251 } break;
252 }
253 if (1 == back) {
254 while (NULL != p2) {
255 p1 = dk4str_chr(p2, dkT(','));
256 if (NULL != p1) { *(p1++) = dkT('\0'); p1 = dk4str_start(p1,NULL); }
257 dk4str_normalize(p2, NULL);
258 switch (res = dk4str_array_index(dk4enc_option_keywords, p2, 0)) {
259 case 0: case 1: case 2: case 3: {
260 if (0 != ae) {
261 switch (enc) {
262 case DK4_ENCODING_UTF16: {
263 enc = (
264 ((2 == res) || (3 == res))
265 ? DK4_FILE_ENCODING_UTF16_BE
266 : DK4_FILE_ENCODING_UTF16_LE
267 );
268 } break;
269 case DK4_ENCODING_32: {
270 enc = (
271 ((2 == res) || (3 == res))
272 ? DK4_FILE_ENCODING_32_BE
273 : DK4_FILE_ENCODING_32_LE
274 );
275 } break;
276 }
277 ae = 0;
278 } else {
279 back = 0;
280 dk4error_set_simple_error_code(erp, DK4_E_SYNTAX);
281 }
282 } break;
283 case 4: {
284 switch (enc) {
285 case DK4_FILE_ENCODING_UTF8:
286 case DK4_FILE_ENCODING_UTF16_LE:
287 case DK4_FILE_ENCODING_UTF16_BE:
288 case DK4_FILE_ENCODING_32_LE:
289 case DK4_FILE_ENCODING_32_BE: {
290 bom = 1;
291 bom_f = 1;
292 } break;
293 default: {
294 back = 0;
295 dk4error_set_simple_error_code(erp, DK4_E_SYNTAX);
296 } break;
297 }
298 } break;
299 case 5: {
300 bom = 0;
301 bom_f = 1;
302 } break;
303 default: {
304 back = 0;
305 dk4error_set_simple_error_code(erp, DK4_E_SYNTAX);
306 } break;
307 }
308 p2 = p1;
309 }
310 }
311 } else {
312 /* ERROR: Empty string */
313 dk4error_set_simple_error_code(erp, DK4_E_SYNTAX);
314 }
315 } else {
316 dk4error_set_simple_error_code(erp, DK4_E_BUFFER_TOO_SMALL);
317 }
318 } else {
319 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
320 }
321 if (NULL != encptr) { *encptr = enc; }
322 if (0 != bom_f) { if (NULL != bomptr) { *bomptr = bom; } }
323 return back;
324 }
325
326
327