1 /*
2 Copyright (C) 2015-2021, Dirk Krause
3 SPDX-License-Identifier: BSD-3-Clause
4 */
5 
6 /*
7 	WARNING: This file was generated by the dkct program (see
8 	http://dktools.sourceforge.net/ for details).
9 	Changes you make here will be lost if dkct is run again!
10 	You should modify the original source and run dkct on it.
11 	Original source: dk4enc.ctr
12 */
13 
14 /**	@file dk4enc.c The dk4enc module.
15 */
16 
17 
18 #include "dk4conf.h"
19 #include <libdk4c/dk4enc.h>
20 #include <libdk4base/dk4mem.h>
21 #include <libdk4base/dk4strd.h>
22 
23 #if	DK4_HAVE_ASSERT_H
24 #ifndef	ASSERT_H_INCLUDED
25 #include <assert.h>
26 #define	ASSERT_H_INCLUDED 1
27 #endif
28 #endif
29 
30 
31 /**	Encoding names in variations.
32 */
33 static const dkChar * const dk4enc_encoding_names[] = {
34 /* 0 */
35 dkT("plain"),
36 
37 /* 1 */
38 dkT("ascii"),
39 
40 /* 2 */
41 dkT("ansi"),
42 
43 /* 3 */
44 dkT("utf-8"),
45 
46 /* 4 */
47 dkT("utf8"),
48 
49 /* 5 */
50 dkT("utf-16"),
51 
52 /* 6 */
53 dkT("utf16"),
54 
55 /* 7 */
56 dkT("utf-16-le"),
57 
58 /* 8 */
59 dkT("utf-16le"),
60 
61 /* 9 */
62 dkT("utf16le"),
63 
64 /* 10 */
65 dkT("utf-16-lsb"),
66 
67 /* 11 */
68 dkT("utf-16lsb"),
69 
70 /* 12 */
71 dkT("utf16lsb"),
72 
73 /* 13 */
74 dkT("utf-16-be"),
75 
76 /* 14 */
77 dkT("utf-16be"),
78 
79 /* 15 */
80 dkT("utf16be"),
81 
82 /* 16 */
83 dkT("utf-16-msb"),
84 
85 /* 17 */
86 dkT("utf-16msb"),
87 
88 /* 18 */
89 dkT("utf16msb"),
90 
91 /* 19 */
92 dkT("c32"),
93 
94 /* 20 */
95 dkT("c32-le"),
96 
97 /* 21 */
98 dkT("c32le"),
99 
100 /* 22 */
101 dkT("c32-lsb"),
102 
103 /* 23 */
104 dkT("c32lsb"),
105 
106 /* 24 */
107 dkT("c32-be"),
108 
109 /* 25 */
110 dkT("c32be"),
111 
112 /* 26 */
113 dkT("c32-msb"),
114 
115 /* 27 */
116 dkT("c32msb"),
117 
118 /* 28 */
119 dkT("utf-16.msb"),
120 
121 /* 29 */
122 dkT("utf-16.lsb"),
123 
124 /* 30 */
125 dkT("uc32"),
126 
127 /* 31 */
128 dkT("uc32.msb"),
129 
130 /* 32 */
131 dkT("uc32.lsb"),
132 
133 /* 33 */
134 dkT("iso-latin-1"),
135 
136 /* 34 */
137 dkT("iso-8859-1"),
138 
139 /* 35 */
140 dkT("win1252"),
141 
142 /* 36 */
143 dkT("cp1252"),
144 
145 NULL
146 
147 };
148 
149 
150 
151 /**	Keywords for further options.
152 */
153 static const dkChar * const dk4enc_option_keywords[] = {
154 /* 0 */
155 dkT("le"),
156 
157 /* 1 */
158 dkT("lsb"),
159 
160 /* 2 */
161 dkT("be"),
162 
163 /* 3 */
164 dkT("msb"),
165 
166 /* 4 */
167 dkT("bom"),
168 
169 /* 5 */
170 dkT("nobom"),
171 
172 NULL
173 
174 };
175 
176 
177 
178 int
dk4enc_find(int * encptr,int * bomptr,const dkChar * src,dk4_er_t * erp)179 dk4enc_find(int *encptr, int *bomptr, const dkChar *src, dk4_er_t *erp)
180 {
181   dkChar	buf[64];		/* Private copy for modification */
182   dkChar	*p1;			/* Start of text */
183   dkChar	*p2;			/* Start of options */
184   int		res	=	0;	/* Array index */
185   int		back	=	0;	/* Function result */
186   int		ae	=	0;	/* Flag: Allow ending specification */
187   int		enc	=	0;	/* Encoding found */
188   int		bom	=	0;	/* Flag: BOM keyword found */
189   int		bom_f	=	0;	/* Flag: BOM information found */
190 #if	DK4_USE_ASSERT
191   assert(NULL != encptr);
192   assert(NULL != src);
193 #endif
194   if ((NULL != encptr) && (NULL != src)) {
195     if (0 != dk4str_cpy_s(buf, DK4_SIZEOF(buf,dkChar), src, erp)) {
196       p1 = dk4str_start(buf, NULL);
197       if (NULL != p1) {
198         p2 = dk4str_chr(buf, dkT(','));
199 	if (NULL == p2) {
200 	  p2 = dk4str_chr(buf, dkT('.'));
201 	}
202 	if (NULL != p2) { *(p2++) = dkT('\0'); p2 = dk4str_start(p2, NULL); }
203 	dk4str_normalize(p1, NULL);
204 	switch (dk4str_array_index(dk4enc_encoding_names, p1, 0)) {
205 	  case 0: case 1: case 33: case 34: {
206 	    enc = DK4_FILE_ENCODING_PLAIN;
207 	    back = 1;
208 	  } break;
209 	  case 2: case 35: case 36: {
210 	    enc = DK4_FILE_ENCODING_WIN1252;
211 	    back = 1;
212 	  } break;
213 	  case 3: case 4: {
214 	    enc = DK4_FILE_ENCODING_UTF8;
215 	    back = 1;
216 	  } break;
217 	  case 5: case 6: {
218 	    enc = DK4_ENCODING_UTF16;
219 	    ae  = 1;
220 	    back = 1;
221 	    bom = 1;
222 	  } break;
223 	  case 7: case 8: case 9: case 10: case 11: case 12: case 29: {
224 	    enc = DK4_FILE_ENCODING_UTF16_LE;
225 	    back = 1;
226 	    bom = 1;
227 	  } break;
228 	  case 13: case 14: case 15: case 16: case 17: case 18: case 28: {
229 	    enc = DK4_FILE_ENCODING_UTF16_BE;
230 	    back = 1;
231 	    bom = 1;
232 	  } break;
233 	  case 19: case 30: {
234 	    enc = DK4_ENCODING_32;
235 	    ae  = 1;
236 	    back = 1;
237 	    bom = 1;
238 	  } break;
239 	  case 20: case 21: case 22: case 23: case 32: {
240 	    enc = DK4_FILE_ENCODING_32_LE;
241 	    back = 1;
242 	    bom = 1;
243 	  } break;
244 	  case 24: case 25: case 26: case 27: case 31: {
245 	    enc = DK4_FILE_ENCODING_32_BE;
246 	    back = 1;
247 	    bom = 1;
248 	  } break;
249 	  default: {
250 	    dk4error_set_simple_error_code(erp, DK4_E_SYNTAX);
251 	  } break;
252 	}
253 	if (1 == back) {
254 	  while (NULL != p2) {
255 	    p1 = dk4str_chr(p2, dkT(','));
256 	    if (NULL != p1) { *(p1++) = dkT('\0'); p1 = dk4str_start(p1,NULL); }
257 	    dk4str_normalize(p2, NULL);
258 	    switch (res = dk4str_array_index(dk4enc_option_keywords, p2, 0)) {
259 	      case 0: case 1: case 2: case 3: {
260 	        if (0 != ae) {
261 		  switch (enc) {
262 		    case DK4_ENCODING_UTF16: {
263 		      enc = (
264 		        ((2 == res) || (3 == res))
265 			? DK4_FILE_ENCODING_UTF16_BE
266 			: DK4_FILE_ENCODING_UTF16_LE
267 		      );
268 		    } break;
269 		    case DK4_ENCODING_32: {
270 		      enc = (
271 		        ((2 == res) || (3 == res))
272 			? DK4_FILE_ENCODING_32_BE
273 			: DK4_FILE_ENCODING_32_LE
274 		      );
275 		    } break;
276 		  }
277 		  ae = 0;
278 		} else {
279 		  back = 0;
280 		  dk4error_set_simple_error_code(erp, DK4_E_SYNTAX);
281 		}
282 	      } break;
283 	      case 4: {
284 	        switch (enc) {
285 		  case DK4_FILE_ENCODING_UTF8:
286 		  case DK4_FILE_ENCODING_UTF16_LE:
287 		  case DK4_FILE_ENCODING_UTF16_BE:
288 		  case DK4_FILE_ENCODING_32_LE:
289 		  case DK4_FILE_ENCODING_32_BE: {
290 		    bom = 1;
291 		    bom_f = 1;
292 		  } break;
293 		  default: {
294 		    back = 0;
295 		    dk4error_set_simple_error_code(erp, DK4_E_SYNTAX);
296 		  } break;
297 		}
298 	      } break;
299 	      case 5: {
300 	        bom = 0;
301 		bom_f = 1;
302 	      } break;
303 	      default: {
304 	        back = 0;
305 		dk4error_set_simple_error_code(erp, DK4_E_SYNTAX);
306 	      } break;
307 	    }
308 	    p2 = p1;
309 	  }
310 	}
311       } else {
312         /* ERROR: Empty string */
313 	dk4error_set_simple_error_code(erp, DK4_E_SYNTAX);
314       }
315     } else {
316       dk4error_set_simple_error_code(erp, DK4_E_BUFFER_TOO_SMALL);
317     }
318   } else {
319     dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
320   }
321   if (NULL != encptr) { *encptr = enc; }
322   if (0 != bom_f) { if (NULL != bomptr) { *bomptr = bom; } }
323   return back;
324 }
325 
326 
327