1 /*
2 Copyright (C) 2015-2021, Dirk Krause
3 SPDX-License-Identifier: BSD-3-Clause
4 */
5 
6 /*
7 	WARNING: This file was generated by the dkct program (see
8 	http://dktools.sourceforge.net/ for details).
9 	Changes you make here will be lost if dkct is run again!
10 	You should modify the original source and run dkct on it.
11 	Original source: dk4bom.ctr
12 */
13 
14 /**	@file dk4bom.c The dk4bom module.
15 */
16 
17 
18 
19 
20 #include "dk4conf.h"
21 #include <libdk4base/dk4types.h>
22 #include <libdk4c/dk4enc.h>
23 #include <libdk4c/dk4bom.h>
24 
25 #if	DK4_HAVE_ASSERT_H
26 #ifndef	ASSERT_H_INCLUDED
27 #include <assert.h>
28 #define	ASSERT_H_INCLUDED 1
29 #endif
30 #endif
31 
32 
33 
34 
35 
36 
37 void
38 
dk4bom_detect_init(dk4_bom_detector_t * ptr,int defenc)39 dk4bom_detect_init(dk4_bom_detector_t *ptr, int defenc)
40 {
41 #if	DK4_USE_ASSERT
42   assert(NULL != ptr);
43 #endif
44   if (NULL != ptr) {
45     ptr->defenc = defenc;
46     ptr->found  = defenc;
47     ptr->state  = 0;
48     ptr->rjb    = 0;
49     ptr->c1     = 0x00;
50     ptr->c2     = 0x00;
51     ptr->c3	= 0x00;
52   }
53 }
54 
55 
56 
/*
	BOMs and states
	---------------
	A number in parentheses is the detector state entered after the
	byte to its left was read; detection starts in state 0.

	UTF-32 LSB:	0xFF (1) 0xFE (2) 0x00 (3) 0x00
	UTF-32 MSB:	0x00 (4) 0x00 (5) 0xFE (6) 0xFF
	UTF-16 LSB:	0xFF (1) 0xFE (2)
	UTF-16 MSB:	0xFE (7) 0xFF
	UTF-8:		0xEF (8) 0xBB (9) 0xBF
*/
66 
67 
68 
69 int
70 
dk4bom_detect_add(dk4_bom_detector_t * ptr,unsigned char byte)71 dk4bom_detect_add(dk4_bom_detector_t *ptr, unsigned char byte)
72 {
73   int		back	=	DK4_EDSTM_ERROR;
74 #if	DK4_USE_ASSERT
75   assert(NULL != ptr);
76 #endif
77   if (NULL != ptr) {
78     switch (ptr->state) {
79       case 0: {
80 	ptr->c1 = byte;
81 	ptr->rjb = 1;
82 	switch ( (int)byte ) {
83 	  case 0x00: {
84 	    ptr->state = 4; back = DK4_EDSTM_ACCEPT;
85 	  } break;
86 	  case 0xFF: {
87 	    ptr->state = 1; back = DK4_EDSTM_ACCEPT;
88 	  } break;
89 	  case 0xFE: {
90 	    ptr->state = 7; back = DK4_EDSTM_ACCEPT;
91 	  } break;
92 	  case 0xEF: {
93 	    ptr->state = 8; back = DK4_EDSTM_ACCEPT;
94 	  } break;
95 	  default: {
96 	    ptr->state = -1; back = DK4_EDSTM_FINISHED_WITH_UNUSED;
97 	  } break;
98 	}
99       } break;
100       case 1: {
101         ptr->c2 = byte;
102 	ptr->rjb = 2;
103         if (0xFE == byte) {
104 	  ptr->state = 2; back = DK4_EDSTM_ACCEPT;
105 	  ptr->found = DK4_FILE_ENCODING_UTF16_LE;
106 	  ptr->rjb = 0;
107 	} else {
108 	  ptr->state = -1; back = DK4_EDSTM_FINISHED_WITH_UNUSED;
109 	}
110       } break;
111       case 2: {
112         ptr->c1 = byte;
113 	ptr->rjb = 1;
114         if (0x00 == byte) {
115 	  ptr->state = 3; back = DK4_EDSTM_ACCEPT;
116 	} else {
117 	  ptr->state = -1; back = DK4_EDSTM_FINISHED_WITH_UNUSED;
118 	  ptr->found = DK4_FILE_ENCODING_UTF16_LE;
119 	}
120       } break;
121       case 3: {
122         ptr->c2 = byte;
123 	ptr->rjb = 2;
124         if (0x00 == byte) {
125 	  ptr->state = -1; back = DK4_EDSTM_FINISHED;
126 	  ptr->found = DK4_FILE_ENCODING_32_LE;
127 	  ptr->rjb = 0;
128 	} else {
129 	  ptr->state = -1; back = DK4_EDSTM_FINISHED_WITH_UNUSED;
130 	  ptr->found = DK4_FILE_ENCODING_UTF16_LE;
131 	}
132       } break;
133       case 4: {
134         ptr->c2 = byte;
135 	ptr->rjb = 2;
136         if (0x00 == byte) {
137 	  ptr->state = 5; back = DK4_EDSTM_ACCEPT;
138 	} else {
139 	  ptr->state = -1; back = DK4_EDSTM_FINISHED_WITH_UNUSED;
140 	}
141       } break;
142       case 5: {
143         ptr->c3 = byte;
144 	ptr->rjb = 3;
145         if (0xFE == byte) {
146 	  ptr->state = 6; back = DK4_EDSTM_ACCEPT;
147 	} else {
148 	  ptr->state = -1; back = DK4_EDSTM_FINISHED_WITH_UNUSED;
149 	}
150       } break;
151       case 6: {
152         ptr->c4 = byte;
153 	ptr->rjb = 4;
154         if (0xFF == byte) {
155 	  ptr->state = -1; back = DK4_EDSTM_FINISHED;
156 	  ptr->found = DK4_FILE_ENCODING_32_BE;
157 	  ptr->rjb = 0;
158 	} else {
159 	  ptr->state = -1; back = DK4_EDSTM_FINISHED_WITH_UNUSED;
160 	}
161       } break;
162       case 7: {
163         ptr->c2 = byte;
164 	ptr->rjb = 2;
165         if (0xFF == byte) {
166 	  ptr->state = -1; back = DK4_EDSTM_FINISHED;
167 	  ptr->found = DK4_FILE_ENCODING_UTF16_BE;
168 	  ptr->rjb = 0;
169 	} else {
170 	  ptr->state = -1; back = DK4_EDSTM_FINISHED_WITH_UNUSED;
171 	}
172       } break;
173       case 8: {
174         ptr->c2 = byte;
175 	ptr->rjb = 2;
176         if (0xBB == byte) {
177 	  ptr->state = 9; back = DK4_EDSTM_ACCEPT;
178 	} else {
179 	  ptr->state = -1; back = DK4_EDSTM_FINISHED_WITH_UNUSED;
180 	}
181       } break;
182       case 9: {
183         ptr->c3 = byte;
184 	ptr->rjb = 3;
185         if (0xBF == byte) {
186 	  ptr->state = -1; back = DK4_EDSTM_FINISHED;
187 	  ptr->found = DK4_FILE_ENCODING_UTF8;
188 	  ptr->rjb = 0;
189 	} else {
190 	  ptr->state = -1; back = DK4_EDSTM_FINISHED_WITH_UNUSED;
191 	}
192       } break;
193     }
194   }
195   return back;
196 }
197 
198 
199 
200 int
201 
dk4bom_detect_get_encoding(dk4_bom_detector_t const * ptr)202 dk4bom_detect_get_encoding(dk4_bom_detector_t const *ptr)
203 {
204   int	back	=	DK4_ENCODING_PLAIN;
205 #if	DK4_USE_ASSERT
206   assert(NULL != ptr);
207 #endif
208 #if DK4_ON_WINDOWS
209   back = DK4_ENCODING_WIN1252;
210 #endif
211   if (NULL != ptr) {
212     back = ptr->found;
213   }
214   return back;
215 }
216 
217 
218 
219 size_t
220 
dk4bom_detect_num_unused_bytes(dk4_bom_detector_t const * ptr)221 dk4bom_detect_num_unused_bytes(dk4_bom_detector_t const *ptr)
222 {
223   size_t	back	=	0;
224 #if	DK4_USE_ASSERT
225   assert(NULL != ptr);
226 #endif
227   if (NULL != ptr) {
228     back = ptr->rjb;
229   }
230   return back;
231 }
232 
233 
234 
235 unsigned char
236 
dk4bom_detect_unused_byte(dk4_bom_detector_t const * ptr,size_t num)237 dk4bom_detect_unused_byte(dk4_bom_detector_t const *ptr, size_t num)
238 {
239   unsigned char	back	=	0x00;
240 #if	DK4_USE_ASSERT
241   assert(NULL != ptr);
242 #endif
243   if (NULL != ptr) {
244     if (0 == num) {
245       back = ptr->c1;
246     } else {
247       if (1 == num) {
248         back = ptr->c2;
249       } else {
250         if (2 == num) {
251 	  back = ptr->c3;
252 	} else {
253 	  if (3 == num) {
254 	    back = ptr->c4;
255 	  }
256 	}
257       }
258     }
259   }
260   return back;
261 }
262 
263 
264 
265 int
266 
dk4bom_detect_in_detection(dk4_bom_detector_t const * ptr)267 dk4bom_detect_in_detection(dk4_bom_detector_t const *ptr)
268 {
269   int		 back = 0;
270 #if	DK4_USE_ASSERT
271   assert(NULL != ptr);
272 #endif
273   if (NULL != ptr) {
274     if (-1 != ptr->state) {
275       back = 1;
276     }
277   }
278   return back;
279 }
280 
281 
282