1 /*
2 Copyright (C) 2015-2021, Dirk Krause
3 SPDX-License-Identifier: BSD-3-Clause
4 */
5 
6 /*
7 	WARNING: This file was generated by the dkct program (see
8 	http://dktools.sourceforge.net/ for details).
9 	Changes you make here will be lost if dkct is run again!
10 	You should modify the original source and run dkct on it.
11 	Original source: dk4utf8.ctr
12 */
13 
14 /**	@file dk4utf8.c The dk4utf8 module.
15 */
16 
17 
18 #include "dk4conf.h"
19 
20 #if	DK4_HAVE_ASSERT_H
21 #ifndef	ASSERT_H_INCLUDED
22 #include <assert.h>
23 #define	ASSERT_H_INCLUDED 1
24 #endif
25 #endif
26 
27 #include <libdk4base/dk4types.h>
28 #include <libdk4base/dk4error.h>
29 #include <libdk4c/dk4utf8.h>
30 #include <libdk4c/dk4rec26.h>
31 
32 
33 
34 
35 
36 
37 
38 void
dk4utf8_init(dk4_utf8_decoder_t * ptr)39 dk4utf8_init(dk4_utf8_decoder_t *ptr)
40 {
41 #if	DK4_USE_ASSERT
42   assert(NULL != ptr);
43 #endif
44   if (NULL != ptr) {
45     ptr->uc  = dkC32(0);
46     ptr->bfc = 0;
47     ptr->bfn = 0;
48   }
49 }
50 
51 
52 
53 int
dk4utf8_add(dk4_utf8_decoder_t * ptr,unsigned char byte)54 dk4utf8_add(dk4_utf8_decoder_t *ptr, unsigned char byte)
55 {
56   int		back	=	DK4_EDSTM_ERROR;
57 #if	DK4_USE_ASSERT
58   assert(NULL != ptr);
59 #endif
60   if (NULL != ptr) {
61     switch (ptr->bfc) {
62       case 0: {
63         if (0x7F < byte) {				/* more than 1 byte */
64           if ((0xE0 & byte) == 0xC0) {			/* 2 bytes */
65 	    ptr->uc = (((dk4_c32_t)byte) << 6) & dkC32(0x000007C0);
66 	    ptr->bfc = 2;
67 	    ptr->bfn = 1;
68 	    back = DK4_EDSTM_ACCEPT;
69 	  } else {
70 	    if ((0xF0 & byte) == 0xE0) {			/* 3 bytes */
71 	      ptr->uc = (((dk4_c32_t)byte) << 12) & dkC32(0x0000F000);
72 	      ptr->bfc = 3;
73 	      ptr->bfn = 1;
74 	      back = DK4_EDSTM_ACCEPT;
75 	    } else {
76 	      if ((0xF8 & byte) == 0xF0) {		/* 4 bytes */
77 	        ptr->uc = (((dk4_c32_t)byte) << 18) & dkC32(0x001C0000);
78 	        ptr->bfc = 4;
79 	        ptr->bfn = 1;
80 	        back = DK4_EDSTM_ACCEPT;
81 	      } else {
82 	        if ((0xFC & byte) == 0xF8) {		/* 5 bytes */
83 	          ptr->uc = ((((dk4_c32_t)byte) << 24) & dkC32(0x03000000));
84 		  ptr->bfc = 5;
85 		  ptr->bfn = 1;
86 		  back = DK4_EDSTM_ACCEPT;
87 	        } else {
88 	          if ((0xFE & byte) == 0xFC) {		/* 6 bytes */
89 		    ptr->uc = dkC32(0x40000000);
90 		    ptr->bfc = 6;
91 		    ptr->bfn = 1;
92 		    back = DK4_EDSTM_ACCEPT;
93 		  } else {
94 		    /* ERROR: Illegal input! */
95 		  }
96 	        }
97 	      }
98 	    }
99 	  }
100         } else {			/* just one byte */
101           ptr->uc = (dk4_c32_t)byte;
102 	  ptr->uc &= dkC32(0xFF);
103 	  back = DK4_EDSTM_FINISHED;
104 	  ptr->bfn = 0;
105         }
106       } break;
107       case 2: {
108         switch (ptr->bfn) {
109           case 1: {
110 	    if ((0xC0 & byte) == 0x80) {
111 	      back = DK4_EDSTM_FINISHED;
112 	      ptr->uc |= (((dk4_c32_t)byte) & dkC32(0x0000003F));
113 	    }
114 	    ptr->bfc = 0;
115 	    ptr->bfn = 0;
116 	  } break;
117         }
118       } break;
119       case 3: {
120         switch (ptr->bfn) {
121           case 1: {
122 	    if ((0xC0 & byte) == 0x80) {
123 	      back = DK4_EDSTM_ACCEPT;
124 	      ptr->uc |= ((((dk4_c32_t)byte) << 6) & dkC32(0x00000FC0));
125 	    }
126 	    ptr->bfn = 2;
127 	  } break;
128 	  case 2: {
129 	    if ((0xC0 & byte) == 0x80) {
130 	      back = DK4_EDSTM_FINISHED;
131 	      ptr->uc |= (((dk4_c32_t)byte) & dkC32(0x0000003F));
132 	    }
133 	    ptr->bfc = 0;
134 	    ptr->bfn = 0;
135 	  } break;
136         }
137       } break;
138       case 4: {
139         switch (ptr->bfn) {
140           case 1: {
141 	    if ((0xC0 & byte) == 0x80) {
142 	      back = DK4_EDSTM_ACCEPT;
143 	      ptr->uc |= ((((dk4_c32_t)byte) << 12) & dkC32(0x0003F000));
144 	    }
145 	    ptr->bfn = 2;
146 	  } break;
147 	  case 2: {
148 	    if ((0xC0 & byte) == 0x80) {
149 	      back = DK4_EDSTM_ACCEPT;
150 	      ptr->uc |= ((((dk4_c32_t)byte) << 6) & dkC32(0x00000FC0));
151 	    }
152 	    ptr->bfn = 3;
153 	  } break;
154 	  case 3: {
155 	    if ((0xC0 & byte) == 0x80) {
156 	      back = DK4_EDSTM_FINISHED;
157 	      ptr->uc |= (((dk4_c32_t)byte) & dkC32(0x0000003F));
158 	    }
159 	    ptr->bfc = 0;
160 	    ptr->bfn = 0;
161 	  } break;
162         }
163       } break;
164       case 5: {
165         switch (ptr->bfn) {
166           case 1: {
167 	    if ((0xC0 & byte) == 0x80) {
168 	      back = DK4_EDSTM_ACCEPT;
169 	      ptr->uc |= ((((dk4_c32_t)byte) << 18) & dkC32(0x00FC0000));
170 	    }
171 	    ptr->bfn = 2;
172 	  } break;
173 	  case 2: {
174 	    if ((0xC0 & byte) == 0x80) {
175 	      back = DK4_EDSTM_ACCEPT;
176 	      ptr->uc |= ((((dk4_c32_t)byte) << 12) & dkC32(0x0003F000));
177 	    }
178 	    ptr->bfn = 3;
179 	  } break;
180 	  case 3: {
181 	    if ((0xC0 & byte) == 0x80) {
182 	      back = DK4_EDSTM_ACCEPT;
183 	      ptr->uc |= ((((dk4_c32_t)byte) << 6) & dkC32(0x00000FC0));
184 	    }
185 	    ptr->bfn = 4;
186 	  } break;
187 	  case 4: {
188 	    if ((0xC0 & byte) == 0x80) {
189 	      back = DK4_EDSTM_FINISHED;
190 	      ptr->uc |= (((dk4_c32_t)byte) & dkC32(0x0000003F));
191 	    }
192 	    ptr->bfc = 0;
193 	    ptr->bfn = 0;
194 	  } break;
195         }
196       } break;
197       case 6: {
198         switch (ptr->bfn) {
199           case 1: {
200 	    if ((0xC0 & byte) == 0x80) {
201 	      back = DK4_EDSTM_ACCEPT;
202 	      ptr->uc |= ((((dk4_c32_t)byte) << 24) & dkC32(0x3F000000));
203 	    }
204 	    ptr->bfn = 2;
205 	  } break;
206 	  case 2: {
207 	    if ((0xC0 & byte) == 0x80) {
208 	      back = DK4_EDSTM_ACCEPT;
209 	      ptr->uc |= ((((dk4_c32_t)byte) << 18) & dkC32(0x00FC0000));
210 	    }
211 	    ptr->bfn = 3;
212 	  } break;
213 	  case 3: {
214 	    if ((0xC0 & byte) == 0x80) {
215 	      back = DK4_EDSTM_ACCEPT;
216 	      ptr->uc |= ((((dk4_c32_t)byte) << 12) & dkC32(0x0003F000));
217 	    }
218 	    ptr->bfn = 4;
219 	  } break;
220 	  case 4: {
221 	    if ((0xC0 & byte) == 0x80) {
222 	      back = DK4_EDSTM_ACCEPT;
223 	      ptr->uc |= ((((dk4_c32_t)byte) << 6) & dkC32(0x00000FC0));
224 	    }
225 	    ptr->bfn = 5;
226 	  } break;
227 	  case 5: {
228 	    if ((0xC0 & byte) == 0x80) {
229 	      back = DK4_EDSTM_FINISHED;
230 	      ptr->uc |= (((dk4_c32_t)byte) & dkC32(0x0000003F));
231 	    }
232 	    ptr->bfc = 0;
233 	    ptr->bfn = 0;
234 	  } break;
235         }
236       } break;
237     }
238   }
239   return back;
240 }
241 
242 
243 
244 dk4_c32_t
dk4utf8_get(dk4_utf8_decoder_t const * ptr)245 dk4utf8_get(dk4_utf8_decoder_t const *ptr)
246 {
247   dk4_c32_t	back	=	dkC32(0);
248 #if	DK4_USE_ASSERT
249   assert(NULL != ptr);
250 #endif
251   if (NULL != ptr) {
252     back = ptr->uc;
253   }
254   return back;
255 }
256 
257 
258 
259 int
dk4utf8_is_empty(dk4_utf8_decoder_t const * ptr)260 dk4utf8_is_empty(dk4_utf8_decoder_t const *ptr)
261 {
262   int	back	=	0;
263 #if	DK4_USE_ASSERT
264   assert(NULL != ptr);
265 #endif
266   if (NULL != ptr) {
267     if (0 == ptr->bfc) {
268       back = 1;
269     }
270   }
271   return back;
272 }
273 
274 
275 
276 /**	Extract part of character by right-shift and mask.
277 	@param	c	Character to retrieve bits from.
278 	@param	w	Shift width (bits to the right).
279 	@param	m	Mask to apply.
280 	@param	o	Or-value (bits to add).
281 	@return	Operations result.
282 */
283 static
284 unsigned char
dk4utf8_rshift_mask_or(dk4_c32_t c,unsigned w,unsigned long m,unsigned long o)285 dk4utf8_rshift_mask_or(dk4_c32_t c, unsigned w, unsigned long m, unsigned long o)
286 {
287   unsigned long u;
288 
289   u = (unsigned long)c;
290   if (0 < w) {
291     u = u >> w;
292   }
293   u &= m;
294   u |= o;
295   return ((unsigned char)u);
296 }
297 
298 
299 
300 int
dk4utf8_encode(unsigned char * rb,size_t * pszrb,dk4_c32_t c,dk4_er_t * erp)301 dk4utf8_encode(unsigned char *rb, size_t *pszrb, dk4_c32_t c, dk4_er_t *erp)
302 {
303   int		back	=	0;
304 
305 #if	DK4_USE_ASSERT
306   assert(NULL != rb);
307   assert(NULL != pszrb);
308   assert(6 < *pszrb);
309 #endif
310   if ((NULL != rb) && (NULL != pszrb)) {
311     if (6 < *pszrb) {
312       if (dkC32(0x7F) < c) {
313 	if (dkC32(0x000007FF) < c) {
314 	  if (dkC32(0x001FFFFF) < c) {
315 	    if (dkC32(0x03FFFFFF) < c) {
316 	      if (dkC32(0x80000000) < c) {		/* 6 bytes */
317 		back = 1;
318 		*pszrb = 6;
319 #if VERSION_BEFORE_20210729
320 		/*
321 			2021-07-29:	Modified to silence compiler
322 					warnings.
323 					Not a bug fix.
324 		*/
325 		rb[0] = ((unsigned char)((c >> 30) & dkC32(0x00000001))) | 0xFC;
326 		rb[1] = ((unsigned char)((c >> 24) & dkC32(0x0000003F))) | 0x80;
327 		rb[2] = ((unsigned char)((c >> 18) & dkC32(0x0000003F))) | 0x80;
328 		rb[3] = ((unsigned char)((c >> 12) & dkC32(0x0000003F))) | 0x80;
329 		rb[4] = ((unsigned char)((c >>  6) & dkC32(0x0000003F))) | 0x80;
330 		rb[5] = ((unsigned char)( c        & dkC32(0x0000003F))) | 0x80;
331 #else
332 		rb[0] = dk4utf8_rshift_mask_or(c, 30, 0x00000001UL, 0xFCUL);
333 		rb[1] = dk4utf8_rshift_mask_or(c, 24, 0x0000003FUL, 0x80UL);
334 		rb[2] = dk4utf8_rshift_mask_or(c, 18, 0x0000003FUL, 0x80UL);
335 		rb[3] = dk4utf8_rshift_mask_or(c, 12, 0x0000003FUL, 0x80UL);
336 		rb[4] = dk4utf8_rshift_mask_or(c,  6, 0x0000003FUL, 0x80UL);
337 		rb[5] = dk4utf8_rshift_mask_or(c,  0, 0x0000003FUL, 0x80UL);
338 #endif
339 		rb[6] = '\0';
340 	      } else {					/* 5 bytes */
341 		back = 1;
342 		*pszrb = 5;
343 #if VERSION_BEFORE_20210729
344 		rb[0] = ((unsigned char)((c >> 24) & dkC32(0x00000003))) | 0xF8;
345 		rb[1] = ((unsigned char)((c >> 18) & dkC32(0x0000003F))) | 0x80;
346 		rb[2] = ((unsigned char)((c >> 12) & dkC32(0x0000003F))) | 0x80;
347 		rb[3] = ((unsigned char)((c >>  6) & dkC32(0x0000003F))) | 0x80;
348 		rb[4] = ((unsigned char)( c        & dkC32(0x0000003F))) | 0x80;
349 #else
350 		rb[0] = dk4utf8_rshift_mask_or(c, 24, 0x00000003UL, 0xF8UL);
351 		rb[1] = dk4utf8_rshift_mask_or(c, 18, 0x0000003FUL, 0x80UL);
352 		rb[2] = dk4utf8_rshift_mask_or(c, 12, 0x0000003FUL, 0x80UL);
353 		rb[3] = dk4utf8_rshift_mask_or(c,  6, 0x0000003FUL, 0x80UL);
354 		rb[4] = dk4utf8_rshift_mask_or(c,  0, 0x0000003FUL, 0x80UL);
355 #endif
356 		rb[5] = '\0';
357 	      }
358 	    } else {					/* 4 bytes */
359 	      back = 1;
360 	      *pszrb = 4;
361 #if VERSION_BEFORE_20210729
362 	      rb[0] = ((unsigned char)((c >> 18) & dkC32(0x00000007))) | 0xF0;
363 	      rb[1] = ((unsigned char)((c >> 12) & dkC32(0x0000003F))) | 0x80;
364 	      rb[2] = ((unsigned char)((c >>  6) & dkC32(0x0000003F))) | 0x80;
365 	      rb[3] = ((unsigned char)( c        & dkC32(0x0000003F))) | 0x80;
366 #else
367 	      rb[0] = dk4utf8_rshift_mask_or(c, 18, 0x00000007UL, 0xF0UL);
368 	      rb[1] = dk4utf8_rshift_mask_or(c, 12, 0x0000003FUL, 0x80UL);
369 	      rb[2] = dk4utf8_rshift_mask_or(c,  6, 0x0000003FUL, 0x80UL);
370 	      rb[3] = dk4utf8_rshift_mask_or(c,  0, 0x0000003FUL, 0x80UL);
371 #endif
372 	      rb[4] = '\0';
373 	    }
374 	  } else {					/* 3 bytes */
375 	    back = 1;
376 	    *pszrb = 3;
377 #if VERSION_BEFORE_20210729
378 	    rb[0] = ((unsigned char)((c >> 12) & dkC32(0x0000000F))) | 0xE0;
379 	    rb[1] = ((unsigned char)((c >>  6) & dkC32(0x0000003F))) | 0x80;
380 	    rb[2] = ((unsigned char)( c        & dkC32(0x0000003F))) | 0x80;
381 #else
382 	    rb[0] = dk4utf8_rshift_mask_or(c, 12, 0x0000000FUL, 0xE0UL);
383 	    rb[1] = dk4utf8_rshift_mask_or(c,  6, 0x0000003FUL, 0x80UL);
384 	    rb[2] = dk4utf8_rshift_mask_or(c,  0, 0x0000003FUL, 0x80UL);
385 #endif
386 	    rb[3] = '\0';
387 	  }
388 	} else {					/* 2 bytes */
389 	  back = 1;
390 	  *pszrb = 2;
391 #if VERSION_BEFORE_20210729
392 	  rb[0] = ((unsigned char)((c >>  6) & dkC32(0x0000001F))) | 0xC0;
393 	  rb[1] = ((unsigned char)( c        & dkC32(0x0000003F))) | 0x80;
394 #else
395 	  rb[0] = dk4utf8_rshift_mask_or(c, 6, 0x0000001FUL, 0xC0UL);
396 	  rb[1] = dk4utf8_rshift_mask_or(c, 0, 0x0000003FUL, 0x80UL);
397 #endif
398 	  rb[2] = '\0';
399 	}
400       } else {						/* 1 byte */
401 	back = 1;
402 	*pszrb = 1;
403 #if 0
404 	rb[0] = (unsigned char)(c & dkC32(0x0000007F));
405 #endif
406 	rb[0] = dk4utf8_rshift_mask_or(c, 0, 0x0000007FUL, 0UL);
407 	rb[1] = '\0';
408       }
409     } else {
410       dk4error_set_simple_error_code(erp, DK4_E_BUFFER_TOO_SMALL);
411     }
412   } else {
413     dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
414   }
415   return back;
416 }
417 
418