1%%	options
2
3copyright owner	=	Dirk Krause
4copyright year	=	2015-xxxx
5SPDX-License-Identifier:	BSD-3-Clause
6
7
8%%	header
9
10/**	@file
11	String recoding from UTF-8 to 32 bit.
12
13	CRT on Windows: Not used.
14*/
15
16#ifndef DK4CONF_H_INCLUDED
17#if DK4_BUILDING_DKTOOLS4
18#include "dk4conf.h"
19#else
20#include <dktools-4/dk4conf.h>
21#endif
22#endif
23
24#ifndef DK4TYPES_H_INCLUDED
25#if DK4_BUILDING_DKTOOLS4
26#include <libdk4base/dk4types.h>
27#else
28#include <dktools-4/dk4types.h>
29#endif
30#endif
31
32#ifndef DK4ERROR_H_INCLUDED
33#if DK4_BUILDING_DKTOOLS4
34#include <libdk4base/dk4error.h>
35#else
36#include <dktools-4/dk4error.h>
37#endif
38#endif
39
40#ifdef __cplusplus
41extern "C" {
42#endif
43
/**	Recode string from UTF-8 to 32 bit.
	@param	dstb	Destination buffer.
	@param	szdstb	Size of destination buffer (number of dk4_c32_t
			elements, including the final 0 element).
	@param	src	Source string to convert.
	@param	erp	Error report, may be NULL.
	@return	1 on success, 0 on error.

	Error codes:
	- DK4_E_INVALID_ARGUMENTS<br>
	  if src or dstb is NULL or szdstb is 0,
	- DK4_E_BUFFER_TOO_SMALL<br>
	  if dstb is too small,
	- DK4_E_SYNTAX<br>
	  with the number of successfully processed source bytes in nelem if a
	  non-recodable byte sequence was found.
*/
60int
61dk4recode_utf8_to_c32(
62  dk4_c32_t *dstb, size_t szdstb, const char *src, dk4_er_t *erp
63);
64
65/**	Calculate required buffer size (including final 0x00000000UL character)
66	to convert from UTF-8 to 32 bit characters.
67	@param	src	UTF-8 encoded source string.
68	@param	erp	Error report, may be NULL.
69	@return	Positive required buffer size (number of dk4_c32_t) on success,
70	0 on error.
71
72	Error codes:
73	- DK4_E_INVALID_ARGUMENTS<br>
74	  if src is NULL,
75	- DK4_E_MATH_OVERFLOW<br>
76	  if the required size calculation results in mathematical overflow,
77	- DK4_E_SYNTAX<br>
78	  if there are errors while decoding the source string.
79*/
80size_t
81dk4recode_size_utf8_to_c32(const char *src, dk4_er_t *erp);
82
83#ifdef __cplusplus
84}
85#endif
86
87%%	module
88
89#include "dk4conf.h"
90#include <libdk4c/dk4rec17.h>
91#include <libdk4c/dk4utf8.h>
92
93#if	DK4_HAVE_ASSERT_H
94#ifndef	ASSERT_H_INCLUDED
95#include <assert.h>
96#define	ASSERT_H_INCLUDED 1
97#endif
98#endif
99
100$!trace-include
101
102
103size_t
104dk4recode_size_utf8_to_c32(const char *src, dk4_er_t *erp)
105{
106  dk4_er_t		 er;
107  dk4_utf8_decoder_t	 dec;
108  size_t		 rdb	= 0;
109  size_t		 back	= 0;
110#if	DK4_USE_ASSERT
111  assert(NULL != src);
112#endif
113  if (NULL != src) {
114    /*	Initialize data structures.
115    */
116    dk4error_init(&er);
117    dk4utf8_init(&dec);
118    /*	Process source string.
119    */
120    while ('\0' != *src) {
121      rdb++;
122      switch(dk4utf8_add(&dec, (unsigned char)(*(src++)))) {
123        case DK4_EDSTM_FINISHED: {
124	  (void)dk4utf8_get(&dec);
125	  dk4utf8_init(&dec);
126	  if (SIZE_MAX == back) {
127	    dk4error_set_simple_error_code(&er, DK4_E_MATH_OVERFLOW);
128	  } else {
129	    back++;
130	  }
131	} break;
132	case DK4_EDSTM_ERROR: {
133	  dk4error_set_elsize_nelem(&er, DK4_E_SYNTAX, 1, rdb);
134	} break;
135      }
136    }
137    if (0 == dk4utf8_is_empty(&dec)) {
138      dk4error_set_elsize_nelem(&er, DK4_E_SYNTAX, 1, rdb);
139    }
140    /*	Add one for the finalizer.
141    */
142    if (SIZE_MAX == back) {
143      dk4error_set_simple_error_code(&er, DK4_E_MATH_OVERFLOW);
144    } else {
145      back++;
146    }
147    /*	On error return 0 and pass error code.
148    */
149    if (DK4_E_NONE != er.ec) {
150      back = 0;
151      dk4error_copy(erp, &er);
152    }
153  } else {
154    dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
155  }
156  return back;
157}
158
159
160
161int
162dk4recode_utf8_to_c32(
163  dk4_c32_t *dstb, size_t szdstb, const char *src, dk4_er_t *erp
164)
165{
166  dk4_utf8_decoder_t	 dec;
167  size_t		 used	=	0;
168  size_t		 rdb	=	0;
169  dk4_c32_t		 c32	=	dkC32(0);
170  int			 back	=	0;
171
172#if	DK4_USE_ASSERT
173  assert(NULL != dstb);
174  assert(0 < szdstb);
175  assert(NULL != src);
176#endif
177  if ((NULL != dstb) && (NULL != src) && (0 < szdstb)) {
178    back = 1;
179    dk4utf8_init(&dec);
180    while(('\0' != *src) && (1 == back) && (used < szdstb)) {
181      switch(dk4utf8_add(&dec, (unsigned char)(*(src++)))) {
182        case DK4_EDSTM_FINISHED: {
183	  c32 = dk4utf8_get(&dec);
184	  dk4utf8_init(&dec);
185	  dstb[used++] = c32;
186	} break;
187	case DK4_EDSTM_ERROR: {
188	  back = 0;
189	  dk4error_set_elsize_nelem(erp, DK4_E_SYNTAX, 1, rdb);
190	} break;
191      }
192      if(back) { rdb++; }
193    }
194    if (used < szdstb) {
195      dstb[used] = (dk4_c16_t)0U;
196    } else {
197      dstb[szdstb - 1] = (dk4_c16_t)0U;
198      back = 0;
199      dk4error_set_simple_error_code(erp, DK4_E_BUFFER_TOO_SMALL);
200    }
201    if (0 == dk4utf8_is_empty(&dec)) {
202      back = 0;
203      dk4error_set_elsize_nelem(erp, DK4_E_SYNTAX, 1, rdb);
204    }
205  } else {
206    dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
207  }
208  return back;
209}
210
211
212
213