1%% options 2 3copyright owner = Dirk Krause 4copyright year = 2015-xxxx 5SPDX-License-Identifier: BSD-3-Clause 6 7 8%% header 9 10/** @file 11 String recoding from UTF-8 to 32 bit. 12 13 CRT on Windows: Not used. 14*/ 15 16#ifndef DK4CONF_H_INCLUDED 17#if DK4_BUILDING_DKTOOLS4 18#include "dk4conf.h" 19#else 20#include <dktools-4/dk4conf.h> 21#endif 22#endif 23 24#ifndef DK4TYPES_H_INCLUDED 25#if DK4_BUILDING_DKTOOLS4 26#include <libdk4base/dk4types.h> 27#else 28#include <dktools-4/dk4types.h> 29#endif 30#endif 31 32#ifndef DK4ERROR_H_INCLUDED 33#if DK4_BUILDING_DKTOOLS4 34#include <libdk4base/dk4error.h> 35#else 36#include <dktools-4/dk4error.h> 37#endif 38#endif 39 40#ifdef __cplusplus 41extern "C" { 42#endif 43 44/** Recode string from UTF-8 to 32 bit. 45 @param dstb Destination buffer. 46 @param szdstb Size of destination buffer (number of bytes). 47 @param src Source string to convert. 48 @param erp Error report, may be NULL. 49 @return 1 on success, 0 on error. 50 51 Error codes: 52 - DK4_E_INVALID_ARGUMENTS<br> 53 if src or dstb is NULL or szdstb is 0, 54 - DK4_E_BUFFER_TOO_SMALL<br> 55 if dstb is too small, 56 - DK4_E_SYNTAX<br> 57 with the number of successfully recoded characters in nelem if a 58 non-recodable character was found. 59*/ 60int 61dk4recode_utf8_to_c32( 62 dk4_c32_t *dstb, size_t szdstb, const char *src, dk4_er_t *erp 63); 64 65/** Calculate required buffer size (including final 0x00000000UL character) 66 to convert from UTF-8 to 32 bit characters. 67 @param src UTF-8 encoded source string. 68 @param erp Error report, may be NULL. 69 @return Positive required buffer size (number of dk4_c32_t) on success, 70 0 on error. 71 72 Error codes: 73 - DK4_E_INVALID_ARGUMENTS<br> 74 if src is NULL, 75 - DK4_E_MATH_OVERFLOW<br> 76 if the required size calculation results in mathematical overflow, 77 - DK4_E_SYNTAX<br> 78 if there are errors while decoding the source string. 79*/ 80size_t 81dk4recode_size_utf8_to_c32(const char *src, dk4_er_t *erp); 82 83#ifdef __cplusplus 84} 85#endif 86 87%% module 88 89#include "dk4conf.h" 90#include <libdk4c/dk4rec17.h> 91#include <libdk4c/dk4utf8.h> 92 93#if DK4_HAVE_ASSERT_H 94#ifndef ASSERT_H_INCLUDED 95#include <assert.h> 96#define ASSERT_H_INCLUDED 1 97#endif 98#endif 99 100$!trace-include 101 102 103size_t 104dk4recode_size_utf8_to_c32(const char *src, dk4_er_t *erp) 105{ 106 dk4_er_t er; 107 dk4_utf8_decoder_t dec; 108 size_t rdb = 0; 109 size_t back = 0; 110#if DK4_USE_ASSERT 111 assert(NULL != src); 112#endif 113 if (NULL != src) { 114 /* Initialize data structures. 115 */ 116 dk4error_init(&er); 117 dk4utf8_init(&dec); 118 /* Process source string. 119 */ 120 while ('\0' != *src) { 121 rdb++; 122 switch(dk4utf8_add(&dec, (unsigned char)(*(src++)))) { 123 case DK4_EDSTM_FINISHED: { 124 (void)dk4utf8_get(&dec); 125 dk4utf8_init(&dec); 126 if (SIZE_MAX == back) { 127 dk4error_set_simple_error_code(&er, DK4_E_MATH_OVERFLOW); 128 } else { 129 back++; 130 } 131 } break; 132 case DK4_EDSTM_ERROR: { 133 dk4error_set_elsize_nelem(&er, DK4_E_SYNTAX, 1, rdb); 134 } break; 135 } 136 } 137 if (0 == dk4utf8_is_empty(&dec)) { 138 dk4error_set_elsize_nelem(&er, DK4_E_SYNTAX, 1, rdb); 139 } 140 /* Add one for the finalizer. 141 */ 142 if (SIZE_MAX == back) { 143 dk4error_set_simple_error_code(&er, DK4_E_MATH_OVERFLOW); 144 } else { 145 back++; 146 } 147 /* On error return 0 and pass error code. 148 */ 149 if (DK4_E_NONE != er.ec) { 150 back = 0; 151 dk4error_copy(erp, &er); 152 } 153 } else { 154 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS); 155 } 156 return back; 157} 158 159 160 161int 162dk4recode_utf8_to_c32( 163 dk4_c32_t *dstb, size_t szdstb, const char *src, dk4_er_t *erp 164) 165{ 166 dk4_utf8_decoder_t dec; 167 size_t used = 0; 168 size_t rdb = 0; 169 dk4_c32_t c32 = dkC32(0); 170 int back = 0; 171 172#if DK4_USE_ASSERT 173 assert(NULL != dstb); 174 assert(0 < szdstb); 175 assert(NULL != src); 176#endif 177 if ((NULL != dstb) && (NULL != src) && (0 < szdstb)) { 178 back = 1; 179 dk4utf8_init(&dec); 180 while(('\0' != *src) && (1 == back) && (used < szdstb)) { 181 switch(dk4utf8_add(&dec, (unsigned char)(*(src++)))) { 182 case DK4_EDSTM_FINISHED: { 183 c32 = dk4utf8_get(&dec); 184 dk4utf8_init(&dec); 185 dstb[used++] = c32; 186 } break; 187 case DK4_EDSTM_ERROR: { 188 back = 0; 189 dk4error_set_elsize_nelem(erp, DK4_E_SYNTAX, 1, rdb); 190 } break; 191 } 192 if(back) { rdb++; } 193 } 194 if (used < szdstb) { 195 dstb[used] = (dk4_c16_t)0U; 196 } else { 197 dstb[szdstb - 1] = (dk4_c16_t)0U; 198 back = 0; 199 dk4error_set_simple_error_code(erp, DK4_E_BUFFER_TOO_SMALL); 200 } 201 if (0 == dk4utf8_is_empty(&dec)) { 202 back = 0; 203 dk4error_set_elsize_nelem(erp, DK4_E_SYNTAX, 1, rdb); 204 } 205 } else { 206 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS); 207 } 208 return back; 209} 210 211 212 213