/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1999 by Sun Microsystems, Inc.
 * All rights reserved.
 */


/*
 * This program converts Additional Codeset Characters from 0x00 through 0xff
 * to the UTF-8 codeset, and then converts each character in UTF-8 back to the
 * original codeset.
 * For example,
 *	IBM -> UTF-8 -> IBM
 *	(1)    (2)      (3)
 *	       -> Unicode Scalar
 *	          (4)
 *	output (1) (2) (4) line by line
 *	comparing (1) (3)
 */

#include <stdio.h>
#include <libgen.h>
#include <stdlib.h>
#include <unistd.h>
#include <locale.h>
#include <iconv.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/wait.h>

/* Number of source bytes fed to the converter per test. */
#define	DATASIZE	1
/* Size of every scratch conversion buffer. */
#define	BUFFER_SIZE	1024
/* Number of entries in the reference table loaded by mk_data(). */
#define	TBL_SIZE	0x10000

static void mk_data(char *, char *);

char	*ME;			/* program name used in messages */
int	status;			/* exit code used by the current phase */
static int flag_check = 0;	/* check with data file */

/* Reference table: tbl[code].u4 is the value read from the data file. */
static struct {
	unsigned int	from;
	unsigned int	u4;
} tbl[TBL_SIZE];


/*
 * Print the usage line to stderr and exit with exit_code.
 */
void
usage(int exit_code)
{
	fprintf(stderr, "Usage: %s from-code\n", ME);
	exit(exit_code);
}

/*
 * Fold the converted bytes in buf[0..len) into one integer and report on
 * stderr when it differs from the table entry for source code i.
 *
 * For i == 0 the first two bytes are skipped (presumably the byte-order
 * mark the converter emits at the start of its first output -- TODO
 * confirm).  The byte order used for folding follows _LITTLE_ENDIAN.
 */
static void
check_with_file(int i, const unsigned char *buf, size_t len)
{
	unsigned long	l = 0UL;
	size_t		first = (i == 0) ? 2 : 0;
	size_t		j;

#ifdef _LITTLE_ENDIAN
	/* Walk from the last produced byte down to index 'first'. */
	for (j = len; j > first; j--)
		l = (l << 8) + (unsigned int)buf[j - 1];
#else
	for (j = first; j < len; j++)
		l = (l << 8) + (unsigned int)buf[j];
#endif
	if (l != tbl[i].u4)
		fprintf(stderr, "%lx != %x \n", l, tbl[i].u4);
}

/*
 * Convert the UTF-8 bytes utf8[0..utf8_len) with cd2, print the converted
 * bytes in hex after a tab and, when flag_check is set, compare them with
 * the reference table entry for i.  Exits with 'status' when the conversion
 * does not return 0.
 */
static void
print_unicode(int i, iconv_t cd2, unsigned char *utf8, size_t utf8_len)
{
	unsigned char	ucs_buf[BUFFER_SIZE] = { 0 };
	unsigned char	*in = utf8;
	unsigned char	*out = ucs_buf;
	size_t		in_len = utf8_len;
	size_t		out_len = sizeof (ucs_buf);
	size_t		produced;
	size_t		j;

	/*
	 * The input argument goes through void * because iconv() declares
	 * it as const char ** on some systems and char ** on others.
	 */
	if (iconv(cd2, (void *)&in, &in_len, (char **)&out, &out_len) != 0) {
		printf(" \n stoped \n");
		fprintf(stderr, "fail to convert UTF-8 to Unicode\n");
		exit(status);
	}

	produced = sizeof (ucs_buf) - out_len;
	printf("\t");
	for (j = 0; j < produced; j++)
		printf("%02x", (unsigned int)ucs_buf[j]);

	if (flag_check > 0)
		check_with_file(i, ucs_buf, produced);
}

/*
 * Convert the UTF-8 bytes utf8[0..utf8_len) back with cd3 and warn when the
 * result is not exactly the single byte i.  Exits with 'status' when the
 * conversion does not return 0.
 */
static void
compare_round_trip(int i, iconv_t cd3, unsigned char *utf8, size_t utf8_len)
{
	/* Zeroed so the warning below never prints an indeterminate byte. */
	unsigned char	back_buf[BUFFER_SIZE] = { 0 };
	unsigned char	*in = utf8;
	unsigned char	*out = back_buf;
	size_t		in_len = utf8_len;
	size_t		out_len = sizeof (back_buf);
	int		err;

	if (iconv(cd3, (void *)&in, &in_len, (char **)&out, &out_len) != 0) {
		err = errno;	/* save before stdio can overwrite it */
		printf(" \n WARNING \n");
		fprintf(stderr,
		    "fail to convert UTF-8 to Orignal Codeset(%x)\n", i);
		fprintf(stderr, "errno=%d %lu %ld\n",
		    err,
		    (unsigned long)(utf8_len - in_len),
		    (long)(in - utf8));
		exit(status);
	}

	printf("\t");
	if ((sizeof (back_buf) - out_len != 1) ||
	    (back_buf[0] != (unsigned char)i)) {
		printf("\t-> 0x%2x \n warning \n", (unsigned int)back_buf[0]);
		fprintf(stderr,
		    " Converting answer is not the same (0x%02x) for (0x%02x)\n",
		    (unsigned int)back_buf[0], i);
	}
}

/*
 * Run one round-trip test for the single source byte i.
 *
 *	cd	converts the tested codeset to UTF-8
 *	cd2	converts UTF-8 to UCS-2 (printed, optionally table-checked)
 *	cd3	converts UTF-8 back to the tested codeset
 *
 * Prints one result line on stdout.  An EILSEQ from the first conversion is
 * reported as "0x.. EILSEQ" and is not fatal; any other failure terminates
 * the program with the current value of 'status'.
 */
void
validate(int i, iconv_t cd, iconv_t cd2, iconv_t cd3)
{
	unsigned char		source_buf[BUFFER_SIZE];
	unsigned char		result_buf[BUFFER_SIZE];
	const unsigned char	*source = source_buf;
	unsigned char		*result = result_buf;
	size_t			source_len = DATASIZE;
	size_t			result_len = sizeof (result_buf);
	size_t			utf8_len;
	size_t			s;
	size_t			j;
	int			err;

	source_buf[0] = (unsigned char)i;

	s = iconv(cd, (void *)&source, &source_len,
	    (char **)&result, &result_len);
	err = errno;
	utf8_len = sizeof (result_buf) - result_len;

	status = 1;
	if (s == (size_t)0) {
		if ((source_len != 0) ||
		    ((source - source_buf) != DATASIZE)) {
			fprintf(stderr, ": %d %lu %ld\n",
			    err,
			    (unsigned long)source_len,
			    (long)(source - source_buf));
			exit(status);
		}
		printf("0x%02x\t0x", i);
		for (j = 0; j < utf8_len; j++)
			printf("%02x", (unsigned int)result_buf[j]);
		print_unicode(i, cd2, result_buf, utf8_len);
		compare_round_trip(i, cd3, result_buf, utf8_len);
		printf("\n");
		return;
	}

	status += 1;
	if (s == (size_t)-1) {
		if (err == EILSEQ) {
			printf("0x%02x EILSEQ\n", i);
			return;
		}
		fprintf(stderr,
		    "Error for source 0x%02x(%d): %d %lu %ld %lu %ld\n",
		    i, i,
		    err,
		    (unsigned long)(DATASIZE - source_len),
		    (long)(source - source_buf),
		    (unsigned long)utf8_len,
		    (long)(result - result_buf));
		exit(status);
	}

	/* Conversion succeeded but reported non-reversible conversions. */
	status += 1;
	exit(status);
}

/*
 * Usage: prog [-c data-dir] from-code
 *
 * Converts every byte value 0x00..0xff from from-code to UTF-8 and back,
 * printing one line per value.  With -c, the file data-dir/from-code.txt is
 * loaded first and the UCS-2 values are checked against it.
 */
int
main(int argc, char **argv)
{
	int	r;
	iconv_t	cd;
	iconv_t	cd2;
	iconv_t	cd3;
	int	i, j, k;
	char	*dir = NULL;

	ME = basename(argv[0]);
	setlocale(LC_ALL, "");
	status = 100;

	for (j = 1; j < argc; j++) {
		if (argv[j][0] != '-')
			break;
		for (k = 1; argv[j][k] != '\0'; k++) {
			if (argv[j][k] == 'c') {
				flag_check = 1;
				j++;
				if (j >= argc)
					usage(-1);
				dir = argv[j];
				/*
				 * argv[j] is now the directory operand, not
				 * an option string: stop scanning it.
				 */
				break;
			}
		}
	}
	if (j >= argc)
		usage(-1);

	if (flag_check > 0)
		mk_data(dir, argv[j]);

	cd = iconv_open("UTF-8", argv[j]);
	if (cd == (iconv_t)-1) {
		perror("iconv_open");
		exit(status);
	}

	cd2 = iconv_open("UCS-2", "UTF-8");
	if (cd2 == (iconv_t)-1) {
		perror("iconv_open for UTF-8");
		exit(status);
	}

	cd3 = iconv_open(argv[j], "UTF-8");
	if (cd3 == (iconv_t)-1) {
		perror("iconv_open for reverse");
		exit(status);
	}

	/*
	 * main logic
	 */
	for (i = 0; i <= 0xff; i++)
		validate(i, cd, cd2, cd3);

	status = 200;
	r = iconv_close(cd);
	if (r == -1) {
		perror("iconv_close");
		exit(status);
	}

	r = iconv_close(cd2);
	if (r == -1) {
		perror("iconv_close for UTF-8");
		exit(status);
	}

	r = iconv_close(cd3);
	if (r == -1) {
		perror("iconv_close for reverse");
		exit(status);
	}

	return (0);
}

/*
 * Return a pointer to the first non-whitespace character at or after p
 * (or to the terminating NUL).
 */
static const char *
skip_spaces(const char *p)
{
	while (*p != '\0' && isspace((unsigned char)*p))
		p++;
	return (p);
}

/*
 * Copy the whitespace-delimited token starting at p into tok (at most
 * toksz - 1 characters, always NUL-terminated; excess characters are
 * dropped).  Returns a pointer just past the token.
 */
static const char *
copy_token(const char *p, char *tok, size_t toksz)
{
	size_t	n = 0;

	while (*p != '\0' && !isspace((unsigned char)*p)) {
		if (n + 1 < toksz)
			tok[n++] = *p;
		p++;
	}
	tok[n] = '\0';
	return (p);
}

/*
 * Load the reference table from the file "dir/name.txt".
 *
 * Each data line holds two whitespace-separated hexadecimal numbers: the
 * source code value and the value stored in tbl[code].u4.  Blank lines,
 * lines whose first non-blank character is '#', and lines with no second
 * number (undefined codes) are ignored, as are lines whose numbers cannot
 * be parsed or whose code value does not fit in the table.  Exits with -1
 * if the file cannot be opened.
 */
static void
mk_data(char *dir, char *name)
{
	char		buf[BUFSIZ];
	char		num[100];
	char		file[BUFSIZ];
	const char	*p;
	char		*end;
	unsigned long	k, l;
	FILE		*fd;
	int		n;

	n = snprintf(file, sizeof (file), "%s/%s.txt", dir, name);
	if (n < 0 || (size_t)n >= sizeof (file)) {
		fprintf(stderr, "%s: data file name is too long\n", ME);
		exit(-1);
	}
	if ((fd = fopen(file, "r")) == NULL) {
		perror("fopen");
		exit(-1);
	}

	/* from the information file, pair data is created */
	while (fgets(buf, sizeof (buf), fd) != NULL) {
		p = skip_spaces(buf);
		if (*p == '#' || *p == '\0')
			continue;

		p = copy_token(p, num, sizeof (num));
		k = strtoul(num, &end, 16);
		if (end == num || k >= TBL_SIZE)
			continue;	/* not a number, or outside tbl[] */

		p = skip_spaces(p);
		if (*p == '#' || *p == '\0')
			/* undefined */
			continue;

		(void) copy_token(p, num, sizeof (num));
		l = strtoul(num, &end, 16);
		if (end == num)
			continue;

		tbl[k].u4 = (unsigned int)l;
		tbl[k].from = (unsigned int)k;
	}

	(void) fclose(fd);
}