1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * This program will generate UTF-8 to whatever single byte codeset mapping 26 * table in the single byte codeset code values' ascending order. You need to 27 * use sort(1) to sort out and make it ready for binary search that will 28 * do the search on the UTF-8 values. 29 */ 30 31 32 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <ctype.h> 36 #include <strings.h> 37 #include "../common_defs.h" 38 39 int 40 main(int ac, char **av) 41 { 42 to_utf8_table_component_t tbl[256]; 43 register int i, j; 44 char buf[BUFSIZ], num[100]; 45 unsigned int l, k; 46 char ascii_only = 0; 47 48 if (ac > 1 && strcmp(av[1], "-ascii") == 0) 49 ascii_only = 1; 50 51 for (i = 0; i < 256; i++) { 52 if (i <= 0x1f || i == 0x7f || (ascii_only && i <= 0x7f)) { 53 tbl[i].size = (signed char)1; 54 tbl[i].u8 = (unsigned int)i; 55 } else if (!ascii_only && (i >= 0x80 && i <= 0x9f)) { 56 tbl[i].size = (signed char)2; 57 tbl[i].u8 = (unsigned int)i; 58 } else { 59 tbl[i].size = (signed char)ICV_TYPE_ILLEGAL_CHAR; 60 tbl[i].u8 = 0; 61 } 62 } 63 64 65 while (fgets(buf, BUFSIZ, stdin)) { 66 i = 0; 67 while (buf[i] && isspace(buf[i])) 68 i++; 69 if (buf[i] == '#' || buf[i] == '\0') 70 continue; 71 72 for (j = 0; !isspace(buf[i]); i++, j++) 73 num[j] = buf[i]; 74 num[j] = '\0'; 75 76 k = strtol(num, (char **)NULL, 0); 77 78 while (isspace(buf[i])) 79 i++; 80 81 if (buf[i] == '#' || buf[i] == '\0') { 82 tbl[k].size = (signed char)ICV_TYPE_ILLEGAL_CHAR; 83 tbl[k].u8 = 0; 84 continue; 85 } 86 87 for (j = 0; !isspace(buf[i]); i++, j++) 88 num[j] = buf[i]; 89 num[j] = '\0'; 90 91 l = strtol(num, (char **)NULL, 0); 92 93 tbl[k].u8 = l; 94 if (l < 0x80) 95 tbl[k].size = (signed char)1; 96 else if (l < 0x800) 97 tbl[k].size = (signed char)2; 98 else if (l < 0x10000) 99 tbl[k].size = (signed char)3; 100 else if (l < 0x200000) 101 tbl[k].size = (signed char)4; 102 else if (l < 0x4000000) 103 tbl[k].size = (signed char)5; 104 else 105 tbl[k].size = (signed char)6; 106 } 107 108 for (i = 0; i < 256; i++) { 109 if (tbl[i].u8 < 0x80) 110 l = tbl[i].u8; 111 else if (tbl[i].u8 < 0x800) { 112 l = 0xc080 | 113 (((tbl[i].u8 >> 6) & 0x1f) << 8) | 114 (tbl[i].u8 & 0x3f); 115 } else if (tbl[i].u8 < 0x10000) { 116 l = 0xe08080 | 117 (((tbl[i].u8 >> 12) & 0x0f) << 16) | 118 (((tbl[i].u8 >> 6) & 0x3f) << 8) | 119 (tbl[i].u8 & 0x3f); 120 } else if (tbl[i].u8 < 0x200000) { 121 l = 0xf0808080 | 122 (((tbl[i].u8 >> 18) & 0x07) << 24) | 123 (((tbl[i].u8 >> 12) & 0x3f) << 16) | 124 (((tbl[i].u8 >> 6) & 0x3f) << 8) | 125 (tbl[i].u8 & 0x3f); 126 } /* We only support characters in range of UTF-16 127 else if (tbl[i].u8 < 0x4000000) { 128 l = 0xf880808080 | 129 (((tbl[i].u8 >> 24) & 0x03) << 32) | 130 (((tbl[i].u8 >> 18) & 0x3f) << 24) | 131 (((tbl[i].u8 >> 12) & 0x3f) << 16) | 132 (((tbl[i].u8 >> 6) & 0x3f) << 8) | 133 (tbl[i].u8 & 0x3f); 134 } else { 135 l = 0xfc8080808080 | 136 (((tbl[i].u8 >> 30) & 0x01) << 40) | 137 (((tbl[i].u8 >> 24) & 0x3f) << 32) | 138 (((tbl[i].u8 >> 18) & 0x3f) << 24) | 139 (((tbl[i].u8 >> 12) & 0x3f) << 16) | 140 (((tbl[i].u8 >> 6) & 0x3f) << 8) | 141 (tbl[i].u8 & 0x3f); 142 } 143 */ 144 145 if (i > 0x7f && l != 0) 146 printf("\t{ 0x%08X, 0x%02X },\n", l, i); 147 } 148 149 if (ascii_only) 150 printf("\t{ 0x%08X, 0x%02X },\n", 0, 0); 151 fprintf(stderr, "%s: make sure you sort the result by using\n\n\ 152 \tsort -k 1 -t ',' result_file\n\n\ 153 since iconv module that will include the result table uses binary search.\n", 154 av[0]); 155 } 156