1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 27 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <ctype.h> 31 #include <strings.h> 32 #include "../common_defs.h" 33 34 int 35 main(int ac, char **av) 36 { 37 to_utf8_table_component_t tbl[256]; 38 register int i, j; 39 char buf[BUFSIZ], num[100]; 40 unsigned int l, k; 41 char ascii_only = 0; 42 43 if (ac > 1 && strcmp(av[1], "-ascii") == 0) 44 ascii_only = 1; 45 46 for (i = 0; i < 256; i++) { 47 if (i <= 0x1f || i == 0x7f || (ascii_only && i <= 0x7f)) { 48 tbl[i].size = 1; 49 tbl[i].u8 = (unsigned int)i; 50 } else if (!ascii_only && (i >= 0x80 && i <= 0x9f)) { 51 tbl[i].size = 2; 52 tbl[i].u8 = (unsigned int)i; 53 } else { 54 tbl[i].size = ICV_TYPE_ILLEGAL_CHAR; 55 tbl[i].u8 = 0; 56 } 57 } 58 59 60 while (fgets(buf, BUFSIZ, stdin)) { 61 i = 0; 62 while (buf[i] && isspace(buf[i])) 63 i++; 64 if (buf[i] == '#' || buf[i] == '\0') 65 continue; 66 67 for (j = 0; !isspace(buf[i]); i++, j++) 68 num[j] = buf[i]; 69 num[j] = '\0'; 70 71 k = strtol(num, (char **)NULL, 0); 72 73 while (isspace(buf[i])) 74 i++; 75 76 /* Take care of UNDEFINED cases. */ 77 if (buf[i] == '#' || buf[i] == '\0') { 78 tbl[k].size = ICV_TYPE_ILLEGAL_CHAR; 79 tbl[k].u8 = 0; 80 continue; 81 } 82 83 for (j = 0; !isspace(buf[i]); i++, j++) 84 num[j] = buf[i]; 85 num[j] = '\0'; 86 87 l = strtol(num, (char **)NULL, 0); 88 89 tbl[k].u8 = l; 90 if (l < 0x80) 91 tbl[k].size = 1; 92 else if (l < 0x800) 93 tbl[k].size = 2; 94 else if (l < 0x10000) 95 tbl[k].size = 3; 96 else if (l < 0x200000) 97 tbl[k].size = 4; 98 else if (l < 0x4000000) 99 tbl[k].size = 5; 100 else 101 tbl[k].size = 6; 102 } 103 104 for (i = 0; i < 256; i++) { 105 if (tbl[i].u8 < 0x80) 106 l = tbl[i].u8; 107 else if (tbl[i].u8 < 0x800) { 108 l = 0xc080 | 109 (((tbl[i].u8 >> 6) & 0x1f) << 8) | 110 (tbl[i].u8 & 0x3f); 111 } else if (tbl[i].u8 < 0x10000) { 112 l = 0xe08080 | 113 (((tbl[i].u8 >> 12) & 0x0f) << 16) | 114 (((tbl[i].u8 >> 6) & 0x3f) << 8) | 115 (tbl[i].u8 & 0x3f); 116 } else if (tbl[i].u8 < 0x200000) { 117 l = 0xf0808080 | 118 (((tbl[i].u8 >> 18) & 0x07) << 24) | 119 (((tbl[i].u8 >> 12) & 0x3f) << 16) | 120 (((tbl[i].u8 >> 6) & 0x3f) << 8) | 121 (tbl[i].u8 & 0x3f); 122 } /* We only support characters in range of UTF-16 123 else if (tbl[i].u8 < 0x4000000) { 124 l = 0xf880808080 | 125 (((tbl[i].u8 >> 24) & 0x03) << 32) | 126 (((tbl[i].u8 >> 18) & 0x3f) << 24) | 127 (((tbl[i].u8 >> 12) & 0x3f) << 16) | 128 (((tbl[i].u8 >> 6) & 0x3f) << 8) | 129 (tbl[i].u8 & 0x3f); 130 } else { 131 l = 0xfc8080808080 | 132 (((tbl[i].u8 >> 30) & 0x01) << 40) | 133 (((tbl[i].u8 >> 24) & 0x3f) << 32) | 134 (((tbl[i].u8 >> 18) & 0x3f) << 24) | 135 (((tbl[i].u8 >> 12) & 0x3f) << 16) | 136 (((tbl[i].u8 >> 6) & 0x3f) << 8) | 137 (tbl[i].u8 & 0x3f); 138 } 139 */ 140 141 printf("/* 0x%02X */ { 0x%08X, %-3d},\n", i, l, tbl[i].size); 142 } 143 } 144