1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 27 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <ctype.h> 31 #include <strings.h> 32 #include "../common_defs.h" 33 34 int 35 main(int ac, char **av) 36 { 37 to_utf8_table_component_t tbl[256]; 38 register int i, j; 39 char buf[BUFSIZ], num[100]; 40 unsigned int l, k; 41 char ascii_only = 0; 42 43 if (ac > 1 && strcmp(av[1], "-ascii") == 0) 44 ascii_only = 1; 45 46 for (i = 0; i < 256; i++) { 47 if (i <= 0x1f || i == 0x7f || (ascii_only && i <= 0x7f)) { 48 tbl[i].size = 1; 49 tbl[i].u8 = (unsigned int)i; 50 } else if (!ascii_only && (i >= 0x80 && i <= 0x9f)) { 51 tbl[i].size = 2; 52 tbl[i].u8 = (unsigned int)i; 53 } else { 54 tbl[i].size = ICV_TYPE_ILLEGAL_CHAR; 55 tbl[i].u8 = 0; 56 } 57 } 58 59 60 while (fgets(buf, BUFSIZ, stdin)) { 61 i = 0; 62 while (buf[i] && isspace(buf[i])) 63 i++; 64 65 if (strncmp(buf + i, "<U", 2) != 0) 66 continue; 67 68 i += 2; 69 for (j = 0; isxdigit(buf[i]); i++, j++) 70 num[j] = buf[i]; 71 num[j] = '\0'; 72 73 l = strtol(num, (char **)NULL, 16); 74 75 while (!isxdigit(buf[i])) 76 i++; 77 78 for (j = 0; isxdigit(buf[i]); i++, j++) 79 num[j] = buf[i]; 80 num[j] = '\0'; 81 82 k = strtol(num, (char **)NULL, 16); 83 84 while (buf[i] == ' ' || buf[i] == '\t') 85 i++; 86 87 if (buf[i] && buf[i] == '|' && buf[i + 1] && buf[i + 1] != '0') 88 continue; 89 90 tbl[k].u8 = l; 91 if (l < 0x80) 92 tbl[k].size = 1; 93 else if (l < 0x800) 94 tbl[k].size = 2; 95 else if (l < 0x10000) 96 tbl[k].size = 3; 97 else if (l < 0x200000) 98 tbl[k].size = 4; 99 else if (l < 0x4000000) 100 tbl[k].size = 5; 101 else 102 tbl[k].size = 6; 103 } 104 105 for (i = 0; i < 256; i++) { 106 if (tbl[i].u8 < 0x80) 107 l = tbl[i].u8; 108 else if (tbl[i].u8 < 0x800) { 109 l = 0xc080 | 110 (((tbl[i].u8 >> 6) & 0x1f) << 8) | 111 (tbl[i].u8 & 0x3f); 112 } else if (tbl[i].u8 < 0x10000) { 113 l = 0xe08080 | 114 (((tbl[i].u8 >> 12) & 0x0f) << 16) | 115 (((tbl[i].u8 >> 6) & 0x3f) << 8) | 116 (tbl[i].u8 & 0x3f); 117 } else if (tbl[i].u8 < 0x200000) { 118 l = 0xf0808080 | 119 (((tbl[i].u8 >> 18) & 0x07) << 24) | 120 (((tbl[i].u8 >> 12) & 0x3f) << 16) | 121 (((tbl[i].u8 >> 6) & 0x3f) << 8) | 122 (tbl[i].u8 & 0x3f); 123 } /* We only support characters in range of UTF-16 124 else if (tbl[i].u8 < 0x4000000) { 125 l = 0xf880808080 | 126 (((tbl[i].u8 >> 24) & 0x03) << 32) | 127 (((tbl[i].u8 >> 18) & 0x3f) << 24) | 128 (((tbl[i].u8 >> 12) & 0x3f) << 16) | 129 (((tbl[i].u8 >> 6) & 0x3f) << 8) | 130 (tbl[i].u8 & 0x3f); 131 } else { 132 l = 0xfc8080808080 | 133 (((tbl[i].u8 >> 30) & 0x01) << 40) | 134 (((tbl[i].u8 >> 24) & 0x3f) << 32) | 135 (((tbl[i].u8 >> 18) & 0x3f) << 24) | 136 (((tbl[i].u8 >> 12) & 0x3f) << 16) | 137 (((tbl[i].u8 >> 6) & 0x3f) << 8) | 138 (tbl[i].u8 & 0x3f); 139 } 140 */ 141 142 printf("/* 0x%02X */ { 0x%08X, %-3d},\n", i, l, tbl[i].size); 143 } 144 } 145