#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "utype.h"
5
/* Per code point: 0 until main() maps a CID to it, then a count that main()
 * bumps to number the duplicate glyph names it generates.  Unsigned so the
 * count does not depend on whether plain char is signed on this platform. */
static unsigned char used[0x110000];
/* Value chosen for each CID; main() presets every entry to -1 (= none). */
static int cid_2_unicode[0x10000];
/* Glyph name of a CID that did not get a value of its own, else NULL. */
static char *nonuni_names[0x10000];
/* Number of alternate values kept per CID. */
#define MULT_MAX 6
/* Alternate values of each CID, filled by getnth(); a row that is not full
 * ends at its first 0 entry.
 * NOTE(review): 0x100000 rows, while the other CID-indexed tables have
 * 0x10000 -- possibly a typo, left unchanged here. */
static int cid_2_unicodemult[0x100000][MULT_MAX];

/* Added by getnth() to a hex value that is followed by 'v' in the input. */
#define VERTMARK 0x1000000
13
ucs2_score(int val)14 static int ucs2_score(int val) { /* Supplied by KANOU Hiroki */
15 if ( val>=0x2e80 && val<=0x2fff )
16 return( 1 ); /* New CJK Radicals are least important */
17 else if ( val>=VERTMARK )
18 return( 2 ); /* Then vertical guys */
19 else if ( val>=0xf000 && val<=0xffff )
20 return( 3 );
21 /* else if (( val>=0x3400 && val<0x3dff ) || (val>=0x4000 && val<=0x4dff))*/
22 else if ( val>=0x3400 && val<=0x4dff )
23 return( 4 );
24 else
25 return( 5 );
26 }
27
/* Tell whether a line holds nothing but its leading decimal number followed
 * by tabs and '*' characters.  One optional "\r" and one optional "\n" are
 * accepted at the end.
 *   buffer   NUL-terminated line; it is only read
 *   returns  1 if the line has that shape, 0 otherwise */
static int allstars(const char *buffer) {
    /* <ctype.h> classifiers are only defined for unsigned char values (and
     * EOF), so a byte >= 0x80 must not reach isdigit() as a negative char. */
    while ( isdigit((unsigned char) *buffer) )
        ++buffer;
    while ( *buffer=='\t' || *buffer=='*' )
        ++buffer;
    if ( *buffer=='\r' ) ++buffer;
    if ( *buffer=='\n' ) ++buffer;
    return( *buffer=='\0' );
}
37
getnth(char * buffer,int col,int * mults)38 static int getnth(char *buffer, int col, int *mults) {
39 int i,j=0, val=0, best;
40 char *end;
41 int vals[10];
42
43 if ( col==1 ) {
44 /* first column is decimal, others are hex */
45 if ( !isdigit(*buffer))
46 return( -1 );
47 while ( isdigit(*buffer))
48 val = 10*val + *buffer++-'0';
49 return( val );
50 }
51 for ( i=1; i<col; ++buffer ) {
52 if ( *buffer=='\t' )
53 ++i;
54 else if ( *buffer=='\0' )
55 return( -1 );
56 }
57 val = strtol(buffer,&end,16);
58 if ( end==buffer )
59 return( -1 );
60 if ( *end=='v' ) {
61 val += VERTMARK;
62 ++end;
63 }
64 if ( *end==',' ) {
65 /* Multiple guess... now we've got to pick one */
66 vals[0] = val;
67 i = 1;
68 while ( *end==',' && i<9 ) {
69 buffer = end+1;
70 vals[i] = strtol(buffer,&end,16);
71 if ( *end=='v' ) {
72 vals[i] += VERTMARK;
73 ++end;
74 }
75 ++i;
76 }
77 vals[i] = 0;
78 best = 0; val = -1;
79 for ( i=0; vals[i]!=0; ++i ) {
80 if ( ucs2_score(vals[i])>best ) {
81 val = vals[i];
82 best = ucs2_score(vals[i]);
83 }
84 }
85 if ( mults!=NULL ) for ( i=j=0; vals[i]!=0; ++i ) {
86 if ( vals[i]!=val && !(vals[i]&VERTMARK))
87 mults[j++] = vals[i];
88 }
89 if ( j>MULT_MAX ) {
90 fprintf( stderr, "Too many multiple values for %04x, need %d slots\n", (unsigned int)(val), j );
91 exit(1);
92 }
93 }
94
95 return( val );
96 }
97
main(int argc,char ** argv)98 int main(int argc, char **argv) {
99 char buffer[600];
100 int cid, uni, max=0, maxcid=0, i;
101 extern char *psunicodenames[];
102
103 nonuni_names[0] = ".notdef";
104 for ( cid=0; cid<0x10000; ++cid ) cid_2_unicode[cid] = -1;
105
106 while ( fgets(buffer,sizeof(buffer),stdin)!=NULL ) {
107 if ( *buffer=='#' /*|| allstars(buffer)*/)
108 continue;
109 cid = getnth(buffer,1,NULL);
110 if ( cid==-1 )
111 continue;
112 uni = getnth(buffer,7,cid_2_unicodemult[cid]);
113 maxcid = cid;
114 if ( uni==-1 ) {
115 continue;
116 sprintf( buffer,"Japan2.%d", cid );
117 nonuni_names[cid] = strdup(buffer);
118 } else if ( !used[uni] ) {
119 used[uni] = 1;
120 cid_2_unicode[cid] = uni;
121 } else {
122 sprintf( buffer, "uni%04X.dup%d", (unsigned int)(uni), ++used[uni] );
123 nonuni_names[cid] = strdup(buffer);
124 }
125 max = cid;
126 }
127
128 printf("%d %d\n",maxcid, max );
129
130 for ( cid=0; cid<=max; ++cid ) {
131 if ( cid_2_unicode[cid]!=-1 && cid_2_unicodemult[cid][0]==0 ) {
132 for ( i=1; cid+i<=max && cid_2_unicode[cid+i]==cid_2_unicode[cid]+i && cid_2_unicodemult[cid+i][0]==0; ++i );
133 --i;
134 if ( i!=0 ) {
135 printf( "%d..%d %04x\n", cid, cid+i, (unsigned int)(cid_2_unicode[cid]) );
136 cid += i;
137 } else
138 printf( "%d %04x\n", cid, (unsigned int)(cid_2_unicode[cid]) );
139 } else if ( cid_2_unicode[cid]!=-1 ) {
140 printf( "%d %04x", cid, (unsigned int)(cid_2_unicode[cid]) );
141 for ( i=0; cid_2_unicodemult[cid][i]!=0; ++i )
142 printf( ",%04x", (unsigned int)(cid_2_unicodemult[cid][i]) );
143 printf( "\n");
144 } else if ( nonuni_names[cid]!=NULL )
145 printf( "%d /%s\n", cid, nonuni_names[cid] );
146 }
147 return( 0 );
148 }
149