1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include "utype.h"
5 
/* Added to a code point by getnth() when the hex value in the input is
 * followed by 'v'; ucs2_score() refers to such values as "vertical". */
#define VERTMARK 0x1000000

/* Number of alternate code points kept per CID. */
#define MULT_MAX	6

/* used[u] is nonzero once code point u has been handed to a CID; main()
 * keeps incrementing it to number later duplicates (".dupN"). */
static char used[0x110000];

/* cid_2_unicode[cid] is the code point chosen for cid; main() fills the
 * table with -1 ("no mapping") before reading any input. */
static int cid_2_unicode[0x10000];

/* nonuni_names[cid] is a glyph name for a cid that received no code point
 * of its own, or NULL. */
static char *nonuni_names[0x10000];

/* cid_2_unicodemult[cid][] holds the alternates that getnth() did not pick;
 * readers stop at the first 0 entry.
 * NOTE(review): 0x100000 rows here versus 0x10000 entries in the other
 * per-CID tables -- possibly a typo, confirm before changing. */
static int cid_2_unicodemult[0x100000][MULT_MAX];
13 
ucs2_score(int val)14 static int ucs2_score(int val) {		/* Supplied by KANOU Hiroki */
15     if ( val>=0x2e80 && val<=0x2fff )
16 return( 1 );				/* New CJK Radicals are least important */
17     else if ( val>=VERTMARK )
18 return( 2 );				/* Then vertical guys */
19     else if ( val>=0xf000 && val<=0xffff )
20 return( 3 );
21 /*    else if (( val>=0x3400 && val<0x3dff ) || (val>=0x4000 && val<=0x4dff))*/
22     else if ( val>=0x3400 && val<=0x4dff )
23 return( 4 );
24     else
25 return( 5 );
26 }
27 
/*
 * Test whether a line carries no data: optional leading decimal digits,
 * then nothing but tabs and '*' characters, then an optional CR and/or LF
 * before the end of the string.
 *
 *  buffer  NUL-terminated line of text (not modified).
 *
 * Returns 1 for such a line (an empty string included), 0 otherwise.
 */
static int allstars(const char *buffer) {
    /* Cast to unsigned char: passing a negative char value to isdigit()
     * is undefined behaviour. */
    while ( isdigit((unsigned char) *buffer))
        ++buffer;
    while ( *buffer=='\t' || *buffer=='*' )
        ++buffer;
    if ( *buffer=='\r' )
        ++buffer;
    if ( *buffer=='\n' )
        ++buffer;
    return( *buffer=='\0' );
}
37 
getnth(char * buffer,int col,int * mults)38 static int getnth(char *buffer, int col, int *mults) {
39     int i,j=0, val=0, best;
40     char *end;
41     int vals[10];
42 
43     if ( col==1 ) {
44 	/* first column is decimal, others are hex */
45 	if ( !isdigit(*buffer))
46 return( -1 );
47 	while ( isdigit(*buffer))
48 	    val = 10*val + *buffer++-'0';
49 return( val );
50     }
51     for ( i=1; i<col; ++buffer ) {
52 	if ( *buffer=='\t' )
53 	    ++i;
54 	else if ( *buffer=='\0' )
55 return( -1 );
56     }
57     val = strtol(buffer,&end,16);
58     if ( end==buffer )
59 return( -1 );
60     if ( *end=='v' ) {
61 	val += VERTMARK;
62 	++end;
63     }
64     if ( *end==',' ) {
65 	/* Multiple guess... now we've got to pick one */
66 	vals[0] = val;
67 	i = 1;
68 	while ( *end==',' && i<9 ) {
69 	    buffer = end+1;
70 	    vals[i] = strtol(buffer,&end,16);
71 	    if ( *end=='v' ) {
72 		vals[i] += VERTMARK;
73 		++end;
74 	    }
75 	    ++i;
76 	}
77 	vals[i] = 0;
78 	best = 0; val = -1;
79 	for ( i=0; vals[i]!=0; ++i ) {
80 	    if ( ucs2_score(vals[i])>best ) {
81 		val = vals[i];
82 		best = ucs2_score(vals[i]);
83 	    }
84 	}
85 	if ( mults!=NULL ) for ( i=j=0; vals[i]!=0; ++i ) {
86 	    if ( vals[i]!=val && !(vals[i]&VERTMARK))
87 		mults[j++] = vals[i];
88 	}
89 	if ( j>MULT_MAX ) {
90 	    fprintf( stderr, "Too many multiple values for %04x, need %d slots\n", (unsigned int)(val), j );
91 exit(1);
92 	}
93     }
94 
95 return( val );
96 }
97 
main(int argc,char ** argv)98 int main(int argc, char **argv) {
99     char buffer[600];
100     int cid, uni, max=0, maxcid=0, i;
101     extern char *psunicodenames[];
102 
103     nonuni_names[0] = ".notdef";
104     for ( cid=0; cid<0x10000; ++cid ) cid_2_unicode[cid] = -1;
105 
106     while ( fgets(buffer,sizeof(buffer),stdin)!=NULL ) {
107 	if ( *buffer=='#' /*|| allstars(buffer)*/)
108     continue;
109 	cid = getnth(buffer,1,NULL);
110 	if ( cid==-1 )
111     continue;
112 	uni = getnth(buffer,7,cid_2_unicodemult[cid]);
113 	maxcid = cid;
114 	if ( uni==-1 ) {
115     continue;
116 	    sprintf( buffer,"Japan2.%d", cid );
117 	    nonuni_names[cid] = strdup(buffer);
118 	} else if ( !used[uni] ) {
119 	    used[uni] = 1;
120 	    cid_2_unicode[cid] = uni;
121 	} else {
122 	    sprintf( buffer, "uni%04X.dup%d", (unsigned int)(uni), ++used[uni] );
123 	    nonuni_names[cid] = strdup(buffer);
124 	}
125 	max = cid;
126     }
127 
128     printf("%d %d\n",maxcid, max );
129 
130     for ( cid=0; cid<=max; ++cid ) {
131 	if ( cid_2_unicode[cid]!=-1 && cid_2_unicodemult[cid][0]==0 ) {
132 	    for ( i=1; cid+i<=max && cid_2_unicode[cid+i]==cid_2_unicode[cid]+i && cid_2_unicodemult[cid+i][0]==0; ++i );
133 	    --i;
134 	    if ( i!=0 ) {
135 		printf( "%d..%d %04x\n", cid, cid+i, (unsigned int)(cid_2_unicode[cid]) );
136 		cid += i;
137 	    } else
138 		printf( "%d %04x\n", cid, (unsigned int)(cid_2_unicode[cid]) );
139 	} else if ( cid_2_unicode[cid]!=-1 ) {
140 	    printf( "%d %04x", cid, (unsigned int)(cid_2_unicode[cid]) );
141 	    for ( i=0; cid_2_unicodemult[cid][i]!=0; ++i )
142 		printf( ",%04x", (unsigned int)(cid_2_unicodemult[cid][i]) );
143 	    printf( "\n");
144 	} else if ( nonuni_names[cid]!=NULL )
145 	    printf( "%d /%s\n", cid, nonuni_names[cid] );
146     }
147 return( 0 );
148 }
149