1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include "utype.h"
5 
6 static char used[0x110000];
7 static int cid_2_unicode[0x10000];
8 static char *nonuni_names[0x10000];
9 static int cid_2_rotunicode[0x10000];
10 static int puamap[0x2000];
11 #define MULT_MAX	6
12 static int cid_2_unicodemult[0x100000][MULT_MAX];
13 
14 #define VERTMARK 0x1000000
15 
ucs2_score(int val)16 static int ucs2_score(int val) {		/* Supplied by KANOU Hiroki */
17     if ( val>=0x2e80 && val<=0x2fff )
18 return( 1 );				/* New CJK Radicals are least important */
19     else if ( val>=VERTMARK )
20 return( 2 );				/* Then vertical guys */
21     else if ( val>=0xf000 && val<=0xffff )
22 return( 3 );
23 /*    else if (( val>=0x3400 && val<0x3dff ) || (val>=0x4000 && val<=0x4dff))*/
24     else if ( val>=0x3400 && val<=0x4dff )
25 return( 4 );
26     else
27 return( 5 );
28 }
29 
/*
 * Test whether a line consists of nothing but an optional run of decimal
 * digits, followed by any mix of tabs and '*' characters, followed by an
 * optional "\r" and an optional "\n".
 *
 *  buffer - NUL terminated line to examine (not modified)
 *
 * Returns 1 when the line has that shape (an empty string qualifies),
 * 0 when anything else is found before the terminating NUL.
 */
static int allstars(const char *buffer) {
    /* The character classifiers are only defined for values that fit in */
    /*  an unsigned char, so a byte with the high bit set must be cast */
    while ( isdigit((unsigned char) *buffer))
        ++buffer;
    while ( *buffer=='\t' || *buffer=='*' )
        ++buffer;
    if ( *buffer=='\r' ) ++buffer;
    if ( *buffer=='\n' ) ++buffer;
    return( *buffer=='\0' );
}
39 
getnth(char * buffer,int col,int * mults)40 static int getnth(char *buffer, int col, int *mults) {
41     int i,j=0, val=0, best;
42     char *end;
43     int vals[10];
44     int pua, nonbmp;
45 
46     if ( col==1 ) {
47 	/* first column is decimal, others are hex */
48 	if ( !isdigit(*buffer))
49 return( -1 );
50 	while ( isdigit(*buffer))
51 	    val = 10*val + *buffer++-'0';
52 return( val );
53     }
54     for ( i=1; i<col; ++buffer ) {
55 	if ( *buffer=='\t' )
56 	    ++i;
57 	else if ( *buffer=='\0' )
58 return( -1 );
59     }
60     val = strtol(buffer,&end,16);
61     if ( end==buffer )
62 return( -1 );
63     if ( *end=='v' ) {
64 	val += VERTMARK;
65 	++end;
66     }
67     if ( *end==',' ) {
68 	/* Multiple guess... now we've got to pick one */
69 	vals[0] = val;
70 	i = 1;
71 	while ( *end==',' && i<9 ) {
72 	    buffer = end+1;
73 	    vals[i] = strtol(buffer,&end,16);
74 	    if ( *end=='v' ) {
75 		vals[i] += VERTMARK;
76 		++end;
77 	    }
78 	    ++i;
79 	}
80 	vals[i] = 0;
81 	best = 0; val = -1;
82 	pua = nonbmp = 0;
83 	for ( i=0; vals[i]!=0; ++i ) {
84 	    if ( ucs2_score(vals[i])>best ) {
85 		val = vals[i];
86 		best = ucs2_score(vals[i]);
87 	    }
88 	    if ( vals[i]>=0xe000 && vals[i]<=0xf8ff )
89 		pua = vals[i];
90 	    else if ( vals[i]>0x10000 && vals[i]<VERTMARK )
91 		nonbmp = vals[i];
92 	}
93 	if ( pua!=0 && nonbmp!=0 )
94 	    puamap[pua-0xe000] = nonbmp;
95 	if ( mults!=NULL ) for ( i=j=0; vals[i]!=0; ++i ) {
96 	    if ( vals[i]!=val && !(vals[i]&VERTMARK))
97 		mults[j++] = vals[i];
98 	}
99 	if ( j>MULT_MAX ) {
100 	    fprintf( stderr, "Too many multiple values for %04x, need %d slots\n", (unsigned int)(val), j );
101 exit(1);
102 	}
103     }
104 
105 return( val );
106 }
107 
main(int argc,char ** argv)108 int main(int argc, char **argv) {
109     char buffer[600];
110     int cid, uni, max=0, maxcid=0, i,j;
111     extern char *psunicodenames[];
112     FILE *pua;
113 
114     nonuni_names[0] = ".notdef";
115     for ( cid=0; cid<0x10000; ++cid ) cid_2_unicode[cid] = -1;
116 
117     while ( fgets(buffer,sizeof(buffer),stdin)!=NULL ) {
118 	if ( *buffer=='#' /*|| allstars(buffer)*/)
119     continue;
120 	cid = getnth(buffer,1,NULL);
121 	if ( cid==-1 )
122     continue;
123 	uni = getnth(buffer,12,cid_2_unicodemult[cid]);
124 	maxcid = cid;
125 	if ( (cid>=17408 && cid<=17599) || cid==17604 || cid==17605 ) {
126 	    if ( cid>=17408 && cid<=17505 )		/* proportional */
127 		sprintf( buffer,"CNS1.%d.vert", cid-17408+1 );
128 	    else if ( cid>=17506 && cid<=17599 )	/* Half width */
129 		sprintf( buffer,"CNS1.%d.vert", cid-17506+13648 );
130 	    else if ( cid==17604 )
131 		strcpy( buffer, "CNS1.17601.vert" );
132 	    else if ( cid==17605 )
133 		strcpy( buffer, "CNS1.17603.vert" );
134 	    nonuni_names[cid] = strdup(buffer);
135 	/* Adobe's CNS cids have the rotated and non-rotated forms intermixed */
136 	} else if ( (cid>=120 && cid<=127 ) && (cid&1) ) {
137 	    sprintf( buffer,"CNS1.%d.vert", cid-1 );
138 	    nonuni_names[cid] = strdup(buffer);
139 	} else if ( (cid>=128 && cid<=159) && (cid&2) ) {
140 	    sprintf( buffer,"CNS1.%d.vert", cid-2 );
141 	    nonuni_names[cid] = strdup(buffer);
142 	} else if ( cid>=13648 && cid<=13741 ) {
143 	    sprintf( buffer, "uni%04X.hw", (unsigned int)(cid-13648+' ') );
144 	    nonuni_names[cid] = strdup(buffer);
145 	} else if ( cid==13742 ) {
146 	    strcpy( buffer, "uni203E.hw" );
147 	} else if ( uni==-1 ) {
148     continue;
149 	    sprintf( buffer,"CNS1.%d", cid );
150 	    nonuni_names[cid] = strdup(buffer);
151 	} else if ( uni>=VERTMARK ) {
152 	    /* rotated */
153 	    cid_2_rotunicode[cid] = uni-VERTMARK;
154 	} else if ( !used[uni] ) {
155 	    used[uni] = 1;
156 	    cid_2_unicode[cid] = uni;
157 	} else {
158 	    sprintf( buffer, "uni%04X.dup%d", (unsigned int)(uni), ++used[uni] );
159 	    nonuni_names[cid] = strdup(buffer);
160 	}
161 	max = cid;
162     }
163     for ( i=0; i<maxcid; ++i ) if ( cid_2_rotunicode[i]!=0 ) {
164 	for ( j=0; j<maxcid; ++j )
165 	    if ( cid_2_unicode[j] == cid_2_rotunicode[i] )
166 	break;
167 	if ( j==maxcid )
168 	    sprintf( buffer, "uni%04X.vert", (unsigned int)(cid_2_rotunicode[i]) );
169 	else
170 	    sprintf( buffer, "CNS1.%d.vert", j);
171 	nonuni_names[i] = strdup(buffer);
172     }
173 
174     printf("%d %d\n",maxcid, max );
175 
176     for ( cid=0; cid<=max; ++cid ) {
177 	if ( cid_2_unicode[cid]!=-1 && cid_2_unicodemult[cid][0]==0 ) {
178 	    for ( i=1; cid+i<=max && cid_2_unicode[cid+i]==cid_2_unicode[cid]+i && cid_2_unicodemult[cid+i][0]==0; ++i );
179 	    --i;
180 	    if ( i!=0 ) {
181 		printf( "%d..%d %04x\n", cid, cid+i, (unsigned int)(cid_2_unicode[cid]) );
182 		cid += i;
183 	    } else
184 		printf( "%d %04x\n", cid, (unsigned int)(cid_2_unicode[cid]) );
185 	} else if ( cid_2_unicode[cid]!=-1 ) {
186 	    printf( "%d %04x", cid, (unsigned int)(cid_2_unicode[cid]) );
187 	    for ( i=0; cid_2_unicodemult[cid][i]!=0; ++i )
188 		printf( ",%04x", (unsigned int)(cid_2_unicodemult[cid][i]) );
189 	    printf( "\n");
190 	} else if ( nonuni_names[cid]!=NULL )
191 	    printf( "%d /%s\n", cid, nonuni_names[cid] );
192     }
193 
194     pua = fopen("cns14.pua","w");
195     if (pua) {
196         for ( i=0; i<0xf8ff-0xe000; i+=8 ) {
197             fprintf(pua, "/* %0X */\t", (unsigned int)(i+0xe000) );
198             for ( j=0; j<8; ++j ) {
199                 if ( puamap[i+j]!=0 )
200                     fprintf(pua, "0x%05x,%s", (unsigned int)(puamap[i+j]), j==7 ? "\n" : " " );
201                 else
202                     fprintf(pua, "    0x0,%s", j==7 ? "\n" : " " );
203             }
204         }
205         fclose(pua);
206     }
207 
208 return( 0 );
209 }
210