#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "utype.h"
5
/* Added to a parsed code point when the hex value is followed by 'v' in the
 * input; it lies above every valid Unicode value (max 0x10FFFF). */
#define VERTMARK 0x1000000

/* Number of slots in one row of cid_2_unicodemult. */
#define MULT_MAX 6

/* Indexed by Unicode value: 0 until the value is assigned to a CID, then a
 * running count that is bumped for every further CID claiming the same value. */
static char used[0x110000];

/* Indexed by CID: the Unicode value assigned to that CID, or -1 for none
 * (main() fills the table with -1 before reading input). */
static int cid_2_unicode[0x10000];

/* Indexed by CID: for CIDs whose value carried VERTMARK, the value with the
 * mark removed; 0 means "no entry". */
static int cid_2_rotunicode[0x10000];

/* Indexed by CID: glyph name emitted for CIDs that do not get a plain
 * Unicode mapping; NULL when there is none. */
static char *nonuni_names[0x10000];

/* Indexed by (code point - 0xe000): the value above the BMP that appeared in
 * the same comma-separated list as that private-use code point; 0 for none. */
static int puamap[0x2000];

/* Indexed by CID: the remaining values from a comma-separated list once the
 * preferred one has been picked; unused slots stay 0.
 * NOTE(review): this table has 0x100000 rows while every other CID-indexed
 * table has 0x10000 -- possibly a typo; confirm before changing. */
static int cid_2_unicodemult[0x100000][MULT_MAX];
15
ucs2_score(int val)16 static int ucs2_score(int val) { /* Supplied by KANOU Hiroki */
17 if ( val>=0x2e80 && val<=0x2fff )
18 return( 1 ); /* New CJK Radicals are least important */
19 else if ( val>=VERTMARK )
20 return( 2 ); /* Then vertical guys */
21 else if ( val>=0xf000 && val<=0xffff )
22 return( 3 );
23 /* else if (( val>=0x3400 && val<0x3dff ) || (val>=0x4000 && val<=0x4dff))*/
24 else if ( val>=0x3400 && val<=0x4dff )
25 return( 4 );
26 else
27 return( 5 );
28 }
29
/*
 * Report whether a line carries no data: a leading run of decimal digits,
 * then nothing but tabs and '*' characters, optionally ended by a CR
 * and/or an LF.
 *
 * buffer: NUL-terminated line to inspect (not modified).
 * Returns 1 when the line matches that shape (an empty string does too),
 * 0 otherwise.
 */
static int allstars(const char *buffer) {
    /* The cast keeps bytes >= 0x80 out of the negative range, which the
     * character-classification functions must not be handed. */
    while (isdigit((unsigned char)*buffer))
        ++buffer;
    while (*buffer == '\t' || *buffer == '*')
        ++buffer;
    if (*buffer == '\r')
        ++buffer;
    if (*buffer == '\n')
        ++buffer;
    return *buffer == '\0';
}
39
getnth(char * buffer,int col,int * mults)40 static int getnth(char *buffer, int col, int *mults) {
41 int i,j=0, val=0, best;
42 char *end;
43 int vals[10];
44 int pua, nonbmp;
45
46 if ( col==1 ) {
47 /* first column is decimal, others are hex */
48 if ( !isdigit(*buffer))
49 return( -1 );
50 while ( isdigit(*buffer))
51 val = 10*val + *buffer++-'0';
52 return( val );
53 }
54 for ( i=1; i<col; ++buffer ) {
55 if ( *buffer=='\t' )
56 ++i;
57 else if ( *buffer=='\0' )
58 return( -1 );
59 }
60 val = strtol(buffer,&end,16);
61 if ( end==buffer )
62 return( -1 );
63 if ( *end=='v' ) {
64 val += VERTMARK;
65 ++end;
66 }
67 if ( *end==',' ) {
68 /* Multiple guess... now we've got to pick one */
69 vals[0] = val;
70 i = 1;
71 while ( *end==',' && i<9 ) {
72 buffer = end+1;
73 vals[i] = strtol(buffer,&end,16);
74 if ( *end=='v' ) {
75 vals[i] += VERTMARK;
76 ++end;
77 }
78 ++i;
79 }
80 vals[i] = 0;
81 best = 0; val = -1;
82 pua = nonbmp = 0;
83 for ( i=0; vals[i]!=0; ++i ) {
84 if ( ucs2_score(vals[i])>best ) {
85 val = vals[i];
86 best = ucs2_score(vals[i]);
87 }
88 if ( vals[i]>=0xe000 && vals[i]<=0xf8ff )
89 pua = vals[i];
90 else if ( vals[i]>0x10000 && vals[i]<VERTMARK )
91 nonbmp = vals[i];
92 }
93 if ( pua!=0 && nonbmp!=0 )
94 puamap[pua-0xe000] = nonbmp;
95 if ( mults!=NULL ) for ( i=j=0; vals[i]!=0; ++i ) {
96 if ( vals[i]!=val && !(vals[i]&VERTMARK))
97 mults[j++] = vals[i];
98 }
99 if ( j>MULT_MAX ) {
100 fprintf( stderr, "Too many multiple values for %04x, need %d slots\n", (unsigned int)(val), j );
101 exit(1);
102 }
103 }
104
105 return( val );
106 }
107
main(int argc,char ** argv)108 int main(int argc, char **argv) {
109 char buffer[600];
110 int cid, uni, max=0, maxcid=0, i,j;
111 extern char *psunicodenames[];
112 FILE *pua;
113
114 nonuni_names[0] = ".notdef";
115 for ( cid=0; cid<0x10000; ++cid ) cid_2_unicode[cid] = -1;
116
117 while ( fgets(buffer,sizeof(buffer),stdin)!=NULL ) {
118 if ( *buffer=='#' /*|| allstars(buffer)*/)
119 continue;
120 cid = getnth(buffer,1,NULL);
121 if ( cid==-1 )
122 continue;
123 uni = getnth(buffer,12,cid_2_unicodemult[cid]);
124 maxcid = cid;
125 if ( (cid>=17408 && cid<=17599) || cid==17604 || cid==17605 ) {
126 if ( cid>=17408 && cid<=17505 ) /* proportional */
127 sprintf( buffer,"CNS1.%d.vert", cid-17408+1 );
128 else if ( cid>=17506 && cid<=17599 ) /* Half width */
129 sprintf( buffer,"CNS1.%d.vert", cid-17506+13648 );
130 else if ( cid==17604 )
131 strcpy( buffer, "CNS1.17601.vert" );
132 else if ( cid==17605 )
133 strcpy( buffer, "CNS1.17603.vert" );
134 nonuni_names[cid] = strdup(buffer);
135 /* Adobe's CNS cids have the rotated and non-rotated forms intermixed */
136 } else if ( (cid>=120 && cid<=127 ) && (cid&1) ) {
137 sprintf( buffer,"CNS1.%d.vert", cid-1 );
138 nonuni_names[cid] = strdup(buffer);
139 } else if ( (cid>=128 && cid<=159) && (cid&2) ) {
140 sprintf( buffer,"CNS1.%d.vert", cid-2 );
141 nonuni_names[cid] = strdup(buffer);
142 } else if ( cid>=13648 && cid<=13741 ) {
143 sprintf( buffer, "uni%04X.hw", (unsigned int)(cid-13648+' ') );
144 nonuni_names[cid] = strdup(buffer);
145 } else if ( cid==13742 ) {
146 strcpy( buffer, "uni203E.hw" );
147 } else if ( uni==-1 ) {
148 continue;
149 sprintf( buffer,"CNS1.%d", cid );
150 nonuni_names[cid] = strdup(buffer);
151 } else if ( uni>=VERTMARK ) {
152 /* rotated */
153 cid_2_rotunicode[cid] = uni-VERTMARK;
154 } else if ( !used[uni] ) {
155 used[uni] = 1;
156 cid_2_unicode[cid] = uni;
157 } else {
158 sprintf( buffer, "uni%04X.dup%d", (unsigned int)(uni), ++used[uni] );
159 nonuni_names[cid] = strdup(buffer);
160 }
161 max = cid;
162 }
163 for ( i=0; i<maxcid; ++i ) if ( cid_2_rotunicode[i]!=0 ) {
164 for ( j=0; j<maxcid; ++j )
165 if ( cid_2_unicode[j] == cid_2_rotunicode[i] )
166 break;
167 if ( j==maxcid )
168 sprintf( buffer, "uni%04X.vert", (unsigned int)(cid_2_rotunicode[i]) );
169 else
170 sprintf( buffer, "CNS1.%d.vert", j);
171 nonuni_names[i] = strdup(buffer);
172 }
173
174 printf("%d %d\n",maxcid, max );
175
176 for ( cid=0; cid<=max; ++cid ) {
177 if ( cid_2_unicode[cid]!=-1 && cid_2_unicodemult[cid][0]==0 ) {
178 for ( i=1; cid+i<=max && cid_2_unicode[cid+i]==cid_2_unicode[cid]+i && cid_2_unicodemult[cid+i][0]==0; ++i );
179 --i;
180 if ( i!=0 ) {
181 printf( "%d..%d %04x\n", cid, cid+i, (unsigned int)(cid_2_unicode[cid]) );
182 cid += i;
183 } else
184 printf( "%d %04x\n", cid, (unsigned int)(cid_2_unicode[cid]) );
185 } else if ( cid_2_unicode[cid]!=-1 ) {
186 printf( "%d %04x", cid, (unsigned int)(cid_2_unicode[cid]) );
187 for ( i=0; cid_2_unicodemult[cid][i]!=0; ++i )
188 printf( ",%04x", (unsigned int)(cid_2_unicodemult[cid][i]) );
189 printf( "\n");
190 } else if ( nonuni_names[cid]!=NULL )
191 printf( "%d /%s\n", cid, nonuni_names[cid] );
192 }
193
194 pua = fopen("cns14.pua","w");
195 if (pua) {
196 for ( i=0; i<0xf8ff-0xe000; i+=8 ) {
197 fprintf(pua, "/* %0X */\t", (unsigned int)(i+0xe000) );
198 for ( j=0; j<8; ++j ) {
199 if ( puamap[i+j]!=0 )
200 fprintf(pua, "0x%05x,%s", (unsigned int)(puamap[i+j]), j==7 ? "\n" : " " );
201 else
202 fprintf(pua, " 0x0,%s", j==7 ? "\n" : " " );
203 }
204 }
205 fclose(pua);
206 }
207
208 return( 0 );
209 }
210