1 /*--------------------------------------------------------------------
2 nxeditor
3 FILE NAME:iskanji.c
4 Programed by : I.Neva
5 R & D ADVANCED SYSTEMS. IMAGING PRODUCTS.
6 1992.06.01
7
8 Copyright (c) 1998,1999,2000 SASAKI Shunsuke.
9 All rights reserved.
10 --------------------------------------------------------------------*/
11 #include "ed.h"
12
13
/*
 * iscnt - test for a control character whose code is below DEL.
 *
 * Returns non-zero when iscntrl() accepts c and c is less than 0x7f;
 * DEL (0x7f) and every larger byte value are rejected.
 */
bool iscnt(unsigned char c)
{
	if (c >= 0x7f)
		return 0;
	return iscntrl(c) != 0;
}
18
/*
 * iskanji - test whether the low 8 bits of c fall in the multibyte range.
 *
 * Only the low byte of c is examined, so a sign-extended char is fine.
 * Accepts 0xa1..0xfe and additionally the single value 0x8e.
 */
bool iskanji(int c)
{
	const int byte = c & 0x00ff;

	if (byte == 0x8e)
		return 1;
	return 0xa1 <= byte && byte <= 0xfe;
}
24
25
26
IsThisKanjiPosition(int offset,const char * buffer)27 int IsThisKanjiPosition(int offset,const char *buffer)
28 {
29 int i;
30 int Iskanji_flg;
31
32 Iskanji_flg=FALSE;
33 for (i = 0; i <= offset; i++) {
34 if (iskanji(buffer[i]) && iskanji(buffer[i+1]))
35 {
36 i++;
37 Iskanji_flg = TRUE;
38 } else {
39 Iskanji_flg = FALSE;
40 }
41 }
42 if (i == offset+1)
43 Iskanji_flg=FALSE;
44 return Iskanji_flg;
45 }
46
IsKanjiPosition()47 int IsKanjiPosition()
48 {
49 int i;
50
51 i=GetBufferOffset();
52
53 return i+1<=strlen(csrle.buf)
54 && iskanji(csrle.buf[i]) && iskanji(csrle.buf[i+1]);
55 }
56
57
58
59
60
61
62
63
64
65
/*
 * Character-class flags returned by char_getctype() and kanji_getctype().
 * Each basic class is a distinct bit so that classes can be OR-ed into
 * masks and tested with '&'.
 */
#define CT_skip		0x001	/* whitespace / control (char_getctype) */
#define CT_other	0x002	/* any other single-byte character */
#define CT_alnum	0x008	/* alphanumeric or '_' */

#define CT_hira		0x010	/* two-byte codes 0xa4a1..0xa4f3 */
#define CT_kata		0x020	/* two-byte codes 0xa5a1..0xa5f6 */
#define CT_kkigou	0x040	/* two-byte codes 0xa1a1..0xa3af */
#define CT_kalnum	0x080	/* two-byte codes 0xa3b0..0xa3fa */
#define CT_kanji	0x100	/* every other two-byte code */

/* Composite masks. */
#define CT_kana		(CT_hira| CT_kata)
/* NOTE(review): CT_space and CT_cntrl are not defined in the visible part
   of this file -- confirm they exist before using CT_ank. */
#define CT_ank		(CT_space| CT_cntrl| CT_other| CT_alnum| CT_kana)
78
char_getctype(int c)79 int char_getctype(int c)
80 {
81 if (c==0)
82 return 0;
83
84 if (isspace(c)||iscnt(c))
85 return CT_skip;
86 if (isalnum(c)||c=='_')
87 return CT_alnum;
88 // if (c<=0xff)
89 return CT_other;
90 }
91
kanji_getctype(int c1,int c2)92 int kanji_getctype(int c1, int c2)
93 {
94 int c;
95
96 c = ((c1&0xff) << 8) | (c2&0xff);
97
98 if (c == 0xa1ab || c == 0xa1ac || c == 0xa1bc)
99 return CT_kana;
100 if (c >= 0xa1a1 && c <= 0xa3af)
101 return CT_kkigou;
102 if (c >= 0xa3b0 && c <= 0xa3fa)
103 return CT_kalnum;
104 if (c >= 0xa4a1 && c <= 0xa4f3)
105 return CT_hira;
106 if (c >= 0xa5a1 && c <= 0xa5f6)
107 return CT_kata;
108 return CT_kanji;
109 }
110
kanji_tknext(const char * s,int a,bool f)111 int kanji_tknext(const char *s,int a,bool f)
112 {
113 int pa,pb;
114
115 if (s[a]=='\0')
116 return a;
117 if (a>strlen(s))
118 return strlen(s);
119
120 if (iskanji(s[a]))
121 {
122 pa=kanji_getctype(s[a],s[a+1]);
123 do {
124 a+=2;
125 } while(iskanji(s[a])&& (pa&kanji_getctype(s[a],s[a+1]))!=0);
126 pb=char_getctype(s[a]);
127 }else
128 {
129 pa=char_getctype(s[a]);
130 do {
131 ++a;
132 pb=char_getctype(s[a]);
133 } while(pa==pb&&!iskanji(s[a]));
134 }
135
136 if (a==0|| iskanji(s[a])|| pb!=CT_skip|| !f)
137 return a;
138
139 ++a;
140 while(pb==char_getctype(s[a])&&!iskanji(s[a]))
141 ++a;
142 return a;
143 }
144
kanji_tkprev(const char * s,int a,bool f)145 int kanji_tkprev(const char *s,int a,bool f)
146 {
147 int pa;
148
149 if (a<=0)
150 return 0;
151 if (a>strlen(s))
152 return strlen(s);
153
154 --a;
155
156 pa=char_getctype(s[a]);
157 if (f&& !IsThisKanjiPosition(a-1,s)&& pa==CT_skip)
158 {
159 --a;
160 while(a>0&& !IsThisKanjiPosition(a,s)
161 && pa==char_getctype(s[a]))
162 --a;
163 pa=char_getctype(s[a]);
164 }
165
166 if (a<=0)
167 return 0;
168
169 if (IsThisKanjiPosition(a-1,s))
170 {
171 --a;
172 pa=kanji_getctype(s[a],s[a+1]);
173 do {
174 a-=2;
175 } while(a>=0&& IsThisKanjiPosition(a,s)&&
176 (pa&kanji_getctype(s[a],s[a+1]))!=0);
177 ++a;
178 } else
179 {
180 do {
181 --a;
182 } while(a>=0&& (a==0|| !IsThisKanjiPosition(a-1,s))
183 && pa== char_getctype(s[a]));
184 }
185 ++a;
186 return a;
187 }
188
189
190
/* kanji */
/* The tables, algorithms, etc. are taken from the ekc code. */
193
194
195 //#define issjis2(c) ((((u_char)c)>=0x40&&((u_char)c)<=0x7e)||(((u_char)c)>=0x80&&((u_char)c)<=0xfc))
196
197 //#define issjis1(c) (((u_char)(c)>=0x81 &&(u_char)(c)<=0x9f) || ((u_char)(c)>=0xe0 &&(u_char)(c)<=0xfc))
198 //#define issjis2(c) ( (u_char)(c)>=0x40 &&(u_char)(c)<=0xfc)
199 //#define iskana(c) ( (u_char)(c)>=0xa0 &&(u_char)(c)<=0xdf)
200 //#define iseuc(c) ( (u_char)(c)>=0xa1 &&(u_char)(c)<=0xfe)
201 //#define isjis(c) ( (u_char)(c)>=0x21 &&(u_char)(c)<=0x7e)
202
/* ASCII control codes used by the encoding converters and line reader. */
#define LF	0x0a	/* line feed */
#define CR	0x0d	/* carriage return */
#define SO	0x0e	/* shift out */
#define SI	0x0f	/* shift in */
#define ESC	0x1b	/* escape */
208
209 u_char SJIStoEUCtable1[]=
210 {
211 0x80,0xa1,0xa3,0xa5,0xa7,0xa9,0xab,0xad,0xaf,0xb1,0xb3,0xb5,0xb7,0xb9,0xbb,0xbd,
212 0xbf,0xc1,0xc3,0xc5,0xc7,0xc9,0xcb,0xcd,0xcf,0xd1,0xd3,0xd5,0xd7,0xd9,0xdb,0xdd,
213 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
214 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
215 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
216 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
217 0xdf,0xe1,0xe3,0xe5,0xe7,0xe9,0xeb,0xed,0xef,0xf1,0xf3,0xf5,0xf7,0xf9,0xfb,0xfd,
218 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
219 };
220
221 u_char SJIStoEUCtable2[]=
222 {
223 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,
224 0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,0xc0,
225 0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,0xd0,
226 0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,0x7f,
227 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
228 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xa1,
229 0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,
230 0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,
231 0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,0xd0,0xd1,
232 0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,0xe0,0xe1,
233 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,0xf0,0xf1,
234 0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xfd,0xfe,0xff
235 };
236
237 u_char EUCtoSJIStable1[]=
238 {
239 0x81,0x81,0x82,0x82,0x83,0x83,0x84,0x84,0x85,0x85,0x86,0x86,0x87,0x87,0x88,
240 0x88,0x89,0x89,0x8a,0x8a,0x8b,0x8b,0x8c,0x8c,0x8d,0x8d,0x8e,0x8e,0x8f,0x8f,0x90,
241 0x90,0x91,0x91,0x92,0x92,0x93,0x93,0x94,0x94,0x95,0x95,0x96,0x96,0x97,0x97,0x98,
242 0x98,0x99,0x99,0x9a,0x9a,0x9b,0x9b,0x9c,0x9c,0x9d,0x9d,0x9e,0x9e,0x9f,0x9f,0xe0,
243 0xe0,0xe1,0xe1,0xe2,0xe2,0xe3,0xe3,0xe4,0xe4,0xe5,0xe5,0xe6,0xe6,0xe7,0xe7,0xe8,
244 0xe8,0xe9,0xe9,0xea,0xea,0xeb,0xeb,0xec,0xec,0xed,0xed,0xee,0xee,0xef,0xef
245 };
246
247
kanji_fromeuc(char * s,size_t bytes,const char * t,int kc)248 const char *kanji_fromeuc(char *s,size_t bytes,const char *t,int kc)
249 {
250 u_char c;
251 const char *p;
252 enum {KM_ank, KM_kanji, KM_kana} km;
253
254 p=s;
255 switch(kc)
256 {
257 case KC_euc:
258 return t;
259
260 case KC_sjis:
261 for(;*t!='\0'&& bytes>0;)
262 {
263 c=*t++;
264 if (c==0x8e&& iskana(*t))
265 c=*t++; else
266 {
267 if (iseuc(c)&& iseuc(*t))
268 {
269 *s++= EUCtoSJIStable1[c - 0xa1];
270 --bytes;
271
272 c= *(u_char *)t - ((c&1)==0 ? 2 : *(u_char *)t>=0xe0? 0x60: 0x61);
273 ++t;
274 }
275 }
276 *s++=c;
277 --bytes;
278 }
279 *s='\0';
280 break;
281
282 case KC_jis:
283 km=KM_ank;
284 for(;*t!='\0'&& bytes>0;)
285 {
286 c=*t++;
287 if (iseuc(c)&& km!=KM_kanji)
288 {
289 if (bytes<3)
290 break;
291 *s++='\x1b';
292 *s++='$';
293 *s++='B';
294 bytes-=3;
295 km=KM_kanji;
296 }
297 if (c==0x8e && iskana(*t))
298 {
299 if (km!=KM_kana)
300 {
301 if (bytes<3)
302 break;
303 *s++='\x1b';
304 *s++='(';
305 *s++='I';
306 bytes-=3;
307 km=KM_kana;
308 }
309
310 c=*t++;
311 } else
312 {
313 if (!iseuc(c) && km!=KM_ank)
314 {
315 if (bytes<3)
316 break;
317 *s++='\x1b';
318 *s++='(';
319 *s++='B';
320 bytes-=3;
321 km=KM_ank;
322 }
323 }
324
325 *s++= (c&0x7f);
326 --bytes;
327 }
328 if (km!=KM_ank&& bytes>=3)
329 strcpy(s,"\x1b(B"); else
330 *s='\0';
331 }
332 return p;
333 }
334
file_knjchk(FILE * fp)335 int file_knjchk(FILE *fp)
336 {
337 int c;
338 int f_sjis,f_euc;
339 int n_sjis,n_sjis_i,n_euc,n_euc_i;
340
341 n_sjis=0;
342 n_sjis_i=0;
343 n_euc=0;
344 n_euc_i=0;
345
346 f_sjis=FALSE;
347 f_euc=FALSE;
348
349 for(;;)
350 {
351 c=fgetc(fp);
352 if (c==EOF|| n_euc>32||n_sjis>32||n_euc_i>8||n_sjis_i>8)
353 return n_euc- n_euc_i*4>= n_sjis- n_sjis_i*4? KC_euc:KC_sjis;
354 if (c==ESC)
355 {
356 c=fgetc(fp);
357 if (c=='K')
358 return KC_jis;
359 if (c=='$')
360 {
361 c=fgetc(fp);
362 if (c=='B'||c=='@')
363 return KC_jis;
364 }
365 }
366
367 if (f_euc)
368 {
369 if (iseuc(c))
370 ++n_euc; else
371 ++n_euc_i;
372 f_euc=FALSE;
373 } else
374 {
375 if (iseuc(c))
376 f_euc=TRUE;
377 }
378
379 if (f_sjis)
380 {
381 if (issjis2(c))
382 ++n_sjis; else
383 ++n_sjis_i;
384 f_sjis=FALSE;
385 } else
386 {
387 if (issjis1(c))
388 f_sjis=TRUE;
389 }
390 }
391 }
392
file_gets(char * s,size_t bytes,FILE * fp,int * n_cr,int * n_lf)393 int file_gets(char *s, size_t bytes, FILE *fp, int *n_cr, int *n_lf)
394 {
395 int c;
396 bool f_cr;
397
398 f_cr=FALSE;
399 for (;bytes>0;)
400 {
401 c=fgetc(fp);
402
403 if (c==LF|| f_cr)
404 {
405 if (c==LF)
406 ++*n_lf; else
407 ungetc(c, fp);
408 c=0;
409 break;
410 }
411 if (c==EOF)
412 {
413 c=-1;
414 break;
415 }
416 if (c==CR)
417 {
418 f_cr=TRUE;
419 ++*n_cr;
420 continue;
421 }
422
423 *s++=c;
424 --bytes;
425 }
426 *s='\0';
427 return c;
428 }
429
430
kanji_toeuc(char * s,size_t bytes,const char * t,bool f_sjis,int * jm)431 void kanji_toeuc(char *s,size_t bytes,const char *t,bool f_sjis,int *jm)
432 {
433 u_char c,sjc;
434 bool sf; // (SI/SO)shift flag
435
436 sjc=0;
437 sf=FALSE;
438 for (;*t!='\0'&& bytes>0;)
439 {
440 c=(u_char)*t++;
441
442 switch(c)
443 {
444 case SI:
445 sf=FALSE;
446 continue;
447 case SO:
448 sf=TRUE;
449 continue;
450
451 case ESC:
452 switch(*t++)
453 {
454 case 'K': // NEC kanji
455 *jm=JM_kanji;
456 sf=FALSE;
457 continue;
458
459 case 'H': // NEC ank
460 *jm=JM_ank;
461 sf=FALSE;
462 continue;
463
464 case '&':
465 if (*t=='@') // JIS X0208-1990 �ڤ��ؤ�����
466 {
467 ++t;
468 continue;
469 }
470 break;
471
472 case '$':
473 if (*t=='B'||*t=='@') // JIS X0208
474 {
475 ++t;
476 sf=FALSE;
477 *jm=JM_kanji;
478 continue;
479 }
480 break;
481
482 case '(':
483 if (*t=='J'||*t=='B'||*t=='H') // JIS X0201(roman)/ank
484 {
485 ++t;
486 sf=FALSE;
487 *jm=JM_ank;
488 continue;
489 }
490 if (*t=='I') // JIS X0201(kana)
491 {
492 ++t;
493 *jm=JM_kana;
494 continue;
495 }
496 }
497 --t;
498 }
499
500 if (f_sjis)
501 {
502 if (sjc!=0)
503 {
504 if (issjis2(c))
505 {
506 if (bytes<2)
507 break;
508
509 if (c>=0x9f)
510 ++sjc;
511 *s++=sjc;
512 *s++=SJIStoEUCtable2[c-0x40];
513 bytes-=2;
514 sjc=0;
515 continue;
516 }
517 sjc=0;
518 --t;
519 c=*t;
520 } else
521 {
522 if (issjis1(c))
523 {
524 sjc= SJIStoEUCtable1[c&0x7f];
525 continue;
526 }
527 if (iskana(c))
528 {
529 if (bytes<2)
530 break;
531
532 *s++=0x8e;
533 --bytes;
534 }
535 }
536 }
537
538 if (*jm==JM_kana|| sf)
539 {
540 if (bytes<2)
541 break;
542 *s++=0x8e;
543 --bytes;
544 }
545 if (*jm!=JM_ank|| sf)
546 c |= 0x80;
547 *s++=c;
548 --bytes;
549 }
550
551 *s='\0';
552 }
553
554
/*
 * kanji_poscanon - snap a byte offset to a character boundary.
 *
 * offset is first limited to strlen(buf).  The buffer is then walked
 * character by character; if offset coincides with a boundary it is
 * returned unchanged, otherwise the start of the character containing
 * it is returned.
 */
int kanji_poscanon(int offset, const char *buf)
{
	int cur = 0;
	int prev = 0;

	offset = min(strlen(buf), offset);

	while (cur != offset)
	{
		if (cur > offset)
			return prev;	/* stepped over offset: it was mid-character */
		if (buf[cur] == '\0')
			return cur;
		prev = cur;
		cur += kanji_countbuf(buf[cur]);
	}
	return offset;
}
574
/*
 * kanji_poscandsp - snap a display column to a character boundary.
 *
 * Walks buf accumulating display widths.  If `offset` equals the column
 * at which some character starts it is returned unchanged; if it falls
 * inside a character, that character's starting column is returned; if
 * the text ends first, the column reached at the end is returned.
 */
int kanji_poscandsp(int offset, const char *buf)
{
	int ln = strlen(buf);
	int col = 0;		/* display column of the current character */
	int prev_col = 0;	/* display column of the previous character */
	int idx = 0;		/* byte index into buf */

	while (col != offset)
	{
		if (col > offset)
			return prev_col;
		if (idx > ln || buf[idx] == '\0')
			return col;
		prev_col = col;
		col += kanji_countdsp(buf[idx], col);
		idx += kanji_countbuf(buf[idx]);
	}
	return offset;
}
594
595
/*
 * kanji_posnext - byte offset of the character following the one at
 * `offset`.
 *
 * Advances by the byte length reported by kanji_countbuf(), but never
 * past a NUL, so a truncated multibyte character at the end of the
 * string yields the offset of the terminator.
 */
int kanji_posnext(int offset, const char *buf)
{
	const int width = kanji_countbuf(buf[offset]);
	int step = 0;

	while (step < width && buf[offset + step] != '\0')
		++step;

	return offset + step;
}
610
/*
 * kanji_posprev - byte offset of the character that precedes `offset`.
 *
 * Walks buf from the start and returns the start of the last character
 * that begins before `offset` (0 when offset <= 0 or buf is empty).
 *
 * Stepping is done with kanji_posnext(), which stops at the terminator,
 * so a truncated multibyte character at the end of buf can no longer
 * carry the scan past the NUL.  Results for well-formed text are
 * unchanged.
 */
int kanji_posprev(int offset, const char *buf)
{
	int cur = 0;
	int prev = 0;

	while (cur < offset && buf[cur] != '\0')
	{
		prev = cur;
		cur = kanji_posnext(cur, buf);
	}
	return prev;
}
624
/*
 * kanji_posdsp - convert a byte offset into a display column.
 *
 * Sums the display widths of the characters that start before `offset`
 * (or before the end of buf, whichever comes first).
 *
 * Stepping is done with kanji_posnext(), which stops at the terminator,
 * so a truncated multibyte character at the end of buf can no longer
 * carry the scan past the NUL.  Results for well-formed text are
 * unchanged.
 */
int kanji_posdsp(int offset, const char *buf)
{
	int idx = 0;	/* byte index */
	int col = 0;	/* display column */

	while (idx < offset && buf[idx] != '\0')
	{
		col += kanji_countdsp(buf[idx], col);
		idx = kanji_posnext(idx, buf);
	}
	return col;
}
639
/*
 * kanji_posbuf - convert a display column into a byte offset.
 *
 * Walks buf until the accumulated display width reaches `offset` (or the
 * text ends) and returns the byte index reached.
 *
 * Stepping is done with kanji_posnext(), which stops at the terminator,
 * so a truncated multibyte character at the end of buf can no longer
 * carry the scan past the NUL.  Results for well-formed text are
 * unchanged.
 */
int kanji_posbuf(int offset, const char *buf)
{
	int idx = 0;	/* byte index */
	int col = 0;	/* display column */

	while (col < offset && buf[idx] != '\0')
	{
		col += kanji_countdsp(buf[idx], col);
		idx = kanji_posnext(idx, buf);
	}
	return idx;
}
654
/*
 * strjfcpy - copy t into s character by character, then pad with spaces.
 *
 * s      output buffer; at most `bytes` data bytes are stored, followed
 *        by a terminating NUL.
 * t      NUL-terminated source text.
 * bytes  byte budget for s (terminator not included).
 * len    display-column budget; whole characters are copied while both
 *        budgets allow, and the remaining columns are filled with
 *        spaces as long as bytes remain.
 *
 * Tabs are counted with a fixed width here because kanji_countdsp() is
 * called with column -1.
 *
 * Fix over the previous version: the byte length of each character is
 * now taken from kanji_posnext(), which stops at the terminator, so a
 * truncated multibyte character at the end of t no longer makes memcpy
 * and the scan run past the NUL.
 */
void strjfcpy(char *s,const char *t,size_t bytes,size_t len)
{
	int n,m;

	while (*t != 0)
	{
		n = kanji_posnext(0, t);	/* bytes actually present */
		m = kanji_countdsp(*t, -1);
		if (bytes < (size_t)n || len < (size_t)m)
			break;
		memcpy(s, t, n);
		s += n;
		t += n;
		bytes -= n;
		len -= m;
	}

	for (; len > 0 && bytes > 0; --len, --bytes)
		*s++ = ' ';

	*s = '\0';
}
677
kanji_countbuf(char c)678 int kanji_countbuf(char c)
679 {
680 if (c==0)
681 return 0;
682
683 if (iseuc(c))
684 return 2;
685 if ((u_char)c==0x8e)
686 return 2;
687 if ((u_char)c==0x8f)
688 return 3;
689 return 1;
690 }
691
kanji_countdsp(char c,int n)692 int kanji_countdsp(char c, int n)
693 {
694 if (c==0)
695 return 0;
696
697 if (c=='\t' && n!=-1)
698 return (n/sysinfo.tabstop+1)*sysinfo.tabstop - n;
699
700 if ((u_char)c==0x8e) // Ⱦ�Ѥ���
701 return 2;
702
703 if (iseuc(c)|| (u_char)c==0x8f|| iscntrl(c))
704 return 2;
705 return 1;
706 }
707
708