1
2 #include "wc.h"
3 #include "iso2022.h"
4 #include "hz.h"
5 #include "wtf.h"
6 #ifdef USE_UNICODE
7 #include "ucs.h"
8 #endif
9
10 Str
wc_conv_from_hz(Str is,wc_ces ces)11 wc_conv_from_hz(Str is, wc_ces ces)
12 {
13 Str os;
14 wc_uchar *sp = (wc_uchar *)is->ptr;
15 wc_uchar *ep = sp + is->length;
16 wc_uchar *p;
17 int state = WC_HZ_NOSTATE;
18
19 for (p = sp; p < ep && *p < 0x80 && *p != WC_C_HZ_TILDA; p++)
20 ;
21 if (p == ep)
22 return is;
23 os = Strnew_size(is->length);
24 if (p > sp)
25 Strcat_charp_n(os, is->ptr, (int)(p - sp));
26
27 for (; p < ep; p++) {
28 switch (state) {
29 case WC_HZ_NOSTATE:
30 if (*p == WC_C_HZ_TILDA)
31 state = WC_HZ_TILDA;
32 else if (WC_ISO_MAP[*p] == WC_ISO_MAP_GR)
33 state = WC_HZ_MBYTE1_GR; /* GB 2312 ? */
34 else if (*p & 0x80)
35 wtf_push_unknown(os, p, 1);
36 else
37 Strcat_char(os, (char)*p);
38 break;
39 case WC_HZ_TILDA:
40 if (*p == WC_C_HZ_SI) {
41 state = WC_HZ_MBYTE;
42 break;
43 } else if (*p == WC_C_HZ_TILDA)
44 Strcat_char(os, (char)*p);
45 else if (*p == '\n')
46 break;
47 else
48 wtf_push_unknown(os, p-1, 2);
49 state = WC_HZ_NOSTATE;
50 break;
51 case WC_HZ_TILDA_MB:
52 if (*p == WC_C_HZ_SO || *p == '\n') {
53 state = WC_HZ_NOSTATE;
54 break;
55 }
56 else if (WC_ISO_MAP[*p & 0x7f] == WC_ISO_MAP_GL)
57 wtf_push(os, WC_CCS_GB_2312, ((wc_uint32)*(p-1) << 8) | *p);
58 else
59 wtf_push_unknown(os, p-1, 2);
60 state = WC_HZ_MBYTE;
61 break;
62 case WC_HZ_MBYTE:
63 if (*p == WC_C_HZ_TILDA)
64 state = WC_HZ_TILDA_MB;
65 else if (WC_ISO_MAP[*p & 0x7f] == WC_ISO_MAP_GL)
66 state = WC_HZ_MBYTE1;
67 else
68 wtf_push_unknown(os, p, 1);
69 break;
70 case WC_HZ_MBYTE1:
71 if (WC_ISO_MAP[*p & 0x7f] == WC_ISO_MAP_GL)
72 wtf_push(os, WC_CCS_GB_2312, ((wc_uint32)*(p-1) << 8) | *p);
73 else
74 wtf_push_unknown(os, p-1, 2);
75 state = WC_HZ_MBYTE;
76 break;
77 case WC_HZ_MBYTE1_GR:
78 if (WC_ISO_MAP[*p] == WC_ISO_MAP_GR)
79 wtf_push(os, WC_CCS_GB_2312, ((wc_uint32)*(p-1) << 8) | *p);
80 else
81 wtf_push_unknown(os, p-1, 2);
82 state = WC_HZ_NOSTATE;
83 break;
84 }
85 }
86 switch (state) {
87 case WC_HZ_TILDA:
88 case WC_HZ_TILDA_MB:
89 case WC_HZ_MBYTE1:
90 case WC_HZ_MBYTE1_GR:
91 wtf_push_unknown(os, p-1, 1);
92 break;
93 }
94 return os;
95 }
96
97 void
wc_push_to_hz(Str os,wc_wchar_t cc,wc_status * st)98 wc_push_to_hz(Str os, wc_wchar_t cc, wc_status *st)
99 {
100 while (1) {
101 switch (cc.ccs) {
102 case WC_CCS_US_ASCII:
103 if (st->gl) {
104 Strcat_char(os, WC_C_HZ_TILDA);
105 Strcat_char(os, WC_C_HZ_SO);
106 st->gl = 0;
107 }
108 if ((char)cc.code == WC_C_HZ_TILDA)
109 Strcat_char(os, WC_C_HZ_TILDA);
110 Strcat_char(os, (char)cc.code);
111 return;
112 case WC_CCS_GB_2312:
113 if (! st->gl) {
114 Strcat_char(os, WC_C_HZ_TILDA);
115 Strcat_char(os, WC_C_HZ_SI);
116 st->gl = 1;
117 }
118 Strcat_char(os, (char)((cc.code >> 8) & 0x7f));
119 Strcat_char(os, (char)(cc.code & 0x7f));
120 return;
121 case WC_CCS_UNKNOWN_W:
122 if (WcOption.no_replace)
123 return;
124 if (st->gl) {
125 Strcat_char(os, WC_C_HZ_TILDA);
126 Strcat_char(os, WC_C_HZ_SO);
127 st->gl = 0;
128 }
129 Strcat_charp(os, WC_REPLACE_W);
130 return;
131 case WC_CCS_UNKNOWN:
132 if (WcOption.no_replace)
133 return;
134 if (st->gl) {
135 Strcat_char(os, WC_C_HZ_TILDA);
136 Strcat_char(os, WC_C_HZ_SO);
137 st->gl = 0;
138 }
139 Strcat_charp(os, WC_REPLACE);
140 return;
141 default:
142 #ifdef USE_UNICODE
143 if (WcOption.ucs_conv)
144 cc = wc_any_to_any_ces(cc, st);
145 else
146 #endif
147 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
148 continue;
149 }
150 }
151 }
152
153 void
wc_push_to_hz_end(Str os,wc_status * st)154 wc_push_to_hz_end(Str os, wc_status *st)
155 {
156 if (st->gl) {
157 Strcat_char(os, WC_C_HZ_TILDA);
158 Strcat_char(os, WC_C_HZ_SO);
159 st->gl = 0;
160 }
161 }
162