1 /* knj.c: check for 2-Byte Kanji (CP 932, SJIS) codes.
2
3 Copyright 2010, 2014 Akira Kakuto.
4 Copyright 2013, 2014 TANAKA Takuji.
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with this library; if not, see <http://www.gnu.org/licenses/>. */
18
19 #include <kpathsea/config.h>
20 #include <kpathsea/debug.h>
21 #include <wchar.h>
22
/* Double-byte code page selector read by isknj() and isknj2(): the values
   932, 936 and 950 enable the corresponding byte-range checks, any other
   value makes both functions return 0. */
int is_cp932_system;

/* Code page of the file system encoding.  Set to CP_UTF8 by
   get_command_line_args_utf8() and tested by the win32_* console
   wrappers in this file. */
int file_system_codepage;
24
isknj(int c)25 int isknj(int c)
26 {
27 c &= 0xff;
28 switch (is_cp932_system) {
29 case 932:
30 return((c>=0x81 && c<=0x9f) || (c>=0xe0 && c<=0xfc));
31 case 936:
32 return(c>=0x81 && c<=0xfe);
33 case 950:
34 return((c>=0xa1 && c<=0xc6) || (c>=0xc9 && c<=0xf9));
35 default:
36 return(0);
37 }
38 }
39
isknj2(int c)40 int isknj2(int c)
41 {
42 c &= 0xff;
43 switch (is_cp932_system) {
44 case 932:
45 return(c>=0x40 && c<=0xfc && c!=0x7f);
46 case 936:
47 return(c>=0x40 && c<=0xfe && c!=0x7f);
48 case 950:
49 return((c>=0x40 && c<=0x7e) || (c>=0xa1 && c<=0xfe));
50 default:
51 return(0);
52 }
53 }
54
/*
  Get wide string from multibyte string.

  MBSTR is a NUL-terminated multibyte string in code page CP.  When WSTR
  is NULL the result is stored in a buffer obtained from xmalloc;
  otherwise it is written to WSTR, whose capacity is not known here and is
  simply assumed to be sufficient.  A zero result from either
  MultiByteToWideChar call is reported through FATAL.  Returns the buffer
  holding the wide string.
*/
wchar_t *
get_wstring_from_mbstring(int cp, const char *mbstr, wchar_t *wstr)
{
    /* First call uses an output size of 0, i.e. it only measures. */
    const int needed = MultiByteToWideChar(cp, 0, mbstr, -1, wstr, 0);
    int converted;

    if (needed == 0) {
        FATAL("cannot convert string to wide string");
    }

    if (!wstr) {
        wstr = xmalloc(sizeof(wchar_t)*(needed+1));
    }

    converted = MultiByteToWideChar(cp, 0, mbstr, -1, wstr, needed+1);
    if (converted == 0) {
        FATAL("cannot convert multibyte string to wide string");
    }

    return wstr;
}
76
/*
  Get multibyte string from wide string.

  WSTR is a NUL-terminated wide string to be converted to code page CP.
  When MBSTR is NULL the result is stored in a buffer obtained from
  xmalloc; otherwise it is written to MBSTR, whose capacity is not known
  here and is simply assumed to be sufficient.  A zero result from either
  WideCharToMultiByte call is reported through FATAL.  Returns the buffer
  holding the multibyte string.
*/
char *
get_mbstring_from_wstring(int cp, const wchar_t *wstr, char *mbstr)
{
    /* First call uses an output size of 0, i.e. it only measures. */
    const int needed = WideCharToMultiByte(cp, 0, wstr, -1, mbstr, 0, NULL, NULL);
    int converted;

    if (needed == 0) {
        FATAL("cannot convert string to multibyte string");
    }

    if (!mbstr) {
        mbstr = xmalloc(needed+1);
    }

    converted = WideCharToMultiByte(cp, 0, wstr, -1, mbstr, needed+1, NULL, NULL);
    if (converted == 0) {
        FATAL("cannot convert wide string to multibyte string");
    }

    return mbstr;
}
98
/*
  xfopen by file system codepage

  FILENAME is converted to a wide string with get_wstring_from_fsyscp and
  opened with _wfopen.  MODE is widened character by character, so it must
  contain ASCII characters only.  If the file cannot be opened,
  FATAL_PERROR is invoked with FILENAME.  Returns the opened stream.
*/
FILE *
fsyscp_xfopen (const char *filename, const char *mode)
{
    FILE *f;
    wchar_t *fnamew, *modew;
    size_t i, modelen;
#if defined (KPSE_COMPAT_API)
    kpathsea kpse;
#endif
    assert(filename && mode);

    fnamew = get_wstring_from_fsyscp(filename, NULL);

    /* Size the wide copy of MODE from MODE itself: a fixed 4-element
       array would be overrun by any mode string longer than 3 chars. */
    modelen = strlen(mode);
    modew = xmalloc(sizeof(wchar_t) * (modelen + 1));
    for (i = 0; i <= modelen; i++)
        modew[i] = (wchar_t)mode[i]; /* mode[i] must be ASCII */

    f = _wfopen(fnamew, modew);
    /* Report the failure before any free() so errno is still intact. */
    if (f == NULL)
        FATAL_PERROR(filename);
    free(modew);
#if defined (KPSE_COMPAT_API)
    kpse = kpse_def;
    if (KPATHSEA_DEBUG_P (KPSE_DEBUG_FOPEN)) {
        DEBUGF_START ();
        fprintf (stderr, "fsyscp_xfopen(%s [", filename);
        WriteConsoleW( GetStdHandle( STD_ERROR_HANDLE ), fnamew, wcslen( fnamew ), NULL, NULL );
#if defined(_WIN64)
        fprintf (stderr, "], %s) => 0x%I64x\n", mode, (unsigned __int64) f);
#else
        fprintf (stderr, "], %s) => 0x%lx\n", mode, (unsigned long) f);
#endif
        DEBUGF_END ();
    }
#endif
    free(fnamew);

    return f;
}
136
/*
  fopen by file system codepage

  FILENAME is converted to a wide string with get_wstring_from_fsyscp and
  opened with _wfopen.  MODE is widened character by character, so it must
  contain ASCII characters only.  Returns the stream, or NULL when
  _wfopen fails (no error is raised here).
*/
FILE *
fsyscp_fopen (const char *filename, const char *mode)
{
    FILE *f;
    wchar_t *fnamew, *modew;
    size_t i, modelen;
#if defined (KPSE_COMPAT_API)
    kpathsea kpse;
#endif
    assert(filename && mode);

    fnamew = get_wstring_from_fsyscp(filename, NULL);

    /* Size the wide copy of MODE from MODE itself: a fixed 4-element
       array would be overrun by any mode string longer than 3 chars. */
    modelen = strlen(mode);
    modew = xmalloc(sizeof(wchar_t) * (modelen + 1));
    for (i = 0; i <= modelen; i++)
        modew[i] = (wchar_t)mode[i]; /* mode[i] must be ASCII */

    f = _wfopen(fnamew, modew);
#if defined (KPSE_COMPAT_API)
    if (f != NULL) {
        kpse = kpse_def;
        if (KPATHSEA_DEBUG_P (KPSE_DEBUG_FOPEN)) {
            DEBUGF_START ();
            fprintf (stderr, "fsyscp_fopen(%s [", filename);
            WriteConsoleW( GetStdHandle( STD_ERROR_HANDLE ), fnamew, wcslen( fnamew ), NULL, NULL );
#if defined(_WIN64)
            fprintf (stderr, "], %s) => 0x%I64x\n", mode, (unsigned __int64) f);
#else
            fprintf (stderr, "], %s) => 0x%lx\n", mode, (unsigned long) f);
#endif
            DEBUGF_END ();
        }
    }
#endif
    free(modew);
    free(fnamew);

    return f;
}
174
175 /*
176 popen by file system codepage
177 */
/* Return 1 if the NUL-terminated string S contains a space or a tab
   character, 0 otherwise. */
static int
is_include_space(const char *s)
{
    const char *scan;

    for (scan = s; *scan != '\0'; scan++) {
        if (*scan == ' ' || *scan == '\t')
            return 1;
    }
    return 0;
}
188
189 FILE *
fsyscp_popen(const char * command,const char * mode)190 fsyscp_popen (const char *command, const char *mode)
191 {
192 FILE *f;
193 wchar_t *commandw, modew[4];
194 int i;
195 #if defined (KPSE_COMPAT_API)
196 kpathsea kpse;
197 #endif
198 assert(command && mode);
199
200 if (is_include_space (command)) {
201 const char *p;
202 char *command2, *q;
203 command2 = xmalloc (strlen (command) + 3);
204 p = command;
205 q = command2;
206 *q++ = '\"';
207 while (*p)
208 *q++ = *p++;
209 *q++ = '\"';
210 *q = '\0';
211 commandw = get_wstring_from_fsyscp(command2, commandw=NULL);
212 free (command2);
213 } else {
214 commandw = get_wstring_from_fsyscp(command, commandw=NULL);
215 }
216 for(i=0; (modew[i]=(wchar_t)mode[i]); i++) {} /* mode[i] must be ASCII */
217 f = _wpopen(commandw, modew);
218 #if defined (KPSE_COMPAT_API)
219 if (f != NULL) {
220 kpse = kpse_def;
221 if (KPATHSEA_DEBUG_P (KPSE_DEBUG_FOPEN)) {
222 DEBUGF_START ();
223 fprintf (stderr, "fsyscp_popen(%s [", command);
224 WriteConsoleW( GetStdHandle( STD_ERROR_HANDLE ), commandw, wcslen( commandw ), NULL, NULL );
225 #if defined(_WIN64)
226 fprintf (stderr, "], %s) => 0x%I64x\n", mode, (unsigned __int64) f);
227 #else
228 fprintf (stderr, "], %s) => 0x%lx\n", mode, (unsigned long) f);
229 #endif
230 DEBUGF_END ();
231 }
232 }
233 #endif
234 free (commandw);
235 /* We use always binary mode on Windows */
236 if(f) _setmode (fileno (f), _O_BINARY);
237
238 return f;
239 }
240
241 int
get_command_line_args_utf8(const_string enc,int * p_ac,char *** p_av)242 get_command_line_args_utf8 (const_string enc, int *p_ac, char ***p_av)
243 {
244 int argc;
245 string *argv;
246
247 if (!enc || !strncmp(enc,"",1)) return 0;
248
249 #ifdef DEBUG
250 fprintf(stderr, "command_line_encoding (%s)\n", enc);
251 #endif /* DEBUG */
252 if (!(strncmp(enc,"utf8",5) && strncmp(enc,"utf-8",6))) {
253 LPWSTR *argvw;
254 INT argcw, i;
255 string s;
256 #ifdef DEBUG
257 DWORD ret;
258 HANDLE hStderr;
259 hStderr = GetStdHandle( STD_ERROR_HANDLE );
260 #endif /* DEBUG */
261 file_system_codepage = CP_UTF8;
262 is_cp932_system = 0;
263 argvw = CommandLineToArgvW(GetCommandLineW(), &argcw);
264 argc = argcw;
265 argv = xmalloc(sizeof(char *)*(argcw+1));
266 for (i=0; i<argcw; i++) {
267 s = get_utf8_from_wstring(argvw[i], s=NULL);
268 argv[i] = s;
269 #ifdef DEBUG
270 fprintf(stderr, "Commandline arguments %d:(%s) [", i, argv[i]);
271 WriteConsoleW( hStderr, argvw[i], wcslen(argvw[i]), &ret, NULL);
272 fprintf(stderr, "]\n");
273 #endif /* DEBUG */
274 }
275 argv[argcw] = NULL;
276 *p_ac = argc;
277 *p_av = argv;
278 return file_system_codepage;
279 } else {
280 WARNING1("kpathsea: Ignoring unknown encoding `%s'", enc);
281 return 0;
282 }
283 }
284
/*
  spawnvp by file system codepage

  COMMAND and every entry of the NULL-terminated vector ARGV are
  converted to wide strings with get_wstring_from_fsyscp and handed to
  _wspawnvp together with MODE.  All temporary wide strings are freed
  before returning.  Returns the value of _wspawnvp.
*/
int
fsyscp_spawnvp (int mode, const char *command, const char* const *argv)
{
    wchar_t *wcommand, **wargv;
    int argc = 0;
    int k, status;

    assert(command && argv);

    /* Count the arguments up to the terminating NULL. */
    while (argv[argc])
        argc++;

    wargv = xcalloc (argc + 3, sizeof (wchar_t *));
    wcommand = get_wstring_from_fsyscp(command, NULL);
    for (k = 0; k < argc; k++)
        wargv[k] = get_wstring_from_fsyscp(argv[k], NULL);
    wargv[argc] = NULL;

    status = _wspawnvp (mode, (const wchar_t *)wcommand, (const wchar_t* const*) wargv);

    free(wcommand);
    for (k = 0; wargv[k]; k++)
        free (wargv[k]);
    free (wargv);

    return status;
}
322
323 /*
324 system by file system codepage
325 */
326 int
fsyscp_system(const char * cmd)327 fsyscp_system (const char *cmd)
328 {
329 const char *p;
330 char *q;
331 char *av[4];
332 int len, ret;
333 int spacep = 0;
334
335 if (cmd == NULL)
336 return 1;
337
338 av[0] = xstrdup ("cmd.exe");
339 av[1] = xstrdup ("/c");
340
341 len = strlen (cmd) + 3;
342 spacep = is_include_space (cmd);
343 av[2] = xmalloc (len);
344 q = av[2];
345 if (spacep)
346 *q++ = '"';
347 for (p = cmd; *p; p++, q++) {
348 if (*p == '\'')
349 *q = '"';
350 else
351 *q = *p;
352 }
353 if (spacep)
354 *q++ = '"';
355 *q = '\0';
356 av[3] = NULL;
357 ret = fsyscp_spawnvp (_P_WAIT, av[0], (const char* const*) av);
358 free (av[0]);
359 free (av[1]);
360 free (av[2]);
361 return ret;
362 }
363
364 static int getc_len;
365 static int getc_buff[4];
366
win32_getc(FILE * fp)367 int win32_getc(FILE *fp)
368 {
369 const int fd = fileno(fp);
370 HANDLE hStdin;
371 DWORD ret;
372 wchar_t wc[3];
373 char mbc[5];
374 int j;
375 static wchar_t wcbuf = L'\0';
376
377 if (!(fd == fileno(stdin) && _isatty(fd) && file_system_codepage == CP_UTF8))
378 return getc(fp);
379
380 if (getc_len == 0)
381 {
382 hStdin = GetStdHandle(STD_INPUT_HANDLE);
383 if (wcbuf) {
384 wc[0] = wcbuf;
385 wcbuf = L'\0';
386 }
387 else if (ReadConsoleW(hStdin, wc, 1, &ret, NULL) == 0)
388 return EOF;
389 if (0xd800<=wc[0] && wc[0]<0xdc00) {
390 if (ReadConsoleW(hStdin, wc+1, 1, &ret, NULL) == 0)
391 return EOF;
392 if (0xdc00<=wc[1] && wc[1]<0xe000) {
393 wc[2]=L'\0';
394 } else {
395 wcbuf=wc[1];
396 wc[0]=0xfffd; /* illegal surrogate pair */
397 wc[1]=L'\0';
398 }
399 } else if (0xdc00<=wc[0] && wc[0]<0xe000) {
400 wc[0]=0xfffd; /* illegal surrogate pair */
401 wc[1]=L'\0';
402 } else {
403 wc[1]=L'\0';
404 }
405 get_utf8_from_wstring(wc,mbc);
406 j=strlen(mbc)-1;
407 while(j>=0) {
408 getc_buff[getc_len++]=(int)mbc[j--];
409 }
410 }
411 return getc_buff[--getc_len];
412 }
413
win32_ungetc(int c,FILE * fp)414 int win32_ungetc(int c, FILE *fp)
415 {
416 const int fd = fileno(fp);
417
418 if (!(fd == fileno(stdin) && _isatty(fd) && file_system_codepage == CP_UTF8))
419 return ungetc(c, fp);
420
421 assert(getc_len < 4);
422 return getc_buff[getc_len++] = c;
423 }
424
/* Convert the UTF-8 string STR with get_wstring_from_utf8 and write it to
   the console handle HSTDOUT using WriteConsoleW.  Returns the character
   count reported by WriteConsoleW, or EOF if the write fails. */
static int __win32_fputs(const char *str, HANDLE hStdout)
{
    DWORD written;
    wchar_t *wide = get_wstring_from_utf8(str, NULL);
    const BOOL ok = WriteConsoleW(hStdout, wide, wcslen(wide), &written, NULL);

    /* The temporary wide copy is released on both outcomes. */
    free(wide);

    if (!ok)
        return EOF;
    return written;
}
440
win32_fputs(const char * str,FILE * fp)441 int win32_fputs(const char *str, FILE *fp)
442 {
443 const int fd = fileno(fp);
444 HANDLE hStdout;
445
446 if (!((fd == fileno(stdout) || fd == fileno(stderr)) && _isatty(fd)
447 && file_system_codepage == CP_UTF8))
448 return fputs(str, fp);
449
450 hStdout = (fd == fileno(stdout)) ?
451 GetStdHandle(STD_OUTPUT_HANDLE) : GetStdHandle(STD_ERROR_HANDLE);
452
453 return __win32_fputs(str, hStdout);
454 }
455
456 #define MAX_PROMPT_STR_SIZE 8192
457
win32_vfprintf(FILE * fp,const char * format,va_list argp)458 int win32_vfprintf(FILE *fp, const char *format, va_list argp)
459 {
460 const int fd = fileno(fp);
461 HANDLE hStdout;
462 char buff[MAX_PROMPT_STR_SIZE];
463 int ret;
464
465 if (!((fd == fileno(stdout) || fd == fileno(stderr)) && _isatty(fd)
466 && file_system_codepage == CP_UTF8))
467 return vfprintf(fp, format, argp);
468
469 hStdout = (fd == fileno(stdout)) ?
470 GetStdHandle(STD_OUTPUT_HANDLE) : GetStdHandle(STD_ERROR_HANDLE);
471
472 ret = _vsnprintf(buff, sizeof(buff), format, argp);
473 if (__win32_fputs(buff, hStdout)==EOF) {
474 return EOF;
475 }
476 return ret;
477 }
478
/* puts replacement: writes STR to stdout through win32_fputs and then
   emits the trailing newline with puts("").  Returns EOF if win32_fputs
   fails, otherwise the value returned by puts(""). */
int win32_puts(const char *str)
{
    const int status = win32_fputs(str, stdout);

    return (status == EOF) ? EOF : puts("");
}
486
win32_putc(int c,FILE * fp)487 int win32_putc(int c, FILE *fp)
488 {
489 const int fd = fileno(fp);
490 HANDLE hStdout;
491 DWORD ret;
492 wchar_t wstr[3];
493 static int len = 0;
494 static char buff[5], *str;
495
496 if (!((fd == fileno(stdout) || fd == fileno(stderr)) && _isatty(fd)
497 && file_system_codepage == CP_UTF8))
498 return putc(c, fp);
499
500 hStdout = (fd == fileno(stdout)) ?
501 GetStdHandle(STD_OUTPUT_HANDLE) : GetStdHandle(STD_ERROR_HANDLE);
502
503 c &= 0xff;
504
505 if (c < 0x80) {
506 str = buff;
507 len = 1;
508 }
509 if (c < 0xc0) { /* ASCII or trailer */
510 *str++ = c;
511 len--;
512 if (len == 0) {
513 *str = '\0';
514 get_wstring_from_utf8(buff, wstr);
515 if (WriteConsoleW(hStdout, wstr, wcslen(wstr), &ret, NULL) == 0) {
516 len = 0;
517 return EOF;
518 }
519 }
520 else if (len < 0) return EOF;
521 return c;
522 }
523 else if (c < 0xc2) { len = 0; return EOF; } /* illegal */
524 else if (c < 0xe0) len = 2;
525 else if (c < 0xf0) len = 3;
526 else if (c < 0xf5) len = 4;
527 else { len = 0; return EOF; }
528
529 str = buff;
530 *str++ = c;
531 len--;
532 return c;
533 }
534