1 /* knj.c: check for 2-Byte Kanji (CP 932, SJIS) codes.
2 
3    Copyright 2010, 2014 Akira Kakuto.
4    Copyright 2013, 2014 TANAKA Takuji.
5 
6    This library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10 
11    This library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15 
16    You should have received a copy of the GNU Lesser General Public License
17    along with this library; if not, see <http://www.gnu.org/licenses/>.  */
18 
19 #include <kpathsea/config.h>
20 #include <kpathsea/debug.h>
21 #include <wchar.h>
22 
/* Code page number examined by isknj() and isknj2(); only the values
   932, 936 and 950 select a byte-range table there.  */
int is_cp932_system;
/* Code page of the file system.  The win32_* console wrappers in this
   file take their wide-character path only when this equals CP_UTF8.  */
int file_system_codepage;
24 
isknj(int c)25 int isknj(int c)
26 {
27   c &= 0xff;
28   switch (is_cp932_system) {
29   case 932:
30     return((c>=0x81 && c<=0x9f) || (c>=0xe0 && c<=0xfc));
31   case 936:
32     return(c>=0x81 && c<=0xfe);
33   case 950:
34     return((c>=0xa1 && c<=0xc6) || (c>=0xc9 && c<=0xf9));
35   default:
36     return(0);
37   }
38 }
39 
isknj2(int c)40 int isknj2(int c)
41 {
42   c &= 0xff;
43   switch (is_cp932_system) {
44   case 932:
45     return(c>=0x40 && c<=0xfc && c!=0x7f);
46   case 936:
47     return(c>=0x40 && c<=0xfe && c!=0x7f);
48   case 950:
49     return((c>=0x40 && c<=0x7e) || (c>=0xa1 && c<=0xfe));
50   default:
51     return(0);
52   }
53 }
54 
/*
  Get wide string from multibyte string.

  MBSTR is a NUL-terminated string in code page CP.  When WSTR is NULL
  the result is placed in a buffer obtained from xmalloc; otherwise it is
  written to WSTR, whose capacity is not passed in and is therefore the
  caller's responsibility.  The destination pointer is returned.  A
  conversion that reports length 0 ends in FATAL.
*/
wchar_t *
get_wstring_from_mbstring(int cp, const char *mbstr, wchar_t *wstr)
{
  /* First call: output size 0, so only the required length is computed. */
  const int needed = MultiByteToWideChar(cp, 0, mbstr, -1, wstr, 0);

  if (needed == 0) {
    FATAL("cannot convert string to wide string");
  }

  if (!wstr) {
    wstr = xmalloc(sizeof(wchar_t)*(needed+1));
  }

  /* Second call: the actual conversion into the destination. */
  if (MultiByteToWideChar(cp, 0, mbstr, -1, wstr, needed+1) == 0) {
    FATAL("cannot convert multibyte string to wide string");
  }
  return wstr;
}
76 
/*
  Get multibyte string from wide string.

  WSTR is a NUL-terminated wide string; the result is encoded in code
  page CP.  When MBSTR is NULL the result is placed in a buffer obtained
  from xmalloc; otherwise it is written to MBSTR, whose capacity is not
  passed in and is therefore the caller's responsibility.  The
  destination pointer is returned.  A conversion that reports length 0
  ends in FATAL.
*/
char *
get_mbstring_from_wstring(int cp, const wchar_t *wstr, char *mbstr)
{
  /* First call: output size 0, so only the required length is computed. */
  const int needed = WideCharToMultiByte(cp, 0, wstr, -1, mbstr, 0, NULL, NULL);

  if (needed == 0) {
    FATAL("cannot convert string to multibyte string");
  }

  if (!mbstr) {
    mbstr = xmalloc(needed+1);
  }

  /* Second call: the actual conversion into the destination. */
  if (WideCharToMultiByte(cp, 0, wstr, -1, mbstr, needed+1, NULL, NULL) == 0) {
    FATAL("cannot convert wide string to multibyte string");
  }
  return mbstr;
}
98 
/*
  xfopen by file system codepage

  Convert FILENAME to a wide string with get_wstring_from_fsyscp, widen
  MODE character by character (it must be ASCII) and open the file with
  _wfopen.  A NULL result from _wfopen is reported through
  FATAL_PERROR (filename).  Returns the opened stream.
*/
FILE *
fsyscp_xfopen (const char *filename, const char *mode)
{
    FILE *f;
    wchar_t *fnamew, *modew;
    size_t i;
#if defined (KPSE_COMPAT_API)
    kpathsea kpse;
#endif
    assert(filename && mode);

    fnamew = get_wstring_from_fsyscp(filename, fnamew=NULL);
    /* Size the wide mode string from MODE itself: a fixed 4-element
       array is overrun by any mode longer than three characters.  */
    modew = xmalloc(sizeof(wchar_t) * (strlen(mode) + 1));
    for(i=0; (modew[i]=(wchar_t)mode[i]); i++) {} /* mode[i] must be ASCII */
    f = _wfopen(fnamew, modew);
    if (f == NULL)
        FATAL_PERROR(filename);
    /* Released only after the error check so that errno from _wfopen is
       still intact when FATAL_PERROR reports it.  */
    free(modew);
#if defined (KPSE_COMPAT_API)
    kpse = kpse_def;
    if (KPATHSEA_DEBUG_P (KPSE_DEBUG_FOPEN)) {
        DEBUGF_START ();
        fprintf (stderr, "fsyscp_xfopen(%s [", filename);
        WriteConsoleW( GetStdHandle( STD_ERROR_HANDLE ), fnamew, wcslen( fnamew ), NULL, NULL );
#if defined(_WIN64)
        fprintf (stderr, "], %s) => 0x%I64x\n", mode, (unsigned __int64) f);
#else
        fprintf (stderr, "], %s) => 0x%lx\n", mode, (unsigned long) f);
#endif
        DEBUGF_END ();
    }
#endif
    free(fnamew);

    return f;
}
136 
/*
  fopen by file system codepage

  Convert FILENAME to a wide string with get_wstring_from_fsyscp, widen
  MODE character by character (it must be ASCII) and open the file with
  _wfopen.  Returns whatever _wfopen returned, including NULL; the debug
  trace is printed only for a non-NULL stream.
*/
FILE *
fsyscp_fopen (const char *filename, const char *mode)
{
    FILE *f;
    wchar_t *fnamew, *modew;
    size_t i;
#if defined (KPSE_COMPAT_API)
    kpathsea kpse;
#endif
    assert(filename && mode);

    fnamew = get_wstring_from_fsyscp(filename, fnamew=NULL);
    /* Size the wide mode string from MODE itself: a fixed 4-element
       array is overrun by any mode longer than three characters.  */
    modew = xmalloc(sizeof(wchar_t) * (strlen(mode) + 1));
    for(i=0; (modew[i]=(wchar_t)mode[i]); i++) {} /* mode[i] must be ASCII */
    f = _wfopen(fnamew, modew);
#if defined (KPSE_COMPAT_API)
    if (f != NULL) {
        kpse = kpse_def;
        if (KPATHSEA_DEBUG_P (KPSE_DEBUG_FOPEN)) {
            DEBUGF_START ();
            fprintf (stderr, "fsyscp_fopen(%s [", filename);
            WriteConsoleW( GetStdHandle( STD_ERROR_HANDLE ), fnamew, wcslen( fnamew ), NULL, NULL );
#if defined(_WIN64)
            fprintf (stderr, "], %s) => 0x%I64x\n", mode, (unsigned __int64) f);
#else
            fprintf (stderr, "], %s) => 0x%lx\n", mode, (unsigned long) f);
#endif
            DEBUGF_END ();
        }
    }
#endif
    /* Freed last so that nothing runs between _wfopen and the return
       other than the optional trace and these releases.  */
    free(modew);
    free(fnamew);

    return f;
}
174 
175 /*
176   popen by file system codepage
177 */
/* Return 1 if S contains a space or a horizontal tab, 0 otherwise.  */
static int
is_include_space(const char *s)
{
    for (; *s != '\0'; s++) {
        if (*s == ' ' || *s == '\t')
            return 1;
    }
    return 0;
}
188 
189 FILE *
fsyscp_popen(const char * command,const char * mode)190 fsyscp_popen (const char *command, const char *mode)
191 {
192     FILE *f;
193     wchar_t *commandw, modew[4];
194     int i;
195 #if defined (KPSE_COMPAT_API)
196     kpathsea kpse;
197 #endif
198     assert(command && mode);
199 
200     if (is_include_space (command)) {
201         const char *p;
202         char *command2, *q;
203         command2 = xmalloc (strlen (command) + 3);
204         p = command;
205         q = command2;
206         *q++ = '\"';
207         while (*p)
208             *q++ = *p++;
209         *q++ = '\"';
210         *q = '\0';
211         commandw = get_wstring_from_fsyscp(command2, commandw=NULL);
212         free (command2);
213     } else {
214         commandw = get_wstring_from_fsyscp(command, commandw=NULL);
215     }
216     for(i=0; (modew[i]=(wchar_t)mode[i]); i++) {} /* mode[i] must be ASCII */
217     f = _wpopen(commandw, modew);
218 #if defined (KPSE_COMPAT_API)
219     if (f != NULL) {
220         kpse = kpse_def;
221         if (KPATHSEA_DEBUG_P (KPSE_DEBUG_FOPEN)) {
222             DEBUGF_START ();
223             fprintf (stderr, "fsyscp_popen(%s [", command);
224             WriteConsoleW( GetStdHandle( STD_ERROR_HANDLE ), commandw, wcslen( commandw ), NULL, NULL );
225 #if defined(_WIN64)
226             fprintf (stderr, "], %s) => 0x%I64x\n", mode, (unsigned __int64) f);
227 #else
228             fprintf (stderr, "], %s) => 0x%lx\n", mode, (unsigned long) f);
229 #endif
230             DEBUGF_END ();
231         }
232     }
233 #endif
234     free (commandw);
235 /* We use always binary mode on Windows */
236     if(f) _setmode (fileno (f), _O_BINARY);
237 
238     return f;
239 }
240 
241 int
get_command_line_args_utf8(const_string enc,int * p_ac,char *** p_av)242 get_command_line_args_utf8 (const_string enc, int *p_ac, char ***p_av)
243 {
244     int argc;
245     string *argv;
246 
247     if (!enc || !strncmp(enc,"",1)) return 0;
248 
249 #ifdef DEBUG
250     fprintf(stderr, "command_line_encoding (%s)\n", enc);
251 #endif /* DEBUG */
252     if (!(strncmp(enc,"utf8",5) && strncmp(enc,"utf-8",6))) {
253       LPWSTR *argvw;
254       INT argcw, i;
255       string s;
256 #ifdef DEBUG
257       DWORD ret;
258       HANDLE hStderr;
259       hStderr = GetStdHandle( STD_ERROR_HANDLE );
260 #endif /* DEBUG */
261       file_system_codepage = CP_UTF8;
262       is_cp932_system = 0;
263       argvw = CommandLineToArgvW(GetCommandLineW(), &argcw);
264       argc = argcw;
265       argv = xmalloc(sizeof(char *)*(argcw+1));
266       for (i=0; i<argcw; i++) {
267         s = get_utf8_from_wstring(argvw[i], s=NULL);
268         argv[i] = s;
269 #ifdef DEBUG
270         fprintf(stderr, "Commandline arguments %d:(%s) [", i, argv[i]);
271         WriteConsoleW( hStderr, argvw[i], wcslen(argvw[i]), &ret, NULL);
272         fprintf(stderr, "]\n");
273 #endif /* DEBUG */
274       }
275       argv[argcw] = NULL;
276       *p_ac = argc;
277       *p_av = argv;
278       return file_system_codepage;
279     } else {
280       WARNING1("kpathsea: Ignoring unknown encoding `%s'", enc);
281       return 0;
282     }
283 }
284 
/*
  spawnvp by file system codepage

  COMMAND and every string of the NULL-terminated vector ARGV are
  converted with get_wstring_from_fsyscp and handed to _wspawnvp together
  with MODE.  All converted strings are freed afterwards and the value
  returned by _wspawnvp is passed back.
*/
int
fsyscp_spawnvp (int mode, const char *command, const char* const *argv)
{
    wchar_t *wcommand, **wargv;
    int nargs, k, status;

    assert(command && argv);

    /* Count the arguments up to the terminating NULL. */
    nargs = 0;
    while (argv[nargs])
      nargs++;

    wargv = xcalloc (nargs + 3, sizeof (wchar_t *));
    wcommand = get_wstring_from_fsyscp(command, NULL);
    for (k = 0; k < nargs; k++)
      wargv[k] = get_wstring_from_fsyscp(argv[k], NULL);
    wargv[nargs] = NULL;

    status = _wspawnvp (mode, (const wchar_t *)wcommand, (const wchar_t* const*) wargv);

    free (wcommand);
    for (k = 0; wargv[k]; k++)
      free (wargv[k]);
    free (wargv);

    return status;
}
322 
323 /*
324   system by file system codepage
325 */
326 int
fsyscp_system(const char * cmd)327 fsyscp_system (const char *cmd)
328 {
329     const char *p;
330     char  *q;
331     char  *av[4];
332     int   len, ret;
333     int   spacep = 0;
334 
335     if (cmd == NULL)
336       return 1;
337 
338     av[0] = xstrdup ("cmd.exe");
339     av[1] = xstrdup ("/c");
340 
341     len = strlen (cmd) + 3;
342     spacep = is_include_space (cmd);
343     av[2] = xmalloc (len);
344     q = av[2];
345     if (spacep)
346       *q++ = '"';
347     for (p = cmd; *p; p++, q++) {
348       if (*p == '\'')
349         *q = '"';
350       else
351         *q = *p;
352     }
353     if (spacep)
354       *q++ = '"';
355     *q = '\0';
356     av[3] = NULL;
357     ret = fsyscp_spawnvp (_P_WAIT, av[0], (const char* const*) av);
358     free (av[0]);
359     free (av[1]);
360     free (av[2]);
361     return ret;
362 }
363 
/* Stack of pending input values shared by win32_getc() and
   win32_ungetc(): getc_len counts the entries currently held in
   getc_buff, and the entry at index getc_len-1 is handed out next.  */
static int getc_buff[4];
static int getc_len;
366 
win32_getc(FILE * fp)367 int win32_getc(FILE *fp)
368 {
369     const int fd = fileno(fp);
370     HANDLE hStdin;
371     DWORD ret;
372     wchar_t wc[3];
373     char mbc[5];
374     int j;
375     static wchar_t wcbuf = L'\0';
376 
377     if (!(fd == fileno(stdin) && _isatty(fd) && file_system_codepage == CP_UTF8))
378         return getc(fp);
379 
380     if (getc_len == 0)
381     {
382         hStdin = GetStdHandle(STD_INPUT_HANDLE);
383         if (wcbuf) {
384             wc[0] = wcbuf;
385             wcbuf = L'\0';
386         }
387         else if (ReadConsoleW(hStdin, wc, 1, &ret, NULL) == 0)
388             return EOF;
389         if (0xd800<=wc[0] && wc[0]<0xdc00) {
390             if (ReadConsoleW(hStdin, wc+1, 1, &ret, NULL) == 0)
391                 return EOF;
392             if (0xdc00<=wc[1] && wc[1]<0xe000) {
393                 wc[2]=L'\0';
394             } else {
395                 wcbuf=wc[1];
396                 wc[0]=0xfffd;    /* illegal surrogate pair */
397                 wc[1]=L'\0';
398             }
399         } else if (0xdc00<=wc[0] && wc[0]<0xe000) {
400             wc[0]=0xfffd;        /* illegal surrogate pair */
401             wc[1]=L'\0';
402         } else {
403             wc[1]=L'\0';
404         }
405         get_utf8_from_wstring(wc,mbc);
406         j=strlen(mbc)-1;
407         while(j>=0) {
408             getc_buff[getc_len++]=(int)mbc[j--];
409         }
410     }
411     return getc_buff[--getc_len];
412 }
413 
win32_ungetc(int c,FILE * fp)414 int win32_ungetc(int c, FILE *fp)
415 {
416     const int fd = fileno(fp);
417 
418     if (!(fd == fileno(stdin) && _isatty(fd) && file_system_codepage == CP_UTF8))
419         return ungetc(c, fp);
420 
421     assert(getc_len < 4);
422     return getc_buff[getc_len++] = c;
423 }
424 
/*
  Convert STR with get_wstring_from_utf8 and write the result to the
  console handle HSTDOUT with WriteConsoleW.  Returns the character count
  reported by WriteConsoleW, or EOF when WriteConsoleW returns 0.
*/
static int __win32_fputs(const char *str, HANDLE hStdout)
{
    DWORD written;
    wchar_t *wide = get_wstring_from_utf8(str, NULL);
    const int ok = WriteConsoleW(hStdout, wide, wcslen(wide), &written, NULL) != 0;

    free(wide);
    if (!ok)
        return EOF;
    return written;
}
440 
win32_fputs(const char * str,FILE * fp)441 int win32_fputs(const char *str, FILE *fp)
442 {
443     const int fd = fileno(fp);
444     HANDLE hStdout;
445 
446     if (!((fd == fileno(stdout) || fd == fileno(stderr)) && _isatty(fd)
447         && file_system_codepage == CP_UTF8))
448         return fputs(str, fp);
449 
450     hStdout = (fd == fileno(stdout)) ?
451         GetStdHandle(STD_OUTPUT_HANDLE) : GetStdHandle(STD_ERROR_HANDLE);
452 
453     return __win32_fputs(str, hStdout);
454 }
455 
456 #define MAX_PROMPT_STR_SIZE 8192
457 
win32_vfprintf(FILE * fp,const char * format,va_list argp)458 int win32_vfprintf(FILE *fp, const char *format, va_list argp)
459 {
460     const int fd = fileno(fp);
461     HANDLE hStdout;
462     char buff[MAX_PROMPT_STR_SIZE];
463     int ret;
464 
465     if (!((fd == fileno(stdout) || fd == fileno(stderr)) && _isatty(fd)
466         && file_system_codepage == CP_UTF8))
467         return vfprintf(fp, format, argp);
468 
469     hStdout = (fd == fileno(stdout)) ?
470         GetStdHandle(STD_OUTPUT_HANDLE) : GetStdHandle(STD_ERROR_HANDLE);
471 
472     ret = _vsnprintf(buff, sizeof(buff), format, argp);
473     if (__win32_fputs(buff, hStdout)==EOF) {
474         return EOF;
475     }
476     return ret;
477 }
478 
/*
  puts replacement: write STR to stdout through win32_fputs, then finish
  the line with puts ("").  Returns EOF if win32_fputs reports EOF,
  otherwise the result of puts.
*/
int win32_puts(const char *str)
{
    const int written = win32_fputs(str, stdout);

    return (written == EOF) ? EOF : puts("");
}
486 
win32_putc(int c,FILE * fp)487 int win32_putc(int c, FILE *fp)
488 {
489     const int fd = fileno(fp);
490     HANDLE hStdout;
491     DWORD ret;
492     wchar_t wstr[3];
493     static int len = 0;
494     static char buff[5], *str;
495 
496     if (!((fd == fileno(stdout) || fd == fileno(stderr)) && _isatty(fd)
497         && file_system_codepage == CP_UTF8))
498         return putc(c, fp);
499 
500     hStdout = (fd == fileno(stdout)) ?
501         GetStdHandle(STD_OUTPUT_HANDLE) : GetStdHandle(STD_ERROR_HANDLE);
502 
503     c &= 0xff;
504 
505     if (c < 0x80) {
506         str = buff;
507         len = 1;
508     }
509     if (c < 0xc0) { /* ASCII or trailer */
510         *str++ = c;
511         len--;
512         if (len == 0) {
513             *str = '\0';
514             get_wstring_from_utf8(buff, wstr);
515             if (WriteConsoleW(hStdout, wstr, wcslen(wstr), &ret, NULL) == 0) {
516                 len = 0;
517                 return EOF;
518             }
519         }
520         else if (len < 0) return EOF;
521         return c;
522     }
523     else if (c < 0xc2) { len = 0; return EOF; }  /* illegal */
524     else if (c < 0xe0) len = 2;
525     else if (c < 0xf0) len = 3;
526     else if (c < 0xf5) len = 4;
527     else { len = 0; return EOF; }
528 
529     str = buff;
530     *str++ = c;
531     len--;
532     return c;
533 }
534