1 #include "Python.h"
2 #include "osdefs.h"
3 #include <locale.h>
4 
5 #ifdef MS_WINDOWS
6 #  include <malloc.h>
7 #  include <windows.h>
8 extern int winerror_to_errno(int);
9 #endif
10 
11 #ifdef HAVE_LANGINFO_H
12 #include <langinfo.h>
13 #endif
14 
15 #ifdef HAVE_SYS_IOCTL_H
16 #include <sys/ioctl.h>
17 #endif
18 
19 #ifdef HAVE_FCNTL_H
20 #include <fcntl.h>
21 #endif /* HAVE_FCNTL_H */
22 
23 #ifdef O_CLOEXEC
24 /* Does open() support the O_CLOEXEC flag? Possible values:
25 
26    -1: unknown
27     0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
28     1: open() supports O_CLOEXEC flag, close-on-exec is set
29 
30    The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
31    and os.open(). */
32 int _Py_open_cloexec_works = -1;
33 #endif
34 
35 PyObject *
_Py_device_encoding(int fd)36 _Py_device_encoding(int fd)
37 {
38 #if defined(MS_WINDOWS)
39     UINT cp;
40 #endif
41     int valid;
42     _Py_BEGIN_SUPPRESS_IPH
43     valid = isatty(fd);
44     _Py_END_SUPPRESS_IPH
45     if (!valid)
46         Py_RETURN_NONE;
47 
48 #if defined(MS_WINDOWS)
49     if (fd == 0)
50         cp = GetConsoleCP();
51     else if (fd == 1 || fd == 2)
52         cp = GetConsoleOutputCP();
53     else
54         cp = 0;
55     /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
56        has no console */
57     if (cp != 0)
58         return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
59 #elif defined(CODESET)
60     {
61         char *codeset = nl_langinfo(CODESET);
62         if (codeset != NULL && codeset[0] != 0)
63             return PyUnicode_FromString(codeset);
64     }
65 #endif
66     Py_RETURN_NONE;
67 }
68 
69 #if !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS)
70 
71 #define USE_FORCE_ASCII
72 
73 extern int _Py_normalize_encoding(const char *, char *, size_t);
74 
75 /* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
76    On these operating systems, nl_langinfo(CODESET) announces an alias of the
77    ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
78    ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
79    locale.getpreferredencoding() codec. For example, if command line arguments
80    are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
81    UnicodeEncodeError instead of retrieving the original byte string.
82 
83    The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
84    nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
85    one byte in range 0x80-0xff can be decoded from the locale encoding. The
86    workaround is also enabled on error, for example if getting the locale
87    failed.
88 
89    Values of force_ascii:
90 
91        1: the workaround is used: Py_EncodeLocale() uses
92           encode_ascii_surrogateescape() and Py_DecodeLocale() uses
93           decode_ascii()
94        0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
95           Py_DecodeLocale() uses mbstowcs()
96       -1: unknown, need to call check_force_ascii() to get the value
97 */
98 static int force_ascii = -1;
99 
/* Decide whether the "force ASCII" workaround must be used.

   Return 1 if the workaround is needed (or if an error occurred while
   inspecting the locale), 0 otherwise. The result is meant to be cached in
   force_ascii by the callers. */
static int
check_force_ascii(void)
{
    const char *loc;
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset;
    const char * const *alias;
    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    int is_ascii;
    unsigned int i;
    static const char * const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };
#endif

    loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL)
        goto error;
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding)))
        goto error;

    is_ascii = 0;
    for (alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* Probe every non-ASCII byte, 0x80 through 0xff inclusive. */
    for (i = 0x80; i <= 0xff; i++) {
        /* mbstowcs() expects a null-terminated string: terminate the
           one-byte probe so the function can never read past the buffer. */
        char ch[2];
        wchar_t wch;
        size_t res;

        ch[0] = (char)(unsigned char)i;
        ch[1] = '\0';
        res = mbstowcs(&wch, ch, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
182 
183 
184 int
_Py_GetForceASCII(void)185 _Py_GetForceASCII(void)
186 {
187     if (force_ascii == -1) {
188         force_ascii = check_force_ascii();
189     }
190     return force_ascii;
191 }
192 
193 
194 void
_Py_ResetForceASCII(void)195 _Py_ResetForceASCII(void)
196 {
197     force_ascii = -1;
198 }
199 
200 
/* Encode a wide character string to ASCII.

   Characters up to 0x7f are copied as-is. If surrogateescape is non-zero,
   characters in U+DC80..U+DCFF are written as bytes 0x80..0xFF. Any other
   character is an encoding error.

   The buffer is allocated with PyMem_RawMalloc() if raw_malloc is non-zero,
   with PyMem_Malloc() otherwise.

   Return 0 and store the new buffer into *str on success, -1 on memory
   allocation failure, -2 on encoding error (the index of the offending
   character is written into *error_pos and a message into *reason when
   these pointers are not NULL). */
static int
encode_ascii(const wchar_t *text, char **str,
             size_t *error_pos, const char **reason,
             int raw_malloc, int surrogateescape)
{
    const size_t nchars = wcslen(text);
    char *buffer;
    size_t pos;

    /* Each character yields exactly one byte; +1 for the NUL byte */
    buffer = raw_malloc ? PyMem_RawMalloc(nchars + 1)
                        : PyMem_Malloc(nchars + 1);
    if (buffer == NULL) {
        return -1;
    }

    for (pos = 0; pos < nchars; pos++) {
        const wchar_t wc = text[pos];

        if (wc <= 0x7f) {
            /* ASCII character */
            buffer[pos] = (char)wc;
            continue;
        }
        if (surrogateescape && 0xdc80 <= wc && wc <= 0xdcff) {
            /* UTF-8b surrogate */
            buffer[pos] = (char)(wc - 0xdc00);
            continue;
        }

        /* Character cannot be encoded: release the buffer with the
           allocator that created it and report the error. */
        if (raw_malloc) {
            PyMem_RawFree(buffer);
        }
        else {
            PyMem_Free(buffer);
        }
        if (error_pos != NULL) {
            *error_pos = pos;
        }
        if (reason) {
            *reason = "encoding error";
        }
        return -2;
    }

    buffer[nchars] = '\0';
    *str = buffer;
    return 0;
}
255 #else
/* Variant used when the force-ASCII workaround is compiled out:
   the workaround is never active, so always report 0. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
261 
/* Variant used when the force-ASCII workaround is compiled out:
   there is no cached state to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
267 #endif   /* !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS) */
268 
269 
270 #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
271 static int
decode_ascii(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,int surrogateescape)272 decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
273              const char **reason, int surrogateescape)
274 {
275     wchar_t *res;
276     unsigned char *in;
277     wchar_t *out;
278     size_t argsize = strlen(arg) + 1;
279 
280     if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
281         return -1;
282     }
283     res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
284     if (!res) {
285         return -1;
286     }
287 
288     out = res;
289     for (in = (unsigned char*)arg; *in; in++) {
290         unsigned char ch = *in;
291         if (ch < 128) {
292             *out++ = ch;
293         }
294         else {
295             if (!surrogateescape) {
296                 PyMem_RawFree(res);
297                 if (wlen) {
298                     *wlen = in - (unsigned char*)arg;
299                 }
300                 if (reason) {
301                     *reason = "decoding error";
302                 }
303                 return -2;
304             }
305             *out++ = 0xdc00 + ch;
306         }
307     }
308     *out = 0;
309 
310     if (wlen != NULL) {
311         *wlen = out - res;
312     }
313     *wstr = res;
314     return 0;
315 }
316 #endif   /* !HAVE_MBRTOWC */
317 
/* Decode a byte string from the current locale encoding.

   First try a plain mbstowcs() conversion; its result is used only if it
   contains no surrogate characters. Otherwise fall back to a
   character-by-character conversion with mbrtowc() (when available) or to
   decode_ascii(), escaping undecodable bytes as 0xdc00 + byte when
   surrogateescape is non-zero.

   Return 0 on success: *wstr receives a buffer allocated with
   PyMem_RawMalloc() and, if wlen is not NULL, *wlen the number of wide
   characters. Return -1 on memory allocation failure or size overflow.
   Return -2 on decoding error: if wlen is not NULL, *wlen receives the
   offset in arg where decoding stopped; if reason is not NULL, *reason
   receives the error message. */
static int
decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
                      const char **reason, int surrogateescape)
{
    wchar_t *res;
    size_t argsize;
    size_t count;
#ifdef HAVE_MBRTOWC
    unsigned char *in;
    wchar_t *out;
    mbstate_t mbs;
#endif

#ifdef HAVE_BROKEN_MBSTOWCS
    /* Some platforms have a broken implementation of
     * mbstowcs which does not count the characters that
     * would result from conversion.  Use an upper bound.
     */
    argsize = strlen(arg);
#else
    /* Ask mbstowcs() for the number of wide characters needed */
    argsize = mbstowcs(NULL, arg, 0);
#endif
    if (argsize != (size_t)-1) {
        /* Guard the (argsize + 1) * sizeof(wchar_t) computation below */
        if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
            return -1;
        }
        res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
        if (!res) {
            return -1;
        }

        count = mbstowcs(res, arg, argsize + 1);
        if (count != (size_t)-1) {
            wchar_t *tmp;
            /* Only use the result if it contains no
               surrogate characters. */
            for (tmp = res; *tmp != 0 &&
                         !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
                ;
            if (*tmp == 0) {
                /* Reached the end without meeting a surrogate: done */
                if (wlen != NULL) {
                    *wlen = count;
                }
                *wstr = res;
                return 0;
            }
        }
        /* The fast path failed or produced a surrogate: discard it */
        PyMem_RawFree(res);
    }

    /* Conversion failed. Fall back to escaping with surrogateescape. */
#ifdef HAVE_MBRTOWC
    /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */

    /* Overallocate; as multi-byte characters are in the argument, the
       actual output could use less memory. */
    argsize = strlen(arg) + 1;
    if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
        return -1;
    }
    res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
    if (!res) {
        return -1;
    }

    /* From here on, argsize is the number of input bytes left to consume
       (trailing null byte included). */
    in = (unsigned char*)arg;
    out = res;
    memset(&mbs, 0, sizeof mbs);
    while (argsize) {
        size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
        if (converted == 0) {
            /* Reached end of string; null char stored. */
            break;
        }

        if (converted == (size_t)-2) {
            /* Incomplete character. This should never happen,
               since we provide everything that we have -
               unless there is a bug in the C library, or I
               misunderstood how mbrtowc works. */
            goto decode_error;
        }

        if (converted == (size_t)-1) {
            if (!surrogateescape) {
                goto decode_error;
            }

            /* Conversion error. Escape as UTF-8b, and start over
               in the initial shift state. */
            *out++ = 0xdc00 + *in++;
            argsize--;
            memset(&mbs, 0, sizeof mbs);
            continue;
        }

        if (Py_UNICODE_IS_SURROGATE(*out)) {
            if (!surrogateescape) {
                goto decode_error;
            }

            /* Surrogate character.  Escape the original
               byte sequence with surrogateescape. */
            argsize -= converted;
            while (converted--) {
                *out++ = 0xdc00 + *in++;
            }
            continue;
        }
        /* successfully converted some bytes */
        in += converted;
        argsize -= converted;
        out++;
    }
    if (wlen != NULL) {
        *wlen = out - res;
    }
    *wstr = res;
    return 0;

decode_error:
    /* Report the offset of the input byte where decoding stopped */
    PyMem_RawFree(res);
    if (wlen) {
        *wlen = in - (unsigned char*)arg;
    }
    if (reason) {
        *reason = "decoding error";
    }
    return -2;
#else   /* HAVE_MBRTOWC */
    /* Cannot use C locale for escaping; manually escape as if charset
       is ASCII (i.e. escape all bytes >= 128). This will still roundtrip
       correctly in the locale's charset, which must be an ASCII superset. */
    return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
#endif   /* HAVE_MBRTOWC */
}
454 
455 
456 /* Decode a byte string from the locale encoding.
457 
458    Use the strict error handler if 'surrogateescape' is zero.  Use the
459    surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
460    bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
461    can be decoded as a surrogate character, escape the bytes using the
462    surrogateescape error handler instead of decoding them.
463 
464    On success, return 0 and write the newly allocated wide character string into
465    *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
466    the number of wide characters excluding the null character into *wlen.
467 
468    On memory allocation failure, return -1.
469 
470    On decoding error, return -2. If wlen is not NULL, write the start of
471    invalid byte sequence in the input string into *wlen. If reason is not NULL,
472    write the decoding error message into *reason.
473 
   Use the _Py_EncodeLocaleEx() function to encode the character string back
   to a byte string. */
476 int
_Py_DecodeLocaleEx(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,int current_locale,int surrogateescape)477 _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
478                    const char **reason,
479                    int current_locale, int surrogateescape)
480 {
481     if (current_locale) {
482 #ifdef __ANDROID__
483         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
484                                 surrogateescape);
485 #else
486         return decode_current_locale(arg, wstr, wlen, reason, surrogateescape);
487 #endif
488     }
489 
490 #if defined(__APPLE__) || defined(__ANDROID__)
491     return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
492                             surrogateescape);
493 #else
494     if (Py_UTF8Mode == 1) {
495         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
496                                 surrogateescape);
497     }
498 
499 #ifdef USE_FORCE_ASCII
500     if (force_ascii == -1) {
501         force_ascii = check_force_ascii();
502     }
503 
504     if (force_ascii) {
505         /* force ASCII encoding to workaround mbstowcs() issue */
506         return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
507     }
508 #endif
509 
510     return decode_current_locale(arg, wstr, wlen, reason, surrogateescape);
511 #endif   /* __APPLE__ or __ANDROID__ */
512 }
513 
514 
515 /* Decode a byte string from the locale encoding with the
516    surrogateescape error handler: undecodable bytes are decoded as characters
517    in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
518    character, escape the bytes using the surrogateescape error handler instead
519    of decoding them.
520 
   Return a pointer to a newly allocated wide character string, use
   PyMem_RawFree() to free the memory. If wlen is not NULL, write the number of
   wide characters excluding the null character into *wlen.

   Return NULL on decoding error or memory allocation error. If wlen is not
   NULL, *wlen is set to (size_t)-1 on memory error or set to (size_t)-2 on
   decoding error.
528 
529    Decoding errors should never happen, unless there is a bug in the C
530    library.
531 
532    Use the Py_EncodeLocale() function to encode the character string back to a
533    byte string. */
wchar_t*
Py_DecodeLocale(const char* arg, size_t *wlen)
{
    wchar_t *decoded;
    /* current_locale=0, surrogateescape=1; no error message requested */
    const int status = _Py_DecodeLocaleEx(arg, &decoded, wlen, NULL, 0, 1);

    if (status == 0) {
        return decoded;
    }

    /* Failure: expose the negative status code through *wlen */
    if (wlen != NULL) {
        *wlen = (size_t)status;
    }
    return NULL;
}
547 
548 
/* Encode a wide character string to the current locale encoding.

   Characters in U+DC80..U+DCFF are written as bytes 0x80..0xFF if
   surrogateescape is non-zero, and are an encoding error otherwise. Every
   other character is converted one at a time with wcstombs().

   The output buffer is allocated with PyMem_RawMalloc() if raw_malloc is
   non-zero, with PyMem_Malloc() otherwise.

   Return 0 and store the buffer into *str on success, -1 on memory
   allocation failure, -2 on encoding error (the index of the offending
   character is written into *error_pos and a message into *reason when
   these pointers are not NULL). */
static int
encode_current_locale(const wchar_t *text, char **str,
                      size_t *error_pos, const char **reason,
                      int raw_malloc, int surrogateescape)
{
    const size_t len = wcslen(text);
    char *result = NULL, *bytes = NULL;
    size_t i, size, converted;
    wchar_t c, buf[2];

    /* The function works in two steps:
       1. compute the length of the output buffer in bytes (size)
       2. outputs the bytes */
    size = 0;
    /* buf is a one-character, null-terminated string passed to wcstombs() */
    buf[1] = 0;
    while (1) {
        /* bytes == NULL during the first pass (counting only); during the
           second pass it is the write cursor and size is the space left. */
        for (i=0; i < len; i++) {
            c = text[i];
            if (c >= 0xdc80 && c <= 0xdcff) {
                if (!surrogateescape) {
                    goto encode_error;
                }
                /* UTF-8b surrogate */
                if (bytes != NULL) {
                    *bytes++ = c - 0xdc00;
                    size--;
                }
                else {
                    size++;
                }
                continue;
            }
            else {
                buf[0] = c;
                if (bytes != NULL) {
                    converted = wcstombs(bytes, buf, size);
                }
                else {
                    converted = wcstombs(NULL, buf, 0);
                }
                if (converted == (size_t)-1) {
                    goto encode_error;
                }
                if (bytes != NULL) {
                    bytes += converted;
                    size -= converted;
                }
                else {
                    size += converted;
                }
            }
        }
        if (result != NULL) {
            /* End of the second pass: terminate the string and stop */
            *bytes = '\0';
            break;
        }

        /* End of the first pass: allocate the buffer, then loop again to
           fill it */
        size += 1; /* nul byte at the end */
        if (raw_malloc) {
            result = PyMem_RawMalloc(size);
        }
        else {
            result = PyMem_Malloc(size);
        }
        if (result == NULL) {
            return -1;
        }
        bytes = result;
    }
    *str = result;
    return 0;

encode_error:
    /* result is still NULL if the error was detected during the first pass */
    if (raw_malloc) {
        PyMem_RawFree(result);
    }
    else {
        PyMem_Free(result);
    }
    if (error_pos != NULL) {
        *error_pos = i;
    }
    if (reason) {
        *reason = "encoding error";
    }
    return -2;
}
636 
637 static int
encode_locale_ex(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,int current_locale,int surrogateescape)638 encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
639                  const char **reason,
640                  int raw_malloc, int current_locale, int surrogateescape)
641 {
642     if (current_locale) {
643 #ifdef __ANDROID__
644         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
645                                 raw_malloc, surrogateescape);
646 #else
647         return encode_current_locale(text, str, error_pos, reason,
648                                      raw_malloc, surrogateescape);
649 #endif
650     }
651 
652 #if defined(__APPLE__) || defined(__ANDROID__)
653     return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
654                             raw_malloc, surrogateescape);
655 #else   /* __APPLE__ */
656     if (Py_UTF8Mode == 1) {
657         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
658                                 raw_malloc, surrogateescape);
659     }
660 
661 #ifdef USE_FORCE_ASCII
662     if (force_ascii == -1) {
663         force_ascii = check_force_ascii();
664     }
665 
666     if (force_ascii) {
667         return encode_ascii(text, str, error_pos, reason,
668                             raw_malloc, surrogateescape);
669     }
670 #endif
671 
672     return encode_current_locale(text, str, error_pos, reason,
673                                  raw_malloc, surrogateescape);
674 #endif   /* __APPLE__ or __ANDROID__ */
675 }
676 
/* Encode text with the surrogateescape error handler and return the new
   byte string, or NULL on error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 unless the
   encoder reported an encoding error (status -2), in which case it is left
   as written by the encoder. */
static char*
encode_locale(const wchar_t *text, size_t *error_pos,
              int raw_malloc, int current_locale)
{
    char *encoded;
    const int status = encode_locale_ex(text, &encoded, error_pos, NULL,
                                        raw_malloc, current_locale, 1);

    if (status == -2) {
        /* encoding error: keep the position reported by the encoder */
        return NULL;
    }
    if (error_pos != NULL) {
        *error_pos = (size_t)-1;
    }
    return (status == 0) ? encoded : NULL;
}
692 
693 /* Encode a wide character string to the locale encoding with the
694    surrogateescape error handler: surrogate characters in the range
695    U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
696 
697    Return a pointer to a newly allocated byte string, use PyMem_Free() to free
698    the memory. Return NULL on encoding or memory allocation error.
699 
700    If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
701    to the index of the invalid character on encoding error.
702 
703    Use the Py_DecodeLocale() function to decode the bytes string back to a wide
704    character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    /* Result allocated with PyMem_Malloc(); the current-locale override
       is not requested. */
    const int raw_malloc = 0;
    const int current_locale = 0;

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
710 
711 
712 /* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
713    instead of PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    /* Result allocated with PyMem_RawMalloc(); the current-locale override
       is not requested. */
    const int raw_malloc = 1;
    const int current_locale = 0;

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
719 
720 
/* Encode a wide character string; the result is always allocated with
   PyMem_RawMalloc(). See encode_locale_ex() for the meaning of the other
   arguments and of the return value. */
int
_Py_EncodeLocaleEx(const wchar_t *text, char **str,
                   size_t *error_pos, const char **reason,
                   int current_locale, int surrogateescape)
{
    const int raw_malloc = 1;

    return encode_locale_ex(text, str, error_pos, reason, raw_malloc,
                            current_locale, surrogateescape);
}
729 
730 
731 #ifdef MS_WINDOWS
732 static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
733 
734 static void
FILE_TIME_to_time_t_nsec(FILETIME * in_ptr,time_t * time_out,int * nsec_out)735 FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
736 {
737     /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
738     /* Cannot simply cast and dereference in_ptr,
739        since it might not be aligned properly */
740     __int64 in;
741     memcpy(&in, in_ptr, sizeof(in));
742     *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
743     *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
744 }
745 
746 void
_Py_time_t_to_FILE_TIME(time_t time_in,int nsec_in,FILETIME * out_ptr)747 _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
748 {
749     /* XXX endianness */
750     __int64 out;
751     out = time_in + secs_between_epochs;
752     out = out * 10000000 + nsec_in / 100;
753     memcpy(out_ptr, &out, sizeof(out));
754 }
755 
756 /* Below, we *know* that ugo+r is 0444 */
757 #if _S_IREAD != 0400
758 #error Unsupported C library
759 #endif
760 static int
attributes_to_mode(DWORD attr)761 attributes_to_mode(DWORD attr)
762 {
763     int m = 0;
764     if (attr & FILE_ATTRIBUTE_DIRECTORY)
765         m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
766     else
767         m |= _S_IFREG;
768     if (attr & FILE_ATTRIBUTE_READONLY)
769         m |= 0444;
770     else
771         m |= 0666;
772     return m;
773 }
774 
775 void
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION * info,ULONG reparse_tag,struct _Py_stat_struct * result)776 _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
777                            struct _Py_stat_struct *result)
778 {
779     memset(result, 0, sizeof(*result));
780     result->st_mode = attributes_to_mode(info->dwFileAttributes);
781     result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
782     result->st_dev = info->dwVolumeSerialNumber;
783     result->st_rdev = result->st_dev;
784     FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
785     FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
786     FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
787     result->st_nlink = info->nNumberOfLinks;
788     result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
789     if (reparse_tag == IO_REPARSE_TAG_SYMLINK) {
790         /* first clear the S_IFMT bits */
791         result->st_mode ^= (result->st_mode & S_IFMT);
792         /* now set the bits that make this a symlink */
793         result->st_mode |= S_IFLNK;
794     }
795     result->st_file_attributes = info->dwFileAttributes;
796 }
797 #endif
798 
799 /* Return information about a file.
800 
801    On POSIX, use fstat().
802 
803    On Windows, use GetFileType() and GetFileInformationByHandle() which support
804    files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
805    than 2 GiB because the file size type is a signed 32-bit integer: see issue
806    #23152.
807 
808    On Windows, set the last Windows error and return nonzero on error. On
809    POSIX, set errno and return nonzero on error. Fill status and return 0 on
810    success. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifndef MS_WINDOWS
    return fstat(fd, status);
#else
    BY_HANDLE_FILE_INFORMATION file_info;
    HANDLE handle;
    int file_type;

    _Py_BEGIN_SUPPRESS_IPH
    handle = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH

    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    file_type = GetFileType(handle);
    switch (file_type) {
    case FILE_TYPE_DISK:
        /* regular disk file: query the full information below */
        break;

    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;

    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;

    case FILE_TYPE_UNKNOWN: {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* valid but unknown file: report success with a zeroed status */
        return 0;
    }

    default:
        /* any other type: report success with a zeroed status */
        return 0;
    }

    if (!GetFileInformationByHandle(handle, &file_info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&file_info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)file_info.nFileIndexHigh) << 32) + file_info.nFileIndexLow;
    return 0;
#endif
}
864 
865 /* Return information about a file.
866 
867    On POSIX, use fstat().
868 
869    On Windows, use GetFileType() and GetFileInformationByHandle() which support
870    files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
871    than 2 GiB because the file size type is a signed 32-bit integer: see issue
872    #23152.
873 
874    Raise an exception and return -1 on error. On Windows, set the last Windows
875    error on error. On POSIX, set errno on error. Fill status and return 0 on
876    success.
877 
878    Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
879    to call fstat(). The caller must hold the GIL. */
880 int
_Py_fstat(int fd,struct _Py_stat_struct * status)881 _Py_fstat(int fd, struct _Py_stat_struct *status)
882 {
883     int res;
884 
885     assert(PyGILState_Check());
886 
887     Py_BEGIN_ALLOW_THREADS
888     res = _Py_fstat_noraise(fd, status);
889     Py_END_ALLOW_THREADS
890 
891     if (res != 0) {
892 #ifdef MS_WINDOWS
893         PyErr_SetFromWindowsErr(0);
894 #else
895         PyErr_SetFromErrno(PyExc_OSError);
896 #endif
897         return -1;
898     }
899     return 0;
900 }
901 
902 /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
903    call stat() otherwise. Only fill st_mode attribute on Windows.
904 
905    Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
906    raised. */
907 
908 int
_Py_stat(PyObject * path,struct stat * statbuf)909 _Py_stat(PyObject *path, struct stat *statbuf)
910 {
911 #ifdef MS_WINDOWS
912     int err;
913     struct _stat wstatbuf;
914     const wchar_t *wpath;
915 
916     wpath = _PyUnicode_AsUnicode(path);
917     if (wpath == NULL)
918         return -2;
919 
920     err = _wstat(wpath, &wstatbuf);
921     if (!err)
922         statbuf->st_mode = wstatbuf.st_mode;
923     return err;
924 #else
925     int ret;
926     PyObject *bytes;
927     char *cpath;
928 
929     bytes = PyUnicode_EncodeFSDefault(path);
930     if (bytes == NULL)
931         return -2;
932 
933     /* check for embedded null bytes */
934     if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
935         Py_DECREF(bytes);
936         return -2;
937     }
938 
939     ret = stat(cpath, statbuf);
940     Py_DECREF(bytes);
941     return ret;
942 #endif
943 }
944 
945 
946 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
947 static int
get_inheritable(int fd,int raise)948 get_inheritable(int fd, int raise)
949 {
950 #ifdef MS_WINDOWS
951     HANDLE handle;
952     DWORD flags;
953 
954     _Py_BEGIN_SUPPRESS_IPH
955     handle = (HANDLE)_get_osfhandle(fd);
956     _Py_END_SUPPRESS_IPH
957     if (handle == INVALID_HANDLE_VALUE) {
958         if (raise)
959             PyErr_SetFromErrno(PyExc_OSError);
960         return -1;
961     }
962 
963     if (!GetHandleInformation(handle, &flags)) {
964         if (raise)
965             PyErr_SetFromWindowsErr(0);
966         return -1;
967     }
968 
969     return (flags & HANDLE_FLAG_INHERIT);
970 #else
971     int flags;
972 
973     flags = fcntl(fd, F_GETFD, 0);
974     if (flags == -1) {
975         if (raise)
976             PyErr_SetFromErrno(PyExc_OSError);
977         return -1;
978     }
979     return !(flags & FD_CLOEXEC);
980 #endif
981 }
982 
/* Report whether the specified file descriptor can be inherited.
   Return 1 if it can be inherited, 0 if it cannot; on error, raise an
   exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_exception = 1;

    return get_inheritable(fd, raise_exception);
}
991 
992 
/* Set or clear the inheritable flag of the file descriptor fd.

   fd: file descriptor to modify.
   inheritable: non-zero to make fd inheritable, zero to make it
       non-inheritable.
   raise: if non-zero, set a Python exception on error; if zero, only
       return -1.
   atomic_flag_works: NULL, or pointer to a cached tri-state (-1: unknown,
       0: no, 1: yes) telling whether descriptors are already created
       non-inheritable. When it is -1, fd is probed with get_inheritable()
       and the answer is stored. When it ends up non-zero, nothing else is
       done. Must be NULL if inheritable is non-zero.

   Return 0 on success, -1 on error.

   This function MUST be kept async-signal-safe on POSIX when raise=0. */
static int
set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
{
#ifdef MS_WINDOWS
    HANDLE handle;
    DWORD flags;
#else
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
    /* -1: ioctl() not tried yet, 0: ioctl() is unusable, 1: ioctl() works */
    static int ioctl_works = -1;
    int request;
    int err;
#endif
    int flags, new_flags;
    int res;
#endif

    /* atomic_flag_works can only be used to make the file descriptor
       non-inheritable */
    assert(!(atomic_flag_works != NULL && inheritable));

    if (atomic_flag_works != NULL && !inheritable) {
        if (*atomic_flag_works == -1) {
            /* first use of this cache: look at the descriptor to learn
               whether it was already created non-inheritable */
            int isInheritable = get_inheritable(fd, raise);
            if (isInheritable == -1)
                return -1;
            *atomic_flag_works = !isInheritable;
        }

        /* the descriptor is already non-inheritable: nothing to do */
        if (*atomic_flag_works)
            return 0;
    }

#ifdef MS_WINDOWS
    _Py_BEGIN_SUPPRESS_IPH
    handle = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH
    if (handle == INVALID_HANDLE_VALUE) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }

    if (inheritable)
        flags = HANDLE_FLAG_INHERIT;
    else
        flags = 0;

    /* This check can be removed once support for Windows 7 ends. */
#define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
        GetFileType(handle) == FILE_TYPE_CHAR)

    /* SetHandleInformation() is skipped for handles matched by
       CONSOLE_PSEUDOHANDLE(); such handles are reported as success */
    if (!CONSOLE_PSEUDOHANDLE(handle) &&
        !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
        if (raise)
            PyErr_SetFromWindowsErr(0);
        return -1;
    }
#undef CONSOLE_PSEUDOHANDLE
    return 0;

#else

#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
    if (ioctl_works != 0 && raise != 0) {
        /* fast-path: ioctl() only requires one syscall */
        /* caveat: raise=0 is an indicator that we must be async-signal-safe
         * thus avoid using ioctl() so we skip the fast-path. */
        if (inheritable)
            request = FIONCLEX;
        else
            request = FIOCLEX;
        err = ioctl(fd, request, NULL);
        if (!err) {
            ioctl_works = 1;
            return 0;
        }

        if (errno != ENOTTY && errno != EACCES) {
            /* raise is always non-zero here, see the enclosing condition */
            if (raise)
                PyErr_SetFromErrno(PyExc_OSError);
            return -1;
        }
        else {
            /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
               device". The ioctl is declared but not supported by the kernel.
               Remember that ioctl() doesn't work. It is the case on
               Illumos-based OS for example.

               Issue #27057: When SELinux policy disallows ioctl it will fail
               with EACCES. While FIOCLEX is safe operation it may be
               unavailable because ioctl was denied altogether.
               This can be the case on Android. */
            ioctl_works = 0;
        }
        /* fallback to fcntl() if ioctl() does not work */
    }
#endif

    /* slow-path: fcntl() requires two syscalls */
    flags = fcntl(fd, F_GETFD);
    if (flags < 0) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }

    /* inheritable means that the close-on-exec flag is cleared */
    if (inheritable) {
        new_flags = flags & ~FD_CLOEXEC;
    }
    else {
        new_flags = flags | FD_CLOEXEC;
    }

    if (new_flags == flags) {
        /* FD_CLOEXEC flag already set/cleared: nothing to do */
        return 0;
    }

    res = fcntl(fd, F_SETFD, new_flags);
    if (res < 0) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }
    return 0;
#endif
}
1121 
/* Clear the inheritable flag of fd without raising any exception.
   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise_exception = 0;

    return set_inheritable(fd, inheritable, raise_exception, NULL);
}
1129 
/* Set the inheritable flag of the specified file descriptor.
   Return 0 on success; on error, raise an exception and return -1.

   atomic_flag_works is either NULL or a pointer to a cached tri-state which
   records whether the flag used to create the descriptor already made it
   non-inheritable:

    * *atomic_flag_works == -1: not known yet. The descriptor is queried: if
      it is already non-inheritable, 1 is stored and nothing else is done;
      otherwise 0 is stored and the descriptor is made non-inheritable
    * *atomic_flag_works == 1: do nothing
    * *atomic_flag_works == 0: make the descriptor non-inheritable

   Pass NULL if no atomic flag was used to create the file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_exception = 1;

    return set_inheritable(fd, inheritable, raise_exception,
                           atomic_flag_works);
}
1151 
/* Variant of _Py_set_inheritable() which never raises an exception:
   on error, set errno and return -1.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_exception = 0;

    return set_inheritable(fd, inheritable, raise_exception,
                           atomic_flag_works);
}
1160 
1161 static int
_Py_open_impl(const char * pathname,int flags,int gil_held)1162 _Py_open_impl(const char *pathname, int flags, int gil_held)
1163 {
1164     int fd;
1165     int async_err = 0;
1166 #ifndef MS_WINDOWS
1167     int *atomic_flag_works;
1168 #endif
1169 
1170 #ifdef MS_WINDOWS
1171     flags |= O_NOINHERIT;
1172 #elif defined(O_CLOEXEC)
1173     atomic_flag_works = &_Py_open_cloexec_works;
1174     flags |= O_CLOEXEC;
1175 #else
1176     atomic_flag_works = NULL;
1177 #endif
1178 
1179     if (gil_held) {
1180         do {
1181             Py_BEGIN_ALLOW_THREADS
1182             fd = open(pathname, flags);
1183             Py_END_ALLOW_THREADS
1184         } while (fd < 0
1185                  && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1186         if (async_err)
1187             return -1;
1188         if (fd < 0) {
1189             PyErr_SetFromErrnoWithFilename(PyExc_OSError, pathname);
1190             return -1;
1191         }
1192     }
1193     else {
1194         fd = open(pathname, flags);
1195         if (fd < 0)
1196             return -1;
1197     }
1198 
1199 #ifndef MS_WINDOWS
1200     if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
1201         close(fd);
1202         return -1;
1203     }
1204 #endif
1205 
1206     return fd;
1207 }
1208 
/* Wrapper to open(): open a file with the specified flags.
   Return a file descriptor on success. On error, raise an exception and
   return -1.

   The file descriptor is created non-inheritable.

   If open() fails with EINTR because it was interrupted by a signal, the
   syscall is retried, except if the Python signal handler raises an
   exception.

   The GIL is released to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1226 
/* Wrapper to open(): open a file with the specified flags, without raising
   any exception.
   Return a file descriptor on success. On error, set errno and return -1.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1238 
/* Open a file from a wide character path and mode.

   On Windows, call _wfopen(). Otherwise, convert the mode with wcstombs(),
   encode the path to the locale encoding and call fopen().

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
    FILE *fp;
#ifdef MS_WINDOWS
    fp = _wfopen(path, mode);
#else
    char mode_bytes[10];
    char *path_bytes;
    size_t mode_len;

    mode_len = wcstombs(mode_bytes, mode, sizeof(mode_bytes));
    /* conversion failure, or no room left for the null terminator */
    if (mode_len == (size_t)-1 || mode_len >= sizeof(mode_bytes)) {
        errno = EINVAL;
        return NULL;
    }

    path_bytes = _Py_EncodeLocaleRaw(path, NULL);
    if (path_bytes == NULL) {
        return NULL;
    }
    fp = fopen(path_bytes, mode_bytes);
    PyMem_RawFree(path_bytes);
#endif
    if (fp == NULL) {
        return NULL;
    }
    if (make_non_inheritable(fileno(fp)) < 0) {
        fclose(fp);
        return NULL;
    }
    return fp;
}
1275 
/* Wrapper to fopen() which makes the new file non-inheritable.

   Return NULL if fopen() fails or if the file descriptor cannot be made
   non-inheritable (the file is closed in that case).

   If interrupted by a signal, fail with EINTR. */
FILE*
_Py_fopen(const char *pathname, const char *mode)
{
    FILE *fp = fopen(pathname, mode);

    if (fp != NULL && make_non_inheritable(fileno(fp)) < 0) {
        fclose(fp);
        fp = NULL;
    }
    return fp;
}
1293 
1294 /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
1295    encoding and call fopen() otherwise.
1296 
1297    Return the new file object on success. Raise an exception and return NULL
1298    on error.
1299 
1300    The file descriptor is created non-inheritable.
1301 
1302    When interrupted by a signal (open() fails with EINTR), retry the syscall,
1303    except if the Python signal handler raises an exception.
1304 
1305    Release the GIL to call _wfopen() or fopen(). The caller must hold
1306    the GIL. */
1307 FILE*
_Py_fopen_obj(PyObject * path,const char * mode)1308 _Py_fopen_obj(PyObject *path, const char *mode)
1309 {
1310     FILE *f;
1311     int async_err = 0;
1312 #ifdef MS_WINDOWS
1313     const wchar_t *wpath;
1314     wchar_t wmode[10];
1315     int usize;
1316 
1317     assert(PyGILState_Check());
1318 
1319     if (!PyUnicode_Check(path)) {
1320         PyErr_Format(PyExc_TypeError,
1321                      "str file path expected under Windows, got %R",
1322                      Py_TYPE(path));
1323         return NULL;
1324     }
1325     wpath = _PyUnicode_AsUnicode(path);
1326     if (wpath == NULL)
1327         return NULL;
1328 
1329     usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1330                                 wmode, Py_ARRAY_LENGTH(wmode));
1331     if (usize == 0) {
1332         PyErr_SetFromWindowsErr(0);
1333         return NULL;
1334     }
1335 
1336     do {
1337         Py_BEGIN_ALLOW_THREADS
1338         f = _wfopen(wpath, wmode);
1339         Py_END_ALLOW_THREADS
1340     } while (f == NULL
1341              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1342 #else
1343     PyObject *bytes;
1344     char *path_bytes;
1345 
1346     assert(PyGILState_Check());
1347 
1348     if (!PyUnicode_FSConverter(path, &bytes))
1349         return NULL;
1350     path_bytes = PyBytes_AS_STRING(bytes);
1351 
1352     do {
1353         Py_BEGIN_ALLOW_THREADS
1354         f = fopen(path_bytes, mode);
1355         Py_END_ALLOW_THREADS
1356     } while (f == NULL
1357              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1358 
1359     Py_DECREF(bytes);
1360 #endif
1361     if (async_err)
1362         return NULL;
1363 
1364     if (f == NULL) {
1365         PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1366         return NULL;
1367     }
1368 
1369     if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1370         fclose(f);
1371         return NULL;
1372     }
1373     return f;
1374 }
1375 
1376 /* Read count bytes from fd into buf.
1377 
1378    On success, return the number of read bytes, it can be lower than count.
1379    If the current file offset is at or past the end of file, no bytes are read,
1380    and read() returns zero.
1381 
1382    On error, raise an exception, set errno and return -1.
1383 
1384    When interrupted by a signal (read() fails with EINTR), retry the syscall.
1385    If the Python signal handler raises an exception, the function returns -1
1386    (the syscall is not retried).
1387 
1388    Release the GIL to call read(). The caller must hold the GIL. */
1389 Py_ssize_t
_Py_read(int fd,void * buf,size_t count)1390 _Py_read(int fd, void *buf, size_t count)
1391 {
1392     Py_ssize_t n;
1393     int err;
1394     int async_err = 0;
1395 
1396     assert(PyGILState_Check());
1397 
1398     /* _Py_read() must not be called with an exception set, otherwise the
1399      * caller may think that read() was interrupted by a signal and the signal
1400      * handler raised an exception. */
1401     assert(!PyErr_Occurred());
1402 
1403     if (count > _PY_READ_MAX) {
1404         count = _PY_READ_MAX;
1405     }
1406 
1407     _Py_BEGIN_SUPPRESS_IPH
1408     do {
1409         Py_BEGIN_ALLOW_THREADS
1410         errno = 0;
1411 #ifdef MS_WINDOWS
1412         n = read(fd, buf, (int)count);
1413 #else
1414         n = read(fd, buf, count);
1415 #endif
1416         /* save/restore errno because PyErr_CheckSignals()
1417          * and PyErr_SetFromErrno() can modify it */
1418         err = errno;
1419         Py_END_ALLOW_THREADS
1420     } while (n < 0 && err == EINTR &&
1421             !(async_err = PyErr_CheckSignals()));
1422     _Py_END_SUPPRESS_IPH
1423 
1424     if (async_err) {
1425         /* read() was interrupted by a signal (failed with EINTR)
1426          * and the Python signal handler raised an exception */
1427         errno = err;
1428         assert(errno == EINTR && PyErr_Occurred());
1429         return -1;
1430     }
1431     if (n < 0) {
1432         PyErr_SetFromErrno(PyExc_OSError);
1433         errno = err;
1434         return -1;
1435     }
1436 
1437     return n;
1438 }
1439 
1440 static Py_ssize_t
_Py_write_impl(int fd,const void * buf,size_t count,int gil_held)1441 _Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
1442 {
1443     Py_ssize_t n;
1444     int err;
1445     int async_err = 0;
1446 
1447     _Py_BEGIN_SUPPRESS_IPH
1448 #ifdef MS_WINDOWS
1449     if (count > 32767 && isatty(fd)) {
1450         /* Issue #11395: the Windows console returns an error (12: not
1451            enough space error) on writing into stdout if stdout mode is
1452            binary and the length is greater than 66,000 bytes (or less,
1453            depending on heap usage). */
1454         count = 32767;
1455     }
1456 #endif
1457     if (count > _PY_WRITE_MAX) {
1458         count = _PY_WRITE_MAX;
1459     }
1460 
1461     if (gil_held) {
1462         do {
1463             Py_BEGIN_ALLOW_THREADS
1464             errno = 0;
1465 #ifdef MS_WINDOWS
1466             n = write(fd, buf, (int)count);
1467 #else
1468             n = write(fd, buf, count);
1469 #endif
1470             /* save/restore errno because PyErr_CheckSignals()
1471              * and PyErr_SetFromErrno() can modify it */
1472             err = errno;
1473             Py_END_ALLOW_THREADS
1474         } while (n < 0 && err == EINTR &&
1475                 !(async_err = PyErr_CheckSignals()));
1476     }
1477     else {
1478         do {
1479             errno = 0;
1480 #ifdef MS_WINDOWS
1481             n = write(fd, buf, (int)count);
1482 #else
1483             n = write(fd, buf, count);
1484 #endif
1485             err = errno;
1486         } while (n < 0 && err == EINTR);
1487     }
1488     _Py_END_SUPPRESS_IPH
1489 
1490     if (async_err) {
1491         /* write() was interrupted by a signal (failed with EINTR)
1492            and the Python signal handler raised an exception (if gil_held is
1493            nonzero). */
1494         errno = err;
1495         assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
1496         return -1;
1497     }
1498     if (n < 0) {
1499         if (gil_held)
1500             PyErr_SetFromErrno(PyExc_OSError);
1501         errno = err;
1502         return -1;
1503     }
1504 
1505     return n;
1506 }
1507 
1508 /* Write count bytes of buf into fd.
1509 
1510    On success, return the number of written bytes, it can be lower than count
1511    including 0. On error, raise an exception, set errno and return -1.
1512 
1513    When interrupted by a signal (write() fails with EINTR), retry the syscall.
1514    If the Python signal handler raises an exception, the function returns -1
1515    (the syscall is not retried).
1516 
1517    Release the GIL to call write(). The caller must hold the GIL. */
1518 Py_ssize_t
_Py_write(int fd,const void * buf,size_t count)1519 _Py_write(int fd, const void *buf, size_t count)
1520 {
1521     assert(PyGILState_Check());
1522 
1523     /* _Py_write() must not be called with an exception set, otherwise the
1524      * caller may think that write() was interrupted by a signal and the signal
1525      * handler raised an exception. */
1526     assert(!PyErr_Occurred());
1527 
1528     return _Py_write_impl(fd, buf, count, 1);
1529 }
1530 
1531 /* Write count bytes of buf into fd.
1532  *
1533  * On success, return the number of written bytes, it can be lower than count
1534  * including 0. On error, set errno and return -1.
1535  *
1536  * When interrupted by a signal (write() fails with EINTR), retry the syscall
1537  * without calling the Python signal handler. */
1538 Py_ssize_t
_Py_write_noraise(int fd,const void * buf,size_t count)1539 _Py_write_noraise(int fd, const void *buf, size_t count)
1540 {
1541     return _Py_write_impl(fd, buf, count, 0);
1542 }
1543 
1544 #ifdef HAVE_READLINK
1545 
1546 /* Read value of symbolic link. Encode the path to the locale encoding, decode
1547    the result from the locale encoding. Return -1 on error. */
1548 
1549 int
_Py_wreadlink(const wchar_t * path,wchar_t * buf,size_t bufsiz)1550 _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
1551 {
1552     char *cpath;
1553     char cbuf[MAXPATHLEN];
1554     wchar_t *wbuf;
1555     int res;
1556     size_t r1;
1557 
1558     cpath = _Py_EncodeLocaleRaw(path, NULL);
1559     if (cpath == NULL) {
1560         errno = EINVAL;
1561         return -1;
1562     }
1563     res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
1564     PyMem_RawFree(cpath);
1565     if (res == -1)
1566         return -1;
1567     if (res == Py_ARRAY_LENGTH(cbuf)) {
1568         errno = EINVAL;
1569         return -1;
1570     }
1571     cbuf[res] = '\0'; /* buf will be null terminated */
1572     wbuf = Py_DecodeLocale(cbuf, &r1);
1573     if (wbuf == NULL) {
1574         errno = EINVAL;
1575         return -1;
1576     }
1577     if (bufsiz <= r1) {
1578         PyMem_RawFree(wbuf);
1579         errno = EINVAL;
1580         return -1;
1581     }
1582     wcsncpy(buf, wbuf, bufsiz);
1583     PyMem_RawFree(wbuf);
1584     return (int)r1;
1585 }
1586 #endif
1587 
1588 #ifdef HAVE_REALPATH
1589 
1590 /* Return the canonicalized absolute pathname. Encode path to the locale
1591    encoding, decode the result from the locale encoding.
1592    Return NULL on error. */
1593 
1594 wchar_t*
_Py_wrealpath(const wchar_t * path,wchar_t * resolved_path,size_t resolved_path_size)1595 _Py_wrealpath(const wchar_t *path,
1596               wchar_t *resolved_path, size_t resolved_path_size)
1597 {
1598     char *cpath;
1599     char cresolved_path[MAXPATHLEN];
1600     wchar_t *wresolved_path;
1601     char *res;
1602     size_t r;
1603     cpath = _Py_EncodeLocaleRaw(path, NULL);
1604     if (cpath == NULL) {
1605         errno = EINVAL;
1606         return NULL;
1607     }
1608     res = realpath(cpath, cresolved_path);
1609     PyMem_RawFree(cpath);
1610     if (res == NULL)
1611         return NULL;
1612 
1613     wresolved_path = Py_DecodeLocale(cresolved_path, &r);
1614     if (wresolved_path == NULL) {
1615         errno = EINVAL;
1616         return NULL;
1617     }
1618     if (resolved_path_size <= r) {
1619         PyMem_RawFree(wresolved_path);
1620         errno = EINVAL;
1621         return NULL;
1622     }
1623     wcsncpy(resolved_path, wresolved_path, resolved_path_size);
1624     PyMem_RawFree(wresolved_path);
1625     return resolved_path;
1626 }
1627 #endif
1628 
1629 /* Get the current directory. size is the buffer size in wide characters
1630    including the null character. Decode the path from the locale encoding.
1631    Return NULL on error. */
1632 
1633 wchar_t*
_Py_wgetcwd(wchar_t * buf,size_t size)1634 _Py_wgetcwd(wchar_t *buf, size_t size)
1635 {
1636 #ifdef MS_WINDOWS
1637     int isize = (int)Py_MIN(size, INT_MAX);
1638     return _wgetcwd(buf, isize);
1639 #else
1640     char fname[MAXPATHLEN];
1641     wchar_t *wname;
1642     size_t len;
1643 
1644     if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
1645         return NULL;
1646     wname = Py_DecodeLocale(fname, &len);
1647     if (wname == NULL)
1648         return NULL;
1649     if (size <= len) {
1650         PyMem_RawFree(wname);
1651         return NULL;
1652     }
1653     wcsncpy(buf, wname, size);
1654     PyMem_RawFree(wname);
1655     return buf;
1656 #endif
1657 }
1658 
1659 /* Duplicate a file descriptor. The new file descriptor is created as
1660    non-inheritable. Return a new file descriptor on success, raise an OSError
1661    exception and return -1 on error.
1662 
1663    The GIL is released to call dup(). The caller must hold the GIL. */
1664 int
_Py_dup(int fd)1665 _Py_dup(int fd)
1666 {
1667 #ifdef MS_WINDOWS
1668     HANDLE handle;
1669 #endif
1670 
1671     assert(PyGILState_Check());
1672 
1673 #ifdef MS_WINDOWS
1674     _Py_BEGIN_SUPPRESS_IPH
1675     handle = (HANDLE)_get_osfhandle(fd);
1676     _Py_END_SUPPRESS_IPH
1677     if (handle == INVALID_HANDLE_VALUE) {
1678         PyErr_SetFromErrno(PyExc_OSError);
1679         return -1;
1680     }
1681 
1682     Py_BEGIN_ALLOW_THREADS
1683     _Py_BEGIN_SUPPRESS_IPH
1684     fd = dup(fd);
1685     _Py_END_SUPPRESS_IPH
1686     Py_END_ALLOW_THREADS
1687     if (fd < 0) {
1688         PyErr_SetFromErrno(PyExc_OSError);
1689         return -1;
1690     }
1691 
1692     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1693         _Py_BEGIN_SUPPRESS_IPH
1694         close(fd);
1695         _Py_END_SUPPRESS_IPH
1696         return -1;
1697     }
1698 #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
1699     Py_BEGIN_ALLOW_THREADS
1700     _Py_BEGIN_SUPPRESS_IPH
1701     fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
1702     _Py_END_SUPPRESS_IPH
1703     Py_END_ALLOW_THREADS
1704     if (fd < 0) {
1705         PyErr_SetFromErrno(PyExc_OSError);
1706         return -1;
1707     }
1708 
1709 #else
1710     Py_BEGIN_ALLOW_THREADS
1711     _Py_BEGIN_SUPPRESS_IPH
1712     fd = dup(fd);
1713     _Py_END_SUPPRESS_IPH
1714     Py_END_ALLOW_THREADS
1715     if (fd < 0) {
1716         PyErr_SetFromErrno(PyExc_OSError);
1717         return -1;
1718     }
1719 
1720     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1721         _Py_BEGIN_SUPPRESS_IPH
1722         close(fd);
1723         _Py_END_SUPPRESS_IPH
1724         return -1;
1725     }
1726 #endif
1727     return fd;
1728 }
1729 
1730 #ifndef MS_WINDOWS
1731 /* Get the blocking mode of the file descriptor.
1732    Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
1733    raise an exception and return -1 on error. */
1734 int
_Py_get_blocking(int fd)1735 _Py_get_blocking(int fd)
1736 {
1737     int flags;
1738     _Py_BEGIN_SUPPRESS_IPH
1739     flags = fcntl(fd, F_GETFL, 0);
1740     _Py_END_SUPPRESS_IPH
1741     if (flags < 0) {
1742         PyErr_SetFromErrno(PyExc_OSError);
1743         return -1;
1744     }
1745 
1746     return !(flags & O_NONBLOCK);
1747 }
1748 
1749 /* Set the blocking mode of the specified file descriptor.
1750 
1751    Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
1752    otherwise.
1753 
1754    Return 0 on success, raise an exception and return -1 on error. */
1755 int
_Py_set_blocking(int fd,int blocking)1756 _Py_set_blocking(int fd, int blocking)
1757 {
1758 #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
1759     int arg = !blocking;
1760     if (ioctl(fd, FIONBIO, &arg) < 0)
1761         goto error;
1762 #else
1763     int flags, res;
1764 
1765     _Py_BEGIN_SUPPRESS_IPH
1766     flags = fcntl(fd, F_GETFL, 0);
1767     if (flags >= 0) {
1768         if (blocking)
1769             flags = flags & (~O_NONBLOCK);
1770         else
1771             flags = flags | O_NONBLOCK;
1772 
1773         res = fcntl(fd, F_SETFL, flags);
1774     } else {
1775         res = -1;
1776     }
1777     _Py_END_SUPPRESS_IPH
1778 
1779     if (res < 0)
1780         goto error;
1781 #endif
1782     return 0;
1783 
1784 error:
1785     PyErr_SetFromErrno(PyExc_OSError);
1786     return -1;
1787 }
1788 #endif
1789 
1790 
/* Get the numeric formatting fields of localeconv() as Python objects.

   decimal_point: if not NULL, receives lc->decimal_point decoded with
       PyUnicode_DecodeLocale().
   thousands_sep: if not NULL, receives lc->thousands_sep decoded with
       PyUnicode_DecodeLocale().
   grouping: if not NULL, receives the lc->grouping pointer itself; it
       points into the structure returned by localeconv(), no copy is made.

   If a requested string is longer than 1 byte or is non-ASCII, and the
   LC_NUMERIC locale differs from the LC_CTYPE locale, LC_CTYPE is
   temporarily set to the LC_NUMERIC locale while decoding, then restored.

   Return 0 on success, -1 on error.

   NOTE: if decoding thousands_sep fails, *decimal_point has already been
   stored and is not released here. */
int
_Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep,
                         const char **grouping)
{
    int res = -1;

    struct lconv *lc = localeconv();

    /* Set if a requested string cannot be decoded as a single ASCII byte */
    int change_locale = 0;
    if (decimal_point != NULL &&
        (strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127))
    {
        change_locale = 1;
    }
    if (thousands_sep != NULL &&
        (strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127))
    {
        change_locale = 1;
    }

    /* Keep a copy of the LC_CTYPE locale */
    char *oldloc = NULL, *loc = NULL;
    if (change_locale) {
        oldloc = setlocale(LC_CTYPE, NULL);
        if (!oldloc) {
            PyErr_SetString(PyExc_RuntimeWarning, "failed to get LC_CTYPE locale");
            return -1;
        }

        /* Copy the name: it is still needed after the setlocale() calls
           below, to compare against and to restore LC_CTYPE */
        oldloc = _PyMem_Strdup(oldloc);
        if (!oldloc) {
            PyErr_NoMemory();
            return -1;
        }

        /* loc stays non-NULL only if LC_CTYPE really has to be switched */
        loc = setlocale(LC_NUMERIC, NULL);
        if (loc != NULL && strcmp(loc, oldloc) == 0) {
            loc = NULL;
        }

        if (loc != NULL) {
            /* Only set the locale temporarily the LC_CTYPE locale
               if LC_NUMERIC locale is different than LC_CTYPE locale and
               decimal_point and/or thousands_sep are non-ASCII or longer than
               1 byte */
            setlocale(LC_CTYPE, loc);
        }
    }

    if (decimal_point != NULL) {
        *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL);
        if (*decimal_point == NULL) {
            goto error;
        }
    }
    if (thousands_sep != NULL) {
        *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL);
        if (*thousands_sep == NULL) {
            goto error;
        }
    }

    if (grouping != NULL) {
        *grouping = lc->grouping;
    }

    res = 0;

    /* Common exit path, also reached on success: restore LC_CTYPE if it was
       switched and release the saved locale name */
error:
    if (loc != NULL) {
        setlocale(LC_CTYPE, oldloc);
    }
    PyMem_Free(oldloc);
    return res;
}
1866