1 #include "Python.h"
2 #include "pycore_fileutils.h"     // fileutils definitions
3 #include "pycore_runtime.h"       // _PyRuntime
4 #include "osdefs.h"               // SEP
5 #include <locale.h>
6 
7 #ifdef MS_WINDOWS
8 #  include <malloc.h>
9 #  include <windows.h>
10 extern int winerror_to_errno(int);
11 #endif
12 
13 #ifdef HAVE_LANGINFO_H
14 #include <langinfo.h>
15 #endif
16 
17 #ifdef HAVE_SYS_IOCTL_H
18 #include <sys/ioctl.h>
19 #endif
20 
21 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
22 #include <iconv.h>
23 #endif
24 
25 #ifdef HAVE_FCNTL_H
26 #include <fcntl.h>
27 #endif /* HAVE_FCNTL_H */
28 
#ifdef O_CLOEXEC
/* Does open() support the O_CLOEXEC flag? Possible values:

   -1: unknown (this is the initial value)
    0: open() ignores the O_CLOEXEC flag, e.g. Linux kernel older than 2.6.23
    1: open() supports the O_CLOEXEC flag, close-on-exec is set

   The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
   and os.open(). */
int _Py_open_cloexec_works = -1;
#endif
40 
// Highest valid Unicode code point.
// The value must be the same in unicodeobject.c.
#define MAX_UNICODE 0x10ffff

// mbstowcs() and mbrtowc() errors.
// DECODE_ERROR is also the value the wrappers in this file return when they
// reject a decoded character themselves.
static const size_t DECODE_ERROR = ((size_t)-1);
// Compared against the mbrtowc() result to detect an incomplete character.
static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
47 
48 
49 static int
get_surrogateescape(_Py_error_handler errors,int * surrogateescape)50 get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
51 {
52     switch (errors)
53     {
54     case _Py_ERROR_STRICT:
55         *surrogateescape = 0;
56         return 0;
57     case _Py_ERROR_SURROGATEESCAPE:
58         *surrogateescape = 1;
59         return 0;
60     default:
61         return -1;
62     }
63 }
64 
65 
66 PyObject *
_Py_device_encoding(int fd)67 _Py_device_encoding(int fd)
68 {
69     int valid;
70     Py_BEGIN_ALLOW_THREADS
71     _Py_BEGIN_SUPPRESS_IPH
72     valid = isatty(fd);
73     _Py_END_SUPPRESS_IPH
74     Py_END_ALLOW_THREADS
75     if (!valid)
76         Py_RETURN_NONE;
77 
78 #if defined(MS_WINDOWS)
79     UINT cp;
80     if (fd == 0)
81         cp = GetConsoleCP();
82     else if (fd == 1 || fd == 2)
83         cp = GetConsoleOutputCP();
84     else
85         cp = 0;
86     /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
87        has no console */
88     if (cp == 0) {
89         Py_RETURN_NONE;
90     }
91 
92     return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
93 #else
94     return _Py_GetLocaleEncodingObject();
95 #endif
96 }
97 
98 
99 static size_t
is_valid_wide_char(wchar_t ch)100 is_valid_wide_char(wchar_t ch)
101 {
102 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
103     /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
104        for non-Unicode locales, which makes values higher than MAX_UNICODE
105        possibly valid. */
106     return 1;
107 #endif
108     if (Py_UNICODE_IS_SURROGATE(ch)) {
109         // Reject lone surrogate characters
110         return 0;
111     }
112     if (ch > MAX_UNICODE) {
113         // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
114         // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
115         // it creates characters outside the [U+0000; U+10ffff] range:
116         // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
117         return 0;
118     }
119     return 1;
120 }
121 
122 
123 static size_t
_Py_mbstowcs(wchar_t * dest,const char * src,size_t n)124 _Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
125 {
126     size_t count = mbstowcs(dest, src, n);
127     if (dest != NULL && count != DECODE_ERROR) {
128         for (size_t i=0; i < count; i++) {
129             wchar_t ch = dest[i];
130             if (!is_valid_wide_char(ch)) {
131                 return DECODE_ERROR;
132             }
133         }
134     }
135     return count;
136 }
137 
138 
#ifdef HAVE_MBRTOWC
/* Wrapper around mbrtowc() which additionally validates the output.

   pwc must not be NULL. Return the mbrtowc() result, except that
   DECODE_ERROR is returned when a character was converted but is rejected
   by is_valid_wide_char(). */
static size_t
_Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
{
    assert(pwc != NULL);
    const size_t nbytes = mbrtowc(pwc, str, len, pmbs);

    /* End of string or error: no character to validate */
    if (nbytes == 0
        || nbytes == DECODE_ERROR
        || nbytes == INCOMPLETE_CHARACTER)
    {
        return nbytes;
    }

    return is_valid_wide_char(*pwc) ? nbytes : DECODE_ERROR;
}
#endif
153 
154 
155 #if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
156 
157 #define USE_FORCE_ASCII
158 
159 extern int _Py_normalize_encoding(const char *, char *, size_t);
160 
/* Work around a FreeBSD and OpenIndiana locale encoding issue with the C
   locale and the POSIX locale. nl_langinfo(CODESET) announces an alias of the
   ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
   locale.getpreferredencoding() codec. For example, if command line arguments
   are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
   UnicodeEncodeError instead of retrieving the original byte string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C" or
   "POSIX", nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and
   at least one byte in range 0x80-0xff can be decoded from the locale
   encoding. The workaround is also enabled on error, for example if getting
   the locale failed.

   On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
   announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
   ASCII encoding in this case.

   Values of force_ascii:

       1: the workaround is used: Py_EncodeLocale() uses
          encode_ascii_surrogateescape() and Py_DecodeLocale() uses
          decode_ascii()
       0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
          Py_DecodeLocale() uses mbstowcs()
      -1: unknown, need to call check_force_ascii() to get the value

   The value is computed on first use and cached here;
   _Py_ResetForceASCII() sets it back to -1.
*/
static int force_ascii = -1;
189 
/* Decide whether the "force ASCII" workaround must be used.

   Return 1 if the ASCII encoding must be forced, 0 otherwise. Return 1 as
   well if an error occurred (the locale or the codeset cannot be retrieved,
   or the codeset name cannot be normalized).

   The probe buffers passed to _Py_mbstowcs() are NUL-terminated: mbstowcs()
   expects a string, and a decoder treating the probed byte as the lead byte
   of a multibyte sequence must not read past the end of the buffer. */
static int
check_force_ascii(void)
{
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* Single byte 0xA7 followed by a NUL terminator */
        const unsigned char probe[2] = {0xA7, 0};
        wchar_t wch;
        size_t res;

        res = _Py_mbstowcs(&wch, (const char *)probe, 1);
        if (res != DECODE_ERROR && wch == L'\xA7') {
            /* On HP-UX with C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    const char* ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char **alias=ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i=0x80; i<=0xff; i++) {
        char ch[2];
        wchar_t wch[1];
        size_t res;

        ch[0] = (char)(unsigned char)i;
        ch[1] = '\0';   /* keep the probe a valid C string */
        res = _Py_mbstowcs(wch, ch, 1);
        if (res != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
289 
290 
291 int
_Py_GetForceASCII(void)292 _Py_GetForceASCII(void)
293 {
294     if (force_ascii == -1) {
295         force_ascii = check_force_ascii();
296     }
297     return force_ascii;
298 }
299 
300 
/* Forget the cached force_ascii value: set it back to -1 ("unknown") so that
   the next user calls check_force_ascii() again. */
void
_Py_ResetForceASCII(void)
{
    force_ascii = -1;
}
306 
307 
308 static int
encode_ascii(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)309 encode_ascii(const wchar_t *text, char **str,
310              size_t *error_pos, const char **reason,
311              int raw_malloc, _Py_error_handler errors)
312 {
313     char *result = NULL, *out;
314     size_t len, i;
315     wchar_t ch;
316 
317     int surrogateescape;
318     if (get_surrogateescape(errors, &surrogateescape) < 0) {
319         return -3;
320     }
321 
322     len = wcslen(text);
323 
324     /* +1 for NULL byte */
325     if (raw_malloc) {
326         result = PyMem_RawMalloc(len + 1);
327     }
328     else {
329         result = PyMem_Malloc(len + 1);
330     }
331     if (result == NULL) {
332         return -1;
333     }
334 
335     out = result;
336     for (i=0; i<len; i++) {
337         ch = text[i];
338 
339         if (ch <= 0x7f) {
340             /* ASCII character */
341             *out++ = (char)ch;
342         }
343         else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
344             /* UTF-8b surrogate */
345             *out++ = (char)(ch - 0xdc00);
346         }
347         else {
348             if (raw_malloc) {
349                 PyMem_RawFree(result);
350             }
351             else {
352                 PyMem_Free(result);
353             }
354             if (error_pos != NULL) {
355                 *error_pos = i;
356             }
357             if (reason) {
358                 *reason = "encoding error";
359             }
360             return -2;
361         }
362     }
363     *out = '\0';
364     *str = result;
365     return 0;
366 }
367 #else
/* Variant used when USE_FORCE_ASCII is not defined (forced UTF-8 filesystem
   encoding, or Windows): the workaround is never used. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
373 
/* Variant used when USE_FORCE_ASCII is not defined: there is no cached
   state to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
379 #endif   /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
380 
381 
382 #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
383 static int
decode_ascii(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)384 decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
385              const char **reason, _Py_error_handler errors)
386 {
387     wchar_t *res;
388     unsigned char *in;
389     wchar_t *out;
390     size_t argsize = strlen(arg) + 1;
391 
392     int surrogateescape;
393     if (get_surrogateescape(errors, &surrogateescape) < 0) {
394         return -3;
395     }
396 
397     if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
398         return -1;
399     }
400     res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
401     if (!res) {
402         return -1;
403     }
404 
405     out = res;
406     for (in = (unsigned char*)arg; *in; in++) {
407         unsigned char ch = *in;
408         if (ch < 128) {
409             *out++ = ch;
410         }
411         else {
412             if (!surrogateescape) {
413                 PyMem_RawFree(res);
414                 if (wlen) {
415                     *wlen = in - (unsigned char*)arg;
416                 }
417                 if (reason) {
418                     *reason = "decoding error";
419                 }
420                 return -2;
421             }
422             *out++ = 0xdc00 + ch;
423         }
424     }
425     *out = 0;
426 
427     if (wlen != NULL) {
428         *wlen = out - res;
429     }
430     *wstr = res;
431     return 0;
432 }
433 #endif   /* !HAVE_MBRTOWC */
434 
435 static int
decode_current_locale(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)436 decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
437                       const char **reason, _Py_error_handler errors)
438 {
439     wchar_t *res;
440     size_t argsize;
441     size_t count;
442 #ifdef HAVE_MBRTOWC
443     unsigned char *in;
444     wchar_t *out;
445     mbstate_t mbs;
446 #endif
447 
448     int surrogateescape;
449     if (get_surrogateescape(errors, &surrogateescape) < 0) {
450         return -3;
451     }
452 
453 #ifdef HAVE_BROKEN_MBSTOWCS
454     /* Some platforms have a broken implementation of
455      * mbstowcs which does not count the characters that
456      * would result from conversion.  Use an upper bound.
457      */
458     argsize = strlen(arg);
459 #else
460     argsize = _Py_mbstowcs(NULL, arg, 0);
461 #endif
462     if (argsize != DECODE_ERROR) {
463         if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
464             return -1;
465         }
466         res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
467         if (!res) {
468             return -1;
469         }
470 
471         count = _Py_mbstowcs(res, arg, argsize + 1);
472         if (count != DECODE_ERROR) {
473             *wstr = res;
474             if (wlen != NULL) {
475                 *wlen = count;
476             }
477             return 0;
478         }
479         PyMem_RawFree(res);
480     }
481 
482     /* Conversion failed. Fall back to escaping with surrogateescape. */
483 #ifdef HAVE_MBRTOWC
484     /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
485 
486     /* Overallocate; as multi-byte characters are in the argument, the
487        actual output could use less memory. */
488     argsize = strlen(arg) + 1;
489     if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
490         return -1;
491     }
492     res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
493     if (!res) {
494         return -1;
495     }
496 
497     in = (unsigned char*)arg;
498     out = res;
499     memset(&mbs, 0, sizeof mbs);
500     while (argsize) {
501         size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
502         if (converted == 0) {
503             /* Reached end of string; null char stored. */
504             break;
505         }
506 
507         if (converted == INCOMPLETE_CHARACTER) {
508             /* Incomplete character. This should never happen,
509                since we provide everything that we have -
510                unless there is a bug in the C library, or I
511                misunderstood how mbrtowc works. */
512             goto decode_error;
513         }
514 
515         if (converted == DECODE_ERROR) {
516             if (!surrogateescape) {
517                 goto decode_error;
518             }
519 
520             /* Decoding error. Escape as UTF-8b, and start over in the initial
521                shift state. */
522             *out++ = 0xdc00 + *in++;
523             argsize--;
524             memset(&mbs, 0, sizeof mbs);
525             continue;
526         }
527 
528         // _Py_mbrtowc() reject lone surrogate characters
529         assert(!Py_UNICODE_IS_SURROGATE(*out));
530 
531         /* successfully converted some bytes */
532         in += converted;
533         argsize -= converted;
534         out++;
535     }
536     if (wlen != NULL) {
537         *wlen = out - res;
538     }
539     *wstr = res;
540     return 0;
541 
542 decode_error:
543     PyMem_RawFree(res);
544     if (wlen) {
545         *wlen = in - (unsigned char*)arg;
546     }
547     if (reason) {
548         *reason = "decoding error";
549     }
550     return -2;
551 #else   /* HAVE_MBRTOWC */
552     /* Cannot use C locale for escaping; manually escape as if charset
553        is ASCII (i.e. escape all bytes > 128. This will still roundtrip
554        correctly in the locale's charset, which must be an ASCII superset. */
555     return decode_ascii(arg, wstr, wlen, reason, errors);
556 #endif   /* HAVE_MBRTOWC */
557 }
558 
559 
560 /* Decode a byte string from the locale encoding.
561 
562    Use the strict error handler if 'surrogateescape' is zero.  Use the
563    surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
564    bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
565    can be decoded as a surrogate character, escape the bytes using the
566    surrogateescape error handler instead of decoding them.
567 
568    On success, return 0 and write the newly allocated wide character string into
569    *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
570    the number of wide characters excluding the null character into *wlen.
571 
572    On memory allocation failure, return -1.
573 
574    On decoding error, return -2. If wlen is not NULL, write the start of
575    invalid byte sequence in the input string into *wlen. If reason is not NULL,
576    write the decoding error message into *reason.
577 
578    Return -3 if the error handler 'errors' is not supported.
579 
580    Use the Py_EncodeLocaleEx() function to encode the character string back to
581    a byte string. */
582 int
_Py_DecodeLocaleEx(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,int current_locale,_Py_error_handler errors)583 _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
584                    const char **reason,
585                    int current_locale, _Py_error_handler errors)
586 {
587     if (current_locale) {
588 #ifdef _Py_FORCE_UTF8_LOCALE
589         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
590                                 errors);
591 #else
592         return decode_current_locale(arg, wstr, wlen, reason, errors);
593 #endif
594     }
595 
596 #ifdef _Py_FORCE_UTF8_FS_ENCODING
597     return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
598                             errors);
599 #else
600     int use_utf8 = (Py_UTF8Mode == 1);
601 #ifdef MS_WINDOWS
602     use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
603 #endif
604     if (use_utf8) {
605         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
606                                 errors);
607     }
608 
609 #ifdef USE_FORCE_ASCII
610     if (force_ascii == -1) {
611         force_ascii = check_force_ascii();
612     }
613 
614     if (force_ascii) {
615         /* force ASCII encoding to workaround mbstowcs() issue */
616         return decode_ascii(arg, wstr, wlen, reason, errors);
617     }
618 #endif
619 
620     return decode_current_locale(arg, wstr, wlen, reason, errors);
621 #endif   /* !_Py_FORCE_UTF8_FS_ENCODING */
622 }
623 
624 
625 /* Decode a byte string from the locale encoding with the
626    surrogateescape error handler: undecodable bytes are decoded as characters
627    in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
628    character, escape the bytes using the surrogateescape error handler instead
629    of decoding them.
630 
631    Return a pointer to a newly allocated wide character string, use
632    PyMem_RawFree() to free the memory. If size is not NULL, write the number of
633    wide characters excluding the null character into *size
634 
635    Return NULL on decoding error or memory allocation error. If *size* is not
636    NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
637    decoding error.
638 
639    Decoding errors should never happen, unless there is a bug in the C
640    library.
641 
642    Use the Py_EncodeLocale() function to encode the character string back to a
643    byte string. */
644 wchar_t*
Py_DecodeLocale(const char * arg,size_t * wlen)645 Py_DecodeLocale(const char* arg, size_t *wlen)
646 {
647     wchar_t *wstr;
648     int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
649                                  NULL, 0,
650                                  _Py_ERROR_SURROGATEESCAPE);
651     if (res != 0) {
652         assert(res != -3);
653         if (wlen != NULL) {
654             *wlen = (size_t)res;
655         }
656         return NULL;
657     }
658     return wstr;
659 }
660 
661 
662 static int
encode_current_locale(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)663 encode_current_locale(const wchar_t *text, char **str,
664                       size_t *error_pos, const char **reason,
665                       int raw_malloc, _Py_error_handler errors)
666 {
667     const size_t len = wcslen(text);
668     char *result = NULL, *bytes = NULL;
669     size_t i, size, converted;
670     wchar_t c, buf[2];
671 
672     int surrogateescape;
673     if (get_surrogateescape(errors, &surrogateescape) < 0) {
674         return -3;
675     }
676 
677     /* The function works in two steps:
678        1. compute the length of the output buffer in bytes (size)
679        2. outputs the bytes */
680     size = 0;
681     buf[1] = 0;
682     while (1) {
683         for (i=0; i < len; i++) {
684             c = text[i];
685             if (c >= 0xdc80 && c <= 0xdcff) {
686                 if (!surrogateescape) {
687                     goto encode_error;
688                 }
689                 /* UTF-8b surrogate */
690                 if (bytes != NULL) {
691                     *bytes++ = c - 0xdc00;
692                     size--;
693                 }
694                 else {
695                     size++;
696                 }
697                 continue;
698             }
699             else {
700                 buf[0] = c;
701                 if (bytes != NULL) {
702                     converted = wcstombs(bytes, buf, size);
703                 }
704                 else {
705                     converted = wcstombs(NULL, buf, 0);
706                 }
707                 if (converted == DECODE_ERROR) {
708                     goto encode_error;
709                 }
710                 if (bytes != NULL) {
711                     bytes += converted;
712                     size -= converted;
713                 }
714                 else {
715                     size += converted;
716                 }
717             }
718         }
719         if (result != NULL) {
720             *bytes = '\0';
721             break;
722         }
723 
724         size += 1; /* nul byte at the end */
725         if (raw_malloc) {
726             result = PyMem_RawMalloc(size);
727         }
728         else {
729             result = PyMem_Malloc(size);
730         }
731         if (result == NULL) {
732             return -1;
733         }
734         bytes = result;
735     }
736     *str = result;
737     return 0;
738 
739 encode_error:
740     if (raw_malloc) {
741         PyMem_RawFree(result);
742     }
743     else {
744         PyMem_Free(result);
745     }
746     if (error_pos != NULL) {
747         *error_pos = i;
748     }
749     if (reason) {
750         *reason = "encoding error";
751     }
752     return -2;
753 }
754 
755 
756 /* Encode a string to the locale encoding.
757 
758    Parameters:
759 
760    * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
761      of PyMem_Malloc().
762    * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
763      Python filesystem encoding.
764    * errors: error handler like "strict" or "surrogateescape".
765 
766    Return value:
767 
768     0: success, *str is set to a newly allocated decoded string.
769    -1: memory allocation failure
770    -2: encoding error, set *error_pos and *reason (if set).
771    -3: the error handler 'errors' is not supported.
772  */
773 static int
encode_locale_ex(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,int current_locale,_Py_error_handler errors)774 encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
775                  const char **reason,
776                  int raw_malloc, int current_locale, _Py_error_handler errors)
777 {
778     if (current_locale) {
779 #ifdef _Py_FORCE_UTF8_LOCALE
780         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
781                                 raw_malloc, errors);
782 #else
783         return encode_current_locale(text, str, error_pos, reason,
784                                      raw_malloc, errors);
785 #endif
786     }
787 
788 #ifdef _Py_FORCE_UTF8_FS_ENCODING
789     return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
790                             raw_malloc, errors);
791 #else
792     int use_utf8 = (Py_UTF8Mode == 1);
793 #ifdef MS_WINDOWS
794     use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
795 #endif
796     if (use_utf8) {
797         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
798                                 raw_malloc, errors);
799     }
800 
801 #ifdef USE_FORCE_ASCII
802     if (force_ascii == -1) {
803         force_ascii = check_force_ascii();
804     }
805 
806     if (force_ascii) {
807         return encode_ascii(text, str, error_pos, reason,
808                             raw_malloc, errors);
809     }
810 #endif
811 
812     return encode_current_locale(text, str, error_pos, reason,
813                                  raw_malloc, errors);
814 #endif   /* _Py_FORCE_UTF8_FS_ENCODING */
815 }
816 
817 static char*
encode_locale(const wchar_t * text,size_t * error_pos,int raw_malloc,int current_locale)818 encode_locale(const wchar_t *text, size_t *error_pos,
819               int raw_malloc, int current_locale)
820 {
821     char *str;
822     int res = encode_locale_ex(text, &str, error_pos, NULL,
823                                raw_malloc, current_locale,
824                                _Py_ERROR_SURROGATEESCAPE);
825     if (res != -2 && error_pos) {
826         *error_pos = (size_t)-1;
827     }
828     if (res != 0) {
829         return NULL;
830     }
831     return str;
832 }
833 
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
   to the index of the invalid character on encoding error.

   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
   character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;   /* use the Python filesystem encoding */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
851 
852 
/* Same as Py_EncodeLocale(), except that the result is allocated by
   PyMem_RawMalloc(): it must be freed by PyMem_RawFree() instead of
   PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;       /* allocate with PyMem_RawMalloc() */
    const int current_locale = 0;   /* use the Python filesystem encoding */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
860 
861 
862 int
_Py_EncodeLocaleEx(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int current_locale,_Py_error_handler errors)863 _Py_EncodeLocaleEx(const wchar_t *text, char **str,
864                    size_t *error_pos, const char **reason,
865                    int current_locale, _Py_error_handler errors)
866 {
867     return encode_locale_ex(text, str, error_pos, reason, 1,
868                             current_locale, errors);
869 }
870 
871 
872 // Get the current locale encoding name:
873 //
874 // - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
875 // - Return "UTF-8" if the UTF-8 Mode is enabled
876 // - On Windows, return the ANSI code page (ex: "cp1250")
877 // - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string.
878 // - Otherwise, return nl_langinfo(CODESET).
879 //
880 // Return NULL on memory allocation failure.
881 //
882 // See also config_get_locale_encoding()
883 wchar_t*
_Py_GetLocaleEncoding(void)884 _Py_GetLocaleEncoding(void)
885 {
886 #ifdef _Py_FORCE_UTF8_LOCALE
887     // On Android langinfo.h and CODESET are missing,
888     // and UTF-8 is always used in mbstowcs() and wcstombs().
889     return _PyMem_RawWcsdup(L"UTF-8");
890 #else
891     const PyPreConfig *preconfig = &_PyRuntime.preconfig;
892     if (preconfig->utf8_mode) {
893         return _PyMem_RawWcsdup(L"UTF-8");
894     }
895 
896 #ifdef MS_WINDOWS
897     wchar_t encoding[23];
898     unsigned int ansi_codepage = GetACP();
899     swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
900     encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
901     return _PyMem_RawWcsdup(encoding);
902 #else
903     const char *encoding = nl_langinfo(CODESET);
904     if (!encoding || encoding[0] == '\0') {
905         // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
906         // macOS if the LC_CTYPE locale is not supported.
907         return _PyMem_RawWcsdup(L"UTF-8");
908     }
909 
910     wchar_t *wstr;
911     int res = decode_current_locale(encoding, &wstr, NULL,
912                                     NULL, _Py_ERROR_SURROGATEESCAPE);
913     if (res < 0) {
914         return NULL;
915     }
916     return wstr;
917 #endif  // !MS_WINDOWS
918 
919 #endif  // !_Py_FORCE_UTF8_LOCALE
920 }
921 
922 
923 PyObject *
_Py_GetLocaleEncodingObject(void)924 _Py_GetLocaleEncodingObject(void)
925 {
926     wchar_t *encoding = _Py_GetLocaleEncoding();
927     if (encoding == NULL) {
928         PyErr_NoMemory();
929         return NULL;
930     }
931 
932     PyObject *str = PyUnicode_FromWideChar(encoding, -1);
933     PyMem_RawFree(encoding);
934     return str;
935 }
936 
937 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
938 
/* Check whether the current locale uses a non-Unicode internal wchar_t form.

   Return 0 if nl_langinfo(CODESET) returns NULL, "UTF-8" or "646".
   Return 1 for any other codeset. */
int
_Py_LocaleUsesNonUnicodeWchar(void)
{
    /* Oracle Solaris uses non-Unicode internal wchar_t form for
       non-Unicode locales and hence needs conversion to UTF first. */
    char *codeset = nl_langinfo(CODESET);
    if (codeset == NULL) {
        return 0;
    }
    if (strcmp(codeset, "UTF-8") == 0) {
        return 0;
    }
    /* 646 refers to ISO/IEC 646 standard that corresponds to ASCII encoding */
    if (strcmp(codeset, "646") == 0) {
        return 0;
    }
    return 1;
}
952 
953 static wchar_t *
_Py_ConvertWCharForm(const wchar_t * source,Py_ssize_t size,const char * tocode,const char * fromcode)954 _Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
955                      const char *tocode, const char *fromcode)
956 {
957     Py_BUILD_ASSERT(sizeof(wchar_t) == 4);
958 
959     /* Ensure we won't overflow the size. */
960     if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
961         PyErr_NoMemory();
962         return NULL;
963     }
964 
965     /* the string doesn't have to be NULL terminated */
966     wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
967     if (target == NULL) {
968         PyErr_NoMemory();
969         return NULL;
970     }
971 
972     iconv_t cd = iconv_open(tocode, fromcode);
973     if (cd == (iconv_t)-1) {
974         PyErr_Format(PyExc_ValueError, "iconv_open() failed");
975         PyMem_Free(target);
976         return NULL;
977     }
978 
979     char *inbuf = (char *) source;
980     char *outbuf = (char *) target;
981     size_t inbytesleft = sizeof(wchar_t) * size;
982     size_t outbytesleft = inbytesleft;
983 
984     size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
985     if (ret == DECODE_ERROR) {
986         PyErr_Format(PyExc_ValueError, "iconv() failed");
987         PyMem_Free(target);
988         iconv_close(cd);
989         return NULL;
990     }
991 
992     iconv_close(cd);
993     return target;
994 }
995 
996 /* Convert a wide character string to the UCS-4 encoded string. This
997    is necessary on systems where internal form of wchar_t are not Unicode
998    code points (e.g. Oracle Solaris).
999 
1000    Return a pointer to a newly allocated string, use PyMem_Free() to free
1001    the memory. Return NULL and raise exception on conversion or memory
1002    allocation error. */
1003 wchar_t *
_Py_DecodeNonUnicodeWchar(const wchar_t * native,Py_ssize_t size)1004 _Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
1005 {
1006     return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
1007 }
1008 
1009 /* Convert a UCS-4 encoded string to native wide character string. This
1010    is necessary on systems where internal form of wchar_t are not Unicode
1011    code points (e.g. Oracle Solaris).
1012 
1013    The conversion is done in place. This can be done because both wchar_t
1014    and UCS-4 use 4-byte encoding, and one wchar_t symbol always correspond
1015    to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
1016    which is currently the only system using these functions; it doesn't have
1017    to be for other systems).
1018 
1019    Return 0 on success. Return -1 and raise exception on conversion
1020    or memory allocation error. */
1021 int
_Py_EncodeNonUnicodeWchar_InPlace(wchar_t * unicode,Py_ssize_t size)1022 _Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
1023 {
1024     wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
1025     if (!result) {
1026         return -1;
1027     }
1028     memcpy(unicode, result, size * sizeof(wchar_t));
1029     PyMem_Free(result);
1030     return 0;
1031 }
1032 #endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
1033 
1034 #ifdef MS_WINDOWS
1035 static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
1036 
1037 static void
FILE_TIME_to_time_t_nsec(FILETIME * in_ptr,time_t * time_out,int * nsec_out)1038 FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
1039 {
1040     /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
1041     /* Cannot simply cast and dereference in_ptr,
1042        since it might not be aligned properly */
1043     __int64 in;
1044     memcpy(&in, in_ptr, sizeof(in));
1045     *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1046     *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
1047 }
1048 
1049 void
_Py_time_t_to_FILE_TIME(time_t time_in,int nsec_in,FILETIME * out_ptr)1050 _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
1051 {
1052     /* XXX endianness */
1053     __int64 out;
1054     out = time_in + secs_between_epochs;
1055     out = out * 10000000 + nsec_in / 100;
1056     memcpy(out_ptr, &out, sizeof(out));
1057 }
1058 
1059 /* Below, we *know* that ugo+r is 0444 */
1060 #if _S_IREAD != 0400
1061 #error Unsupported C library
1062 #endif
1063 static int
attributes_to_mode(DWORD attr)1064 attributes_to_mode(DWORD attr)
1065 {
1066     int m = 0;
1067     if (attr & FILE_ATTRIBUTE_DIRECTORY)
1068         m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
1069     else
1070         m |= _S_IFREG;
1071     if (attr & FILE_ATTRIBUTE_READONLY)
1072         m |= 0444;
1073     else
1074         m |= 0666;
1075     return m;
1076 }
1077 
1078 void
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION * info,ULONG reparse_tag,struct _Py_stat_struct * result)1079 _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
1080                            struct _Py_stat_struct *result)
1081 {
1082     memset(result, 0, sizeof(*result));
1083     result->st_mode = attributes_to_mode(info->dwFileAttributes);
1084     result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
1085     result->st_dev = info->dwVolumeSerialNumber;
1086     result->st_rdev = result->st_dev;
1087     FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
1088     FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1089     FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
1090     result->st_nlink = info->nNumberOfLinks;
1091     result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
1092     /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1093        open other name surrogate reparse points without traversing them. To
1094        detect/handle these, check st_file_attributes and st_reparse_tag. */
1095     result->st_reparse_tag = reparse_tag;
1096     if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1097         reparse_tag == IO_REPARSE_TAG_SYMLINK) {
1098         /* first clear the S_IFMT bits */
1099         result->st_mode ^= (result->st_mode & S_IFMT);
1100         /* now set the bits that make this a symlink */
1101         result->st_mode |= S_IFLNK;
1102     }
1103     result->st_file_attributes = info->dwFileAttributes;
1104 }
1105 #endif
1106 
/* Return information about a file.

   On POSIX, use fstat().

   On Windows, use GetFileType() and GetFileInformationByHandle() which support
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
   #23152.

   On Windows, set the last Windows error and return nonzero on error. On
   POSIX, set errno and return nonzero on error. Fill status and return 0 on
   success.

   On Windows, for handles which are not disk files, only st_mode is filled
   (_S_IFCHR for character devices, _S_IFIFO for pipes); all other fields of
   status are left zeroed.

   No Python exception is raised by this function. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    BY_HANDLE_FILE_INFORMATION info;
    HANDLE h;
    int type;

    h = _Py_get_osfhandle_noraise(fd);

    if (h == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    /* Start from an all-zero structure: the early "return 0" paths below
       only fill st_mode. */
    memset(status, 0, sizeof(*status));

    type = GetFileType(h);
    if (type == FILE_TYPE_UNKNOWN) {
        /* FILE_TYPE_UNKNOWN is only treated as a failure when a Win32 error
           code is also set. */
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* else: valid but unknown file */
    }

    /* Anything that is not a disk file: report only the file type. */
    if (type != FILE_TYPE_DISK) {
        if (type == FILE_TYPE_CHAR)
            status->st_mode = _S_IFCHR;
        else if (type == FILE_TYPE_PIPE)
            status->st_mode = _S_IFIFO;
        return 0;
    }

    if (!GetFileInformationByHandle(h, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    /* A reparse tag of 0 is passed: no reparse tag is available here. */
    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
1170 
1171 /* Return information about a file.
1172 
1173    On POSIX, use fstat().
1174 
1175    On Windows, use GetFileType() and GetFileInformationByHandle() which support
1176    files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
1177    than 2 GiB because the file size type is a signed 32-bit integer: see issue
1178    #23152.
1179 
1180    Raise an exception and return -1 on error. On Windows, set the last Windows
1181    error on error. On POSIX, set errno on error. Fill status and return 0 on
1182    success.
1183 
1184    Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1185    to call fstat(). The caller must hold the GIL. */
1186 int
_Py_fstat(int fd,struct _Py_stat_struct * status)1187 _Py_fstat(int fd, struct _Py_stat_struct *status)
1188 {
1189     int res;
1190 
1191     assert(PyGILState_Check());
1192 
1193     Py_BEGIN_ALLOW_THREADS
1194     res = _Py_fstat_noraise(fd, status);
1195     Py_END_ALLOW_THREADS
1196 
1197     if (res != 0) {
1198 #ifdef MS_WINDOWS
1199         PyErr_SetFromWindowsErr(0);
1200 #else
1201         PyErr_SetFromErrno(PyExc_OSError);
1202 #endif
1203         return -1;
1204     }
1205     return 0;
1206 }
1207 
1208 /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1209    call stat() otherwise. Only fill st_mode attribute on Windows.
1210 
1211    Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1212    raised. */
1213 
1214 int
_Py_stat(PyObject * path,struct stat * statbuf)1215 _Py_stat(PyObject *path, struct stat *statbuf)
1216 {
1217 #ifdef MS_WINDOWS
1218     int err;
1219     struct _stat wstatbuf;
1220 
1221 #if USE_UNICODE_WCHAR_CACHE
1222     const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1223 #else /* USE_UNICODE_WCHAR_CACHE */
1224     wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1225 #endif /* USE_UNICODE_WCHAR_CACHE */
1226     if (wpath == NULL)
1227         return -2;
1228 
1229     err = _wstat(wpath, &wstatbuf);
1230     if (!err)
1231         statbuf->st_mode = wstatbuf.st_mode;
1232 #if !USE_UNICODE_WCHAR_CACHE
1233     PyMem_Free(wpath);
1234 #endif /* USE_UNICODE_WCHAR_CACHE */
1235     return err;
1236 #else
1237     int ret;
1238     PyObject *bytes;
1239     char *cpath;
1240 
1241     bytes = PyUnicode_EncodeFSDefault(path);
1242     if (bytes == NULL)
1243         return -2;
1244 
1245     /* check for embedded null bytes */
1246     if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1247         Py_DECREF(bytes);
1248         return -2;
1249     }
1250 
1251     ret = stat(cpath, statbuf);
1252     Py_DECREF(bytes);
1253     return ret;
1254 #endif
1255 }
1256 
1257 
1258 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1259 static int
get_inheritable(int fd,int raise)1260 get_inheritable(int fd, int raise)
1261 {
1262 #ifdef MS_WINDOWS
1263     HANDLE handle;
1264     DWORD flags;
1265 
1266     handle = _Py_get_osfhandle_noraise(fd);
1267     if (handle == INVALID_HANDLE_VALUE) {
1268         if (raise)
1269             PyErr_SetFromErrno(PyExc_OSError);
1270         return -1;
1271     }
1272 
1273     if (!GetHandleInformation(handle, &flags)) {
1274         if (raise)
1275             PyErr_SetFromWindowsErr(0);
1276         return -1;
1277     }
1278 
1279     return (flags & HANDLE_FLAG_INHERIT);
1280 #else
1281     int flags;
1282 
1283     flags = fcntl(fd, F_GETFD, 0);
1284     if (flags == -1) {
1285         if (raise)
1286             PyErr_SetFromErrno(PyExc_OSError);
1287         return -1;
1288     }
1289     return !(flags & FD_CLOEXEC);
1290 #endif
1291 }
1292 
/* Get the inheritable flag of the specified file descriptor.

   Return 1 if the file descriptor can be inherited and 0 if it cannot.
   On error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;
    return get_inheritable(fd, raise_on_error);
}
1301 
1302 
/* Set (inheritable != 0) or clear (inheritable == 0) the inheritable flag of
   the file descriptor fd.

   If raise is nonzero, a Python exception is raised on error; otherwise the
   function only returns -1.

   atomic_flag_works, if not NULL, points to a cached tri-state value
   (-1: unknown, 0: does not work, 1: works) and may only be passed together
   with inheritable=0. When the value is unknown, it is computed from the
   current inheritable flag of fd; when it is (or becomes) nonzero, fd is
   left untouched.

   Return 0 on success, -1 on error.

   This function MUST be kept async-signal-safe on POSIX when raise=0. */
static int
set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
{
#ifdef MS_WINDOWS
    HANDLE handle;
    DWORD flags;
#else
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
    /* Cached across calls: -1 = not known yet, 0 = ioctl() does not work,
       1 = ioctl() works. */
    static int ioctl_works = -1;
    int request;
    int err;
#endif
    int flags, new_flags;
    int res;
#endif

    /* atomic_flag_works can only be used to make the file descriptor
       non-inheritable */
    assert(!(atomic_flag_works != NULL && inheritable));

    if (atomic_flag_works != NULL && !inheritable) {
        if (*atomic_flag_works == -1) {
            /* First use: the atomic flag "works" if fd is already
               non-inheritable. */
            int isInheritable = get_inheritable(fd, raise);
            if (isInheritable == -1)
                return -1;
            *atomic_flag_works = !isInheritable;
        }

        /* The flag used at creation already made fd non-inheritable:
           nothing to do. */
        if (*atomic_flag_works)
            return 0;
    }

#ifdef MS_WINDOWS
    handle = _Py_get_osfhandle_noraise(fd);
    if (handle == INVALID_HANDLE_VALUE) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }

    if (inheritable)
        flags = HANDLE_FLAG_INHERIT;
    else
        flags = 0;

    /* This check can be removed once support for Windows 7 ends. */
#define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
        GetFileType(handle) == FILE_TYPE_CHAR)

    /* SetHandleInformation() is skipped for console pseudo-handles; the
       call then counts as a success. */
    if (!CONSOLE_PSEUDOHANDLE(handle) &&
        !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
        if (raise)
            PyErr_SetFromWindowsErr(0);
        return -1;
    }
#undef CONSOLE_PSEUDOHANDLE
    return 0;

#else

#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
    if (ioctl_works != 0 && raise != 0) {
        /* fast-path: ioctl() only requires one syscall */
        /* caveat: raise=0 is an indicator that we must be async-signal-safe
         * thus avoid using ioctl() so we skip the fast-path. */
        if (inheritable)
            request = FIONCLEX;
        else
            request = FIOCLEX;
        err = ioctl(fd, request, NULL);
        if (!err) {
            ioctl_works = 1;
            return 0;
        }

        /* NOTE: the "else" just below binds to the "if" after the #endif
           when O_PATH is defined; keep the two statements adjacent. */
#ifdef O_PATH
        if (errno == EBADF) {
            // bpo-44849: On Linux and FreeBSD, ioctl(FIOCLEX) fails with EBADF
            // on O_PATH file descriptors. Fall through to the fcntl()
            // implementation.
        }
        else
#endif
        if (errno != ENOTTY && errno != EACCES) {
            if (raise)
                PyErr_SetFromErrno(PyExc_OSError);
            return -1;
        }
        else {
            /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
               device". The ioctl is declared but not supported by the kernel.
               Remember that ioctl() doesn't work. It is the case on
               Illumos-based OS for example.

               Issue #27057: When SELinux policy disallows ioctl it will fail
               with EACCES. While FIOCLEX is safe operation it may be
               unavailable because ioctl was denied altogether.
               This can be the case on Android. */
            ioctl_works = 0;
        }
        /* fallback to fcntl() if ioctl() does not work */
    }
#endif

    /* slow-path: fcntl() requires two syscalls */
    flags = fcntl(fd, F_GETFD);
    if (flags < 0) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }

    /* Inheritable means FD_CLOEXEC cleared; non-inheritable means set. */
    if (inheritable) {
        new_flags = flags & ~FD_CLOEXEC;
    }
    else {
        new_flags = flags | FD_CLOEXEC;
    }

    if (new_flags == flags) {
        /* FD_CLOEXEC flag already set/cleared: nothing to do */
        return 0;
    }

    res = fcntl(fd, F_SETFD, new_flags);
    if (res < 0) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }
    return 0;
#endif
}
1437 
/* Make the file descriptor non-inheritable, without raising a Python
   exception and without using an atomic-flag cache.

   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise_on_error = 0;
    return set_inheritable(fd, inheritable, raise_on_error, NULL);
}
1445 
/* Set the inheritable flag of the specified file descriptor.

   Return 0 on success. On error, raise an exception and return -1.

   atomic_flag_works may point to a cached tri-state value:

    * *atomic_flag_works==-1: check if the inheritable is set on the file
      descriptor: if yes, set *atomic_flag_works to 1, otherwise set to 0 and
      set the inheritable flag
    * *atomic_flag_works==1: do nothing
    * *atomic_flag_works==0: set inheritable flag to False

   Pass atomic_flag_works=NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: it must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;
    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
1467 
/* Variant of _Py_set_inheritable() which never raises an exception:
   on error, set errno and return -1.

   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;
    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
1476 
/* Shared implementation of _Py_open() and _Py_open_noraise().

   Open 'pathname' with 'flags' and make the new file descriptor
   non-inheritable (O_NOINHERIT on Windows; O_CLOEXEC when available plus
   set_inheritable() elsewhere).

   If gil_held is nonzero: emit the "open" audit event, release the GIL
   around open(), retry open() on EINTR unless PyErr_CheckSignals() reports
   an exception, and raise a Python exception on error.

   If gil_held is zero: call open() once and raise no exception.

   Return the file descriptor on success, -1 on error. */
static int
_Py_open_impl(const char *pathname, int flags, int gil_held)
{
    int fd;
    int async_err = 0;
#ifndef MS_WINDOWS
    int *atomic_flag_works;
#endif

#ifdef MS_WINDOWS
    flags |= O_NOINHERIT;
#elif defined(O_CLOEXEC)
    /* Remember in _Py_open_cloexec_works whether open() honours
       O_CLOEXEC. */
    atomic_flag_works = &_Py_open_cloexec_works;
    flags |= O_CLOEXEC;
#else
    atomic_flag_works = NULL;
#endif

    if (gil_held) {
        /* The decoded path is used for the audit event and for the filename
           of the OSError raised on failure. */
        PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
        if (pathname_obj == NULL) {
            return -1;
        }
        if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
            Py_DECREF(pathname_obj);
            return -1;
        }

        /* Retry on EINTR unless the signal handler raised an exception. */
        do {
            Py_BEGIN_ALLOW_THREADS
            fd = open(pathname, flags);
            Py_END_ALLOW_THREADS
        } while (fd < 0
                 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
        if (async_err) {
            /* An exception is already set by the signal handler. */
            Py_DECREF(pathname_obj);
            return -1;
        }
        if (fd < 0) {
            PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
            Py_DECREF(pathname_obj);
            return -1;
        }
        Py_DECREF(pathname_obj);
    }
    else {
        /* No GIL: no audit event, no retry, no exception. */
        fd = open(pathname, flags);
        if (fd < 0)
            return -1;
    }

#ifndef MS_WINDOWS
    /* gil_held doubles as the 'raise' argument: an exception is raised on
       failure only when the GIL is held. */
    if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
        close(fd);
        return -1;
    }
#endif

    return fd;
}
1537 
/* Open a file with the specified flags (wrapper to open() function).

   Return a file descriptor on success. On error, raise an exception and
   return -1.

   The file descriptor is created non-inheritable.

   If open() is interrupted by a signal (fails with EINTR), the syscall is
   retried, except if the Python signal handler raises an exception.

   The GIL is released to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());

    const int gil_held = 1;
    return _Py_open_impl(pathname, flags, gil_held);
}
1555 
/* Open a file with the specified flags (wrapper to open() function),
   without raising any Python exception.

   Return a file descriptor on success. Set errno and return -1 on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;
    return _Py_open_impl(pathname, flags, gil_held);
}
1567 
1568 /* Open a file. Use _wfopen() on Windows, encode the path to the locale
1569    encoding and use fopen() otherwise.
1570 
1571    The file descriptor is created non-inheritable.
1572 
1573    If interrupted by a signal, fail with EINTR. */
1574 FILE *
_Py_wfopen(const wchar_t * path,const wchar_t * mode)1575 _Py_wfopen(const wchar_t *path, const wchar_t *mode)
1576 {
1577     FILE *f;
1578     if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1579         return NULL;
1580     }
1581 #ifndef MS_WINDOWS
1582     char *cpath;
1583     char cmode[10];
1584     size_t r;
1585     r = wcstombs(cmode, mode, 10);
1586     if (r == DECODE_ERROR || r >= 10) {
1587         errno = EINVAL;
1588         return NULL;
1589     }
1590     cpath = _Py_EncodeLocaleRaw(path, NULL);
1591     if (cpath == NULL) {
1592         return NULL;
1593     }
1594     f = fopen(cpath, cmode);
1595     PyMem_RawFree(cpath);
1596 #else
1597     f = _wfopen(path, mode);
1598 #endif
1599     if (f == NULL)
1600         return NULL;
1601     if (make_non_inheritable(fileno(f)) < 0) {
1602         fclose(f);
1603         return NULL;
1604     }
1605     return f;
1606 }
1607 
1608 
1609 /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
1610    encoding and call fopen() otherwise.
1611 
1612    Return the new file object on success. Raise an exception and return NULL
1613    on error.
1614 
1615    The file descriptor is created non-inheritable.
1616 
1617    When interrupted by a signal (open() fails with EINTR), retry the syscall,
1618    except if the Python signal handler raises an exception.
1619 
1620    Release the GIL to call _wfopen() or fopen(). The caller must hold
1621    the GIL. */
1622 FILE*
_Py_fopen_obj(PyObject * path,const char * mode)1623 _Py_fopen_obj(PyObject *path, const char *mode)
1624 {
1625     FILE *f;
1626     int async_err = 0;
1627 #ifdef MS_WINDOWS
1628     wchar_t wmode[10];
1629     int usize;
1630 
1631     assert(PyGILState_Check());
1632 
1633     if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1634         return NULL;
1635     }
1636     if (!PyUnicode_Check(path)) {
1637         PyErr_Format(PyExc_TypeError,
1638                      "str file path expected under Windows, got %R",
1639                      Py_TYPE(path));
1640         return NULL;
1641     }
1642 #if USE_UNICODE_WCHAR_CACHE
1643     const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1644 #else /* USE_UNICODE_WCHAR_CACHE */
1645     wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1646 #endif /* USE_UNICODE_WCHAR_CACHE */
1647     if (wpath == NULL)
1648         return NULL;
1649 
1650     usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1651                                 wmode, Py_ARRAY_LENGTH(wmode));
1652     if (usize == 0) {
1653         PyErr_SetFromWindowsErr(0);
1654 #if !USE_UNICODE_WCHAR_CACHE
1655         PyMem_Free(wpath);
1656 #endif /* USE_UNICODE_WCHAR_CACHE */
1657         return NULL;
1658     }
1659 
1660     do {
1661         Py_BEGIN_ALLOW_THREADS
1662         f = _wfopen(wpath, wmode);
1663         Py_END_ALLOW_THREADS
1664     } while (f == NULL
1665              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1666 #if !USE_UNICODE_WCHAR_CACHE
1667     PyMem_Free(wpath);
1668 #endif /* USE_UNICODE_WCHAR_CACHE */
1669 #else
1670     PyObject *bytes;
1671     const char *path_bytes;
1672 
1673     assert(PyGILState_Check());
1674 
1675     if (!PyUnicode_FSConverter(path, &bytes))
1676         return NULL;
1677     path_bytes = PyBytes_AS_STRING(bytes);
1678 
1679     if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1680         Py_DECREF(bytes);
1681         return NULL;
1682     }
1683 
1684     do {
1685         Py_BEGIN_ALLOW_THREADS
1686         f = fopen(path_bytes, mode);
1687         Py_END_ALLOW_THREADS
1688     } while (f == NULL
1689              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1690 
1691     Py_DECREF(bytes);
1692 #endif
1693     if (async_err)
1694         return NULL;
1695 
1696     if (f == NULL) {
1697         PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1698         return NULL;
1699     }
1700 
1701     if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1702         fclose(f);
1703         return NULL;
1704     }
1705     return f;
1706 }
1707 
/* Read count bytes from fd into buf.

   count is silently clamped to _PY_READ_MAX before calling read().

   On success, return the number of read bytes, it can be lower than count.
   If the current file offset is at or past the end of file, no bytes are read,
   and read() returns zero.

   On error, raise an exception, set errno and return -1.

   When interrupted by a signal (read() fails with EINTR), retry the syscall.
   If the Python signal handler raises an exception, the function returns -1
   (the syscall is not retried).

   Release the GIL to call read(). The caller must hold the GIL. */
Py_ssize_t
_Py_read(int fd, void *buf, size_t count)
{
    Py_ssize_t n;
    int err;
    int async_err = 0;

    assert(PyGILState_Check());

    /* _Py_read() must not be called with an exception set, otherwise the
     * caller may think that read() was interrupted by a signal and the signal
     * handler raised an exception. */
    assert(!PyErr_Occurred());

    if (count > _PY_READ_MAX) {
        count = _PY_READ_MAX;
    }

    _Py_BEGIN_SUPPRESS_IPH
    do {
        Py_BEGIN_ALLOW_THREADS
        /* Reset errno so that the value saved in err below comes from this
           read() call. */
        errno = 0;
#ifdef MS_WINDOWS
        n = read(fd, buf, (int)count);
#else
        n = read(fd, buf, count);
#endif
        /* save/restore errno because PyErr_CheckSignals()
         * and PyErr_SetFromErrno() can modify it */
        err = errno;
        Py_END_ALLOW_THREADS
    } while (n < 0 && err == EINTR &&
            !(async_err = PyErr_CheckSignals()));
    _Py_END_SUPPRESS_IPH

    if (async_err) {
        /* read() was interrupted by a signal (failed with EINTR)
         * and the Python signal handler raised an exception */
        errno = err;
        assert(errno == EINTR && PyErr_Occurred());
        return -1;
    }
    if (n < 0) {
        PyErr_SetFromErrno(PyExc_OSError);
        /* Hand the errno of read() back to the caller. */
        errno = err;
        return -1;
    }

    return n;
}
1771 
/* Shared implementation of _Py_write() and _Py_write_noraise().

   Write up to count bytes of buf into fd. count is clamped to
   _PY_WRITE_MAX, and on Windows to 32767 when fd is a tty.

   If gil_held is nonzero: release the GIL around write(), retry on EINTR
   unless PyErr_CheckSignals() reports an exception, and raise OSError on
   failure.

   If gil_held is zero: retry on EINTR unconditionally and never raise.

   Return the number of written bytes on success. On error, set errno to the
   value left by write() and return -1. */
static Py_ssize_t
_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
{
    Py_ssize_t n;
    int err;
    int async_err = 0;

    _Py_BEGIN_SUPPRESS_IPH
#ifdef MS_WINDOWS
    if (count > 32767) {
        /* Issue #11395: the Windows console returns an error (12: not
           enough space error) on writing into stdout if stdout mode is
           binary and the length is greater than 66,000 bytes (or less,
           depending on heap usage). */
        if (gil_held) {
            Py_BEGIN_ALLOW_THREADS
            if (isatty(fd)) {
                count = 32767;
            }
            Py_END_ALLOW_THREADS
        } else {
            if (isatty(fd)) {
                count = 32767;
            }
        }
    }
#endif
    if (count > _PY_WRITE_MAX) {
        count = _PY_WRITE_MAX;
    }

    if (gil_held) {
        do {
            Py_BEGIN_ALLOW_THREADS
            /* Reset errno so that the value saved in err below comes from
               this write() call. */
            errno = 0;
#ifdef MS_WINDOWS
            n = write(fd, buf, (int)count);
#else
            n = write(fd, buf, count);
#endif
            /* save/restore errno because PyErr_CheckSignals()
             * and PyErr_SetFromErrno() can modify it */
            err = errno;
            Py_END_ALLOW_THREADS
        } while (n < 0 && err == EINTR &&
                !(async_err = PyErr_CheckSignals()));
    }
    else {
        /* Without the GIL the Python signal handlers cannot be run: simply
           retry while write() fails with EINTR. */
        do {
            errno = 0;
#ifdef MS_WINDOWS
            n = write(fd, buf, (int)count);
#else
            n = write(fd, buf, count);
#endif
            err = errno;
        } while (n < 0 && err == EINTR);
    }
    _Py_END_SUPPRESS_IPH

    if (async_err) {
        /* write() was interrupted by a signal (failed with EINTR)
           and the Python signal handler raised an exception (if gil_held is
           nonzero). */
        errno = err;
        assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
        return -1;
    }
    if (n < 0) {
        if (gil_held)
            PyErr_SetFromErrno(PyExc_OSError);
        /* Hand the errno of write() back to the caller. */
        errno = err;
        return -1;
    }

    return n;
}
1849 
1850 /* Write count bytes of buf into fd.
1851 
1852    On success, return the number of written bytes, it can be lower than count
1853    including 0. On error, raise an exception, set errno and return -1.
1854 
1855    When interrupted by a signal (write() fails with EINTR), retry the syscall.
1856    If the Python signal handler raises an exception, the function returns -1
1857    (the syscall is not retried).
1858 
1859    Release the GIL to call write(). The caller must hold the GIL. */
1860 Py_ssize_t
_Py_write(int fd,const void * buf,size_t count)1861 _Py_write(int fd, const void *buf, size_t count)
1862 {
1863     assert(PyGILState_Check());
1864 
1865     /* _Py_write() must not be called with an exception set, otherwise the
1866      * caller may think that write() was interrupted by a signal and the signal
1867      * handler raised an exception. */
1868     assert(!PyErr_Occurred());
1869 
1870     return _Py_write_impl(fd, buf, count, 1);
1871 }
1872 
1873 /* Write count bytes of buf into fd.
1874  *
1875  * On success, return the number of written bytes, it can be lower than count
1876  * including 0. On error, set errno and return -1.
1877  *
1878  * When interrupted by a signal (write() fails with EINTR), retry the syscall
1879  * without calling the Python signal handler. */
1880 Py_ssize_t
_Py_write_noraise(int fd,const void * buf,size_t count)1881 _Py_write_noraise(int fd, const void *buf, size_t count)
1882 {
1883     return _Py_write_impl(fd, buf, count, 0);
1884 }
1885 
1886 #ifdef HAVE_READLINK
1887 
1888 /* Read value of symbolic link. Encode the path to the locale encoding, decode
1889    the result from the locale encoding.
1890 
1891    Return -1 on encoding error, on readlink() error, if the internal buffer is
1892    too short, on decoding error, or if 'buf' is too short. */
1893 int
_Py_wreadlink(const wchar_t * path,wchar_t * buf,size_t buflen)1894 _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
1895 {
1896     char *cpath;
1897     char cbuf[MAXPATHLEN];
1898     size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
1899     wchar_t *wbuf;
1900     Py_ssize_t res;
1901     size_t r1;
1902 
1903     cpath = _Py_EncodeLocaleRaw(path, NULL);
1904     if (cpath == NULL) {
1905         errno = EINVAL;
1906         return -1;
1907     }
1908     res = readlink(cpath, cbuf, cbuf_len);
1909     PyMem_RawFree(cpath);
1910     if (res == -1) {
1911         return -1;
1912     }
1913     if ((size_t)res == cbuf_len) {
1914         errno = EINVAL;
1915         return -1;
1916     }
1917     cbuf[res] = '\0'; /* buf will be null terminated */
1918     wbuf = Py_DecodeLocale(cbuf, &r1);
1919     if (wbuf == NULL) {
1920         errno = EINVAL;
1921         return -1;
1922     }
1923     /* wbuf must have space to store the trailing NUL character */
1924     if (buflen <= r1) {
1925         PyMem_RawFree(wbuf);
1926         errno = EINVAL;
1927         return -1;
1928     }
1929     wcsncpy(buf, wbuf, buflen);
1930     PyMem_RawFree(wbuf);
1931     return (int)r1;
1932 }
1933 #endif
1934 
1935 #ifdef HAVE_REALPATH
1936 
1937 /* Return the canonicalized absolute pathname. Encode path to the locale
1938    encoding, decode the result from the locale encoding.
1939 
1940    Return NULL on encoding error, realpath() error, decoding error
1941    or if 'resolved_path' is too short. */
1942 wchar_t*
_Py_wrealpath(const wchar_t * path,wchar_t * resolved_path,size_t resolved_path_len)1943 _Py_wrealpath(const wchar_t *path,
1944               wchar_t *resolved_path, size_t resolved_path_len)
1945 {
1946     char *cpath;
1947     char cresolved_path[MAXPATHLEN];
1948     wchar_t *wresolved_path;
1949     char *res;
1950     size_t r;
1951     cpath = _Py_EncodeLocaleRaw(path, NULL);
1952     if (cpath == NULL) {
1953         errno = EINVAL;
1954         return NULL;
1955     }
1956     res = realpath(cpath, cresolved_path);
1957     PyMem_RawFree(cpath);
1958     if (res == NULL)
1959         return NULL;
1960 
1961     wresolved_path = Py_DecodeLocale(cresolved_path, &r);
1962     if (wresolved_path == NULL) {
1963         errno = EINVAL;
1964         return NULL;
1965     }
1966     /* wresolved_path must have space to store the trailing NUL character */
1967     if (resolved_path_len <= r) {
1968         PyMem_RawFree(wresolved_path);
1969         errno = EINVAL;
1970         return NULL;
1971     }
1972     wcsncpy(resolved_path, wresolved_path, resolved_path_len);
1973     PyMem_RawFree(wresolved_path);
1974     return resolved_path;
1975 }
1976 #endif
1977 
1978 
1979 #ifndef MS_WINDOWS
1980 int
_Py_isabs(const wchar_t * path)1981 _Py_isabs(const wchar_t *path)
1982 {
1983     return (path[0] == SEP);
1984 }
1985 #endif
1986 
1987 
1988 /* Get an absolute path.
1989    On error (ex: fail to get the current directory), return -1.
1990    On memory allocation failure, set *abspath_p to NULL and return 0.
1991    On success, return a newly allocated to *abspath_p to and return 0.
1992    The string must be freed by PyMem_RawFree(). */
1993 int
_Py_abspath(const wchar_t * path,wchar_t ** abspath_p)1994 _Py_abspath(const wchar_t *path, wchar_t **abspath_p)
1995 {
1996 #ifdef MS_WINDOWS
1997     wchar_t woutbuf[MAX_PATH], *woutbufp = woutbuf;
1998     DWORD result;
1999 
2000     result = GetFullPathNameW(path,
2001                               Py_ARRAY_LENGTH(woutbuf), woutbuf,
2002                               NULL);
2003     if (!result) {
2004         return -1;
2005     }
2006 
2007     if (result > Py_ARRAY_LENGTH(woutbuf)) {
2008         if ((size_t)result <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2009             woutbufp = PyMem_RawMalloc((size_t)result * sizeof(wchar_t));
2010         }
2011         else {
2012             woutbufp = NULL;
2013         }
2014         if (!woutbufp) {
2015             *abspath_p = NULL;
2016             return 0;
2017         }
2018 
2019         result = GetFullPathNameW(path, result, woutbufp, NULL);
2020         if (!result) {
2021             PyMem_RawFree(woutbufp);
2022             return -1;
2023         }
2024     }
2025 
2026     if (woutbufp != woutbuf) {
2027         *abspath_p = woutbufp;
2028         return 0;
2029     }
2030 
2031     *abspath_p = _PyMem_RawWcsdup(woutbufp);
2032     return 0;
2033 #else
2034     if (_Py_isabs(path)) {
2035         *abspath_p = _PyMem_RawWcsdup(path);
2036         return 0;
2037     }
2038 
2039     wchar_t cwd[MAXPATHLEN + 1];
2040     cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2041     if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2042         /* unable to get the current directory */
2043         return -1;
2044     }
2045 
2046     size_t cwd_len = wcslen(cwd);
2047     size_t path_len = wcslen(path);
2048     size_t len = cwd_len + 1 + path_len + 1;
2049     if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2050         *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
2051     }
2052     else {
2053         *abspath_p = NULL;
2054     }
2055     if (*abspath_p == NULL) {
2056         return 0;
2057     }
2058 
2059     wchar_t *abspath = *abspath_p;
2060     memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
2061     abspath += cwd_len;
2062 
2063     *abspath = (wchar_t)SEP;
2064     abspath++;
2065 
2066     memcpy(abspath, path, path_len * sizeof(wchar_t));
2067     abspath += path_len;
2068 
2069     *abspath = 0;
2070     return 0;
2071 #endif
2072 }
2073 
2074 
2075 /* Get the current directory. buflen is the buffer size in wide characters
2076    including the null character. Decode the path from the locale encoding.
2077 
2078    Return NULL on getcwd() error, on decoding error, or if 'buf' is
2079    too short. */
2080 wchar_t*
_Py_wgetcwd(wchar_t * buf,size_t buflen)2081 _Py_wgetcwd(wchar_t *buf, size_t buflen)
2082 {
2083 #ifdef MS_WINDOWS
2084     int ibuflen = (int)Py_MIN(buflen, INT_MAX);
2085     return _wgetcwd(buf, ibuflen);
2086 #else
2087     char fname[MAXPATHLEN];
2088     wchar_t *wname;
2089     size_t len;
2090 
2091     if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
2092         return NULL;
2093     wname = Py_DecodeLocale(fname, &len);
2094     if (wname == NULL)
2095         return NULL;
2096     /* wname must have space to store the trailing NUL character */
2097     if (buflen <= len) {
2098         PyMem_RawFree(wname);
2099         return NULL;
2100     }
2101     wcsncpy(buf, wname, buflen);
2102     PyMem_RawFree(wname);
2103     return buf;
2104 #endif
2105 }
2106 
2107 /* Duplicate a file descriptor. The new file descriptor is created as
2108    non-inheritable. Return a new file descriptor on success, raise an OSError
2109    exception and return -1 on error.
2110 
2111    The GIL is released to call dup(). The caller must hold the GIL. */
2112 int
_Py_dup(int fd)2113 _Py_dup(int fd)
2114 {
2115 #ifdef MS_WINDOWS
2116     HANDLE handle;
2117 #endif
2118 
2119     assert(PyGILState_Check());
2120 
2121 #ifdef MS_WINDOWS
2122     handle = _Py_get_osfhandle(fd);
2123     if (handle == INVALID_HANDLE_VALUE)
2124         return -1;
2125 
2126     Py_BEGIN_ALLOW_THREADS
2127     _Py_BEGIN_SUPPRESS_IPH
2128     fd = dup(fd);
2129     _Py_END_SUPPRESS_IPH
2130     Py_END_ALLOW_THREADS
2131     if (fd < 0) {
2132         PyErr_SetFromErrno(PyExc_OSError);
2133         return -1;
2134     }
2135 
2136     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2137         _Py_BEGIN_SUPPRESS_IPH
2138         close(fd);
2139         _Py_END_SUPPRESS_IPH
2140         return -1;
2141     }
2142 #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2143     Py_BEGIN_ALLOW_THREADS
2144     _Py_BEGIN_SUPPRESS_IPH
2145     fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2146     _Py_END_SUPPRESS_IPH
2147     Py_END_ALLOW_THREADS
2148     if (fd < 0) {
2149         PyErr_SetFromErrno(PyExc_OSError);
2150         return -1;
2151     }
2152 
2153 #else
2154     Py_BEGIN_ALLOW_THREADS
2155     _Py_BEGIN_SUPPRESS_IPH
2156     fd = dup(fd);
2157     _Py_END_SUPPRESS_IPH
2158     Py_END_ALLOW_THREADS
2159     if (fd < 0) {
2160         PyErr_SetFromErrno(PyExc_OSError);
2161         return -1;
2162     }
2163 
2164     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2165         _Py_BEGIN_SUPPRESS_IPH
2166         close(fd);
2167         _Py_END_SUPPRESS_IPH
2168         return -1;
2169     }
2170 #endif
2171     return fd;
2172 }
2173 
2174 #ifndef MS_WINDOWS
2175 /* Get the blocking mode of the file descriptor.
2176    Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2177    raise an exception and return -1 on error. */
2178 int
_Py_get_blocking(int fd)2179 _Py_get_blocking(int fd)
2180 {
2181     int flags;
2182     _Py_BEGIN_SUPPRESS_IPH
2183     flags = fcntl(fd, F_GETFL, 0);
2184     _Py_END_SUPPRESS_IPH
2185     if (flags < 0) {
2186         PyErr_SetFromErrno(PyExc_OSError);
2187         return -1;
2188     }
2189 
2190     return !(flags & O_NONBLOCK);
2191 }
2192 
2193 /* Set the blocking mode of the specified file descriptor.
2194 
2195    Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2196    otherwise.
2197 
2198    Return 0 on success, raise an exception and return -1 on error. */
2199 int
_Py_set_blocking(int fd,int blocking)2200 _Py_set_blocking(int fd, int blocking)
2201 {
2202 /* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2203    Use fcntl() instead. */
2204 #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
2205     int arg = !blocking;
2206     if (ioctl(fd, FIONBIO, &arg) < 0)
2207         goto error;
2208 #else
2209     int flags, res;
2210 
2211     _Py_BEGIN_SUPPRESS_IPH
2212     flags = fcntl(fd, F_GETFL, 0);
2213     if (flags >= 0) {
2214         if (blocking)
2215             flags = flags & (~O_NONBLOCK);
2216         else
2217             flags = flags | O_NONBLOCK;
2218 
2219         res = fcntl(fd, F_SETFL, flags);
2220     } else {
2221         res = -1;
2222     }
2223     _Py_END_SUPPRESS_IPH
2224 
2225     if (res < 0)
2226         goto error;
2227 #endif
2228     return 0;
2229 
2230 error:
2231     PyErr_SetFromErrno(PyExc_OSError);
2232     return -1;
2233 }
2234 #else   /* MS_WINDOWS */
2235 void*
_Py_get_osfhandle_noraise(int fd)2236 _Py_get_osfhandle_noraise(int fd)
2237 {
2238     void *handle;
2239     _Py_BEGIN_SUPPRESS_IPH
2240     handle = (void*)_get_osfhandle(fd);
2241     _Py_END_SUPPRESS_IPH
2242     return handle;
2243 }
2244 
2245 void*
_Py_get_osfhandle(int fd)2246 _Py_get_osfhandle(int fd)
2247 {
2248     void *handle = _Py_get_osfhandle_noraise(fd);
2249     if (handle == INVALID_HANDLE_VALUE)
2250         PyErr_SetFromErrno(PyExc_OSError);
2251 
2252     return handle;
2253 }
2254 
2255 int
_Py_open_osfhandle_noraise(void * handle,int flags)2256 _Py_open_osfhandle_noraise(void *handle, int flags)
2257 {
2258     int fd;
2259     _Py_BEGIN_SUPPRESS_IPH
2260     fd = _open_osfhandle((intptr_t)handle, flags);
2261     _Py_END_SUPPRESS_IPH
2262     return fd;
2263 }
2264 
2265 int
_Py_open_osfhandle(void * handle,int flags)2266 _Py_open_osfhandle(void *handle, int flags)
2267 {
2268     int fd = _Py_open_osfhandle_noraise(handle, flags);
2269     if (fd == -1)
2270         PyErr_SetFromErrno(PyExc_OSError);
2271 
2272     return fd;
2273 }
2274 #endif  /* MS_WINDOWS */
2275 
2276 int
_Py_GetLocaleconvNumeric(struct lconv * lc,PyObject ** decimal_point,PyObject ** thousands_sep)2277 _Py_GetLocaleconvNumeric(struct lconv *lc,
2278                          PyObject **decimal_point, PyObject **thousands_sep)
2279 {
2280     assert(decimal_point != NULL);
2281     assert(thousands_sep != NULL);
2282 
2283 #ifndef MS_WINDOWS
2284     int change_locale = 0;
2285     if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
2286         change_locale = 1;
2287     }
2288     if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
2289         change_locale = 1;
2290     }
2291 
2292     /* Keep a copy of the LC_CTYPE locale */
2293     char *oldloc = NULL, *loc = NULL;
2294     if (change_locale) {
2295         oldloc = setlocale(LC_CTYPE, NULL);
2296         if (!oldloc) {
2297             PyErr_SetString(PyExc_RuntimeWarning,
2298                             "failed to get LC_CTYPE locale");
2299             return -1;
2300         }
2301 
2302         oldloc = _PyMem_Strdup(oldloc);
2303         if (!oldloc) {
2304             PyErr_NoMemory();
2305             return -1;
2306         }
2307 
2308         loc = setlocale(LC_NUMERIC, NULL);
2309         if (loc != NULL && strcmp(loc, oldloc) == 0) {
2310             loc = NULL;
2311         }
2312 
2313         if (loc != NULL) {
2314             /* Only set the locale temporarily the LC_CTYPE locale
2315                if LC_NUMERIC locale is different than LC_CTYPE locale and
2316                decimal_point and/or thousands_sep are non-ASCII or longer than
2317                1 byte */
2318             setlocale(LC_CTYPE, loc);
2319         }
2320     }
2321 
2322 #define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2323 #else /* MS_WINDOWS */
2324 /* Use _W_* fields of Windows strcut lconv */
2325 #define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2326 #endif /* MS_WINDOWS */
2327 
2328     int res = -1;
2329 
2330     *decimal_point = GET_LOCALE_STRING(decimal_point);
2331     if (*decimal_point == NULL) {
2332         goto done;
2333     }
2334 
2335     *thousands_sep = GET_LOCALE_STRING(thousands_sep);
2336     if (*thousands_sep == NULL) {
2337         goto done;
2338     }
2339 
2340     res = 0;
2341 
2342 done:
2343 #ifndef MS_WINDOWS
2344     if (loc != NULL) {
2345         setlocale(LC_CTYPE, oldloc);
2346     }
2347     PyMem_Free(oldloc);
2348 #endif
2349     return res;
2350 
2351 #undef GET_LOCALE_STRING
2352 }
2353 
2354 /* Our selection logic for which function to use is as follows:
2355  * 1. If close_range(2) is available, always prefer that; it's better for
2356  *    contiguous ranges like this than fdwalk(3) which entails iterating over
2357  *    the entire fd space and simply doing nothing for those outside the range.
2358  * 2. If closefrom(2) is available, we'll attempt to use that next if we're
2359  *    closing up to sysconf(_SC_OPEN_MAX).
2360  * 2a. Fallback to fdwalk(3) if we're not closing up to sysconf(_SC_OPEN_MAX),
2361  *    as that will be more performant if the range happens to have any chunk of
2362  *    non-opened fd in the middle.
2363  * 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
2364  */
2365 #ifdef __FreeBSD__
2366 #  define USE_CLOSEFROM
2367 #endif /* __FreeBSD__ */
2368 
2369 #ifdef HAVE_FDWALK
2370 #  define USE_FDWALK
2371 #endif /* HAVE_FDWALK */
2372 
2373 #ifdef USE_FDWALK
2374 static int
_fdwalk_close_func(void * lohi,int fd)2375 _fdwalk_close_func(void *lohi, int fd)
2376 {
2377     int lo = ((int *)lohi)[0];
2378     int hi = ((int *)lohi)[1];
2379 
2380     if (fd >= hi) {
2381         return 1;
2382     }
2383     else if (fd >= lo) {
2384         /* Ignore errors */
2385         (void)close(fd);
2386     }
2387     return 0;
2388 }
2389 #endif /* USE_FDWALK */
2390 
2391 /* Closes all file descriptors in [first, last], ignoring errors. */
2392 void
_Py_closerange(int first,int last)2393 _Py_closerange(int first, int last)
2394 {
2395     first = Py_MAX(first, 0);
2396     _Py_BEGIN_SUPPRESS_IPH
2397 #ifdef HAVE_CLOSE_RANGE
2398     if (close_range(first, last, 0) == 0 || errno != ENOSYS) {
2399         /* Any errors encountered while closing file descriptors are ignored;
2400          * ENOSYS means no kernel support, though,
2401          * so we'll fallback to the other methods. */
2402     }
2403     else
2404 #endif /* HAVE_CLOSE_RANGE */
2405 #ifdef USE_CLOSEFROM
2406     if (last >= sysconf(_SC_OPEN_MAX)) {
2407         /* Any errors encountered while closing file descriptors are ignored */
2408         closefrom(first);
2409     }
2410     else
2411 #endif /* USE_CLOSEFROM */
2412 #ifdef USE_FDWALK
2413     {
2414         int lohi[2];
2415         lohi[0] = first;
2416         lohi[1] = last + 1;
2417         fdwalk(_fdwalk_close_func, lohi);
2418     }
2419 #else
2420     {
2421         for (int i = first; i <= last; i++) {
2422             /* Ignore errors */
2423             (void)close(i);
2424         }
2425     }
2426 #endif /* USE_FDWALK */
2427     _Py_END_SUPPRESS_IPH
2428 }
2429