1 #include "Python.h"
2 #include "pycore_fileutils.h"
3 #include "osdefs.h"               // SEP
4 #include <locale.h>
5 
6 #ifdef MS_WINDOWS
7 #  include <malloc.h>
8 #  include <windows.h>
9 extern int winerror_to_errno(int);
10 #endif
11 
12 #ifdef HAVE_LANGINFO_H
13 #include <langinfo.h>
14 #endif
15 
16 #ifdef HAVE_SYS_IOCTL_H
17 #include <sys/ioctl.h>
18 #endif
19 
20 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
21 #include <iconv.h>
22 #endif
23 
24 #ifdef HAVE_FCNTL_H
25 #include <fcntl.h>
26 #endif /* HAVE_FCNTL_H */
27 
28 #ifdef O_CLOEXEC
29 /* Does open() support the O_CLOEXEC flag? Possible values:
30 
31    -1: unknown
32     0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
33     1: open() supports O_CLOEXEC flag, close-on-exec is set
34 
35    The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
36    and os.open(). */
37 int _Py_open_cloexec_works = -1;
38 #endif
39 
40 // The value must be the same in unicodeobject.c.
41 #define MAX_UNICODE 0x10ffff
42 
// Error values returned by mbstowcs() and mbrtowc().
//
// DECODE_ERROR ((size_t)-1): the input cannot be converted. This file also
// compares the results of wcstombs() and iconv() against this value.
// INCOMPLETE_CHARACTER ((size_t)-2): mbrtowc() only; the bytes given so far
// form an incomplete multibyte character.
static const size_t DECODE_ERROR = ((size_t)-1);
static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
46 
47 
48 static int
get_surrogateescape(_Py_error_handler errors,int * surrogateescape)49 get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
50 {
51     switch (errors)
52     {
53     case _Py_ERROR_STRICT:
54         *surrogateescape = 0;
55         return 0;
56     case _Py_ERROR_SURROGATEESCAPE:
57         *surrogateescape = 1;
58         return 0;
59     default:
60         return -1;
61     }
62 }
63 
64 
65 PyObject *
_Py_device_encoding(int fd)66 _Py_device_encoding(int fd)
67 {
68 #if defined(MS_WINDOWS)
69     UINT cp;
70 #endif
71     int valid;
72     Py_BEGIN_ALLOW_THREADS
73     _Py_BEGIN_SUPPRESS_IPH
74     valid = isatty(fd);
75     _Py_END_SUPPRESS_IPH
76     Py_END_ALLOW_THREADS
77     if (!valid)
78         Py_RETURN_NONE;
79 
80 #if defined(MS_WINDOWS)
81     if (fd == 0)
82         cp = GetConsoleCP();
83     else if (fd == 1 || fd == 2)
84         cp = GetConsoleOutputCP();
85     else
86         cp = 0;
87     /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
88        has no console */
89     if (cp != 0)
90         return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
91 #elif defined(CODESET)
92     {
93         char *codeset = nl_langinfo(CODESET);
94         if (codeset != NULL && codeset[0] != 0)
95             return PyUnicode_FromString(codeset);
96     }
97 #endif
98     Py_RETURN_NONE;
99 }
100 
101 
102 static size_t
is_valid_wide_char(wchar_t ch)103 is_valid_wide_char(wchar_t ch)
104 {
105 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
106     /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
107        for non-Unicode locales, which makes values higher than MAX_UNICODE
108        possibly valid. */
109     return 1;
110 #endif
111     if (Py_UNICODE_IS_SURROGATE(ch)) {
112         // Reject lone surrogate characters
113         return 0;
114     }
115     if (ch > MAX_UNICODE) {
116         // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
117         // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
118         // it creates characters outside the [U+0000; U+10ffff] range:
119         // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
120         return 0;
121     }
122     return 1;
123 }
124 
125 
126 static size_t
_Py_mbstowcs(wchar_t * dest,const char * src,size_t n)127 _Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
128 {
129     size_t count = mbstowcs(dest, src, n);
130     if (dest != NULL && count != DECODE_ERROR) {
131         for (size_t i=0; i < count; i++) {
132             wchar_t ch = dest[i];
133             if (!is_valid_wide_char(ch)) {
134                 return DECODE_ERROR;
135             }
136         }
137     }
138     return count;
139 }
140 
141 
142 #ifdef HAVE_MBRTOWC
143 static size_t
_Py_mbrtowc(wchar_t * pwc,const char * str,size_t len,mbstate_t * pmbs)144 _Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
145 {
146     assert(pwc != NULL);
147     size_t count = mbrtowc(pwc, str, len, pmbs);
148     if (count != 0 && count != DECODE_ERROR && count != INCOMPLETE_CHARACTER) {
149         if (!is_valid_wide_char(*pwc)) {
150             return DECODE_ERROR;
151         }
152     }
153     return count;
154 }
155 #endif
156 
157 
158 #if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
159 
160 #define USE_FORCE_ASCII
161 
162 extern int _Py_normalize_encoding(const char *, char *, size_t);
163 
164 /* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
165    and POSIX locale. nl_langinfo(CODESET) announces an alias of the
166    ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
167    ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
168    locale.getpreferredencoding() codec. For example, if command line arguments
169    are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
170    UnicodeEncodeError instead of retrieving the original byte string.
171 
172    The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
173    nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
174    one byte in range 0x80-0xff can be decoded from the locale encoding. The
175    workaround is also enabled on error, for example if getting the locale
176    failed.
177 
178    On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
179    announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
180    ASCII encoding in this case.
181 
182    Values of force_ascii:
183 
184        1: the workaround is used: Py_EncodeLocale() uses
185           encode_ascii_surrogateescape() and Py_DecodeLocale() uses
186           decode_ascii()
187        0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
188           Py_DecodeLocale() uses mbstowcs()
189       -1: unknown, need to call check_force_ascii() to get the value
190 */
191 static int force_ascii = -1;
192 
/* Compute whether the ASCII encoding must be forced for the current
   LC_CTYPE locale.

   Return 1 if the workaround must be used (including when the locale or its
   codeset cannot be retrieved), 0 otherwise. The result is not cached here:
   callers store it in force_ascii. */
static int
check_force_ascii(void)
{
    /* Query (not modify) the current LC_CTYPE locale */
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    /* Normalize the codeset name so that it can be compared against the
       alias names below */
    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        unsigned char ch;
        wchar_t wch;
        size_t res;

        /* Probe with a byte which decodes differently in Roman8 and Latin1 */
        ch = (unsigned char)0xA7;
        res = _Py_mbstowcs(&wch, (char*)&ch, 1);
        if (res != DECODE_ERROR && wch == L'\xA7') {
            /* On HP-UX with C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    const char* ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    /* Is the announced codeset "ascii" or one of its aliases? */
    int is_ascii = 0;
    for (const char **alias=ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* The codeset claims to be ASCII: check that mbstowcs() agrees by trying
       to decode each non-ASCII byte on its own */
    for (unsigned int i=0x80; i<=0xff; i++) {
        char ch[1];
        wchar_t wch[1];
        size_t res;

        unsigned uch = (unsigned char)i;
        ch[0] = (char)uch;
        res = _Py_mbstowcs(wch, ch, 1);
        if (res != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
292 
293 
294 int
_Py_GetForceASCII(void)295 _Py_GetForceASCII(void)
296 {
297     if (force_ascii == -1) {
298         force_ascii = check_force_ascii();
299     }
300     return force_ascii;
301 }
302 
303 
/* Forget the cached force_ascii value: set it back to -1 ("unknown"), so
   that the next code path testing for -1 calls check_force_ascii() again. */
void
_Py_ResetForceASCII(void)
{
    force_ascii = -1;
}
309 
310 
311 static int
encode_ascii(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)312 encode_ascii(const wchar_t *text, char **str,
313              size_t *error_pos, const char **reason,
314              int raw_malloc, _Py_error_handler errors)
315 {
316     char *result = NULL, *out;
317     size_t len, i;
318     wchar_t ch;
319 
320     int surrogateescape;
321     if (get_surrogateescape(errors, &surrogateescape) < 0) {
322         return -3;
323     }
324 
325     len = wcslen(text);
326 
327     /* +1 for NULL byte */
328     if (raw_malloc) {
329         result = PyMem_RawMalloc(len + 1);
330     }
331     else {
332         result = PyMem_Malloc(len + 1);
333     }
334     if (result == NULL) {
335         return -1;
336     }
337 
338     out = result;
339     for (i=0; i<len; i++) {
340         ch = text[i];
341 
342         if (ch <= 0x7f) {
343             /* ASCII character */
344             *out++ = (char)ch;
345         }
346         else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
347             /* UTF-8b surrogate */
348             *out++ = (char)(ch - 0xdc00);
349         }
350         else {
351             if (raw_malloc) {
352                 PyMem_RawFree(result);
353             }
354             else {
355                 PyMem_Free(result);
356             }
357             if (error_pos != NULL) {
358                 *error_pos = i;
359             }
360             if (reason) {
361                 *reason = "encoding error";
362             }
363             return -2;
364         }
365     }
366     *out = '\0';
367     *str = result;
368     return 0;
369 }
370 #else
/* Variant used when the force-ASCII workaround is compiled out
   (_Py_FORCE_UTF8_FS_ENCODING or MS_WINDOWS is defined): ASCII is never
   forced. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
376 
/* Variant used when the force-ASCII workaround is compiled out: there is no
   cached value to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
382 #endif   /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
383 
384 
385 #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
386 static int
decode_ascii(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)387 decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
388              const char **reason, _Py_error_handler errors)
389 {
390     wchar_t *res;
391     unsigned char *in;
392     wchar_t *out;
393     size_t argsize = strlen(arg) + 1;
394 
395     int surrogateescape;
396     if (get_surrogateescape(errors, &surrogateescape) < 0) {
397         return -3;
398     }
399 
400     if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
401         return -1;
402     }
403     res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
404     if (!res) {
405         return -1;
406     }
407 
408     out = res;
409     for (in = (unsigned char*)arg; *in; in++) {
410         unsigned char ch = *in;
411         if (ch < 128) {
412             *out++ = ch;
413         }
414         else {
415             if (!surrogateescape) {
416                 PyMem_RawFree(res);
417                 if (wlen) {
418                     *wlen = in - (unsigned char*)arg;
419                 }
420                 if (reason) {
421                     *reason = "decoding error";
422                 }
423                 return -2;
424             }
425             *out++ = 0xdc00 + ch;
426         }
427     }
428     *out = 0;
429 
430     if (wlen != NULL) {
431         *wlen = out - res;
432     }
433     *wstr = res;
434     return 0;
435 }
#endif   /* !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII) */
437 
/* Decode a byte string from the encoding of the current LC_CTYPE locale.

   The whole string is first decoded with _Py_mbstowcs(). If that fails, the
   string is decoded again one character at a time with _Py_mbrtowc() (or by
   decode_ascii() if mbrtowc() is not available); in that second pass, with
   the surrogateescape error handler, each undecodable byte is escaped as
   0xdc00 + byte.

   Return value:

    0: success, *wstr is set to a string allocated by PyMem_RawMalloc() and
       *wlen (if not NULL) to its length.
   -1: memory allocation failure (or the string is too long).
   -2: decoding error: *wlen (if not NULL) is set to the offset of the
       invalid byte sequence and *reason (if not NULL) to the error message.
   -3: the error handler 'errors' is not supported. */
static int
decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
                      const char **reason, _Py_error_handler errors)
{
    wchar_t *res;
    size_t argsize;
    size_t count;
#ifdef HAVE_MBRTOWC
    unsigned char *in;
    wchar_t *out;
    mbstate_t mbs;
#endif

    int surrogateescape;
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
        return -3;
    }

#ifdef HAVE_BROKEN_MBSTOWCS
    /* Some platforms have a broken implementation of
     * mbstowcs which does not count the characters that
     * would result from conversion.  Use an upper bound.
     */
    argsize = strlen(arg);
#else
    /* Ask for the number of wide characters, without converting anything */
    argsize = _Py_mbstowcs(NULL, arg, 0);
#endif
    if (argsize != DECODE_ERROR) {
        /* Guard (argsize + 1) * sizeof(wchar_t) against overflow */
        if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
            return -1;
        }
        res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
        if (!res) {
            return -1;
        }

        count = _Py_mbstowcs(res, arg, argsize + 1);
        if (count != DECODE_ERROR) {
            *wstr = res;
            if (wlen != NULL) {
                *wlen = count;
            }
            return 0;
        }
        /* The conversion itself failed (or produced a rejected character):
           discard the buffer and use the slow path below */
        PyMem_RawFree(res);
    }

    /* Conversion failed. Fall back to escaping with surrogateescape. */
#ifdef HAVE_MBRTOWC
    /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */

    /* Overallocate; as multi-byte characters are in the argument, the
       actual output could use less memory. */
    argsize = strlen(arg) + 1;
    if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
        return -1;
    }
    res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
    if (!res) {
        return -1;
    }

    /* From here on, argsize is the number of input bytes left to decode,
       including the nul byte */
    in = (unsigned char*)arg;
    out = res;
    memset(&mbs, 0, sizeof mbs);
    while (argsize) {
        size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
        if (converted == 0) {
            /* Reached end of string; null char stored. */
            break;
        }

        if (converted == INCOMPLETE_CHARACTER) {
            /* Incomplete character. This should never happen,
               since we provide everything that we have -
               unless there is a bug in the C library, or I
               misunderstood how mbrtowc works. */
            goto decode_error;
        }

        if (converted == DECODE_ERROR) {
            if (!surrogateescape) {
                goto decode_error;
            }

            /* Decoding error. Escape as UTF-8b, and start over in the initial
               shift state. */
            *out++ = 0xdc00 + *in++;
            argsize--;
            memset(&mbs, 0, sizeof mbs);
            continue;
        }

        // _Py_mbrtowc() reject lone surrogate characters
        assert(!Py_UNICODE_IS_SURROGATE(*out));

        /* successfully converted some bytes */
        in += converted;
        argsize -= converted;
        out++;
    }
    if (wlen != NULL) {
        *wlen = out - res;
    }
    *wstr = res;
    return 0;

decode_error:
    PyMem_RawFree(res);
    if (wlen) {
        /* offset of the first byte which cannot be decoded */
        *wlen = in - (unsigned char*)arg;
    }
    if (reason) {
        *reason = "decoding error";
    }
    return -2;
#else   /* HAVE_MBRTOWC */
    /* Cannot use C locale for escaping; manually escape as if charset
       is ASCII (i.e. escape all bytes >= 128). This will still roundtrip
       correctly in the locale's charset, which must be an ASCII superset. */
    return decode_ascii(arg, wstr, wlen, reason, errors);
#endif   /* HAVE_MBRTOWC */
}
561 
562 
563 /* Decode a byte string from the locale encoding.
564 
   Use the strict error handler if 'errors' is _Py_ERROR_STRICT.  Use the
   surrogateescape error handler if 'errors' is _Py_ERROR_SURROGATEESCAPE:
   undecodable bytes are decoded as characters in range U+DC80..U+DCFF. If a
   byte sequence can be decoded as a surrogate character, escape the bytes
   using the surrogateescape error handler instead of decoding them.
570 
571    On success, return 0 and write the newly allocated wide character string into
572    *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
573    the number of wide characters excluding the null character into *wlen.
574 
575    On memory allocation failure, return -1.
576 
577    On decoding error, return -2. If wlen is not NULL, write the start of
578    invalid byte sequence in the input string into *wlen. If reason is not NULL,
579    write the decoding error message into *reason.
580 
581    Return -3 if the error handler 'errors' is not supported.
582 
   Use the _Py_EncodeLocaleEx() function to encode the character string back
   to a byte string. */
585 int
_Py_DecodeLocaleEx(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,int current_locale,_Py_error_handler errors)586 _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
587                    const char **reason,
588                    int current_locale, _Py_error_handler errors)
589 {
590     if (current_locale) {
591 #ifdef _Py_FORCE_UTF8_LOCALE
592         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
593                                 errors);
594 #else
595         return decode_current_locale(arg, wstr, wlen, reason, errors);
596 #endif
597     }
598 
599 #ifdef _Py_FORCE_UTF8_FS_ENCODING
600     return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
601                             errors);
602 #else
603     int use_utf8 = (Py_UTF8Mode == 1);
604 #ifdef MS_WINDOWS
605     use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
606 #endif
607     if (use_utf8) {
608         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
609                                 errors);
610     }
611 
612 #ifdef USE_FORCE_ASCII
613     if (force_ascii == -1) {
614         force_ascii = check_force_ascii();
615     }
616 
617     if (force_ascii) {
618         /* force ASCII encoding to workaround mbstowcs() issue */
619         return decode_ascii(arg, wstr, wlen, reason, errors);
620     }
621 #endif
622 
623     return decode_current_locale(arg, wstr, wlen, reason, errors);
624 #endif   /* !_Py_FORCE_UTF8_FS_ENCODING */
625 }
626 
627 
628 /* Decode a byte string from the locale encoding with the
629    surrogateescape error handler: undecodable bytes are decoded as characters
630    in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
631    character, escape the bytes using the surrogateescape error handler instead
632    of decoding them.
633 
634    Return a pointer to a newly allocated wide character string, use
635    PyMem_RawFree() to free the memory. If size is not NULL, write the number of
636    wide characters excluding the null character into *size
637 
638    Return NULL on decoding error or memory allocation error. If *size* is not
639    NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
640    decoding error.
641 
642    Decoding errors should never happen, unless there is a bug in the C
643    library.
644 
645    Use the Py_EncodeLocale() function to encode the character string back to a
646    byte string. */
647 wchar_t*
Py_DecodeLocale(const char * arg,size_t * wlen)648 Py_DecodeLocale(const char* arg, size_t *wlen)
649 {
650     wchar_t *wstr;
651     int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
652                                  NULL, 0,
653                                  _Py_ERROR_SURROGATEESCAPE);
654     if (res != 0) {
655         assert(res != -3);
656         if (wlen != NULL) {
657             *wlen = (size_t)res;
658         }
659         return NULL;
660     }
661     return wstr;
662 }
663 
664 
/* Encode a wide character string to the encoding of the current LC_CTYPE
   locale, one character at a time with wcstombs().

   With the surrogateescape error handler, characters in range
   U+DC80..U+DCFF are encoded as bytes 0x80..0xFF; with the strict error
   handler, they are an encoding error.

   The output buffer is allocated by PyMem_RawMalloc() if raw_malloc is
   non-zero, or by PyMem_Malloc() otherwise.

   Return value:

    0: success, *str is set to the newly allocated, nul-terminated string.
   -1: memory allocation failure.
   -2: encoding error: *error_pos (if not NULL) is set to the index of the
       character which cannot be encoded and *reason (if not NULL) to the
       error message.
   -3: the error handler 'errors' is not supported. */
static int
encode_current_locale(const wchar_t *text, char **str,
                      size_t *error_pos, const char **reason,
                      int raw_malloc, _Py_error_handler errors)
{
    const size_t len = wcslen(text);
    char *result = NULL, *bytes = NULL;
    size_t i, size, converted;
    wchar_t c, buf[2];

    int surrogateescape;
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
        return -3;
    }

    /* The function works in two steps:
       1. compute the length of the output buffer in bytes (size)
       2. outputs the bytes */
    size = 0;
    /* buf is a one-character, nul-terminated string passed to wcstombs() */
    buf[1] = 0;
    /* The loop body runs twice: bytes is NULL during step 1 and points into
       the output buffer during step 2, where size counts the room left. */
    while (1) {
        for (i=0; i < len; i++) {
            c = text[i];
            if (c >= 0xdc80 && c <= 0xdcff) {
                if (!surrogateescape) {
                    goto encode_error;
                }
                /* UTF-8b surrogate */
                if (bytes != NULL) {
                    *bytes++ = c - 0xdc00;
                    size--;
                }
                else {
                    size++;
                }
                continue;
            }
            else {
                buf[0] = c;
                if (bytes != NULL) {
                    converted = wcstombs(bytes, buf, size);
                }
                else {
                    converted = wcstombs(NULL, buf, 0);
                }
                /* wcstombs() reports an unencodable character with the same
                   (size_t)-1 value as DECODE_ERROR */
                if (converted == DECODE_ERROR) {
                    goto encode_error;
                }
                if (bytes != NULL) {
                    bytes += converted;
                    size -= converted;
                }
                else {
                    size += converted;
                }
            }
        }
        if (result != NULL) {
            /* End of step 2: terminate the string and stop */
            *bytes = '\0';
            break;
        }

        /* End of step 1: allocate the buffer, then loop again for step 2 */
        size += 1; /* nul byte at the end */
        if (raw_malloc) {
            result = PyMem_RawMalloc(size);
        }
        else {
            result = PyMem_Malloc(size);
        }
        if (result == NULL) {
            return -1;
        }
        bytes = result;
    }
    *str = result;
    return 0;

encode_error:
    /* result is still NULL if the error was detected during step 1 */
    if (raw_malloc) {
        PyMem_RawFree(result);
    }
    else {
        PyMem_Free(result);
    }
    if (error_pos != NULL) {
        *error_pos = i;
    }
    if (reason) {
        *reason = "encoding error";
    }
    return -2;
}
757 
758 
759 /* Encode a string to the locale encoding.
760 
761    Parameters:
762 
763    * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
764      of PyMem_Malloc().
765    * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
766      Python filesystem encoding.
767    * errors: error handler like "strict" or "surrogateescape".
768 
769    Return value:
770 
    0: success, *str is set to a newly allocated encoded string.
772    -1: memory allocation failure
773    -2: encoding error, set *error_pos and *reason (if set).
774    -3: the error handler 'errors' is not supported.
775  */
776 static int
encode_locale_ex(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,int current_locale,_Py_error_handler errors)777 encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
778                  const char **reason,
779                  int raw_malloc, int current_locale, _Py_error_handler errors)
780 {
781     if (current_locale) {
782 #ifdef _Py_FORCE_UTF8_LOCALE
783         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
784                                 raw_malloc, errors);
785 #else
786         return encode_current_locale(text, str, error_pos, reason,
787                                      raw_malloc, errors);
788 #endif
789     }
790 
791 #ifdef _Py_FORCE_UTF8_FS_ENCODING
792     return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
793                             raw_malloc, errors);
794 #else
795     int use_utf8 = (Py_UTF8Mode == 1);
796 #ifdef MS_WINDOWS
797     use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
798 #endif
799     if (use_utf8) {
800         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
801                                 raw_malloc, errors);
802     }
803 
804 #ifdef USE_FORCE_ASCII
805     if (force_ascii == -1) {
806         force_ascii = check_force_ascii();
807     }
808 
809     if (force_ascii) {
810         return encode_ascii(text, str, error_pos, reason,
811                             raw_malloc, errors);
812     }
813 #endif
814 
815     return encode_current_locale(text, str, error_pos, reason,
816                                  raw_malloc, errors);
817 #endif   /* _Py_FORCE_UTF8_FS_ENCODING */
818 }
819 
820 static char*
encode_locale(const wchar_t * text,size_t * error_pos,int raw_malloc,int current_locale)821 encode_locale(const wchar_t *text, size_t *error_pos,
822               int raw_malloc, int current_locale)
823 {
824     char *str;
825     int res = encode_locale_ex(text, &str, error_pos, NULL,
826                                raw_malloc, current_locale,
827                                _Py_ERROR_SURROGATEESCAPE);
828     if (res != -2 && error_pos) {
829         *error_pos = (size_t)-1;
830     }
831     if (res != 0) {
832         return NULL;
833     }
834     return str;
835 }
836 
837 /* Encode a wide character string to the locale encoding with the
838    surrogateescape error handler: surrogate characters in the range
839    U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
840 
841    Return a pointer to a newly allocated byte string, use PyMem_Free() to free
842    the memory. Return NULL on encoding or memory allocation error.
843 
844    If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
845    to the index of the invalid character on encoding error.
846 
847    Use the Py_DecodeLocale() function to decode the bytes string back to a wide
848    character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    /* raw_malloc=0, current_locale=0 */
    return encode_locale(text, error_pos, 0, 0);
}
854 
855 
856 /* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
857    instead of PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    /* raw_malloc=1, current_locale=0 */
    return encode_locale(text, error_pos, 1, 0);
}
863 
864 
/* Encode a wide character string using the error handler 'errors'.

   Thin wrapper around encode_locale_ex() which always passes raw_malloc=1;
   the arguments and the return value are otherwise those of
   encode_locale_ex(). */
int
_Py_EncodeLocaleEx(const wchar_t *text, char **str,
                   size_t *error_pos, const char **reason,
                   int current_locale, _Py_error_handler errors)
{
    return encode_locale_ex(text, str, error_pos, reason, 1,
                            current_locale, errors);
}
873 
874 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
875 
876 /* Check whether current locale uses Unicode as internal wchar_t form. */
int
_Py_LocaleUsesNonUnicodeWchar(void)
{
    /* Oracle Solaris uses non-Unicode internal wchar_t form for
       non-Unicode locales and hence needs conversion to UTF first. */
    const char *name = nl_langinfo(CODESET);
    if (name == NULL) {
        return 0;
    }
    if (strcmp(name, "UTF-8") == 0) {
        return 0;
    }
    /* 646 refers to ISO/IEC 646 standard that corresponds to ASCII encoding */
    if (strcmp(name, "646") == 0) {
        return 0;
    }
    return 1;
}
889 
890 static wchar_t *
_Py_ConvertWCharForm(const wchar_t * source,Py_ssize_t size,const char * tocode,const char * fromcode)891 _Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
892                      const char *tocode, const char *fromcode)
893 {
894     Py_BUILD_ASSERT(sizeof(wchar_t) == 4);
895 
896     /* Ensure we won't overflow the size. */
897     if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
898         PyErr_NoMemory();
899         return NULL;
900     }
901 
902     /* the string doesn't have to be NULL terminated */
903     wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
904     if (target == NULL) {
905         PyErr_NoMemory();
906         return NULL;
907     }
908 
909     iconv_t cd = iconv_open(tocode, fromcode);
910     if (cd == (iconv_t)-1) {
911         PyErr_Format(PyExc_ValueError, "iconv_open() failed");
912         PyMem_Free(target);
913         return NULL;
914     }
915 
916     char *inbuf = (char *) source;
917     char *outbuf = (char *) target;
918     size_t inbytesleft = sizeof(wchar_t) * size;
919     size_t outbytesleft = inbytesleft;
920 
921     size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
922     if (ret == DECODE_ERROR) {
923         PyErr_Format(PyExc_ValueError, "iconv() failed");
924         PyMem_Free(target);
925         iconv_close(cd);
926         return NULL;
927     }
928 
929     iconv_close(cd);
930     return target;
931 }
932 
933 /* Convert a wide character string to the UCS-4 encoded string. This
934    is necessary on systems where internal form of wchar_t are not Unicode
935    code points (e.g. Oracle Solaris).
936 
937    Return a pointer to a newly allocated string, use PyMem_Free() to free
938    the memory. Return NULL and raise exception on conversion or memory
939    allocation error. */
wchar_t *
_Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
{
    /* tocode="UCS-4-INTERNAL", fromcode="wchar_t" */
    return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
}
945 
946 /* Convert a UCS-4 encoded string to native wide character string. This
947    is necessary on systems where internal form of wchar_t are not Unicode
948    code points (e.g. Oracle Solaris).
949 
950    The conversion is done in place. This can be done because both wchar_t
951    and UCS-4 use 4-byte encoding, and one wchar_t symbol always correspond
952    to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
953    which is currently the only system using these functions; it doesn't have
954    to be for other systems).
955 
956    Return 0 on success. Return -1 and raise exception on conversion
957    or memory allocation error. */
958 int
_Py_EncodeNonUnicodeWchar_InPlace(wchar_t * unicode,Py_ssize_t size)959 _Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
960 {
961     wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
962     if (!result) {
963         return -1;
964     }
965     memcpy(unicode, result, size * sizeof(wchar_t));
966     PyMem_Free(result);
967     return 0;
968 }
969 #endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
970 
971 #ifdef MS_WINDOWS
972 static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
973 
974 static void
FILE_TIME_to_time_t_nsec(FILETIME * in_ptr,time_t * time_out,int * nsec_out)975 FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
976 {
977     /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
978     /* Cannot simply cast and dereference in_ptr,
979        since it might not be aligned properly */
980     __int64 in;
981     memcpy(&in, in_ptr, sizeof(in));
982     *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
983     *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
984 }
985 
986 void
_Py_time_t_to_FILE_TIME(time_t time_in,int nsec_in,FILETIME * out_ptr)987 _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
988 {
989     /* XXX endianness */
990     __int64 out;
991     out = time_in + secs_between_epochs;
992     out = out * 10000000 + nsec_in / 100;
993     memcpy(out_ptr, &out, sizeof(out));
994 }
995 
996 /* Below, we *know* that ugo+r is 0444 */
997 #if _S_IREAD != 0400
998 #error Unsupported C library
999 #endif
1000 static int
attributes_to_mode(DWORD attr)1001 attributes_to_mode(DWORD attr)
1002 {
1003     int m = 0;
1004     if (attr & FILE_ATTRIBUTE_DIRECTORY)
1005         m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
1006     else
1007         m |= _S_IFREG;
1008     if (attr & FILE_ATTRIBUTE_READONLY)
1009         m |= 0444;
1010     else
1011         m |= 0666;
1012     return m;
1013 }
1014 
1015 void
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION * info,ULONG reparse_tag,struct _Py_stat_struct * result)1016 _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
1017                            struct _Py_stat_struct *result)
1018 {
1019     memset(result, 0, sizeof(*result));
1020     result->st_mode = attributes_to_mode(info->dwFileAttributes);
1021     result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
1022     result->st_dev = info->dwVolumeSerialNumber;
1023     result->st_rdev = result->st_dev;
1024     FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
1025     FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1026     FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
1027     result->st_nlink = info->nNumberOfLinks;
1028     result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
1029     /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1030        open other name surrogate reparse points without traversing them. To
1031        detect/handle these, check st_file_attributes and st_reparse_tag. */
1032     result->st_reparse_tag = reparse_tag;
1033     if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1034         reparse_tag == IO_REPARSE_TAG_SYMLINK) {
1035         /* first clear the S_IFMT bits */
1036         result->st_mode ^= (result->st_mode & S_IFMT);
1037         /* now set the bits that make this a symlink */
1038         result->st_mode |= S_IFLNK;
1039     }
1040     result->st_file_attributes = info->dwFileAttributes;
1041 }
1042 #endif
1043 
/* Return information about the file referred to by fd, without raising.

   On POSIX, this is a plain fstat() call.

   On Windows, GetFileType() and GetFileInformationByHandle() are used
   instead: they support files larger than 2 GiB, whereas fstat() may fail
   with EOVERFLOW on such files because the file size type is a signed 32-bit
   integer (see issue #23152).

   Fill *status and return 0 on success. Return nonzero on error: on Windows
   the last Windows error is set, on POSIX errno is set. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    HANDLE handle;

    _Py_BEGIN_SUPPRESS_IPH
    handle = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH

    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    int file_type = GetFileType(handle);
    if (file_type == FILE_TYPE_UNKNOWN) {
        DWORD winerr = GetLastError();
        if (winerr != 0) {
            errno = winerror_to_errno(winerr);
            return -1;
        }
        /* else: valid but unknown file */
    }

    switch (file_type) {
    case FILE_TYPE_DISK:
        /* regular on-disk file: query the full information below */
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        /* any other type: report success with a zeroed status */
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION info;
    if (!GetFileInformationByHandle(handle, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
1109 
1110 /* Return information about a file.
1111 
1112    On POSIX, use fstat().
1113 
1114    On Windows, use GetFileType() and GetFileInformationByHandle() which support
1115    files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
1116    than 2 GiB because the file size type is a signed 32-bit integer: see issue
1117    #23152.
1118 
1119    Raise an exception and return -1 on error. On Windows, set the last Windows
1120    error on error. On POSIX, set errno on error. Fill status and return 0 on
1121    success.
1122 
1123    Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1124    to call fstat(). The caller must hold the GIL. */
1125 int
_Py_fstat(int fd,struct _Py_stat_struct * status)1126 _Py_fstat(int fd, struct _Py_stat_struct *status)
1127 {
1128     int res;
1129 
1130     assert(PyGILState_Check());
1131 
1132     Py_BEGIN_ALLOW_THREADS
1133     res = _Py_fstat_noraise(fd, status);
1134     Py_END_ALLOW_THREADS
1135 
1136     if (res != 0) {
1137 #ifdef MS_WINDOWS
1138         PyErr_SetFromWindowsErr(0);
1139 #else
1140         PyErr_SetFromErrno(PyExc_OSError);
1141 #endif
1142         return -1;
1143     }
1144     return 0;
1145 }
1146 
1147 /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1148    call stat() otherwise. Only fill st_mode attribute on Windows.
1149 
1150    Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1151    raised. */
1152 
1153 int
_Py_stat(PyObject * path,struct stat * statbuf)1154 _Py_stat(PyObject *path, struct stat *statbuf)
1155 {
1156 #ifdef MS_WINDOWS
1157     int err;
1158     struct _stat wstatbuf;
1159     const wchar_t *wpath;
1160 
1161 _Py_COMP_DIAG_PUSH
1162 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
1163     wpath = _PyUnicode_AsUnicode(path);
1164 _Py_COMP_DIAG_POP
1165     if (wpath == NULL)
1166         return -2;
1167 
1168     err = _wstat(wpath, &wstatbuf);
1169     if (!err)
1170         statbuf->st_mode = wstatbuf.st_mode;
1171     return err;
1172 #else
1173     int ret;
1174     PyObject *bytes;
1175     char *cpath;
1176 
1177     bytes = PyUnicode_EncodeFSDefault(path);
1178     if (bytes == NULL)
1179         return -2;
1180 
1181     /* check for embedded null bytes */
1182     if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1183         Py_DECREF(bytes);
1184         return -2;
1185     }
1186 
1187     ret = stat(cpath, statbuf);
1188     Py_DECREF(bytes);
1189     return ret;
1190 #endif
1191 }
1192 
1193 
1194 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1195 static int
get_inheritable(int fd,int raise)1196 get_inheritable(int fd, int raise)
1197 {
1198 #ifdef MS_WINDOWS
1199     HANDLE handle;
1200     DWORD flags;
1201 
1202     _Py_BEGIN_SUPPRESS_IPH
1203     handle = (HANDLE)_get_osfhandle(fd);
1204     _Py_END_SUPPRESS_IPH
1205     if (handle == INVALID_HANDLE_VALUE) {
1206         if (raise)
1207             PyErr_SetFromErrno(PyExc_OSError);
1208         return -1;
1209     }
1210 
1211     if (!GetHandleInformation(handle, &flags)) {
1212         if (raise)
1213             PyErr_SetFromWindowsErr(0);
1214         return -1;
1215     }
1216 
1217     return (flags & HANDLE_FLAG_INHERIT);
1218 #else
1219     int flags;
1220 
1221     flags = fcntl(fd, F_GETFD, 0);
1222     if (flags == -1) {
1223         if (raise)
1224             PyErr_SetFromErrno(PyExc_OSError);
1225         return -1;
1226     }
1227     return !(flags & FD_CLOEXEC);
1228 #endif
1229 }
1230 
/* Get the inheritable flag of the specified file descriptor.

   Return 1 if the file descriptor can be inherited and 0 if it cannot.
   On error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;
    return get_inheritable(fd, raise_on_error);
}
1239 
1240 
1241 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1242 static int
set_inheritable(int fd,int inheritable,int raise,int * atomic_flag_works)1243 set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1244 {
1245 #ifdef MS_WINDOWS
1246     HANDLE handle;
1247     DWORD flags;
1248 #else
1249 #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1250     static int ioctl_works = -1;
1251     int request;
1252     int err;
1253 #endif
1254     int flags, new_flags;
1255     int res;
1256 #endif
1257 
1258     /* atomic_flag_works can only be used to make the file descriptor
1259        non-inheritable */
1260     assert(!(atomic_flag_works != NULL && inheritable));
1261 
1262     if (atomic_flag_works != NULL && !inheritable) {
1263         if (*atomic_flag_works == -1) {
1264             int isInheritable = get_inheritable(fd, raise);
1265             if (isInheritable == -1)
1266                 return -1;
1267             *atomic_flag_works = !isInheritable;
1268         }
1269 
1270         if (*atomic_flag_works)
1271             return 0;
1272     }
1273 
1274 #ifdef MS_WINDOWS
1275     _Py_BEGIN_SUPPRESS_IPH
1276     handle = (HANDLE)_get_osfhandle(fd);
1277     _Py_END_SUPPRESS_IPH
1278     if (handle == INVALID_HANDLE_VALUE) {
1279         if (raise)
1280             PyErr_SetFromErrno(PyExc_OSError);
1281         return -1;
1282     }
1283 
1284     if (inheritable)
1285         flags = HANDLE_FLAG_INHERIT;
1286     else
1287         flags = 0;
1288 
1289     /* This check can be removed once support for Windows 7 ends. */
1290 #define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
1291         GetFileType(handle) == FILE_TYPE_CHAR)
1292 
1293     if (!CONSOLE_PSEUDOHANDLE(handle) &&
1294         !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
1295         if (raise)
1296             PyErr_SetFromWindowsErr(0);
1297         return -1;
1298     }
1299 #undef CONSOLE_PSEUDOHANDLE
1300     return 0;
1301 
1302 #else
1303 
1304 #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1305     if (ioctl_works != 0 && raise != 0) {
1306         /* fast-path: ioctl() only requires one syscall */
1307         /* caveat: raise=0 is an indicator that we must be async-signal-safe
1308          * thus avoid using ioctl() so we skip the fast-path. */
1309         if (inheritable)
1310             request = FIONCLEX;
1311         else
1312             request = FIOCLEX;
1313         err = ioctl(fd, request, NULL);
1314         if (!err) {
1315             ioctl_works = 1;
1316             return 0;
1317         }
1318 
1319 #ifdef O_PATH
1320         if (errno == EBADF) {
1321             // bpo-44849: On Linux and FreeBSD, ioctl(FIOCLEX) fails with EBADF
1322             // on O_PATH file descriptors. Fall through to the fcntl()
1323             // implementation.
1324         }
1325         else
1326 #endif
1327         if (errno != ENOTTY && errno != EACCES) {
1328             if (raise)
1329                 PyErr_SetFromErrno(PyExc_OSError);
1330             return -1;
1331         }
1332         else {
1333             /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1334                device". The ioctl is declared but not supported by the kernel.
1335                Remember that ioctl() doesn't work. It is the case on
1336                Illumos-based OS for example.
1337 
1338                Issue #27057: When SELinux policy disallows ioctl it will fail
1339                with EACCES. While FIOCLEX is safe operation it may be
1340                unavailable because ioctl was denied altogether.
1341                This can be the case on Android. */
1342             ioctl_works = 0;
1343         }
1344         /* fallback to fcntl() if ioctl() does not work */
1345     }
1346 #endif
1347 
1348     /* slow-path: fcntl() requires two syscalls */
1349     flags = fcntl(fd, F_GETFD);
1350     if (flags < 0) {
1351         if (raise)
1352             PyErr_SetFromErrno(PyExc_OSError);
1353         return -1;
1354     }
1355 
1356     if (inheritable) {
1357         new_flags = flags & ~FD_CLOEXEC;
1358     }
1359     else {
1360         new_flags = flags | FD_CLOEXEC;
1361     }
1362 
1363     if (new_flags == flags) {
1364         /* FD_CLOEXEC flag already set/cleared: nothing to do */
1365         return 0;
1366     }
1367 
1368     res = fcntl(fd, F_SETFD, new_flags);
1369     if (res < 0) {
1370         if (raise)
1371             PyErr_SetFromErrno(PyExc_OSError);
1372         return -1;
1373     }
1374     return 0;
1375 #endif
1376 }
1377 
/* Make the file descriptor non-inheritable, without raising an exception.

   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise_on_error = 0;
    return set_inheritable(fd, inheritable, raise_on_error, NULL);
}
1385 
/* Set the inheritable flag of the specified file descriptor.

   Return 0 on success. On error, raise an exception and return -1.

   atomic_flag_works is an optional tri-state cache recording whether the
   atomic flag used to create the file descriptor really made it
   non-inheritable. If it is not NULL:

    * *atomic_flag_works==-1: query the descriptor. If it is already
      non-inheritable, store 1 and do nothing else; otherwise store 0 and
      make the descriptor non-inheritable
    * *atomic_flag_works==1: do nothing
    * *atomic_flag_works==0: make the descriptor non-inheritable

   Pass NULL if no atomic flag was used to create the file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: it must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;
    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1407 
/* Variant of _Py_set_inheritable() which does not raise an exception:
   on error, set errno and return -1.

   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;
    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1416 
1417 static int
_Py_open_impl(const char * pathname,int flags,int gil_held)1418 _Py_open_impl(const char *pathname, int flags, int gil_held)
1419 {
1420     int fd;
1421     int async_err = 0;
1422 #ifndef MS_WINDOWS
1423     int *atomic_flag_works;
1424 #endif
1425 
1426 #ifdef MS_WINDOWS
1427     flags |= O_NOINHERIT;
1428 #elif defined(O_CLOEXEC)
1429     atomic_flag_works = &_Py_open_cloexec_works;
1430     flags |= O_CLOEXEC;
1431 #else
1432     atomic_flag_works = NULL;
1433 #endif
1434 
1435     if (gil_held) {
1436         PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1437         if (pathname_obj == NULL) {
1438             return -1;
1439         }
1440         if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1441             Py_DECREF(pathname_obj);
1442             return -1;
1443         }
1444 
1445         do {
1446             Py_BEGIN_ALLOW_THREADS
1447             fd = open(pathname, flags);
1448             Py_END_ALLOW_THREADS
1449         } while (fd < 0
1450                  && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1451         if (async_err) {
1452             Py_DECREF(pathname_obj);
1453             return -1;
1454         }
1455         if (fd < 0) {
1456             PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1457             Py_DECREF(pathname_obj);
1458             return -1;
1459         }
1460         Py_DECREF(pathname_obj);
1461     }
1462     else {
1463         fd = open(pathname, flags);
1464         if (fd < 0)
1465             return -1;
1466     }
1467 
1468 #ifndef MS_WINDOWS
1469     if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
1470         close(fd);
1471         return -1;
1472     }
1473 #endif
1474 
1475     return fd;
1476 }
1477 
/* Open a file with the specified flags (wrapper to the open() function).

   Return a file descriptor on success. On error, raise an exception and
   return -1.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), the syscall is
   retried, except if the Python signal handler raises an exception.

   The GIL is released to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1495 
/* Open a file with the specified flags (wrapper to the open() function),
   without raising an exception.

   Return a file descriptor on success. Set errno and return -1 on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;
    return _Py_open_impl(pathname, flags, gil_held);
}
1507 
1508 /* Open a file. Use _wfopen() on Windows, encode the path to the locale
1509    encoding and use fopen() otherwise.
1510 
1511    The file descriptor is created non-inheritable.
1512 
1513    If interrupted by a signal, fail with EINTR. */
1514 FILE *
_Py_wfopen(const wchar_t * path,const wchar_t * mode)1515 _Py_wfopen(const wchar_t *path, const wchar_t *mode)
1516 {
1517     FILE *f;
1518     if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1519         return NULL;
1520     }
1521 #ifndef MS_WINDOWS
1522     char *cpath;
1523     char cmode[10];
1524     size_t r;
1525     r = wcstombs(cmode, mode, 10);
1526     if (r == DECODE_ERROR || r >= 10) {
1527         errno = EINVAL;
1528         return NULL;
1529     }
1530     cpath = _Py_EncodeLocaleRaw(path, NULL);
1531     if (cpath == NULL) {
1532         return NULL;
1533     }
1534     f = fopen(cpath, cmode);
1535     PyMem_RawFree(cpath);
1536 #else
1537     f = _wfopen(path, mode);
1538 #endif
1539     if (f == NULL)
1540         return NULL;
1541     if (make_non_inheritable(fileno(f)) < 0) {
1542         fclose(f);
1543         return NULL;
1544     }
1545     return f;
1546 }
1547 
1548 /* Wrapper to fopen().
1549 
1550    The file descriptor is created non-inheritable.
1551 
1552    If interrupted by a signal, fail with EINTR. */
1553 FILE*
_Py_fopen(const char * pathname,const char * mode)1554 _Py_fopen(const char *pathname, const char *mode)
1555 {
1556     PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1557     if (pathname_obj == NULL) {
1558         return NULL;
1559     }
1560     if (PySys_Audit("open", "Osi", pathname_obj, mode, 0) < 0) {
1561         Py_DECREF(pathname_obj);
1562         return NULL;
1563     }
1564     Py_DECREF(pathname_obj);
1565 
1566     FILE *f = fopen(pathname, mode);
1567     if (f == NULL)
1568         return NULL;
1569     if (make_non_inheritable(fileno(f)) < 0) {
1570         fclose(f);
1571         return NULL;
1572     }
1573     return f;
1574 }
1575 
1576 /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
1577    encoding and call fopen() otherwise.
1578 
1579    Return the new file object on success. Raise an exception and return NULL
1580    on error.
1581 
1582    The file descriptor is created non-inheritable.
1583 
1584    When interrupted by a signal (open() fails with EINTR), retry the syscall,
1585    except if the Python signal handler raises an exception.
1586 
1587    Release the GIL to call _wfopen() or fopen(). The caller must hold
1588    the GIL. */
1589 FILE*
_Py_fopen_obj(PyObject * path,const char * mode)1590 _Py_fopen_obj(PyObject *path, const char *mode)
1591 {
1592     FILE *f;
1593     int async_err = 0;
1594 #ifdef MS_WINDOWS
1595     const wchar_t *wpath;
1596     wchar_t wmode[10];
1597     int usize;
1598 
1599     assert(PyGILState_Check());
1600 
1601     if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1602         return NULL;
1603     }
1604     if (!PyUnicode_Check(path)) {
1605         PyErr_Format(PyExc_TypeError,
1606                      "str file path expected under Windows, got %R",
1607                      Py_TYPE(path));
1608         return NULL;
1609     }
1610 _Py_COMP_DIAG_PUSH
1611 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
1612     wpath = _PyUnicode_AsUnicode(path);
1613 _Py_COMP_DIAG_POP
1614     if (wpath == NULL)
1615         return NULL;
1616 
1617     usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1618                                 wmode, Py_ARRAY_LENGTH(wmode));
1619     if (usize == 0) {
1620         PyErr_SetFromWindowsErr(0);
1621         return NULL;
1622     }
1623 
1624     do {
1625         Py_BEGIN_ALLOW_THREADS
1626         f = _wfopen(wpath, wmode);
1627         Py_END_ALLOW_THREADS
1628     } while (f == NULL
1629              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1630 #else
1631     PyObject *bytes;
1632     const char *path_bytes;
1633 
1634     assert(PyGILState_Check());
1635 
1636     if (!PyUnicode_FSConverter(path, &bytes))
1637         return NULL;
1638     path_bytes = PyBytes_AS_STRING(bytes);
1639 
1640     if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1641         Py_DECREF(bytes);
1642         return NULL;
1643     }
1644 
1645     do {
1646         Py_BEGIN_ALLOW_THREADS
1647         f = fopen(path_bytes, mode);
1648         Py_END_ALLOW_THREADS
1649     } while (f == NULL
1650              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1651 
1652     Py_DECREF(bytes);
1653 #endif
1654     if (async_err)
1655         return NULL;
1656 
1657     if (f == NULL) {
1658         PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1659         return NULL;
1660     }
1661 
1662     if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1663         fclose(f);
1664         return NULL;
1665     }
1666     return f;
1667 }
1668 
1669 /* Read count bytes from fd into buf.
1670 
1671    On success, return the number of read bytes, it can be lower than count.
1672    If the current file offset is at or past the end of file, no bytes are read,
1673    and read() returns zero.
1674 
1675    On error, raise an exception, set errno and return -1.
1676 
1677    When interrupted by a signal (read() fails with EINTR), retry the syscall.
1678    If the Python signal handler raises an exception, the function returns -1
1679    (the syscall is not retried).
1680 
1681    Release the GIL to call read(). The caller must hold the GIL. */
1682 Py_ssize_t
_Py_read(int fd,void * buf,size_t count)1683 _Py_read(int fd, void *buf, size_t count)
1684 {
1685     Py_ssize_t n;
1686     int err;
1687     int async_err = 0;
1688 
1689     assert(PyGILState_Check());
1690 
1691     /* _Py_read() must not be called with an exception set, otherwise the
1692      * caller may think that read() was interrupted by a signal and the signal
1693      * handler raised an exception. */
1694     assert(!PyErr_Occurred());
1695 
1696     if (count > _PY_READ_MAX) {
1697         count = _PY_READ_MAX;
1698     }
1699 
1700     _Py_BEGIN_SUPPRESS_IPH
1701     do {
1702         Py_BEGIN_ALLOW_THREADS
1703         errno = 0;
1704 #ifdef MS_WINDOWS
1705         n = read(fd, buf, (int)count);
1706 #else
1707         n = read(fd, buf, count);
1708 #endif
1709         /* save/restore errno because PyErr_CheckSignals()
1710          * and PyErr_SetFromErrno() can modify it */
1711         err = errno;
1712         Py_END_ALLOW_THREADS
1713     } while (n < 0 && err == EINTR &&
1714             !(async_err = PyErr_CheckSignals()));
1715     _Py_END_SUPPRESS_IPH
1716 
1717     if (async_err) {
1718         /* read() was interrupted by a signal (failed with EINTR)
1719          * and the Python signal handler raised an exception */
1720         errno = err;
1721         assert(errno == EINTR && PyErr_Occurred());
1722         return -1;
1723     }
1724     if (n < 0) {
1725         PyErr_SetFromErrno(PyExc_OSError);
1726         errno = err;
1727         return -1;
1728     }
1729 
1730     return n;
1731 }
1732 
1733 static Py_ssize_t
_Py_write_impl(int fd,const void * buf,size_t count,int gil_held)1734 _Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
1735 {
1736     Py_ssize_t n;
1737     int err;
1738     int async_err = 0;
1739 
1740     _Py_BEGIN_SUPPRESS_IPH
1741 #ifdef MS_WINDOWS
1742     if (count > 32767) {
1743         /* Issue #11395: the Windows console returns an error (12: not
1744            enough space error) on writing into stdout if stdout mode is
1745            binary and the length is greater than 66,000 bytes (or less,
1746            depending on heap usage). */
1747         if (gil_held) {
1748             Py_BEGIN_ALLOW_THREADS
1749             if (isatty(fd)) {
1750                 count = 32767;
1751             }
1752             Py_END_ALLOW_THREADS
1753         } else {
1754             if (isatty(fd)) {
1755                 count = 32767;
1756             }
1757         }
1758     }
1759 #endif
1760     if (count > _PY_WRITE_MAX) {
1761         count = _PY_WRITE_MAX;
1762     }
1763 
1764     if (gil_held) {
1765         do {
1766             Py_BEGIN_ALLOW_THREADS
1767             errno = 0;
1768 #ifdef MS_WINDOWS
1769             n = write(fd, buf, (int)count);
1770 #else
1771             n = write(fd, buf, count);
1772 #endif
1773             /* save/restore errno because PyErr_CheckSignals()
1774              * and PyErr_SetFromErrno() can modify it */
1775             err = errno;
1776             Py_END_ALLOW_THREADS
1777         } while (n < 0 && err == EINTR &&
1778                 !(async_err = PyErr_CheckSignals()));
1779     }
1780     else {
1781         do {
1782             errno = 0;
1783 #ifdef MS_WINDOWS
1784             n = write(fd, buf, (int)count);
1785 #else
1786             n = write(fd, buf, count);
1787 #endif
1788             err = errno;
1789         } while (n < 0 && err == EINTR);
1790     }
1791     _Py_END_SUPPRESS_IPH
1792 
1793     if (async_err) {
1794         /* write() was interrupted by a signal (failed with EINTR)
1795            and the Python signal handler raised an exception (if gil_held is
1796            nonzero). */
1797         errno = err;
1798         assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
1799         return -1;
1800     }
1801     if (n < 0) {
1802         if (gil_held)
1803             PyErr_SetFromErrno(PyExc_OSError);
1804         errno = err;
1805         return -1;
1806     }
1807 
1808     return n;
1809 }
1810 
1811 /* Write count bytes of buf into fd.
1812 
1813    On success, return the number of written bytes, it can be lower than count
1814    including 0. On error, raise an exception, set errno and return -1.
1815 
1816    When interrupted by a signal (write() fails with EINTR), retry the syscall.
1817    If the Python signal handler raises an exception, the function returns -1
1818    (the syscall is not retried).
1819 
1820    Release the GIL to call write(). The caller must hold the GIL. */
1821 Py_ssize_t
_Py_write(int fd,const void * buf,size_t count)1822 _Py_write(int fd, const void *buf, size_t count)
1823 {
1824     assert(PyGILState_Check());
1825 
1826     /* _Py_write() must not be called with an exception set, otherwise the
1827      * caller may think that write() was interrupted by a signal and the signal
1828      * handler raised an exception. */
1829     assert(!PyErr_Occurred());
1830 
1831     return _Py_write_impl(fd, buf, count, 1);
1832 }
1833 
1834 /* Write count bytes of buf into fd.
1835  *
1836  * On success, return the number of written bytes, it can be lower than count
1837  * including 0. On error, set errno and return -1.
1838  *
1839  * When interrupted by a signal (write() fails with EINTR), retry the syscall
1840  * without calling the Python signal handler. */
1841 Py_ssize_t
_Py_write_noraise(int fd,const void * buf,size_t count)1842 _Py_write_noraise(int fd, const void *buf, size_t count)
1843 {
1844     return _Py_write_impl(fd, buf, count, 0);
1845 }
1846 
1847 #ifdef HAVE_READLINK
1848 
1849 /* Read value of symbolic link. Encode the path to the locale encoding, decode
1850    the result from the locale encoding.
1851 
1852    Return -1 on encoding error, on readlink() error, if the internal buffer is
1853    too short, on decoding error, or if 'buf' is too short. */
1854 int
_Py_wreadlink(const wchar_t * path,wchar_t * buf,size_t buflen)1855 _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
1856 {
1857     char *cpath;
1858     char cbuf[MAXPATHLEN];
1859     size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
1860     wchar_t *wbuf;
1861     Py_ssize_t res;
1862     size_t r1;
1863 
1864     cpath = _Py_EncodeLocaleRaw(path, NULL);
1865     if (cpath == NULL) {
1866         errno = EINVAL;
1867         return -1;
1868     }
1869     res = readlink(cpath, cbuf, cbuf_len);
1870     PyMem_RawFree(cpath);
1871     if (res == -1) {
1872         return -1;
1873     }
1874     if ((size_t)res == cbuf_len) {
1875         errno = EINVAL;
1876         return -1;
1877     }
1878     cbuf[res] = '\0'; /* buf will be null terminated */
1879     wbuf = Py_DecodeLocale(cbuf, &r1);
1880     if (wbuf == NULL) {
1881         errno = EINVAL;
1882         return -1;
1883     }
1884     /* wbuf must have space to store the trailing NUL character */
1885     if (buflen <= r1) {
1886         PyMem_RawFree(wbuf);
1887         errno = EINVAL;
1888         return -1;
1889     }
1890     wcsncpy(buf, wbuf, buflen);
1891     PyMem_RawFree(wbuf);
1892     return (int)r1;
1893 }
1894 #endif
1895 
1896 #ifdef HAVE_REALPATH
1897 
1898 /* Return the canonicalized absolute pathname. Encode path to the locale
1899    encoding, decode the result from the locale encoding.
1900 
1901    Return NULL on encoding error, realpath() error, decoding error
1902    or if 'resolved_path' is too short. */
1903 wchar_t*
_Py_wrealpath(const wchar_t * path,wchar_t * resolved_path,size_t resolved_path_len)1904 _Py_wrealpath(const wchar_t *path,
1905               wchar_t *resolved_path, size_t resolved_path_len)
1906 {
1907     char *cpath;
1908     char cresolved_path[MAXPATHLEN];
1909     wchar_t *wresolved_path;
1910     char *res;
1911     size_t r;
1912     cpath = _Py_EncodeLocaleRaw(path, NULL);
1913     if (cpath == NULL) {
1914         errno = EINVAL;
1915         return NULL;
1916     }
1917     res = realpath(cpath, cresolved_path);
1918     PyMem_RawFree(cpath);
1919     if (res == NULL)
1920         return NULL;
1921 
1922     wresolved_path = Py_DecodeLocale(cresolved_path, &r);
1923     if (wresolved_path == NULL) {
1924         errno = EINVAL;
1925         return NULL;
1926     }
1927     /* wresolved_path must have space to store the trailing NUL character */
1928     if (resolved_path_len <= r) {
1929         PyMem_RawFree(wresolved_path);
1930         errno = EINVAL;
1931         return NULL;
1932     }
1933     wcsncpy(resolved_path, wresolved_path, resolved_path_len);
1934     PyMem_RawFree(wresolved_path);
1935     return resolved_path;
1936 }
1937 #endif
1938 
1939 
1940 #ifndef MS_WINDOWS
1941 int
_Py_isabs(const wchar_t * path)1942 _Py_isabs(const wchar_t *path)
1943 {
1944     return (path[0] == SEP);
1945 }
1946 #endif
1947 
1948 
1949 /* Get an absolute path.
1950    On error (ex: fail to get the current directory), return -1.
1951    On memory allocation failure, set *abspath_p to NULL and return 0.
1952    On success, return a newly allocated to *abspath_p to and return 0.
1953    The string must be freed by PyMem_RawFree(). */
1954 int
_Py_abspath(const wchar_t * path,wchar_t ** abspath_p)1955 _Py_abspath(const wchar_t *path, wchar_t **abspath_p)
1956 {
1957 #ifdef MS_WINDOWS
1958     wchar_t woutbuf[MAX_PATH], *woutbufp = woutbuf;
1959     DWORD result;
1960 
1961     result = GetFullPathNameW(path,
1962                               Py_ARRAY_LENGTH(woutbuf), woutbuf,
1963                               NULL);
1964     if (!result) {
1965         return -1;
1966     }
1967 
1968     if (result > Py_ARRAY_LENGTH(woutbuf)) {
1969         if ((size_t)result <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
1970             woutbufp = PyMem_RawMalloc((size_t)result * sizeof(wchar_t));
1971         }
1972         else {
1973             woutbufp = NULL;
1974         }
1975         if (!woutbufp) {
1976             *abspath_p = NULL;
1977             return 0;
1978         }
1979 
1980         result = GetFullPathNameW(path, result, woutbufp, NULL);
1981         if (!result) {
1982             PyMem_RawFree(woutbufp);
1983             return -1;
1984         }
1985     }
1986 
1987     if (woutbufp != woutbuf) {
1988         *abspath_p = woutbufp;
1989         return 0;
1990     }
1991 
1992     *abspath_p = _PyMem_RawWcsdup(woutbufp);
1993     return 0;
1994 #else
1995     if (_Py_isabs(path)) {
1996         *abspath_p = _PyMem_RawWcsdup(path);
1997         return 0;
1998     }
1999 
2000     wchar_t cwd[MAXPATHLEN + 1];
2001     cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2002     if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2003         /* unable to get the current directory */
2004         return -1;
2005     }
2006 
2007     size_t cwd_len = wcslen(cwd);
2008     size_t path_len = wcslen(path);
2009     size_t len = cwd_len + 1 + path_len + 1;
2010     if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2011         *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
2012     }
2013     else {
2014         *abspath_p = NULL;
2015     }
2016     if (*abspath_p == NULL) {
2017         return 0;
2018     }
2019 
2020     wchar_t *abspath = *abspath_p;
2021     memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
2022     abspath += cwd_len;
2023 
2024     *abspath = (wchar_t)SEP;
2025     abspath++;
2026 
2027     memcpy(abspath, path, path_len * sizeof(wchar_t));
2028     abspath += path_len;
2029 
2030     *abspath = 0;
2031     return 0;
2032 #endif
2033 }
2034 
2035 
2036 /* Get the current directory. buflen is the buffer size in wide characters
2037    including the null character. Decode the path from the locale encoding.
2038 
2039    Return NULL on getcwd() error, on decoding error, or if 'buf' is
2040    too short. */
2041 wchar_t*
_Py_wgetcwd(wchar_t * buf,size_t buflen)2042 _Py_wgetcwd(wchar_t *buf, size_t buflen)
2043 {
2044 #ifdef MS_WINDOWS
2045     int ibuflen = (int)Py_MIN(buflen, INT_MAX);
2046     return _wgetcwd(buf, ibuflen);
2047 #else
2048     char fname[MAXPATHLEN];
2049     wchar_t *wname;
2050     size_t len;
2051 
2052     if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
2053         return NULL;
2054     wname = Py_DecodeLocale(fname, &len);
2055     if (wname == NULL)
2056         return NULL;
2057     /* wname must have space to store the trailing NUL character */
2058     if (buflen <= len) {
2059         PyMem_RawFree(wname);
2060         return NULL;
2061     }
2062     wcsncpy(buf, wname, buflen);
2063     PyMem_RawFree(wname);
2064     return buf;
2065 #endif
2066 }
2067 
2068 /* Duplicate a file descriptor. The new file descriptor is created as
2069    non-inheritable. Return a new file descriptor on success, raise an OSError
2070    exception and return -1 on error.
2071 
2072    The GIL is released to call dup(). The caller must hold the GIL. */
2073 int
_Py_dup(int fd)2074 _Py_dup(int fd)
2075 {
2076 #ifdef MS_WINDOWS
2077     HANDLE handle;
2078 #endif
2079 
2080     assert(PyGILState_Check());
2081 
2082 #ifdef MS_WINDOWS
2083     _Py_BEGIN_SUPPRESS_IPH
2084     handle = (HANDLE)_get_osfhandle(fd);
2085     _Py_END_SUPPRESS_IPH
2086     if (handle == INVALID_HANDLE_VALUE) {
2087         PyErr_SetFromErrno(PyExc_OSError);
2088         return -1;
2089     }
2090 
2091     Py_BEGIN_ALLOW_THREADS
2092     _Py_BEGIN_SUPPRESS_IPH
2093     fd = dup(fd);
2094     _Py_END_SUPPRESS_IPH
2095     Py_END_ALLOW_THREADS
2096     if (fd < 0) {
2097         PyErr_SetFromErrno(PyExc_OSError);
2098         return -1;
2099     }
2100 
2101     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2102         _Py_BEGIN_SUPPRESS_IPH
2103         close(fd);
2104         _Py_END_SUPPRESS_IPH
2105         return -1;
2106     }
2107 #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2108     Py_BEGIN_ALLOW_THREADS
2109     _Py_BEGIN_SUPPRESS_IPH
2110     fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2111     _Py_END_SUPPRESS_IPH
2112     Py_END_ALLOW_THREADS
2113     if (fd < 0) {
2114         PyErr_SetFromErrno(PyExc_OSError);
2115         return -1;
2116     }
2117 
2118 #else
2119     Py_BEGIN_ALLOW_THREADS
2120     _Py_BEGIN_SUPPRESS_IPH
2121     fd = dup(fd);
2122     _Py_END_SUPPRESS_IPH
2123     Py_END_ALLOW_THREADS
2124     if (fd < 0) {
2125         PyErr_SetFromErrno(PyExc_OSError);
2126         return -1;
2127     }
2128 
2129     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2130         _Py_BEGIN_SUPPRESS_IPH
2131         close(fd);
2132         _Py_END_SUPPRESS_IPH
2133         return -1;
2134     }
2135 #endif
2136     return fd;
2137 }
2138 
2139 #ifndef MS_WINDOWS
2140 /* Get the blocking mode of the file descriptor.
2141    Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2142    raise an exception and return -1 on error. */
2143 int
_Py_get_blocking(int fd)2144 _Py_get_blocking(int fd)
2145 {
2146     int flags;
2147     _Py_BEGIN_SUPPRESS_IPH
2148     flags = fcntl(fd, F_GETFL, 0);
2149     _Py_END_SUPPRESS_IPH
2150     if (flags < 0) {
2151         PyErr_SetFromErrno(PyExc_OSError);
2152         return -1;
2153     }
2154 
2155     return !(flags & O_NONBLOCK);
2156 }
2157 
2158 /* Set the blocking mode of the specified file descriptor.
2159 
2160    Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2161    otherwise.
2162 
2163    Return 0 on success, raise an exception and return -1 on error. */
2164 int
_Py_set_blocking(int fd,int blocking)2165 _Py_set_blocking(int fd, int blocking)
2166 {
2167 #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
2168     int arg = !blocking;
2169     if (ioctl(fd, FIONBIO, &arg) < 0)
2170         goto error;
2171 #else
2172     int flags, res;
2173 
2174     _Py_BEGIN_SUPPRESS_IPH
2175     flags = fcntl(fd, F_GETFL, 0);
2176     if (flags >= 0) {
2177         if (blocking)
2178             flags = flags & (~O_NONBLOCK);
2179         else
2180             flags = flags | O_NONBLOCK;
2181 
2182         res = fcntl(fd, F_SETFL, flags);
2183     } else {
2184         res = -1;
2185     }
2186     _Py_END_SUPPRESS_IPH
2187 
2188     if (res < 0)
2189         goto error;
2190 #endif
2191     return 0;
2192 
2193 error:
2194     PyErr_SetFromErrno(PyExc_OSError);
2195     return -1;
2196 }
2197 #endif
2198 
2199 
2200 int
_Py_GetLocaleconvNumeric(struct lconv * lc,PyObject ** decimal_point,PyObject ** thousands_sep)2201 _Py_GetLocaleconvNumeric(struct lconv *lc,
2202                          PyObject **decimal_point, PyObject **thousands_sep)
2203 {
2204     assert(decimal_point != NULL);
2205     assert(thousands_sep != NULL);
2206 
2207 #ifndef MS_WINDOWS
2208     int change_locale = 0;
2209     if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
2210         change_locale = 1;
2211     }
2212     if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
2213         change_locale = 1;
2214     }
2215 
2216     /* Keep a copy of the LC_CTYPE locale */
2217     char *oldloc = NULL, *loc = NULL;
2218     if (change_locale) {
2219         oldloc = setlocale(LC_CTYPE, NULL);
2220         if (!oldloc) {
2221             PyErr_SetString(PyExc_RuntimeWarning,
2222                             "failed to get LC_CTYPE locale");
2223             return -1;
2224         }
2225 
2226         oldloc = _PyMem_Strdup(oldloc);
2227         if (!oldloc) {
2228             PyErr_NoMemory();
2229             return -1;
2230         }
2231 
2232         loc = setlocale(LC_NUMERIC, NULL);
2233         if (loc != NULL && strcmp(loc, oldloc) == 0) {
2234             loc = NULL;
2235         }
2236 
2237         if (loc != NULL) {
2238             /* Only set the locale temporarily the LC_CTYPE locale
2239                if LC_NUMERIC locale is different than LC_CTYPE locale and
2240                decimal_point and/or thousands_sep are non-ASCII or longer than
2241                1 byte */
2242             setlocale(LC_CTYPE, loc);
2243         }
2244     }
2245 
2246 #define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2247 #else /* MS_WINDOWS */
2248 /* Use _W_* fields of Windows strcut lconv */
2249 #define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2250 #endif /* MS_WINDOWS */
2251 
2252     int res = -1;
2253 
2254     *decimal_point = GET_LOCALE_STRING(decimal_point);
2255     if (*decimal_point == NULL) {
2256         goto done;
2257     }
2258 
2259     *thousands_sep = GET_LOCALE_STRING(thousands_sep);
2260     if (*thousands_sep == NULL) {
2261         goto done;
2262     }
2263 
2264     res = 0;
2265 
2266 done:
2267 #ifndef MS_WINDOWS
2268     if (loc != NULL) {
2269         setlocale(LC_CTYPE, oldloc);
2270     }
2271     PyMem_Free(oldloc);
2272 #endif
2273     return res;
2274 
2275 #undef GET_LOCALE_STRING
2276 }
2277