1 #include "Python.h"
2 #include "pycore_fileutils.h"
3 #include "osdefs.h"
4 #include <locale.h>
5 
6 #ifdef MS_WINDOWS
7 #  include <malloc.h>
8 #  include <windows.h>
9 extern int winerror_to_errno(int);
10 #endif
11 
12 #ifdef HAVE_LANGINFO_H
13 #include <langinfo.h>
14 #endif
15 
16 #ifdef HAVE_SYS_IOCTL_H
17 #include <sys/ioctl.h>
18 #endif
19 
20 #ifdef HAVE_FCNTL_H
21 #include <fcntl.h>
22 #endif /* HAVE_FCNTL_H */
23 
#ifdef O_CLOEXEC
/* Does open() support the O_CLOEXEC flag? Possible values:

   -1: unknown (initial value)
    0: open() ignores the O_CLOEXEC flag, e.g. Linux kernel older than 2.6.23
    1: open() supports the O_CLOEXEC flag, close-on-exec is set

   The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
   and os.open(). */
int _Py_open_cloexec_works = -1;
#endif
35 
// Highest code point accepted by is_valid_wide_char() (U+10FFFF).
// The value must be the same in unicodeobject.c.
#define MAX_UNICODE 0x10ffff

// mbstowcs() and mbrtowc() errors.
// DECODE_ERROR is (size_t)-1: the conversion failed. This file also compares
// wcstombs() results against it.
// INCOMPLETE_CHARACTER is (size_t)-2: mbrtowc() was given an incomplete
// multibyte character.
static const size_t DECODE_ERROR = ((size_t)-1);
static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
42 
43 
44 static int
get_surrogateescape(_Py_error_handler errors,int * surrogateescape)45 get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
46 {
47     switch (errors)
48     {
49     case _Py_ERROR_STRICT:
50         *surrogateescape = 0;
51         return 0;
52     case _Py_ERROR_SURROGATEESCAPE:
53         *surrogateescape = 1;
54         return 0;
55     default:
56         return -1;
57     }
58 }
59 
60 
61 PyObject *
_Py_device_encoding(int fd)62 _Py_device_encoding(int fd)
63 {
64 #if defined(MS_WINDOWS)
65     UINT cp;
66 #endif
67     int valid;
68     _Py_BEGIN_SUPPRESS_IPH
69     valid = isatty(fd);
70     _Py_END_SUPPRESS_IPH
71     if (!valid)
72         Py_RETURN_NONE;
73 
74 #if defined(MS_WINDOWS)
75     if (fd == 0)
76         cp = GetConsoleCP();
77     else if (fd == 1 || fd == 2)
78         cp = GetConsoleOutputCP();
79     else
80         cp = 0;
81     /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
82        has no console */
83     if (cp != 0)
84         return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
85 #elif defined(CODESET)
86     {
87         char *codeset = nl_langinfo(CODESET);
88         if (codeset != NULL && codeset[0] != 0)
89             return PyUnicode_FromString(codeset);
90     }
91 #endif
92     Py_RETURN_NONE;
93 }
94 
95 
96 static size_t
is_valid_wide_char(wchar_t ch)97 is_valid_wide_char(wchar_t ch)
98 {
99     if (Py_UNICODE_IS_SURROGATE(ch)) {
100         // Reject lone surrogate characters
101         return 0;
102     }
103     if (ch > MAX_UNICODE) {
104         // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
105         // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
106         // it creates characters outside the [U+0000; U+10ffff] range:
107         // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
108         return 0;
109     }
110     return 1;
111 }
112 
113 
114 static size_t
_Py_mbstowcs(wchar_t * dest,const char * src,size_t n)115 _Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
116 {
117     size_t count = mbstowcs(dest, src, n);
118     if (dest != NULL && count != DECODE_ERROR) {
119         for (size_t i=0; i < count; i++) {
120             wchar_t ch = dest[i];
121             if (!is_valid_wide_char(ch)) {
122                 return DECODE_ERROR;
123             }
124         }
125     }
126     return count;
127 }
128 
129 
130 #ifdef HAVE_MBRTOWC
131 static size_t
_Py_mbrtowc(wchar_t * pwc,const char * str,size_t len,mbstate_t * pmbs)132 _Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
133 {
134     assert(pwc != NULL);
135     size_t count = mbrtowc(pwc, str, len, pmbs);
136     if (count != 0 && count != DECODE_ERROR && count != INCOMPLETE_CHARACTER) {
137         if (!is_valid_wide_char(*pwc)) {
138             return DECODE_ERROR;
139         }
140     }
141     return count;
142 }
143 #endif
144 
145 
146 #if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
147 
148 #define USE_FORCE_ASCII
149 
150 extern int _Py_normalize_encoding(const char *, char *, size_t);
151 
/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
   and POSIX locale. nl_langinfo(CODESET) announces an alias of the
   ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
   locale.getpreferredencoding() codec. For example, if command line arguments
   are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
   UnicodeEncodeError instead of retrieving the original byte string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C" or
   "POSIX", nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and
   at least one byte in range 0x80-0xff can be decoded from the locale
   encoding. The workaround is also enabled on error, for example if getting
   the locale failed.

   On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
   announces "roman8" but mbstowcs() uses Latin1 in practice. Also force the
   ASCII encoding in this case.

   Values of force_ascii:

       1: the workaround is used: Py_EncodeLocale() uses
          encode_ascii() and Py_DecodeLocale() uses
          decode_ascii()
       0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
          Py_DecodeLocale() uses mbstowcs()
      -1: unknown, need to call check_force_ascii() to get the value
*/
static int force_ascii = -1;
180 
/* Compute the value of force_ascii (see the comment on force_ascii).

   Return 1 if the ASCII encoding must be forced, 0 otherwise. Return 1 as
   well if the locale or its codeset cannot be retrieved or normalized, and
   when nl_langinfo(CODESET) is not available.

   The probe strings passed to _Py_mbstowcs() are null-terminated:
   mbstowcs() takes a C string, so a byte treated as the start of a
   multibyte sequence must not make it read past the probe buffer. */
static int
check_force_ascii(void)
{
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* byte 0xA7 followed by the null terminator */
        unsigned char probe[2] = {0xA7, 0};
        wchar_t wch;
        size_t res;

        res = _Py_mbstowcs(&wch, (char*)probe, 1);
        if (res != DECODE_ERROR && wch == L'\xA7') {
            /* On HP-UX with the C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    const char* ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char **alias=ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i=0x80; i<=0xff; i++) {
        char ch[2];
        wchar_t wch[1];
        size_t res;

        unsigned uch = (unsigned char)i;
        ch[0] = (char)uch;
        ch[1] = '\0';   /* keep the probe a valid C string */
        res = _Py_mbstowcs(wch, ch, 1);
        if (res != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
280 
281 
282 int
_Py_GetForceASCII(void)283 _Py_GetForceASCII(void)
284 {
285     if (force_ascii == -1) {
286         force_ascii = check_force_ascii();
287     }
288     return force_ascii;
289 }
290 
291 
/* Forget the cached force_ascii value: it is set back to -1 (unknown), so
   the next reader which finds -1 calls check_force_ascii() again. */
void
_Py_ResetForceASCII(void)
{
    force_ascii = -1;
}
297 
298 
299 static int
encode_ascii(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)300 encode_ascii(const wchar_t *text, char **str,
301              size_t *error_pos, const char **reason,
302              int raw_malloc, _Py_error_handler errors)
303 {
304     char *result = NULL, *out;
305     size_t len, i;
306     wchar_t ch;
307 
308     int surrogateescape;
309     if (get_surrogateescape(errors, &surrogateescape) < 0) {
310         return -3;
311     }
312 
313     len = wcslen(text);
314 
315     /* +1 for NULL byte */
316     if (raw_malloc) {
317         result = PyMem_RawMalloc(len + 1);
318     }
319     else {
320         result = PyMem_Malloc(len + 1);
321     }
322     if (result == NULL) {
323         return -1;
324     }
325 
326     out = result;
327     for (i=0; i<len; i++) {
328         ch = text[i];
329 
330         if (ch <= 0x7f) {
331             /* ASCII character */
332             *out++ = (char)ch;
333         }
334         else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
335             /* UTF-8b surrogate */
336             *out++ = (char)(ch - 0xdc00);
337         }
338         else {
339             if (raw_malloc) {
340                 PyMem_RawFree(result);
341             }
342             else {
343                 PyMem_Free(result);
344             }
345             if (error_pos != NULL) {
346                 *error_pos = i;
347             }
348             if (reason) {
349                 *reason = "encoding error";
350             }
351             return -2;
352         }
353     }
354     *out = '\0';
355     *str = result;
356     return 0;
357 }
358 #else
/* Variant used when the force_ascii workaround is not compiled in
   (_Py_FORCE_UTF8_FS_ENCODING is defined or on Windows): the ASCII encoding
   is never forced. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
364 
/* Variant used when the force_ascii workaround is not compiled in: there is
   no cached value to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
370 #endif   /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
371 
372 
373 #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
374 static int
decode_ascii(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)375 decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
376              const char **reason, _Py_error_handler errors)
377 {
378     wchar_t *res;
379     unsigned char *in;
380     wchar_t *out;
381     size_t argsize = strlen(arg) + 1;
382 
383     int surrogateescape;
384     if (get_surrogateescape(errors, &surrogateescape) < 0) {
385         return -3;
386     }
387 
388     if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
389         return -1;
390     }
391     res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
392     if (!res) {
393         return -1;
394     }
395 
396     out = res;
397     for (in = (unsigned char*)arg; *in; in++) {
398         unsigned char ch = *in;
399         if (ch < 128) {
400             *out++ = ch;
401         }
402         else {
403             if (!surrogateescape) {
404                 PyMem_RawFree(res);
405                 if (wlen) {
406                     *wlen = in - (unsigned char*)arg;
407                 }
408                 if (reason) {
409                     *reason = "decoding error";
410                 }
411                 return -2;
412             }
413             *out++ = 0xdc00 + ch;
414         }
415     }
416     *out = 0;
417 
418     if (wlen != NULL) {
419         *wlen = out - res;
420     }
421     *wstr = res;
422     return 0;
423 }
424 #endif   /* !HAVE_MBRTOWC */
425 
/* Decode a byte string from the encoding of the current LC_CTYPE locale.

   The whole string is first decoded with _Py_mbstowcs(). If that fails, the
   string is decoded again character by character with _Py_mbrtowc() (when
   available), which makes it possible to escape undecodable bytes with the
   surrogateescape error handler, or to report the error position with the
   strict error handler. Without mbrtowc(), decode_ascii() is used instead.

   Return value:

    0: success, *wstr is set to a wide character string allocated by
       PyMem_RawMalloc(). If wlen is not NULL, *wlen is set to the number of
       wide characters excluding the null character.
   -1: memory allocation failure.
   -2: decoding error. If wlen is not NULL, *wlen is set to the offset of
       the invalid byte sequence in arg. If reason is not NULL, *reason is
       set to an error message.
   -3: the error handler 'errors' is not supported.
 */
static int
decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
                      const char **reason, _Py_error_handler errors)
{
    wchar_t *res;
    size_t argsize;
    size_t count;
#ifdef HAVE_MBRTOWC
    unsigned char *in;
    wchar_t *out;
    mbstate_t mbs;
#endif

    int surrogateescape;
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
        return -3;
    }

#ifdef HAVE_BROKEN_MBSTOWCS
    /* Some platforms have a broken implementation of
     * mbstowcs which does not count the characters that
     * would result from conversion.  Use an upper bound.
     */
    argsize = strlen(arg);
#else
    /* Ask for the number of wide characters without storing anything */
    argsize = _Py_mbstowcs(NULL, arg, 0);
#endif
    if (argsize != DECODE_ERROR) {
        /* Make sure that (argsize + 1) * sizeof(wchar_t) cannot overflow */
        if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
            return -1;
        }
        res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
        if (!res) {
            return -1;
        }

        /* argsize + 1: leave room for the null character */
        count = _Py_mbstowcs(res, arg, argsize + 1);
        if (count != DECODE_ERROR) {
            *wstr = res;
            if (wlen != NULL) {
                *wlen = count;
            }
            return 0;
        }
        /* The second call failed: drop the buffer and use the fallback */
        PyMem_RawFree(res);
    }

    /* Conversion failed. Fall back to escaping with surrogateescape. */
#ifdef HAVE_MBRTOWC
    /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */

    /* Overallocate; as multi-byte characters are in the argument, the
       actual output could use less memory. */
    argsize = strlen(arg) + 1;
    if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
        return -1;
    }
    res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
    if (!res) {
        return -1;
    }

    /* From here on, argsize is the number of input bytes left to decode,
       including the null byte. */
    in = (unsigned char*)arg;
    out = res;
    memset(&mbs, 0, sizeof mbs);
    while (argsize) {
        size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
        if (converted == 0) {
            /* Reached end of string; null char stored. */
            break;
        }

        if (converted == INCOMPLETE_CHARACTER) {
            /* Incomplete character. This should never happen,
               since we provide everything that we have -
               unless there is a bug in the C library, or I
               misunderstood how mbrtowc works. */
            goto decode_error;
        }

        if (converted == DECODE_ERROR) {
            if (!surrogateescape) {
                goto decode_error;
            }

            /* Decoding error. Escape as UTF-8b, and start over in the initial
               shift state. */
            *out++ = 0xdc00 + *in++;
            argsize--;
            memset(&mbs, 0, sizeof mbs);
            continue;
        }

        // _Py_mbrtowc() reject lone surrogate characters
        assert(!Py_UNICODE_IS_SURROGATE(*out));

        /* successfully converted some bytes */
        in += converted;
        argsize -= converted;
        out++;
    }
    if (wlen != NULL) {
        *wlen = out - res;
    }
    *wstr = res;
    return 0;

decode_error:
    PyMem_RawFree(res);
    if (wlen) {
        /* offset in arg of the bytes which cannot be decoded */
        *wlen = in - (unsigned char*)arg;
    }
    if (reason) {
        *reason = "decoding error";
    }
    return -2;
#else   /* HAVE_MBRTOWC */
    /* Cannot use C locale for escaping; manually escape as if charset
       is ASCII (i.e. escape all bytes >= 128). This will still roundtrip
       correctly in the locale's charset, which must be an ASCII superset. */
    return decode_ascii(arg, wstr, wlen, reason, errors);
#endif   /* HAVE_MBRTOWC */
}
549 
550 
551 /* Decode a byte string from the locale encoding.
552 
553    Use the strict error handler if 'surrogateescape' is zero.  Use the
554    surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
555    bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
556    can be decoded as a surrogate character, escape the bytes using the
557    surrogateescape error handler instead of decoding them.
558 
559    On success, return 0 and write the newly allocated wide character string into
560    *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
561    the number of wide characters excluding the null character into *wlen.
562 
563    On memory allocation failure, return -1.
564 
565    On decoding error, return -2. If wlen is not NULL, write the start of
566    invalid byte sequence in the input string into *wlen. If reason is not NULL,
567    write the decoding error message into *reason.
568 
569    Return -3 if the error handler 'errors' is not supported.
570 
571    Use the Py_EncodeLocaleEx() function to encode the character string back to
572    a byte string. */
573 int
_Py_DecodeLocaleEx(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,int current_locale,_Py_error_handler errors)574 _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
575                    const char **reason,
576                    int current_locale, _Py_error_handler errors)
577 {
578     if (current_locale) {
579 #ifdef _Py_FORCE_UTF8_LOCALE
580         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
581                                 errors);
582 #else
583         return decode_current_locale(arg, wstr, wlen, reason, errors);
584 #endif
585     }
586 
587 #ifdef _Py_FORCE_UTF8_FS_ENCODING
588     return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
589                             errors);
590 #else
591     int use_utf8 = (Py_UTF8Mode == 1);
592 #ifdef MS_WINDOWS
593     use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
594 #endif
595     if (use_utf8) {
596         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
597                                 errors);
598     }
599 
600 #ifdef USE_FORCE_ASCII
601     if (force_ascii == -1) {
602         force_ascii = check_force_ascii();
603     }
604 
605     if (force_ascii) {
606         /* force ASCII encoding to workaround mbstowcs() issue */
607         return decode_ascii(arg, wstr, wlen, reason, errors);
608     }
609 #endif
610 
611     return decode_current_locale(arg, wstr, wlen, reason, errors);
612 #endif   /* !_Py_FORCE_UTF8_FS_ENCODING */
613 }
614 
615 
616 /* Decode a byte string from the locale encoding with the
617    surrogateescape error handler: undecodable bytes are decoded as characters
618    in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
619    character, escape the bytes using the surrogateescape error handler instead
620    of decoding them.
621 
622    Return a pointer to a newly allocated wide character string, use
623    PyMem_RawFree() to free the memory. If size is not NULL, write the number of
624    wide characters excluding the null character into *size
625 
626    Return NULL on decoding error or memory allocation error. If *size* is not
627    NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
628    decoding error.
629 
630    Decoding errors should never happen, unless there is a bug in the C
631    library.
632 
633    Use the Py_EncodeLocale() function to encode the character string back to a
634    byte string. */
635 wchar_t*
Py_DecodeLocale(const char * arg,size_t * wlen)636 Py_DecodeLocale(const char* arg, size_t *wlen)
637 {
638     wchar_t *wstr;
639     int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
640                                  NULL, 0,
641                                  _Py_ERROR_SURROGATEESCAPE);
642     if (res != 0) {
643         assert(res != -3);
644         if (wlen != NULL) {
645             *wlen = (size_t)res;
646         }
647         return NULL;
648     }
649     return wstr;
650 }
651 
652 
/* Encode a wide character string to the encoding of the current LC_CTYPE
   locale, one character at a time, using wcstombs().

   With the surrogateescape error handler, characters in range
   U+DC80..U+DCFF are encoded as bytes 0x80..0xFF. With the strict error
   handler, they are an encoding error.

   Parameters:

   * raw_malloc: if non-zero, allocate the result using PyMem_RawMalloc()
     instead of PyMem_Malloc().

   Return value:

    0: success, *str is set to a newly allocated null-terminated byte string.
   -1: memory allocation failure.
   -2: encoding error. If error_pos is not NULL, *error_pos is set to the
       index of the character which cannot be encoded. If reason is not NULL,
       *reason is set to an error message.
   -3: the error handler 'errors' is not supported.
 */
static int
encode_current_locale(const wchar_t *text, char **str,
                      size_t *error_pos, const char **reason,
                      int raw_malloc, _Py_error_handler errors)
{
    const size_t len = wcslen(text);
    char *result = NULL, *bytes = NULL;
    size_t i, size, converted;
    wchar_t c, buf[2];

    int surrogateescape;
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
        return -3;
    }

    /* The function works in two steps:
       1. compute the length of the output buffer in bytes (size)
       2. outputs the bytes

       Both steps run the same for loop: bytes is NULL during the first
       step and points into the output buffer during the second one, where
       size is the number of bytes left in the buffer. */
    size = 0;
    /* buf is a one character string: only buf[0] changes */
    buf[1] = 0;
    while (1) {
        for (i=0; i < len; i++) {
            c = text[i];
            if (c >= 0xdc80 && c <= 0xdcff) {
                if (!surrogateescape) {
                    goto encode_error;
                }
                /* UTF-8b surrogate */
                if (bytes != NULL) {
                    *bytes++ = c - 0xdc00;
                    size--;
                }
                else {
                    size++;
                }
                continue;
            }
            else {
                buf[0] = c;
                if (bytes != NULL) {
                    converted = wcstombs(bytes, buf, size);
                }
                else {
                    converted = wcstombs(NULL, buf, 0);
                }
                /* DECODE_ERROR is (size_t)-1, also the wcstombs() error */
                if (converted == DECODE_ERROR) {
                    goto encode_error;
                }
                if (bytes != NULL) {
                    bytes += converted;
                    size -= converted;
                }
                else {
                    size += converted;
                }
            }
        }
        if (result != NULL) {
            /* end of the second step */
            *bytes = '\0';
            break;
        }

        /* end of the first step: allocate the output buffer */
        size += 1; /* nul byte at the end */
        if (raw_malloc) {
            result = PyMem_RawMalloc(size);
        }
        else {
            result = PyMem_Malloc(size);
        }
        if (result == NULL) {
            return -1;
        }
        bytes = result;
    }
    *str = result;
    return 0;

encode_error:
    /* result is still NULL if the error occurred during the first step */
    if (raw_malloc) {
        PyMem_RawFree(result);
    }
    else {
        PyMem_Free(result);
    }
    if (error_pos != NULL) {
        *error_pos = i;
    }
    if (reason) {
        *reason = "encoding error";
    }
    return -2;
}
745 
746 
747 /* Encode a string to the locale encoding.
748 
749    Parameters:
750 
751    * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
752      of PyMem_Malloc().
753    * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
754      Python filesystem encoding.
755    * errors: error handler like "strict" or "surrogateescape".
756 
757    Return value:
758 
759     0: success, *str is set to a newly allocated decoded string.
760    -1: memory allocation failure
761    -2: encoding error, set *error_pos and *reason (if set).
762    -3: the error handler 'errors' is not supported.
763  */
764 static int
encode_locale_ex(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,int current_locale,_Py_error_handler errors)765 encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
766                  const char **reason,
767                  int raw_malloc, int current_locale, _Py_error_handler errors)
768 {
769     if (current_locale) {
770 #ifdef _Py_FORCE_UTF8_LOCALE
771         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
772                                 raw_malloc, errors);
773 #else
774         return encode_current_locale(text, str, error_pos, reason,
775                                      raw_malloc, errors);
776 #endif
777     }
778 
779 #ifdef _Py_FORCE_UTF8_FS_ENCODING
780     return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
781                             raw_malloc, errors);
782 #else
783     int use_utf8 = (Py_UTF8Mode == 1);
784 #ifdef MS_WINDOWS
785     use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
786 #endif
787     if (use_utf8) {
788         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
789                                 raw_malloc, errors);
790     }
791 
792 #ifdef USE_FORCE_ASCII
793     if (force_ascii == -1) {
794         force_ascii = check_force_ascii();
795     }
796 
797     if (force_ascii) {
798         return encode_ascii(text, str, error_pos, reason,
799                             raw_malloc, errors);
800     }
801 #endif
802 
803     return encode_current_locale(text, str, error_pos, reason,
804                                  raw_malloc, errors);
805 #endif   /* _Py_FORCE_UTF8_FS_ENCODING */
806 }
807 
808 static char*
encode_locale(const wchar_t * text,size_t * error_pos,int raw_malloc,int current_locale)809 encode_locale(const wchar_t *text, size_t *error_pos,
810               int raw_malloc, int current_locale)
811 {
812     char *str;
813     int res = encode_locale_ex(text, &str, error_pos, NULL,
814                                raw_malloc, current_locale,
815                                _Py_ERROR_SURROGATEESCAPE);
816     if (res != -2 && error_pos) {
817         *error_pos = (size_t)-1;
818     }
819     if (res != 0) {
820         return NULL;
821     }
822     return str;
823 }
824 
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
   to the index of the invalid character on encoding error.

   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
   character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;   /* use the Python filesystem encoding */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
842 
843 
/* Same as Py_EncodeLocale(), except that the result is allocated by
   PyMem_RawMalloc(): it must be freed by PyMem_RawFree() instead of
   PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;       /* allocate with PyMem_RawMalloc() */
    const int current_locale = 0;   /* use the Python filesystem encoding */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
851 
852 
853 int
_Py_EncodeLocaleEx(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int current_locale,_Py_error_handler errors)854 _Py_EncodeLocaleEx(const wchar_t *text, char **str,
855                    size_t *error_pos, const char **reason,
856                    int current_locale, _Py_error_handler errors)
857 {
858     return encode_locale_ex(text, str, error_pos, reason, 1,
859                             current_locale, errors);
860 }
861 
862 
863 #ifdef MS_WINDOWS
/* Seconds between 1.1.1601 and 1.1.1970. Subtracted when a FILETIME is
   converted to a time_t, added back for the opposite conversion. */
static __int64 secs_between_epochs = 11644473600;
865 
866 static void
FILE_TIME_to_time_t_nsec(FILETIME * in_ptr,time_t * time_out,int * nsec_out)867 FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
868 {
869     /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
870     /* Cannot simply cast and dereference in_ptr,
871        since it might not be aligned properly */
872     __int64 in;
873     memcpy(&in, in_ptr, sizeof(in));
874     *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
875     *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
876 }
877 
878 void
_Py_time_t_to_FILE_TIME(time_t time_in,int nsec_in,FILETIME * out_ptr)879 _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
880 {
881     /* XXX endianness */
882     __int64 out;
883     out = time_in + secs_between_epochs;
884     out = out * 10000000 + nsec_in / 100;
885     memcpy(out_ptr, &out, sizeof(out));
886 }
887 
888 /* Below, we *know* that ugo+r is 0444 */
889 #if _S_IREAD != 0400
890 #error Unsupported C library
891 #endif
892 static int
attributes_to_mode(DWORD attr)893 attributes_to_mode(DWORD attr)
894 {
895     int m = 0;
896     if (attr & FILE_ATTRIBUTE_DIRECTORY)
897         m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
898     else
899         m |= _S_IFREG;
900     if (attr & FILE_ATTRIBUTE_READONLY)
901         m |= 0444;
902     else
903         m |= 0666;
904     return m;
905 }
906 
907 void
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION * info,ULONG reparse_tag,struct _Py_stat_struct * result)908 _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
909                            struct _Py_stat_struct *result)
910 {
911     memset(result, 0, sizeof(*result));
912     result->st_mode = attributes_to_mode(info->dwFileAttributes);
913     result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
914     result->st_dev = info->dwVolumeSerialNumber;
915     result->st_rdev = result->st_dev;
916     FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
917     FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
918     FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
919     result->st_nlink = info->nNumberOfLinks;
920     result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
921     /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
922        open other name surrogate reparse points without traversing them. To
923        detect/handle these, check st_file_attributes and st_reparse_tag. */
924     result->st_reparse_tag = reparse_tag;
925     if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
926         reparse_tag == IO_REPARSE_TAG_SYMLINK) {
927         /* first clear the S_IFMT bits */
928         result->st_mode ^= (result->st_mode & S_IFMT);
929         /* now set the bits that make this a symlink */
930         result->st_mode |= S_IFLNK;
931     }
932     result->st_file_attributes = info->dwFileAttributes;
933 }
934 #endif
935 
/* Return information about the file opened as the file descriptor fd.

   On POSIX, this is a call to fstat().

   On Windows, GetFileType() and GetFileInformationByHandle() are used
   because they support files larger than 2 GiB: fstat() may fail with
   EOVERFLOW on such files because the file size type is a signed 32-bit
   integer, see issue #23152. Only st_mode is set for character devices and
   pipes; other non-disk file types leave *status zeroed.

   Fill *status and return 0 on success. On error, return nonzero: on
   Windows the last Windows error is set, on POSIX errno is set. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    BY_HANDLE_FILE_INFORMATION file_info;
    HANDLE handle;
    int file_type;

    _Py_BEGIN_SUPPRESS_IPH
    handle = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH

    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    file_type = GetFileType(handle);
    if (file_type == FILE_TYPE_UNKNOWN) {
        DWORD winerr = GetLastError();
        if (winerr != 0) {
            errno = winerror_to_errno(winerr);
            return -1;
        }
        /* else: valid but unknown file */
    }

    if (file_type == FILE_TYPE_CHAR) {
        status->st_mode = _S_IFCHR;
        return 0;
    }
    if (file_type == FILE_TYPE_PIPE) {
        status->st_mode = _S_IFIFO;
        return 0;
    }
    if (file_type != FILE_TYPE_DISK) {
        /* no more information for the other file types */
        return 0;
    }

    if (!GetFileInformationByHandle(handle, &file_info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&file_info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)file_info.nFileIndexHigh) << 32) + file_info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
1001 
1002 /* Return information about a file.
1003 
1004    On POSIX, use fstat().
1005 
1006    On Windows, use GetFileType() and GetFileInformationByHandle() which support
1007    files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
1008    than 2 GiB because the file size type is a signed 32-bit integer: see issue
1009    #23152.
1010 
1011    Raise an exception and return -1 on error. On Windows, set the last Windows
1012    error on error. On POSIX, set errno on error. Fill status and return 0 on
1013    success.
1014 
1015    Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1016    to call fstat(). The caller must hold the GIL. */
1017 int
_Py_fstat(int fd,struct _Py_stat_struct * status)1018 _Py_fstat(int fd, struct _Py_stat_struct *status)
1019 {
1020     int res;
1021 
1022     assert(PyGILState_Check());
1023 
1024     Py_BEGIN_ALLOW_THREADS
1025     res = _Py_fstat_noraise(fd, status);
1026     Py_END_ALLOW_THREADS
1027 
1028     if (res != 0) {
1029 #ifdef MS_WINDOWS
1030         PyErr_SetFromWindowsErr(0);
1031 #else
1032         PyErr_SetFromErrno(PyExc_OSError);
1033 #endif
1034         return -1;
1035     }
1036     return 0;
1037 }
1038 
1039 /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1040    call stat() otherwise. Only fill st_mode attribute on Windows.
1041 
1042    Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1043    raised. */
1044 
1045 int
_Py_stat(PyObject * path,struct stat * statbuf)1046 _Py_stat(PyObject *path, struct stat *statbuf)
1047 {
1048 #ifdef MS_WINDOWS
1049     int err;
1050     struct _stat wstatbuf;
1051     const wchar_t *wpath;
1052 
1053     wpath = _PyUnicode_AsUnicode(path);
1054     if (wpath == NULL)
1055         return -2;
1056 
1057     err = _wstat(wpath, &wstatbuf);
1058     if (!err)
1059         statbuf->st_mode = wstatbuf.st_mode;
1060     return err;
1061 #else
1062     int ret;
1063     PyObject *bytes;
1064     char *cpath;
1065 
1066     bytes = PyUnicode_EncodeFSDefault(path);
1067     if (bytes == NULL)
1068         return -2;
1069 
1070     /* check for embedded null bytes */
1071     if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1072         Py_DECREF(bytes);
1073         return -2;
1074     }
1075 
1076     ret = stat(cpath, statbuf);
1077     Py_DECREF(bytes);
1078     return ret;
1079 #endif
1080 }
1081 
1082 
1083 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1084 static int
get_inheritable(int fd,int raise)1085 get_inheritable(int fd, int raise)
1086 {
1087 #ifdef MS_WINDOWS
1088     HANDLE handle;
1089     DWORD flags;
1090 
1091     _Py_BEGIN_SUPPRESS_IPH
1092     handle = (HANDLE)_get_osfhandle(fd);
1093     _Py_END_SUPPRESS_IPH
1094     if (handle == INVALID_HANDLE_VALUE) {
1095         if (raise)
1096             PyErr_SetFromErrno(PyExc_OSError);
1097         return -1;
1098     }
1099 
1100     if (!GetHandleInformation(handle, &flags)) {
1101         if (raise)
1102             PyErr_SetFromWindowsErr(0);
1103         return -1;
1104     }
1105 
1106     return (flags & HANDLE_FLAG_INHERIT);
1107 #else
1108     int flags;
1109 
1110     flags = fcntl(fd, F_GETFD, 0);
1111     if (flags == -1) {
1112         if (raise)
1113             PyErr_SetFromErrno(PyExc_OSError);
1114         return -1;
1115     }
1116     return !(flags & FD_CLOEXEC);
1117 #endif
1118 }
1119 
/* Report whether the file descriptor can be inherited by child processes.
   Return 1 if it can, 0 if it cannot; on error raise an exception and
   return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;

    return get_inheritable(fd, raise_on_error);
}
1128 
1129 
1130 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1131 static int
set_inheritable(int fd,int inheritable,int raise,int * atomic_flag_works)1132 set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1133 {
1134 #ifdef MS_WINDOWS
1135     HANDLE handle;
1136     DWORD flags;
1137 #else
1138 #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1139     static int ioctl_works = -1;
1140     int request;
1141     int err;
1142 #endif
1143     int flags, new_flags;
1144     int res;
1145 #endif
1146 
1147     /* atomic_flag_works can only be used to make the file descriptor
1148        non-inheritable */
1149     assert(!(atomic_flag_works != NULL && inheritable));
1150 
1151     if (atomic_flag_works != NULL && !inheritable) {
1152         if (*atomic_flag_works == -1) {
1153             int isInheritable = get_inheritable(fd, raise);
1154             if (isInheritable == -1)
1155                 return -1;
1156             *atomic_flag_works = !isInheritable;
1157         }
1158 
1159         if (*atomic_flag_works)
1160             return 0;
1161     }
1162 
1163 #ifdef MS_WINDOWS
1164     _Py_BEGIN_SUPPRESS_IPH
1165     handle = (HANDLE)_get_osfhandle(fd);
1166     _Py_END_SUPPRESS_IPH
1167     if (handle == INVALID_HANDLE_VALUE) {
1168         if (raise)
1169             PyErr_SetFromErrno(PyExc_OSError);
1170         return -1;
1171     }
1172 
1173     if (inheritable)
1174         flags = HANDLE_FLAG_INHERIT;
1175     else
1176         flags = 0;
1177 
1178     /* This check can be removed once support for Windows 7 ends. */
1179 #define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
1180         GetFileType(handle) == FILE_TYPE_CHAR)
1181 
1182     if (!CONSOLE_PSEUDOHANDLE(handle) &&
1183         !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
1184         if (raise)
1185             PyErr_SetFromWindowsErr(0);
1186         return -1;
1187     }
1188 #undef CONSOLE_PSEUDOHANDLE
1189     return 0;
1190 
1191 #else
1192 
1193 #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1194     if (ioctl_works != 0 && raise != 0) {
1195         /* fast-path: ioctl() only requires one syscall */
1196         /* caveat: raise=0 is an indicator that we must be async-signal-safe
1197          * thus avoid using ioctl() so we skip the fast-path. */
1198         if (inheritable)
1199             request = FIONCLEX;
1200         else
1201             request = FIOCLEX;
1202         err = ioctl(fd, request, NULL);
1203         if (!err) {
1204             ioctl_works = 1;
1205             return 0;
1206         }
1207 
1208 #ifdef __linux__
1209         if (errno == EBADF) {
1210             // On Linux, ioctl(FIOCLEX) will fail with EBADF for O_PATH file descriptors
1211             // Fall through to the fcntl() path
1212         }
1213         else
1214 #endif
1215         if (errno != ENOTTY && errno != EACCES) {
1216             if (raise)
1217                 PyErr_SetFromErrno(PyExc_OSError);
1218             return -1;
1219         }
1220         else {
1221             /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1222                device". The ioctl is declared but not supported by the kernel.
1223                Remember that ioctl() doesn't work. It is the case on
1224                Illumos-based OS for example.
1225 
1226                Issue #27057: When SELinux policy disallows ioctl it will fail
1227                with EACCES. While FIOCLEX is safe operation it may be
1228                unavailable because ioctl was denied altogether.
1229                This can be the case on Android. */
1230             ioctl_works = 0;
1231         }
1232         /* fallback to fcntl() if ioctl() does not work */
1233     }
1234 #endif
1235 
1236     /* slow-path: fcntl() requires two syscalls */
1237     flags = fcntl(fd, F_GETFD);
1238     if (flags < 0) {
1239         if (raise)
1240             PyErr_SetFromErrno(PyExc_OSError);
1241         return -1;
1242     }
1243 
1244     if (inheritable) {
1245         new_flags = flags & ~FD_CLOEXEC;
1246     }
1247     else {
1248         new_flags = flags | FD_CLOEXEC;
1249     }
1250 
1251     if (new_flags == flags) {
1252         /* FD_CLOEXEC flag already set/cleared: nothing to do */
1253         return 0;
1254     }
1255 
1256     res = fcntl(fd, F_SETFD, new_flags);
1257     if (res < 0) {
1258         if (raise)
1259             PyErr_SetFromErrno(PyExc_OSError);
1260         return -1;
1261     }
1262     return 0;
1263 #endif
1264 }
1265 
/* Clear the inheritable flag of fd without raising.
   Return 0 on success; on error set errno and return -1. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error, NULL);
}
1273 
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   If atomic_flag_works is not NULL, it caches whether the descriptor was
   already created non-inheritable:

    * if *atomic_flag_works==-1 (unknown), query the current state of the
      file descriptor: if it is already non-inheritable, set
      *atomic_flag_works to 1 and do nothing else; otherwise set it to 0 and
      make the file descriptor non-inheritable
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: make the file descriptor non-inheritable

   Set atomic_flag_works to NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;

    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1295 
/* Variant of _Py_set_inheritable() that never raises: on error it sets
   errno and returns -1.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1304 
1305 static int
_Py_open_impl(const char * pathname,int flags,int gil_held)1306 _Py_open_impl(const char *pathname, int flags, int gil_held)
1307 {
1308     int fd;
1309     int async_err = 0;
1310 #ifndef MS_WINDOWS
1311     int *atomic_flag_works;
1312 #endif
1313 
1314 #ifdef MS_WINDOWS
1315     flags |= O_NOINHERIT;
1316 #elif defined(O_CLOEXEC)
1317     atomic_flag_works = &_Py_open_cloexec_works;
1318     flags |= O_CLOEXEC;
1319 #else
1320     atomic_flag_works = NULL;
1321 #endif
1322 
1323     if (gil_held) {
1324         PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1325         if (pathname_obj == NULL) {
1326             return -1;
1327         }
1328         if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1329             Py_DECREF(pathname_obj);
1330             return -1;
1331         }
1332 
1333         do {
1334             Py_BEGIN_ALLOW_THREADS
1335             fd = open(pathname, flags);
1336             Py_END_ALLOW_THREADS
1337         } while (fd < 0
1338                  && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1339         if (async_err) {
1340             Py_DECREF(pathname_obj);
1341             return -1;
1342         }
1343         if (fd < 0) {
1344             PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1345             Py_DECREF(pathname_obj);
1346             return -1;
1347         }
1348         Py_DECREF(pathname_obj);
1349     }
1350     else {
1351         fd = open(pathname, flags);
1352         if (fd < 0)
1353             return -1;
1354     }
1355 
1356 #ifndef MS_WINDOWS
1357     if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
1358         close(fd);
1359         return -1;
1360     }
1361 #endif
1362 
1363     return fd;
1364 }
1365 
/* open() wrapper for callers holding the GIL.

   Return a file descriptor on success. On error, raise an exception and
   return -1.

   The file descriptor is created non-inheritable.

   If open() fails with EINTR (interrupted by a signal), the syscall is
   retried, unless the Python signal handler raises an exception.

   The GIL is released while open() runs. The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1383 
/* open() wrapper that never raises.

   Return a file descriptor on success. On error, set errno and return -1.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1395 
1396 /* Open a file. Use _wfopen() on Windows, encode the path to the locale
1397    encoding and use fopen() otherwise.
1398 
1399    The file descriptor is created non-inheritable.
1400 
1401    If interrupted by a signal, fail with EINTR. */
1402 FILE *
_Py_wfopen(const wchar_t * path,const wchar_t * mode)1403 _Py_wfopen(const wchar_t *path, const wchar_t *mode)
1404 {
1405     FILE *f;
1406     if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1407         return NULL;
1408     }
1409 #ifndef MS_WINDOWS
1410     char *cpath;
1411     char cmode[10];
1412     size_t r;
1413     r = wcstombs(cmode, mode, 10);
1414     if (r == DECODE_ERROR || r >= 10) {
1415         errno = EINVAL;
1416         return NULL;
1417     }
1418     cpath = _Py_EncodeLocaleRaw(path, NULL);
1419     if (cpath == NULL) {
1420         return NULL;
1421     }
1422     f = fopen(cpath, cmode);
1423     PyMem_RawFree(cpath);
1424 #else
1425     f = _wfopen(path, mode);
1426 #endif
1427     if (f == NULL)
1428         return NULL;
1429     if (make_non_inheritable(fileno(f)) < 0) {
1430         fclose(f);
1431         return NULL;
1432     }
1433     return f;
1434 }
1435 
1436 /* Wrapper to fopen().
1437 
1438    The file descriptor is created non-inheritable.
1439 
1440    If interrupted by a signal, fail with EINTR. */
1441 FILE*
_Py_fopen(const char * pathname,const char * mode)1442 _Py_fopen(const char *pathname, const char *mode)
1443 {
1444     PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1445     if (pathname_obj == NULL) {
1446         return NULL;
1447     }
1448     if (PySys_Audit("open", "Osi", pathname_obj, mode, 0) < 0) {
1449         Py_DECREF(pathname_obj);
1450         return NULL;
1451     }
1452     Py_DECREF(pathname_obj);
1453 
1454     FILE *f = fopen(pathname, mode);
1455     if (f == NULL)
1456         return NULL;
1457     if (make_non_inheritable(fileno(f)) < 0) {
1458         fclose(f);
1459         return NULL;
1460     }
1461     return f;
1462 }
1463 
1464 /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
1465    encoding and call fopen() otherwise.
1466 
1467    Return the new file object on success. Raise an exception and return NULL
1468    on error.
1469 
1470    The file descriptor is created non-inheritable.
1471 
1472    When interrupted by a signal (open() fails with EINTR), retry the syscall,
1473    except if the Python signal handler raises an exception.
1474 
1475    Release the GIL to call _wfopen() or fopen(). The caller must hold
1476    the GIL. */
1477 FILE*
_Py_fopen_obj(PyObject * path,const char * mode)1478 _Py_fopen_obj(PyObject *path, const char *mode)
1479 {
1480     FILE *f;
1481     int async_err = 0;
1482 #ifdef MS_WINDOWS
1483     const wchar_t *wpath;
1484     wchar_t wmode[10];
1485     int usize;
1486 
1487     assert(PyGILState_Check());
1488 
1489     if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1490         return NULL;
1491     }
1492     if (!PyUnicode_Check(path)) {
1493         PyErr_Format(PyExc_TypeError,
1494                      "str file path expected under Windows, got %R",
1495                      Py_TYPE(path));
1496         return NULL;
1497     }
1498     wpath = _PyUnicode_AsUnicode(path);
1499     if (wpath == NULL)
1500         return NULL;
1501 
1502     usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1503                                 wmode, Py_ARRAY_LENGTH(wmode));
1504     if (usize == 0) {
1505         PyErr_SetFromWindowsErr(0);
1506         return NULL;
1507     }
1508 
1509     do {
1510         Py_BEGIN_ALLOW_THREADS
1511         f = _wfopen(wpath, wmode);
1512         Py_END_ALLOW_THREADS
1513     } while (f == NULL
1514              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1515 #else
1516     PyObject *bytes;
1517     char *path_bytes;
1518 
1519     assert(PyGILState_Check());
1520 
1521     if (!PyUnicode_FSConverter(path, &bytes))
1522         return NULL;
1523     path_bytes = PyBytes_AS_STRING(bytes);
1524 
1525     if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1526         Py_DECREF(bytes);
1527         return NULL;
1528     }
1529 
1530     do {
1531         Py_BEGIN_ALLOW_THREADS
1532         f = fopen(path_bytes, mode);
1533         Py_END_ALLOW_THREADS
1534     } while (f == NULL
1535              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1536 
1537     Py_DECREF(bytes);
1538 #endif
1539     if (async_err)
1540         return NULL;
1541 
1542     if (f == NULL) {
1543         PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1544         return NULL;
1545     }
1546 
1547     if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1548         fclose(f);
1549         return NULL;
1550     }
1551     return f;
1552 }
1553 
1554 /* Read count bytes from fd into buf.
1555 
1556    On success, return the number of read bytes, it can be lower than count.
1557    If the current file offset is at or past the end of file, no bytes are read,
1558    and read() returns zero.
1559 
1560    On error, raise an exception, set errno and return -1.
1561 
1562    When interrupted by a signal (read() fails with EINTR), retry the syscall.
1563    If the Python signal handler raises an exception, the function returns -1
1564    (the syscall is not retried).
1565 
1566    Release the GIL to call read(). The caller must hold the GIL. */
1567 Py_ssize_t
_Py_read(int fd,void * buf,size_t count)1568 _Py_read(int fd, void *buf, size_t count)
1569 {
1570     Py_ssize_t n;
1571     int err;
1572     int async_err = 0;
1573 
1574     assert(PyGILState_Check());
1575 
1576     /* _Py_read() must not be called with an exception set, otherwise the
1577      * caller may think that read() was interrupted by a signal and the signal
1578      * handler raised an exception. */
1579     assert(!PyErr_Occurred());
1580 
1581     if (count > _PY_READ_MAX) {
1582         count = _PY_READ_MAX;
1583     }
1584 
1585     _Py_BEGIN_SUPPRESS_IPH
1586     do {
1587         Py_BEGIN_ALLOW_THREADS
1588         errno = 0;
1589 #ifdef MS_WINDOWS
1590         n = read(fd, buf, (int)count);
1591 #else
1592         n = read(fd, buf, count);
1593 #endif
1594         /* save/restore errno because PyErr_CheckSignals()
1595          * and PyErr_SetFromErrno() can modify it */
1596         err = errno;
1597         Py_END_ALLOW_THREADS
1598     } while (n < 0 && err == EINTR &&
1599             !(async_err = PyErr_CheckSignals()));
1600     _Py_END_SUPPRESS_IPH
1601 
1602     if (async_err) {
1603         /* read() was interrupted by a signal (failed with EINTR)
1604          * and the Python signal handler raised an exception */
1605         errno = err;
1606         assert(errno == EINTR && PyErr_Occurred());
1607         return -1;
1608     }
1609     if (n < 0) {
1610         PyErr_SetFromErrno(PyExc_OSError);
1611         errno = err;
1612         return -1;
1613     }
1614 
1615     return n;
1616 }
1617 
1618 static Py_ssize_t
_Py_write_impl(int fd,const void * buf,size_t count,int gil_held)1619 _Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
1620 {
1621     Py_ssize_t n;
1622     int err;
1623     int async_err = 0;
1624 
1625     _Py_BEGIN_SUPPRESS_IPH
1626 #ifdef MS_WINDOWS
1627     if (count > 32767 && isatty(fd)) {
1628         /* Issue #11395: the Windows console returns an error (12: not
1629            enough space error) on writing into stdout if stdout mode is
1630            binary and the length is greater than 66,000 bytes (or less,
1631            depending on heap usage). */
1632         count = 32767;
1633     }
1634 #endif
1635     if (count > _PY_WRITE_MAX) {
1636         count = _PY_WRITE_MAX;
1637     }
1638 
1639     if (gil_held) {
1640         do {
1641             Py_BEGIN_ALLOW_THREADS
1642             errno = 0;
1643 #ifdef MS_WINDOWS
1644             n = write(fd, buf, (int)count);
1645 #else
1646             n = write(fd, buf, count);
1647 #endif
1648             /* save/restore errno because PyErr_CheckSignals()
1649              * and PyErr_SetFromErrno() can modify it */
1650             err = errno;
1651             Py_END_ALLOW_THREADS
1652         } while (n < 0 && err == EINTR &&
1653                 !(async_err = PyErr_CheckSignals()));
1654     }
1655     else {
1656         do {
1657             errno = 0;
1658 #ifdef MS_WINDOWS
1659             n = write(fd, buf, (int)count);
1660 #else
1661             n = write(fd, buf, count);
1662 #endif
1663             err = errno;
1664         } while (n < 0 && err == EINTR);
1665     }
1666     _Py_END_SUPPRESS_IPH
1667 
1668     if (async_err) {
1669         /* write() was interrupted by a signal (failed with EINTR)
1670            and the Python signal handler raised an exception (if gil_held is
1671            nonzero). */
1672         errno = err;
1673         assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
1674         return -1;
1675     }
1676     if (n < 0) {
1677         if (gil_held)
1678             PyErr_SetFromErrno(PyExc_OSError);
1679         errno = err;
1680         return -1;
1681     }
1682 
1683     return n;
1684 }
1685 
1686 /* Write count bytes of buf into fd.
1687 
1688    On success, return the number of written bytes, it can be lower than count
1689    including 0. On error, raise an exception, set errno and return -1.
1690 
1691    When interrupted by a signal (write() fails with EINTR), retry the syscall.
1692    If the Python signal handler raises an exception, the function returns -1
1693    (the syscall is not retried).
1694 
1695    Release the GIL to call write(). The caller must hold the GIL. */
1696 Py_ssize_t
_Py_write(int fd,const void * buf,size_t count)1697 _Py_write(int fd, const void *buf, size_t count)
1698 {
1699     assert(PyGILState_Check());
1700 
1701     /* _Py_write() must not be called with an exception set, otherwise the
1702      * caller may think that write() was interrupted by a signal and the signal
1703      * handler raised an exception. */
1704     assert(!PyErr_Occurred());
1705 
1706     return _Py_write_impl(fd, buf, count, 1);
1707 }
1708 
1709 /* Write count bytes of buf into fd.
1710  *
1711  * On success, return the number of written bytes, it can be lower than count
1712  * including 0. On error, set errno and return -1.
1713  *
1714  * When interrupted by a signal (write() fails with EINTR), retry the syscall
1715  * without calling the Python signal handler. */
1716 Py_ssize_t
_Py_write_noraise(int fd,const void * buf,size_t count)1717 _Py_write_noraise(int fd, const void *buf, size_t count)
1718 {
1719     return _Py_write_impl(fd, buf, count, 0);
1720 }
1721 
1722 #ifdef HAVE_READLINK
1723 
1724 /* Read value of symbolic link. Encode the path to the locale encoding, decode
1725    the result from the locale encoding.
1726 
1727    Return -1 on encoding error, on readlink() error, if the internal buffer is
1728    too short, on decoding error, or if 'buf' is too short. */
1729 int
_Py_wreadlink(const wchar_t * path,wchar_t * buf,size_t buflen)1730 _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
1731 {
1732     char *cpath;
1733     char cbuf[MAXPATHLEN];
1734     wchar_t *wbuf;
1735     int res;
1736     size_t r1;
1737 
1738     cpath = _Py_EncodeLocaleRaw(path, NULL);
1739     if (cpath == NULL) {
1740         errno = EINVAL;
1741         return -1;
1742     }
1743     res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
1744     PyMem_RawFree(cpath);
1745     if (res == -1)
1746         return -1;
1747     if (res == Py_ARRAY_LENGTH(cbuf)) {
1748         errno = EINVAL;
1749         return -1;
1750     }
1751     cbuf[res] = '\0'; /* buf will be null terminated */
1752     wbuf = Py_DecodeLocale(cbuf, &r1);
1753     if (wbuf == NULL) {
1754         errno = EINVAL;
1755         return -1;
1756     }
1757     /* wbuf must have space to store the trailing NUL character */
1758     if (buflen <= r1) {
1759         PyMem_RawFree(wbuf);
1760         errno = EINVAL;
1761         return -1;
1762     }
1763     wcsncpy(buf, wbuf, buflen);
1764     PyMem_RawFree(wbuf);
1765     return (int)r1;
1766 }
1767 #endif
1768 
1769 #ifdef HAVE_REALPATH
1770 
1771 /* Return the canonicalized absolute pathname. Encode path to the locale
1772    encoding, decode the result from the locale encoding.
1773 
1774    Return NULL on encoding error, realpath() error, decoding error
1775    or if 'resolved_path' is too short. */
1776 wchar_t*
_Py_wrealpath(const wchar_t * path,wchar_t * resolved_path,size_t resolved_path_len)1777 _Py_wrealpath(const wchar_t *path,
1778               wchar_t *resolved_path, size_t resolved_path_len)
1779 {
1780     char *cpath;
1781     char cresolved_path[MAXPATHLEN];
1782     wchar_t *wresolved_path;
1783     char *res;
1784     size_t r;
1785     cpath = _Py_EncodeLocaleRaw(path, NULL);
1786     if (cpath == NULL) {
1787         errno = EINVAL;
1788         return NULL;
1789     }
1790     res = realpath(cpath, cresolved_path);
1791     PyMem_RawFree(cpath);
1792     if (res == NULL)
1793         return NULL;
1794 
1795     wresolved_path = Py_DecodeLocale(cresolved_path, &r);
1796     if (wresolved_path == NULL) {
1797         errno = EINVAL;
1798         return NULL;
1799     }
1800     /* wresolved_path must have space to store the trailing NUL character */
1801     if (resolved_path_len <= r) {
1802         PyMem_RawFree(wresolved_path);
1803         errno = EINVAL;
1804         return NULL;
1805     }
1806     wcsncpy(resolved_path, wresolved_path, resolved_path_len);
1807     PyMem_RawFree(wresolved_path);
1808     return resolved_path;
1809 }
1810 #endif
1811 
1812 /* Get the current directory. buflen is the buffer size in wide characters
1813    including the null character. Decode the path from the locale encoding.
1814 
1815    Return NULL on getcwd() error, on decoding error, or if 'buf' is
1816    too short. */
1817 wchar_t*
_Py_wgetcwd(wchar_t * buf,size_t buflen)1818 _Py_wgetcwd(wchar_t *buf, size_t buflen)
1819 {
1820 #ifdef MS_WINDOWS
1821     int ibuflen = (int)Py_MIN(buflen, INT_MAX);
1822     return _wgetcwd(buf, ibuflen);
1823 #else
1824     char fname[MAXPATHLEN];
1825     wchar_t *wname;
1826     size_t len;
1827 
1828     if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
1829         return NULL;
1830     wname = Py_DecodeLocale(fname, &len);
1831     if (wname == NULL)
1832         return NULL;
1833     /* wname must have space to store the trailing NUL character */
1834     if (buflen <= len) {
1835         PyMem_RawFree(wname);
1836         return NULL;
1837     }
1838     wcsncpy(buf, wname, buflen);
1839     PyMem_RawFree(wname);
1840     return buf;
1841 #endif
1842 }
1843 
1844 /* Duplicate a file descriptor. The new file descriptor is created as
1845    non-inheritable. Return a new file descriptor on success, raise an OSError
1846    exception and return -1 on error.
1847 
1848    The GIL is released to call dup(). The caller must hold the GIL. */
1849 int
_Py_dup(int fd)1850 _Py_dup(int fd)
1851 {
1852 #ifdef MS_WINDOWS
1853     HANDLE handle;
1854 #endif
1855 
1856     assert(PyGILState_Check());
1857 
1858 #ifdef MS_WINDOWS
1859     _Py_BEGIN_SUPPRESS_IPH
1860     handle = (HANDLE)_get_osfhandle(fd);
1861     _Py_END_SUPPRESS_IPH
1862     if (handle == INVALID_HANDLE_VALUE) {
1863         PyErr_SetFromErrno(PyExc_OSError);
1864         return -1;
1865     }
1866 
1867     Py_BEGIN_ALLOW_THREADS
1868     _Py_BEGIN_SUPPRESS_IPH
1869     fd = dup(fd);
1870     _Py_END_SUPPRESS_IPH
1871     Py_END_ALLOW_THREADS
1872     if (fd < 0) {
1873         PyErr_SetFromErrno(PyExc_OSError);
1874         return -1;
1875     }
1876 
1877     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1878         _Py_BEGIN_SUPPRESS_IPH
1879         close(fd);
1880         _Py_END_SUPPRESS_IPH
1881         return -1;
1882     }
1883 #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
1884     Py_BEGIN_ALLOW_THREADS
1885     _Py_BEGIN_SUPPRESS_IPH
1886     fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
1887     _Py_END_SUPPRESS_IPH
1888     Py_END_ALLOW_THREADS
1889     if (fd < 0) {
1890         PyErr_SetFromErrno(PyExc_OSError);
1891         return -1;
1892     }
1893 
1894 #else
1895     Py_BEGIN_ALLOW_THREADS
1896     _Py_BEGIN_SUPPRESS_IPH
1897     fd = dup(fd);
1898     _Py_END_SUPPRESS_IPH
1899     Py_END_ALLOW_THREADS
1900     if (fd < 0) {
1901         PyErr_SetFromErrno(PyExc_OSError);
1902         return -1;
1903     }
1904 
1905     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1906         _Py_BEGIN_SUPPRESS_IPH
1907         close(fd);
1908         _Py_END_SUPPRESS_IPH
1909         return -1;
1910     }
1911 #endif
1912     return fd;
1913 }
1914 
1915 #ifndef MS_WINDOWS
1916 /* Get the blocking mode of the file descriptor.
1917    Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
1918    raise an exception and return -1 on error. */
1919 int
_Py_get_blocking(int fd)1920 _Py_get_blocking(int fd)
1921 {
1922     int flags;
1923     _Py_BEGIN_SUPPRESS_IPH
1924     flags = fcntl(fd, F_GETFL, 0);
1925     _Py_END_SUPPRESS_IPH
1926     if (flags < 0) {
1927         PyErr_SetFromErrno(PyExc_OSError);
1928         return -1;
1929     }
1930 
1931     return !(flags & O_NONBLOCK);
1932 }
1933 
1934 /* Set the blocking mode of the specified file descriptor.
1935 
1936    Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
1937    otherwise.
1938 
1939    Return 0 on success, raise an exception and return -1 on error. */
1940 int
_Py_set_blocking(int fd,int blocking)1941 _Py_set_blocking(int fd, int blocking)
1942 {
1943 #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
1944     int arg = !blocking;
1945     if (ioctl(fd, FIONBIO, &arg) < 0)
1946         goto error;
1947 #else
1948     int flags, res;
1949 
1950     _Py_BEGIN_SUPPRESS_IPH
1951     flags = fcntl(fd, F_GETFL, 0);
1952     if (flags >= 0) {
1953         if (blocking)
1954             flags = flags & (~O_NONBLOCK);
1955         else
1956             flags = flags | O_NONBLOCK;
1957 
1958         res = fcntl(fd, F_SETFL, flags);
1959     } else {
1960         res = -1;
1961     }
1962     _Py_END_SUPPRESS_IPH
1963 
1964     if (res < 0)
1965         goto error;
1966 #endif
1967     return 0;
1968 
1969 error:
1970     PyErr_SetFromErrno(PyExc_OSError);
1971     return -1;
1972 }
1973 #endif
1974 
1975 
1976 int
_Py_GetLocaleconvNumeric(struct lconv * lc,PyObject ** decimal_point,PyObject ** thousands_sep)1977 _Py_GetLocaleconvNumeric(struct lconv *lc,
1978                          PyObject **decimal_point, PyObject **thousands_sep)
1979 {
1980     assert(decimal_point != NULL);
1981     assert(thousands_sep != NULL);
1982 
1983 #ifndef MS_WINDOWS
1984     int change_locale = 0;
1985     if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
1986         change_locale = 1;
1987     }
1988     if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
1989         change_locale = 1;
1990     }
1991 
1992     /* Keep a copy of the LC_CTYPE locale */
1993     char *oldloc = NULL, *loc = NULL;
1994     if (change_locale) {
1995         oldloc = setlocale(LC_CTYPE, NULL);
1996         if (!oldloc) {
1997             PyErr_SetString(PyExc_RuntimeWarning,
1998                             "failed to get LC_CTYPE locale");
1999             return -1;
2000         }
2001 
2002         oldloc = _PyMem_Strdup(oldloc);
2003         if (!oldloc) {
2004             PyErr_NoMemory();
2005             return -1;
2006         }
2007 
2008         loc = setlocale(LC_NUMERIC, NULL);
2009         if (loc != NULL && strcmp(loc, oldloc) == 0) {
2010             loc = NULL;
2011         }
2012 
2013         if (loc != NULL) {
2014             /* Only set the locale temporarily the LC_CTYPE locale
2015                if LC_NUMERIC locale is different than LC_CTYPE locale and
2016                decimal_point and/or thousands_sep are non-ASCII or longer than
2017                1 byte */
2018             setlocale(LC_CTYPE, loc);
2019         }
2020     }
2021 
2022 #define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2023 #else /* MS_WINDOWS */
2024 /* Use _W_* fields of Windows strcut lconv */
2025 #define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2026 #endif /* MS_WINDOWS */
2027 
2028     int res = -1;
2029 
2030     *decimal_point = GET_LOCALE_STRING(decimal_point);
2031     if (*decimal_point == NULL) {
2032         goto done;
2033     }
2034 
2035     *thousands_sep = GET_LOCALE_STRING(thousands_sep);
2036     if (*thousands_sep == NULL) {
2037         goto done;
2038     }
2039 
2040     res = 0;
2041 
2042 done:
2043 #ifndef MS_WINDOWS
2044     if (loc != NULL) {
2045         setlocale(LC_CTYPE, oldloc);
2046     }
2047     PyMem_Free(oldloc);
2048 #endif
2049     return res;
2050 
2051 #undef GET_LOCALE_STRING
2052 }
2053