1 #include "Python.h"
2 #include "pycore_fileutils.h"
3 #include "osdefs.h"
4 #include <locale.h>
5
6 #ifdef MS_WINDOWS
7 # include <malloc.h>
8 # include <windows.h>
9 extern int winerror_to_errno(int);
10 #endif
11
12 #ifdef HAVE_LANGINFO_H
13 #include <langinfo.h>
14 #endif
15
16 #ifdef HAVE_SYS_IOCTL_H
17 #include <sys/ioctl.h>
18 #endif
19
20 #ifdef HAVE_FCNTL_H
21 #include <fcntl.h>
22 #endif /* HAVE_FCNTL_H */
23
#ifdef O_CLOEXEC
/* Does open() support the O_CLOEXEC flag? Possible values:

   -1: unknown
    0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
    1: open() supports O_CLOEXEC flag, close-on-exec is set

   The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
   and os.open().

   The variable only exists when the platform headers define O_CLOEXEC,
   and it starts in the "unknown" state. */
int _Py_open_cloexec_works = -1;
#endif
35
/* Upper bound of the Unicode code point range.
   The value must be the same in unicodeobject.c. */
#define MAX_UNICODE 0x10ffff

/* Error codes returned by mbstowcs() and mbrtowc(). */
static const size_t DECODE_ERROR = (size_t)-1;
static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
42
43
44 static int
get_surrogateescape(_Py_error_handler errors,int * surrogateescape)45 get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
46 {
47 switch (errors)
48 {
49 case _Py_ERROR_STRICT:
50 *surrogateescape = 0;
51 return 0;
52 case _Py_ERROR_SURROGATEESCAPE:
53 *surrogateescape = 1;
54 return 0;
55 default:
56 return -1;
57 }
58 }
59
60
61 PyObject *
_Py_device_encoding(int fd)62 _Py_device_encoding(int fd)
63 {
64 #if defined(MS_WINDOWS)
65 UINT cp;
66 #endif
67 int valid;
68 _Py_BEGIN_SUPPRESS_IPH
69 valid = isatty(fd);
70 _Py_END_SUPPRESS_IPH
71 if (!valid)
72 Py_RETURN_NONE;
73
74 #if defined(MS_WINDOWS)
75 if (fd == 0)
76 cp = GetConsoleCP();
77 else if (fd == 1 || fd == 2)
78 cp = GetConsoleOutputCP();
79 else
80 cp = 0;
81 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
82 has no console */
83 if (cp != 0)
84 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
85 #elif defined(CODESET)
86 {
87 char *codeset = nl_langinfo(CODESET);
88 if (codeset != NULL && codeset[0] != 0)
89 return PyUnicode_FromString(codeset);
90 }
91 #endif
92 Py_RETURN_NONE;
93 }
94
95
96 static size_t
is_valid_wide_char(wchar_t ch)97 is_valid_wide_char(wchar_t ch)
98 {
99 if (Py_UNICODE_IS_SURROGATE(ch)) {
100 // Reject lone surrogate characters
101 return 0;
102 }
103 if (ch > MAX_UNICODE) {
104 // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
105 // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
106 // it creates characters outside the [U+0000; U+10ffff] range:
107 // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
108 return 0;
109 }
110 return 1;
111 }
112
113
114 static size_t
_Py_mbstowcs(wchar_t * dest,const char * src,size_t n)115 _Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
116 {
117 size_t count = mbstowcs(dest, src, n);
118 if (dest != NULL && count != DECODE_ERROR) {
119 for (size_t i=0; i < count; i++) {
120 wchar_t ch = dest[i];
121 if (!is_valid_wide_char(ch)) {
122 return DECODE_ERROR;
123 }
124 }
125 }
126 return count;
127 }
128
129
130 #ifdef HAVE_MBRTOWC
131 static size_t
_Py_mbrtowc(wchar_t * pwc,const char * str,size_t len,mbstate_t * pmbs)132 _Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
133 {
134 assert(pwc != NULL);
135 size_t count = mbrtowc(pwc, str, len, pmbs);
136 if (count != 0 && count != DECODE_ERROR && count != INCOMPLETE_CHARACTER) {
137 if (!is_valid_wide_char(*pwc)) {
138 return DECODE_ERROR;
139 }
140 }
141 return count;
142 }
143 #endif
144
145
146 #if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
147
/* Marks that the force-ASCII workaround below is compiled in; other parts of
   this file test it with #ifdef USE_FORCE_ASCII. */
#define USE_FORCE_ASCII

/* Defined outside this file. check_force_ascii() uses it to normalize the
   name returned by nl_langinfo(CODESET) before comparing it. */
extern int _Py_normalize_encoding(const char *, char *, size_t);

/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
   and POSIX locale. nl_langinfo(CODESET) announces an alias of the
   ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
   locale.getpreferredencoding() codec. For example, if command line arguments
   are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
   UnicodeEncodeError instead of retrieving the original byte string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
   nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
   one byte in range 0x80-0xff can be decoded from the locale encoding. The
   workaround is also enabled on error, for example if getting the locale
   failed.

   On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
   announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
   ASCII encoding in this case.

   Values of force_ascii:

       1: the workaround is used: Py_EncodeLocale() uses
          encode_ascii_surrogateescape() and Py_DecodeLocale() uses
          decode_ascii()
       0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
          Py_DecodeLocale() uses mbstowcs()
      -1: unknown, need to call check_force_ascii() to get the value
*/
static int force_ascii = -1;
180
/* Decide whether the force-ASCII workaround must be used for the current
   LC_CTYPE locale.

   Return 1 if ASCII must be forced, 0 if the locale codec can be used.
   1 is also returned on error: when the locale name, the codeset name or
   its normalized form cannot be obtained, or when nl_langinfo(CODESET) is
   not available at all. */
static int
check_force_ascii(void)
{
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* The probe is NUL-terminated: mbstowcs() converts a multibyte
           *string*, so it must never be handed a lone byte that it could
           read past. */
        unsigned char probe[2] = {0xA7, 0};
        wchar_t wch;
        size_t res;

        res = _Py_mbstowcs(&wch, (char*)probe, 1);
        if (res != DECODE_ERROR && wch == L'\xA7') {
            /* On HP-UX with the C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    const char* ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char **alias=ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i=0x80; i<=0xff; i++) {
        /* NUL-terminated one-byte string, so that mbstowcs() cannot read
           past the byte under test. */
        unsigned char probe[2] = {(unsigned char)i, 0};
        wchar_t wch[1];
        size_t res;

        res = _Py_mbstowcs(wch, (char*)probe, 1);
        if (res != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
280
281
282 int
_Py_GetForceASCII(void)283 _Py_GetForceASCII(void)
284 {
285 if (force_ascii == -1) {
286 force_ascii = check_force_ascii();
287 }
288 return force_ascii;
289 }
290
291
292 void
_Py_ResetForceASCII(void)293 _Py_ResetForceASCII(void)
294 {
295 force_ascii = -1;
296 }
297
298
299 static int
encode_ascii(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)300 encode_ascii(const wchar_t *text, char **str,
301 size_t *error_pos, const char **reason,
302 int raw_malloc, _Py_error_handler errors)
303 {
304 char *result = NULL, *out;
305 size_t len, i;
306 wchar_t ch;
307
308 int surrogateescape;
309 if (get_surrogateescape(errors, &surrogateescape) < 0) {
310 return -3;
311 }
312
313 len = wcslen(text);
314
315 /* +1 for NULL byte */
316 if (raw_malloc) {
317 result = PyMem_RawMalloc(len + 1);
318 }
319 else {
320 result = PyMem_Malloc(len + 1);
321 }
322 if (result == NULL) {
323 return -1;
324 }
325
326 out = result;
327 for (i=0; i<len; i++) {
328 ch = text[i];
329
330 if (ch <= 0x7f) {
331 /* ASCII character */
332 *out++ = (char)ch;
333 }
334 else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
335 /* UTF-8b surrogate */
336 *out++ = (char)(ch - 0xdc00);
337 }
338 else {
339 if (raw_malloc) {
340 PyMem_RawFree(result);
341 }
342 else {
343 PyMem_Free(result);
344 }
345 if (error_pos != NULL) {
346 *error_pos = i;
347 }
348 if (reason) {
349 *reason = "encoding error";
350 }
351 return -2;
352 }
353 }
354 *out = '\0';
355 *str = result;
356 return 0;
357 }
358 #else
/* The force-ASCII workaround is not compiled in for this configuration:
   ASCII is never forced. */
int
_Py_GetForceASCII(void)
{
    return 0;
}

/* No force_ascii state exists in this configuration, so resetting it is a
   no-op. */
void
_Py_ResetForceASCII(void)
{
}
370 #endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
371
372
373 #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
374 static int
decode_ascii(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)375 decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
376 const char **reason, _Py_error_handler errors)
377 {
378 wchar_t *res;
379 unsigned char *in;
380 wchar_t *out;
381 size_t argsize = strlen(arg) + 1;
382
383 int surrogateescape;
384 if (get_surrogateescape(errors, &surrogateescape) < 0) {
385 return -3;
386 }
387
388 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
389 return -1;
390 }
391 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
392 if (!res) {
393 return -1;
394 }
395
396 out = res;
397 for (in = (unsigned char*)arg; *in; in++) {
398 unsigned char ch = *in;
399 if (ch < 128) {
400 *out++ = ch;
401 }
402 else {
403 if (!surrogateescape) {
404 PyMem_RawFree(res);
405 if (wlen) {
406 *wlen = in - (unsigned char*)arg;
407 }
408 if (reason) {
409 *reason = "decoding error";
410 }
411 return -2;
412 }
413 *out++ = 0xdc00 + ch;
414 }
415 }
416 *out = 0;
417
418 if (wlen != NULL) {
419 *wlen = out - res;
420 }
421 *wstr = res;
422 return 0;
423 }
424 #endif /* !HAVE_MBRTOWC */
425
426 static int
decode_current_locale(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)427 decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
428 const char **reason, _Py_error_handler errors)
429 {
430 wchar_t *res;
431 size_t argsize;
432 size_t count;
433 #ifdef HAVE_MBRTOWC
434 unsigned char *in;
435 wchar_t *out;
436 mbstate_t mbs;
437 #endif
438
439 int surrogateescape;
440 if (get_surrogateescape(errors, &surrogateescape) < 0) {
441 return -3;
442 }
443
444 #ifdef HAVE_BROKEN_MBSTOWCS
445 /* Some platforms have a broken implementation of
446 * mbstowcs which does not count the characters that
447 * would result from conversion. Use an upper bound.
448 */
449 argsize = strlen(arg);
450 #else
451 argsize = _Py_mbstowcs(NULL, arg, 0);
452 #endif
453 if (argsize != DECODE_ERROR) {
454 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
455 return -1;
456 }
457 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
458 if (!res) {
459 return -1;
460 }
461
462 count = _Py_mbstowcs(res, arg, argsize + 1);
463 if (count != DECODE_ERROR) {
464 *wstr = res;
465 if (wlen != NULL) {
466 *wlen = count;
467 }
468 return 0;
469 }
470 PyMem_RawFree(res);
471 }
472
473 /* Conversion failed. Fall back to escaping with surrogateescape. */
474 #ifdef HAVE_MBRTOWC
475 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
476
477 /* Overallocate; as multi-byte characters are in the argument, the
478 actual output could use less memory. */
479 argsize = strlen(arg) + 1;
480 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
481 return -1;
482 }
483 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
484 if (!res) {
485 return -1;
486 }
487
488 in = (unsigned char*)arg;
489 out = res;
490 memset(&mbs, 0, sizeof mbs);
491 while (argsize) {
492 size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
493 if (converted == 0) {
494 /* Reached end of string; null char stored. */
495 break;
496 }
497
498 if (converted == INCOMPLETE_CHARACTER) {
499 /* Incomplete character. This should never happen,
500 since we provide everything that we have -
501 unless there is a bug in the C library, or I
502 misunderstood how mbrtowc works. */
503 goto decode_error;
504 }
505
506 if (converted == DECODE_ERROR) {
507 if (!surrogateescape) {
508 goto decode_error;
509 }
510
511 /* Decoding error. Escape as UTF-8b, and start over in the initial
512 shift state. */
513 *out++ = 0xdc00 + *in++;
514 argsize--;
515 memset(&mbs, 0, sizeof mbs);
516 continue;
517 }
518
519 // _Py_mbrtowc() reject lone surrogate characters
520 assert(!Py_UNICODE_IS_SURROGATE(*out));
521
522 /* successfully converted some bytes */
523 in += converted;
524 argsize -= converted;
525 out++;
526 }
527 if (wlen != NULL) {
528 *wlen = out - res;
529 }
530 *wstr = res;
531 return 0;
532
533 decode_error:
534 PyMem_RawFree(res);
535 if (wlen) {
536 *wlen = in - (unsigned char*)arg;
537 }
538 if (reason) {
539 *reason = "decoding error";
540 }
541 return -2;
542 #else /* HAVE_MBRTOWC */
543 /* Cannot use C locale for escaping; manually escape as if charset
544 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
545 correctly in the locale's charset, which must be an ASCII superset. */
546 return decode_ascii(arg, wstr, wlen, reason, errors);
547 #endif /* HAVE_MBRTOWC */
548 }
549
550
551 /* Decode a byte string from the locale encoding.
552
553 Use the strict error handler if 'surrogateescape' is zero. Use the
554 surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
555 bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
556 can be decoded as a surrogate character, escape the bytes using the
557 surrogateescape error handler instead of decoding them.
558
559 On success, return 0 and write the newly allocated wide character string into
560 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
561 the number of wide characters excluding the null character into *wlen.
562
563 On memory allocation failure, return -1.
564
565 On decoding error, return -2. If wlen is not NULL, write the start of
566 invalid byte sequence in the input string into *wlen. If reason is not NULL,
567 write the decoding error message into *reason.
568
569 Return -3 if the error handler 'errors' is not supported.
570
571 Use the Py_EncodeLocaleEx() function to encode the character string back to
572 a byte string. */
573 int
_Py_DecodeLocaleEx(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,int current_locale,_Py_error_handler errors)574 _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
575 const char **reason,
576 int current_locale, _Py_error_handler errors)
577 {
578 if (current_locale) {
579 #ifdef _Py_FORCE_UTF8_LOCALE
580 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
581 errors);
582 #else
583 return decode_current_locale(arg, wstr, wlen, reason, errors);
584 #endif
585 }
586
587 #ifdef _Py_FORCE_UTF8_FS_ENCODING
588 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
589 errors);
590 #else
591 int use_utf8 = (Py_UTF8Mode == 1);
592 #ifdef MS_WINDOWS
593 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
594 #endif
595 if (use_utf8) {
596 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
597 errors);
598 }
599
600 #ifdef USE_FORCE_ASCII
601 if (force_ascii == -1) {
602 force_ascii = check_force_ascii();
603 }
604
605 if (force_ascii) {
606 /* force ASCII encoding to workaround mbstowcs() issue */
607 return decode_ascii(arg, wstr, wlen, reason, errors);
608 }
609 #endif
610
611 return decode_current_locale(arg, wstr, wlen, reason, errors);
612 #endif /* !_Py_FORCE_UTF8_FS_ENCODING */
613 }
614
615
616 /* Decode a byte string from the locale encoding with the
617 surrogateescape error handler: undecodable bytes are decoded as characters
618 in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
619 character, escape the bytes using the surrogateescape error handler instead
620 of decoding them.
621
622 Return a pointer to a newly allocated wide character string, use
623 PyMem_RawFree() to free the memory. If size is not NULL, write the number of
624 wide characters excluding the null character into *size
625
626 Return NULL on decoding error or memory allocation error. If *size* is not
627 NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
628 decoding error.
629
630 Decoding errors should never happen, unless there is a bug in the C
631 library.
632
633 Use the Py_EncodeLocale() function to encode the character string back to a
634 byte string. */
635 wchar_t*
Py_DecodeLocale(const char * arg,size_t * wlen)636 Py_DecodeLocale(const char* arg, size_t *wlen)
637 {
638 wchar_t *wstr;
639 int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
640 NULL, 0,
641 _Py_ERROR_SURROGATEESCAPE);
642 if (res != 0) {
643 assert(res != -3);
644 if (wlen != NULL) {
645 *wlen = (size_t)res;
646 }
647 return NULL;
648 }
649 return wstr;
650 }
651
652
653 static int
encode_current_locale(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)654 encode_current_locale(const wchar_t *text, char **str,
655 size_t *error_pos, const char **reason,
656 int raw_malloc, _Py_error_handler errors)
657 {
658 const size_t len = wcslen(text);
659 char *result = NULL, *bytes = NULL;
660 size_t i, size, converted;
661 wchar_t c, buf[2];
662
663 int surrogateescape;
664 if (get_surrogateescape(errors, &surrogateescape) < 0) {
665 return -3;
666 }
667
668 /* The function works in two steps:
669 1. compute the length of the output buffer in bytes (size)
670 2. outputs the bytes */
671 size = 0;
672 buf[1] = 0;
673 while (1) {
674 for (i=0; i < len; i++) {
675 c = text[i];
676 if (c >= 0xdc80 && c <= 0xdcff) {
677 if (!surrogateescape) {
678 goto encode_error;
679 }
680 /* UTF-8b surrogate */
681 if (bytes != NULL) {
682 *bytes++ = c - 0xdc00;
683 size--;
684 }
685 else {
686 size++;
687 }
688 continue;
689 }
690 else {
691 buf[0] = c;
692 if (bytes != NULL) {
693 converted = wcstombs(bytes, buf, size);
694 }
695 else {
696 converted = wcstombs(NULL, buf, 0);
697 }
698 if (converted == DECODE_ERROR) {
699 goto encode_error;
700 }
701 if (bytes != NULL) {
702 bytes += converted;
703 size -= converted;
704 }
705 else {
706 size += converted;
707 }
708 }
709 }
710 if (result != NULL) {
711 *bytes = '\0';
712 break;
713 }
714
715 size += 1; /* nul byte at the end */
716 if (raw_malloc) {
717 result = PyMem_RawMalloc(size);
718 }
719 else {
720 result = PyMem_Malloc(size);
721 }
722 if (result == NULL) {
723 return -1;
724 }
725 bytes = result;
726 }
727 *str = result;
728 return 0;
729
730 encode_error:
731 if (raw_malloc) {
732 PyMem_RawFree(result);
733 }
734 else {
735 PyMem_Free(result);
736 }
737 if (error_pos != NULL) {
738 *error_pos = i;
739 }
740 if (reason) {
741 *reason = "encoding error";
742 }
743 return -2;
744 }
745
746
747 /* Encode a string to the locale encoding.
748
749 Parameters:
750
751 * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
752 of PyMem_Malloc().
753 * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
754 Python filesystem encoding.
755 * errors: error handler like "strict" or "surrogateescape".
756
757 Return value:
758
759 0: success, *str is set to a newly allocated decoded string.
760 -1: memory allocation failure
761 -2: encoding error, set *error_pos and *reason (if set).
762 -3: the error handler 'errors' is not supported.
763 */
764 static int
encode_locale_ex(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,int current_locale,_Py_error_handler errors)765 encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
766 const char **reason,
767 int raw_malloc, int current_locale, _Py_error_handler errors)
768 {
769 if (current_locale) {
770 #ifdef _Py_FORCE_UTF8_LOCALE
771 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
772 raw_malloc, errors);
773 #else
774 return encode_current_locale(text, str, error_pos, reason,
775 raw_malloc, errors);
776 #endif
777 }
778
779 #ifdef _Py_FORCE_UTF8_FS_ENCODING
780 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
781 raw_malloc, errors);
782 #else
783 int use_utf8 = (Py_UTF8Mode == 1);
784 #ifdef MS_WINDOWS
785 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
786 #endif
787 if (use_utf8) {
788 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
789 raw_malloc, errors);
790 }
791
792 #ifdef USE_FORCE_ASCII
793 if (force_ascii == -1) {
794 force_ascii = check_force_ascii();
795 }
796
797 if (force_ascii) {
798 return encode_ascii(text, str, error_pos, reason,
799 raw_malloc, errors);
800 }
801 #endif
802
803 return encode_current_locale(text, str, error_pos, reason,
804 raw_malloc, errors);
805 #endif /* _Py_FORCE_UTF8_FS_ENCODING */
806 }
807
808 static char*
encode_locale(const wchar_t * text,size_t * error_pos,int raw_malloc,int current_locale)809 encode_locale(const wchar_t *text, size_t *error_pos,
810 int raw_malloc, int current_locale)
811 {
812 char *str;
813 int res = encode_locale_ex(text, &str, error_pos, NULL,
814 raw_malloc, current_locale,
815 _Py_ERROR_SURROGATEESCAPE);
816 if (res != -2 && error_pos) {
817 *error_pos = (size_t)-1;
818 }
819 if (res != 0) {
820 return NULL;
821 }
822 return str;
823 }
824
825 /* Encode a wide character string to the locale encoding with the
826 surrogateescape error handler: surrogate characters in the range
827 U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
828
829 Return a pointer to a newly allocated byte string, use PyMem_Free() to free
830 the memory. Return NULL on encoding or memory allocation error.
831
832 If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
833 to the index of the invalid character on encoding error.
834
835 Use the Py_DecodeLocale() function to decode the bytes string back to a wide
836 character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;   /* use the Python filesystem encoding */

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
842
843
844 /* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
845 instead of PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;       /* allocate with PyMem_RawMalloc() */
    const int current_locale = 0;   /* use the Python filesystem encoding */

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
851
852
853 int
_Py_EncodeLocaleEx(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int current_locale,_Py_error_handler errors)854 _Py_EncodeLocaleEx(const wchar_t *text, char **str,
855 size_t *error_pos, const char **reason,
856 int current_locale, _Py_error_handler errors)
857 {
858 return encode_locale_ex(text, str, error_pos, reason, 1,
859 current_locale, errors);
860 }
861
862
863 #ifdef MS_WINDOWS
864 static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
865
866 static void
FILE_TIME_to_time_t_nsec(FILETIME * in_ptr,time_t * time_out,int * nsec_out)867 FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
868 {
869 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
870 /* Cannot simply cast and dereference in_ptr,
871 since it might not be aligned properly */
872 __int64 in;
873 memcpy(&in, in_ptr, sizeof(in));
874 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
875 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
876 }
877
878 void
_Py_time_t_to_FILE_TIME(time_t time_in,int nsec_in,FILETIME * out_ptr)879 _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
880 {
881 /* XXX endianness */
882 __int64 out;
883 out = time_in + secs_between_epochs;
884 out = out * 10000000 + nsec_in / 100;
885 memcpy(out_ptr, &out, sizeof(out));
886 }
887
888 /* Below, we *know* that ugo+r is 0444 */
889 #if _S_IREAD != 0400
890 #error Unsupported C library
891 #endif
892 static int
attributes_to_mode(DWORD attr)893 attributes_to_mode(DWORD attr)
894 {
895 int m = 0;
896 if (attr & FILE_ATTRIBUTE_DIRECTORY)
897 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
898 else
899 m |= _S_IFREG;
900 if (attr & FILE_ATTRIBUTE_READONLY)
901 m |= 0444;
902 else
903 m |= 0666;
904 return m;
905 }
906
907 void
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION * info,ULONG reparse_tag,struct _Py_stat_struct * result)908 _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
909 struct _Py_stat_struct *result)
910 {
911 memset(result, 0, sizeof(*result));
912 result->st_mode = attributes_to_mode(info->dwFileAttributes);
913 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
914 result->st_dev = info->dwVolumeSerialNumber;
915 result->st_rdev = result->st_dev;
916 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
917 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
918 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
919 result->st_nlink = info->nNumberOfLinks;
920 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
921 /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
922 open other name surrogate reparse points without traversing them. To
923 detect/handle these, check st_file_attributes and st_reparse_tag. */
924 result->st_reparse_tag = reparse_tag;
925 if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
926 reparse_tag == IO_REPARSE_TAG_SYMLINK) {
927 /* first clear the S_IFMT bits */
928 result->st_mode ^= (result->st_mode & S_IFMT);
929 /* now set the bits that make this a symlink */
930 result->st_mode |= S_IFLNK;
931 }
932 result->st_file_attributes = info->dwFileAttributes;
933 }
934 #endif
935
936 /* Return information about a file.
937
938 On POSIX, use fstat().
939
940 On Windows, use GetFileType() and GetFileInformationByHandle() which support
941 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
942 than 2 GiB because the file size type is a signed 32-bit integer: see issue
943 #23152.
944
945 On Windows, set the last Windows error and return nonzero on error. On
946 POSIX, set errno and return nonzero on error. Fill status and return 0 on
947 success. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    HANDLE handle;

    _Py_BEGIN_SUPPRESS_IPH
    handle = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH

    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    const int type = GetFileType(handle);
    switch (type) {
    case FILE_TYPE_DISK:
        /* regular case: handled below */
        break;

    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;

    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;

    case FILE_TYPE_UNKNOWN: {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* valid but unknown file: status stays zeroed */
        return 0;
    }

    default:
        /* any other type: status stays zeroed */
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION info;
    if (!GetFileInformationByHandle(handle, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
1001
1002 /* Return information about a file.
1003
1004 On POSIX, use fstat().
1005
1006 On Windows, use GetFileType() and GetFileInformationByHandle() which support
1007 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
1008 than 2 GiB because the file size type is a signed 32-bit integer: see issue
1009 #23152.
1010
1011 Raise an exception and return -1 on error. On Windows, set the last Windows
1012 error on error. On POSIX, set errno on error. Fill status and return 0 on
1013 success.
1014
1015 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1016 to call fstat(). The caller must hold the GIL. */
1017 int
_Py_fstat(int fd,struct _Py_stat_struct * status)1018 _Py_fstat(int fd, struct _Py_stat_struct *status)
1019 {
1020 int res;
1021
1022 assert(PyGILState_Check());
1023
1024 Py_BEGIN_ALLOW_THREADS
1025 res = _Py_fstat_noraise(fd, status);
1026 Py_END_ALLOW_THREADS
1027
1028 if (res != 0) {
1029 #ifdef MS_WINDOWS
1030 PyErr_SetFromWindowsErr(0);
1031 #else
1032 PyErr_SetFromErrno(PyExc_OSError);
1033 #endif
1034 return -1;
1035 }
1036 return 0;
1037 }
1038
1039 /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1040 call stat() otherwise. Only fill st_mode attribute on Windows.
1041
1042 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1043 raised. */
1044
1045 int
_Py_stat(PyObject * path,struct stat * statbuf)1046 _Py_stat(PyObject *path, struct stat *statbuf)
1047 {
1048 #ifdef MS_WINDOWS
1049 int err;
1050 struct _stat wstatbuf;
1051 const wchar_t *wpath;
1052
1053 wpath = _PyUnicode_AsUnicode(path);
1054 if (wpath == NULL)
1055 return -2;
1056
1057 err = _wstat(wpath, &wstatbuf);
1058 if (!err)
1059 statbuf->st_mode = wstatbuf.st_mode;
1060 return err;
1061 #else
1062 int ret;
1063 PyObject *bytes;
1064 char *cpath;
1065
1066 bytes = PyUnicode_EncodeFSDefault(path);
1067 if (bytes == NULL)
1068 return -2;
1069
1070 /* check for embedded null bytes */
1071 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1072 Py_DECREF(bytes);
1073 return -2;
1074 }
1075
1076 ret = stat(cpath, statbuf);
1077 Py_DECREF(bytes);
1078 return ret;
1079 #endif
1080 }
1081
1082
1083 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1084 static int
get_inheritable(int fd,int raise)1085 get_inheritable(int fd, int raise)
1086 {
1087 #ifdef MS_WINDOWS
1088 HANDLE handle;
1089 DWORD flags;
1090
1091 _Py_BEGIN_SUPPRESS_IPH
1092 handle = (HANDLE)_get_osfhandle(fd);
1093 _Py_END_SUPPRESS_IPH
1094 if (handle == INVALID_HANDLE_VALUE) {
1095 if (raise)
1096 PyErr_SetFromErrno(PyExc_OSError);
1097 return -1;
1098 }
1099
1100 if (!GetHandleInformation(handle, &flags)) {
1101 if (raise)
1102 PyErr_SetFromWindowsErr(0);
1103 return -1;
1104 }
1105
1106 return (flags & HANDLE_FLAG_INHERIT);
1107 #else
1108 int flags;
1109
1110 flags = fcntl(fd, F_GETFD, 0);
1111 if (flags == -1) {
1112 if (raise)
1113 PyErr_SetFromErrno(PyExc_OSError);
1114 return -1;
1115 }
1116 return !(flags & FD_CLOEXEC);
1117 #endif
1118 }
1119
/* Get the inheritable flag of the specified file descriptor.
   Return 1 if the file descriptor can be inherited, 0 if it cannot,
   raise an exception and return -1 on error. */
int
_Py_get_inheritable(int fd)
{
    const int raise = 1;   /* report errors as exceptions */

    return get_inheritable(fd, raise);
}
1128
1129
/* Set (inheritable != 0) or clear (inheritable == 0) the inheritable flag of
   the file descriptor fd.

   raise: if non-zero, an exception is set on failure; if zero, the function
   only returns -1.

   atomic_flag_works: NULL, or a pointer to a cached tri-state (-1: unknown,
   0: does not work, 1: works). When it is not NULL and the cached value is
   (or becomes) non-zero, the function returns without modifying fd. It may
   only be non-NULL when inheritable is 0 (enforced by an assertion).

   Return 0 on success, -1 on error.

   This function MUST be kept async-signal-safe on POSIX when raise=0. */
static int
set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
{
#ifdef MS_WINDOWS
    HANDLE handle;
    DWORD flags;
#else
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
    /* Cached across calls: -1 = not probed yet, 1 = ioctl() succeeded once,
       0 = ioctl() failed with ENOTTY or EACCES and must not be tried again */
    static int ioctl_works = -1;
    int request;
    int err;
#endif
    int flags, new_flags;
    int res;
#endif

    /* atomic_flag_works can only be used to make the file descriptor
       non-inheritable */
    assert(!(atomic_flag_works != NULL && inheritable));

    if (atomic_flag_works != NULL && !inheritable) {
        if (*atomic_flag_works == -1) {
            /* First use of the cache: inspect fd itself. If it is already
               non-inheritable, record that the atomic flag works. */
            int isInheritable = get_inheritable(fd, raise);
            if (isInheritable == -1)
                return -1;
            *atomic_flag_works = !isInheritable;
        }

        /* The atomic flag works: fd needs no further change */
        if (*atomic_flag_works)
            return 0;
    }

#ifdef MS_WINDOWS
    _Py_BEGIN_SUPPRESS_IPH
    handle = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH
    if (handle == INVALID_HANDLE_VALUE) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }

    if (inheritable)
        flags = HANDLE_FLAG_INHERIT;
    else
        flags = 0;

    /* This check can be removed once support for Windows 7 ends. */
#define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
        GetFileType(handle) == FILE_TYPE_CHAR)

    /* SetHandleInformation() is skipped for handles matching
       CONSOLE_PSEUDOHANDLE(); such handles are reported as success. */
    if (!CONSOLE_PSEUDOHANDLE(handle) &&
        !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
        if (raise)
            PyErr_SetFromWindowsErr(0);
        return -1;
    }
#undef CONSOLE_PSEUDOHANDLE
    return 0;

#else

#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
    if (ioctl_works != 0 && raise != 0) {
        /* fast-path: ioctl() only requires one syscall */
        /* caveat: raise=0 is an indicator that we must be async-signal-safe
         * thus avoid using ioctl() so we skip the fast-path. */
        if (inheritable)
            request = FIONCLEX;
        else
            request = FIOCLEX;
        err = ioctl(fd, request, NULL);
        if (!err) {
            ioctl_works = 1;
            return 0;
        }

#ifdef __linux__
        if (errno == EBADF) {
            // On Linux, ioctl(FIOCLEX) will fail with EBADF for O_PATH file descriptors
            // Fall through to the fcntl() path
        }
        else
#endif
        if (errno != ENOTTY && errno != EACCES) {
            /* Any other errno is reported as a real failure */
            if (raise)
                PyErr_SetFromErrno(PyExc_OSError);
            return -1;
        }
        else {
            /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
               device". The ioctl is declared but not supported by the kernel.
               Remember that ioctl() doesn't work. It is the case on
               Illumos-based OS for example.

               Issue #27057: When SELinux policy disallows ioctl it will fail
               with EACCES. While FIOCLEX is safe operation it may be
               unavailable because ioctl was denied altogether.
               This can be the case on Android. */
            ioctl_works = 0;
        }
        /* fallback to fcntl() if ioctl() does not work */
    }
#endif

    /* slow-path: fcntl() requires two syscalls */
    flags = fcntl(fd, F_GETFD);
    if (flags < 0) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }

    /* Inheritable means close-on-exec is cleared, and vice versa */
    if (inheritable) {
        new_flags = flags & ~FD_CLOEXEC;
    }
    else {
        new_flags = flags | FD_CLOEXEC;
    }

    if (new_flags == flags) {
        /* FD_CLOEXEC flag already set/cleared: nothing to do */
        return 0;
    }

    res = fcntl(fd, F_SETFD, new_flags);
    if (res < 0) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }
    return 0;
#endif
}
1265
/* Clear the inheritable flag of the file descriptor fd without raising
   a Python exception.
   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    return set_inheritable(fd, /* inheritable */ 0, /* raise */ 0, NULL);
}
1273
/* Change the inheritable flag of the file descriptor fd.
   Return 0 on success; on error, raise an exception and return -1.

   atomic_flag_works may point to a cached tri-state telling whether the
   flag used to create fd atomically as non-inheritable is effective:

   * *atomic_flag_works==-1 (unknown): query the current flag of fd. If fd
     is already non-inheritable, store 1 and leave fd untouched; otherwise
     store 0 and make fd non-inheritable
   * *atomic_flag_works==1: do nothing
   * *atomic_flag_works==0: make fd non-inheritable

   Pass NULL as atomic_flag_works if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: it must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;

    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1295
/* Variant of _Py_set_inheritable() that never raises an exception:
   on error, errno is set and -1 is returned.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1304
1305 static int
_Py_open_impl(const char * pathname,int flags,int gil_held)1306 _Py_open_impl(const char *pathname, int flags, int gil_held)
1307 {
1308 int fd;
1309 int async_err = 0;
1310 #ifndef MS_WINDOWS
1311 int *atomic_flag_works;
1312 #endif
1313
1314 #ifdef MS_WINDOWS
1315 flags |= O_NOINHERIT;
1316 #elif defined(O_CLOEXEC)
1317 atomic_flag_works = &_Py_open_cloexec_works;
1318 flags |= O_CLOEXEC;
1319 #else
1320 atomic_flag_works = NULL;
1321 #endif
1322
1323 if (gil_held) {
1324 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1325 if (pathname_obj == NULL) {
1326 return -1;
1327 }
1328 if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1329 Py_DECREF(pathname_obj);
1330 return -1;
1331 }
1332
1333 do {
1334 Py_BEGIN_ALLOW_THREADS
1335 fd = open(pathname, flags);
1336 Py_END_ALLOW_THREADS
1337 } while (fd < 0
1338 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1339 if (async_err) {
1340 Py_DECREF(pathname_obj);
1341 return -1;
1342 }
1343 if (fd < 0) {
1344 PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1345 Py_DECREF(pathname_obj);
1346 return -1;
1347 }
1348 Py_DECREF(pathname_obj);
1349 }
1350 else {
1351 fd = open(pathname, flags);
1352 if (fd < 0)
1353 return -1;
1354 }
1355
1356 #ifndef MS_WINDOWS
1357 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
1358 close(fd);
1359 return -1;
1360 }
1361 #endif
1362
1363 return fd;
1364 }
1365
/* Wrapper to open(): open pathname with the specified flags.
   Return a file descriptor on success. On error, raise an exception and
   return -1.

   The file descriptor is created non-inheritable.

   If open() is interrupted by a signal (fails with EINTR), the syscall is
   retried, unless the Python signal handler raises an exception.

   The GIL is released while open() runs. The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());

    return _Py_open_impl(pathname, flags, gil_held);
}
1383
/* Wrapper to open() that never raises a Python exception: open pathname
   with the specified flags.
   Return a file descriptor on success. On error, set errno and return -1.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1395
1396 /* Open a file. Use _wfopen() on Windows, encode the path to the locale
1397 encoding and use fopen() otherwise.
1398
1399 The file descriptor is created non-inheritable.
1400
1401 If interrupted by a signal, fail with EINTR. */
1402 FILE *
_Py_wfopen(const wchar_t * path,const wchar_t * mode)1403 _Py_wfopen(const wchar_t *path, const wchar_t *mode)
1404 {
1405 FILE *f;
1406 if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1407 return NULL;
1408 }
1409 #ifndef MS_WINDOWS
1410 char *cpath;
1411 char cmode[10];
1412 size_t r;
1413 r = wcstombs(cmode, mode, 10);
1414 if (r == DECODE_ERROR || r >= 10) {
1415 errno = EINVAL;
1416 return NULL;
1417 }
1418 cpath = _Py_EncodeLocaleRaw(path, NULL);
1419 if (cpath == NULL) {
1420 return NULL;
1421 }
1422 f = fopen(cpath, cmode);
1423 PyMem_RawFree(cpath);
1424 #else
1425 f = _wfopen(path, mode);
1426 #endif
1427 if (f == NULL)
1428 return NULL;
1429 if (make_non_inheritable(fileno(f)) < 0) {
1430 fclose(f);
1431 return NULL;
1432 }
1433 return f;
1434 }
1435
1436 /* Wrapper to fopen().
1437
1438 The file descriptor is created non-inheritable.
1439
1440 If interrupted by a signal, fail with EINTR. */
1441 FILE*
_Py_fopen(const char * pathname,const char * mode)1442 _Py_fopen(const char *pathname, const char *mode)
1443 {
1444 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1445 if (pathname_obj == NULL) {
1446 return NULL;
1447 }
1448 if (PySys_Audit("open", "Osi", pathname_obj, mode, 0) < 0) {
1449 Py_DECREF(pathname_obj);
1450 return NULL;
1451 }
1452 Py_DECREF(pathname_obj);
1453
1454 FILE *f = fopen(pathname, mode);
1455 if (f == NULL)
1456 return NULL;
1457 if (make_non_inheritable(fileno(f)) < 0) {
1458 fclose(f);
1459 return NULL;
1460 }
1461 return f;
1462 }
1463
1464 /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
1465 encoding and call fopen() otherwise.
1466
1467 Return the new file object on success. Raise an exception and return NULL
1468 on error.
1469
1470 The file descriptor is created non-inheritable.
1471
1472 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1473 except if the Python signal handler raises an exception.
1474
1475 Release the GIL to call _wfopen() or fopen(). The caller must hold
1476 the GIL. */
1477 FILE*
_Py_fopen_obj(PyObject * path,const char * mode)1478 _Py_fopen_obj(PyObject *path, const char *mode)
1479 {
1480 FILE *f;
1481 int async_err = 0;
1482 #ifdef MS_WINDOWS
1483 const wchar_t *wpath;
1484 wchar_t wmode[10];
1485 int usize;
1486
1487 assert(PyGILState_Check());
1488
1489 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1490 return NULL;
1491 }
1492 if (!PyUnicode_Check(path)) {
1493 PyErr_Format(PyExc_TypeError,
1494 "str file path expected under Windows, got %R",
1495 Py_TYPE(path));
1496 return NULL;
1497 }
1498 wpath = _PyUnicode_AsUnicode(path);
1499 if (wpath == NULL)
1500 return NULL;
1501
1502 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1503 wmode, Py_ARRAY_LENGTH(wmode));
1504 if (usize == 0) {
1505 PyErr_SetFromWindowsErr(0);
1506 return NULL;
1507 }
1508
1509 do {
1510 Py_BEGIN_ALLOW_THREADS
1511 f = _wfopen(wpath, wmode);
1512 Py_END_ALLOW_THREADS
1513 } while (f == NULL
1514 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1515 #else
1516 PyObject *bytes;
1517 char *path_bytes;
1518
1519 assert(PyGILState_Check());
1520
1521 if (!PyUnicode_FSConverter(path, &bytes))
1522 return NULL;
1523 path_bytes = PyBytes_AS_STRING(bytes);
1524
1525 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1526 Py_DECREF(bytes);
1527 return NULL;
1528 }
1529
1530 do {
1531 Py_BEGIN_ALLOW_THREADS
1532 f = fopen(path_bytes, mode);
1533 Py_END_ALLOW_THREADS
1534 } while (f == NULL
1535 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1536
1537 Py_DECREF(bytes);
1538 #endif
1539 if (async_err)
1540 return NULL;
1541
1542 if (f == NULL) {
1543 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1544 return NULL;
1545 }
1546
1547 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1548 fclose(f);
1549 return NULL;
1550 }
1551 return f;
1552 }
1553
1554 /* Read count bytes from fd into buf.
1555
1556 On success, return the number of read bytes, it can be lower than count.
1557 If the current file offset is at or past the end of file, no bytes are read,
1558 and read() returns zero.
1559
1560 On error, raise an exception, set errno and return -1.
1561
1562 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1563 If the Python signal handler raises an exception, the function returns -1
1564 (the syscall is not retried).
1565
1566 Release the GIL to call read(). The caller must hold the GIL. */
1567 Py_ssize_t
_Py_read(int fd,void * buf,size_t count)1568 _Py_read(int fd, void *buf, size_t count)
1569 {
1570 Py_ssize_t n;
1571 int err;
1572 int async_err = 0;
1573
1574 assert(PyGILState_Check());
1575
1576 /* _Py_read() must not be called with an exception set, otherwise the
1577 * caller may think that read() was interrupted by a signal and the signal
1578 * handler raised an exception. */
1579 assert(!PyErr_Occurred());
1580
1581 if (count > _PY_READ_MAX) {
1582 count = _PY_READ_MAX;
1583 }
1584
1585 _Py_BEGIN_SUPPRESS_IPH
1586 do {
1587 Py_BEGIN_ALLOW_THREADS
1588 errno = 0;
1589 #ifdef MS_WINDOWS
1590 n = read(fd, buf, (int)count);
1591 #else
1592 n = read(fd, buf, count);
1593 #endif
1594 /* save/restore errno because PyErr_CheckSignals()
1595 * and PyErr_SetFromErrno() can modify it */
1596 err = errno;
1597 Py_END_ALLOW_THREADS
1598 } while (n < 0 && err == EINTR &&
1599 !(async_err = PyErr_CheckSignals()));
1600 _Py_END_SUPPRESS_IPH
1601
1602 if (async_err) {
1603 /* read() was interrupted by a signal (failed with EINTR)
1604 * and the Python signal handler raised an exception */
1605 errno = err;
1606 assert(errno == EINTR && PyErr_Occurred());
1607 return -1;
1608 }
1609 if (n < 0) {
1610 PyErr_SetFromErrno(PyExc_OSError);
1611 errno = err;
1612 return -1;
1613 }
1614
1615 return n;
1616 }
1617
1618 static Py_ssize_t
_Py_write_impl(int fd,const void * buf,size_t count,int gil_held)1619 _Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
1620 {
1621 Py_ssize_t n;
1622 int err;
1623 int async_err = 0;
1624
1625 _Py_BEGIN_SUPPRESS_IPH
1626 #ifdef MS_WINDOWS
1627 if (count > 32767 && isatty(fd)) {
1628 /* Issue #11395: the Windows console returns an error (12: not
1629 enough space error) on writing into stdout if stdout mode is
1630 binary and the length is greater than 66,000 bytes (or less,
1631 depending on heap usage). */
1632 count = 32767;
1633 }
1634 #endif
1635 if (count > _PY_WRITE_MAX) {
1636 count = _PY_WRITE_MAX;
1637 }
1638
1639 if (gil_held) {
1640 do {
1641 Py_BEGIN_ALLOW_THREADS
1642 errno = 0;
1643 #ifdef MS_WINDOWS
1644 n = write(fd, buf, (int)count);
1645 #else
1646 n = write(fd, buf, count);
1647 #endif
1648 /* save/restore errno because PyErr_CheckSignals()
1649 * and PyErr_SetFromErrno() can modify it */
1650 err = errno;
1651 Py_END_ALLOW_THREADS
1652 } while (n < 0 && err == EINTR &&
1653 !(async_err = PyErr_CheckSignals()));
1654 }
1655 else {
1656 do {
1657 errno = 0;
1658 #ifdef MS_WINDOWS
1659 n = write(fd, buf, (int)count);
1660 #else
1661 n = write(fd, buf, count);
1662 #endif
1663 err = errno;
1664 } while (n < 0 && err == EINTR);
1665 }
1666 _Py_END_SUPPRESS_IPH
1667
1668 if (async_err) {
1669 /* write() was interrupted by a signal (failed with EINTR)
1670 and the Python signal handler raised an exception (if gil_held is
1671 nonzero). */
1672 errno = err;
1673 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
1674 return -1;
1675 }
1676 if (n < 0) {
1677 if (gil_held)
1678 PyErr_SetFromErrno(PyExc_OSError);
1679 errno = err;
1680 return -1;
1681 }
1682
1683 return n;
1684 }
1685
1686 /* Write count bytes of buf into fd.
1687
1688 On success, return the number of written bytes, it can be lower than count
1689 including 0. On error, raise an exception, set errno and return -1.
1690
1691 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1692 If the Python signal handler raises an exception, the function returns -1
1693 (the syscall is not retried).
1694
1695 Release the GIL to call write(). The caller must hold the GIL. */
1696 Py_ssize_t
_Py_write(int fd,const void * buf,size_t count)1697 _Py_write(int fd, const void *buf, size_t count)
1698 {
1699 assert(PyGILState_Check());
1700
1701 /* _Py_write() must not be called with an exception set, otherwise the
1702 * caller may think that write() was interrupted by a signal and the signal
1703 * handler raised an exception. */
1704 assert(!PyErr_Occurred());
1705
1706 return _Py_write_impl(fd, buf, count, 1);
1707 }
1708
1709 /* Write count bytes of buf into fd.
1710 *
1711 * On success, return the number of written bytes, it can be lower than count
1712 * including 0. On error, set errno and return -1.
1713 *
1714 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1715 * without calling the Python signal handler. */
1716 Py_ssize_t
_Py_write_noraise(int fd,const void * buf,size_t count)1717 _Py_write_noraise(int fd, const void *buf, size_t count)
1718 {
1719 return _Py_write_impl(fd, buf, count, 0);
1720 }
1721
#ifdef HAVE_READLINK

/* Read value of symbolic link. Encode the path to the locale encoding, decode
   the result from the locale encoding.

   path: the symbolic link to read.
   buf, buflen: output buffer and its capacity in wide characters, including
   room for the trailing NUL character.

   On success, copy the decoded target into buf and return the length
   reported by Py_DecodeLocale() for it.

   Return -1 on encoding error, on readlink() error, if the internal buffer is
   too short, on decoding error, or if 'buf' is too short. errno is set to
   EINVAL in every error case except a readlink() failure, where the errno
   set by readlink() is kept. */
int
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
{
    char *cpath;
    char cbuf[MAXPATHLEN];
    const size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
    wchar_t *wbuf;
    ssize_t res;
    size_t r1;
    int saved_errno;

    cpath = _Py_EncodeLocaleRaw(path, NULL);
    if (cpath == NULL) {
        errno = EINVAL;
        return -1;
    }
    res = readlink(cpath, cbuf, cbuf_len);
    /* releasing memory may overwrite errno: keep the readlink() error */
    saved_errno = errno;
    PyMem_RawFree(cpath);
    if (res < 0) {
        errno = saved_errno;
        return -1;
    }
    /* A completely filled buffer leaves no room for the terminator and
       the link target may have been truncated. */
    if ((size_t)res == cbuf_len) {
        errno = EINVAL;
        return -1;
    }
    cbuf[res] = '\0'; /* buf will be null terminated */
    wbuf = Py_DecodeLocale(cbuf, &r1);
    if (wbuf == NULL) {
        errno = EINVAL;
        return -1;
    }
    /* wbuf must have space to store the trailing NUL character */
    if (buflen <= r1) {
        PyMem_RawFree(wbuf);
        errno = EINVAL;
        return -1;
    }
    /* r1 < buflen, so wcsncpy() always writes a terminating NUL */
    wcsncpy(buf, wbuf, buflen);
    PyMem_RawFree(wbuf);
    return (int)r1;
}
#endif
1768
#ifdef HAVE_REALPATH

/* Return the canonicalized absolute pathname. Encode path to the locale
   encoding, decode the result from the locale encoding.

   path: the path to resolve.
   resolved_path, resolved_path_len: output buffer and its capacity in wide
   characters, including room for the trailing NUL character.

   Return resolved_path on success.

   Return NULL on encoding error, realpath() error, decoding error
   or if 'resolved_path' is too short. errno is set to EINVAL in every error
   case except a realpath() failure, where the errno set by realpath() is
   kept. */
wchar_t*
_Py_wrealpath(const wchar_t *path,
              wchar_t *resolved_path, size_t resolved_path_len)
{
    char *cpath;
    char cresolved_path[MAXPATHLEN];
    wchar_t *wresolved_path;
    char *res;
    size_t r;
    int saved_errno;

    cpath = _Py_EncodeLocaleRaw(path, NULL);
    if (cpath == NULL) {
        errno = EINVAL;
        return NULL;
    }
    res = realpath(cpath, cresolved_path);
    /* releasing memory may overwrite errno: keep the realpath() error */
    saved_errno = errno;
    PyMem_RawFree(cpath);
    if (res == NULL) {
        errno = saved_errno;
        return NULL;
    }

    wresolved_path = Py_DecodeLocale(cresolved_path, &r);
    if (wresolved_path == NULL) {
        errno = EINVAL;
        return NULL;
    }
    /* wresolved_path must have space to store the trailing NUL character */
    if (resolved_path_len <= r) {
        PyMem_RawFree(wresolved_path);
        errno = EINVAL;
        return NULL;
    }
    /* r < resolved_path_len, so wcsncpy() always writes a terminating NUL */
    wcsncpy(resolved_path, wresolved_path, resolved_path_len);
    PyMem_RawFree(wresolved_path);
    return resolved_path;
}
#endif
1811
1812 /* Get the current directory. buflen is the buffer size in wide characters
1813 including the null character. Decode the path from the locale encoding.
1814
1815 Return NULL on getcwd() error, on decoding error, or if 'buf' is
1816 too short. */
1817 wchar_t*
_Py_wgetcwd(wchar_t * buf,size_t buflen)1818 _Py_wgetcwd(wchar_t *buf, size_t buflen)
1819 {
1820 #ifdef MS_WINDOWS
1821 int ibuflen = (int)Py_MIN(buflen, INT_MAX);
1822 return _wgetcwd(buf, ibuflen);
1823 #else
1824 char fname[MAXPATHLEN];
1825 wchar_t *wname;
1826 size_t len;
1827
1828 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
1829 return NULL;
1830 wname = Py_DecodeLocale(fname, &len);
1831 if (wname == NULL)
1832 return NULL;
1833 /* wname must have space to store the trailing NUL character */
1834 if (buflen <= len) {
1835 PyMem_RawFree(wname);
1836 return NULL;
1837 }
1838 wcsncpy(buf, wname, buflen);
1839 PyMem_RawFree(wname);
1840 return buf;
1841 #endif
1842 }
1843
1844 /* Duplicate a file descriptor. The new file descriptor is created as
1845 non-inheritable. Return a new file descriptor on success, raise an OSError
1846 exception and return -1 on error.
1847
1848 The GIL is released to call dup(). The caller must hold the GIL. */
1849 int
_Py_dup(int fd)1850 _Py_dup(int fd)
1851 {
1852 #ifdef MS_WINDOWS
1853 HANDLE handle;
1854 #endif
1855
1856 assert(PyGILState_Check());
1857
1858 #ifdef MS_WINDOWS
1859 _Py_BEGIN_SUPPRESS_IPH
1860 handle = (HANDLE)_get_osfhandle(fd);
1861 _Py_END_SUPPRESS_IPH
1862 if (handle == INVALID_HANDLE_VALUE) {
1863 PyErr_SetFromErrno(PyExc_OSError);
1864 return -1;
1865 }
1866
1867 Py_BEGIN_ALLOW_THREADS
1868 _Py_BEGIN_SUPPRESS_IPH
1869 fd = dup(fd);
1870 _Py_END_SUPPRESS_IPH
1871 Py_END_ALLOW_THREADS
1872 if (fd < 0) {
1873 PyErr_SetFromErrno(PyExc_OSError);
1874 return -1;
1875 }
1876
1877 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1878 _Py_BEGIN_SUPPRESS_IPH
1879 close(fd);
1880 _Py_END_SUPPRESS_IPH
1881 return -1;
1882 }
1883 #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
1884 Py_BEGIN_ALLOW_THREADS
1885 _Py_BEGIN_SUPPRESS_IPH
1886 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
1887 _Py_END_SUPPRESS_IPH
1888 Py_END_ALLOW_THREADS
1889 if (fd < 0) {
1890 PyErr_SetFromErrno(PyExc_OSError);
1891 return -1;
1892 }
1893
1894 #else
1895 Py_BEGIN_ALLOW_THREADS
1896 _Py_BEGIN_SUPPRESS_IPH
1897 fd = dup(fd);
1898 _Py_END_SUPPRESS_IPH
1899 Py_END_ALLOW_THREADS
1900 if (fd < 0) {
1901 PyErr_SetFromErrno(PyExc_OSError);
1902 return -1;
1903 }
1904
1905 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1906 _Py_BEGIN_SUPPRESS_IPH
1907 close(fd);
1908 _Py_END_SUPPRESS_IPH
1909 return -1;
1910 }
1911 #endif
1912 return fd;
1913 }
1914
1915 #ifndef MS_WINDOWS
1916 /* Get the blocking mode of the file descriptor.
1917 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
1918 raise an exception and return -1 on error. */
1919 int
_Py_get_blocking(int fd)1920 _Py_get_blocking(int fd)
1921 {
1922 int flags;
1923 _Py_BEGIN_SUPPRESS_IPH
1924 flags = fcntl(fd, F_GETFL, 0);
1925 _Py_END_SUPPRESS_IPH
1926 if (flags < 0) {
1927 PyErr_SetFromErrno(PyExc_OSError);
1928 return -1;
1929 }
1930
1931 return !(flags & O_NONBLOCK);
1932 }
1933
1934 /* Set the blocking mode of the specified file descriptor.
1935
1936 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
1937 otherwise.
1938
1939 Return 0 on success, raise an exception and return -1 on error. */
1940 int
_Py_set_blocking(int fd,int blocking)1941 _Py_set_blocking(int fd, int blocking)
1942 {
1943 #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
1944 int arg = !blocking;
1945 if (ioctl(fd, FIONBIO, &arg) < 0)
1946 goto error;
1947 #else
1948 int flags, res;
1949
1950 _Py_BEGIN_SUPPRESS_IPH
1951 flags = fcntl(fd, F_GETFL, 0);
1952 if (flags >= 0) {
1953 if (blocking)
1954 flags = flags & (~O_NONBLOCK);
1955 else
1956 flags = flags | O_NONBLOCK;
1957
1958 res = fcntl(fd, F_SETFL, flags);
1959 } else {
1960 res = -1;
1961 }
1962 _Py_END_SUPPRESS_IPH
1963
1964 if (res < 0)
1965 goto error;
1966 #endif
1967 return 0;
1968
1969 error:
1970 PyErr_SetFromErrno(PyExc_OSError);
1971 return -1;
1972 }
1973 #endif
1974
1975
/* Decode the decimal_point and thousands_sep fields of the lconv structure
   lc and store the resulting objects into *decimal_point and *thousands_sep.

   Except on Windows, the fields are decoded with PyUnicode_DecodeLocale().
   If either field is longer than 1 byte or is a non-ASCII byte, and the
   LC_NUMERIC locale differs from the LC_CTYPE locale, LC_CTYPE is
   temporarily set to the LC_NUMERIC locale for the decoding and restored
   before returning.

   Return 0 on success. On error, set an exception and return -1; in that
   case *decimal_point may already have been assigned and is not released
   by this function. */
int
_Py_GetLocaleconvNumeric(struct lconv *lc,
                         PyObject **decimal_point, PyObject **thousands_sep)
{
    assert(decimal_point != NULL);
    assert(thousands_sep != NULL);

#ifndef MS_WINDOWS
    /* Switching LC_CTYPE is only considered when a separator is multi-byte
       or non-ASCII */
    int change_locale = 0;
    if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
        change_locale = 1;
    }
    if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
        change_locale = 1;
    }

    /* Keep a copy of the LC_CTYPE locale */
    char *oldloc = NULL, *loc = NULL;
    if (change_locale) {
        oldloc = setlocale(LC_CTYPE, NULL);
        if (!oldloc) {
            PyErr_SetString(PyExc_RuntimeWarning,
                            "failed to get LC_CTYPE locale");
            return -1;
        }

        /* Duplicate the name: it is needed after further setlocale() calls */
        oldloc = _PyMem_Strdup(oldloc);
        if (!oldloc) {
            PyErr_NoMemory();
            return -1;
        }

        loc = setlocale(LC_NUMERIC, NULL);
        if (loc != NULL && strcmp(loc, oldloc) == 0) {
            /* Same locale for both categories: nothing to switch */
            loc = NULL;
        }

        if (loc != NULL) {
            /* Only temporarily set the LC_CTYPE locale to the LC_NUMERIC
               locale if the LC_NUMERIC locale is different than the LC_CTYPE
               locale and decimal_point and/or thousands_sep are non-ASCII or
               longer than 1 byte */
            setlocale(LC_CTYPE, loc);
        }
    }

#define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
#else /* MS_WINDOWS */
/* Use _W_* fields of Windows struct lconv */
#define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
#endif /* MS_WINDOWS */

    int res = -1;

    *decimal_point = GET_LOCALE_STRING(decimal_point);
    if (*decimal_point == NULL) {
        goto done;
    }

    *thousands_sep = GET_LOCALE_STRING(thousands_sep);
    if (*thousands_sep == NULL) {
        goto done;
    }

    res = 0;

done:
#ifndef MS_WINDOWS
    /* loc is non-NULL only if LC_CTYPE was switched above: restore it */
    if (loc != NULL) {
        setlocale(LC_CTYPE, oldloc);
    }
    PyMem_Free(oldloc);
#endif
    return res;

#undef GET_LOCALE_STRING
}
2053