1 #include "Python.h"
2 #include "pycore_fileutils.h"
3 #include "osdefs.h" // SEP
4 #include <locale.h>
5
6 #ifdef MS_WINDOWS
7 # include <malloc.h>
8 # include <windows.h>
9 extern int winerror_to_errno(int);
10 #endif
11
12 #ifdef HAVE_LANGINFO_H
13 #include <langinfo.h>
14 #endif
15
16 #ifdef HAVE_SYS_IOCTL_H
17 #include <sys/ioctl.h>
18 #endif
19
20 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
21 #include <iconv.h>
22 #endif
23
24 #ifdef HAVE_FCNTL_H
25 #include <fcntl.h>
26 #endif /* HAVE_FCNTL_H */
27
#ifdef O_CLOEXEC
/* Does open() support the O_CLOEXEC flag? Possible values:

   -1: unknown
    0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
    1: open() supports O_CLOEXEC flag, close-on-exec is set

   The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
   and os.open().

   The variable only exists when O_CLOEXEC is defined and starts in the
   "unknown" state (-1). */
int _Py_open_cloexec_works = -1;
#endif
39
// Highest code point accepted by is_valid_wide_char().
// The value must be the same in unicodeobject.c.
#define MAX_UNICODE 0x10ffff

// mbstowcs() and mbrtowc() errors
// DECODE_ERROR is also compared against the results of wcstombs() and
// iconv() in this file.
static const size_t DECODE_ERROR = ((size_t)-1);
// Compared against the result of mbrtowc() only.
static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
46
47
48 static int
get_surrogateescape(_Py_error_handler errors,int * surrogateescape)49 get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
50 {
51 switch (errors)
52 {
53 case _Py_ERROR_STRICT:
54 *surrogateescape = 0;
55 return 0;
56 case _Py_ERROR_SURROGATEESCAPE:
57 *surrogateescape = 1;
58 return 0;
59 default:
60 return -1;
61 }
62 }
63
64
65 PyObject *
_Py_device_encoding(int fd)66 _Py_device_encoding(int fd)
67 {
68 #if defined(MS_WINDOWS)
69 UINT cp;
70 #endif
71 int valid;
72 Py_BEGIN_ALLOW_THREADS
73 _Py_BEGIN_SUPPRESS_IPH
74 valid = isatty(fd);
75 _Py_END_SUPPRESS_IPH
76 Py_END_ALLOW_THREADS
77 if (!valid)
78 Py_RETURN_NONE;
79
80 #if defined(MS_WINDOWS)
81 if (fd == 0)
82 cp = GetConsoleCP();
83 else if (fd == 1 || fd == 2)
84 cp = GetConsoleOutputCP();
85 else
86 cp = 0;
87 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
88 has no console */
89 if (cp != 0)
90 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
91 #elif defined(CODESET)
92 {
93 char *codeset = nl_langinfo(CODESET);
94 if (codeset != NULL && codeset[0] != 0)
95 return PyUnicode_FromString(codeset);
96 }
97 #endif
98 Py_RETURN_NONE;
99 }
100
101
102 static size_t
is_valid_wide_char(wchar_t ch)103 is_valid_wide_char(wchar_t ch)
104 {
105 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
106 /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
107 for non-Unicode locales, which makes values higher than MAX_UNICODE
108 possibly valid. */
109 return 1;
110 #endif
111 if (Py_UNICODE_IS_SURROGATE(ch)) {
112 // Reject lone surrogate characters
113 return 0;
114 }
115 if (ch > MAX_UNICODE) {
116 // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
117 // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
118 // it creates characters outside the [U+0000; U+10ffff] range:
119 // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
120 return 0;
121 }
122 return 1;
123 }
124
125
126 static size_t
_Py_mbstowcs(wchar_t * dest,const char * src,size_t n)127 _Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
128 {
129 size_t count = mbstowcs(dest, src, n);
130 if (dest != NULL && count != DECODE_ERROR) {
131 for (size_t i=0; i < count; i++) {
132 wchar_t ch = dest[i];
133 if (!is_valid_wide_char(ch)) {
134 return DECODE_ERROR;
135 }
136 }
137 }
138 return count;
139 }
140
141
#ifdef HAVE_MBRTOWC
/* mbrtowc() wrapper which validates the decoded character.

   'pwc' must not be NULL.  Return the mbrtowc() result, except when a
   character was decoded (result other than 0, DECODE_ERROR and
   INCOMPLETE_CHARACTER) and is_valid_wide_char() rejects it: DECODE_ERROR
   is returned in that case. */
static size_t
_Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
{
    assert(pwc != NULL);
    const size_t nbytes = mbrtowc(pwc, str, len, pmbs);
    if (nbytes == 0
        || nbytes == DECODE_ERROR
        || nbytes == INCOMPLETE_CHARACTER)
    {
        /* nothing to validate */
        return nbytes;
    }
    return is_valid_wide_char(*pwc) ? nbytes : DECODE_ERROR;
}
#endif
156
157
#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)

/* Enables the force-ASCII code paths guarded by "#ifdef USE_FORCE_ASCII"
   later in this file. */
#define USE_FORCE_ASCII

/* Defined outside this file; check_force_ascii() uses it to normalize the
   name returned by nl_langinfo(CODESET). */
extern int _Py_normalize_encoding(const char *, char *, size_t);

/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
   and POSIX locale. nl_langinfo(CODESET) announces an alias of the
   ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
   locale.getpreferredencoding() codec. For example, if command line arguments
   are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
   UnicodeEncodeError instead of retrieving the original byte string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
   nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
   one byte in range 0x80-0xff can be decoded from the locale encoding. The
   workaround is also enabled on error, for example if getting the locale
   failed.

   On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
   announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
   ASCII encoding in this case.

   Values of force_ascii:

       1: the workaround is used: Py_EncodeLocale() uses
          encode_ascii_surrogateescape() and Py_DecodeLocale() uses
          decode_ascii()
       0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
          Py_DecodeLocale() uses mbstowcs()
      -1: unknown, need to call check_force_ascii() to get the value
*/
static int force_ascii = -1;
192
/* Decide whether the ASCII workaround must be used for the current LC_CTYPE
   locale.

   Return 1 if the ASCII encoding must be forced, 0 otherwise.

   0 is returned when the LC_CTYPE locale is neither "C" nor "POSIX".
   1 is returned on error: setlocale() returned NULL, nl_langinfo(CODESET)
   returned NULL or an empty string, or the codeset name could not be
   normalized.  1 is also returned when nl_langinfo(CODESET) is not available
   at build time.

   The probe buffers given to _Py_mbstowcs() are NUL-terminated: mbstowcs()
   expects a C string, and a decoder waiting for a trail byte must not read
   past the end of the buffer. */
static int
check_force_ascii(void)
{
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* byte 0xA7 followed by the terminating NUL byte */
        const unsigned char ch[2] = {0xA7, 0};
        wchar_t wch;
        size_t res;

        res = _Py_mbstowcs(&wch, (const char *)ch, 1);
        if (res != DECODE_ERROR && wch == L'\xA7') {
            /* On HP-UX with C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    const char* ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char **alias=ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i=0x80; i<=0xff; i++) {
        char ch[2];
        wchar_t wch[1];
        size_t res;

        /* one non-ASCII byte followed by the terminating NUL byte */
        ch[0] = (char)(unsigned char)i;
        ch[1] = '\0';
        res = _Py_mbstowcs(wch, ch, 1);
        if (res != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
292
293
294 int
_Py_GetForceASCII(void)295 _Py_GetForceASCII(void)
296 {
297 if (force_ascii == -1) {
298 force_ascii = check_force_ascii();
299 }
300 return force_ascii;
301 }
302
303
304 void
_Py_ResetForceASCII(void)305 _Py_ResetForceASCII(void)
306 {
307 force_ascii = -1;
308 }
309
310
311 static int
encode_ascii(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)312 encode_ascii(const wchar_t *text, char **str,
313 size_t *error_pos, const char **reason,
314 int raw_malloc, _Py_error_handler errors)
315 {
316 char *result = NULL, *out;
317 size_t len, i;
318 wchar_t ch;
319
320 int surrogateescape;
321 if (get_surrogateescape(errors, &surrogateescape) < 0) {
322 return -3;
323 }
324
325 len = wcslen(text);
326
327 /* +1 for NULL byte */
328 if (raw_malloc) {
329 result = PyMem_RawMalloc(len + 1);
330 }
331 else {
332 result = PyMem_Malloc(len + 1);
333 }
334 if (result == NULL) {
335 return -1;
336 }
337
338 out = result;
339 for (i=0; i<len; i++) {
340 ch = text[i];
341
342 if (ch <= 0x7f) {
343 /* ASCII character */
344 *out++ = (char)ch;
345 }
346 else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
347 /* UTF-8b surrogate */
348 *out++ = (char)(ch - 0xdc00);
349 }
350 else {
351 if (raw_malloc) {
352 PyMem_RawFree(result);
353 }
354 else {
355 PyMem_Free(result);
356 }
357 if (error_pos != NULL) {
358 *error_pos = i;
359 }
360 if (reason) {
361 *reason = "encoding error";
362 }
363 return -2;
364 }
365 }
366 *out = '\0';
367 *str = result;
368 return 0;
369 }
370 #else
/* The force-ASCII workaround is compiled out in this configuration: the
   flag is always 0. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
376
/* The force-ASCII workaround is compiled out in this configuration: there
   is no cached value to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
382 #endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
383
384
385 #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
386 static int
decode_ascii(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)387 decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
388 const char **reason, _Py_error_handler errors)
389 {
390 wchar_t *res;
391 unsigned char *in;
392 wchar_t *out;
393 size_t argsize = strlen(arg) + 1;
394
395 int surrogateescape;
396 if (get_surrogateescape(errors, &surrogateescape) < 0) {
397 return -3;
398 }
399
400 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
401 return -1;
402 }
403 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
404 if (!res) {
405 return -1;
406 }
407
408 out = res;
409 for (in = (unsigned char*)arg; *in; in++) {
410 unsigned char ch = *in;
411 if (ch < 128) {
412 *out++ = ch;
413 }
414 else {
415 if (!surrogateescape) {
416 PyMem_RawFree(res);
417 if (wlen) {
418 *wlen = in - (unsigned char*)arg;
419 }
420 if (reason) {
421 *reason = "decoding error";
422 }
423 return -2;
424 }
425 *out++ = 0xdc00 + ch;
426 }
427 }
428 *out = 0;
429
430 if (wlen != NULL) {
431 *wlen = out - res;
432 }
433 *wstr = res;
434 return 0;
435 }
436 #endif /* !HAVE_MBRTOWC */
437
438 static int
decode_current_locale(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)439 decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
440 const char **reason, _Py_error_handler errors)
441 {
442 wchar_t *res;
443 size_t argsize;
444 size_t count;
445 #ifdef HAVE_MBRTOWC
446 unsigned char *in;
447 wchar_t *out;
448 mbstate_t mbs;
449 #endif
450
451 int surrogateescape;
452 if (get_surrogateescape(errors, &surrogateescape) < 0) {
453 return -3;
454 }
455
456 #ifdef HAVE_BROKEN_MBSTOWCS
457 /* Some platforms have a broken implementation of
458 * mbstowcs which does not count the characters that
459 * would result from conversion. Use an upper bound.
460 */
461 argsize = strlen(arg);
462 #else
463 argsize = _Py_mbstowcs(NULL, arg, 0);
464 #endif
465 if (argsize != DECODE_ERROR) {
466 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
467 return -1;
468 }
469 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
470 if (!res) {
471 return -1;
472 }
473
474 count = _Py_mbstowcs(res, arg, argsize + 1);
475 if (count != DECODE_ERROR) {
476 *wstr = res;
477 if (wlen != NULL) {
478 *wlen = count;
479 }
480 return 0;
481 }
482 PyMem_RawFree(res);
483 }
484
485 /* Conversion failed. Fall back to escaping with surrogateescape. */
486 #ifdef HAVE_MBRTOWC
487 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
488
489 /* Overallocate; as multi-byte characters are in the argument, the
490 actual output could use less memory. */
491 argsize = strlen(arg) + 1;
492 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
493 return -1;
494 }
495 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
496 if (!res) {
497 return -1;
498 }
499
500 in = (unsigned char*)arg;
501 out = res;
502 memset(&mbs, 0, sizeof mbs);
503 while (argsize) {
504 size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
505 if (converted == 0) {
506 /* Reached end of string; null char stored. */
507 break;
508 }
509
510 if (converted == INCOMPLETE_CHARACTER) {
511 /* Incomplete character. This should never happen,
512 since we provide everything that we have -
513 unless there is a bug in the C library, or I
514 misunderstood how mbrtowc works. */
515 goto decode_error;
516 }
517
518 if (converted == DECODE_ERROR) {
519 if (!surrogateescape) {
520 goto decode_error;
521 }
522
523 /* Decoding error. Escape as UTF-8b, and start over in the initial
524 shift state. */
525 *out++ = 0xdc00 + *in++;
526 argsize--;
527 memset(&mbs, 0, sizeof mbs);
528 continue;
529 }
530
531 // _Py_mbrtowc() reject lone surrogate characters
532 assert(!Py_UNICODE_IS_SURROGATE(*out));
533
534 /* successfully converted some bytes */
535 in += converted;
536 argsize -= converted;
537 out++;
538 }
539 if (wlen != NULL) {
540 *wlen = out - res;
541 }
542 *wstr = res;
543 return 0;
544
545 decode_error:
546 PyMem_RawFree(res);
547 if (wlen) {
548 *wlen = in - (unsigned char*)arg;
549 }
550 if (reason) {
551 *reason = "decoding error";
552 }
553 return -2;
554 #else /* HAVE_MBRTOWC */
555 /* Cannot use C locale for escaping; manually escape as if charset
556 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
557 correctly in the locale's charset, which must be an ASCII superset. */
558 return decode_ascii(arg, wstr, wlen, reason, errors);
559 #endif /* HAVE_MBRTOWC */
560 }
561
562
563 /* Decode a byte string from the locale encoding.
564
565 Use the strict error handler if 'surrogateescape' is zero. Use the
566 surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
567 bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
568 can be decoded as a surrogate character, escape the bytes using the
569 surrogateescape error handler instead of decoding them.
570
571 On success, return 0 and write the newly allocated wide character string into
572 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
573 the number of wide characters excluding the null character into *wlen.
574
575 On memory allocation failure, return -1.
576
577 On decoding error, return -2. If wlen is not NULL, write the start of
578 invalid byte sequence in the input string into *wlen. If reason is not NULL,
579 write the decoding error message into *reason.
580
581 Return -3 if the error handler 'errors' is not supported.
582
583 Use the Py_EncodeLocaleEx() function to encode the character string back to
584 a byte string. */
585 int
_Py_DecodeLocaleEx(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,int current_locale,_Py_error_handler errors)586 _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
587 const char **reason,
588 int current_locale, _Py_error_handler errors)
589 {
590 if (current_locale) {
591 #ifdef _Py_FORCE_UTF8_LOCALE
592 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
593 errors);
594 #else
595 return decode_current_locale(arg, wstr, wlen, reason, errors);
596 #endif
597 }
598
599 #ifdef _Py_FORCE_UTF8_FS_ENCODING
600 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
601 errors);
602 #else
603 int use_utf8 = (Py_UTF8Mode == 1);
604 #ifdef MS_WINDOWS
605 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
606 #endif
607 if (use_utf8) {
608 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
609 errors);
610 }
611
612 #ifdef USE_FORCE_ASCII
613 if (force_ascii == -1) {
614 force_ascii = check_force_ascii();
615 }
616
617 if (force_ascii) {
618 /* force ASCII encoding to workaround mbstowcs() issue */
619 return decode_ascii(arg, wstr, wlen, reason, errors);
620 }
621 #endif
622
623 return decode_current_locale(arg, wstr, wlen, reason, errors);
624 #endif /* !_Py_FORCE_UTF8_FS_ENCODING */
625 }
626
627
628 /* Decode a byte string from the locale encoding with the
629 surrogateescape error handler: undecodable bytes are decoded as characters
630 in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
631 character, escape the bytes using the surrogateescape error handler instead
632 of decoding them.
633
634 Return a pointer to a newly allocated wide character string, use
635 PyMem_RawFree() to free the memory. If size is not NULL, write the number of
636 wide characters excluding the null character into *size
637
638 Return NULL on decoding error or memory allocation error. If *size* is not
639 NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
640 decoding error.
641
642 Decoding errors should never happen, unless there is a bug in the C
643 library.
644
645 Use the Py_EncodeLocale() function to encode the character string back to a
646 byte string. */
647 wchar_t*
Py_DecodeLocale(const char * arg,size_t * wlen)648 Py_DecodeLocale(const char* arg, size_t *wlen)
649 {
650 wchar_t *wstr;
651 int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
652 NULL, 0,
653 _Py_ERROR_SURROGATEESCAPE);
654 if (res != 0) {
655 assert(res != -3);
656 if (wlen != NULL) {
657 *wlen = (size_t)res;
658 }
659 return NULL;
660 }
661 return wstr;
662 }
663
664
665 static int
encode_current_locale(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)666 encode_current_locale(const wchar_t *text, char **str,
667 size_t *error_pos, const char **reason,
668 int raw_malloc, _Py_error_handler errors)
669 {
670 const size_t len = wcslen(text);
671 char *result = NULL, *bytes = NULL;
672 size_t i, size, converted;
673 wchar_t c, buf[2];
674
675 int surrogateescape;
676 if (get_surrogateescape(errors, &surrogateescape) < 0) {
677 return -3;
678 }
679
680 /* The function works in two steps:
681 1. compute the length of the output buffer in bytes (size)
682 2. outputs the bytes */
683 size = 0;
684 buf[1] = 0;
685 while (1) {
686 for (i=0; i < len; i++) {
687 c = text[i];
688 if (c >= 0xdc80 && c <= 0xdcff) {
689 if (!surrogateescape) {
690 goto encode_error;
691 }
692 /* UTF-8b surrogate */
693 if (bytes != NULL) {
694 *bytes++ = c - 0xdc00;
695 size--;
696 }
697 else {
698 size++;
699 }
700 continue;
701 }
702 else {
703 buf[0] = c;
704 if (bytes != NULL) {
705 converted = wcstombs(bytes, buf, size);
706 }
707 else {
708 converted = wcstombs(NULL, buf, 0);
709 }
710 if (converted == DECODE_ERROR) {
711 goto encode_error;
712 }
713 if (bytes != NULL) {
714 bytes += converted;
715 size -= converted;
716 }
717 else {
718 size += converted;
719 }
720 }
721 }
722 if (result != NULL) {
723 *bytes = '\0';
724 break;
725 }
726
727 size += 1; /* nul byte at the end */
728 if (raw_malloc) {
729 result = PyMem_RawMalloc(size);
730 }
731 else {
732 result = PyMem_Malloc(size);
733 }
734 if (result == NULL) {
735 return -1;
736 }
737 bytes = result;
738 }
739 *str = result;
740 return 0;
741
742 encode_error:
743 if (raw_malloc) {
744 PyMem_RawFree(result);
745 }
746 else {
747 PyMem_Free(result);
748 }
749 if (error_pos != NULL) {
750 *error_pos = i;
751 }
752 if (reason) {
753 *reason = "encoding error";
754 }
755 return -2;
756 }
757
758
759 /* Encode a string to the locale encoding.
760
761 Parameters:
762
763 * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
764 of PyMem_Malloc().
765 * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
766 Python filesystem encoding.
767 * errors: error handler like "strict" or "surrogateescape".
768
769 Return value:
770
771 0: success, *str is set to a newly allocated decoded string.
772 -1: memory allocation failure
773 -2: encoding error, set *error_pos and *reason (if set).
774 -3: the error handler 'errors' is not supported.
775 */
776 static int
encode_locale_ex(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,int current_locale,_Py_error_handler errors)777 encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
778 const char **reason,
779 int raw_malloc, int current_locale, _Py_error_handler errors)
780 {
781 if (current_locale) {
782 #ifdef _Py_FORCE_UTF8_LOCALE
783 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
784 raw_malloc, errors);
785 #else
786 return encode_current_locale(text, str, error_pos, reason,
787 raw_malloc, errors);
788 #endif
789 }
790
791 #ifdef _Py_FORCE_UTF8_FS_ENCODING
792 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
793 raw_malloc, errors);
794 #else
795 int use_utf8 = (Py_UTF8Mode == 1);
796 #ifdef MS_WINDOWS
797 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
798 #endif
799 if (use_utf8) {
800 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
801 raw_malloc, errors);
802 }
803
804 #ifdef USE_FORCE_ASCII
805 if (force_ascii == -1) {
806 force_ascii = check_force_ascii();
807 }
808
809 if (force_ascii) {
810 return encode_ascii(text, str, error_pos, reason,
811 raw_malloc, errors);
812 }
813 #endif
814
815 return encode_current_locale(text, str, error_pos, reason,
816 raw_malloc, errors);
817 #endif /* _Py_FORCE_UTF8_FS_ENCODING */
818 }
819
820 static char*
encode_locale(const wchar_t * text,size_t * error_pos,int raw_malloc,int current_locale)821 encode_locale(const wchar_t *text, size_t *error_pos,
822 int raw_malloc, int current_locale)
823 {
824 char *str;
825 int res = encode_locale_ex(text, &str, error_pos, NULL,
826 raw_malloc, current_locale,
827 _Py_ERROR_SURROGATEESCAPE);
828 if (res != -2 && error_pos) {
829 *error_pos = (size_t)-1;
830 }
831 if (res != 0) {
832 return NULL;
833 }
834 return str;
835 }
836
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or
   set to the index of the invalid character on encoding error.

   Use the Py_DecodeLocale() function to decode the bytes string back to a
   wide character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;   /* use the Python filesystem encoding */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
854
855
/* Same as Py_EncodeLocale(), except that the result is allocated by
   PyMem_RawMalloc(): it must be freed by PyMem_RawFree() instead of
   PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;       /* allocate with PyMem_RawMalloc() */
    const int current_locale = 0;   /* use the Python filesystem encoding */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
863
864
865 int
_Py_EncodeLocaleEx(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int current_locale,_Py_error_handler errors)866 _Py_EncodeLocaleEx(const wchar_t *text, char **str,
867 size_t *error_pos, const char **reason,
868 int current_locale, _Py_error_handler errors)
869 {
870 return encode_locale_ex(text, str, error_pos, reason, 1,
871 current_locale, errors);
872 }
873
874 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
875
/* Check whether the current locale uses a non-Unicode internal wchar_t form.

   Oracle Solaris uses non-Unicode internal wchar_t form for non-Unicode
   locales and hence needs conversion to UTF first.

   Return 0 if nl_langinfo(CODESET) is NULL, "UTF-8" or "646" (646 refers to
   the ISO/IEC 646 standard that corresponds to the ASCII encoding).
   Return 1 for any other codeset. */
int
_Py_LocaleUsesNonUnicodeWchar(void)
{
    const char *codeset = nl_langinfo(CODESET);
    if (codeset == NULL) {
        return 0;
    }
    if (strcmp(codeset, "UTF-8") == 0) {
        return 0;
    }
    if (strcmp(codeset, "646") == 0) {
        return 0;
    }
    return 1;
}
889
890 static wchar_t *
_Py_ConvertWCharForm(const wchar_t * source,Py_ssize_t size,const char * tocode,const char * fromcode)891 _Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
892 const char *tocode, const char *fromcode)
893 {
894 Py_BUILD_ASSERT(sizeof(wchar_t) == 4);
895
896 /* Ensure we won't overflow the size. */
897 if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
898 PyErr_NoMemory();
899 return NULL;
900 }
901
902 /* the string doesn't have to be NULL terminated */
903 wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
904 if (target == NULL) {
905 PyErr_NoMemory();
906 return NULL;
907 }
908
909 iconv_t cd = iconv_open(tocode, fromcode);
910 if (cd == (iconv_t)-1) {
911 PyErr_Format(PyExc_ValueError, "iconv_open() failed");
912 PyMem_Free(target);
913 return NULL;
914 }
915
916 char *inbuf = (char *) source;
917 char *outbuf = (char *) target;
918 size_t inbytesleft = sizeof(wchar_t) * size;
919 size_t outbytesleft = inbytesleft;
920
921 size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
922 if (ret == DECODE_ERROR) {
923 PyErr_Format(PyExc_ValueError, "iconv() failed");
924 PyMem_Free(target);
925 iconv_close(cd);
926 return NULL;
927 }
928
929 iconv_close(cd);
930 return target;
931 }
932
933 /* Convert a wide character string to the UCS-4 encoded string. This
934 is necessary on systems where internal form of wchar_t are not Unicode
935 code points (e.g. Oracle Solaris).
936
937 Return a pointer to a newly allocated string, use PyMem_Free() to free
938 the memory. Return NULL and raise exception on conversion or memory
939 allocation error. */
940 wchar_t *
_Py_DecodeNonUnicodeWchar(const wchar_t * native,Py_ssize_t size)941 _Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
942 {
943 return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
944 }
945
946 /* Convert a UCS-4 encoded string to native wide character string. This
947 is necessary on systems where internal form of wchar_t are not Unicode
948 code points (e.g. Oracle Solaris).
949
950 The conversion is done in place. This can be done because both wchar_t
951 and UCS-4 use 4-byte encoding, and one wchar_t symbol always correspond
952 to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
953 which is currently the only system using these functions; it doesn't have
954 to be for other systems).
955
956 Return 0 on success. Return -1 and raise exception on conversion
957 or memory allocation error. */
958 int
_Py_EncodeNonUnicodeWchar_InPlace(wchar_t * unicode,Py_ssize_t size)959 _Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
960 {
961 wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
962 if (!result) {
963 return -1;
964 }
965 memcpy(unicode, result, size * sizeof(wchar_t));
966 PyMem_Free(result);
967 return 0;
968 }
969 #endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
970
971 #ifdef MS_WINDOWS
/* Offset between the FILETIME epoch and the time_t epoch; subtracted by
   FILE_TIME_to_time_t_nsec() and added by _Py_time_t_to_FILE_TIME(). */
static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
973
974 static void
FILE_TIME_to_time_t_nsec(FILETIME * in_ptr,time_t * time_out,int * nsec_out)975 FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
976 {
977 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
978 /* Cannot simply cast and dereference in_ptr,
979 since it might not be aligned properly */
980 __int64 in;
981 memcpy(&in, in_ptr, sizeof(in));
982 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
983 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
984 }
985
986 void
_Py_time_t_to_FILE_TIME(time_t time_in,int nsec_in,FILETIME * out_ptr)987 _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
988 {
989 /* XXX endianness */
990 __int64 out;
991 out = time_in + secs_between_epochs;
992 out = out * 10000000 + nsec_in / 100;
993 memcpy(out_ptr, &out, sizeof(out));
994 }
995
996 /* Below, we *know* that ugo+r is 0444 */
997 #if _S_IREAD != 0400
998 #error Unsupported C library
999 #endif
1000 static int
attributes_to_mode(DWORD attr)1001 attributes_to_mode(DWORD attr)
1002 {
1003 int m = 0;
1004 if (attr & FILE_ATTRIBUTE_DIRECTORY)
1005 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
1006 else
1007 m |= _S_IFREG;
1008 if (attr & FILE_ATTRIBUTE_READONLY)
1009 m |= 0444;
1010 else
1011 m |= 0666;
1012 return m;
1013 }
1014
1015 void
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION * info,ULONG reparse_tag,struct _Py_stat_struct * result)1016 _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
1017 struct _Py_stat_struct *result)
1018 {
1019 memset(result, 0, sizeof(*result));
1020 result->st_mode = attributes_to_mode(info->dwFileAttributes);
1021 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
1022 result->st_dev = info->dwVolumeSerialNumber;
1023 result->st_rdev = result->st_dev;
1024 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
1025 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1026 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
1027 result->st_nlink = info->nNumberOfLinks;
1028 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
1029 /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1030 open other name surrogate reparse points without traversing them. To
1031 detect/handle these, check st_file_attributes and st_reparse_tag. */
1032 result->st_reparse_tag = reparse_tag;
1033 if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1034 reparse_tag == IO_REPARSE_TAG_SYMLINK) {
1035 /* first clear the S_IFMT bits */
1036 result->st_mode ^= (result->st_mode & S_IFMT);
1037 /* now set the bits that make this a symlink */
1038 result->st_mode |= S_IFLNK;
1039 }
1040 result->st_file_attributes = info->dwFileAttributes;
1041 }
1042 #endif
1043
/* Return information about a file.

   On POSIX, use fstat().

   On Windows, use GetFileType() and GetFileInformationByHandle() which support
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
   #23152.

   On Windows, character devices and pipes only get st_mode set (_S_IFCHR or
   _S_IFIFO); other non-disk file types leave 'status' zeroed.

   On Windows, set the last Windows error and return nonzero on error. On
   POSIX, set errno and return nonzero on error. Fill status and return 0 on
   success. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    HANDLE h;

    _Py_BEGIN_SUPPRESS_IPH
    h = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH

    if (h == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    const int type = GetFileType(h);
    switch (type) {
    case FILE_TYPE_DISK:
        /* regular case, handled below */
        break;

    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;

    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;

    case FILE_TYPE_UNKNOWN: {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* else: valid but unknown file */
        return 0;
    }

    default:
        /* any other type: status stays zeroed */
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION info;
    if (!GetFileInformationByHandle(h, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32)
                     + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
1109
1110 /* Return information about a file.
1111
1112 On POSIX, use fstat().
1113
1114 On Windows, use GetFileType() and GetFileInformationByHandle() which support
1115 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
1116 than 2 GiB because the file size type is a signed 32-bit integer: see issue
1117 #23152.
1118
1119 Raise an exception and return -1 on error. On Windows, set the last Windows
1120 error on error. On POSIX, set errno on error. Fill status and return 0 on
1121 success.
1122
1123 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1124 to call fstat(). The caller must hold the GIL. */
1125 int
_Py_fstat(int fd,struct _Py_stat_struct * status)1126 _Py_fstat(int fd, struct _Py_stat_struct *status)
1127 {
1128 int res;
1129
1130 assert(PyGILState_Check());
1131
1132 Py_BEGIN_ALLOW_THREADS
1133 res = _Py_fstat_noraise(fd, status);
1134 Py_END_ALLOW_THREADS
1135
1136 if (res != 0) {
1137 #ifdef MS_WINDOWS
1138 PyErr_SetFromWindowsErr(0);
1139 #else
1140 PyErr_SetFromErrno(PyExc_OSError);
1141 #endif
1142 return -1;
1143 }
1144 return 0;
1145 }
1146
1147 /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1148 call stat() otherwise. Only fill st_mode attribute on Windows.
1149
1150 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1151 raised. */
1152
1153 int
_Py_stat(PyObject * path,struct stat * statbuf)1154 _Py_stat(PyObject *path, struct stat *statbuf)
1155 {
1156 #ifdef MS_WINDOWS
1157 int err;
1158 struct _stat wstatbuf;
1159 const wchar_t *wpath;
1160
1161 _Py_COMP_DIAG_PUSH
1162 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
1163 wpath = _PyUnicode_AsUnicode(path);
1164 _Py_COMP_DIAG_POP
1165 if (wpath == NULL)
1166 return -2;
1167
1168 err = _wstat(wpath, &wstatbuf);
1169 if (!err)
1170 statbuf->st_mode = wstatbuf.st_mode;
1171 return err;
1172 #else
1173 int ret;
1174 PyObject *bytes;
1175 char *cpath;
1176
1177 bytes = PyUnicode_EncodeFSDefault(path);
1178 if (bytes == NULL)
1179 return -2;
1180
1181 /* check for embedded null bytes */
1182 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1183 Py_DECREF(bytes);
1184 return -2;
1185 }
1186
1187 ret = stat(cpath, statbuf);
1188 Py_DECREF(bytes);
1189 return ret;
1190 #endif
1191 }
1192
1193
1194 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1195 static int
get_inheritable(int fd,int raise)1196 get_inheritable(int fd, int raise)
1197 {
1198 #ifdef MS_WINDOWS
1199 HANDLE handle;
1200 DWORD flags;
1201
1202 _Py_BEGIN_SUPPRESS_IPH
1203 handle = (HANDLE)_get_osfhandle(fd);
1204 _Py_END_SUPPRESS_IPH
1205 if (handle == INVALID_HANDLE_VALUE) {
1206 if (raise)
1207 PyErr_SetFromErrno(PyExc_OSError);
1208 return -1;
1209 }
1210
1211 if (!GetHandleInformation(handle, &flags)) {
1212 if (raise)
1213 PyErr_SetFromWindowsErr(0);
1214 return -1;
1215 }
1216
1217 return (flags & HANDLE_FLAG_INHERIT);
1218 #else
1219 int flags;
1220
1221 flags = fcntl(fd, F_GETFD, 0);
1222 if (flags == -1) {
1223 if (raise)
1224 PyErr_SetFromErrno(PyExc_OSError);
1225 return -1;
1226 }
1227 return !(flags & FD_CLOEXEC);
1228 #endif
1229 }
1230
/* Get the inheritable flag of the specified file descriptor.

   Return 1 if the file descriptor can be inherited and 0 if it cannot.
   On error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;
    return get_inheritable(fd, raise_on_error);
}
1239
1240
1241 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1242 static int
set_inheritable(int fd,int inheritable,int raise,int * atomic_flag_works)1243 set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1244 {
1245 #ifdef MS_WINDOWS
1246 HANDLE handle;
1247 DWORD flags;
1248 #else
1249 #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1250 static int ioctl_works = -1;
1251 int request;
1252 int err;
1253 #endif
1254 int flags, new_flags;
1255 int res;
1256 #endif
1257
1258 /* atomic_flag_works can only be used to make the file descriptor
1259 non-inheritable */
1260 assert(!(atomic_flag_works != NULL && inheritable));
1261
1262 if (atomic_flag_works != NULL && !inheritable) {
1263 if (*atomic_flag_works == -1) {
1264 int isInheritable = get_inheritable(fd, raise);
1265 if (isInheritable == -1)
1266 return -1;
1267 *atomic_flag_works = !isInheritable;
1268 }
1269
1270 if (*atomic_flag_works)
1271 return 0;
1272 }
1273
1274 #ifdef MS_WINDOWS
1275 _Py_BEGIN_SUPPRESS_IPH
1276 handle = (HANDLE)_get_osfhandle(fd);
1277 _Py_END_SUPPRESS_IPH
1278 if (handle == INVALID_HANDLE_VALUE) {
1279 if (raise)
1280 PyErr_SetFromErrno(PyExc_OSError);
1281 return -1;
1282 }
1283
1284 if (inheritable)
1285 flags = HANDLE_FLAG_INHERIT;
1286 else
1287 flags = 0;
1288
1289 /* This check can be removed once support for Windows 7 ends. */
1290 #define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
1291 GetFileType(handle) == FILE_TYPE_CHAR)
1292
1293 if (!CONSOLE_PSEUDOHANDLE(handle) &&
1294 !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
1295 if (raise)
1296 PyErr_SetFromWindowsErr(0);
1297 return -1;
1298 }
1299 #undef CONSOLE_PSEUDOHANDLE
1300 return 0;
1301
1302 #else
1303
1304 #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1305 if (ioctl_works != 0 && raise != 0) {
1306 /* fast-path: ioctl() only requires one syscall */
1307 /* caveat: raise=0 is an indicator that we must be async-signal-safe
1308 * thus avoid using ioctl() so we skip the fast-path. */
1309 if (inheritable)
1310 request = FIONCLEX;
1311 else
1312 request = FIOCLEX;
1313 err = ioctl(fd, request, NULL);
1314 if (!err) {
1315 ioctl_works = 1;
1316 return 0;
1317 }
1318
1319 #ifdef O_PATH
1320 if (errno == EBADF) {
1321 // bpo-44849: On Linux and FreeBSD, ioctl(FIOCLEX) fails with EBADF
1322 // on O_PATH file descriptors. Fall through to the fcntl()
1323 // implementation.
1324 }
1325 else
1326 #endif
1327 if (errno != ENOTTY && errno != EACCES) {
1328 if (raise)
1329 PyErr_SetFromErrno(PyExc_OSError);
1330 return -1;
1331 }
1332 else {
1333 /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1334 device". The ioctl is declared but not supported by the kernel.
1335 Remember that ioctl() doesn't work. It is the case on
1336 Illumos-based OS for example.
1337
1338 Issue #27057: When SELinux policy disallows ioctl it will fail
1339 with EACCES. While FIOCLEX is safe operation it may be
1340 unavailable because ioctl was denied altogether.
1341 This can be the case on Android. */
1342 ioctl_works = 0;
1343 }
1344 /* fallback to fcntl() if ioctl() does not work */
1345 }
1346 #endif
1347
1348 /* slow-path: fcntl() requires two syscalls */
1349 flags = fcntl(fd, F_GETFD);
1350 if (flags < 0) {
1351 if (raise)
1352 PyErr_SetFromErrno(PyExc_OSError);
1353 return -1;
1354 }
1355
1356 if (inheritable) {
1357 new_flags = flags & ~FD_CLOEXEC;
1358 }
1359 else {
1360 new_flags = flags | FD_CLOEXEC;
1361 }
1362
1363 if (new_flags == flags) {
1364 /* FD_CLOEXEC flag already set/cleared: nothing to do */
1365 return 0;
1366 }
1367
1368 res = fcntl(fd, F_SETFD, new_flags);
1369 if (res < 0) {
1370 if (raise)
1371 PyErr_SetFromErrno(PyExc_OSError);
1372 return -1;
1373 }
1374 return 0;
1375 #endif
1376 }
1377
/* Make the file descriptor non-inheritable, without raising an exception
   and without using an atomic-flag cache.
   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise_on_error = 0;
    return set_inheritable(fd, inheritable, raise_on_error, NULL);
}
1385
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   If atomic_flag_works is not NULL:

    * if *atomic_flag_works==-1, query the current inheritable flag of the
      file descriptor: if it is already non-inheritable, set
      *atomic_flag_works to 1 and do nothing else; otherwise set
      *atomic_flag_works to 0 and make the file descriptor non-inheritable
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: make the file descriptor non-inheritable

   Pass atomic_flag_works=NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;
    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1407
/* Variant of _Py_set_inheritable() that never raises an exception: on
   error, errno is set and -1 is returned.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;
    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1416
1417 static int
_Py_open_impl(const char * pathname,int flags,int gil_held)1418 _Py_open_impl(const char *pathname, int flags, int gil_held)
1419 {
1420 int fd;
1421 int async_err = 0;
1422 #ifndef MS_WINDOWS
1423 int *atomic_flag_works;
1424 #endif
1425
1426 #ifdef MS_WINDOWS
1427 flags |= O_NOINHERIT;
1428 #elif defined(O_CLOEXEC)
1429 atomic_flag_works = &_Py_open_cloexec_works;
1430 flags |= O_CLOEXEC;
1431 #else
1432 atomic_flag_works = NULL;
1433 #endif
1434
1435 if (gil_held) {
1436 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1437 if (pathname_obj == NULL) {
1438 return -1;
1439 }
1440 if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1441 Py_DECREF(pathname_obj);
1442 return -1;
1443 }
1444
1445 do {
1446 Py_BEGIN_ALLOW_THREADS
1447 fd = open(pathname, flags);
1448 Py_END_ALLOW_THREADS
1449 } while (fd < 0
1450 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1451 if (async_err) {
1452 Py_DECREF(pathname_obj);
1453 return -1;
1454 }
1455 if (fd < 0) {
1456 PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1457 Py_DECREF(pathname_obj);
1458 return -1;
1459 }
1460 Py_DECREF(pathname_obj);
1461 }
1462 else {
1463 fd = open(pathname, flags);
1464 if (fd < 0)
1465 return -1;
1466 }
1467
1468 #ifndef MS_WINDOWS
1469 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
1470 close(fd);
1471 return -1;
1472 }
1473 #endif
1474
1475 return fd;
1476 }
1477
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Raise an exception and return -1 on
   error.

   The file descriptor is created non-inheritable.

   When open() is interrupted by a signal (fails with EINTR), the syscall is
   retried, unless the Python signal handler raises an exception.

   The GIL is released while open() runs. The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1495
/* Open a file with the specified flags (wrapper to open() function),
   without raising an exception.
   Return a file descriptor on success. Set errno and return -1 on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;
    return _Py_open_impl(pathname, flags, gil_held);
}
1507
1508 /* Open a file. Use _wfopen() on Windows, encode the path to the locale
1509 encoding and use fopen() otherwise.
1510
1511 The file descriptor is created non-inheritable.
1512
1513 If interrupted by a signal, fail with EINTR. */
1514 FILE *
_Py_wfopen(const wchar_t * path,const wchar_t * mode)1515 _Py_wfopen(const wchar_t *path, const wchar_t *mode)
1516 {
1517 FILE *f;
1518 if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1519 return NULL;
1520 }
1521 #ifndef MS_WINDOWS
1522 char *cpath;
1523 char cmode[10];
1524 size_t r;
1525 r = wcstombs(cmode, mode, 10);
1526 if (r == DECODE_ERROR || r >= 10) {
1527 errno = EINVAL;
1528 return NULL;
1529 }
1530 cpath = _Py_EncodeLocaleRaw(path, NULL);
1531 if (cpath == NULL) {
1532 return NULL;
1533 }
1534 f = fopen(cpath, cmode);
1535 PyMem_RawFree(cpath);
1536 #else
1537 f = _wfopen(path, mode);
1538 #endif
1539 if (f == NULL)
1540 return NULL;
1541 if (make_non_inheritable(fileno(f)) < 0) {
1542 fclose(f);
1543 return NULL;
1544 }
1545 return f;
1546 }
1547
1548 /* Wrapper to fopen().
1549
1550 The file descriptor is created non-inheritable.
1551
1552 If interrupted by a signal, fail with EINTR. */
1553 FILE*
_Py_fopen(const char * pathname,const char * mode)1554 _Py_fopen(const char *pathname, const char *mode)
1555 {
1556 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1557 if (pathname_obj == NULL) {
1558 return NULL;
1559 }
1560 if (PySys_Audit("open", "Osi", pathname_obj, mode, 0) < 0) {
1561 Py_DECREF(pathname_obj);
1562 return NULL;
1563 }
1564 Py_DECREF(pathname_obj);
1565
1566 FILE *f = fopen(pathname, mode);
1567 if (f == NULL)
1568 return NULL;
1569 if (make_non_inheritable(fileno(f)) < 0) {
1570 fclose(f);
1571 return NULL;
1572 }
1573 return f;
1574 }
1575
1576 /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
1577 encoding and call fopen() otherwise.
1578
1579 Return the new file object on success. Raise an exception and return NULL
1580 on error.
1581
1582 The file descriptor is created non-inheritable.
1583
1584 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1585 except if the Python signal handler raises an exception.
1586
1587 Release the GIL to call _wfopen() or fopen(). The caller must hold
1588 the GIL. */
1589 FILE*
_Py_fopen_obj(PyObject * path,const char * mode)1590 _Py_fopen_obj(PyObject *path, const char *mode)
1591 {
1592 FILE *f;
1593 int async_err = 0;
1594 #ifdef MS_WINDOWS
1595 const wchar_t *wpath;
1596 wchar_t wmode[10];
1597 int usize;
1598
1599 assert(PyGILState_Check());
1600
1601 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1602 return NULL;
1603 }
1604 if (!PyUnicode_Check(path)) {
1605 PyErr_Format(PyExc_TypeError,
1606 "str file path expected under Windows, got %R",
1607 Py_TYPE(path));
1608 return NULL;
1609 }
1610 _Py_COMP_DIAG_PUSH
1611 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
1612 wpath = _PyUnicode_AsUnicode(path);
1613 _Py_COMP_DIAG_POP
1614 if (wpath == NULL)
1615 return NULL;
1616
1617 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1618 wmode, Py_ARRAY_LENGTH(wmode));
1619 if (usize == 0) {
1620 PyErr_SetFromWindowsErr(0);
1621 return NULL;
1622 }
1623
1624 do {
1625 Py_BEGIN_ALLOW_THREADS
1626 f = _wfopen(wpath, wmode);
1627 Py_END_ALLOW_THREADS
1628 } while (f == NULL
1629 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1630 #else
1631 PyObject *bytes;
1632 const char *path_bytes;
1633
1634 assert(PyGILState_Check());
1635
1636 if (!PyUnicode_FSConverter(path, &bytes))
1637 return NULL;
1638 path_bytes = PyBytes_AS_STRING(bytes);
1639
1640 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1641 Py_DECREF(bytes);
1642 return NULL;
1643 }
1644
1645 do {
1646 Py_BEGIN_ALLOW_THREADS
1647 f = fopen(path_bytes, mode);
1648 Py_END_ALLOW_THREADS
1649 } while (f == NULL
1650 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1651
1652 Py_DECREF(bytes);
1653 #endif
1654 if (async_err)
1655 return NULL;
1656
1657 if (f == NULL) {
1658 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1659 return NULL;
1660 }
1661
1662 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1663 fclose(f);
1664 return NULL;
1665 }
1666 return f;
1667 }
1668
1669 /* Read count bytes from fd into buf.
1670
1671 On success, return the number of read bytes, it can be lower than count.
1672 If the current file offset is at or past the end of file, no bytes are read,
1673 and read() returns zero.
1674
1675 On error, raise an exception, set errno and return -1.
1676
1677 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1678 If the Python signal handler raises an exception, the function returns -1
1679 (the syscall is not retried).
1680
1681 Release the GIL to call read(). The caller must hold the GIL. */
1682 Py_ssize_t
_Py_read(int fd,void * buf,size_t count)1683 _Py_read(int fd, void *buf, size_t count)
1684 {
1685 Py_ssize_t n;
1686 int err;
1687 int async_err = 0;
1688
1689 assert(PyGILState_Check());
1690
1691 /* _Py_read() must not be called with an exception set, otherwise the
1692 * caller may think that read() was interrupted by a signal and the signal
1693 * handler raised an exception. */
1694 assert(!PyErr_Occurred());
1695
1696 if (count > _PY_READ_MAX) {
1697 count = _PY_READ_MAX;
1698 }
1699
1700 _Py_BEGIN_SUPPRESS_IPH
1701 do {
1702 Py_BEGIN_ALLOW_THREADS
1703 errno = 0;
1704 #ifdef MS_WINDOWS
1705 n = read(fd, buf, (int)count);
1706 #else
1707 n = read(fd, buf, count);
1708 #endif
1709 /* save/restore errno because PyErr_CheckSignals()
1710 * and PyErr_SetFromErrno() can modify it */
1711 err = errno;
1712 Py_END_ALLOW_THREADS
1713 } while (n < 0 && err == EINTR &&
1714 !(async_err = PyErr_CheckSignals()));
1715 _Py_END_SUPPRESS_IPH
1716
1717 if (async_err) {
1718 /* read() was interrupted by a signal (failed with EINTR)
1719 * and the Python signal handler raised an exception */
1720 errno = err;
1721 assert(errno == EINTR && PyErr_Occurred());
1722 return -1;
1723 }
1724 if (n < 0) {
1725 PyErr_SetFromErrno(PyExc_OSError);
1726 errno = err;
1727 return -1;
1728 }
1729
1730 return n;
1731 }
1732
1733 static Py_ssize_t
_Py_write_impl(int fd,const void * buf,size_t count,int gil_held)1734 _Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
1735 {
1736 Py_ssize_t n;
1737 int err;
1738 int async_err = 0;
1739
1740 _Py_BEGIN_SUPPRESS_IPH
1741 #ifdef MS_WINDOWS
1742 if (count > 32767) {
1743 /* Issue #11395: the Windows console returns an error (12: not
1744 enough space error) on writing into stdout if stdout mode is
1745 binary and the length is greater than 66,000 bytes (or less,
1746 depending on heap usage). */
1747 if (gil_held) {
1748 Py_BEGIN_ALLOW_THREADS
1749 if (isatty(fd)) {
1750 count = 32767;
1751 }
1752 Py_END_ALLOW_THREADS
1753 } else {
1754 if (isatty(fd)) {
1755 count = 32767;
1756 }
1757 }
1758 }
1759 #endif
1760 if (count > _PY_WRITE_MAX) {
1761 count = _PY_WRITE_MAX;
1762 }
1763
1764 if (gil_held) {
1765 do {
1766 Py_BEGIN_ALLOW_THREADS
1767 errno = 0;
1768 #ifdef MS_WINDOWS
1769 n = write(fd, buf, (int)count);
1770 #else
1771 n = write(fd, buf, count);
1772 #endif
1773 /* save/restore errno because PyErr_CheckSignals()
1774 * and PyErr_SetFromErrno() can modify it */
1775 err = errno;
1776 Py_END_ALLOW_THREADS
1777 } while (n < 0 && err == EINTR &&
1778 !(async_err = PyErr_CheckSignals()));
1779 }
1780 else {
1781 do {
1782 errno = 0;
1783 #ifdef MS_WINDOWS
1784 n = write(fd, buf, (int)count);
1785 #else
1786 n = write(fd, buf, count);
1787 #endif
1788 err = errno;
1789 } while (n < 0 && err == EINTR);
1790 }
1791 _Py_END_SUPPRESS_IPH
1792
1793 if (async_err) {
1794 /* write() was interrupted by a signal (failed with EINTR)
1795 and the Python signal handler raised an exception (if gil_held is
1796 nonzero). */
1797 errno = err;
1798 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
1799 return -1;
1800 }
1801 if (n < 0) {
1802 if (gil_held)
1803 PyErr_SetFromErrno(PyExc_OSError);
1804 errno = err;
1805 return -1;
1806 }
1807
1808 return n;
1809 }
1810
1811 /* Write count bytes of buf into fd.
1812
1813 On success, return the number of written bytes, it can be lower than count
1814 including 0. On error, raise an exception, set errno and return -1.
1815
1816 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1817 If the Python signal handler raises an exception, the function returns -1
1818 (the syscall is not retried).
1819
1820 Release the GIL to call write(). The caller must hold the GIL. */
1821 Py_ssize_t
_Py_write(int fd,const void * buf,size_t count)1822 _Py_write(int fd, const void *buf, size_t count)
1823 {
1824 assert(PyGILState_Check());
1825
1826 /* _Py_write() must not be called with an exception set, otherwise the
1827 * caller may think that write() was interrupted by a signal and the signal
1828 * handler raised an exception. */
1829 assert(!PyErr_Occurred());
1830
1831 return _Py_write_impl(fd, buf, count, 1);
1832 }
1833
1834 /* Write count bytes of buf into fd.
1835 *
1836 * On success, return the number of written bytes, it can be lower than count
1837 * including 0. On error, set errno and return -1.
1838 *
1839 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1840 * without calling the Python signal handler. */
1841 Py_ssize_t
_Py_write_noraise(int fd,const void * buf,size_t count)1842 _Py_write_noraise(int fd, const void *buf, size_t count)
1843 {
1844 return _Py_write_impl(fd, buf, count, 0);
1845 }
1846
1847 #ifdef HAVE_READLINK
1848
1849 /* Read value of symbolic link. Encode the path to the locale encoding, decode
1850 the result from the locale encoding.
1851
1852 Return -1 on encoding error, on readlink() error, if the internal buffer is
1853 too short, on decoding error, or if 'buf' is too short. */
1854 int
_Py_wreadlink(const wchar_t * path,wchar_t * buf,size_t buflen)1855 _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
1856 {
1857 char *cpath;
1858 char cbuf[MAXPATHLEN];
1859 size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
1860 wchar_t *wbuf;
1861 Py_ssize_t res;
1862 size_t r1;
1863
1864 cpath = _Py_EncodeLocaleRaw(path, NULL);
1865 if (cpath == NULL) {
1866 errno = EINVAL;
1867 return -1;
1868 }
1869 res = readlink(cpath, cbuf, cbuf_len);
1870 PyMem_RawFree(cpath);
1871 if (res == -1) {
1872 return -1;
1873 }
1874 if ((size_t)res == cbuf_len) {
1875 errno = EINVAL;
1876 return -1;
1877 }
1878 cbuf[res] = '\0'; /* buf will be null terminated */
1879 wbuf = Py_DecodeLocale(cbuf, &r1);
1880 if (wbuf == NULL) {
1881 errno = EINVAL;
1882 return -1;
1883 }
1884 /* wbuf must have space to store the trailing NUL character */
1885 if (buflen <= r1) {
1886 PyMem_RawFree(wbuf);
1887 errno = EINVAL;
1888 return -1;
1889 }
1890 wcsncpy(buf, wbuf, buflen);
1891 PyMem_RawFree(wbuf);
1892 return (int)r1;
1893 }
1894 #endif
1895
1896 #ifdef HAVE_REALPATH
1897
1898 /* Return the canonicalized absolute pathname. Encode path to the locale
1899 encoding, decode the result from the locale encoding.
1900
1901 Return NULL on encoding error, realpath() error, decoding error
1902 or if 'resolved_path' is too short. */
1903 wchar_t*
_Py_wrealpath(const wchar_t * path,wchar_t * resolved_path,size_t resolved_path_len)1904 _Py_wrealpath(const wchar_t *path,
1905 wchar_t *resolved_path, size_t resolved_path_len)
1906 {
1907 char *cpath;
1908 char cresolved_path[MAXPATHLEN];
1909 wchar_t *wresolved_path;
1910 char *res;
1911 size_t r;
1912 cpath = _Py_EncodeLocaleRaw(path, NULL);
1913 if (cpath == NULL) {
1914 errno = EINVAL;
1915 return NULL;
1916 }
1917 res = realpath(cpath, cresolved_path);
1918 PyMem_RawFree(cpath);
1919 if (res == NULL)
1920 return NULL;
1921
1922 wresolved_path = Py_DecodeLocale(cresolved_path, &r);
1923 if (wresolved_path == NULL) {
1924 errno = EINVAL;
1925 return NULL;
1926 }
1927 /* wresolved_path must have space to store the trailing NUL character */
1928 if (resolved_path_len <= r) {
1929 PyMem_RawFree(wresolved_path);
1930 errno = EINVAL;
1931 return NULL;
1932 }
1933 wcsncpy(resolved_path, wresolved_path, resolved_path_len);
1934 PyMem_RawFree(wresolved_path);
1935 return resolved_path;
1936 }
1937 #endif
1938
1939
1940 #ifndef MS_WINDOWS
1941 int
_Py_isabs(const wchar_t * path)1942 _Py_isabs(const wchar_t *path)
1943 {
1944 return (path[0] == SEP);
1945 }
1946 #endif
1947
1948
1949 /* Get an absolute path.
1950 On error (ex: fail to get the current directory), return -1.
1951 On memory allocation failure, set *abspath_p to NULL and return 0.
1952 On success, return a newly allocated to *abspath_p to and return 0.
1953 The string must be freed by PyMem_RawFree(). */
1954 int
_Py_abspath(const wchar_t * path,wchar_t ** abspath_p)1955 _Py_abspath(const wchar_t *path, wchar_t **abspath_p)
1956 {
1957 #ifdef MS_WINDOWS
1958 wchar_t woutbuf[MAX_PATH], *woutbufp = woutbuf;
1959 DWORD result;
1960
1961 result = GetFullPathNameW(path,
1962 Py_ARRAY_LENGTH(woutbuf), woutbuf,
1963 NULL);
1964 if (!result) {
1965 return -1;
1966 }
1967
1968 if (result > Py_ARRAY_LENGTH(woutbuf)) {
1969 if ((size_t)result <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
1970 woutbufp = PyMem_RawMalloc((size_t)result * sizeof(wchar_t));
1971 }
1972 else {
1973 woutbufp = NULL;
1974 }
1975 if (!woutbufp) {
1976 *abspath_p = NULL;
1977 return 0;
1978 }
1979
1980 result = GetFullPathNameW(path, result, woutbufp, NULL);
1981 if (!result) {
1982 PyMem_RawFree(woutbufp);
1983 return -1;
1984 }
1985 }
1986
1987 if (woutbufp != woutbuf) {
1988 *abspath_p = woutbufp;
1989 return 0;
1990 }
1991
1992 *abspath_p = _PyMem_RawWcsdup(woutbufp);
1993 return 0;
1994 #else
1995 if (_Py_isabs(path)) {
1996 *abspath_p = _PyMem_RawWcsdup(path);
1997 return 0;
1998 }
1999
2000 wchar_t cwd[MAXPATHLEN + 1];
2001 cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2002 if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2003 /* unable to get the current directory */
2004 return -1;
2005 }
2006
2007 size_t cwd_len = wcslen(cwd);
2008 size_t path_len = wcslen(path);
2009 size_t len = cwd_len + 1 + path_len + 1;
2010 if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2011 *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
2012 }
2013 else {
2014 *abspath_p = NULL;
2015 }
2016 if (*abspath_p == NULL) {
2017 return 0;
2018 }
2019
2020 wchar_t *abspath = *abspath_p;
2021 memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
2022 abspath += cwd_len;
2023
2024 *abspath = (wchar_t)SEP;
2025 abspath++;
2026
2027 memcpy(abspath, path, path_len * sizeof(wchar_t));
2028 abspath += path_len;
2029
2030 *abspath = 0;
2031 return 0;
2032 #endif
2033 }
2034
2035
2036 /* Get the current directory. buflen is the buffer size in wide characters
2037 including the null character. Decode the path from the locale encoding.
2038
2039 Return NULL on getcwd() error, on decoding error, or if 'buf' is
2040 too short. */
2041 wchar_t*
_Py_wgetcwd(wchar_t * buf,size_t buflen)2042 _Py_wgetcwd(wchar_t *buf, size_t buflen)
2043 {
2044 #ifdef MS_WINDOWS
2045 int ibuflen = (int)Py_MIN(buflen, INT_MAX);
2046 return _wgetcwd(buf, ibuflen);
2047 #else
2048 char fname[MAXPATHLEN];
2049 wchar_t *wname;
2050 size_t len;
2051
2052 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
2053 return NULL;
2054 wname = Py_DecodeLocale(fname, &len);
2055 if (wname == NULL)
2056 return NULL;
2057 /* wname must have space to store the trailing NUL character */
2058 if (buflen <= len) {
2059 PyMem_RawFree(wname);
2060 return NULL;
2061 }
2062 wcsncpy(buf, wname, buflen);
2063 PyMem_RawFree(wname);
2064 return buf;
2065 #endif
2066 }
2067
2068 /* Duplicate a file descriptor. The new file descriptor is created as
2069 non-inheritable. Return a new file descriptor on success, raise an OSError
2070 exception and return -1 on error.
2071
2072 The GIL is released to call dup(). The caller must hold the GIL. */
2073 int
_Py_dup(int fd)2074 _Py_dup(int fd)
2075 {
2076 #ifdef MS_WINDOWS
2077 HANDLE handle;
2078 #endif
2079
2080 assert(PyGILState_Check());
2081
2082 #ifdef MS_WINDOWS
2083 _Py_BEGIN_SUPPRESS_IPH
2084 handle = (HANDLE)_get_osfhandle(fd);
2085 _Py_END_SUPPRESS_IPH
2086 if (handle == INVALID_HANDLE_VALUE) {
2087 PyErr_SetFromErrno(PyExc_OSError);
2088 return -1;
2089 }
2090
2091 Py_BEGIN_ALLOW_THREADS
2092 _Py_BEGIN_SUPPRESS_IPH
2093 fd = dup(fd);
2094 _Py_END_SUPPRESS_IPH
2095 Py_END_ALLOW_THREADS
2096 if (fd < 0) {
2097 PyErr_SetFromErrno(PyExc_OSError);
2098 return -1;
2099 }
2100
2101 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2102 _Py_BEGIN_SUPPRESS_IPH
2103 close(fd);
2104 _Py_END_SUPPRESS_IPH
2105 return -1;
2106 }
2107 #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2108 Py_BEGIN_ALLOW_THREADS
2109 _Py_BEGIN_SUPPRESS_IPH
2110 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2111 _Py_END_SUPPRESS_IPH
2112 Py_END_ALLOW_THREADS
2113 if (fd < 0) {
2114 PyErr_SetFromErrno(PyExc_OSError);
2115 return -1;
2116 }
2117
2118 #else
2119 Py_BEGIN_ALLOW_THREADS
2120 _Py_BEGIN_SUPPRESS_IPH
2121 fd = dup(fd);
2122 _Py_END_SUPPRESS_IPH
2123 Py_END_ALLOW_THREADS
2124 if (fd < 0) {
2125 PyErr_SetFromErrno(PyExc_OSError);
2126 return -1;
2127 }
2128
2129 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2130 _Py_BEGIN_SUPPRESS_IPH
2131 close(fd);
2132 _Py_END_SUPPRESS_IPH
2133 return -1;
2134 }
2135 #endif
2136 return fd;
2137 }
2138
2139 #ifndef MS_WINDOWS
2140 /* Get the blocking mode of the file descriptor.
2141 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2142 raise an exception and return -1 on error. */
2143 int
_Py_get_blocking(int fd)2144 _Py_get_blocking(int fd)
2145 {
2146 int flags;
2147 _Py_BEGIN_SUPPRESS_IPH
2148 flags = fcntl(fd, F_GETFL, 0);
2149 _Py_END_SUPPRESS_IPH
2150 if (flags < 0) {
2151 PyErr_SetFromErrno(PyExc_OSError);
2152 return -1;
2153 }
2154
2155 return !(flags & O_NONBLOCK);
2156 }
2157
2158 /* Set the blocking mode of the specified file descriptor.
2159
2160 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2161 otherwise.
2162
2163 Return 0 on success, raise an exception and return -1 on error. */
2164 int
_Py_set_blocking(int fd,int blocking)2165 _Py_set_blocking(int fd, int blocking)
2166 {
2167 #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
2168 int arg = !blocking;
2169 if (ioctl(fd, FIONBIO, &arg) < 0)
2170 goto error;
2171 #else
2172 int flags, res;
2173
2174 _Py_BEGIN_SUPPRESS_IPH
2175 flags = fcntl(fd, F_GETFL, 0);
2176 if (flags >= 0) {
2177 if (blocking)
2178 flags = flags & (~O_NONBLOCK);
2179 else
2180 flags = flags | O_NONBLOCK;
2181
2182 res = fcntl(fd, F_SETFL, flags);
2183 } else {
2184 res = -1;
2185 }
2186 _Py_END_SUPPRESS_IPH
2187
2188 if (res < 0)
2189 goto error;
2190 #endif
2191 return 0;
2192
2193 error:
2194 PyErr_SetFromErrno(PyExc_OSError);
2195 return -1;
2196 }
2197 #endif
2198
2199
2200 int
_Py_GetLocaleconvNumeric(struct lconv * lc,PyObject ** decimal_point,PyObject ** thousands_sep)2201 _Py_GetLocaleconvNumeric(struct lconv *lc,
2202 PyObject **decimal_point, PyObject **thousands_sep)
2203 {
2204 assert(decimal_point != NULL);
2205 assert(thousands_sep != NULL);
2206
2207 #ifndef MS_WINDOWS
2208 int change_locale = 0;
2209 if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
2210 change_locale = 1;
2211 }
2212 if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
2213 change_locale = 1;
2214 }
2215
2216 /* Keep a copy of the LC_CTYPE locale */
2217 char *oldloc = NULL, *loc = NULL;
2218 if (change_locale) {
2219 oldloc = setlocale(LC_CTYPE, NULL);
2220 if (!oldloc) {
2221 PyErr_SetString(PyExc_RuntimeWarning,
2222 "failed to get LC_CTYPE locale");
2223 return -1;
2224 }
2225
2226 oldloc = _PyMem_Strdup(oldloc);
2227 if (!oldloc) {
2228 PyErr_NoMemory();
2229 return -1;
2230 }
2231
2232 loc = setlocale(LC_NUMERIC, NULL);
2233 if (loc != NULL && strcmp(loc, oldloc) == 0) {
2234 loc = NULL;
2235 }
2236
2237 if (loc != NULL) {
2238 /* Only set the locale temporarily the LC_CTYPE locale
2239 if LC_NUMERIC locale is different than LC_CTYPE locale and
2240 decimal_point and/or thousands_sep are non-ASCII or longer than
2241 1 byte */
2242 setlocale(LC_CTYPE, loc);
2243 }
2244 }
2245
2246 #define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2247 #else /* MS_WINDOWS */
2248 /* Use _W_* fields of Windows strcut lconv */
2249 #define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2250 #endif /* MS_WINDOWS */
2251
2252 int res = -1;
2253
2254 *decimal_point = GET_LOCALE_STRING(decimal_point);
2255 if (*decimal_point == NULL) {
2256 goto done;
2257 }
2258
2259 *thousands_sep = GET_LOCALE_STRING(thousands_sep);
2260 if (*thousands_sep == NULL) {
2261 goto done;
2262 }
2263
2264 res = 0;
2265
2266 done:
2267 #ifndef MS_WINDOWS
2268 if (loc != NULL) {
2269 setlocale(LC_CTYPE, oldloc);
2270 }
2271 PyMem_Free(oldloc);
2272 #endif
2273 return res;
2274
2275 #undef GET_LOCALE_STRING
2276 }
2277