1 #include "Python.h"
2 #include "pycore_fileutils.h" // fileutils definitions
3 #include "pycore_runtime.h" // _PyRuntime
4 #include "osdefs.h" // SEP
5 #include <locale.h>
6 #include <stdlib.h> // mbstowcs()
7
8 #ifdef MS_WINDOWS
9 # include <malloc.h>
10 # include <windows.h>
11 # include <pathcch.h> // PathCchCombineEx
12 extern int winerror_to_errno(int);
13 #endif
14
15 #ifdef HAVE_LANGINFO_H
16 #include <langinfo.h>
17 #endif
18
19 #ifdef HAVE_SYS_IOCTL_H
20 #include <sys/ioctl.h>
21 #endif
22
23 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
24 #include <iconv.h>
25 #endif
26
27 #ifdef HAVE_FCNTL_H
28 #include <fcntl.h>
29 #endif /* HAVE_FCNTL_H */
30
31 #ifdef O_CLOEXEC
32 /* Does open() support the O_CLOEXEC flag? Possible values:
33
34 -1: unknown
35 0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
36 1: open() supports O_CLOEXEC flag, close-on-exec is set
37
38 The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
39 and os.open(). */
40 int _Py_open_cloexec_works = -1;
41 #endif
42
43 // The value must be the same in unicodeobject.c.
44 #define MAX_UNICODE 0x10ffff
45
/* Error values returned by mbstowcs() and mbrtowc(): DECODE_ERROR is the
   generic conversion failure, INCOMPLETE_CHARACTER is used to detect an
   incomplete multibyte character reported by mbrtowc(). */
static const size_t DECODE_ERROR = (size_t)-1;
static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
49
50
51 static int
get_surrogateescape(_Py_error_handler errors,int * surrogateescape)52 get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
53 {
54 switch (errors)
55 {
56 case _Py_ERROR_STRICT:
57 *surrogateescape = 0;
58 return 0;
59 case _Py_ERROR_SURROGATEESCAPE:
60 *surrogateescape = 1;
61 return 0;
62 default:
63 return -1;
64 }
65 }
66
67
68 PyObject *
_Py_device_encoding(int fd)69 _Py_device_encoding(int fd)
70 {
71 int valid;
72 Py_BEGIN_ALLOW_THREADS
73 _Py_BEGIN_SUPPRESS_IPH
74 valid = isatty(fd);
75 _Py_END_SUPPRESS_IPH
76 Py_END_ALLOW_THREADS
77 if (!valid)
78 Py_RETURN_NONE;
79
80 #if defined(MS_WINDOWS)
81 UINT cp;
82 if (fd == 0)
83 cp = GetConsoleCP();
84 else if (fd == 1 || fd == 2)
85 cp = GetConsoleOutputCP();
86 else
87 cp = 0;
88 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
89 has no console */
90 if (cp == 0) {
91 Py_RETURN_NONE;
92 }
93
94 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
95 #else
96 return _Py_GetLocaleEncodingObject();
97 #endif
98 }
99
100
101 static size_t
is_valid_wide_char(wchar_t ch)102 is_valid_wide_char(wchar_t ch)
103 {
104 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
105 /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
106 for non-Unicode locales, which makes values higher than MAX_UNICODE
107 possibly valid. */
108 return 1;
109 #endif
110 if (Py_UNICODE_IS_SURROGATE(ch)) {
111 // Reject lone surrogate characters
112 return 0;
113 }
114 if (ch > MAX_UNICODE) {
115 // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
116 // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
117 // it creates characters outside the [U+0000; U+10ffff] range:
118 // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
119 return 0;
120 }
121 return 1;
122 }
123
124
125 static size_t
_Py_mbstowcs(wchar_t * dest,const char * src,size_t n)126 _Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
127 {
128 size_t count = mbstowcs(dest, src, n);
129 if (dest != NULL && count != DECODE_ERROR) {
130 for (size_t i=0; i < count; i++) {
131 wchar_t ch = dest[i];
132 if (!is_valid_wide_char(ch)) {
133 return DECODE_ERROR;
134 }
135 }
136 }
137 return count;
138 }
139
140
141 #ifdef HAVE_MBRTOWC
142 static size_t
_Py_mbrtowc(wchar_t * pwc,const char * str,size_t len,mbstate_t * pmbs)143 _Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
144 {
145 assert(pwc != NULL);
146 size_t count = mbrtowc(pwc, str, len, pmbs);
147 if (count != 0 && count != DECODE_ERROR && count != INCOMPLETE_CHARACTER) {
148 if (!is_valid_wide_char(*pwc)) {
149 return DECODE_ERROR;
150 }
151 }
152 return count;
153 }
154 #endif
155
156
157 #if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
158
159 #define USE_FORCE_ASCII
160
161 extern int _Py_normalize_encoding(const char *, char *, size_t);
162
163 /* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
164 and POSIX locale. nl_langinfo(CODESET) announces an alias of the
165 ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
166 ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
167 locale.getpreferredencoding() codec. For example, if command line arguments
168 are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
169 UnicodeEncodeError instead of retrieving the original byte string.
170
171 The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
172 nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
173 one byte in range 0x80-0xff can be decoded from the locale encoding. The
174 workaround is also enabled on error, for example if getting the locale
175 failed.
176
177 On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
178 announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
179 ASCII encoding in this case.
180
181 Values of force_ascii:
182
183 1: the workaround is used: Py_EncodeLocale() uses
184 encode_ascii_surrogateescape() and Py_DecodeLocale() uses
185 decode_ascii()
186 0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
187 Py_DecodeLocale() uses mbstowcs()
188 -1: unknown, need to call check_force_ascii() to get the value
189 */
190 static int force_ascii = -1;
191
/* Compute the value of force_ascii for the current LC_CTYPE locale.

   Return 1 if the ASCII workaround must be used, 0 otherwise.  Return 1 as
   well if the locale or its codeset cannot be retrieved or normalized. */
static int
check_force_ascii(void)
{
    const char *ctype_locale = setlocale(LC_CTYPE, NULL);
    if (ctype_locale == NULL) {
        goto error;
    }
    if (!(strcmp(ctype_locale, "C") == 0
          || strcmp(ctype_locale, "POSIX") == 0))
    {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (codeset == NULL || *codeset == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char normalized[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, normalized, sizeof(normalized))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(normalized, "roman8") == 0) {
        /* On HP-UX with C locale or the POSIX locale,
           nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
           Latin1 encoding in practice. Force ASCII in this case.

           Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
        unsigned char byte = (unsigned char)0xA7;
        wchar_t decoded;
        size_t rc = _Py_mbstowcs(&decoded, (char*)&byte, 1);
        if (rc != DECODE_ERROR && decoded == L'\xA7') {
            return 1;
        }
    }
#else
    const char* ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int announced_ascii = 0;
    for (size_t k = 0; ascii_aliases[k] != NULL; k++) {
        if (strcmp(normalized, ascii_aliases[k]) == 0) {
            announced_ascii = 1;
            break;
        }
    }
    if (!announced_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* Try to decode each byte in range 0x80-0xff on its own */
    for (unsigned int byte = 0x80; byte <= 0xff; byte++) {
        char src[1];
        wchar_t dst[1];

        src[0] = (char)(unsigned char)byte;
        if (_Py_mbstowcs(dst, src, 1) != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
291
292
293 int
_Py_GetForceASCII(void)294 _Py_GetForceASCII(void)
295 {
296 if (force_ascii == -1) {
297 force_ascii = check_force_ascii();
298 }
299 return force_ascii;
300 }
301
302
303 void
_Py_ResetForceASCII(void)304 _Py_ResetForceASCII(void)
305 {
306 force_ascii = -1;
307 }
308
309
310 static int
encode_ascii(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)311 encode_ascii(const wchar_t *text, char **str,
312 size_t *error_pos, const char **reason,
313 int raw_malloc, _Py_error_handler errors)
314 {
315 char *result = NULL, *out;
316 size_t len, i;
317 wchar_t ch;
318
319 int surrogateescape;
320 if (get_surrogateescape(errors, &surrogateescape) < 0) {
321 return -3;
322 }
323
324 len = wcslen(text);
325
326 /* +1 for NULL byte */
327 if (raw_malloc) {
328 result = PyMem_RawMalloc(len + 1);
329 }
330 else {
331 result = PyMem_Malloc(len + 1);
332 }
333 if (result == NULL) {
334 return -1;
335 }
336
337 out = result;
338 for (i=0; i<len; i++) {
339 ch = text[i];
340
341 if (ch <= 0x7f) {
342 /* ASCII character */
343 *out++ = (char)ch;
344 }
345 else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
346 /* UTF-8b surrogate */
347 *out++ = (char)(ch - 0xdc00);
348 }
349 else {
350 if (raw_malloc) {
351 PyMem_RawFree(result);
352 }
353 else {
354 PyMem_Free(result);
355 }
356 if (error_pos != NULL) {
357 *error_pos = i;
358 }
359 if (reason) {
360 *reason = "encoding error";
361 }
362 return -2;
363 }
364 }
365 *out = '\0';
366 *str = result;
367 return 0;
368 }
369 #else
/* The ASCII workaround does not exist in this configuration: always 0. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
375
/* The ASCII workaround does not exist in this configuration: there is no
   cached state to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
381 #endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
382
383
384 #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
385 static int
decode_ascii(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)386 decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
387 const char **reason, _Py_error_handler errors)
388 {
389 wchar_t *res;
390 unsigned char *in;
391 wchar_t *out;
392 size_t argsize = strlen(arg) + 1;
393
394 int surrogateescape;
395 if (get_surrogateescape(errors, &surrogateescape) < 0) {
396 return -3;
397 }
398
399 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
400 return -1;
401 }
402 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
403 if (!res) {
404 return -1;
405 }
406
407 out = res;
408 for (in = (unsigned char*)arg; *in; in++) {
409 unsigned char ch = *in;
410 if (ch < 128) {
411 *out++ = ch;
412 }
413 else {
414 if (!surrogateescape) {
415 PyMem_RawFree(res);
416 if (wlen) {
417 *wlen = in - (unsigned char*)arg;
418 }
419 if (reason) {
420 *reason = "decoding error";
421 }
422 return -2;
423 }
424 *out++ = 0xdc00 + ch;
425 }
426 }
427 *out = 0;
428
429 if (wlen != NULL) {
430 *wlen = out - res;
431 }
432 *wstr = res;
433 return 0;
434 }
435 #endif /* !HAVE_MBRTOWC */
436
437 static int
decode_current_locale(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)438 decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
439 const char **reason, _Py_error_handler errors)
440 {
441 wchar_t *res;
442 size_t argsize;
443 size_t count;
444 #ifdef HAVE_MBRTOWC
445 unsigned char *in;
446 wchar_t *out;
447 mbstate_t mbs;
448 #endif
449
450 int surrogateescape;
451 if (get_surrogateescape(errors, &surrogateescape) < 0) {
452 return -3;
453 }
454
455 #ifdef HAVE_BROKEN_MBSTOWCS
456 /* Some platforms have a broken implementation of
457 * mbstowcs which does not count the characters that
458 * would result from conversion. Use an upper bound.
459 */
460 argsize = strlen(arg);
461 #else
462 argsize = _Py_mbstowcs(NULL, arg, 0);
463 #endif
464 if (argsize != DECODE_ERROR) {
465 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
466 return -1;
467 }
468 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
469 if (!res) {
470 return -1;
471 }
472
473 count = _Py_mbstowcs(res, arg, argsize + 1);
474 if (count != DECODE_ERROR) {
475 *wstr = res;
476 if (wlen != NULL) {
477 *wlen = count;
478 }
479 return 0;
480 }
481 PyMem_RawFree(res);
482 }
483
484 /* Conversion failed. Fall back to escaping with surrogateescape. */
485 #ifdef HAVE_MBRTOWC
486 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
487
488 /* Overallocate; as multi-byte characters are in the argument, the
489 actual output could use less memory. */
490 argsize = strlen(arg) + 1;
491 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
492 return -1;
493 }
494 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
495 if (!res) {
496 return -1;
497 }
498
499 in = (unsigned char*)arg;
500 out = res;
501 memset(&mbs, 0, sizeof mbs);
502 while (argsize) {
503 size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
504 if (converted == 0) {
505 /* Reached end of string; null char stored. */
506 break;
507 }
508
509 if (converted == INCOMPLETE_CHARACTER) {
510 /* Incomplete character. This should never happen,
511 since we provide everything that we have -
512 unless there is a bug in the C library, or I
513 misunderstood how mbrtowc works. */
514 goto decode_error;
515 }
516
517 if (converted == DECODE_ERROR) {
518 if (!surrogateescape) {
519 goto decode_error;
520 }
521
522 /* Decoding error. Escape as UTF-8b, and start over in the initial
523 shift state. */
524 *out++ = 0xdc00 + *in++;
525 argsize--;
526 memset(&mbs, 0, sizeof mbs);
527 continue;
528 }
529
530 // _Py_mbrtowc() reject lone surrogate characters
531 assert(!Py_UNICODE_IS_SURROGATE(*out));
532
533 /* successfully converted some bytes */
534 in += converted;
535 argsize -= converted;
536 out++;
537 }
538 if (wlen != NULL) {
539 *wlen = out - res;
540 }
541 *wstr = res;
542 return 0;
543
544 decode_error:
545 PyMem_RawFree(res);
546 if (wlen) {
547 *wlen = in - (unsigned char*)arg;
548 }
549 if (reason) {
550 *reason = "decoding error";
551 }
552 return -2;
553 #else /* HAVE_MBRTOWC */
554 /* Cannot use C locale for escaping; manually escape as if charset
555 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
556 correctly in the locale's charset, which must be an ASCII superset. */
557 return decode_ascii(arg, wstr, wlen, reason, errors);
558 #endif /* HAVE_MBRTOWC */
559 }
560
561
562 /* Decode a byte string from the locale encoding.
563
   If 'errors' is _Py_ERROR_STRICT, use the strict error handler. If 'errors'
   is _Py_ERROR_SURROGATEESCAPE, use the surrogateescape error handler:
   undecodable bytes are decoded as characters in range U+DC80..U+DCFF. If a
   byte sequence can be decoded as a surrogate character, escape the bytes
   using the surrogateescape error handler instead of decoding them.
569
570 On success, return 0 and write the newly allocated wide character string into
571 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
572 the number of wide characters excluding the null character into *wlen.
573
574 On memory allocation failure, return -1.
575
576 On decoding error, return -2. If wlen is not NULL, write the start of
577 invalid byte sequence in the input string into *wlen. If reason is not NULL,
578 write the decoding error message into *reason.
579
580 Return -3 if the error handler 'errors' is not supported.
581
582 Use the Py_EncodeLocaleEx() function to encode the character string back to
583 a byte string. */
584 int
_Py_DecodeLocaleEx(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,int current_locale,_Py_error_handler errors)585 _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
586 const char **reason,
587 int current_locale, _Py_error_handler errors)
588 {
589 if (current_locale) {
590 #ifdef _Py_FORCE_UTF8_LOCALE
591 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
592 errors);
593 #else
594 return decode_current_locale(arg, wstr, wlen, reason, errors);
595 #endif
596 }
597
598 #ifdef _Py_FORCE_UTF8_FS_ENCODING
599 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
600 errors);
601 #else
602 int use_utf8 = (Py_UTF8Mode == 1);
603 #ifdef MS_WINDOWS
604 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
605 #endif
606 if (use_utf8) {
607 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
608 errors);
609 }
610
611 #ifdef USE_FORCE_ASCII
612 if (force_ascii == -1) {
613 force_ascii = check_force_ascii();
614 }
615
616 if (force_ascii) {
617 /* force ASCII encoding to workaround mbstowcs() issue */
618 return decode_ascii(arg, wstr, wlen, reason, errors);
619 }
620 #endif
621
622 return decode_current_locale(arg, wstr, wlen, reason, errors);
623 #endif /* !_Py_FORCE_UTF8_FS_ENCODING */
624 }
625
626
627 /* Decode a byte string from the locale encoding with the
628 surrogateescape error handler: undecodable bytes are decoded as characters
629 in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
630 character, escape the bytes using the surrogateescape error handler instead
631 of decoding them.
632
   Return a pointer to a newly allocated wide character string, use
   PyMem_RawFree() to free the memory. If wlen is not NULL, write the number of
   wide characters excluding the null character into *wlen.

   Return NULL on decoding error or memory allocation error. If wlen is not
   NULL, *wlen is set to (size_t)-1 on memory error or set to (size_t)-2 on
   decoding error.
640
641 Decoding errors should never happen, unless there is a bug in the C
642 library.
643
644 Use the Py_EncodeLocale() function to encode the character string back to a
645 byte string. */
646 wchar_t*
Py_DecodeLocale(const char * arg,size_t * wlen)647 Py_DecodeLocale(const char* arg, size_t *wlen)
648 {
649 wchar_t *wstr;
650 int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
651 NULL, 0,
652 _Py_ERROR_SURROGATEESCAPE);
653 if (res != 0) {
654 assert(res != -3);
655 if (wlen != NULL) {
656 *wlen = (size_t)res;
657 }
658 return NULL;
659 }
660 return wstr;
661 }
662
663
664 static int
encode_current_locale(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)665 encode_current_locale(const wchar_t *text, char **str,
666 size_t *error_pos, const char **reason,
667 int raw_malloc, _Py_error_handler errors)
668 {
669 const size_t len = wcslen(text);
670 char *result = NULL, *bytes = NULL;
671 size_t i, size, converted;
672 wchar_t c, buf[2];
673
674 int surrogateescape;
675 if (get_surrogateescape(errors, &surrogateescape) < 0) {
676 return -3;
677 }
678
679 /* The function works in two steps:
680 1. compute the length of the output buffer in bytes (size)
681 2. outputs the bytes */
682 size = 0;
683 buf[1] = 0;
684 while (1) {
685 for (i=0; i < len; i++) {
686 c = text[i];
687 if (c >= 0xdc80 && c <= 0xdcff) {
688 if (!surrogateescape) {
689 goto encode_error;
690 }
691 /* UTF-8b surrogate */
692 if (bytes != NULL) {
693 *bytes++ = c - 0xdc00;
694 size--;
695 }
696 else {
697 size++;
698 }
699 continue;
700 }
701 else {
702 buf[0] = c;
703 if (bytes != NULL) {
704 converted = wcstombs(bytes, buf, size);
705 }
706 else {
707 converted = wcstombs(NULL, buf, 0);
708 }
709 if (converted == DECODE_ERROR) {
710 goto encode_error;
711 }
712 if (bytes != NULL) {
713 bytes += converted;
714 size -= converted;
715 }
716 else {
717 size += converted;
718 }
719 }
720 }
721 if (result != NULL) {
722 *bytes = '\0';
723 break;
724 }
725
726 size += 1; /* nul byte at the end */
727 if (raw_malloc) {
728 result = PyMem_RawMalloc(size);
729 }
730 else {
731 result = PyMem_Malloc(size);
732 }
733 if (result == NULL) {
734 return -1;
735 }
736 bytes = result;
737 }
738 *str = result;
739 return 0;
740
741 encode_error:
742 if (raw_malloc) {
743 PyMem_RawFree(result);
744 }
745 else {
746 PyMem_Free(result);
747 }
748 if (error_pos != NULL) {
749 *error_pos = i;
750 }
751 if (reason) {
752 *reason = "encoding error";
753 }
754 return -2;
755 }
756
757
758 /* Encode a string to the locale encoding.
759
760 Parameters:
761
762 * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
763 of PyMem_Malloc().
764 * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
765 Python filesystem encoding.
766 * errors: error handler like "strict" or "surrogateescape".
767
768 Return value:
769
    0: success, *str is set to a newly allocated encoded string.
771 -1: memory allocation failure
772 -2: encoding error, set *error_pos and *reason (if set).
773 -3: the error handler 'errors' is not supported.
774 */
775 static int
encode_locale_ex(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,int current_locale,_Py_error_handler errors)776 encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
777 const char **reason,
778 int raw_malloc, int current_locale, _Py_error_handler errors)
779 {
780 if (current_locale) {
781 #ifdef _Py_FORCE_UTF8_LOCALE
782 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
783 raw_malloc, errors);
784 #else
785 return encode_current_locale(text, str, error_pos, reason,
786 raw_malloc, errors);
787 #endif
788 }
789
790 #ifdef _Py_FORCE_UTF8_FS_ENCODING
791 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
792 raw_malloc, errors);
793 #else
794 int use_utf8 = (Py_UTF8Mode == 1);
795 #ifdef MS_WINDOWS
796 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
797 #endif
798 if (use_utf8) {
799 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
800 raw_malloc, errors);
801 }
802
803 #ifdef USE_FORCE_ASCII
804 if (force_ascii == -1) {
805 force_ascii = check_force_ascii();
806 }
807
808 if (force_ascii) {
809 return encode_ascii(text, str, error_pos, reason,
810 raw_malloc, errors);
811 }
812 #endif
813
814 return encode_current_locale(text, str, error_pos, reason,
815 raw_malloc, errors);
816 #endif /* _Py_FORCE_UTF8_FS_ENCODING */
817 }
818
819 static char*
encode_locale(const wchar_t * text,size_t * error_pos,int raw_malloc,int current_locale)820 encode_locale(const wchar_t *text, size_t *error_pos,
821 int raw_malloc, int current_locale)
822 {
823 char *str;
824 int res = encode_locale_ex(text, &str, error_pos, NULL,
825 raw_malloc, current_locale,
826 _Py_ERROR_SURROGATEESCAPE);
827 if (res != -2 && error_pos) {
828 *error_pos = (size_t)-1;
829 }
830 if (res != 0) {
831 return NULL;
832 }
833 return str;
834 }
835
836 /* Encode a wide character string to the locale encoding with the
837 surrogateescape error handler: surrogate characters in the range
838 U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
839
840 Return a pointer to a newly allocated byte string, use PyMem_Free() to free
841 the memory. Return NULL on encoding or memory allocation error.
842
843 If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
844 to the index of the invalid character on encoding error.
845
846 Use the Py_DecodeLocale() function to decode the bytes string back to a wide
847 character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;   /* do not force the current LC_CTYPE */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
853
854
855 /* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
856 instead of PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;       /* allocate with PyMem_RawMalloc() */
    const int current_locale = 0;   /* do not force the current LC_CTYPE */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
862
863
864 int
_Py_EncodeLocaleEx(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int current_locale,_Py_error_handler errors)865 _Py_EncodeLocaleEx(const wchar_t *text, char **str,
866 size_t *error_pos, const char **reason,
867 int current_locale, _Py_error_handler errors)
868 {
869 return encode_locale_ex(text, str, error_pos, reason, 1,
870 current_locale, errors);
871 }
872
873
874 // Get the current locale encoding name:
875 //
876 // - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
877 // - Return "UTF-8" if the UTF-8 Mode is enabled
878 // - On Windows, return the ANSI code page (ex: "cp1250")
879 // - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string.
880 // - Otherwise, return nl_langinfo(CODESET).
881 //
882 // Return NULL on memory allocation failure.
883 //
884 // See also config_get_locale_encoding()
885 wchar_t*
_Py_GetLocaleEncoding(void)886 _Py_GetLocaleEncoding(void)
887 {
888 #ifdef _Py_FORCE_UTF8_LOCALE
889 // On Android langinfo.h and CODESET are missing,
890 // and UTF-8 is always used in mbstowcs() and wcstombs().
891 return _PyMem_RawWcsdup(L"UTF-8");
892 #else
893 const PyPreConfig *preconfig = &_PyRuntime.preconfig;
894 if (preconfig->utf8_mode) {
895 return _PyMem_RawWcsdup(L"UTF-8");
896 }
897
898 #ifdef MS_WINDOWS
899 wchar_t encoding[23];
900 unsigned int ansi_codepage = GetACP();
901 swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
902 encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
903 return _PyMem_RawWcsdup(encoding);
904 #else
905 const char *encoding = nl_langinfo(CODESET);
906 if (!encoding || encoding[0] == '\0') {
907 // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
908 // macOS if the LC_CTYPE locale is not supported.
909 return _PyMem_RawWcsdup(L"UTF-8");
910 }
911
912 wchar_t *wstr;
913 int res = decode_current_locale(encoding, &wstr, NULL,
914 NULL, _Py_ERROR_SURROGATEESCAPE);
915 if (res < 0) {
916 return NULL;
917 }
918 return wstr;
919 #endif // !MS_WINDOWS
920
921 #endif // !_Py_FORCE_UTF8_LOCALE
922 }
923
924
925 PyObject *
_Py_GetLocaleEncodingObject(void)926 _Py_GetLocaleEncodingObject(void)
927 {
928 wchar_t *encoding = _Py_GetLocaleEncoding();
929 if (encoding == NULL) {
930 PyErr_NoMemory();
931 return NULL;
932 }
933
934 PyObject *str = PyUnicode_FromWideChar(encoding, -1);
935 PyMem_RawFree(encoding);
936 return str;
937 }
938
939 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
940
941 /* Check whether current locale uses Unicode as internal wchar_t form. */
942 int
_Py_LocaleUsesNonUnicodeWchar(void)943 _Py_LocaleUsesNonUnicodeWchar(void)
944 {
945 /* Oracle Solaris uses non-Unicode internal wchar_t form for
946 non-Unicode locales and hence needs conversion to UTF first. */
947 char* codeset = nl_langinfo(CODESET);
948 if (!codeset) {
949 return 0;
950 }
951 /* 646 refers to ISO/IEC 646 standard that corresponds to ASCII encoding */
952 return (strcmp(codeset, "UTF-8") != 0 && strcmp(codeset, "646") != 0);
953 }
954
955 static wchar_t *
_Py_ConvertWCharForm(const wchar_t * source,Py_ssize_t size,const char * tocode,const char * fromcode)956 _Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
957 const char *tocode, const char *fromcode)
958 {
959 Py_BUILD_ASSERT(sizeof(wchar_t) == 4);
960
961 /* Ensure we won't overflow the size. */
962 if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
963 PyErr_NoMemory();
964 return NULL;
965 }
966
967 /* the string doesn't have to be NULL terminated */
968 wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
969 if (target == NULL) {
970 PyErr_NoMemory();
971 return NULL;
972 }
973
974 iconv_t cd = iconv_open(tocode, fromcode);
975 if (cd == (iconv_t)-1) {
976 PyErr_Format(PyExc_ValueError, "iconv_open() failed");
977 PyMem_Free(target);
978 return NULL;
979 }
980
981 char *inbuf = (char *) source;
982 char *outbuf = (char *) target;
983 size_t inbytesleft = sizeof(wchar_t) * size;
984 size_t outbytesleft = inbytesleft;
985
986 size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
987 if (ret == DECODE_ERROR) {
988 PyErr_Format(PyExc_ValueError, "iconv() failed");
989 PyMem_Free(target);
990 iconv_close(cd);
991 return NULL;
992 }
993
994 iconv_close(cd);
995 return target;
996 }
997
998 /* Convert a wide character string to the UCS-4 encoded string. This
999 is necessary on systems where internal form of wchar_t are not Unicode
1000 code points (e.g. Oracle Solaris).
1001
1002 Return a pointer to a newly allocated string, use PyMem_Free() to free
1003 the memory. Return NULL and raise exception on conversion or memory
1004 allocation error. */
1005 wchar_t *
_Py_DecodeNonUnicodeWchar(const wchar_t * native,Py_ssize_t size)1006 _Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
1007 {
1008 return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
1009 }
1010
1011 /* Convert a UCS-4 encoded string to native wide character string. This
1012 is necessary on systems where internal form of wchar_t are not Unicode
1013 code points (e.g. Oracle Solaris).
1014
1015 The conversion is done in place. This can be done because both wchar_t
1016 and UCS-4 use 4-byte encoding, and one wchar_t symbol always correspond
1017 to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
1018 which is currently the only system using these functions; it doesn't have
1019 to be for other systems).
1020
1021 Return 0 on success. Return -1 and raise exception on conversion
1022 or memory allocation error. */
1023 int
_Py_EncodeNonUnicodeWchar_InPlace(wchar_t * unicode,Py_ssize_t size)1024 _Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
1025 {
1026 wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
1027 if (!result) {
1028 return -1;
1029 }
1030 memcpy(unicode, result, size * sizeof(wchar_t));
1031 PyMem_Free(result);
1032 return 0;
1033 }
1034 #endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
1035
1036 #ifdef MS_WINDOWS
1037 static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
1038
1039 static void
FILE_TIME_to_time_t_nsec(FILETIME * in_ptr,time_t * time_out,int * nsec_out)1040 FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
1041 {
1042 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
1043 /* Cannot simply cast and dereference in_ptr,
1044 since it might not be aligned properly */
1045 __int64 in;
1046 memcpy(&in, in_ptr, sizeof(in));
1047 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1048 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
1049 }
1050
1051 void
_Py_time_t_to_FILE_TIME(time_t time_in,int nsec_in,FILETIME * out_ptr)1052 _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
1053 {
1054 /* XXX endianness */
1055 __int64 out;
1056 out = time_in + secs_between_epochs;
1057 out = out * 10000000 + nsec_in / 100;
1058 memcpy(out_ptr, &out, sizeof(out));
1059 }
1060
1061 /* Below, we *know* that ugo+r is 0444 */
1062 #if _S_IREAD != 0400
1063 #error Unsupported C library
1064 #endif
1065 static int
attributes_to_mode(DWORD attr)1066 attributes_to_mode(DWORD attr)
1067 {
1068 int m = 0;
1069 if (attr & FILE_ATTRIBUTE_DIRECTORY)
1070 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
1071 else
1072 m |= _S_IFREG;
1073 if (attr & FILE_ATTRIBUTE_READONLY)
1074 m |= 0444;
1075 else
1076 m |= 0666;
1077 return m;
1078 }
1079
1080 void
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION * info,ULONG reparse_tag,struct _Py_stat_struct * result)1081 _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
1082 struct _Py_stat_struct *result)
1083 {
1084 memset(result, 0, sizeof(*result));
1085 result->st_mode = attributes_to_mode(info->dwFileAttributes);
1086 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
1087 result->st_dev = info->dwVolumeSerialNumber;
1088 result->st_rdev = result->st_dev;
1089 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
1090 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1091 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
1092 result->st_nlink = info->nNumberOfLinks;
1093 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
1094 /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1095 open other name surrogate reparse points without traversing them. To
1096 detect/handle these, check st_file_attributes and st_reparse_tag. */
1097 result->st_reparse_tag = reparse_tag;
1098 if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1099 reparse_tag == IO_REPARSE_TAG_SYMLINK) {
1100 /* first clear the S_IFMT bits */
1101 result->st_mode ^= (result->st_mode & S_IFMT);
1102 /* now set the bits that make this a symlink */
1103 result->st_mode |= S_IFLNK;
1104 }
1105 result->st_file_attributes = info->dwFileAttributes;
1106 }
1107 #endif
1108
1109 /* Return information about a file.
1110
1111 On POSIX, use fstat().
1112
1113 On Windows, use GetFileType() and GetFileInformationByHandle() which support
1114 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
1115 than 2 GiB because the file size type is a signed 32-bit integer: see issue
1116 #23152.
1117
1118 On Windows, set the last Windows error and return nonzero on error. On
1119 POSIX, set errno and return nonzero on error. Fill status and return 0 on
1120 success. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    HANDLE handle = _Py_get_osfhandle_noraise(fd);
    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    int file_type = GetFileType(handle);
    if (file_type == FILE_TYPE_UNKNOWN) {
        /* FILE_TYPE_UNKNOWN is only an error when a Win32 error is set */
        DWORD winerr = GetLastError();
        if (winerr != 0) {
            errno = winerror_to_errno(winerr);
            return -1;
        }
        /* else: valid but unknown file */
    }

    /* Anything that is not a disk file only gets its type reported; all
       other fields stay zero. */
    switch (file_type) {
    case FILE_TYPE_DISK:
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION info;
    if (!GetFileInformationByHandle(handle, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#else
    /* POSIX: fstat() fills status and sets errno on failure */
    return fstat(fd, status);
#endif
}
1172
1173 /* Return information about a file.
1174
1175 On POSIX, use fstat().
1176
1177 On Windows, use GetFileType() and GetFileInformationByHandle() which support
1178 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
1179 than 2 GiB because the file size type is a signed 32-bit integer: see issue
1180 #23152.
1181
1182 Raise an exception and return -1 on error. On Windows, set the last Windows
1183 error on error. On POSIX, set errno on error. Fill status and return 0 on
1184 success.
1185
1186 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1187 to call fstat(). The caller must hold the GIL. */
1188 int
_Py_fstat(int fd,struct _Py_stat_struct * status)1189 _Py_fstat(int fd, struct _Py_stat_struct *status)
1190 {
1191 int res;
1192
1193 assert(PyGILState_Check());
1194
1195 Py_BEGIN_ALLOW_THREADS
1196 res = _Py_fstat_noraise(fd, status);
1197 Py_END_ALLOW_THREADS
1198
1199 if (res != 0) {
1200 #ifdef MS_WINDOWS
1201 PyErr_SetFromWindowsErr(0);
1202 #else
1203 PyErr_SetFromErrno(PyExc_OSError);
1204 #endif
1205 return -1;
1206 }
1207 return 0;
1208 }
1209
1210 /* Like _Py_stat() but with a raw filename. */
/* Return 0 on success.  Return -1 and set errno on error: EINVAL if the
   path cannot be encoded (POSIX only), otherwise the errno value of
   _wstat() / stat().  On Windows, only buf->st_mode is filled. */
int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
    int err;
#ifdef MS_WINDOWS
    struct _stat wstatbuf;
    err = _wstat(path, &wstatbuf);
    if (!err) {
        buf->st_mode = wstatbuf.st_mode;
    }
#else
    char *fname;
    int saved_errno;

    fname = _Py_EncodeLocaleRaw(path, NULL);
    if (fname == NULL) {
        errno = EINVAL;
        return -1;
    }
    err = stat(fname, buf);
    /* Releasing the encoded path must not replace the errno value that
       stat() reported to our caller. */
    saved_errno = errno;
    PyMem_RawFree(fname);
    errno = saved_errno;
#endif
    return err;
}
1233
1234
1235 /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1236 call stat() otherwise. Only fill st_mode attribute on Windows.
1237
1238 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1239 raised. */
1240
1241 int
_Py_stat(PyObject * path,struct stat * statbuf)1242 _Py_stat(PyObject *path, struct stat *statbuf)
1243 {
1244 #ifdef MS_WINDOWS
1245 int err;
1246
1247 #if USE_UNICODE_WCHAR_CACHE
1248 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1249 #else /* USE_UNICODE_WCHAR_CACHE */
1250 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1251 #endif /* USE_UNICODE_WCHAR_CACHE */
1252 if (wpath == NULL)
1253 return -2;
1254
1255 err = _Py_wstat(wpath, statbuf);
1256 #if !USE_UNICODE_WCHAR_CACHE
1257 PyMem_Free(wpath);
1258 #endif /* USE_UNICODE_WCHAR_CACHE */
1259 return err;
1260 #else
1261 int ret;
1262 PyObject *bytes;
1263 char *cpath;
1264
1265 bytes = PyUnicode_EncodeFSDefault(path);
1266 if (bytes == NULL)
1267 return -2;
1268
1269 /* check for embedded null bytes */
1270 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1271 Py_DECREF(bytes);
1272 return -2;
1273 }
1274
1275 ret = stat(cpath, statbuf);
1276 Py_DECREF(bytes);
1277 return ret;
1278 #endif
1279 }
1280
1281
1282 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1283 static int
get_inheritable(int fd,int raise)1284 get_inheritable(int fd, int raise)
1285 {
1286 #ifdef MS_WINDOWS
1287 HANDLE handle;
1288 DWORD flags;
1289
1290 handle = _Py_get_osfhandle_noraise(fd);
1291 if (handle == INVALID_HANDLE_VALUE) {
1292 if (raise)
1293 PyErr_SetFromErrno(PyExc_OSError);
1294 return -1;
1295 }
1296
1297 if (!GetHandleInformation(handle, &flags)) {
1298 if (raise)
1299 PyErr_SetFromWindowsErr(0);
1300 return -1;
1301 }
1302
1303 return (flags & HANDLE_FLAG_INHERIT);
1304 #else
1305 int flags;
1306
1307 flags = fcntl(fd, F_GETFD, 0);
1308 if (flags == -1) {
1309 if (raise)
1310 PyErr_SetFromErrno(PyExc_OSError);
1311 return -1;
1312 }
1313 return !(flags & FD_CLOEXEC);
1314 #endif
1315 }
1316
1317 /* Get the inheritable flag of the specified file descriptor.
1318 Return 1 if the file descriptor can be inherited, 0 if it cannot,
1319 raise an exception and return -1 on error. */
int
_Py_get_inheritable(int fd)
{
    /* Errors are reported as Python exceptions */
    const int raise_on_error = 1;

    return get_inheritable(fd, raise_on_error);
}
1325
1326
/* Set or clear the inheritable flag of fd.

   fd:                 file descriptor to modify
   inheritable:        nonzero to make fd inheritable, zero to make it
                       non-inheritable
   raise:              if nonzero, raise an exception on error; if zero,
                       only return -1
   atomic_flag_works:  NULL, or a pointer to a tri-state cache (-1 unknown,
                       0 no, 1 yes) telling whether fd is already created
                       non-inheritable; must be NULL if inheritable is
                       nonzero

   Return 0 on success and -1 on error.

   This function MUST be kept async-signal-safe on POSIX when raise=0. */
static int
set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
{
#ifdef MS_WINDOWS
    HANDLE handle;
    DWORD flags;
#else
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
    /* Cached across calls: -1 = not tried yet, 1 = ioctl() worked,
       0 = ioctl() is unusable (see below) */
    static int ioctl_works = -1;
    int request;
    int err;
#endif
    int flags, new_flags;
    int res;
#endif

    /* atomic_flag_works can only be used to make the file descriptor
       non-inheritable */
    assert(!(atomic_flag_works != NULL && inheritable));

    if (atomic_flag_works != NULL && !inheritable) {
        if (*atomic_flag_works == -1) {
            /* First use: probe fd itself. If it is already non-inheritable,
               remember that (1); otherwise remember 0. */
            int isInheritable = get_inheritable(fd, raise);
            if (isInheritable == -1)
                return -1;
            *atomic_flag_works = !isInheritable;
        }

        /* fd is already non-inheritable: nothing to do */
        if (*atomic_flag_works)
            return 0;
    }

#ifdef MS_WINDOWS
    handle = _Py_get_osfhandle_noraise(fd);
    if (handle == INVALID_HANDLE_VALUE) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }

    if (inheritable)
        flags = HANDLE_FLAG_INHERIT;
    else
        flags = 0;

    /* This check can be removed once support for Windows 7 ends. */
#define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
        GetFileType(handle) == FILE_TYPE_CHAR)

    /* SetHandleInformation() is not called for handles matching
       CONSOLE_PSEUDOHANDLE; those are reported as success unchanged. */
    if (!CONSOLE_PSEUDOHANDLE(handle) &&
        !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
        if (raise)
            PyErr_SetFromWindowsErr(0);
        return -1;
    }
#undef CONSOLE_PSEUDOHANDLE
    return 0;

#else

#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
    if (ioctl_works != 0 && raise != 0) {
        /* fast-path: ioctl() only requires one syscall */
        /* caveat: raise=0 is an indicator that we must be async-signal-safe
         * thus avoid using ioctl() so we skip the fast-path. */
        if (inheritable)
            request = FIONCLEX;
        else
            request = FIOCLEX;
        err = ioctl(fd, request, NULL);
        if (!err) {
            ioctl_works = 1;
            return 0;
        }

        /* ioctl() failed: EBADF (when O_PATH exists), ENOTTY and EACCES
           fall back to fcntl(); any other errno is a real error. Note that
           the "else" below binds to the "if" after the #endif. */
#ifdef O_PATH
        if (errno == EBADF) {
            // bpo-44849: On Linux and FreeBSD, ioctl(FIOCLEX) fails with EBADF
            // on O_PATH file descriptors. Fall through to the fcntl()
            // implementation.
        }
        else
#endif
        if (errno != ENOTTY && errno != EACCES) {
            if (raise)
                PyErr_SetFromErrno(PyExc_OSError);
            return -1;
        }
        else {
            /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
               device". The ioctl is declared but not supported by the kernel.
               Remember that ioctl() doesn't work. It is the case on
               Illumos-based OS for example.

               Issue #27057: When SELinux policy disallows ioctl it will fail
               with EACCES. While FIOCLEX is safe operation it may be
               unavailable because ioctl was denied altogether.
               This can be the case on Android. */
            ioctl_works = 0;
        }
        /* fallback to fcntl() if ioctl() does not work */
    }
#endif

    /* slow-path: fcntl() requires two syscalls */
    flags = fcntl(fd, F_GETFD);
    if (flags < 0) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }

    /* Only the FD_CLOEXEC bit is changed; other descriptor flags are kept */
    if (inheritable) {
        new_flags = flags & ~FD_CLOEXEC;
    }
    else {
        new_flags = flags | FD_CLOEXEC;
    }

    if (new_flags == flags) {
        /* FD_CLOEXEC flag already set/cleared: nothing to do */
        return 0;
    }

    res = fcntl(fd, F_SETFD, new_flags);
    if (res < 0) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }
    return 0;
#endif
}
1461
1462 /* Make the file descriptor non-inheritable.
1463 Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    /* inheritable=0, raise=0 (no exception), no atomic-flag cache */
    const int inheritable = 0;
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error, NULL);
}
1469
1470 /* Set the inheritable flag of the specified file descriptor.
1471 On success: return 0, on error: raise an exception and return -1.
1472
1473 If atomic_flag_works is not NULL:
1474
    * if *atomic_flag_works==-1, check whether the file descriptor is already
      non-inheritable: if yes, set *atomic_flag_works to 1, otherwise set it
      to 0 and make the file descriptor non-inheritable
1478 * if *atomic_flag_works==1: do nothing
1479 * if *atomic_flag_works==0: set inheritable flag to False
1480
1481 Set atomic_flag_works to NULL if no atomic flag was used to create the
1482 file descriptor.
1483
1484 atomic_flag_works can only be used to make a file descriptor
1485 non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    /* raise=1: failures are reported as Python exceptions */
    const int raise_on_error = 1;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
1491
1492 /* Same as _Py_set_inheritable() but on error, set errno and
1493 don't raise an exception.
1494 This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    /* raise=0: never touch the Python exception state */
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
1500
1501 static int
_Py_open_impl(const char * pathname,int flags,int gil_held)1502 _Py_open_impl(const char *pathname, int flags, int gil_held)
1503 {
1504 int fd;
1505 int async_err = 0;
1506 #ifndef MS_WINDOWS
1507 int *atomic_flag_works;
1508 #endif
1509
1510 #ifdef MS_WINDOWS
1511 flags |= O_NOINHERIT;
1512 #elif defined(O_CLOEXEC)
1513 atomic_flag_works = &_Py_open_cloexec_works;
1514 flags |= O_CLOEXEC;
1515 #else
1516 atomic_flag_works = NULL;
1517 #endif
1518
1519 if (gil_held) {
1520 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1521 if (pathname_obj == NULL) {
1522 return -1;
1523 }
1524 if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1525 Py_DECREF(pathname_obj);
1526 return -1;
1527 }
1528
1529 do {
1530 Py_BEGIN_ALLOW_THREADS
1531 fd = open(pathname, flags);
1532 Py_END_ALLOW_THREADS
1533 } while (fd < 0
1534 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1535 if (async_err) {
1536 Py_DECREF(pathname_obj);
1537 return -1;
1538 }
1539 if (fd < 0) {
1540 PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1541 Py_DECREF(pathname_obj);
1542 return -1;
1543 }
1544 Py_DECREF(pathname_obj);
1545 }
1546 else {
1547 fd = open(pathname, flags);
1548 if (fd < 0)
1549 return -1;
1550 }
1551
1552 #ifndef MS_WINDOWS
1553 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
1554 close(fd);
1555 return -1;
1556 }
1557 #endif
1558
1559 return fd;
1560 }
1561
1562 /* Open a file with the specified flags (wrapper to open() function).
1563 Return a file descriptor on success. Raise an exception and return -1 on
1564 error.
1565
1566 The file descriptor is created non-inheritable.
1567
1568 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1569 except if the Python signal handler raises an exception.
1570
1571 Release the GIL to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1579
1580 /* Open a file with the specified flags (wrapper to open() function).
1581 Return a file descriptor on success. Set errno and return -1 on error.
1582
1583 The file descriptor is created non-inheritable.
1584
1585 If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    /* gil_held=0: no audit event, no exception, no EINTR retry */
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1591
1592 /* Open a file. Use _wfopen() on Windows, encode the path to the locale
1593 encoding and use fopen() otherwise.
1594
1595 The file descriptor is created non-inheritable.
1596
1597 If interrupted by a signal, fail with EINTR. */
1598 FILE *
_Py_wfopen(const wchar_t * path,const wchar_t * mode)1599 _Py_wfopen(const wchar_t *path, const wchar_t *mode)
1600 {
1601 FILE *f;
1602 if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1603 return NULL;
1604 }
1605 #ifndef MS_WINDOWS
1606 char *cpath;
1607 char cmode[10];
1608 size_t r;
1609 r = wcstombs(cmode, mode, 10);
1610 if (r == DECODE_ERROR || r >= 10) {
1611 errno = EINVAL;
1612 return NULL;
1613 }
1614 cpath = _Py_EncodeLocaleRaw(path, NULL);
1615 if (cpath == NULL) {
1616 return NULL;
1617 }
1618 f = fopen(cpath, cmode);
1619 PyMem_RawFree(cpath);
1620 #else
1621 f = _wfopen(path, mode);
1622 #endif
1623 if (f == NULL)
1624 return NULL;
1625 if (make_non_inheritable(fileno(f)) < 0) {
1626 fclose(f);
1627 return NULL;
1628 }
1629 return f;
1630 }
1631
1632
1633 /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
1634 encoding and call fopen() otherwise.
1635
1636 Return the new file object on success. Raise an exception and return NULL
1637 on error.
1638
1639 The file descriptor is created non-inheritable.
1640
1641 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1642 except if the Python signal handler raises an exception.
1643
1644 Release the GIL to call _wfopen() or fopen(). The caller must hold
1645 the GIL. */
1646 FILE*
_Py_fopen_obj(PyObject * path,const char * mode)1647 _Py_fopen_obj(PyObject *path, const char *mode)
1648 {
1649 FILE *f;
1650 int async_err = 0;
1651 #ifdef MS_WINDOWS
1652 wchar_t wmode[10];
1653 int usize;
1654
1655 assert(PyGILState_Check());
1656
1657 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1658 return NULL;
1659 }
1660 if (!PyUnicode_Check(path)) {
1661 PyErr_Format(PyExc_TypeError,
1662 "str file path expected under Windows, got %R",
1663 Py_TYPE(path));
1664 return NULL;
1665 }
1666 #if USE_UNICODE_WCHAR_CACHE
1667 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1668 #else /* USE_UNICODE_WCHAR_CACHE */
1669 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1670 #endif /* USE_UNICODE_WCHAR_CACHE */
1671 if (wpath == NULL)
1672 return NULL;
1673
1674 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1675 wmode, Py_ARRAY_LENGTH(wmode));
1676 if (usize == 0) {
1677 PyErr_SetFromWindowsErr(0);
1678 #if !USE_UNICODE_WCHAR_CACHE
1679 PyMem_Free(wpath);
1680 #endif /* USE_UNICODE_WCHAR_CACHE */
1681 return NULL;
1682 }
1683
1684 do {
1685 Py_BEGIN_ALLOW_THREADS
1686 f = _wfopen(wpath, wmode);
1687 Py_END_ALLOW_THREADS
1688 } while (f == NULL
1689 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1690 #if !USE_UNICODE_WCHAR_CACHE
1691 PyMem_Free(wpath);
1692 #endif /* USE_UNICODE_WCHAR_CACHE */
1693 #else
1694 PyObject *bytes;
1695 const char *path_bytes;
1696
1697 assert(PyGILState_Check());
1698
1699 if (!PyUnicode_FSConverter(path, &bytes))
1700 return NULL;
1701 path_bytes = PyBytes_AS_STRING(bytes);
1702
1703 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1704 Py_DECREF(bytes);
1705 return NULL;
1706 }
1707
1708 do {
1709 Py_BEGIN_ALLOW_THREADS
1710 f = fopen(path_bytes, mode);
1711 Py_END_ALLOW_THREADS
1712 } while (f == NULL
1713 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1714
1715 Py_DECREF(bytes);
1716 #endif
1717 if (async_err)
1718 return NULL;
1719
1720 if (f == NULL) {
1721 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1722 return NULL;
1723 }
1724
1725 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1726 fclose(f);
1727 return NULL;
1728 }
1729 return f;
1730 }
1731
1732 /* Read count bytes from fd into buf.
1733
1734 On success, return the number of read bytes, it can be lower than count.
1735 If the current file offset is at or past the end of file, no bytes are read,
1736 and read() returns zero.
1737
1738 On error, raise an exception, set errno and return -1.
1739
1740 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1741 If the Python signal handler raises an exception, the function returns -1
1742 (the syscall is not retried).
1743
1744 Release the GIL to call read(). The caller must hold the GIL. */
1745 Py_ssize_t
_Py_read(int fd,void * buf,size_t count)1746 _Py_read(int fd, void *buf, size_t count)
1747 {
1748 Py_ssize_t n;
1749 int err;
1750 int async_err = 0;
1751
1752 assert(PyGILState_Check());
1753
1754 /* _Py_read() must not be called with an exception set, otherwise the
1755 * caller may think that read() was interrupted by a signal and the signal
1756 * handler raised an exception. */
1757 assert(!PyErr_Occurred());
1758
1759 if (count > _PY_READ_MAX) {
1760 count = _PY_READ_MAX;
1761 }
1762
1763 _Py_BEGIN_SUPPRESS_IPH
1764 do {
1765 Py_BEGIN_ALLOW_THREADS
1766 errno = 0;
1767 #ifdef MS_WINDOWS
1768 n = read(fd, buf, (int)count);
1769 #else
1770 n = read(fd, buf, count);
1771 #endif
1772 /* save/restore errno because PyErr_CheckSignals()
1773 * and PyErr_SetFromErrno() can modify it */
1774 err = errno;
1775 Py_END_ALLOW_THREADS
1776 } while (n < 0 && err == EINTR &&
1777 !(async_err = PyErr_CheckSignals()));
1778 _Py_END_SUPPRESS_IPH
1779
1780 if (async_err) {
1781 /* read() was interrupted by a signal (failed with EINTR)
1782 * and the Python signal handler raised an exception */
1783 errno = err;
1784 assert(errno == EINTR && PyErr_Occurred());
1785 return -1;
1786 }
1787 if (n < 0) {
1788 PyErr_SetFromErrno(PyExc_OSError);
1789 errno = err;
1790 return -1;
1791 }
1792
1793 return n;
1794 }
1795
1796 static Py_ssize_t
_Py_write_impl(int fd,const void * buf,size_t count,int gil_held)1797 _Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
1798 {
1799 Py_ssize_t n;
1800 int err;
1801 int async_err = 0;
1802
1803 _Py_BEGIN_SUPPRESS_IPH
1804 #ifdef MS_WINDOWS
1805 if (count > 32767) {
1806 /* Issue #11395: the Windows console returns an error (12: not
1807 enough space error) on writing into stdout if stdout mode is
1808 binary and the length is greater than 66,000 bytes (or less,
1809 depending on heap usage). */
1810 if (gil_held) {
1811 Py_BEGIN_ALLOW_THREADS
1812 if (isatty(fd)) {
1813 count = 32767;
1814 }
1815 Py_END_ALLOW_THREADS
1816 } else {
1817 if (isatty(fd)) {
1818 count = 32767;
1819 }
1820 }
1821 }
1822 #endif
1823 if (count > _PY_WRITE_MAX) {
1824 count = _PY_WRITE_MAX;
1825 }
1826
1827 if (gil_held) {
1828 do {
1829 Py_BEGIN_ALLOW_THREADS
1830 errno = 0;
1831 #ifdef MS_WINDOWS
1832 n = write(fd, buf, (int)count);
1833 #else
1834 n = write(fd, buf, count);
1835 #endif
1836 /* save/restore errno because PyErr_CheckSignals()
1837 * and PyErr_SetFromErrno() can modify it */
1838 err = errno;
1839 Py_END_ALLOW_THREADS
1840 } while (n < 0 && err == EINTR &&
1841 !(async_err = PyErr_CheckSignals()));
1842 }
1843 else {
1844 do {
1845 errno = 0;
1846 #ifdef MS_WINDOWS
1847 n = write(fd, buf, (int)count);
1848 #else
1849 n = write(fd, buf, count);
1850 #endif
1851 err = errno;
1852 } while (n < 0 && err == EINTR);
1853 }
1854 _Py_END_SUPPRESS_IPH
1855
1856 if (async_err) {
1857 /* write() was interrupted by a signal (failed with EINTR)
1858 and the Python signal handler raised an exception (if gil_held is
1859 nonzero). */
1860 errno = err;
1861 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
1862 return -1;
1863 }
1864 if (n < 0) {
1865 if (gil_held)
1866 PyErr_SetFromErrno(PyExc_OSError);
1867 errno = err;
1868 return -1;
1869 }
1870
1871 return n;
1872 }
1873
1874 /* Write count bytes of buf into fd.
1875
1876 On success, return the number of written bytes, it can be lower than count
1877 including 0. On error, raise an exception, set errno and return -1.
1878
1879 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1880 If the Python signal handler raises an exception, the function returns -1
1881 (the syscall is not retried).
1882
1883 Release the GIL to call write(). The caller must hold the GIL. */
1884 Py_ssize_t
_Py_write(int fd,const void * buf,size_t count)1885 _Py_write(int fd, const void *buf, size_t count)
1886 {
1887 assert(PyGILState_Check());
1888
1889 /* _Py_write() must not be called with an exception set, otherwise the
1890 * caller may think that write() was interrupted by a signal and the signal
1891 * handler raised an exception. */
1892 assert(!PyErr_Occurred());
1893
1894 return _Py_write_impl(fd, buf, count, 1);
1895 }
1896
1897 /* Write count bytes of buf into fd.
1898 *
1899 * On success, return the number of written bytes, it can be lower than count
1900 * including 0. On error, set errno and return -1.
1901 *
1902 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1903 * without calling the Python signal handler. */
1904 Py_ssize_t
_Py_write_noraise(int fd,const void * buf,size_t count)1905 _Py_write_noraise(int fd, const void *buf, size_t count)
1906 {
1907 return _Py_write_impl(fd, buf, count, 0);
1908 }
1909
1910 #ifdef HAVE_READLINK
1911
1912 /* Read value of symbolic link. Encode the path to the locale encoding, decode
1913 the result from the locale encoding.
1914
1915 Return -1 on encoding error, on readlink() error, if the internal buffer is
1916 too short, on decoding error, or if 'buf' is too short. */
1917 int
_Py_wreadlink(const wchar_t * path,wchar_t * buf,size_t buflen)1918 _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
1919 {
1920 char *cpath;
1921 char cbuf[MAXPATHLEN];
1922 size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
1923 wchar_t *wbuf;
1924 Py_ssize_t res;
1925 size_t r1;
1926
1927 cpath = _Py_EncodeLocaleRaw(path, NULL);
1928 if (cpath == NULL) {
1929 errno = EINVAL;
1930 return -1;
1931 }
1932 res = readlink(cpath, cbuf, cbuf_len);
1933 PyMem_RawFree(cpath);
1934 if (res == -1) {
1935 return -1;
1936 }
1937 if ((size_t)res == cbuf_len) {
1938 errno = EINVAL;
1939 return -1;
1940 }
1941 cbuf[res] = '\0'; /* buf will be null terminated */
1942 wbuf = Py_DecodeLocale(cbuf, &r1);
1943 if (wbuf == NULL) {
1944 errno = EINVAL;
1945 return -1;
1946 }
1947 /* wbuf must have space to store the trailing NUL character */
1948 if (buflen <= r1) {
1949 PyMem_RawFree(wbuf);
1950 errno = EINVAL;
1951 return -1;
1952 }
1953 wcsncpy(buf, wbuf, buflen);
1954 PyMem_RawFree(wbuf);
1955 return (int)r1;
1956 }
1957 #endif
1958
1959 #ifdef HAVE_REALPATH
1960
1961 /* Return the canonicalized absolute pathname. Encode path to the locale
1962 encoding, decode the result from the locale encoding.
1963
1964 Return NULL on encoding error, realpath() error, decoding error
1965 or if 'resolved_path' is too short. */
1966 wchar_t*
_Py_wrealpath(const wchar_t * path,wchar_t * resolved_path,size_t resolved_path_len)1967 _Py_wrealpath(const wchar_t *path,
1968 wchar_t *resolved_path, size_t resolved_path_len)
1969 {
1970 char *cpath;
1971 char cresolved_path[MAXPATHLEN];
1972 wchar_t *wresolved_path;
1973 char *res;
1974 size_t r;
1975 cpath = _Py_EncodeLocaleRaw(path, NULL);
1976 if (cpath == NULL) {
1977 errno = EINVAL;
1978 return NULL;
1979 }
1980 res = realpath(cpath, cresolved_path);
1981 PyMem_RawFree(cpath);
1982 if (res == NULL)
1983 return NULL;
1984
1985 wresolved_path = Py_DecodeLocale(cresolved_path, &r);
1986 if (wresolved_path == NULL) {
1987 errno = EINVAL;
1988 return NULL;
1989 }
1990 /* wresolved_path must have space to store the trailing NUL character */
1991 if (resolved_path_len <= r) {
1992 PyMem_RawFree(wresolved_path);
1993 errno = EINVAL;
1994 return NULL;
1995 }
1996 wcsncpy(resolved_path, wresolved_path, resolved_path_len);
1997 PyMem_RawFree(wresolved_path);
1998 return resolved_path;
1999 }
2000 #endif
2001
2002
/* Return 1 if path is absolute, 0 otherwise.

   On Windows, a path is absolute only if PathCchSkipRoot() finds a root
   that is neither a single leading separator nor a bare drive prefix.
   Elsewhere, a path is absolute if it starts with SEP. */
int
_Py_isabs(const wchar_t *path)
{
#ifdef MS_WINDOWS
    const wchar_t *tail;

    if (FAILED(PathCchSkipRoot(path, &tail)) || tail == path) {
        /* No root at all */
        return 0;
    }

    const int root_is_one_char = (tail == &path[1]);
    const int root_is_two_chars = (tail == &path[2]);

    if (root_is_one_char && (path[0] == SEP || path[0] == ALTSEP)) {
        // Exclude paths with leading SEP
        return 0;
    }
    if (root_is_two_chars && path[1] == L':') {
        // Exclude drive-relative paths (e.g. C:filename.ext)
        return 0;
    }
    return 1;
#else
    return (path[0] == SEP) ? 1 : 0;
#endif
}
2025
2026
2027 /* Get an absolute path.
2028 On error (ex: fail to get the current directory), return -1.
2029 On memory allocation failure, set *abspath_p to NULL and return 0.
   On success, store a newly allocated string in *abspath_p and return 0.
2031 The string must be freed by PyMem_RawFree(). */
2032 int
_Py_abspath(const wchar_t * path,wchar_t ** abspath_p)2033 _Py_abspath(const wchar_t *path, wchar_t **abspath_p)
2034 {
2035 if (path[0] == '\0' || !wcscmp(path, L".")) {
2036 wchar_t cwd[MAXPATHLEN + 1];
2037 cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2038 if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2039 /* unable to get the current directory */
2040 return -1;
2041 }
2042 *abspath_p = _PyMem_RawWcsdup(cwd);
2043 return 0;
2044 }
2045
2046 if (_Py_isabs(path)) {
2047 *abspath_p = _PyMem_RawWcsdup(path);
2048 return 0;
2049 }
2050
2051 #ifdef MS_WINDOWS
2052 wchar_t woutbuf[MAX_PATH], *woutbufp = woutbuf;
2053 DWORD result;
2054
2055 result = GetFullPathNameW(path,
2056 Py_ARRAY_LENGTH(woutbuf), woutbuf,
2057 NULL);
2058 if (!result) {
2059 return -1;
2060 }
2061
2062 if (result >= Py_ARRAY_LENGTH(woutbuf)) {
2063 if ((size_t)result <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2064 woutbufp = PyMem_RawMalloc((size_t)result * sizeof(wchar_t));
2065 }
2066 else {
2067 woutbufp = NULL;
2068 }
2069 if (!woutbufp) {
2070 *abspath_p = NULL;
2071 return 0;
2072 }
2073
2074 result = GetFullPathNameW(path, result, woutbufp, NULL);
2075 if (!result) {
2076 PyMem_RawFree(woutbufp);
2077 return -1;
2078 }
2079 }
2080
2081 if (woutbufp != woutbuf) {
2082 *abspath_p = woutbufp;
2083 return 0;
2084 }
2085
2086 *abspath_p = _PyMem_RawWcsdup(woutbufp);
2087 return 0;
2088 #else
2089 wchar_t cwd[MAXPATHLEN + 1];
2090 cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2091 if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2092 /* unable to get the current directory */
2093 return -1;
2094 }
2095
2096 size_t cwd_len = wcslen(cwd);
2097 size_t path_len = wcslen(path);
2098 size_t len = cwd_len + 1 + path_len + 1;
2099 if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2100 *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
2101 }
2102 else {
2103 *abspath_p = NULL;
2104 }
2105 if (*abspath_p == NULL) {
2106 return 0;
2107 }
2108
2109 wchar_t *abspath = *abspath_p;
2110 memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
2111 abspath += cwd_len;
2112
2113 *abspath = (wchar_t)SEP;
2114 abspath++;
2115
2116 memcpy(abspath, path, path_len * sizeof(wchar_t));
2117 abspath += path_len;
2118
2119 *abspath = 0;
2120 return 0;
2121 #endif
2122 }
2123
2124
2125 // The caller must ensure "buffer" is big enough.
2126 static int
join_relfile(wchar_t * buffer,size_t bufsize,const wchar_t * dirname,const wchar_t * relfile)2127 join_relfile(wchar_t *buffer, size_t bufsize,
2128 const wchar_t *dirname, const wchar_t *relfile)
2129 {
2130 #ifdef MS_WINDOWS
2131 if (FAILED(PathCchCombineEx(buffer, bufsize, dirname, relfile,
2132 PATHCCH_ALLOW_LONG_PATHS | PATHCCH_FORCE_ENABLE_LONG_NAME_PROCESS))) {
2133 return -1;
2134 }
2135 #else
2136 assert(!_Py_isabs(relfile));
2137 size_t dirlen = wcslen(dirname);
2138 size_t rellen = wcslen(relfile);
2139 size_t maxlen = bufsize - 1;
2140 if (maxlen > MAXPATHLEN || dirlen >= maxlen || rellen >= maxlen - dirlen) {
2141 return -1;
2142 }
2143 if (dirlen == 0) {
2144 // We do not add a leading separator.
2145 wcscpy(buffer, relfile);
2146 }
2147 else {
2148 if (dirname != buffer) {
2149 wcscpy(buffer, dirname);
2150 }
2151 size_t relstart = dirlen;
2152 if (dirlen > 1 && dirname[dirlen - 1] != SEP) {
2153 buffer[dirlen] = SEP;
2154 relstart += 1;
2155 }
2156 wcscpy(&buffer[relstart], relfile);
2157 }
2158 #endif
2159 return 0;
2160 }
2161
2162 /* Join the two paths together, like os.path.join(). Return NULL
2163 if memory could not be allocated. The caller is responsible
2164 for calling PyMem_RawFree() on the result. */
2165 wchar_t *
_Py_join_relfile(const wchar_t * dirname,const wchar_t * relfile)2166 _Py_join_relfile(const wchar_t *dirname, const wchar_t *relfile)
2167 {
2168 assert(dirname != NULL && relfile != NULL);
2169 #ifndef MS_WINDOWS
2170 assert(!_Py_isabs(relfile));
2171 #endif
2172 size_t maxlen = wcslen(dirname) + 1 + wcslen(relfile);
2173 size_t bufsize = maxlen + 1;
2174 wchar_t *filename = PyMem_RawMalloc(bufsize * sizeof(wchar_t));
2175 if (filename == NULL) {
2176 return NULL;
2177 }
2178 assert(wcslen(dirname) < MAXPATHLEN);
2179 assert(wcslen(relfile) < MAXPATHLEN - wcslen(dirname));
2180 join_relfile(filename, bufsize, dirname, relfile);
2181 return filename;
2182 }
2183
/* Append a relative path to a directory name in place, like os.path.join().

     dirname: buffer that already holds the directory name (NUL-terminated);
              it is also where the joined path is written
     relfile: the path to append; this must be a relative path
     bufsize: total allocated size of the 'dirname' buffer

   Return the result of join_relfile(): -1 if anything is wrong with the
   path lengths. */
int
_Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize)
{
    assert(dirname != NULL);
    assert(relfile != NULL);
    assert(bufsize > 0);

    /* The directory buffer is both the source and the destination. */
    wchar_t *target = dirname;
    int status = join_relfile(target, bufsize, dirname, relfile);
    return status;
}
2197
2198
/* Return the index of the first character following the last SEP in
   'filename', i.e. where the basename starts.  Return 0 if 'filename'
   contains no SEP at all. */
size_t
_Py_find_basename(const wchar_t *filename)
{
    /* 'i' is one past the character being examined, so the scan covers
       filename[len - 1] down to filename[0].  A separator in the very first
       position must be seen too: the basename of "/name" starts at 1. */
    for (size_t i = wcslen(filename); i > 0; --i) {
        if (filename[i - 1] == SEP) {
            return i;
        }
    }
    return 0;
}
2209
/* In-place path normalisation. Returns the start of the normalized
   path, which will be within the original buffer. Guaranteed to not
   make the path longer, and will not fail. 'size' is the length of
   the path, if known. If -1, the first null character will be assumed
   to be the end of the path.

   Repeated separators and '.' segments are dropped, and a '..' segment
   removes the segment before it.  A '..' that has nothing left to remove
   is kept in a relative path and dropped directly after a root.  Trailing
   separators are stripped, and the result is always NUL-terminated. */
wchar_t *
_Py_normpath(wchar_t *path, Py_ssize_t size)
{
    if (!path[0] || size == 0) {
        return path;
    }
    wchar_t lastC = L'\0';      // last character emitted (or implied)
    wchar_t *p1 = path;         // read cursor
    wchar_t *pEnd = size >= 0 ? &path[size] : NULL;  // NULL: stop at NUL
    wchar_t *p2 = path;         // write cursor, never ahead of p1
    wchar_t *minP2 = path;      // p2 is never rewound below this point

#define IS_END(x) (pEnd ? (x) == pEnd : !*(x))
#ifdef ALTSEP
#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
#else
#define IS_SEP(x) (*(x) == SEP)
#endif
#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))

    // Skip leading '.\'
    if (p1[0] == L'.' && IS_SEP(&p1[1])) {
        path = &path[2];
        // Test for the end first so nothing at the end position is read.
        while (!IS_END(path) && IS_SEP(path)) {
            path++;
        }
        p1 = p2 = minP2 = path;
        lastC = SEP;
    }
#ifdef MS_WINDOWS
    // Skip past drive segment and update minP2
    else if (p1[0] && p1[1] == L':') {
        *p2++ = *p1++;
        *p2++ = *p1++;
        minP2 = p2;
        lastC = L':';
    }
    // Skip past all \\-prefixed paths, including \\?\, \\.\,
    // and network paths, including the first segment.
    else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1])) {
        int sepCount = 2;
        *p2++ = SEP;
        *p2++ = SEP;
        p1 += 2;
        for (; !IS_END(p1) && sepCount; ++p1) {
            if (IS_SEP(p1)) {
                --sepCount;
                *p2++ = lastC = SEP;
            } else {
                *p2++ = lastC = *p1;
            }
        }
        if (sepCount) {
            // Incomplete prefix: the whole path was consumed.
            minP2 = p2;
        } else {
            // Absolute path: keep minP2 on the separator ending the
            // prefix so that '..' right after it is absorbed by the root.
            minP2 = p2 - 1;
        }
    }
#else
    // Skip past two leading SEPs
    else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1]) && !IS_SEP(&p1[2])) {
        *p2++ = *p1++;
        *p2++ = *p1++;
        // Absolute path: keep minP2 on the second separator so that '..'
        // right after it is absorbed by the root.
        minP2 = p2 - 1;
        lastC = SEP;
    }
#endif /* MS_WINDOWS */

    /* if pEnd is specified, check that. Else, check for null terminator */
    for (; !IS_END(p1); ++p1) {
        wchar_t c = *p1;
#ifdef ALTSEP
        if (c == ALTSEP) {
            c = SEP;
        }
#endif
        if (lastC == SEP) {
            // At the start of a segment
            if (c == L'.') {
                int sep_at_1 = SEP_OR_END(&p1[1]);
                int sep_at_2 = !sep_at_1 && SEP_OR_END(&p1[2]);
                if (sep_at_2 && p1[1] == L'.') {
                    // '..' segment: find the start of the previous
                    // segment in the output (p3).
                    wchar_t *p3 = p2;
                    while (p3 != minP2 && *--p3 == SEP) { }
                    while (p3 != minP2 && *(p3 - 1) != SEP) { --p3; }
                    if (p2 == minP2
                        || (p3[0] == L'.' && p3[1] == L'.' && IS_SEP(&p3[2])))
                    {
                        // Nothing has been emitted that could be removed
                        // (what sits at minP2 is stale input, not output),
                        // or the previous segment is also ../, so append
                        // instead.
                        *p2++ = L'.';
                        *p2++ = L'.';
                        lastC = L'.';
                    } else if (p3[0] == SEP) {
                        // Absolute path, so absorb segment
                        p2 = p3 + 1;
                    } else {
                        // Drop the previous segment
                        p2 = p3;
                    }
                    // Consume the second '.'; the loop consumes the first
                    p1 += 1;
                } else if (sep_at_1) {
                    // '.' segment: drop it
                } else {
                    *p2++ = lastC = c;
                }
            } else if (c == SEP) {
                // Repeated separator: drop it
            } else {
                *p2++ = lastC = c;
            }
        } else {
            *p2++ = lastC = c;
        }
    }
    *p2 = L'\0';
    // Strip trailing separators, but never the one at minP2
    if (p2 != minP2) {
        while (--p2 != minP2 && *p2 == SEP) {
            *p2 = L'\0';
        }
    }
#undef SEP_OR_END
#undef IS_SEP
#undef IS_END
    return path;
}
2330
2331
2332 /* Get the current directory. buflen is the buffer size in wide characters
2333 including the null character. Decode the path from the locale encoding.
2334
2335 Return NULL on getcwd() error, on decoding error, or if 'buf' is
2336 too short. */
2337 wchar_t*
_Py_wgetcwd(wchar_t * buf,size_t buflen)2338 _Py_wgetcwd(wchar_t *buf, size_t buflen)
2339 {
2340 #ifdef MS_WINDOWS
2341 int ibuflen = (int)Py_MIN(buflen, INT_MAX);
2342 return _wgetcwd(buf, ibuflen);
2343 #else
2344 char fname[MAXPATHLEN];
2345 wchar_t *wname;
2346 size_t len;
2347
2348 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
2349 return NULL;
2350 wname = Py_DecodeLocale(fname, &len);
2351 if (wname == NULL)
2352 return NULL;
2353 /* wname must have space to store the trailing NUL character */
2354 if (buflen <= len) {
2355 PyMem_RawFree(wname);
2356 return NULL;
2357 }
2358 wcsncpy(buf, wname, buflen);
2359 PyMem_RawFree(wname);
2360 return buf;
2361 #endif
2362 }
2363
2364 /* Duplicate a file descriptor. The new file descriptor is created as
2365 non-inheritable. Return a new file descriptor on success, raise an OSError
2366 exception and return -1 on error.
2367
2368 The GIL is released to call dup(). The caller must hold the GIL. */
2369 int
_Py_dup(int fd)2370 _Py_dup(int fd)
2371 {
2372 #ifdef MS_WINDOWS
2373 HANDLE handle;
2374 #endif
2375
2376 assert(PyGILState_Check());
2377
2378 #ifdef MS_WINDOWS
2379 handle = _Py_get_osfhandle(fd);
2380 if (handle == INVALID_HANDLE_VALUE)
2381 return -1;
2382
2383 Py_BEGIN_ALLOW_THREADS
2384 _Py_BEGIN_SUPPRESS_IPH
2385 fd = dup(fd);
2386 _Py_END_SUPPRESS_IPH
2387 Py_END_ALLOW_THREADS
2388 if (fd < 0) {
2389 PyErr_SetFromErrno(PyExc_OSError);
2390 return -1;
2391 }
2392
2393 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2394 _Py_BEGIN_SUPPRESS_IPH
2395 close(fd);
2396 _Py_END_SUPPRESS_IPH
2397 return -1;
2398 }
2399 #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2400 Py_BEGIN_ALLOW_THREADS
2401 _Py_BEGIN_SUPPRESS_IPH
2402 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2403 _Py_END_SUPPRESS_IPH
2404 Py_END_ALLOW_THREADS
2405 if (fd < 0) {
2406 PyErr_SetFromErrno(PyExc_OSError);
2407 return -1;
2408 }
2409
2410 #else
2411 Py_BEGIN_ALLOW_THREADS
2412 _Py_BEGIN_SUPPRESS_IPH
2413 fd = dup(fd);
2414 _Py_END_SUPPRESS_IPH
2415 Py_END_ALLOW_THREADS
2416 if (fd < 0) {
2417 PyErr_SetFromErrno(PyExc_OSError);
2418 return -1;
2419 }
2420
2421 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2422 _Py_BEGIN_SUPPRESS_IPH
2423 close(fd);
2424 _Py_END_SUPPRESS_IPH
2425 return -1;
2426 }
2427 #endif
2428 return fd;
2429 }
2430
2431 #ifndef MS_WINDOWS
2432 /* Get the blocking mode of the file descriptor.
2433 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2434 raise an exception and return -1 on error. */
2435 int
_Py_get_blocking(int fd)2436 _Py_get_blocking(int fd)
2437 {
2438 int flags;
2439 _Py_BEGIN_SUPPRESS_IPH
2440 flags = fcntl(fd, F_GETFL, 0);
2441 _Py_END_SUPPRESS_IPH
2442 if (flags < 0) {
2443 PyErr_SetFromErrno(PyExc_OSError);
2444 return -1;
2445 }
2446
2447 return !(flags & O_NONBLOCK);
2448 }
2449
2450 /* Set the blocking mode of the specified file descriptor.
2451
2452 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2453 otherwise.
2454
2455 Return 0 on success, raise an exception and return -1 on error. */
2456 int
_Py_set_blocking(int fd,int blocking)2457 _Py_set_blocking(int fd, int blocking)
2458 {
2459 /* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2460 Use fcntl() instead. */
2461 #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
2462 int arg = !blocking;
2463 if (ioctl(fd, FIONBIO, &arg) < 0)
2464 goto error;
2465 #else
2466 int flags, res;
2467
2468 _Py_BEGIN_SUPPRESS_IPH
2469 flags = fcntl(fd, F_GETFL, 0);
2470 if (flags >= 0) {
2471 if (blocking)
2472 flags = flags & (~O_NONBLOCK);
2473 else
2474 flags = flags | O_NONBLOCK;
2475
2476 res = fcntl(fd, F_SETFL, flags);
2477 } else {
2478 res = -1;
2479 }
2480 _Py_END_SUPPRESS_IPH
2481
2482 if (res < 0)
2483 goto error;
2484 #endif
2485 return 0;
2486
2487 error:
2488 PyErr_SetFromErrno(PyExc_OSError);
2489 return -1;
2490 }
2491 #else /* MS_WINDOWS */
2492 void*
_Py_get_osfhandle_noraise(int fd)2493 _Py_get_osfhandle_noraise(int fd)
2494 {
2495 void *handle;
2496 _Py_BEGIN_SUPPRESS_IPH
2497 handle = (void*)_get_osfhandle(fd);
2498 _Py_END_SUPPRESS_IPH
2499 return handle;
2500 }
2501
2502 void*
_Py_get_osfhandle(int fd)2503 _Py_get_osfhandle(int fd)
2504 {
2505 void *handle = _Py_get_osfhandle_noraise(fd);
2506 if (handle == INVALID_HANDLE_VALUE)
2507 PyErr_SetFromErrno(PyExc_OSError);
2508
2509 return handle;
2510 }
2511
2512 int
_Py_open_osfhandle_noraise(void * handle,int flags)2513 _Py_open_osfhandle_noraise(void *handle, int flags)
2514 {
2515 int fd;
2516 _Py_BEGIN_SUPPRESS_IPH
2517 fd = _open_osfhandle((intptr_t)handle, flags);
2518 _Py_END_SUPPRESS_IPH
2519 return fd;
2520 }
2521
2522 int
_Py_open_osfhandle(void * handle,int flags)2523 _Py_open_osfhandle(void *handle, int flags)
2524 {
2525 int fd = _Py_open_osfhandle_noraise(handle, flags);
2526 if (fd == -1)
2527 PyErr_SetFromErrno(PyExc_OSError);
2528
2529 return fd;
2530 }
2531 #endif /* MS_WINDOWS */
2532
2533 int
_Py_GetLocaleconvNumeric(struct lconv * lc,PyObject ** decimal_point,PyObject ** thousands_sep)2534 _Py_GetLocaleconvNumeric(struct lconv *lc,
2535 PyObject **decimal_point, PyObject **thousands_sep)
2536 {
2537 assert(decimal_point != NULL);
2538 assert(thousands_sep != NULL);
2539
2540 #ifndef MS_WINDOWS
2541 int change_locale = 0;
2542 if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
2543 change_locale = 1;
2544 }
2545 if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
2546 change_locale = 1;
2547 }
2548
2549 /* Keep a copy of the LC_CTYPE locale */
2550 char *oldloc = NULL, *loc = NULL;
2551 if (change_locale) {
2552 oldloc = setlocale(LC_CTYPE, NULL);
2553 if (!oldloc) {
2554 PyErr_SetString(PyExc_RuntimeWarning,
2555 "failed to get LC_CTYPE locale");
2556 return -1;
2557 }
2558
2559 oldloc = _PyMem_Strdup(oldloc);
2560 if (!oldloc) {
2561 PyErr_NoMemory();
2562 return -1;
2563 }
2564
2565 loc = setlocale(LC_NUMERIC, NULL);
2566 if (loc != NULL && strcmp(loc, oldloc) == 0) {
2567 loc = NULL;
2568 }
2569
2570 if (loc != NULL) {
2571 /* Only set the locale temporarily the LC_CTYPE locale
2572 if LC_NUMERIC locale is different than LC_CTYPE locale and
2573 decimal_point and/or thousands_sep are non-ASCII or longer than
2574 1 byte */
2575 setlocale(LC_CTYPE, loc);
2576 }
2577 }
2578
2579 #define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2580 #else /* MS_WINDOWS */
2581 /* Use _W_* fields of Windows strcut lconv */
2582 #define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2583 #endif /* MS_WINDOWS */
2584
2585 int res = -1;
2586
2587 *decimal_point = GET_LOCALE_STRING(decimal_point);
2588 if (*decimal_point == NULL) {
2589 goto done;
2590 }
2591
2592 *thousands_sep = GET_LOCALE_STRING(thousands_sep);
2593 if (*thousands_sep == NULL) {
2594 goto done;
2595 }
2596
2597 res = 0;
2598
2599 done:
2600 #ifndef MS_WINDOWS
2601 if (loc != NULL) {
2602 setlocale(LC_CTYPE, oldloc);
2603 }
2604 PyMem_Free(oldloc);
2605 #endif
2606 return res;
2607
2608 #undef GET_LOCALE_STRING
2609 }
2610
2611 /* Our selection logic for which function to use is as follows:
2612 * 1. If close_range(2) is available, always prefer that; it's better for
2613 * contiguous ranges like this than fdwalk(3) which entails iterating over
2614 * the entire fd space and simply doing nothing for those outside the range.
2615 * 2. If closefrom(2) is available, we'll attempt to use that next if we're
2616 * closing up to sysconf(_SC_OPEN_MAX).
2617 * 2a. Fallback to fdwalk(3) if we're not closing up to sysconf(_SC_OPEN_MAX),
2618 * as that will be more performant if the range happens to have any chunk of
2619 * non-opened fd in the middle.
2620 * 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
2621 */
/* USE_CLOSEFROM enables the closefrom() branch of _Py_closerange();
   it is only defined on FreeBSD. */
#ifdef __FreeBSD__
# define USE_CLOSEFROM
#endif /* __FreeBSD__ */

/* USE_FDWALK selects fdwalk() over the plain close() loop in
   _Py_closerange(); it follows HAVE_FDWALK (presumably set by the build
   configuration when fdwalk() is available). */
#ifdef HAVE_FDWALK
# define USE_FDWALK
#endif /* HAVE_FDWALK */
2629
2630 #ifdef USE_FDWALK
/* Callback passed to fdwalk() by _Py_closerange().

   'lohi' points to two ints: lohi[0] is the lowest descriptor to close
   and lohi[1] is one past the highest. Descriptors in that range are
   closed, ignoring errors.

   Return 1 once 'fd' is at or above the upper bound (presumably this tells
   fdwalk() to stop iterating), and 0 otherwise. */
static int
_fdwalk_close_func(void *lohi, int fd)
{
    const int *bounds = (const int *)lohi;
    const int lowest = bounds[0];
    const int past_highest = bounds[1];

    if (fd >= past_highest) {
        return 1;
    }
    if (fd >= lowest) {
        /* Ignore errors */
        (void)close(fd);
    }
    return 0;
}
2646 #endif /* USE_FDWALK */
2647
2648 /* Closes all file descriptors in [first, last], ignoring errors. */
2649 void
_Py_closerange(int first,int last)2650 _Py_closerange(int first, int last)
2651 {
2652 first = Py_MAX(first, 0);
2653 _Py_BEGIN_SUPPRESS_IPH
2654 #ifdef HAVE_CLOSE_RANGE
2655 if (close_range(first, last, 0) == 0 || errno != ENOSYS) {
2656 /* Any errors encountered while closing file descriptors are ignored;
2657 * ENOSYS means no kernel support, though,
2658 * so we'll fallback to the other methods. */
2659 }
2660 else
2661 #endif /* HAVE_CLOSE_RANGE */
2662 #ifdef USE_CLOSEFROM
2663 if (last >= sysconf(_SC_OPEN_MAX)) {
2664 /* Any errors encountered while closing file descriptors are ignored */
2665 closefrom(first);
2666 }
2667 else
2668 #endif /* USE_CLOSEFROM */
2669 #ifdef USE_FDWALK
2670 {
2671 int lohi[2];
2672 lohi[0] = first;
2673 lohi[1] = last + 1;
2674 fdwalk(_fdwalk_close_func, lohi);
2675 }
2676 #else
2677 {
2678 for (int i = first; i <= last; i++) {
2679 /* Ignore errors */
2680 (void)close(i);
2681 }
2682 }
2683 #endif /* USE_FDWALK */
2684 _Py_END_SUPPRESS_IPH
2685 }
2686