1 // Various functions, mostly string utilities, that are used by most parts of fish.
2 #include "config.h"
3
4 #ifdef HAVE_BACKTRACE_SYMBOLS
5 #include <cxxabi.h>
6 #endif
7
8 #include <ctype.h>
9 #include <dlfcn.h>
10 #include <errno.h>
11 #include <fcntl.h>
12 #include <limits.h>
13 #include <paths.h>
14 #include <pthread.h>
15 #include <stdarg.h>
16 #include <stddef.h>
17 #include <stdint.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <sys/stat.h>
21 #include <sys/time.h>
22 #include <termios.h>
23 #include <unistd.h>
24 #include <wctype.h>
25
26 #include <cstring>
27 #include <cwchar>
28 #ifdef HAVE_EXECINFO_H
29 #include <execinfo.h>
30 #endif
31
32 #ifdef __linux__
33 // Includes for WSL detection
34 #include <sys/utsname.h>
35 #endif
36
37 #include <algorithm>
38 #include <atomic>
39 #include <memory> // IWYU pragma: keep
40 #include <type_traits>
41
42 #include "common.h"
43 #include "env.h"
44 #include "expand.h"
45 #include "fallback.h" // IWYU pragma: keep
46 #include "flog.h"
47 #include "future_feature_flags.h"
48 #include "global_safety.h"
49 #include "iothread.h"
50 #include "parser.h"
51 #include "proc.h"
52 #include "signal.h"
53 #include "termsize.h"
54 #include "wcstringutil.h"
55 #include "wildcard.h"
56 #include "wutil.h" // IWYU pragma: keep
57
58 // Keep after "common.h"
59 #ifdef __BSD__
60 #include <sys/sysctl.h>
61 #elif defined(__APPLE__)
62 #include <mach-o/dyld.h>
63 #endif
64
/// Global termios state. This file only provides the storage for it.
struct termios shell_modes;

/// A shared, empty wide string constant.
const wcstring g_empty_string{};

/// This allows us to notice when we've forked.
static relaxed_atomic_bool_t is_forked_proc{false};
/// This allows us to bypass the main thread checks.
static relaxed_atomic_bool_t thread_asserts_cfg_for_testing{false};
73
74 static relaxed_atomic_t<wchar_t> ellipsis_char;
get_ellipsis_char()75 wchar_t get_ellipsis_char() { return ellipsis_char; }
76
77 static relaxed_atomic_t<const wchar_t *> ellipsis_str;
get_ellipsis_str()78 const wchar_t *get_ellipsis_str() { return ellipsis_str; }
79
80 static relaxed_atomic_t<const wchar_t *> omitted_newline_str;
get_omitted_newline_str()81 const wchar_t *get_omitted_newline_str() { return omitted_newline_str; }
82
83 static relaxed_atomic_t<int> omitted_newline_width;
get_omitted_newline_width()84 int get_omitted_newline_width() { return omitted_newline_width; }
85
86 static relaxed_atomic_t<wchar_t> obfuscation_read_char;
get_obfuscation_read_char()87 wchar_t get_obfuscation_read_char() { return obfuscation_read_char; }
88
/// Global profiling flag; off by default.
bool g_profiling_active = false;
/// Name of the running program. It is printed as a prefix by debug_shared() and compared against
/// TESTS_PROGRAM_NAME by should_suppress_stderr_for_tests().
const wchar_t *program_name;
std::atomic<int> debug_level{1};  // default maximum debug output level (errors and warnings)

/// Be able to restore the term's foreground process group.
/// This is set during startup and not modified after.
static relaxed_atomic_t<pid_t> initial_fg_process_group{-1};

/// Forward declaration; the definition appears later in this file.
static void debug_shared(wchar_t msg_level, const wcstring &msg);
98
#if defined(OS_IS_CYGWIN) || defined(WSL)
// MS Windows tty devices do not currently have either a read or write timestamp. Those
// respective fields of `struct stat` are always the current time, which means we can't
// use them. So we assume no external program has written to the terminal behind our
// back. This makes multiline prompts usable. See issue #2859 and
// https://github.com/Microsoft/BashOnWindows/issues/545
const bool has_working_tty_timestamps = false;
#else
const bool has_working_tty_timestamps = true;
#endif
109
/// Interpret \p d as a single digit in the given \p base.
/// '0'-'9' map to 0-9 and letters of either case map to 10-35.
/// \return the digit's value, or -1 if \p d is not such a character or its value is not
/// strictly below \p base.
long convert_digit(wchar_t d, int base) {
    long value;
    if (d >= L'0' && d <= L'9') {
        value = d - L'0';
    } else if (d >= L'a' && d <= L'z') {
        value = d - L'a' + 10;
    } else if (d >= L'A' && d <= L'Z') {
        value = d - L'A' + 10;
    } else {
        return -1;
    }
    return value < base ? value : -1;
}
127
/// Test whether the char is a valid hex digit as used by the `escape_string_*()` functions,
/// that is one of `0`-`9` or uppercase `A`-`F`.
///
/// Explicit range checks are used instead of std::strchr(): strchr() converts its argument to
/// char and treats the terminating NUL as part of the string, so 0 and out-of-range values such
/// as 0x130 would be misreported as hex digits.
static bool is_hex_digit(int c) { return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F'); }
130
/// This is a specialization of `convert_digit()` that only handles base 16 and only uppercase.
/// \return the value 0-15 of \p d, or -1 if \p d is not one of `0`-`9` or `A`-`F`.
static long convert_hex_digit(wchar_t d) {
    if ((d <= L'9') && (d >= L'0')) {
        return d - L'0';
    } else if ((d <= L'F') && (d >= L'A')) {
        // Only A-F are hex digits; accepting up to 'Z' would yield values of 16 and above.
        return 10 + d - L'A';
    }

    return -1;
}
141
is_windows_subsystem_for_linux()142 bool is_windows_subsystem_for_linux() {
143 #if defined(WSL)
144 return true;
145 #elif not defined(__linux__)
146 return false;
147 #else
148 // We are purposely not using std::call_once as it may invoke locking, which is an unnecessary
149 // overhead since there's no actual race condition here - even if multiple threads call this
150 // routine simultaneously the first time around, we just end up needlessly querying uname(2) one
151 // more time.
152
153 static bool wsl_state = [] {
154 utsname info;
155 uname(&info);
156
157 // Sample utsname.release under WSL, testing for something like `4.4.0-17763-Microsoft`
158 if (std::strstr(info.release, "Microsoft") != nullptr) {
159 const char *dash = std::strchr(info.release, '-');
160 if (dash == nullptr || strtod(dash + 1, nullptr) < 17763) {
161 // #5298, #5661: There are acknowledged, published, and (later) fixed issues with
162 // job control under early WSL releases that prevent fish from running correctly,
163 // with unexpected failures when piping. Fish 3.0 nightly builds worked around this
164 // issue with some needlessly complicated code that was later stripped from the
165 // fish 3.0 release, so we just bail. Note that fish 2.0 was also broken, but we
166 // just didn't warn about it.
167
168 // #6038 & 5101bde: It's been requested that there be some sort of way to disable
169 // this check: if the environment variable FISH_NO_WSL_CHECK is present, this test
170 // is bypassed. We intentionally do not include this in the error message because
171 // it'll only allow fish to run but not to actually work. Here be dragons!
172 if (getenv("FISH_NO_WSL_CHECK") == nullptr) {
173 FLOGF(error,
174 "This version of WSL has known bugs that prevent fish from working."
175 "Please upgrade to Windows 10 1809 (17763) or higher to use fish!");
176 }
177 }
178
179 return true;
180 } else {
181 return false;
182 }
183 }();
184
185 // Subsequent calls to this function may take place after fork() and before exec() in
186 // postfork.cpp. Make sure we never dynamically allocate any memory in the fast path!
187 return wsl_state;
188 #endif
189 }
190
191 #ifdef HAVE_BACKTRACE_SYMBOLS
192 // This function produces a stack backtrace with demangled function & method names. It is based on
193 // https://gist.github.com/fmela/591333 but adapted to the style of the fish project.
demangled_backtrace(int max_frames,int skip_levels)194 [[gnu::noinline]] static wcstring_list_t demangled_backtrace(int max_frames, int skip_levels) {
195 void *callstack[128];
196 const int n_max_frames = sizeof(callstack) / sizeof(callstack[0]);
197 int n_frames = backtrace(callstack, n_max_frames);
198 char **symbols = backtrace_symbols(callstack, n_frames);
199 wchar_t text[1024];
200 wcstring_list_t backtrace_text;
201
202 if (skip_levels + max_frames < n_frames) n_frames = skip_levels + max_frames;
203
204 for (int i = skip_levels; i < n_frames; i++) {
205 Dl_info info;
206 if (dladdr(callstack[i], &info) && info.dli_sname) {
207 char *demangled = nullptr;
208 int status = -1;
209 if (info.dli_sname[0] == '_')
210 demangled = abi::__cxa_demangle(info.dli_sname, nullptr, nullptr, &status);
211 swprintf(text, sizeof(text) / sizeof(wchar_t), L"%-3d %s + %td", i - skip_levels,
212 status == 0 ? demangled
213 : info.dli_sname == nullptr ? symbols[i]
214 : info.dli_sname,
215 static_cast<char *>(callstack[i]) - static_cast<const char *>(info.dli_saddr));
216 free(demangled);
217 } else {
218 swprintf(text, sizeof(text) / sizeof(wchar_t), L"%-3d %s", i - skip_levels, symbols[i]);
219 }
220 backtrace_text.push_back(text);
221 }
222 free(symbols);
223 return backtrace_text;
224 }
225
show_stackframe(const wchar_t msg_level,int frame_count,int skip_levels)226 [[gnu::noinline]] void show_stackframe(const wchar_t msg_level, int frame_count, int skip_levels) {
227 if (frame_count < 1) return;
228
229 wcstring_list_t bt = demangled_backtrace(frame_count, skip_levels + 2);
230 debug_shared(msg_level, L"Backtrace:\n" + join_strings(bt, L'\n') + L'\n');
231 }
232
233 #else // HAVE_BACKTRACE_SYMBOLS
234
show_stackframe(const wchar_t msg_level,int,int)235 [[gnu::noinline]] void show_stackframe(const wchar_t msg_level, int, int) {
236 debug_shared(msg_level, L"Sorry, but your system does not support backtraces");
237 }
238 #endif // HAVE_BACKTRACE_SYMBOLS
239
/// \return the smallest pointer in the range [start, start + len] which is aligned to Align.
/// If there is no such pointer, return \p start + len.
/// Align must be a power of 2 and in range [1, 64].
/// This is intended to return the end point of the "unaligned prefix" of a vectorized loop.
template <size_t Align>
inline const char *align_start(const char *start, size_t len) {
    static_assert(Align >= 1 && Align <= 64, "Alignment must be in range [1, 64]");
    static_assert((Align & (Align - 1)) == 0, "Alignment must be power of 2");
    const uintptr_t address = reinterpret_cast<uintptr_t>(start);
    // Bytes by which the address exceeds the previous aligned boundary.
    const uintptr_t skew = address & (Align - 1);
    // Bytes needed to reach the next boundary; none if we are already on one.
    const size_t padding = skew == 0 ? 0 : static_cast<size_t>(Align - skew);
    // Never step beyond the end of the range.
    return start + (padding < len ? padding : len);
}
260
/// \return the largest pointer in the range [start, start + len] which is aligned to Align.
/// If there is no such pointer, return \p start.
/// This is intended to be the start point of the "unaligned suffix" of a vectorized loop.
template <size_t Align>
inline const char *align_end(const char *start, size_t len) {
    static_assert(Align >= 1 && Align <= 64, "Alignment must be in range [1, 64]");
    static_assert((Align & (Align - 1)) == 0, "Alignment must be power of 2");
    const char *const range_end = start + len;
    // Bytes by which the end of the range overshoots the previous aligned boundary.
    const size_t overshoot =
        static_cast<size_t>(reinterpret_cast<uintptr_t>(range_end) & (Align - 1));
    // Step back by that much, but never before the start of the range.
    return range_end - (overshoot < len ? overshoot : len);
}
273
274 /// \return the count of initial characters in \p in which are ASCII.
count_ascii_prefix(const char * in,size_t in_len)275 static size_t count_ascii_prefix(const char *in, size_t in_len) {
276 // We'll use aligned reads of this type.
277 using WordType = uint32_t;
278 const char *aligned_start = align_start<alignof(WordType)>(in, in_len);
279 const char *aligned_end = align_end<alignof(WordType)>(in, in_len);
280
281 // Consume the unaligned prefix.
282 for (const char *cursor = in; cursor < aligned_start; cursor++) {
283 if (cursor[0] & 0x80) return &cursor[0] - in;
284 }
285
286 // Consume the aligned middle.
287 for (const char *cursor = aligned_start; cursor < aligned_end; cursor += sizeof(WordType)) {
288 if (*reinterpret_cast<const WordType *>(cursor) & 0x80808080) {
289 if (cursor[0] & 0x80) return &cursor[0] - in;
290 if (cursor[1] & 0x80) return &cursor[1] - in;
291 if (cursor[2] & 0x80) return &cursor[2] - in;
292 return &cursor[3] - in;
293 }
294 }
295
296 // Consume the unaligned suffix.
297 for (const char *cursor = aligned_end; cursor < in + in_len; cursor++) {
298 if (cursor[0] & 0x80) return &cursor[0] - in;
299 }
300 return in_len;
301 }
302
303 /// Converts the narrow character string \c in into its wide equivalent, and return it.
304 ///
305 /// The string may contain embedded nulls.
306 ///
307 /// This function encodes illegal character sequences in a reversible way using the private use
308 /// area.
str2wcs_internal(const char * in,const size_t in_len)309 static wcstring str2wcs_internal(const char *in, const size_t in_len) {
310 if (in_len == 0) return wcstring();
311 assert(in != nullptr);
312
313 wcstring result;
314 result.reserve(in_len);
315
316 // In the unlikely event that MB_CUR_MAX is 1, then we are just going to append.
317 if (MB_CUR_MAX == 1) {
318 size_t in_pos = 0;
319 while (in_pos < in_len) {
320 result.push_back(static_cast<unsigned char>(in[in_pos]));
321 in_pos++;
322 }
323 return result;
324 }
325
326 size_t in_pos = 0;
327 mbstate_t state = {};
328 while (in_pos < in_len) {
329 // Append any initial sequence of ascii characters.
330 // Note we do not support character sets which are not supersets of ASCII.
331 size_t ascii_prefix_length = count_ascii_prefix(&in[in_pos], in_len - in_pos);
332 result.insert(result.end(), &in[in_pos], &in[in_pos + ascii_prefix_length]);
333 in_pos += ascii_prefix_length;
334 assert(in_pos <= in_len && "Position overflowed length");
335 if (in_pos == in_len) break;
336
337 // We have found a non-ASCII character.
338 bool use_encode_direct = false;
339 size_t ret = 0;
340 wchar_t wc = 0;
341
342 if (false) {
343 #if defined(HAVE_BROKEN_MBRTOWC_UTF8)
344 } else if ((in[in_pos] & 0xF8) == 0xF8) {
345 // Protect against broken std::mbrtowc() implementations which attempt to encode UTF-8
346 // sequences longer than four bytes (e.g., OS X Snow Leopard).
347 use_encode_direct = true;
348 #endif
349 } else if (sizeof(wchar_t) == 2 && //!OCLINT(constant if expression)
350 (in[in_pos] & 0xF8) == 0xF0) {
351 // Assume we are in a UTF-16 environment (e.g., Cygwin) using a UTF-8 encoding.
352 // The bits set check will be true for a four byte UTF-8 sequence that requires
353 // two UTF-16 chars. Something that doesn't work with our simple use of std::mbrtowc().
354 use_encode_direct = true;
355 } else {
356 ret = std::mbrtowc(&wc, &in[in_pos], in_len - in_pos, &state);
357 // Determine whether to encode this character with our crazy scheme.
358 if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) {
359 use_encode_direct = true;
360 } else if (wc == INTERNAL_SEPARATOR) {
361 use_encode_direct = true;
362 } else if (ret == static_cast<size_t>(-2)) {
363 // Incomplete sequence.
364 use_encode_direct = true;
365 } else if (ret == static_cast<size_t>(-1)) {
366 // Invalid data.
367 use_encode_direct = true;
368 } else if (ret > in_len - in_pos) {
369 // Other error codes? Terrifying, should never happen.
370 use_encode_direct = true;
371 } else if (sizeof(wchar_t) == 2 && wc >= 0xD800 && //!OCLINT(constant if expression)
372 wc <= 0xDFFF) {
373 // If we get a surrogate pair char on a UTF-16 system (e.g., Cygwin) then
374 // it's guaranteed the UTF-8 decoding is wrong so use direct encoding.
375 use_encode_direct = true;
376 }
377 }
378
379 if (use_encode_direct) {
380 wc = ENCODE_DIRECT_BASE + static_cast<unsigned char>(in[in_pos]);
381 result.push_back(wc);
382 in_pos++;
383 std::memset(&state, 0, sizeof state);
384 } else if (ret == 0) { // embedded null byte!
385 result.push_back(L'\0');
386 in_pos++;
387 std::memset(&state, 0, sizeof state);
388 } else { // normal case
389 result.push_back(wc);
390 in_pos += ret;
391 }
392 }
393
394 return result;
395 }
396
str2wcstring(const char * in,size_t len)397 wcstring str2wcstring(const char *in, size_t len) { return str2wcs_internal(in, len); }
398
str2wcstring(const char * in)399 wcstring str2wcstring(const char *in) { return str2wcs_internal(in, std::strlen(in)); }
400
str2wcstring(const std::string & in)401 wcstring str2wcstring(const std::string &in) {
402 // Handles embedded nulls!
403 return str2wcs_internal(in.data(), in.size());
404 }
405
str2wcstring(const std::string & in,size_t len)406 wcstring str2wcstring(const std::string &in, size_t len) {
407 // Handles embedded nulls!
408 return str2wcs_internal(in.data(), len);
409 }
410
wcs2string(const wcstring & input)411 std::string wcs2string(const wcstring &input) { return wcs2string(input.data(), input.size()); }
412
wcs2string(const wchar_t * in,size_t len)413 std::string wcs2string(const wchar_t *in, size_t len) {
414 if (len == 0) return std::string{};
415 std::string result;
416 wcs2string_appending(in, len, &result);
417 return result;
418 }
419
wcs2string_appending(const wchar_t * in,size_t len,std::string * receiver)420 void wcs2string_appending(const wchar_t *in, size_t len, std::string *receiver) {
421 assert(receiver && "Null receiver");
422 receiver->reserve(receiver->size() + len);
423 wcs2string_callback(in, len, [&](const char *buff, size_t bufflen) {
424 receiver->append(buff, bufflen);
425 return true;
426 });
427 }
428
/// Test if the character can be encoded using the current locale, as judged by whether
/// std::wcrtomb() succeeds on it from a fresh conversion state.
static bool can_be_encoded(wchar_t wc) {
    mbstate_t state = {};
    char scratch[MB_LEN_MAX];
    const size_t written = std::wcrtomb(scratch, wc, &state);
    return written != static_cast<size_t>(-1);
}
436
format_string(const wchar_t * format,...)437 wcstring format_string(const wchar_t *format, ...) {
438 va_list va;
439 va_start(va, format);
440 wcstring result = vformat_string(format, va);
441 va_end(va);
442 return result;
443 }
444
append_formatv(wcstring & target,const wchar_t * format,va_list va_orig)445 void append_formatv(wcstring &target, const wchar_t *format, va_list va_orig) {
446 const int saved_err = errno;
447 // As far as I know, there is no way to check if a vswprintf-call failed because of a badly
448 // formated string option or because the supplied destination string was to small. In GLIBC,
449 // errno seems to be set to EINVAL either way.
450 //
451 // Because of this, on failure we try to increase the buffer size until the free space is
452 // larger than max_size, at which point it will conclude that the error was probably due to a
453 // badly formated string option, and return an error. Make sure to null terminate string before
454 // that, though.
455 const size_t max_size = (128 * 1024 * 1024);
456 wchar_t static_buff[256];
457 size_t size = 0;
458 wchar_t *buff = nullptr;
459 int status = -1;
460 while (status < 0) {
461 // Reallocate if necessary.
462 if (size == 0) {
463 buff = static_buff;
464 size = sizeof static_buff;
465 } else {
466 size *= 2;
467 if (size >= max_size) {
468 buff[0] = '\0';
469 break;
470 }
471 buff = static_cast<wchar_t *>(realloc((buff == static_buff ? nullptr : buff), size));
472 assert(buff != nullptr);
473 }
474
475 // Try printing.
476 va_list va;
477 va_copy(va, va_orig);
478 status = std::vswprintf(buff, size / sizeof(wchar_t), format, va);
479 va_end(va);
480 }
481
482 target.append(buff);
483
484 if (buff != static_buff) {
485 free(buff);
486 }
487
488 errno = saved_err;
489 }
490
vformat_string(const wchar_t * format,va_list va_orig)491 wcstring vformat_string(const wchar_t *format, va_list va_orig) {
492 wcstring result;
493 append_formatv(result, format, va_orig);
494 return result;
495 }
496
append_format(wcstring & str,const wchar_t * format,...)497 void append_format(wcstring &str, const wchar_t *format, ...) {
498 va_list va;
499 va_start(va, format);
500 append_formatv(str, format, va);
501 va_end(va);
502 }
503
/// Given \p pos pointing at an opening quote character, find the matching closing quote.
/// A backslash escapes the character that follows it, so an escaped quote does not match.
/// \return a pointer to the closing quote, or nullptr if the string ends first.
wchar_t *quote_end(const wchar_t *pos) {
    const wchar_t quote = *pos;
    for (const wchar_t *cursor = pos + 1; *cursor != L'\0'; ++cursor) {
        if (*cursor == L'\\') {
            // Skip the escaped character; a trailing backslash means there is no closing quote.
            ++cursor;
            if (*cursor == L'\0') return nullptr;
        } else if (*cursor == quote) {
            return const_cast<wchar_t *>(cursor);
        }
    }
    return nullptr;
}
523
fish_setlocale()524 void fish_setlocale() {
525 // Use various Unicode symbols if they can be encoded using the current locale, else a simple
526 // ASCII char alternative. All of the can_be_encoded() invocations should return the same
527 // true/false value since the code points are in the BMP but we're going to be paranoid. This
528 // is also technically wrong if we're not in a Unicode locale but we expect (or hope)
529 // can_be_encoded() will return false in that case.
530 if (can_be_encoded(L'\u2026')) {
531 ellipsis_char = L'\u2026';
532 ellipsis_str = L"\u2026";
533 } else {
534 ellipsis_char = L'$'; // "horizontal ellipsis"
535 ellipsis_str = L"...";
536 }
537
538 if (is_windows_subsystem_for_linux()) {
539 // neither of \u23CE and \u25CF can be displayed in the default fonts on Windows, though
540 // they can be *encoded* just fine. Use alternative glyphs.
541 omitted_newline_str = L"\u00b6"; // "pilcrow"
542 omitted_newline_width = 1;
543 obfuscation_read_char = L'\u2022'; // "bullet"
544 } else if (is_console_session()) {
545 omitted_newline_str = L"^J";
546 omitted_newline_width = 2;
547 obfuscation_read_char = L'*';
548 } else {
549 if (can_be_encoded(L'\u23CE')) {
550 omitted_newline_str = L"\u23CE"; // "return symbol" (⏎)
551 omitted_newline_width = 1;
552 } else {
553 omitted_newline_str = L"^J";
554 omitted_newline_width = 2;
555 }
556 obfuscation_read_char = can_be_encoded(L'\u25CF') ? L'\u25CF' : L'#'; // "black circle"
557 }
558 }
559
/// Call read() on \p fd, retrying for as long as it fails with EINTR.
/// \return the result of the final read() call.
long read_blocked(int fd, void *buf, size_t count) {
    for (;;) {
        const ssize_t amt = read(fd, buf, count);
        if (amt >= 0 || errno != EINTR) return amt;
    }
}
567
/// Keep calling write() until all \p count bytes of \p buff have been written to \p fd.
/// EAGAIN and EINTR are treated as non-critical and simply retried.
/// \return the number of bytes written (\p count), or -1 with errno set on a critical error.
ssize_t write_loop(int fd, const char *buff, size_t count) {
    size_t written = 0;
    while (written < count) {
        const ssize_t amt = write(fd, buff + written, count - written);
        if (amt >= 0) {
            written += static_cast<size_t>(amt);
            continue;
        }
        if (errno != EAGAIN && errno != EINTR) {
            return -1;
        }
    }
    return static_cast<ssize_t>(written);
}
584
/// Call read() on \p fd, retrying for as long as it fails with EAGAIN or EINTR.
/// \return the result of the final read() call.
ssize_t read_loop(int fd, void *buff, size_t count) {
    for (;;) {
        const ssize_t amt = read(fd, buff, count);
        const bool retry = amt < 0 && (errno == EAGAIN || errno == EINTR);
        if (!retry) return amt;
    }
}
592
593 /// Hack to not print error messages in the tests. Do not call this from functions in this module
594 /// like `debug()`. It is only intended to suppress diagnostic noise from testing things like the
595 /// fish parser where we expect a lot of diagnostic messages due to testing error conditions.
should_suppress_stderr_for_tests()596 bool should_suppress_stderr_for_tests() {
597 return program_name && !std::wcscmp(program_name, TESTS_PROGRAM_NAME);
598 }
599
debug_shared(const wchar_t level,const wcstring & msg)600 static void debug_shared(const wchar_t level, const wcstring &msg) {
601 pid_t current_pid;
602 if (!is_forked_child()) {
603 std::fwprintf(stderr, L"<%lc> %ls: %ls\n", level, program_name, msg.c_str());
604 } else {
605 current_pid = getpid();
606 std::fwprintf(stderr, L"<%lc> %ls: %d: %ls\n", level, program_name, current_pid,
607 msg.c_str());
608 }
609 }
610
debug_safe(int level,const char * msg,const char * param1,const char * param2,const char * param3,const char * param4,const char * param5,const char * param6,const char * param7,const char * param8,const char * param9,const char * param10,const char * param11,const char * param12)611 void debug_safe(int level, const char *msg, const char *param1, const char *param2,
612 const char *param3, const char *param4, const char *param5, const char *param6,
613 const char *param7, const char *param8, const char *param9, const char *param10,
614 const char *param11, const char *param12) {
615 const char *const params[] = {param1, param2, param3, param4, param5, param6,
616 param7, param8, param9, param10, param11, param12};
617 if (!msg) return;
618
619 // Can't call fwprintf, that may allocate memory Just call write() over and over.
620 if (level > debug_level) return;
621 int errno_old = errno;
622
623 size_t param_idx = 0;
624 const char *cursor = msg;
625 while (*cursor != '\0') {
626 const char *end = std::strchr(cursor, '%');
627 if (end == nullptr) end = cursor + std::strlen(cursor);
628
629 ignore_result(write(STDERR_FILENO, cursor, end - cursor));
630
631 if (end[0] == '%' && end[1] == 's') {
632 // Handle a format string.
633 assert(param_idx < sizeof params / sizeof *params);
634 const char *format = params[param_idx++];
635 if (!format) format = "(null)";
636 ignore_result(write(STDERR_FILENO, format, std::strlen(format)));
637 cursor = end + 2;
638 } else if (end[0] == '\0') {
639 // Must be at the end of the string.
640 cursor = end;
641 } else {
642 // Some other format specifier, just skip it.
643 cursor = end + 1;
644 }
645 }
646
647 // We always append a newline.
648 ignore_result(write(STDERR_FILENO, "\n", 1));
649
650 errno = errno_old;
651 }
652
/// \return the magnitude of \p x as an unsigned value.
/// Careful to not negate LLONG_MIN: negative inputs are moved one step toward zero before
/// being negated, and the step is added back in the unsigned domain.
static unsigned long long absolute_value(long long x) {
    if (x < 0) {
        const long long shifted = -(x + 1);
        return static_cast<unsigned long long>(shifted) + 1;
    }
    return static_cast<unsigned long long>(x);
}
659
/// Write the decimal representation of \p val, followed by a terminator, into \p buff.
/// \p size is the capacity of \p buff in characters; it is only checked by an assertion after
/// the fact, so callers must supply a large enough buffer.
template <typename CharT>
void format_safe_impl(CharT *buff, size_t size, unsigned long long val) {
    size_t idx = 0;
    // Emit the digits least significant first (a lone '0' for zero), then flip them around.
    do {
        buff[idx++] = static_cast<CharT>((val % 10) + '0');
        val /= 10;
    } while (val != 0);
    std::reverse(buff, buff + idx);
    buff[idx++] = '\0';
    assert(idx <= size && "Buffer overflowed");
}
676
format_long_safe(char buff[64],long val)677 void format_long_safe(char buff[64], long val) {
678 unsigned long long uval = absolute_value(val);
679 if (val >= 0) {
680 format_safe_impl(buff, 64, uval);
681 } else {
682 buff[0] = '-';
683 format_safe_impl(buff + 1, 63, uval);
684 }
685 }
686
format_long_safe(wchar_t buff[64],long val)687 void format_long_safe(wchar_t buff[64], long val) {
688 unsigned long long uval = absolute_value(val);
689 if (val >= 0) {
690 format_safe_impl(buff, 64, uval);
691 } else {
692 buff[0] = '-';
693 format_safe_impl(buff + 1, 63, uval);
694 }
695 }
696
format_ullong_safe(wchar_t buff[64],unsigned long long val)697 void format_ullong_safe(wchar_t buff[64], unsigned long long val) {
698 return format_safe_impl(buff, 64, val);
699 }
700
/// Copy the characters of \p s with values up to 127 into the 64 byte buffer \p buff, dropping
/// all others. At most 63 characters are copied and the result is always nul-terminated.
void narrow_string_safe(char buff[64], const wchar_t *s) {
    const size_t max_chars = 64 - 1;  // leave room for the terminator
    size_t out = 0;
    for (const wchar_t *cursor = s; *cursor != L'\0'; ++cursor) {
        if (*cursor > 127) continue;
        buff[out++] = char(*cursor);
        if (out == max_chars) break;
    }
    buff[out] = '\0';
}
714
/// Word-wrap \p msg to the width given by \p termsize and return the result, which always ends
/// in a newline.
///
/// The message is split into tokens on space, newline, carriage return and tab. Tokens are
/// joined by single spaces, and a newline is inserted whenever the next token would not fit on
/// the current line. A token wider than a line is broken up, each piece being emitted on its own
/// line followed by a '-'. If the terminal width is 0 the message is passed through unchanged.
wcstring reformat_for_screen(const wcstring &msg, const termsize_t &termsize) {
    wcstring buff;
    int line_width = 0;
    const int screen_width = termsize.width;

    if (screen_width) {
        const wchar_t *start = msg.c_str();
        const wchar_t *pos = start;
        while (true) {
            bool overflow = false;
            int tok_width = 0;

            // Tokenize on whitespace, and also calculate the width of the token.
            while (*pos && (!std::wcschr(L" \n\r\t", *pos))) {
                // Check if the token is wider than one line. If so we mark it as an overflow and
                // break the token.
                const int char_width = fish_wcwidth(*pos);
                if ((tok_width + char_width) > (screen_width - 1)) {
                    overflow = true;
                    break;
                }

                tok_width += char_width;
                pos++;
            }

            if (pos == start) {
                // The token is zero characters long. If that is because we are at the end of the
                // string, stop here: stepping over the terminator would read out of bounds
                // (this happens for an empty message).
                if (!*pos) break;
                pos = pos + 1;
            } else if (overflow) {
                // In case of overflow, we print a newline, except if we already are at position 0.
                if (line_width != 0) buff.push_back(L'\n');
                buff.append(start, pos - start);
                buff.append(L"-\n");
                line_width = 0;
            } else {
                // Print the token, first moving to a new line if it does not fit on this one.
                if ((line_width + (line_width != 0 ? 1 : 0) + tok_width) > screen_width) {
                    buff.push_back(L'\n');
                    line_width = 0;
                }
                // A separating space is only needed when the line already has content.
                const int separator_width = line_width != 0 ? 1 : 0;
                if (separator_width) buff.push_back(L' ');
                buff.append(start, pos - start);
                line_width += separator_width + tok_width;
            }

            // Break on end of string.
            if (!*pos) {
                break;
            }

            start = pos;
        }
    } else {
        buff.append(msg);
    }
    buff.push_back(L'\n');
    return buff;
}
776
777 /// Escape a string in a fashion suitable for using as a URL. Store the result in out_str.
escape_string_url(const wcstring & in,wcstring & out)778 static void escape_string_url(const wcstring &in, wcstring &out) {
779 const std::string narrow = wcs2string(in);
780 for (auto &c1 : narrow) {
781 // This silliness is so we get the correct result whether chars are signed or unsigned.
782 unsigned int c2 = static_cast<unsigned int>(c1) & 0xFF;
783 if (!(c2 & 0x80) &&
784 (isalnum(c2) || c2 == '/' || c2 == '.' || c2 == '~' || c2 == '-' || c2 == '_')) {
785 // The above characters don't need to be encoded.
786 out.push_back(static_cast<wchar_t>(c2));
787 } else {
788 // All other chars need to have their UTF-8 representation encoded in hex.
789 wchar_t buf[4];
790 swprintf(buf, sizeof buf / sizeof buf[0], L"%%%02X", c2);
791 out.append(buf);
792 }
793 }
794 }
795
796 /// Reverse the effects of `escape_string_url()`. By definition the string has consist of just ASCII
797 /// chars.
unescape_string_url(const wchar_t * in,wcstring * out)798 static bool unescape_string_url(const wchar_t *in, wcstring *out) {
799 std::string result;
800 result.reserve(out->size());
801 for (wchar_t c = *in; c; c = *++in) {
802 if (c > 0x7F) return false; // invalid character means we can't decode the string
803 if (c == '%') {
804 int c1 = in[1];
805 if (c1 == 0) return false; // found unexpected end of string
806 if (c1 == '%') {
807 result.push_back('%');
808 in++;
809 } else {
810 int c2 = in[2];
811 if (c2 == 0) return false; // string ended prematurely
812 long d1 = convert_digit(c1, 16);
813 if (d1 < 0) return false;
814 long d2 = convert_digit(c2, 16);
815 if (d2 < 0) return false;
816 result.push_back(16 * d1 + d2);
817 in += 2;
818 }
819 } else {
820 result.push_back(c);
821 }
822 }
823
824 *out = str2wcstring(result);
825 return true;
826 }
827
828 /// Escape a string in a fashion suitable for using as a fish var name. Store the result in out_str.
escape_string_var(const wcstring & in,wcstring & out)829 static void escape_string_var(const wcstring &in, wcstring &out) {
830 bool prev_was_hex_encoded = false;
831 const std::string narrow = wcs2string(in);
832 for (auto c1 : narrow) {
833 // This silliness is so we get the correct result whether chars are signed or unsigned.
834 unsigned int c2 = static_cast<unsigned int>(c1) & 0xFF;
835 if (!(c2 & 0x80) && isalnum(c2) && (!prev_was_hex_encoded || !is_hex_digit(c2))) {
836 // ASCII alphanumerics don't need to be encoded.
837 if (prev_was_hex_encoded) {
838 out.push_back(L'_');
839 prev_was_hex_encoded = false;
840 }
841 out.push_back(static_cast<wchar_t>(c2));
842 } else if (c2 == '_') {
843 // Underscores are encoded by doubling them.
844 out.append(L"__");
845 prev_was_hex_encoded = false;
846 } else {
847 // All other chars need to have their UTF-8 representation encoded in hex.
848 wchar_t buf[4];
849 swprintf(buf, sizeof buf / sizeof buf[0], L"_%02X", c2);
850 out.append(buf);
851 prev_was_hex_encoded = true;
852 }
853 }
854 if (prev_was_hex_encoded) {
855 out.push_back(L'_');
856 }
857 }
858
859 /// Reverse the effects of `escape_string_var()`. By definition the string has consist of just ASCII
860 /// chars.
unescape_string_var(const wchar_t * in,wcstring * out)861 static bool unescape_string_var(const wchar_t *in, wcstring *out) {
862 std::string result;
863 result.reserve(out->size());
864 bool prev_was_hex_encoded = false;
865 for (wchar_t c = *in; c; c = *++in) {
866 if (c > 0x7F) return false; // invalid character means we can't decode the string
867 if (c == '_') {
868 int c1 = in[1];
869 if (c1 == 0) {
870 if (prev_was_hex_encoded) break;
871 return false; // found unexpected escape char at end of string
872 }
873 if (c1 == '_') {
874 result.push_back('_');
875 in++;
876 } else if (is_hex_digit(c1)) {
877 int c2 = in[2];
878 if (c2 == 0) return false; // string ended prematurely
879 long d1 = convert_hex_digit(c1);
880 if (d1 < 0) return false;
881 long d2 = convert_hex_digit(c2);
882 if (d2 < 0) return false;
883 result.push_back(16 * d1 + d2);
884 in += 2;
885 prev_was_hex_encoded = true;
886 }
887 // No "else" clause because if the first char after an underscore is not another
888 // underscore or a valid hex character then the underscore is there to improve
889 // readability after we've encoded a character not valid in a var name.
890 } else {
891 result.push_back(c);
892 }
893 }
894
895 *out = str2wcstring(result);
896 return true;
897 }
898
899 /// Escape a string in a fashion suitable for using in fish script. Store the result in out_str.
escape_string_script(const wchar_t * orig_in,size_t in_len,wcstring & out,escape_flags_t flags)900 static void escape_string_script(const wchar_t *orig_in, size_t in_len, wcstring &out,
901 escape_flags_t flags) {
902 const wchar_t *in = orig_in;
903 const bool escape_all = static_cast<bool>(flags & ESCAPE_ALL);
904 const bool no_quoted = static_cast<bool>(flags & ESCAPE_NO_QUOTED);
905 const bool no_tilde = static_cast<bool>(flags & ESCAPE_NO_TILDE);
906 const bool no_caret = feature_test(features_t::stderr_nocaret);
907 const bool no_qmark = feature_test(features_t::qmark_noglob);
908
909 bool need_escape = false;
910 bool need_complex_escape = false;
911
912 if (!no_quoted && in_len == 0) {
913 out.assign(L"''");
914 return;
915 }
916
917 for (size_t i = 0; i < in_len; i++) {
918 if ((*in >= ENCODE_DIRECT_BASE) && (*in < ENCODE_DIRECT_BASE + 256)) {
919 int val = *in - ENCODE_DIRECT_BASE;
920 int tmp;
921
922 out += L'\\';
923 out += L'X';
924
925 tmp = val / 16;
926 out += tmp > 9 ? L'a' + (tmp - 10) : L'0' + tmp;
927
928 tmp = val % 16;
929 out += tmp > 9 ? L'a' + (tmp - 10) : L'0' + tmp;
930 need_escape = need_complex_escape = true;
931
932 } else {
933 wchar_t c = *in;
934 switch (c) {
935 case L'\t': {
936 out += L'\\';
937 out += L't';
938 need_escape = need_complex_escape = true;
939 break;
940 }
941 case L'\n': {
942 out += L'\\';
943 out += L'n';
944 need_escape = need_complex_escape = true;
945 break;
946 }
947 case L'\b': {
948 out += L'\\';
949 out += L'b';
950 need_escape = need_complex_escape = true;
951 break;
952 }
953 case L'\r': {
954 out += L'\\';
955 out += L'r';
956 need_escape = need_complex_escape = true;
957 break;
958 }
959 case L'\x1B': {
960 out += L'\\';
961 out += L'e';
962 need_escape = need_complex_escape = true;
963 break;
964 }
965 case L'\x7F': {
966 out += L'\\';
967 out += L'x';
968 out += L'7';
969 out += L'f';
970 need_escape = need_complex_escape = true;
971 break;
972 }
973 case L'\\':
974 case L'\'': {
975 need_escape = need_complex_escape = true;
976 out += L'\\';
977 out += *in;
978 break;
979 }
980 case ANY_CHAR: {
981 // See #1614
982 out += L'?';
983 break;
984 }
985 case ANY_STRING: {
986 out += L'*';
987 break;
988 }
989 case ANY_STRING_RECURSIVE: {
990 out += L"**";
991 break;
992 }
993
994 case L'&':
995 case L'$':
996 case L' ':
997 case L'#':
998 case L'^':
999 case L'<':
1000 case L'>':
1001 case L'(':
1002 case L')':
1003 case L'[':
1004 case L']':
1005 case L'{':
1006 case L'}':
1007 case L'?':
1008 case L'*':
1009 case L'|':
1010 case L';':
1011 case L'"':
1012 case L'%':
1013 case L'~': {
1014 bool char_is_normal = (c == L'~' && no_tilde) || (c == L'^' && no_caret) ||
1015 (c == L'?' && no_qmark);
1016 if (!char_is_normal) {
1017 need_escape = true;
1018 if (escape_all) out += L'\\';
1019 }
1020 out += *in;
1021 break;
1022 }
1023
1024 default: {
1025 if (*in < 32) {
1026 if (*in < 27 && *in > 0) {
1027 out += L'\\';
1028 out += L'c';
1029 out += L'a' + *in - 1;
1030
1031 need_escape = need_complex_escape = true;
1032 break;
1033 }
1034
1035 int tmp = (*in) % 16;
1036 out += L'\\';
1037 out += L'x';
1038 out += ((*in > 15) ? L'1' : L'0');
1039 out += tmp > 9 ? L'a' + (tmp - 10) : L'0' + tmp;
1040 need_escape = need_complex_escape = true;
1041 } else {
1042 out += *in;
1043 }
1044 break;
1045 }
1046 }
1047 }
1048
1049 in++;
1050 }
1051
1052 // Use quoted escaping if possible, since most people find it easier to read.
1053 if (!no_quoted && need_escape && !need_complex_escape && escape_all) {
1054 wchar_t single_quote = L'\'';
1055 out.clear();
1056 out.reserve(2 + in_len);
1057 out.push_back(single_quote);
1058 out.append(orig_in, in_len);
1059 out.push_back(single_quote);
1060 }
1061 }
1062
1063 /// Escapes a string for use in a regex string. Not safe for use with `eval` as only
1064 /// characters reserved by PCRE2 are escaped, i.e. it relies on fish's automatic escaping
1065 /// of subshell output in subsequent concatenation or for use as an argument.
1066 /// \param in is the raw string to be searched for literally when substituted in a PCRE2 expression.
escape_string_pcre2(const wcstring & in)1067 static wcstring escape_string_pcre2(const wcstring &in) {
1068 wcstring out;
1069 out.reserve(in.size() * 1.3); // a wild guess
1070
1071 for (auto c : in) {
1072 switch (c) {
1073 case L'.':
1074 case L'^':
1075 case L'$':
1076 case L'*':
1077 case L'+':
1078 case L'(':
1079 case L')':
1080 case L'?':
1081 case L'[':
1082 case L'{':
1083 case L'}':
1084 case L'\\':
1085 case L'|':
1086 // these two only *need* to be escaped within a character class, and technically it
1087 // makes no sense to ever use process substitution output to compose a character class,
1088 // but...
1089 case L'-':
1090 case L']':
1091 out.push_back('\\');
1092 /* FALLTHROUGH */
1093 default:
1094 out.push_back(c);
1095 }
1096 }
1097
1098 return out;
1099 }
1100
escape_string(const wchar_t * in,escape_flags_t flags,escape_string_style_t style)1101 wcstring escape_string(const wchar_t *in, escape_flags_t flags, escape_string_style_t style) {
1102 wcstring result;
1103
1104 switch (style) {
1105 case STRING_STYLE_SCRIPT: {
1106 escape_string_script(in, std::wcslen(in), result, flags);
1107 break;
1108 }
1109 case STRING_STYLE_URL: {
1110 escape_string_url(in, result);
1111 break;
1112 }
1113 case STRING_STYLE_VAR: {
1114 escape_string_var(in, result);
1115 break;
1116 }
1117 case STRING_STYLE_REGEX: {
1118 result = escape_string_pcre2(in);
1119 break;
1120 }
1121 }
1122
1123 return result;
1124 }
1125
/// Escape the string \p in using the escaper selected by \p style. Unlike the `const wchar_t *`
/// overload, the script escaper is handed the full `in.size()` characters.
/// \p flags is only consulted for STRING_STYLE_SCRIPT. An unrecognized style yields an empty
/// string.
wcstring escape_string(const wcstring &in, escape_flags_t flags, escape_string_style_t style) {
    wcstring escaped;
    if (style == STRING_STYLE_SCRIPT) {
        escape_string_script(in.c_str(), in.size(), escaped, flags);
    } else if (style == STRING_STYLE_URL) {
        escape_string_url(in, escaped);
    } else if (style == STRING_STYLE_VAR) {
        escape_string_var(in, escaped);
    } else if (style == STRING_STYLE_REGEX) {
        escaped = escape_string_pcre2(in);
    }
    return escaped;
}
1150
1151 /// Helper to return the last character in a string, or none.
string_last_char(const wcstring & str)1152 static maybe_t<wchar_t> string_last_char(const wcstring &str) {
1153 if (str.empty()) return none();
1154 return str.back();
1155 }
1156
1157 /// Given a null terminated string starting with a backslash, read the escape as if it is unquoted,
1158 /// appending to result. Return the number of characters consumed, or none on error.
read_unquoted_escape(const wchar_t * input,wcstring * result,bool allow_incomplete,bool unescape_special)1159 maybe_t<size_t> read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete,
1160 bool unescape_special) {
1161 assert(input[0] == L'\\' && "Not an escape");
1162
1163 // Here's the character we'll ultimately append, or none. Note that L'\0' is a
1164 // valid thing to append.
1165 maybe_t<wchar_t> result_char_or_none = none();
1166
1167 bool errored = false;
1168 size_t in_pos = 1; // in_pos always tracks the next character to read (and therefore the number
1169 // of characters read so far)
1170 const wchar_t c = input[in_pos++];
1171 switch (c) {
1172 // A null character after a backslash is an error.
1173 case L'\0': {
1174 // Adjust in_pos to only include the backslash.
1175 assert(in_pos > 0);
1176 in_pos--;
1177
1178 // It's an error, unless we're allowing incomplete escapes.
1179 if (!allow_incomplete) errored = true;
1180 break;
1181 }
1182 // Numeric escape sequences. No prefix means octal escape, otherwise hexadecimal.
1183 case L'0':
1184 case L'1':
1185 case L'2':
1186 case L'3':
1187 case L'4':
1188 case L'5':
1189 case L'6':
1190 case L'7':
1191 case L'u':
1192 case L'U':
1193 case L'x':
1194 case L'X': {
1195 long long res = 0;
1196 size_t chars = 2;
1197 int base = 16;
1198 bool byte_literal = false;
1199 wchar_t max_val = ASCII_MAX;
1200
1201 switch (c) {
1202 case L'u': {
1203 chars = 4;
1204 max_val = UCS2_MAX;
1205 break;
1206 }
1207 case L'U': {
1208 chars = 8;
1209 max_val = WCHAR_MAX;
1210
1211 // Don't exceed the largest Unicode code point - see #1107.
1212 if (0x10FFFF < max_val) max_val = static_cast<wchar_t>(0x10FFFF);
1213 break;
1214 }
1215 case L'x': {
1216 chars = 2;
1217 max_val = ASCII_MAX;
1218 break;
1219 }
1220 case L'X': {
1221 byte_literal = true;
1222 max_val = BYTE_MAX;
1223 break;
1224 }
1225 default: {
1226 base = 8;
1227 chars = 3;
1228 // Note that in_pos currently is just after the first post-backslash character;
1229 // we want to start our escape from there.
1230 assert(in_pos > 0);
1231 in_pos--;
1232 break;
1233 }
1234 }
1235
1236 for (size_t i = 0; i < chars; i++) {
1237 long d = convert_digit(input[in_pos], base);
1238 if (d < 0) {
1239 break;
1240 }
1241
1242 res = (res * base) + d;
1243 in_pos++;
1244 }
1245
1246 if (res <= max_val) {
1247 result_char_or_none =
1248 static_cast<wchar_t>((byte_literal ? ENCODE_DIRECT_BASE : 0) + res);
1249 } else {
1250 errored = true;
1251 }
1252
1253 break;
1254 }
1255 // \a means bell (alert).
1256 case L'a': {
1257 result_char_or_none = L'\a';
1258 break;
1259 }
1260 // \b means backspace.
1261 case L'b': {
1262 result_char_or_none = L'\b';
1263 break;
1264 }
1265 // \cX means control sequence X.
1266 case L'c': {
1267 const wchar_t sequence_char = input[in_pos++];
1268 if (sequence_char >= L'a' && sequence_char <= (L'a' + 32)) {
1269 result_char_or_none = sequence_char - L'a' + 1;
1270 } else if (sequence_char >= L'A' && sequence_char <= (L'A' + 32)) {
1271 result_char_or_none = sequence_char - L'A' + 1;
1272 } else {
1273 errored = true;
1274 }
1275 break;
1276 }
1277 // \x1B means escape.
1278 case L'e': {
1279 result_char_or_none = L'\x1B';
1280 break;
1281 }
1282 // \f means form feed.
1283 case L'f': {
1284 result_char_or_none = L'\f';
1285 break;
1286 }
1287 // \n means newline.
1288 case L'n': {
1289 result_char_or_none = L'\n';
1290 break;
1291 }
1292 // \r means carriage return.
1293 case L'r': {
1294 result_char_or_none = L'\r';
1295 break;
1296 }
1297 // \t means tab.
1298 case L't': {
1299 result_char_or_none = L'\t';
1300 break;
1301 }
1302 // \v means vertical tab.
1303 case L'v': {
1304 result_char_or_none = L'\v';
1305 break;
1306 }
1307 // If a backslash is followed by an actual newline, swallow them both.
1308 case L'\n': {
1309 result_char_or_none = none();
1310 break;
1311 }
1312 default: {
1313 if (unescape_special) result->push_back(INTERNAL_SEPARATOR);
1314 result_char_or_none = c;
1315 break;
1316 }
1317 }
1318
1319 if (!errored && result_char_or_none.has_value()) {
1320 result->push_back(*result_char_or_none);
1321 }
1322 if (errored) return none();
1323
1324 return in_pos;
1325 }
1326
1327 /// Returns the unescaped version of input_str into output_str (by reference). Returns true if
1328 /// successful. If false, the contents of output_str are undefined (!).
unescape_string_internal(const wchar_t * const input,const size_t input_len,wcstring * output_str,unescape_flags_t flags)1329 static bool unescape_string_internal(const wchar_t *const input, const size_t input_len,
1330 wcstring *output_str, unescape_flags_t flags) {
1331 // Set up result string, which we'll swap with the output on success.
1332 wcstring result;
1333 result.reserve(input_len);
1334
1335 const bool unescape_special = static_cast<bool>(flags & UNESCAPE_SPECIAL);
1336 const bool allow_incomplete = static_cast<bool>(flags & UNESCAPE_INCOMPLETE);
1337 const bool ignore_backslashes = static_cast<bool>(flags & UNESCAPE_NO_BACKSLASHES);
1338
1339 // The positions of open braces.
1340 std::vector<size_t> braces;
1341 // The positions of variable expansions or brace ","s.
1342 // We only read braces as expanders if there's a variable expansion or "," in them.
1343 std::vector<size_t> vars_or_seps;
1344 int brace_count = 0;
1345
1346 bool errored = false;
1347 enum {
1348 mode_unquoted,
1349 mode_single_quotes,
1350 mode_double_quotes,
1351 } mode = mode_unquoted;
1352
1353 for (size_t input_position = 0; input_position < input_len && !errored; input_position++) {
1354 const wchar_t c = input[input_position];
1355 // Here's the character we'll append to result, or none() to suppress it.
1356 maybe_t<wchar_t> to_append_or_none = c;
1357 if (mode == mode_unquoted) {
1358 switch (c) {
1359 case L'\\': {
1360 if (!ignore_backslashes) {
1361 // Backslashes (escapes) are complicated and may result in errors, or
1362 // appending INTERNAL_SEPARATORs, so we have to handle them specially.
1363 auto escape_chars = read_unquoted_escape(
1364 input + input_position, &result, allow_incomplete, unescape_special);
1365 if (!escape_chars) {
1366 // A none() return indicates an error.
1367 errored = true;
1368 } else {
1369 // Skip over the characters we read, minus one because the outer loop
1370 // will increment it.
1371 assert(*escape_chars > 0);
1372 input_position += *escape_chars - 1;
1373 }
1374 // We've already appended, don't append anything else.
1375 to_append_or_none = none();
1376 }
1377 break;
1378 }
1379 case L'~': {
1380 if (unescape_special && (input_position == 0)) {
1381 to_append_or_none = HOME_DIRECTORY;
1382 }
1383 break;
1384 }
1385 case L'%': {
1386 // Note that this only recognizes %self if the string is literally %self.
1387 // %self/foo will NOT match this.
1388 if (unescape_special && input_position == 0 &&
1389 !std::wcscmp(input, PROCESS_EXPAND_SELF_STR)) {
1390 to_append_or_none = PROCESS_EXPAND_SELF;
1391 input_position += PROCESS_EXPAND_SELF_STR_LEN - 1; // skip over 'self's
1392 }
1393 break;
1394 }
1395 case L'*': {
1396 if (unescape_special) {
1397 // In general, this is ANY_STRING. But as a hack, if the last appended char
1398 // is ANY_STRING, delete the last char and store ANY_STRING_RECURSIVE to
1399 // reflect the fact that ** is the recursive wildcard.
1400 if (string_last_char(result) == ANY_STRING) {
1401 assert(!result.empty());
1402 result.resize(result.size() - 1);
1403 to_append_or_none = ANY_STRING_RECURSIVE;
1404 } else {
1405 to_append_or_none = ANY_STRING;
1406 }
1407 }
1408 break;
1409 }
1410 case L'?': {
1411 if (unescape_special && !feature_test(features_t::qmark_noglob)) {
1412 to_append_or_none = ANY_CHAR;
1413 }
1414 break;
1415 }
1416 case L'$': {
1417 if (unescape_special) {
1418 to_append_or_none = VARIABLE_EXPAND;
1419 vars_or_seps.push_back(input_position);
1420 }
1421 break;
1422 }
1423 case L'{': {
1424 if (unescape_special) {
1425 brace_count++;
1426 to_append_or_none = BRACE_BEGIN;
1427 // We need to store where the brace *ends up* in the output.
1428 braces.push_back(result.size());
1429 }
1430 break;
1431 }
1432 case L'}': {
1433 if (unescape_special) {
1434 // HACK: The completion machinery sometimes hands us partial tokens.
1435 // We can't parse them properly, but it shouldn't hurt,
1436 // so we don't assert here.
1437 // See #4954.
1438 // assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we
1439 // shouldn't be able to get here");
1440 brace_count--;
1441 to_append_or_none = BRACE_END;
1442 if (!braces.empty()) {
1443 // If we didn't have a var or separator since the last '{',
1444 // put the literal back.
1445 if (vars_or_seps.empty() || vars_or_seps.back() < braces.back()) {
1446 result[braces.back()] = L'{';
1447 // We also need to turn all spaces back.
1448 for (size_t i = braces.back() + 1; i < result.size(); i++) {
1449 if (result[i] == BRACE_SPACE) result[i] = L' ';
1450 }
1451 to_append_or_none = L'}';
1452 }
1453
1454 // Remove all seps inside the current brace pair, so if we have a
1455 // surrounding pair we only get seps inside *that*.
1456 if (!vars_or_seps.empty()) {
1457 while (!vars_or_seps.empty() && vars_or_seps.back() > braces.back())
1458 vars_or_seps.pop_back();
1459 }
1460 braces.pop_back();
1461 }
1462 }
1463 break;
1464 }
1465 case L',': {
1466 if (unescape_special && brace_count > 0) {
1467 to_append_or_none = BRACE_SEP;
1468 vars_or_seps.push_back(input_position);
1469 }
1470 break;
1471 }
1472 case L' ': {
1473 if (unescape_special && brace_count > 0) {
1474 to_append_or_none = BRACE_SPACE;
1475 }
1476 break;
1477 }
1478 case L'\'': {
1479 mode = mode_single_quotes;
1480 to_append_or_none =
1481 unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
1482 break;
1483 }
1484 case L'\"': {
1485 mode = mode_double_quotes;
1486 to_append_or_none =
1487 unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
1488 break;
1489 }
1490 default: {
1491 break;
1492 }
1493 }
1494 } else if (mode == mode_single_quotes) {
1495 if (c == L'\\') {
1496 // A backslash may or may not escape something in single quotes.
1497 switch (input[input_position + 1]) {
1498 case '\\':
1499 case L'\'': {
1500 to_append_or_none = input[input_position + 1];
1501 input_position += 1; // skip over the backslash
1502 break;
1503 }
1504 case L'\0': {
1505 if (!allow_incomplete) {
1506 errored = true;
1507 } else {
1508 // PCA this line had the following cryptic comment: 'We may ever escape
1509 // a NULL character, but still appending a \ in case I am wrong.' Not
1510 // sure what it means or the importance of this.
1511 input_position += 1; /* Skip over the backslash */
1512 to_append_or_none = L'\\';
1513 }
1514 break;
1515 }
1516 default: {
1517 // Literal backslash that doesn't escape anything! Leave things alone; we'll
1518 // append the backslash itself.
1519 break;
1520 }
1521 }
1522 } else if (c == L'\'') {
1523 to_append_or_none =
1524 unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
1525 mode = mode_unquoted;
1526 }
1527 } else if (mode == mode_double_quotes) {
1528 switch (c) {
1529 case L'"': {
1530 mode = mode_unquoted;
1531 to_append_or_none =
1532 unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
1533 break;
1534 }
1535 case '\\': {
1536 switch (input[input_position + 1]) {
1537 case L'\0': {
1538 if (!allow_incomplete) {
1539 errored = true;
1540 } else {
1541 to_append_or_none = L'\0';
1542 }
1543 break;
1544 }
1545 case '\\':
1546 case L'$':
1547 case '"': {
1548 to_append_or_none = input[input_position + 1];
1549 input_position += 1; /* Skip over the backslash */
1550 break;
1551 }
1552 case '\n': {
1553 /* Swallow newline */
1554 to_append_or_none = none();
1555 input_position += 1; /* Skip over the backslash */
1556 break;
1557 }
1558 default: {
1559 /* Literal backslash that doesn't escape anything! Leave things alone;
1560 * we'll append the backslash itself */
1561 break;
1562 }
1563 }
1564 break;
1565 }
1566 case '$': {
1567 if (unescape_special) {
1568 to_append_or_none = VARIABLE_EXPAND_SINGLE;
1569 vars_or_seps.push_back(input_position);
1570 }
1571 break;
1572 }
1573 default: {
1574 break;
1575 }
1576 }
1577 }
1578
1579 // Now maybe append the char.
1580 if (to_append_or_none.has_value()) {
1581 result.push_back(*to_append_or_none);
1582 }
1583 }
1584
1585 // Return the string by reference, and then success.
1586 if (!errored) {
1587 *output_str = std::move(result);
1588 }
1589 return !errored;
1590 }
1591
unescape_string_in_place(wcstring * str,unescape_flags_t escape_special)1592 bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special) {
1593 assert(str != nullptr);
1594 wcstring output;
1595 bool success = unescape_string_internal(str->c_str(), str->size(), &output, escape_special);
1596 if (success) {
1597 *str = std::move(output);
1598 }
1599 return success;
1600 }
1601
unescape_string(const wchar_t * input,wcstring * output,unescape_flags_t escape_special,escape_string_style_t style)1602 bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special,
1603 escape_string_style_t style) {
1604 bool success = false;
1605 switch (style) {
1606 case STRING_STYLE_SCRIPT: {
1607 success = unescape_string_internal(input, std::wcslen(input), output, escape_special);
1608 break;
1609 }
1610 case STRING_STYLE_URL: {
1611 success = unescape_string_url(input, output);
1612 break;
1613 }
1614 case STRING_STYLE_VAR: {
1615 success = unescape_string_var(input, output);
1616 break;
1617 }
1618 case STRING_STYLE_REGEX: {
1619 // unescaping PCRE2 is not needed/supported, the PCRE2 engine is responsible for that
1620 success = false;
1621 break;
1622 }
1623 }
1624 if (!success) output->clear();
1625 return success;
1626 }
1627
unescape_string(const wcstring & input,wcstring * output,unescape_flags_t escape_special,escape_string_style_t style)1628 bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special,
1629 escape_string_style_t style) {
1630 bool success = false;
1631 switch (style) {
1632 case STRING_STYLE_SCRIPT: {
1633 success = unescape_string_internal(input.c_str(), input.size(), output, escape_special);
1634 break;
1635 }
1636 case STRING_STYLE_URL: {
1637 success = unescape_string_url(input.c_str(), output);
1638 break;
1639 }
1640 case STRING_STYLE_VAR: {
1641 success = unescape_string_var(input.c_str(), output);
1642 break;
1643 }
1644 case STRING_STYLE_REGEX: {
1645 // unescaping PCRE2 is not needed/supported, the PCRE2 engine is responsible for that
1646 success = false;
1647 break;
1648 }
1649 }
1650 if (!success) output->clear();
1651 return success;
1652 }
1653
bugreport()1654 [[gnu::noinline]] void bugreport() {
1655 FLOG(error, _(L"This is a bug. Break on 'bugreport' to debug."));
1656 FLOG(error, _(L"If you can reproduce it, please report: "), PACKAGE_BUGREPORT, L'.');
1657 }
1658
format_size(long long sz)1659 wcstring format_size(long long sz) {
1660 wcstring result;
1661 const wchar_t *sz_name[] = {L"kB", L"MB", L"GB", L"TB", L"PB", L"EB", L"ZB", L"YB", nullptr};
1662
1663 if (sz < 0) {
1664 result.append(L"unknown");
1665 } else if (sz < 1) {
1666 result.append(_(L"empty"));
1667 } else if (sz < 1024) {
1668 result.append(format_string(L"%lldB", sz));
1669 } else {
1670 int i;
1671
1672 for (i = 0; sz_name[i]; i++) {
1673 if (sz < (1024 * 1024) || !sz_name[i + 1]) {
1674 long isz = (static_cast<long>(sz)) / 1024;
1675 if (isz > 9)
1676 result.append(format_string(L"%ld%ls", isz, sz_name[i]));
1677 else
1678 result.append(
1679 format_string(L"%.1f%ls", static_cast<double>(sz) / 1024, sz_name[i]));
1680 break;
1681 }
1682 sz /= 1024;
1683 }
1684 }
1685 return result;
1686 }
1687
/// Crappy function to extract the most significant digit of an unsigned long long value.
/// Returns that digit as a character and subtracts its place value from *xp, e.g. for 105 it
/// returns '1' and leaves 5 behind. Note that the width of the remainder is lost, so this must not
/// be called repeatedly to print a number: zeros would be dropped.
static char extract_most_significant_digit(unsigned long long *xp) {
    unsigned long long place_value = 1;
    unsigned long long x = *xp;
    while (x >= 10) {
        x /= 10;
        place_value *= 10;
    }
    *xp -= (place_value * x);
    return static_cast<char>(x + '0');
}

/// Append the decimal representation of \p val to \p buff starting at index *inout_idx, never
/// writing at or beyond index \p max_len. *inout_idx is advanced past the written digits.
/// Every digit is emitted, including interior and trailing zeros (previously 100 was written as
/// "1" and 105 as "15"); a value of 0 is written as "0".
static void append_ull(char *buff, unsigned long long val, size_t *inout_idx, size_t max_len) {
    size_t idx = *inout_idx;

    // Find the place value of the leading digit. `place * 10` cannot overflow here because it is
    // only computed while it does not exceed val.
    unsigned long long place = 1;
    while (val / place >= 10) place *= 10;

    for (; place > 0 && idx < max_len; place /= 10) {
        buff[idx++] = static_cast<char>('0' + (val / place) % 10);
    }
    *inout_idx = idx;
}

/// Append the null terminated string \p str to \p buff starting at index *inout_idx, never writing
/// at or beyond index \p max_len. *inout_idx is advanced past the written characters.
static void append_str(char *buff, const char *str, size_t *inout_idx, size_t max_len) {
    size_t idx = *inout_idx;
    while (*str && idx < max_len) buff[idx++] = *str++;
    *inout_idx = idx;
}

/// Format a byte count into the caller-provided 128 byte buffer using only the fixed buffer (no
/// allocation and no printf-family calls). The result is always null terminated.
/// Zero gives "empty", sizes below 1024 are printed as bytes, and anything larger is scaled by
/// powers of 1024 to the first unit (kB, MB, ...) in which the value is below 1024 (or to the
/// largest unit). Values below 10 in the chosen unit get one (truncated) decimal digit unless they
/// are an exact multiple of the unit.
void format_size_safe(char buff[128], unsigned long long sz) {
    const size_t buff_size = 128;
    const size_t max_len = buff_size - 1;  // need to leave room for a null terminator
    std::memset(buff, 0, buff_size);
    size_t idx = 0;
    const char *const sz_name[] = {"kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", nullptr};
    if (sz < 1) {
        strncpy(buff, "empty", buff_size);
    } else if (sz < 1024) {
        append_ull(buff, sz, &idx, max_len);
        append_str(buff, "B", &idx, max_len);
    } else {
        for (size_t i = 0; sz_name[i]; i++) {
            if (sz < (1024 * 1024) || !sz_name[i + 1]) {
                const unsigned long long isz = sz / 1024;
                append_ull(buff, isz, &idx, max_len);

                if (isz <= 9) {
                    // Maybe append a single fraction digit: the number of whole tenths of a unit
                    // contained in the remainder. (Previously the leading decimal digit of the
                    // remainder was used, so e.g. 1030 bytes was reported as "1.6kB".)
                    const unsigned long long remainder = sz % 1024;
                    if (remainder > 0) {
                        // tenths is in 0..9, so its most significant digit is the digit itself.
                        unsigned long long tenths = (remainder * 10) / 1024;
                        char tmp[3] = {'.', extract_most_significant_digit(&tenths), 0};
                        append_str(buff, tmp, &idx, max_len);
                    }
                }
                append_str(buff, sz_name[i], &idx, max_len);
                break;
            }
            sz /= 1024;
        }
    }
}
1746
1747 /// Return the number of seconds from the UNIX epoch, with subsecond precision. This function uses
1748 /// the gettimeofday function and will have the same precision as that function.
timef()1749 double timef() {
1750 struct timeval tv;
1751 assert_with_errno(gettimeofday(&tv, nullptr) != -1);
1752 // return (double)tv.tv_sec + 0.000001 * tv.tv_usec;
1753 return static_cast<double>(tv.tv_sec) + 1e-6 * tv.tv_usec;
1754 }
1755
/// Terminate the process immediately with the given status by calling _exit().
void exit_without_destructors(int code) {
    _exit(code);
}
1757
1758 extern "C" {
debug_thread_error(void)1759 [[gnu::noinline]] void debug_thread_error(void) {
1760 // Wait for a SIGINT. We can't use sigsuspend() because the signal may be delivered on another
1761 // thread.
1762 sigchecker_t sigint(topic_t::sighupint);
1763 sigint.wait();
1764 }
1765 }
1766
set_main_thread()1767 void set_main_thread() {
1768 // Just call thread_id() once to force increment of thread_id.
1769 uint64_t tid = thread_id();
1770 assert(tid == 1 && "main thread should have thread ID 1");
1771 (void)tid;
1772 }
1773
configure_thread_assertions_for_testing()1774 void configure_thread_assertions_for_testing() { thread_asserts_cfg_for_testing = true; }
1775
is_forked_child()1776 bool is_forked_child() { return is_forked_proc; }
1777
setup_fork_guards()1778 void setup_fork_guards() {
1779 is_forked_proc = false;
1780 static std::once_flag fork_guard_flag;
1781 std::call_once(fork_guard_flag,
1782 [] { pthread_atfork(nullptr, nullptr, [] { is_forked_proc = true; }); });
1783 }
1784
save_term_foreground_process_group()1785 void save_term_foreground_process_group() {
1786 ASSERT_IS_MAIN_THREAD();
1787 initial_fg_process_group = tcgetpgrp(STDIN_FILENO);
1788 }
1789
restore_term_foreground_process_group_for_exit()1790 void restore_term_foreground_process_group_for_exit() {
1791 // We wish to restore the tty to the initial owner. There's two ways this can go wrong:
1792 // 1. We may steal the tty from someone else (#7060).
1793 // 2. The call to tcsetpgrp may deliver SIGSTOP to us, and we will not exit.
1794 // Hanging on exit seems worse, so ensure that SIGTTOU is ignored so we do not get SIGSTOP.
1795 // Note initial_fg_process_group == 0 is possible with Linux pid namespaces.
1796 // This is called during shutdown and from a signal handler. We don't bother to complain on
1797 // failure because doing so is unlikely to be noticed.
1798 if (initial_fg_process_group > 0 && initial_fg_process_group != getpgrp()) {
1799 (void)signal(SIGTTOU, SIG_IGN);
1800 (void)tcsetpgrp(STDIN_FILENO, initial_fg_process_group);
1801 }
1802 }
1803
is_main_thread()1804 bool is_main_thread() { return thread_id() == 1; }
1805
assert_is_main_thread(const char * who)1806 void assert_is_main_thread(const char *who) {
1807 if (!is_main_thread() && !thread_asserts_cfg_for_testing) {
1808 FLOGF(error, L"%s called off of main thread.", who);
1809 FLOGF(error, L"Break on debug_thread_error to debug.");
1810 debug_thread_error();
1811 }
1812 }
1813
assert_is_not_forked_child(const char * who)1814 void assert_is_not_forked_child(const char *who) {
1815 if (is_forked_child()) {
1816 FLOGF(error, L"%s called in a forked child.", who);
1817 FLOG(error, L"Break on debug_thread_error to debug.");
1818 debug_thread_error();
1819 }
1820 }
1821
assert_is_background_thread(const char * who)1822 void assert_is_background_thread(const char *who) {
1823 if (is_main_thread() && !thread_asserts_cfg_for_testing) {
1824 FLOGF(error, L"%s called on the main thread (may block!).", who);
1825 FLOG(error, L"Break on debug_thread_error to debug.");
1826 debug_thread_error();
1827 }
1828 }
1829
assert_is_locked(std::mutex & mutex,const char * who,const char * caller)1830 void assert_is_locked(std::mutex &mutex, const char *who, const char *caller) {
1831 // Note that std::mutex.try_lock() is allowed to return false when the mutex isn't
1832 // actually locked; fortunately we are checking the opposite so we're safe.
1833 if (mutex.try_lock()) {
1834 FLOGF(error, L"%s is not locked when it should be in '%s'", who, caller);
1835 FLOG(error, L"Break on debug_thread_error to debug.");
1836 debug_thread_error();
1837 mutex.unlock();
1838 }
1839 }
1840
1841 /// Test if the specified character is in a range that fish uses interally to store special tokens.
1842 ///
1843 /// NOTE: This is used when tokenizing the input. It is also used when reading input, before
1844 /// tokenization, to replace such chars with REPLACEMENT_WCHAR if they're not part of a quoted
1845 /// string. We don't want external input to be able to feed reserved characters into our
1846 /// lexer/parser or code evaluator.
1847 //
1848 // TODO: Actually implement the replacement as documented above.
fish_reserved_codepoint(wchar_t c)1849 bool fish_reserved_codepoint(wchar_t c) {
1850 return (c >= RESERVED_CHAR_BASE && c < RESERVED_CHAR_END) ||
1851 (c >= ENCODE_DIRECT_BASE && c < ENCODE_DIRECT_END);
1852 }
1853
1854 /// Reopen stdin, stdout and/or stderr on /dev/null. This is invoked when we find that our tty has
1855 /// become invalid.
redirect_tty_output()1856 void redirect_tty_output() {
1857 struct termios t;
1858 int fd = open("/dev/null", O_WRONLY);
1859 if (fd == -1) {
1860 __fish_assert("Could not open /dev/null!", __FILE__, __LINE__, errno);
1861 }
1862 if (tcgetattr(STDIN_FILENO, &t) == -1 && errno == EIO) dup2(fd, STDIN_FILENO);
1863 if (tcgetattr(STDOUT_FILENO, &t) == -1 && errno == EIO) dup2(fd, STDOUT_FILENO);
1864 if (tcgetattr(STDERR_FILENO, &t) == -1 && errno == EIO) dup2(fd, STDERR_FILENO);
1865 close(fd);
1866 }
1867
1868 /// Display a failed assertion message, dump a stack trace if possible, then die.
__fish_assert(const char * msg,const char * file,size_t line,int error)1869 [[noreturn]] void __fish_assert(const char *msg, const char *file, size_t line, int error) {
1870 if (error) {
1871 FLOGF(error, L"%s:%zu: failed assertion: %s: errno %d (%s)", file, line, msg, error,
1872 std::strerror(error));
1873 } else {
1874 FLOGF(error, L"%s:%zu: failed assertion: %s", file, line, msg);
1875 }
1876 show_stackframe(L'E', 99, 1);
1877 abort();
1878 }
1879
1880 /// Test if the given char is valid in a variable name.
valid_var_name_char(wchar_t chr)1881 bool valid_var_name_char(wchar_t chr) { return fish_iswalnum(chr) || chr == L'_'; }
1882
1883 /// Test if the given string is a valid variable name.
valid_var_name(const wcstring & str)1884 bool valid_var_name(const wcstring &str) {
1885 // Note do not use c_str(), we want to fail on embedded nul bytes.
1886 return !str.empty() && std::all_of(str.begin(), str.end(), valid_var_name_char);
1887 }
1888
valid_var_name(const wchar_t * str)1889 bool valid_var_name(const wchar_t *str) {
1890 if (str[0] == L'\0') return false;
1891 for (size_t i = 0; str[i] != L'\0'; i++) {
1892 if (!valid_var_name_char(str[i])) return false;
1893 }
1894 return true;
1895 }
1896
1897 /// Test if the string is a valid function name.
valid_func_name(const wcstring & str)1898 bool valid_func_name(const wcstring &str) {
1899 if (str.empty()) return false;
1900 if (str.at(0) == L'-') return false;
1901 if (str.find_first_of(L'/') != wcstring::npos) return false;
1902 return true;
1903 }
1904
/// Return the path to the current executable. This needs to be realpath'd.
///
/// A platform-specific lookup is attempted first; if it yields nothing, argv0 is returned as-is
/// (or an empty string when argv0 is null).
std::string get_executable_path(const char *argv0) {
    char path_buf[PATH_MAX];

#ifdef __APPLE__
    // On OS X use its proprietary API to get the path to the executable.
    // This is basically grabbing exec_path after argc, argv, envp, ...: for us
    // https://opensource.apple.com/source/adv_cmds/adv_cmds-163/ps/print.c
    uint32_t path_buf_size = sizeof path_buf;
    if (_NSGetExecutablePath(path_buf, &path_buf_size) == 0) {
        return std::string(path_buf);
    }
#elif defined(__BSD__) && defined(KERN_PROC_PATHNAME) && !defined(__NetBSD__)
    // BSDs do not have /proc by default, (although it can be mounted as procfs via the Linux
    // compatibility layer). We can use sysctl instead: per sysctl(3), passing in a process ID of -1
    // returns the value for the current process.
    //
    // (this is broken on NetBSD, while /proc works, so we use that)
    size_t path_buf_size = sizeof path_buf;
    int mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
    if (sysctl(mib, sizeof(mib) / sizeof(int), path_buf, &path_buf_size, nullptr, 0) == 0) {
        return std::string(path_buf);
    }
    wperror(L"sysctl KERN_PROC_PATHNAME");
#else
    // On other unixes, fall back to the Linux-ish /proc/ directory. The candidates are tried in
    // order, stopping at the first one readlink() does not fail on.
    const char *const proc_links[] = {
        "/proc/self/exe",         // Linux
        "/proc/curproc/file",     // other BSDs
        "/proc/self/path/a.out",  // Solaris
    };
    ssize_t link_len = -1;
    for (const char *proc_link : proc_links) {
        // Leave room for the terminator: readlink() does not write one.
        link_len = readlink(proc_link, path_buf, sizeof path_buf - 1);
        if (link_len != -1) break;
    }
    if (link_len > 0) {
        path_buf[link_len] = '\0';
        return std::string(path_buf);
    }
#endif

    // Just return argv0, which probably won't work (i.e. it's not an absolute path or a path
    // relative to the working directory, but instead something the caller found via $PATH). We'll
    // eventually fall back to the compile time paths.
    if (argv0 == nullptr) {
        return std::string();
    }
    return std::string(argv0);
}
1950
/// Return a path to a directory where we can store temporary files.
///
/// The TMPDIR environment variable is used verbatim when it is set; otherwise a platform default
/// selected at compile time is returned.
std::string get_path_to_tmp_dir() {
    if (const char *tmpdir_from_env = getenv("TMPDIR")) {
        return tmpdir_from_env;
    }
#if defined(_CS_DARWIN_USER_TEMP_DIR)
    char darwin_tmpdir[PATH_MAX];
    const size_t needed = confstr(_CS_DARWIN_USER_TEMP_DIR, darwin_tmpdir, PATH_MAX);
    // Accept the confstr() result only if it is nonzero and fits in the buffer; anything else
    // falls back to /tmp.
    const bool usable = 0 < needed && needed <= PATH_MAX;
    return usable ? std::string(darwin_tmpdir) : std::string("/tmp");
#elif defined(P_tmpdir)
    return P_tmpdir;
#elif defined(_PATH_TMP)
    return _PATH_TMP;
#else
    return "/tmp";
#endif
}
1973
1974 // This function attempts to distinguish between a console session (at the actual login vty) and a
1975 // session within a terminal emulator inside a desktop environment or over SSH. Unfortunately
1976 // there are few values of $TERM that we can interpret as being exclusively console sessions, and
1977 // most common operating systems do not use them. The value is cached for the duration of the fish
1978 // session. We err on the side of assuming it's not a console session. This approach isn't
1979 // bullet-proof and that's OK.
is_console_session()1980 bool is_console_session() {
1981 static const bool console_session = [] {
1982 ASSERT_IS_MAIN_THREAD();
1983
1984 const char *tty_name = ttyname(0);
1985 constexpr auto len = const_strlen("/dev/tty");
1986 const char *TERM = getenv("TERM");
1987 return
1988 // Test that the tty matches /dev/(console|dcons|tty[uv\d])
1989 tty_name &&
1990 ((strncmp(tty_name, "/dev/tty", len) == 0 &&
1991 (tty_name[len] == 'u' || tty_name[len] == 'v' || isdigit(tty_name[len]))) ||
1992 strcmp(tty_name, "/dev/dcons") == 0 || strcmp(tty_name, "/dev/console") == 0)
1993 // and that $TERM is simple, e.g. `xterm` or `vt100`, not `xterm-something`
1994 && (!TERM || !strchr(TERM, '-') || !strcmp(TERM, "sun-color"));
1995 }();
1996 return console_session;
1997 }
1998
// Compile-time sanity checks for const_strcmp(): the sign of its result must order a shorter
// prefix before a longer string, report equal strings as 0, and rank by the first differing
// character.
static_assert(const_strcmp("", "a") < 0, "const_strcmp failure");
static_assert(const_strcmp("a", "a") == 0, "const_strcmp failure");
static_assert(const_strcmp("a", "") > 0, "const_strcmp failure");
static_assert(const_strcmp("aa", "a") > 0, "const_strcmp failure");
static_assert(const_strcmp("a", "aa") < 0, "const_strcmp failure");
static_assert(const_strcmp("b", "aa") > 0, "const_strcmp failure");

// Compile-time sanity checks for const_strlen(): it must report the number of characters before
// the terminating nul, including 0 for the empty string.
static_assert(const_strlen("") == 0, "const_strlen failure");
static_assert(const_strlen("a") == 1, "const_strlen failure");
static_assert(const_strlen("hello") == 5, "const_strlen failure");
2009