1 // Various functions, mostly string utilities, that are used by most parts of fish.
2 #include "config.h"
3 
4 #ifdef HAVE_BACKTRACE_SYMBOLS
5 #include <cxxabi.h>
6 #endif
7 
8 #include <ctype.h>
9 #include <dlfcn.h>
10 #include <errno.h>
11 #include <fcntl.h>
12 #include <limits.h>
13 #include <paths.h>
14 #include <pthread.h>
15 #include <stdarg.h>
16 #include <stddef.h>
17 #include <stdint.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <sys/stat.h>
21 #include <sys/time.h>
22 #include <termios.h>
23 #include <unistd.h>
24 #include <wctype.h>
25 
26 #include <cstring>
27 #include <cwchar>
28 #ifdef HAVE_EXECINFO_H
29 #include <execinfo.h>
30 #endif
31 
32 #ifdef __linux__
33 // Includes for WSL detection
34 #include <sys/utsname.h>
35 #endif
36 
37 #include <algorithm>
38 #include <atomic>
39 #include <memory>  // IWYU pragma: keep
40 #include <type_traits>
41 
42 #include "common.h"
43 #include "env.h"
44 #include "expand.h"
45 #include "fallback.h"  // IWYU pragma: keep
46 #include "flog.h"
47 #include "future_feature_flags.h"
48 #include "global_safety.h"
49 #include "iothread.h"
50 #include "parser.h"
51 #include "proc.h"
52 #include "signal.h"
53 #include "termsize.h"
54 #include "wcstringutil.h"
55 #include "wildcard.h"
56 #include "wutil.h"  // IWYU pragma: keep
57 
58 // Keep after "common.h"
59 #ifdef __BSD__
60 #include <sys/sysctl.h>
61 #elif defined(__APPLE__)
62 #include <mach-o/dyld.h>
63 #endif
64 
/// Terminal attribute settings (a `struct termios`) kept at file scope with external linkage.
/// NOTE(review): nothing in this part of the file reads or writes it; presumably these are the
/// modes fish uses while it controls the terminal - confirm at the sites that populate it.
struct termios shell_modes;

/// A constant, empty wide string with external linkage.
const wcstring g_empty_string{};

/// This allows us to notice when we've forked.
static relaxed_atomic_bool_t is_forked_proc{false};
/// This allows us to bypass the main thread checks
static relaxed_atomic_bool_t thread_asserts_cfg_for_testing{false};
73 
74 static relaxed_atomic_t<wchar_t> ellipsis_char;
get_ellipsis_char()75 wchar_t get_ellipsis_char() { return ellipsis_char; }
76 
77 static relaxed_atomic_t<const wchar_t *> ellipsis_str;
get_ellipsis_str()78 const wchar_t *get_ellipsis_str() { return ellipsis_str; }
79 
80 static relaxed_atomic_t<const wchar_t *> omitted_newline_str;
get_omitted_newline_str()81 const wchar_t *get_omitted_newline_str() { return omitted_newline_str; }
82 
83 static relaxed_atomic_t<int> omitted_newline_width;
get_omitted_newline_width()84 int get_omitted_newline_width() { return omitted_newline_width; }
85 
86 static relaxed_atomic_t<wchar_t> obfuscation_read_char;
get_obfuscation_read_char()87 wchar_t get_obfuscation_read_char() { return obfuscation_read_char; }
88 
/// Profiling flag, initially off. NOTE(review): it is not modified in this part of the file;
/// presumably it is switched on elsewhere when profiling is requested - confirm.
bool g_profiling_active = false;
/// Name of the running program. debug_shared() prints it as the prefix of each message and
/// should_suppress_stderr_for_tests() compares it against TESTS_PROGRAM_NAME.
const wchar_t *program_name;
std::atomic<int> debug_level{1};  // default maximum debug output level (errors and warnings)

/// Be able to restore the term's foreground process group.
/// This is set during startup and not modified after.
static relaxed_atomic_t<pid_t> initial_fg_process_group{-1};

/// Forward declaration; the definition appears further down in this file.
static void debug_shared(wchar_t msg_level, const wcstring &msg);
98 
#if defined(OS_IS_CYGWIN) || defined(WSL)
// MS Windows tty devices do not currently have either a read or write timestamp. Those
// respective fields of `struct stat` are always the current time, which means we can't
// use them. So we assume no external program has written to the terminal behind our
// back. This makes multiline prompts usable. See issue #2859 and
// https://github.com/Microsoft/BashOnWindows/issues/545
const bool has_working_tty_timestamps = false;
#else
// On every other platform the tty timestamps are treated as trustworthy.
const bool has_working_tty_timestamps = true;
#endif
109 
/// Map a digit character to its numeric value in the requested base.
/// The decimal digits map to 0-9; letters of either case map to 10 ('a'/'A') through 35
/// ('z'/'Z').
/// \return the value of \p d, or -1 if \p d is not such a character or its value is not strictly
/// below \p base.
long convert_digit(wchar_t d, int base) {
    long value;
    if (d >= L'0' && d <= L'9') {
        value = d - L'0';
    } else if (d >= L'a' && d <= L'z') {
        value = d - L'a' + 10;
    } else if (d >= L'A' && d <= L'Z') {
        value = d - L'A' + 10;
    } else {
        // Not a digit in any supported base.
        return -1;
    }

    // A digit is only valid if it is smaller than the base.
    return value < base ? value : -1;
}
127 
/// Test whether the char is a valid hex digit as used by the `escape_string_*()` functions.
/// Only the decimal digits and the uppercase letters A-F qualify.
///
/// The comparison is done on the full integer value. A `strchr()` lookup is unsuitable here: it
/// matches the terminating NUL of the digit table (so 0 would be reported as a hex digit) and it
/// converts its argument to `char` (so unrelated wide values could alias a digit).
static bool is_hex_digit(int c) {
    return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F');
}
130 
/// This is a specialization of `convert_digit()` that only handles base 16 and only uppercase.
/// \return the value (0-15) of the hex digit \p d, or -1 if \p d is not one of 0-9 or A-F.
static long convert_hex_digit(wchar_t d) {
    if ((d <= L'9') && (d >= L'0')) {
        return d - L'0';
    } else if ((d <= L'F') && (d >= L'A')) {
        // Only A-F are hex digits; anything above F must be rejected rather than mapped to a
        // value that does not fit in a nibble.
        return 10 + d - L'A';
    }

    return -1;
}
141 
is_windows_subsystem_for_linux()142 bool is_windows_subsystem_for_linux() {
143 #if defined(WSL)
144     return true;
145 #elif not defined(__linux__)
146     return false;
147 #else
148     // We are purposely not using std::call_once as it may invoke locking, which is an unnecessary
149     // overhead since there's no actual race condition here - even if multiple threads call this
150     // routine simultaneously the first time around, we just end up needlessly querying uname(2) one
151     // more time.
152 
153     static bool wsl_state = [] {
154         utsname info;
155         uname(&info);
156 
157         // Sample utsname.release under WSL, testing for something like `4.4.0-17763-Microsoft`
158         if (std::strstr(info.release, "Microsoft") != nullptr) {
159             const char *dash = std::strchr(info.release, '-');
160             if (dash == nullptr || strtod(dash + 1, nullptr) < 17763) {
161                 // #5298, #5661: There are acknowledged, published, and (later) fixed issues with
162                 // job control under early WSL releases that prevent fish from running correctly,
163                 // with unexpected failures when piping. Fish 3.0 nightly builds worked around this
164                 // issue with some needlessly complicated code that was later stripped from the
165                 // fish 3.0 release, so we just bail. Note that fish 2.0 was also broken, but we
166                 // just didn't warn about it.
167 
168                 // #6038 & 5101bde: It's been requested that there be some sort of way to disable
169                 // this check: if the environment variable FISH_NO_WSL_CHECK is present, this test
170                 // is bypassed. We intentionally do not include this in the error message because
171                 // it'll only allow fish to run but not to actually work. Here be dragons!
172                 if (getenv("FISH_NO_WSL_CHECK") == nullptr) {
173                     FLOGF(error,
174                           "This version of WSL has known bugs that prevent fish from working."
175                           "Please upgrade to Windows 10 1809 (17763) or higher to use fish!");
176                 }
177             }
178 
179             return true;
180         } else {
181             return false;
182         }
183     }();
184 
185     // Subsequent calls to this function may take place after fork() and before exec() in
186     // postfork.cpp. Make sure we never dynamically allocate any memory in the fast path!
187     return wsl_state;
188 #endif
189 }
190 
191 #ifdef HAVE_BACKTRACE_SYMBOLS
192 // This function produces a stack backtrace with demangled function & method names. It is based on
193 // https://gist.github.com/fmela/591333 but adapted to the style of the fish project.
demangled_backtrace(int max_frames,int skip_levels)194 [[gnu::noinline]] static wcstring_list_t demangled_backtrace(int max_frames, int skip_levels) {
195     void *callstack[128];
196     const int n_max_frames = sizeof(callstack) / sizeof(callstack[0]);
197     int n_frames = backtrace(callstack, n_max_frames);
198     char **symbols = backtrace_symbols(callstack, n_frames);
199     wchar_t text[1024];
200     wcstring_list_t backtrace_text;
201 
202     if (skip_levels + max_frames < n_frames) n_frames = skip_levels + max_frames;
203 
204     for (int i = skip_levels; i < n_frames; i++) {
205         Dl_info info;
206         if (dladdr(callstack[i], &info) && info.dli_sname) {
207             char *demangled = nullptr;
208             int status = -1;
209             if (info.dli_sname[0] == '_')
210                 demangled = abi::__cxa_demangle(info.dli_sname, nullptr, nullptr, &status);
211             swprintf(text, sizeof(text) / sizeof(wchar_t), L"%-3d %s + %td", i - skip_levels,
212                      status == 0                 ? demangled
213                      : info.dli_sname == nullptr ? symbols[i]
214                                                  : info.dli_sname,
215                      static_cast<char *>(callstack[i]) - static_cast<const char *>(info.dli_saddr));
216             free(demangled);
217         } else {
218             swprintf(text, sizeof(text) / sizeof(wchar_t), L"%-3d %s", i - skip_levels, symbols[i]);
219         }
220         backtrace_text.push_back(text);
221     }
222     free(symbols);
223     return backtrace_text;
224 }
225 
show_stackframe(const wchar_t msg_level,int frame_count,int skip_levels)226 [[gnu::noinline]] void show_stackframe(const wchar_t msg_level, int frame_count, int skip_levels) {
227     if (frame_count < 1) return;
228 
229     wcstring_list_t bt = demangled_backtrace(frame_count, skip_levels + 2);
230     debug_shared(msg_level, L"Backtrace:\n" + join_strings(bt, L'\n') + L'\n');
231 }
232 
233 #else   // HAVE_BACKTRACE_SYMBOLS
234 
show_stackframe(const wchar_t msg_level,int,int)235 [[gnu::noinline]] void show_stackframe(const wchar_t msg_level, int, int) {
236     debug_shared(msg_level, L"Sorry, but your system does not support backtraces");
237 }
238 #endif  // HAVE_BACKTRACE_SYMBOLS
239 
/// \return the smallest pointer in the range [start, start + len] which is aligned to Align.
/// If there is no such pointer, return \p start + len.
/// Align must be a power of 2 in the range [1, 64].
/// This is intended to return the end point of the "unaligned prefix" of a vectorized loop.
template <size_t Align>
inline const char *align_start(const char *start, size_t len) {
    static_assert(Align >= 1 && Align <= 64, "Alignment must be in range [1, 64]");
    static_assert((Align & (Align - 1)) == 0, "Alignment must be power of 2");
    // Because Align is a power of two, "mod Align" is the same as masking with Align - 1.
    constexpr uintptr_t mask = Align - 1;
    // How far past the previous aligned address does start sit?
    const uintptr_t skew = reinterpret_cast<uintptr_t>(start) & mask;
    // Distance up to the next aligned address. The second mask turns a full step of Align (the
    // already-aligned case) into 0.
    const size_t advance = static_cast<size_t>((Align - skew) & mask);
    // Never step past the end of the buffer; adding an unclamped amount could overflow the
    // pointer.
    return start + (advance < len ? advance : len);
}
260 
/// \return the largest pointer in the range [start, start + len] which is aligned to Align.
/// If there is no such pointer, return \p start.
/// This is intended to be the start point of the "unaligned suffix" of a vectorized loop.
template <size_t Align>
inline const char *align_end(const char *start, size_t len) {
    static_assert(Align >= 1 && Align <= 64, "Alignment must be in range [1, 64]");
    static_assert((Align & (Align - 1)) == 0, "Alignment must be power of 2");
    const char *const end = start + len;
    // The end address modulo Align is how far back the nearest aligned address lies.
    const size_t skew = static_cast<size_t>(reinterpret_cast<uintptr_t>(end) % Align);
    // Do not step back past the beginning of the buffer.
    return end - (skew < len ? skew : len);
}
273 
274 /// \return the count of initial characters in \p in which are ASCII.
count_ascii_prefix(const char * in,size_t in_len)275 static size_t count_ascii_prefix(const char *in, size_t in_len) {
276     // We'll use aligned reads of this type.
277     using WordType = uint32_t;
278     const char *aligned_start = align_start<alignof(WordType)>(in, in_len);
279     const char *aligned_end = align_end<alignof(WordType)>(in, in_len);
280 
281     // Consume the unaligned prefix.
282     for (const char *cursor = in; cursor < aligned_start; cursor++) {
283         if (cursor[0] & 0x80) return &cursor[0] - in;
284     }
285 
286     // Consume the aligned middle.
287     for (const char *cursor = aligned_start; cursor < aligned_end; cursor += sizeof(WordType)) {
288         if (*reinterpret_cast<const WordType *>(cursor) & 0x80808080) {
289             if (cursor[0] & 0x80) return &cursor[0] - in;
290             if (cursor[1] & 0x80) return &cursor[1] - in;
291             if (cursor[2] & 0x80) return &cursor[2] - in;
292             return &cursor[3] - in;
293         }
294     }
295 
296     // Consume the unaligned suffix.
297     for (const char *cursor = aligned_end; cursor < in + in_len; cursor++) {
298         if (cursor[0] & 0x80) return &cursor[0] - in;
299     }
300     return in_len;
301 }
302 
/// Converts the narrow character string \c in into its wide equivalent, and return it.
///
/// The string may contain embedded nulls.
///
/// This function encodes illegal character sequences in a reversible way using the private use
/// area.
///
/// \param in the bytes to convert; must be non-null unless \p in_len is 0.
/// \param in_len the number of bytes to convert.
/// \return the converted string. Bytes that cannot be decoded are stored individually as
/// ENCODE_DIRECT_BASE plus the byte value.
static wcstring str2wcs_internal(const char *in, const size_t in_len) {
    if (in_len == 0) return wcstring();
    assert(in != nullptr);

    wcstring result;
    result.reserve(in_len);

    // In the unlikely event that MB_CUR_MAX is 1, then we are just going to append.
    if (MB_CUR_MAX == 1) {
        size_t in_pos = 0;
        while (in_pos < in_len) {
            // Go through unsigned char so that bytes >= 0x80 do not become negative values.
            result.push_back(static_cast<unsigned char>(in[in_pos]));
            in_pos++;
        }
        return result;
    }

    size_t in_pos = 0;
    mbstate_t state = {};
    while (in_pos < in_len) {
        // Append any initial sequence of ascii characters.
        // Note we do not support character sets which are not supersets of ASCII.
        size_t ascii_prefix_length = count_ascii_prefix(&in[in_pos], in_len - in_pos);
        result.insert(result.end(), &in[in_pos], &in[in_pos + ascii_prefix_length]);
        in_pos += ascii_prefix_length;
        assert(in_pos <= in_len && "Position overflowed length");
        if (in_pos == in_len) break;

        // We have found a non-ASCII character.
        bool use_encode_direct = false;
        size_t ret = 0;
        wchar_t wc = 0;

        // The `if (false)` head exists so that every real case below, including the one that is
        // conditionally compiled, can be written uniformly as an `else if`.
        if (false) {
#if defined(HAVE_BROKEN_MBRTOWC_UTF8)
        } else if ((in[in_pos] & 0xF8) == 0xF8) {
            // Protect against broken std::mbrtowc() implementations which attempt to encode UTF-8
            // sequences longer than four bytes (e.g., OS X Snow Leopard).
            use_encode_direct = true;
#endif
        } else if (sizeof(wchar_t) == 2 &&  //!OCLINT(constant if expression)
                   (in[in_pos] & 0xF8) == 0xF0) {
            // Assume we are in a UTF-16 environment (e.g., Cygwin) using a UTF-8 encoding.
            // The bits set check will be true for a four byte UTF-8 sequence that requires
            // two UTF-16 chars. Something that doesn't work with our simple use of std::mbrtowc().
            use_encode_direct = true;
        } else {
            ret = std::mbrtowc(&wc, &in[in_pos], in_len - in_pos, &state);
            // Determine whether to encode this character with our crazy scheme.
            if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) {
                // The decoded character collides with the range used for directly encoded bytes.
                use_encode_direct = true;
            } else if (wc == INTERNAL_SEPARATOR) {
                use_encode_direct = true;
            } else if (ret == static_cast<size_t>(-2)) {
                // Incomplete sequence.
                use_encode_direct = true;
            } else if (ret == static_cast<size_t>(-1)) {
                // Invalid data.
                use_encode_direct = true;
            } else if (ret > in_len - in_pos) {
                // Other error codes? Terrifying, should never happen.
                use_encode_direct = true;
            } else if (sizeof(wchar_t) == 2 && wc >= 0xD800 &&  //!OCLINT(constant if expression)
                       wc <= 0xDFFF) {
                // If we get a surrogate pair char on a UTF-16 system (e.g., Cygwin) then
                // it's guaranteed the UTF-8 decoding is wrong so use direct encoding.
                use_encode_direct = true;
            }
        }

        if (use_encode_direct) {
            // Store just this one byte, then restart decoding from a clean shift state at the
            // following byte.
            wc = ENCODE_DIRECT_BASE + static_cast<unsigned char>(in[in_pos]);
            result.push_back(wc);
            in_pos++;
            std::memset(&state, 0, sizeof state);
        } else if (ret == 0) {  // embedded null byte!
            result.push_back(L'\0');
            in_pos++;
            std::memset(&state, 0, sizeof state);
        } else {  // normal case
            result.push_back(wc);
            in_pos += ret;
        }
    }

    return result;
}
396 
str2wcstring(const char * in,size_t len)397 wcstring str2wcstring(const char *in, size_t len) { return str2wcs_internal(in, len); }
398 
str2wcstring(const char * in)399 wcstring str2wcstring(const char *in) { return str2wcs_internal(in, std::strlen(in)); }
400 
str2wcstring(const std::string & in)401 wcstring str2wcstring(const std::string &in) {
402     // Handles embedded nulls!
403     return str2wcs_internal(in.data(), in.size());
404 }
405 
str2wcstring(const std::string & in,size_t len)406 wcstring str2wcstring(const std::string &in, size_t len) {
407     // Handles embedded nulls!
408     return str2wcs_internal(in.data(), len);
409 }
410 
wcs2string(const wcstring & input)411 std::string wcs2string(const wcstring &input) { return wcs2string(input.data(), input.size()); }
412 
wcs2string(const wchar_t * in,size_t len)413 std::string wcs2string(const wchar_t *in, size_t len) {
414     if (len == 0) return std::string{};
415     std::string result;
416     wcs2string_appending(in, len, &result);
417     return result;
418 }
419 
wcs2string_appending(const wchar_t * in,size_t len,std::string * receiver)420 void wcs2string_appending(const wchar_t *in, size_t len, std::string *receiver) {
421     assert(receiver && "Null receiver");
422     receiver->reserve(receiver->size() + len);
423     wcs2string_callback(in, len, [&](const char *buff, size_t bufflen) {
424         receiver->append(buff, bufflen);
425         return true;
426     });
427 }
428 
/// Test if the character can be encoded using the current locale.
/// \return true unless std::wcrtomb() reports a conversion error for \p wc.
static bool can_be_encoded(wchar_t wc) {
    // Convert from a fresh shift state into a scratch buffer; only the status matters.
    mbstate_t shift_state = {};
    char scratch[MB_LEN_MAX];
    const size_t status = std::wcrtomb(scratch, wc, &shift_state);
    const size_t conversion_failed = static_cast<size_t>(-1);
    return status != conversion_failed;
}
436 
format_string(const wchar_t * format,...)437 wcstring format_string(const wchar_t *format, ...) {
438     va_list va;
439     va_start(va, format);
440     wcstring result = vformat_string(format, va);
441     va_end(va);
442     return result;
443 }
444 
append_formatv(wcstring & target,const wchar_t * format,va_list va_orig)445 void append_formatv(wcstring &target, const wchar_t *format, va_list va_orig) {
446     const int saved_err = errno;
447     // As far as I know, there is no way to check if a vswprintf-call failed because of a badly
448     // formated string option or because the supplied destination string was to small. In GLIBC,
449     // errno seems to be set to EINVAL either way.
450     //
451     // Because of this, on failure we try to increase the buffer size until the free space is
452     // larger than max_size, at which point it will conclude that the error was probably due to a
453     // badly formated string option, and return an error. Make sure to null terminate string before
454     // that, though.
455     const size_t max_size = (128 * 1024 * 1024);
456     wchar_t static_buff[256];
457     size_t size = 0;
458     wchar_t *buff = nullptr;
459     int status = -1;
460     while (status < 0) {
461         // Reallocate if necessary.
462         if (size == 0) {
463             buff = static_buff;
464             size = sizeof static_buff;
465         } else {
466             size *= 2;
467             if (size >= max_size) {
468                 buff[0] = '\0';
469                 break;
470             }
471             buff = static_cast<wchar_t *>(realloc((buff == static_buff ? nullptr : buff), size));
472             assert(buff != nullptr);
473         }
474 
475         // Try printing.
476         va_list va;
477         va_copy(va, va_orig);
478         status = std::vswprintf(buff, size / sizeof(wchar_t), format, va);
479         va_end(va);
480     }
481 
482     target.append(buff);
483 
484     if (buff != static_buff) {
485         free(buff);
486     }
487 
488     errno = saved_err;
489 }
490 
vformat_string(const wchar_t * format,va_list va_orig)491 wcstring vformat_string(const wchar_t *format, va_list va_orig) {
492     wcstring result;
493     append_formatv(result, format, va_orig);
494     return result;
495 }
496 
append_format(wcstring & str,const wchar_t * format,...)497 void append_format(wcstring &str, const wchar_t *format, ...) {
498     va_list va;
499     va_start(va, format);
500     append_formatv(str, format, va);
501     va_end(va);
502 }
503 
/// Find the quote that closes the quoted section beginning at \p pos.
/// The character at \p pos is taken as the quote character. A backslash escapes the character
/// that follows it, so an escaped quote does not terminate the section.
/// \return a pointer to the matching closing quote, or nullptr if the string ends first.
wchar_t *quote_end(const wchar_t *pos) {
    const wchar_t quote = *pos;

    for (const wchar_t *cursor = pos + 1; *cursor != L'\0'; ++cursor) {
        if (*cursor == L'\\') {
            // Step over the escaped character; a trailing backslash means no closing quote.
            ++cursor;
            if (*cursor == L'\0') return nullptr;
        } else if (*cursor == quote) {
            return const_cast<wchar_t *>(cursor);
        }
    }
    return nullptr;
}
523 
fish_setlocale()524 void fish_setlocale() {
525     // Use various Unicode symbols if they can be encoded using the current locale, else a simple
526     // ASCII char alternative. All of the can_be_encoded() invocations should return the same
527     // true/false value since the code points are in the BMP but we're going to be paranoid. This
528     // is also technically wrong if we're not in a Unicode locale but we expect (or hope)
529     // can_be_encoded() will return false in that case.
530     if (can_be_encoded(L'\u2026')) {
531         ellipsis_char = L'\u2026';
532         ellipsis_str = L"\u2026";
533     } else {
534         ellipsis_char = L'$';  // "horizontal ellipsis"
535         ellipsis_str = L"...";
536     }
537 
538     if (is_windows_subsystem_for_linux()) {
539         // neither of \u23CE and \u25CF can be displayed in the default fonts on Windows, though
540         // they can be *encoded* just fine. Use alternative glyphs.
541         omitted_newline_str = L"\u00b6";  // "pilcrow"
542         omitted_newline_width = 1;
543         obfuscation_read_char = L'\u2022';  // "bullet"
544     } else if (is_console_session()) {
545         omitted_newline_str = L"^J";
546         omitted_newline_width = 2;
547         obfuscation_read_char = L'*';
548     } else {
549         if (can_be_encoded(L'\u23CE')) {
550             omitted_newline_str = L"\u23CE";  // "return symbol" (⏎)
551             omitted_newline_width = 1;
552         } else {
553             omitted_newline_str = L"^J";
554             omitted_newline_width = 2;
555         }
556         obfuscation_read_char = can_be_encoded(L'\u25CF') ? L'\u25CF' : L'#';  // "black circle"
557     }
558 }
559 
/// Read up to \p count bytes from \p fd into \p buf, retrying for as long as read() is
/// interrupted by a signal (EINTR).
/// \return the result of the last read(): the number of bytes read, 0 at end of file, or -1 with
/// errno set for any error other than EINTR.
long read_blocked(int fd, void *buf, size_t count) {
    for (;;) {
        const ssize_t amt = read(fd, buf, count);
        const bool interrupted = (amt < 0 && errno == EINTR);
        if (!interrupted) return amt;
    }
}
567 
/// Write all \p count bytes of \p buff to \p fd, continuing after partial writes and retrying
/// when write() fails with EAGAIN or EINTR.
/// \return the number of bytes written, or -1 with errno set if write() fails with any other
/// error.
ssize_t write_loop(int fd, const char *buff, size_t count) {
    size_t written = 0;
    while (written < count) {
        const ssize_t amt = write(fd, buff + written, count - written);
        if (amt >= 0) {
            written += static_cast<size_t>(amt);
            continue;
        }
        // Only "try again" style failures are retried; everything else is reported.
        const bool retryable = (errno == EAGAIN || errno == EINTR);
        if (!retryable) return -1;
    }
    return static_cast<ssize_t>(written);
}
584 
/// Perform a single successful read() of up to \p count bytes from \p fd into \p buff, retrying
/// while read() fails with EAGAIN or EINTR.
/// \return the number of bytes read, 0 at end of file, or -1 with errno set for any other error.
ssize_t read_loop(int fd, void *buff, size_t count) {
    for (;;) {
        const ssize_t amt = read(fd, buff, count);
        if (amt >= 0) return amt;
        const bool retryable = (errno == EAGAIN || errno == EINTR);
        if (!retryable) return amt;
    }
}
592 
593 /// Hack to not print error messages in the tests. Do not call this from functions in this module
594 /// like `debug()`. It is only intended to suppress diagnostic noise from testing things like the
595 /// fish parser where we expect a lot of diagnostic messages due to testing error conditions.
should_suppress_stderr_for_tests()596 bool should_suppress_stderr_for_tests() {
597     return program_name && !std::wcscmp(program_name, TESTS_PROGRAM_NAME);
598 }
599 
debug_shared(const wchar_t level,const wcstring & msg)600 static void debug_shared(const wchar_t level, const wcstring &msg) {
601     pid_t current_pid;
602     if (!is_forked_child()) {
603         std::fwprintf(stderr, L"<%lc> %ls: %ls\n", level, program_name, msg.c_str());
604     } else {
605         current_pid = getpid();
606         std::fwprintf(stderr, L"<%lc> %ls: %d: %ls\n", level, program_name, current_pid,
607                       msg.c_str());
608     }
609 }
610 
debug_safe(int level,const char * msg,const char * param1,const char * param2,const char * param3,const char * param4,const char * param5,const char * param6,const char * param7,const char * param8,const char * param9,const char * param10,const char * param11,const char * param12)611 void debug_safe(int level, const char *msg, const char *param1, const char *param2,
612                 const char *param3, const char *param4, const char *param5, const char *param6,
613                 const char *param7, const char *param8, const char *param9, const char *param10,
614                 const char *param11, const char *param12) {
615     const char *const params[] = {param1, param2, param3, param4,  param5,  param6,
616                                   param7, param8, param9, param10, param11, param12};
617     if (!msg) return;
618 
619     // Can't call fwprintf, that may allocate memory Just call write() over and over.
620     if (level > debug_level) return;
621     int errno_old = errno;
622 
623     size_t param_idx = 0;
624     const char *cursor = msg;
625     while (*cursor != '\0') {
626         const char *end = std::strchr(cursor, '%');
627         if (end == nullptr) end = cursor + std::strlen(cursor);
628 
629         ignore_result(write(STDERR_FILENO, cursor, end - cursor));
630 
631         if (end[0] == '%' && end[1] == 's') {
632             // Handle a format string.
633             assert(param_idx < sizeof params / sizeof *params);
634             const char *format = params[param_idx++];
635             if (!format) format = "(null)";
636             ignore_result(write(STDERR_FILENO, format, std::strlen(format)));
637             cursor = end + 2;
638         } else if (end[0] == '\0') {
639             // Must be at the end of the string.
640             cursor = end;
641         } else {
642             // Some other format specifier, just skip it.
643             cursor = end + 1;
644         }
645     }
646 
647     // We always append a newline.
648     ignore_result(write(STDERR_FILENO, "\n", 1));
649 
650     errno = errno_old;
651 }
652 
// \return the magnitude of x as an unsigned value. Careful to not negate LLONG_MIN.
static unsigned long long absolute_value(long long x) {
    const unsigned long long bits = static_cast<unsigned long long>(x);
    // Negating in unsigned arithmetic is well defined for every input, including LLONG_MIN, and
    // yields exactly the magnitude of a negative x.
    return x < 0 ? 0ULL - bits : bits;
}
659 
/// Write the decimal representation of \p val, followed by a null terminator, into \p buff.
/// Uses no allocation and no locale-dependent formatting.
/// \param buff destination buffer.
/// \param size capacity of \p buff in characters, including room for the terminator.
/// \param val the value to format.
template <typename CharT>
void format_safe_impl(CharT *buff, size_t size, unsigned long long val) {
    // Count the digits first so the capacity can be validated before anything is written.
    size_t digits = 1;
    for (unsigned long long rest = val / 10; rest != 0; rest /= 10) {
        digits++;
    }
    assert(digits + 1 <= size && "Buffer overflowed");

    // Fill in from the least significant digit backwards; no reversal pass is needed.
    buff[digits] = '\0';
    for (size_t idx = digits; idx-- > 0;) {
        buff[idx] = static_cast<CharT>((val % 10) + '0');
        val /= 10;
    }
}
676 
format_long_safe(char buff[64],long val)677 void format_long_safe(char buff[64], long val) {
678     unsigned long long uval = absolute_value(val);
679     if (val >= 0) {
680         format_safe_impl(buff, 64, uval);
681     } else {
682         buff[0] = '-';
683         format_safe_impl(buff + 1, 63, uval);
684     }
685 }
686 
format_long_safe(wchar_t buff[64],long val)687 void format_long_safe(wchar_t buff[64], long val) {
688     unsigned long long uval = absolute_value(val);
689     if (val >= 0) {
690         format_safe_impl(buff, 64, uval);
691     } else {
692         buff[0] = '-';
693         format_safe_impl(buff + 1, 63, uval);
694     }
695 }
696 
format_ullong_safe(wchar_t buff[64],unsigned long long val)697 void format_ullong_safe(wchar_t buff[64], unsigned long long val) {
698     return format_safe_impl(buff, 64, val);
699 }
700 
/// Copy the characters of \p s whose value is at most 127 into \p buff, dropping all others.
/// At most 63 characters are copied and the result is always null-terminated.
void narrow_string_safe(char buff[64], const wchar_t *s) {
    // One slot is kept back for the terminator.
    const size_t max_chars = 63;
    size_t out = 0;
    for (const wchar_t *cursor = s; out < max_chars && *cursor != L'\0'; ++cursor) {
        if (*cursor <= 127) {
            buff[out++] = char(*cursor);
        }
    }
    buff[out] = '\0';
}
714 
reformat_for_screen(const wcstring & msg,const termsize_t & termsize)715 wcstring reformat_for_screen(const wcstring &msg, const termsize_t &termsize) {
716     wcstring buff;
717     int line_width = 0;
718     int screen_width = termsize.width;
719 
720     if (screen_width) {
721         const wchar_t *start = msg.c_str();
722         const wchar_t *pos = start;
723         while (true) {
724             int overflow = 0;
725 
726             int tok_width = 0;
727 
728             // Tokenize on whitespace, and also calculate the width of the token.
729             while (*pos && (!std::wcschr(L" \n\r\t", *pos))) {
730                 // Check is token is wider than one line. If so we mark it as an overflow and break
731                 // the token.
732                 if ((tok_width + fish_wcwidth(*pos)) > (screen_width - 1)) {
733                     overflow = 1;
734                     break;
735                 }
736 
737                 tok_width += fish_wcwidth(*pos);
738                 pos++;
739             }
740 
741             // If token is zero character long, we don't do anything.
742             if (pos == start) {
743                 pos = pos + 1;
744             } else if (overflow) {
745                 // In case of overflow, we print a newline, except if we already are at position 0.
746                 wchar_t *token = wcsndup(start, pos - start);
747                 if (line_width != 0) buff.push_back(L'\n');
748                 buff.append(format_string(L"%ls-\n", token));
749                 free(token);
750                 line_width = 0;
751             } else {
752                 // Print the token.
753                 wchar_t *token = wcsndup(start, pos - start);
754                 if ((line_width + (line_width != 0 ? 1 : 0) + tok_width) > screen_width) {
755                     buff.push_back(L'\n');
756                     line_width = 0;
757                 }
758                 buff.append(format_string(L"%ls%ls", line_width ? L" " : L"", token));
759                 free(token);
760                 line_width += (line_width != 0 ? 1 : 0) + tok_width;
761             }
762 
763             // Break on end of string.
764             if (!*pos) {
765                 break;
766             }
767 
768             start = pos;
769         }
770     } else {
771         buff.append(msg);
772     }
773     buff.push_back(L'\n');
774     return buff;
775 }
776 
777 /// Escape a string in a fashion suitable for using as a URL. Store the result in out_str.
escape_string_url(const wcstring & in,wcstring & out)778 static void escape_string_url(const wcstring &in, wcstring &out) {
779     const std::string narrow = wcs2string(in);
780     for (auto &c1 : narrow) {
781         // This silliness is so we get the correct result whether chars are signed or unsigned.
782         unsigned int c2 = static_cast<unsigned int>(c1) & 0xFF;
783         if (!(c2 & 0x80) &&
784             (isalnum(c2) || c2 == '/' || c2 == '.' || c2 == '~' || c2 == '-' || c2 == '_')) {
785             // The above characters don't need to be encoded.
786             out.push_back(static_cast<wchar_t>(c2));
787         } else {
788             // All other chars need to have their UTF-8 representation encoded in hex.
789             wchar_t buf[4];
790             swprintf(buf, sizeof buf / sizeof buf[0], L"%%%02X", c2);
791             out.append(buf);
792         }
793     }
794 }
795 
796 /// Reverse the effects of `escape_string_url()`. By definition the string has consist of just ASCII
797 /// chars.
unescape_string_url(const wchar_t * in,wcstring * out)798 static bool unescape_string_url(const wchar_t *in, wcstring *out) {
799     std::string result;
800     result.reserve(out->size());
801     for (wchar_t c = *in; c; c = *++in) {
802         if (c > 0x7F) return false;  // invalid character means we can't decode the string
803         if (c == '%') {
804             int c1 = in[1];
805             if (c1 == 0) return false;  // found unexpected end of string
806             if (c1 == '%') {
807                 result.push_back('%');
808                 in++;
809             } else {
810                 int c2 = in[2];
811                 if (c2 == 0) return false;  // string ended prematurely
812                 long d1 = convert_digit(c1, 16);
813                 if (d1 < 0) return false;
814                 long d2 = convert_digit(c2, 16);
815                 if (d2 < 0) return false;
816                 result.push_back(16 * d1 + d2);
817                 in += 2;
818             }
819         } else {
820             result.push_back(c);
821         }
822     }
823 
824     *out = str2wcstring(result);
825     return true;
826 }
827 
828 /// Escape a string in a fashion suitable for using as a fish var name. Store the result in out_str.
escape_string_var(const wcstring & in,wcstring & out)829 static void escape_string_var(const wcstring &in, wcstring &out) {
830     bool prev_was_hex_encoded = false;
831     const std::string narrow = wcs2string(in);
832     for (auto c1 : narrow) {
833         // This silliness is so we get the correct result whether chars are signed or unsigned.
834         unsigned int c2 = static_cast<unsigned int>(c1) & 0xFF;
835         if (!(c2 & 0x80) && isalnum(c2) && (!prev_was_hex_encoded || !is_hex_digit(c2))) {
836             // ASCII alphanumerics don't need to be encoded.
837             if (prev_was_hex_encoded) {
838                 out.push_back(L'_');
839                 prev_was_hex_encoded = false;
840             }
841             out.push_back(static_cast<wchar_t>(c2));
842         } else if (c2 == '_') {
843             // Underscores are encoded by doubling them.
844             out.append(L"__");
845             prev_was_hex_encoded = false;
846         } else {
847             // All other chars need to have their UTF-8 representation encoded in hex.
848             wchar_t buf[4];
849             swprintf(buf, sizeof buf / sizeof buf[0], L"_%02X", c2);
850             out.append(buf);
851             prev_was_hex_encoded = true;
852         }
853     }
854     if (prev_was_hex_encoded) {
855         out.push_back(L'_');
856     }
857 }
858 
859 /// Reverse the effects of `escape_string_var()`. By definition the string has consist of just ASCII
860 /// chars.
unescape_string_var(const wchar_t * in,wcstring * out)861 static bool unescape_string_var(const wchar_t *in, wcstring *out) {
862     std::string result;
863     result.reserve(out->size());
864     bool prev_was_hex_encoded = false;
865     for (wchar_t c = *in; c; c = *++in) {
866         if (c > 0x7F) return false;  // invalid character means we can't decode the string
867         if (c == '_') {
868             int c1 = in[1];
869             if (c1 == 0) {
870                 if (prev_was_hex_encoded) break;
871                 return false;  // found unexpected escape char at end of string
872             }
873             if (c1 == '_') {
874                 result.push_back('_');
875                 in++;
876             } else if (is_hex_digit(c1)) {
877                 int c2 = in[2];
878                 if (c2 == 0) return false;  // string ended prematurely
879                 long d1 = convert_hex_digit(c1);
880                 if (d1 < 0) return false;
881                 long d2 = convert_hex_digit(c2);
882                 if (d2 < 0) return false;
883                 result.push_back(16 * d1 + d2);
884                 in += 2;
885                 prev_was_hex_encoded = true;
886             }
887             // No "else" clause because if the first char after an underscore is not another
888             // underscore or a valid hex character then the underscore is there to improve
889             // readability after we've encoded a character not valid in a var name.
890         } else {
891             result.push_back(c);
892         }
893     }
894 
895     *out = str2wcstring(result);
896     return true;
897 }
898 
899 /// Escape a string in a fashion suitable for using in fish script. Store the result in out_str.
escape_string_script(const wchar_t * orig_in,size_t in_len,wcstring & out,escape_flags_t flags)900 static void escape_string_script(const wchar_t *orig_in, size_t in_len, wcstring &out,
901                                  escape_flags_t flags) {
902     const wchar_t *in = orig_in;
903     const bool escape_all = static_cast<bool>(flags & ESCAPE_ALL);
904     const bool no_quoted = static_cast<bool>(flags & ESCAPE_NO_QUOTED);
905     const bool no_tilde = static_cast<bool>(flags & ESCAPE_NO_TILDE);
906     const bool no_caret = feature_test(features_t::stderr_nocaret);
907     const bool no_qmark = feature_test(features_t::qmark_noglob);
908 
909     bool need_escape = false;
910     bool need_complex_escape = false;
911 
912     if (!no_quoted && in_len == 0) {
913         out.assign(L"''");
914         return;
915     }
916 
917     for (size_t i = 0; i < in_len; i++) {
918         if ((*in >= ENCODE_DIRECT_BASE) && (*in < ENCODE_DIRECT_BASE + 256)) {
919             int val = *in - ENCODE_DIRECT_BASE;
920             int tmp;
921 
922             out += L'\\';
923             out += L'X';
924 
925             tmp = val / 16;
926             out += tmp > 9 ? L'a' + (tmp - 10) : L'0' + tmp;
927 
928             tmp = val % 16;
929             out += tmp > 9 ? L'a' + (tmp - 10) : L'0' + tmp;
930             need_escape = need_complex_escape = true;
931 
932         } else {
933             wchar_t c = *in;
934             switch (c) {
935                 case L'\t': {
936                     out += L'\\';
937                     out += L't';
938                     need_escape = need_complex_escape = true;
939                     break;
940                 }
941                 case L'\n': {
942                     out += L'\\';
943                     out += L'n';
944                     need_escape = need_complex_escape = true;
945                     break;
946                 }
947                 case L'\b': {
948                     out += L'\\';
949                     out += L'b';
950                     need_escape = need_complex_escape = true;
951                     break;
952                 }
953                 case L'\r': {
954                     out += L'\\';
955                     out += L'r';
956                     need_escape = need_complex_escape = true;
957                     break;
958                 }
959                 case L'\x1B': {
960                     out += L'\\';
961                     out += L'e';
962                     need_escape = need_complex_escape = true;
963                     break;
964                 }
965                 case L'\x7F': {
966                     out += L'\\';
967                     out += L'x';
968                     out += L'7';
969                     out += L'f';
970                     need_escape = need_complex_escape = true;
971                     break;
972                 }
973                 case L'\\':
974                 case L'\'': {
975                     need_escape = need_complex_escape = true;
976                     out += L'\\';
977                     out += *in;
978                     break;
979                 }
980                 case ANY_CHAR: {
981                     // See #1614
982                     out += L'?';
983                     break;
984                 }
985                 case ANY_STRING: {
986                     out += L'*';
987                     break;
988                 }
989                 case ANY_STRING_RECURSIVE: {
990                     out += L"**";
991                     break;
992                 }
993 
994                 case L'&':
995                 case L'$':
996                 case L' ':
997                 case L'#':
998                 case L'^':
999                 case L'<':
1000                 case L'>':
1001                 case L'(':
1002                 case L')':
1003                 case L'[':
1004                 case L']':
1005                 case L'{':
1006                 case L'}':
1007                 case L'?':
1008                 case L'*':
1009                 case L'|':
1010                 case L';':
1011                 case L'"':
1012                 case L'%':
1013                 case L'~': {
1014                     bool char_is_normal = (c == L'~' && no_tilde) || (c == L'^' && no_caret) ||
1015                                           (c == L'?' && no_qmark);
1016                     if (!char_is_normal) {
1017                         need_escape = true;
1018                         if (escape_all) out += L'\\';
1019                     }
1020                     out += *in;
1021                     break;
1022                 }
1023 
1024                 default: {
1025                     if (*in < 32) {
1026                         if (*in < 27 && *in > 0) {
1027                             out += L'\\';
1028                             out += L'c';
1029                             out += L'a' + *in - 1;
1030 
1031                             need_escape = need_complex_escape = true;
1032                             break;
1033                         }
1034 
1035                         int tmp = (*in) % 16;
1036                         out += L'\\';
1037                         out += L'x';
1038                         out += ((*in > 15) ? L'1' : L'0');
1039                         out += tmp > 9 ? L'a' + (tmp - 10) : L'0' + tmp;
1040                         need_escape = need_complex_escape = true;
1041                     } else {
1042                         out += *in;
1043                     }
1044                     break;
1045                 }
1046             }
1047         }
1048 
1049         in++;
1050     }
1051 
1052     // Use quoted escaping if possible, since most people find it easier to read.
1053     if (!no_quoted && need_escape && !need_complex_escape && escape_all) {
1054         wchar_t single_quote = L'\'';
1055         out.clear();
1056         out.reserve(2 + in_len);
1057         out.push_back(single_quote);
1058         out.append(orig_in, in_len);
1059         out.push_back(single_quote);
1060     }
1061 }
1062 
1063 /// Escapes a string for use in a regex string. Not safe for use with `eval` as only
1064 /// characters reserved by PCRE2 are escaped, i.e. it relies on fish's automatic escaping
1065 /// of subshell output in subsequent concatenation or for use as an argument.
1066 /// \param in is the raw string to be searched for literally when substituted in a PCRE2 expression.
escape_string_pcre2(const wcstring & in)1067 static wcstring escape_string_pcre2(const wcstring &in) {
1068     wcstring out;
1069     out.reserve(in.size() * 1.3);  // a wild guess
1070 
1071     for (auto c : in) {
1072         switch (c) {
1073             case L'.':
1074             case L'^':
1075             case L'$':
1076             case L'*':
1077             case L'+':
1078             case L'(':
1079             case L')':
1080             case L'?':
1081             case L'[':
1082             case L'{':
1083             case L'}':
1084             case L'\\':
1085             case L'|':
1086             // these two only *need* to be escaped within a character class, and technically it
1087             // makes no sense to ever use process substitution output to compose a character class,
1088             // but...
1089             case L'-':
1090             case L']':
1091                 out.push_back('\\');
1092                 /* FALLTHROUGH */
1093             default:
1094                 out.push_back(c);
1095         }
1096     }
1097 
1098     return out;
1099 }
1100 
escape_string(const wchar_t * in,escape_flags_t flags,escape_string_style_t style)1101 wcstring escape_string(const wchar_t *in, escape_flags_t flags, escape_string_style_t style) {
1102     wcstring result;
1103 
1104     switch (style) {
1105         case STRING_STYLE_SCRIPT: {
1106             escape_string_script(in, std::wcslen(in), result, flags);
1107             break;
1108         }
1109         case STRING_STYLE_URL: {
1110             escape_string_url(in, result);
1111             break;
1112         }
1113         case STRING_STYLE_VAR: {
1114             escape_string_var(in, result);
1115             break;
1116         }
1117         case STRING_STYLE_REGEX: {
1118             result = escape_string_pcre2(in);
1119             break;
1120         }
1121     }
1122 
1123     return result;
1124 }
1125 
/// Escape the string \p in according to \p style and return the result. The script style is
/// given the string's full size, so it does not stop at an embedded NUL.
/// \p flags is only consulted for STRING_STYLE_SCRIPT. If \p style matches none of the handled
/// styles, an empty string is returned.
wcstring escape_string(const wcstring &in, escape_flags_t flags, escape_string_style_t style) {
    wcstring escaped;

    switch (style) {
        case STRING_STYLE_SCRIPT:
            escape_string_script(in.c_str(), in.size(), escaped, flags);
            break;
        case STRING_STYLE_URL:
            escape_string_url(in, escaped);
            break;
        case STRING_STYLE_VAR:
            escape_string_var(in, escaped);
            break;
        case STRING_STYLE_REGEX:
            escaped = escape_string_pcre2(in);
            break;
    }

    return escaped;
}
1150 
1151 /// Helper to return the last character in a string, or none.
string_last_char(const wcstring & str)1152 static maybe_t<wchar_t> string_last_char(const wcstring &str) {
1153     if (str.empty()) return none();
1154     return str.back();
1155 }
1156 
1157 /// Given a null terminated string starting with a backslash, read the escape as if it is unquoted,
1158 /// appending to result. Return the number of characters consumed, or none on error.
read_unquoted_escape(const wchar_t * input,wcstring * result,bool allow_incomplete,bool unescape_special)1159 maybe_t<size_t> read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete,
1160                                      bool unescape_special) {
1161     assert(input[0] == L'\\' && "Not an escape");
1162 
1163     // Here's the character we'll ultimately append, or none. Note that L'\0' is a
1164     // valid thing to append.
1165     maybe_t<wchar_t> result_char_or_none = none();
1166 
1167     bool errored = false;
1168     size_t in_pos = 1;  // in_pos always tracks the next character to read (and therefore the number
1169                         // of characters read so far)
1170     const wchar_t c = input[in_pos++];
1171     switch (c) {
1172         // A null character after a backslash is an error.
1173         case L'\0': {
1174             // Adjust in_pos to only include the backslash.
1175             assert(in_pos > 0);
1176             in_pos--;
1177 
1178             // It's an error, unless we're allowing incomplete escapes.
1179             if (!allow_incomplete) errored = true;
1180             break;
1181         }
1182         // Numeric escape sequences. No prefix means octal escape, otherwise hexadecimal.
1183         case L'0':
1184         case L'1':
1185         case L'2':
1186         case L'3':
1187         case L'4':
1188         case L'5':
1189         case L'6':
1190         case L'7':
1191         case L'u':
1192         case L'U':
1193         case L'x':
1194         case L'X': {
1195             long long res = 0;
1196             size_t chars = 2;
1197             int base = 16;
1198             bool byte_literal = false;
1199             wchar_t max_val = ASCII_MAX;
1200 
1201             switch (c) {
1202                 case L'u': {
1203                     chars = 4;
1204                     max_val = UCS2_MAX;
1205                     break;
1206                 }
1207                 case L'U': {
1208                     chars = 8;
1209                     max_val = WCHAR_MAX;
1210 
1211                     // Don't exceed the largest Unicode code point - see #1107.
1212                     if (0x10FFFF < max_val) max_val = static_cast<wchar_t>(0x10FFFF);
1213                     break;
1214                 }
1215                 case L'x': {
1216                     chars = 2;
1217                     max_val = ASCII_MAX;
1218                     break;
1219                 }
1220                 case L'X': {
1221                     byte_literal = true;
1222                     max_val = BYTE_MAX;
1223                     break;
1224                 }
1225                 default: {
1226                     base = 8;
1227                     chars = 3;
1228                     // Note that in_pos currently is just after the first post-backslash character;
1229                     // we want to start our escape from there.
1230                     assert(in_pos > 0);
1231                     in_pos--;
1232                     break;
1233                 }
1234             }
1235 
1236             for (size_t i = 0; i < chars; i++) {
1237                 long d = convert_digit(input[in_pos], base);
1238                 if (d < 0) {
1239                     break;
1240                 }
1241 
1242                 res = (res * base) + d;
1243                 in_pos++;
1244             }
1245 
1246             if (res <= max_val) {
1247                 result_char_or_none =
1248                     static_cast<wchar_t>((byte_literal ? ENCODE_DIRECT_BASE : 0) + res);
1249             } else {
1250                 errored = true;
1251             }
1252 
1253             break;
1254         }
1255         // \a means bell (alert).
1256         case L'a': {
1257             result_char_or_none = L'\a';
1258             break;
1259         }
1260         // \b means backspace.
1261         case L'b': {
1262             result_char_or_none = L'\b';
1263             break;
1264         }
1265         // \cX means control sequence X.
1266         case L'c': {
1267             const wchar_t sequence_char = input[in_pos++];
1268             if (sequence_char >= L'a' && sequence_char <= (L'a' + 32)) {
1269                 result_char_or_none = sequence_char - L'a' + 1;
1270             } else if (sequence_char >= L'A' && sequence_char <= (L'A' + 32)) {
1271                 result_char_or_none = sequence_char - L'A' + 1;
1272             } else {
1273                 errored = true;
1274             }
1275             break;
1276         }
1277         // \x1B means escape.
1278         case L'e': {
1279             result_char_or_none = L'\x1B';
1280             break;
1281         }
1282         // \f means form feed.
1283         case L'f': {
1284             result_char_or_none = L'\f';
1285             break;
1286         }
1287         // \n means newline.
1288         case L'n': {
1289             result_char_or_none = L'\n';
1290             break;
1291         }
1292         // \r means carriage return.
1293         case L'r': {
1294             result_char_or_none = L'\r';
1295             break;
1296         }
1297         // \t means tab.
1298         case L't': {
1299             result_char_or_none = L'\t';
1300             break;
1301         }
1302         // \v means vertical tab.
1303         case L'v': {
1304             result_char_or_none = L'\v';
1305             break;
1306         }
1307         // If a backslash is followed by an actual newline, swallow them both.
1308         case L'\n': {
1309             result_char_or_none = none();
1310             break;
1311         }
1312         default: {
1313             if (unescape_special) result->push_back(INTERNAL_SEPARATOR);
1314             result_char_or_none = c;
1315             break;
1316         }
1317     }
1318 
1319     if (!errored && result_char_or_none.has_value()) {
1320         result->push_back(*result_char_or_none);
1321     }
1322     if (errored) return none();
1323 
1324     return in_pos;
1325 }
1326 
1327 /// Returns the unescaped version of input_str into output_str (by reference). Returns true if
1328 /// successful. If false, the contents of output_str are undefined (!).
unescape_string_internal(const wchar_t * const input,const size_t input_len,wcstring * output_str,unescape_flags_t flags)1329 static bool unescape_string_internal(const wchar_t *const input, const size_t input_len,
1330                                      wcstring *output_str, unescape_flags_t flags) {
1331     // Set up result string, which we'll swap with the output on success.
1332     wcstring result;
1333     result.reserve(input_len);
1334 
1335     const bool unescape_special = static_cast<bool>(flags & UNESCAPE_SPECIAL);
1336     const bool allow_incomplete = static_cast<bool>(flags & UNESCAPE_INCOMPLETE);
1337     const bool ignore_backslashes = static_cast<bool>(flags & UNESCAPE_NO_BACKSLASHES);
1338 
1339     // The positions of open braces.
1340     std::vector<size_t> braces;
1341     // The positions of variable expansions or brace ","s.
1342     // We only read braces as expanders if there's a variable expansion or "," in them.
1343     std::vector<size_t> vars_or_seps;
1344     int brace_count = 0;
1345 
1346     bool errored = false;
1347     enum {
1348         mode_unquoted,
1349         mode_single_quotes,
1350         mode_double_quotes,
1351     } mode = mode_unquoted;
1352 
1353     for (size_t input_position = 0; input_position < input_len && !errored; input_position++) {
1354         const wchar_t c = input[input_position];
1355         // Here's the character we'll append to result, or none() to suppress it.
1356         maybe_t<wchar_t> to_append_or_none = c;
1357         if (mode == mode_unquoted) {
1358             switch (c) {
1359                 case L'\\': {
1360                     if (!ignore_backslashes) {
1361                         // Backslashes (escapes) are complicated and may result in errors, or
1362                         // appending INTERNAL_SEPARATORs, so we have to handle them specially.
1363                         auto escape_chars = read_unquoted_escape(
1364                             input + input_position, &result, allow_incomplete, unescape_special);
1365                         if (!escape_chars) {
1366                             // A none() return indicates an error.
1367                             errored = true;
1368                         } else {
1369                             // Skip over the characters we read, minus one because the outer loop
1370                             // will increment it.
1371                             assert(*escape_chars > 0);
1372                             input_position += *escape_chars - 1;
1373                         }
1374                         // We've already appended, don't append anything else.
1375                         to_append_or_none = none();
1376                     }
1377                     break;
1378                 }
1379                 case L'~': {
1380                     if (unescape_special && (input_position == 0)) {
1381                         to_append_or_none = HOME_DIRECTORY;
1382                     }
1383                     break;
1384                 }
1385                 case L'%': {
1386                     // Note that this only recognizes %self if the string is literally %self.
1387                     // %self/foo will NOT match this.
1388                     if (unescape_special && input_position == 0 &&
1389                         !std::wcscmp(input, PROCESS_EXPAND_SELF_STR)) {
1390                         to_append_or_none = PROCESS_EXPAND_SELF;
1391                         input_position += PROCESS_EXPAND_SELF_STR_LEN - 1;  // skip over 'self's
1392                     }
1393                     break;
1394                 }
1395                 case L'*': {
1396                     if (unescape_special) {
1397                         // In general, this is ANY_STRING. But as a hack, if the last appended char
1398                         // is ANY_STRING, delete the last char and store ANY_STRING_RECURSIVE to
1399                         // reflect the fact that ** is the recursive wildcard.
1400                         if (string_last_char(result) == ANY_STRING) {
1401                             assert(!result.empty());
1402                             result.resize(result.size() - 1);
1403                             to_append_or_none = ANY_STRING_RECURSIVE;
1404                         } else {
1405                             to_append_or_none = ANY_STRING;
1406                         }
1407                     }
1408                     break;
1409                 }
1410                 case L'?': {
1411                     if (unescape_special && !feature_test(features_t::qmark_noglob)) {
1412                         to_append_or_none = ANY_CHAR;
1413                     }
1414                     break;
1415                 }
1416                 case L'$': {
1417                     if (unescape_special) {
1418                         to_append_or_none = VARIABLE_EXPAND;
1419                         vars_or_seps.push_back(input_position);
1420                     }
1421                     break;
1422                 }
1423                 case L'{': {
1424                     if (unescape_special) {
1425                         brace_count++;
1426                         to_append_or_none = BRACE_BEGIN;
1427                         // We need to store where the brace *ends up* in the output.
1428                         braces.push_back(result.size());
1429                     }
1430                     break;
1431                 }
1432                 case L'}': {
1433                     if (unescape_special) {
1434                         // HACK: The completion machinery sometimes hands us partial tokens.
1435                         // We can't parse them properly, but it shouldn't hurt,
1436                         // so we don't assert here.
1437                         // See #4954.
1438                         // assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we
1439                         // shouldn't be able to get here");
1440                         brace_count--;
1441                         to_append_or_none = BRACE_END;
1442                         if (!braces.empty()) {
1443                             // If we didn't have a var or separator since the last '{',
1444                             // put the literal back.
1445                             if (vars_or_seps.empty() || vars_or_seps.back() < braces.back()) {
1446                                 result[braces.back()] = L'{';
1447                                 // We also need to turn all spaces back.
1448                                 for (size_t i = braces.back() + 1; i < result.size(); i++) {
1449                                     if (result[i] == BRACE_SPACE) result[i] = L' ';
1450                                 }
1451                                 to_append_or_none = L'}';
1452                             }
1453 
1454                             // Remove all seps inside the current brace pair, so if we have a
1455                             // surrounding pair we only get seps inside *that*.
1456                             if (!vars_or_seps.empty()) {
1457                                 while (!vars_or_seps.empty() && vars_or_seps.back() > braces.back())
1458                                     vars_or_seps.pop_back();
1459                             }
1460                             braces.pop_back();
1461                         }
1462                     }
1463                     break;
1464                 }
1465                 case L',': {
1466                     if (unescape_special && brace_count > 0) {
1467                         to_append_or_none = BRACE_SEP;
1468                         vars_or_seps.push_back(input_position);
1469                     }
1470                     break;
1471                 }
1472                 case L' ': {
1473                     if (unescape_special && brace_count > 0) {
1474                         to_append_or_none = BRACE_SPACE;
1475                     }
1476                     break;
1477                 }
1478                 case L'\'': {
1479                     mode = mode_single_quotes;
1480                     to_append_or_none =
1481                         unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
1482                     break;
1483                 }
1484                 case L'\"': {
1485                     mode = mode_double_quotes;
1486                     to_append_or_none =
1487                         unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
1488                     break;
1489                 }
1490                 default: {
1491                     break;
1492                 }
1493             }
1494         } else if (mode == mode_single_quotes) {
1495             if (c == L'\\') {
1496                 // A backslash may or may not escape something in single quotes.
1497                 switch (input[input_position + 1]) {
1498                     case '\\':
1499                     case L'\'': {
1500                         to_append_or_none = input[input_position + 1];
1501                         input_position += 1;  // skip over the backslash
1502                         break;
1503                     }
1504                     case L'\0': {
1505                         if (!allow_incomplete) {
1506                             errored = true;
1507                         } else {
1508                             // PCA this line had the following cryptic comment: 'We may ever escape
1509                             // a NULL character, but still appending a \ in case I am wrong.' Not
1510                             // sure what it means or the importance of this.
1511                             input_position += 1; /* Skip over the backslash */
1512                             to_append_or_none = L'\\';
1513                         }
1514                         break;
1515                     }
1516                     default: {
1517                         // Literal backslash that doesn't escape anything! Leave things alone; we'll
1518                         // append the backslash itself.
1519                         break;
1520                     }
1521                 }
1522             } else if (c == L'\'') {
1523                 to_append_or_none =
1524                     unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
1525                 mode = mode_unquoted;
1526             }
1527         } else if (mode == mode_double_quotes) {
1528             switch (c) {
1529                 case L'"': {
1530                     mode = mode_unquoted;
1531                     to_append_or_none =
1532                         unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
1533                     break;
1534                 }
1535                 case '\\': {
1536                     switch (input[input_position + 1]) {
1537                         case L'\0': {
1538                             if (!allow_incomplete) {
1539                                 errored = true;
1540                             } else {
1541                                 to_append_or_none = L'\0';
1542                             }
1543                             break;
1544                         }
1545                         case '\\':
1546                         case L'$':
1547                         case '"': {
1548                             to_append_or_none = input[input_position + 1];
1549                             input_position += 1; /* Skip over the backslash */
1550                             break;
1551                         }
1552                         case '\n': {
1553                             /* Swallow newline */
1554                             to_append_or_none = none();
1555                             input_position += 1; /* Skip over the backslash */
1556                             break;
1557                         }
1558                         default: {
1559                             /* Literal backslash that doesn't escape anything! Leave things alone;
1560                              * we'll append the backslash itself */
1561                             break;
1562                         }
1563                     }
1564                     break;
1565                 }
1566                 case '$': {
1567                     if (unescape_special) {
1568                         to_append_or_none = VARIABLE_EXPAND_SINGLE;
1569                         vars_or_seps.push_back(input_position);
1570                     }
1571                     break;
1572                 }
1573                 default: {
1574                     break;
1575                 }
1576             }
1577         }
1578 
1579         // Now maybe append the char.
1580         if (to_append_or_none.has_value()) {
1581             result.push_back(*to_append_or_none);
1582         }
1583     }
1584 
1585     // Return the string by reference, and then success.
1586     if (!errored) {
1587         *output_str = std::move(result);
1588     }
1589     return !errored;
1590 }
1591 
unescape_string_in_place(wcstring * str,unescape_flags_t escape_special)1592 bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special) {
1593     assert(str != nullptr);
1594     wcstring output;
1595     bool success = unescape_string_internal(str->c_str(), str->size(), &output, escape_special);
1596     if (success) {
1597         *str = std::move(output);
1598     }
1599     return success;
1600 }
1601 
unescape_string(const wchar_t * input,wcstring * output,unescape_flags_t escape_special,escape_string_style_t style)1602 bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special,
1603                      escape_string_style_t style) {
1604     bool success = false;
1605     switch (style) {
1606         case STRING_STYLE_SCRIPT: {
1607             success = unescape_string_internal(input, std::wcslen(input), output, escape_special);
1608             break;
1609         }
1610         case STRING_STYLE_URL: {
1611             success = unescape_string_url(input, output);
1612             break;
1613         }
1614         case STRING_STYLE_VAR: {
1615             success = unescape_string_var(input, output);
1616             break;
1617         }
1618         case STRING_STYLE_REGEX: {
1619             // unescaping PCRE2 is not needed/supported, the PCRE2 engine is responsible for that
1620             success = false;
1621             break;
1622         }
1623     }
1624     if (!success) output->clear();
1625     return success;
1626 }
1627 
unescape_string(const wcstring & input,wcstring * output,unescape_flags_t escape_special,escape_string_style_t style)1628 bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special,
1629                      escape_string_style_t style) {
1630     bool success = false;
1631     switch (style) {
1632         case STRING_STYLE_SCRIPT: {
1633             success = unescape_string_internal(input.c_str(), input.size(), output, escape_special);
1634             break;
1635         }
1636         case STRING_STYLE_URL: {
1637             success = unescape_string_url(input.c_str(), output);
1638             break;
1639         }
1640         case STRING_STYLE_VAR: {
1641             success = unescape_string_var(input.c_str(), output);
1642             break;
1643         }
1644         case STRING_STYLE_REGEX: {
1645             // unescaping PCRE2 is not needed/supported, the PCRE2 engine is responsible for that
1646             success = false;
1647             break;
1648         }
1649     }
1650     if (!success) output->clear();
1651     return success;
1652 }
1653 
/// Log a pair of error messages stating that a bug was hit and where to report it
/// (PACKAGE_BUGREPORT). The function is marked noinline; the first message invites setting a
/// debugger breakpoint on 'bugreport'.
[[gnu::noinline]] void bugreport() {
    FLOG(error, _(L"This is a bug. Break on 'bugreport' to debug."));
    FLOG(error, _(L"If you can reproduce it, please report: "), PACKAGE_BUGREPORT, L'.');
}
1658 
format_size(long long sz)1659 wcstring format_size(long long sz) {
1660     wcstring result;
1661     const wchar_t *sz_name[] = {L"kB", L"MB", L"GB", L"TB", L"PB", L"EB", L"ZB", L"YB", nullptr};
1662 
1663     if (sz < 0) {
1664         result.append(L"unknown");
1665     } else if (sz < 1) {
1666         result.append(_(L"empty"));
1667     } else if (sz < 1024) {
1668         result.append(format_string(L"%lldB", sz));
1669     } else {
1670         int i;
1671 
1672         for (i = 0; sz_name[i]; i++) {
1673             if (sz < (1024 * 1024) || !sz_name[i + 1]) {
1674                 long isz = (static_cast<long>(sz)) / 1024;
1675                 if (isz > 9)
1676                     result.append(format_string(L"%ld%ls", isz, sz_name[i]));
1677                 else
1678                     result.append(
1679                         format_string(L"%.1f%ls", static_cast<double>(sz) / 1024, sz_name[i]));
1680                 break;
1681             }
1682             sz /= 1024;
1683         }
1684     }
1685     return result;
1686 }
1687 
/// Strip the most significant decimal digit from *xp and return it as an ASCII character.
/// On return *xp holds the remaining lower-order part, e.g. 846 becomes 46 and '8' is returned.
/// Note that any zeros directly following the extracted digit vanish from *xp (100 becomes 0).
static char extract_most_significant_digit(unsigned long long *xp) {
    unsigned long long digit = *xp;
    unsigned long long scale = 1;
    // Shift right one decimal place at a time until a single digit remains, tracking its weight.
    for (; digit >= 10; digit /= 10) {
        scale *= 10;
    }
    *xp -= digit * scale;
    return static_cast<char>('0' + digit);
}
1699 
/// Append the decimal representation of val to buff, starting at index *inout_idx and never
/// writing at or beyond index max_len. *inout_idx is advanced past the characters written. If the
/// number does not fit, only its leading digits are written. No terminating nul is added.
///
/// All digits are emitted, including interior and trailing zeros (100 -> "100") and a lone "0"
/// for zero. Only fixed-size stack storage is used; nothing is allocated.
static void append_ull(char *buff, unsigned long long val, size_t *inout_idx, size_t max_len) {
    // Collect the digits least-significant first. Each byte contributes at most three decimal
    // digits (256 < 1000), so this scratch space is always large enough.
    char digits[sizeof(unsigned long long) * 3];
    size_t ndigits = 0;
    do {
        digits[ndigits++] = static_cast<char>('0' + val % 10);
        val /= 10;
    } while (val > 0);

    // Copy them out most-significant first, stopping early if the output buffer is full.
    size_t idx = *inout_idx;
    while (ndigits > 0 && idx < max_len) {
        buff[idx++] = digits[--ndigits];
    }
    *inout_idx = idx;
}
1705 
/// Append the characters of the nul-terminated string str to buff, starting at index *inout_idx
/// and never writing at or beyond index max_len. *inout_idx is advanced past the characters
/// written. The string is silently truncated if it does not fit; no terminating nul is added.
static void append_str(char *buff, const char *str, size_t *inout_idx, size_t max_len) {
    size_t pos = *inout_idx;
    for (const char *src = str; *src != '\0' && pos < max_len; ++src) {
        buff[pos++] = *src;
    }
    *inout_idx = pos;
}
1711 
format_size_safe(char buff[128],unsigned long long sz)1712 void format_size_safe(char buff[128], unsigned long long sz) {
1713     const size_t buff_size = 128;
1714     const size_t max_len = buff_size - 1;  // need to leave room for a null terminator
1715     std::memset(buff, 0, buff_size);
1716     size_t idx = 0;
1717     const char *const sz_name[] = {"kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", nullptr};
1718     if (sz < 1) {
1719         strncpy(buff, "empty", buff_size);
1720     } else if (sz < 1024) {
1721         append_ull(buff, sz, &idx, max_len);
1722         append_str(buff, "B", &idx, max_len);
1723     } else {
1724         for (size_t i = 0; sz_name[i]; i++) {
1725             if (sz < (1024 * 1024) || !sz_name[i + 1]) {
1726                 unsigned long long isz = sz / 1024;
1727                 if (isz > 9) {
1728                     append_ull(buff, isz, &idx, max_len);
1729                 } else {
1730                     append_ull(buff, isz, &idx, max_len);
1731 
1732                     // Maybe append a single fraction digit.
1733                     unsigned long long remainder = sz % 1024;
1734                     if (remainder > 0) {
1735                         char tmp[3] = {'.', extract_most_significant_digit(&remainder), 0};
1736                         append_str(buff, tmp, &idx, max_len);
1737                     }
1738                 }
1739                 append_str(buff, sz_name[i], &idx, max_len);
1740                 break;
1741             }
1742             sz /= 1024;
1743         }
1744     }
1745 }
1746 
1747 /// Return the number of seconds from the UNIX epoch, with subsecond precision. This function uses
1748 /// the gettimeofday function and will have the same precision as that function.
timef()1749 double timef() {
1750     struct timeval tv;
1751     assert_with_errno(gettimeofday(&tv, nullptr) != -1);
1752     // return (double)tv.tv_sec + 0.000001 * tv.tv_usec;
1753     return static_cast<double>(tv.tv_sec) + 1e-6 * tv.tv_usec;
1754 }
1755 
/// Terminate the process immediately with the given exit status by calling _exit(), which, unlike
/// exit(), does not run atexit handlers or destructors of static objects.
void exit_without_destructors(int code) { _exit(code); }
1757 
1758 extern "C" {
debug_thread_error(void)1759 [[gnu::noinline]] void debug_thread_error(void) {
1760     // Wait for a SIGINT. We can't use sigsuspend() because the signal may be delivered on another
1761     // thread.
1762     sigchecker_t sigint(topic_t::sighupint);
1763     sigint.wait();
1764 }
1765 }
1766 
set_main_thread()1767 void set_main_thread() {
1768     // Just call thread_id() once to force increment of thread_id.
1769     uint64_t tid = thread_id();
1770     assert(tid == 1 && "main thread should have thread ID 1");
1771     (void)tid;
1772 }
1773 
/// Set the flag that makes assert_is_main_thread() and assert_is_background_thread() skip their
/// checks. As the name says, this is intended for tests.
void configure_thread_assertions_for_testing() { thread_asserts_cfg_for_testing = true; }
1775 
/// Return the value of the is_forked_proc flag, which the atfork child handler installed by
/// setup_fork_guards() sets to true in a forked child.
bool is_forked_child() { return is_forked_proc; }
1777 
setup_fork_guards()1778 void setup_fork_guards() {
1779     is_forked_proc = false;
1780     static std::once_flag fork_guard_flag;
1781     std::call_once(fork_guard_flag,
1782                    [] { pthread_atfork(nullptr, nullptr, [] { is_forked_proc = true; }); });
1783 }
1784 
save_term_foreground_process_group()1785 void save_term_foreground_process_group() {
1786     ASSERT_IS_MAIN_THREAD();
1787     initial_fg_process_group = tcgetpgrp(STDIN_FILENO);
1788 }
1789 
restore_term_foreground_process_group_for_exit()1790 void restore_term_foreground_process_group_for_exit() {
1791     // We wish to restore the tty to the initial owner. There's two ways this can go wrong:
1792     //  1. We may steal the tty from someone else (#7060).
1793     //  2. The call to tcsetpgrp may deliver SIGSTOP to us, and we will not exit.
1794     // Hanging on exit seems worse, so ensure that SIGTTOU is ignored so we do not get SIGSTOP.
1795     // Note initial_fg_process_group == 0 is possible with Linux pid namespaces.
1796     // This is called during shutdown and from a signal handler. We don't bother to complain on
1797     // failure because doing so is unlikely to be noticed.
1798     if (initial_fg_process_group > 0 && initial_fg_process_group != getpgrp()) {
1799         (void)signal(SIGTTOU, SIG_IGN);
1800         (void)tcsetpgrp(STDIN_FILENO, initial_fg_process_group);
1801     }
1802 }
1803 
is_main_thread()1804 bool is_main_thread() { return thread_id() == 1; }
1805 
assert_is_main_thread(const char * who)1806 void assert_is_main_thread(const char *who) {
1807     if (!is_main_thread() && !thread_asserts_cfg_for_testing) {
1808         FLOGF(error, L"%s called off of main thread.", who);
1809         FLOGF(error, L"Break on debug_thread_error to debug.");
1810         debug_thread_error();
1811     }
1812 }
1813 
assert_is_not_forked_child(const char * who)1814 void assert_is_not_forked_child(const char *who) {
1815     if (is_forked_child()) {
1816         FLOGF(error, L"%s called in a forked child.", who);
1817         FLOG(error, L"Break on debug_thread_error to debug.");
1818         debug_thread_error();
1819     }
1820 }
1821 
assert_is_background_thread(const char * who)1822 void assert_is_background_thread(const char *who) {
1823     if (is_main_thread() && !thread_asserts_cfg_for_testing) {
1824         FLOGF(error, L"%s called on the main thread (may block!).", who);
1825         FLOG(error, L"Break on debug_thread_error to debug.");
1826         debug_thread_error();
1827     }
1828 }
1829 
assert_is_locked(std::mutex & mutex,const char * who,const char * caller)1830 void assert_is_locked(std::mutex &mutex, const char *who, const char *caller) {
1831     // Note that std::mutex.try_lock() is allowed to return false when the mutex isn't
1832     // actually locked; fortunately we are checking the opposite so we're safe.
1833     if (mutex.try_lock()) {
1834         FLOGF(error, L"%s is not locked when it should be in '%s'", who, caller);
1835         FLOG(error, L"Break on debug_thread_error to debug.");
1836         debug_thread_error();
1837         mutex.unlock();
1838     }
1839 }
1840 
1841 /// Test if the specified character is in a range that fish uses interally to store special tokens.
1842 ///
1843 /// NOTE: This is used when tokenizing the input. It is also used when reading input, before
1844 /// tokenization, to replace such chars with REPLACEMENT_WCHAR if they're not part of a quoted
1845 /// string. We don't want external input to be able to feed reserved characters into our
1846 /// lexer/parser or code evaluator.
1847 //
1848 // TODO: Actually implement the replacement as documented above.
fish_reserved_codepoint(wchar_t c)1849 bool fish_reserved_codepoint(wchar_t c) {
1850     return (c >= RESERVED_CHAR_BASE && c < RESERVED_CHAR_END) ||
1851            (c >= ENCODE_DIRECT_BASE && c < ENCODE_DIRECT_END);
1852 }
1853 
1854 /// Reopen stdin, stdout and/or stderr on /dev/null. This is invoked when we find that our tty has
1855 /// become invalid.
redirect_tty_output()1856 void redirect_tty_output() {
1857     struct termios t;
1858     int fd = open("/dev/null", O_WRONLY);
1859     if (fd == -1) {
1860         __fish_assert("Could not open /dev/null!", __FILE__, __LINE__, errno);
1861     }
1862     if (tcgetattr(STDIN_FILENO, &t) == -1 && errno == EIO) dup2(fd, STDIN_FILENO);
1863     if (tcgetattr(STDOUT_FILENO, &t) == -1 && errno == EIO) dup2(fd, STDOUT_FILENO);
1864     if (tcgetattr(STDERR_FILENO, &t) == -1 && errno == EIO) dup2(fd, STDERR_FILENO);
1865     close(fd);
1866 }
1867 
1868 /// Display a failed assertion message, dump a stack trace if possible, then die.
__fish_assert(const char * msg,const char * file,size_t line,int error)1869 [[noreturn]] void __fish_assert(const char *msg, const char *file, size_t line, int error) {
1870     if (error) {
1871         FLOGF(error, L"%s:%zu: failed assertion: %s: errno %d (%s)", file, line, msg, error,
1872               std::strerror(error));
1873     } else {
1874         FLOGF(error, L"%s:%zu: failed assertion: %s", file, line, msg);
1875     }
1876     show_stackframe(L'E', 99, 1);
1877     abort();
1878 }
1879 
1880 /// Test if the given char is valid in a variable name.
valid_var_name_char(wchar_t chr)1881 bool valid_var_name_char(wchar_t chr) { return fish_iswalnum(chr) || chr == L'_'; }
1882 
1883 /// Test if the given string is a valid variable name.
valid_var_name(const wcstring & str)1884 bool valid_var_name(const wcstring &str) {
1885     // Note do not use c_str(), we want to fail on embedded nul bytes.
1886     return !str.empty() && std::all_of(str.begin(), str.end(), valid_var_name_char);
1887 }
1888 
valid_var_name(const wchar_t * str)1889 bool valid_var_name(const wchar_t *str) {
1890     if (str[0] == L'\0') return false;
1891     for (size_t i = 0; str[i] != L'\0'; i++) {
1892         if (!valid_var_name_char(str[i])) return false;
1893     }
1894     return true;
1895 }
1896 
1897 /// Test if the string is a valid function name.
valid_func_name(const wcstring & str)1898 bool valid_func_name(const wcstring &str) {
1899     if (str.empty()) return false;
1900     if (str.at(0) == L'-') return false;
1901     if (str.find_first_of(L'/') != wcstring::npos) return false;
1902     return true;
1903 }
1904 
/// Return the path to the current executable. This needs to be realpath'd.
///
/// A platform-specific lookup is tried first; if it fails, argv0 is returned as-is (or an empty
/// string if argv0 is null).
std::string get_executable_path(const char *argv0) {
    char buff[PATH_MAX];

#ifdef __APPLE__
    // On OS X use its proprietary API to get the path to the executable.
    // This is basically grabbing exec_path after argc, argv, envp, ...: for us
    // https://opensource.apple.com/source/adv_cmds/adv_cmds-163/ps/print.c
    uint32_t capacity = sizeof buff;
    const int status = _NSGetExecutablePath(buff, &capacity);
    if (status == 0) {
        return std::string(buff);
    }
#elif defined(__BSD__) && defined(KERN_PROC_PATHNAME) && !defined(__NetBSD__)
    // BSDs do not have /proc by default, (although it can be mounted as procfs via the Linux
    // compatibility layer). We can use sysctl instead: per sysctl(3), passing in a process ID of -1
    // returns the value for the current process.
    //
    // (this is broken on NetBSD, while /proc works, so we use that)
    int mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
    size_t capacity = sizeof buff;
    if (sysctl(mib, sizeof(mib) / sizeof(int), buff, &capacity, nullptr, 0) == 0) {
        return std::string(buff);
    }
    wperror(L"sysctl KERN_PROC_PATHNAME");
#else
    // On other unixes, fall back to the Linux-ish /proc/ directory, trying each known location of
    // the executable symlink until one can be read.
    const char *const proc_links[] = {
        "/proc/self/exe",         // Linux
        "/proc/curproc/file",     // other BSDs
        "/proc/self/path/a.out",  // Solaris
    };
    ssize_t len = -1;
    for (const char *link : proc_links) {
        len = readlink(link, buff, sizeof buff - 1);
        if (len != -1) break;
    }
    if (len > 0) {
        // readlink() does not nul-terminate; one byte was reserved above for this.
        buff[len] = '\0';
        return std::string(buff);
    }
#endif

    // Just return argv0, which probably won't work (i.e. it's not an absolute path or a path
    // relative to the working directory, but instead something the caller found via $PATH). We'll
    // eventually fall back to the compile time paths.
    return std::string(argv0 ? argv0 : "");
}
1950 
/// Return a path to a directory where we can store temporary files.
///
/// A non-empty $TMPDIR takes precedence. Otherwise a platform default is used: the Darwin
/// per-user temp dir where confstr() offers it, else P_tmpdir, else _PATH_TMP, else "/tmp".
std::string get_path_to_tmp_dir() {
    // An empty $TMPDIR is treated like an unset one: returning "" would leave callers without a
    // usable directory.
    const char *env_tmpdir = getenv("TMPDIR");
    if (env_tmpdir && env_tmpdir[0] != '\0') {
        return env_tmpdir;
    }
#if defined(_CS_DARWIN_USER_TEMP_DIR)
    char osx_tmpdir[PATH_MAX];
    // confstr() returns 0 on error and a value larger than the buffer if the result was truncated.
    size_t n = confstr(_CS_DARWIN_USER_TEMP_DIR, osx_tmpdir, PATH_MAX);
    if (0 < n && n <= PATH_MAX) {
        return osx_tmpdir;
    } else {
        return "/tmp";
    }
#elif defined(P_tmpdir)
    return P_tmpdir;
#elif defined(_PATH_TMP)
    return _PATH_TMP;
#else
    return "/tmp";
#endif
}
1973 
1974 // This function attempts to distinguish between a console session (at the actual login vty) and a
1975 // session within a terminal emulator inside a desktop environment or over SSH. Unfortunately
1976 // there are few values of $TERM that we can interpret as being exclusively console sessions, and
1977 // most common operating systems do not use them. The value is cached for the duration of the fish
1978 // session. We err on the side of assuming it's not a console session. This approach isn't
1979 // bullet-proof and that's OK.
is_console_session()1980 bool is_console_session() {
1981     static const bool console_session = [] {
1982         ASSERT_IS_MAIN_THREAD();
1983 
1984         const char *tty_name = ttyname(0);
1985         constexpr auto len = const_strlen("/dev/tty");
1986         const char *TERM = getenv("TERM");
1987         return
1988             // Test that the tty matches /dev/(console|dcons|tty[uv\d])
1989             tty_name &&
1990             ((strncmp(tty_name, "/dev/tty", len) == 0 &&
1991               (tty_name[len] == 'u' || tty_name[len] == 'v' || isdigit(tty_name[len]))) ||
1992              strcmp(tty_name, "/dev/dcons") == 0 || strcmp(tty_name, "/dev/console") == 0)
1993             // and that $TERM is simple, e.g. `xterm` or `vt100`, not `xterm-something`
1994             && (!TERM || !strchr(TERM, '-') || !strcmp(TERM, "sun-color"));
1995     }();
1996     return console_session;
1997 }
1998 
// Compile-time sanity checks for const_strcmp: the sign of the result must reflect the ordering
// of the two strings, including when one is empty or a prefix of the other.
static_assert(const_strcmp("", "a") < 0, "const_strcmp failure");
static_assert(const_strcmp("a", "a") == 0, "const_strcmp failure");
static_assert(const_strcmp("a", "") > 0, "const_strcmp failure");
static_assert(const_strcmp("aa", "a") > 0, "const_strcmp failure");
static_assert(const_strcmp("a", "aa") < 0, "const_strcmp failure");
static_assert(const_strcmp("b", "aa") > 0, "const_strcmp failure");

// Compile-time sanity checks for const_strlen: the length excludes the terminating nul.
static_assert(const_strlen("") == 0, "const_strlen failure");
static_assert(const_strlen("a") == 1, "const_strlen failure");
static_assert(const_strlen("hello") == 5, "const_strlen failure");
2009