15ffd83dbSDimitry Andric //===-- Args.cpp ----------------------------------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "lldb/Utility/Args.h" 100b57cec5SDimitry Andric #include "lldb/Utility/ConstString.h" 110b57cec5SDimitry Andric #include "lldb/Utility/FileSpec.h" 120b57cec5SDimitry Andric #include "lldb/Utility/Stream.h" 130b57cec5SDimitry Andric #include "lldb/Utility/StringList.h" 140b57cec5SDimitry Andric #include "llvm/ADT/StringSwitch.h" 150b57cec5SDimitry Andric 160b57cec5SDimitry Andric using namespace lldb; 170b57cec5SDimitry Andric using namespace lldb_private; 180b57cec5SDimitry Andric 190b57cec5SDimitry Andric // A helper function for argument parsing. 200b57cec5SDimitry Andric // Parses the initial part of the first argument using normal double quote 210b57cec5SDimitry Andric // rules: backslash escapes the double quote and itself. The parsed string is 220b57cec5SDimitry Andric // appended to the second argument. The function returns the unparsed portion 230b57cec5SDimitry Andric // of the string, starting at the closing quote. 240b57cec5SDimitry Andric static llvm::StringRef ParseDoubleQuotes(llvm::StringRef quoted, 250b57cec5SDimitry Andric std::string &result) { 260b57cec5SDimitry Andric // Inside double quotes, '\' and '"' are special. 270b57cec5SDimitry Andric static const char *k_escapable_characters = "\"\\"; 280b57cec5SDimitry Andric while (true) { 290b57cec5SDimitry Andric // Skip over over regular characters and append them. 300b57cec5SDimitry Andric size_t regular = quoted.find_first_of(k_escapable_characters); 310b57cec5SDimitry Andric result += quoted.substr(0, regular); 320b57cec5SDimitry Andric quoted = quoted.substr(regular); 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric // If we have reached the end of string or the closing quote, we're done. 350b57cec5SDimitry Andric if (quoted.empty() || quoted.front() == '"') 360b57cec5SDimitry Andric break; 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric // We have found a backslash. 390b57cec5SDimitry Andric quoted = quoted.drop_front(); 400b57cec5SDimitry Andric 410b57cec5SDimitry Andric if (quoted.empty()) { 420b57cec5SDimitry Andric // A lone backslash at the end of string, let's just append it. 430b57cec5SDimitry Andric result += '\\'; 440b57cec5SDimitry Andric break; 450b57cec5SDimitry Andric } 460b57cec5SDimitry Andric 475ffd83dbSDimitry Andric // If the character after the backslash is not an allowed escapable 480b57cec5SDimitry Andric // character, we leave the character sequence untouched. 490b57cec5SDimitry Andric if (strchr(k_escapable_characters, quoted.front()) == nullptr) 500b57cec5SDimitry Andric result += '\\'; 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric result += quoted.front(); 530b57cec5SDimitry Andric quoted = quoted.drop_front(); 540b57cec5SDimitry Andric } 550b57cec5SDimitry Andric 560b57cec5SDimitry Andric return quoted; 570b57cec5SDimitry Andric } 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric static size_t ArgvToArgc(const char **argv) { 600b57cec5SDimitry Andric if (!argv) 610b57cec5SDimitry Andric return 0; 620b57cec5SDimitry Andric size_t count = 0; 630b57cec5SDimitry Andric while (*argv++) 640b57cec5SDimitry Andric ++count; 650b57cec5SDimitry Andric return count; 660b57cec5SDimitry Andric } 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric // Trims all whitespace that can separate command line arguments from the left 690b57cec5SDimitry Andric // side of the string. 700b57cec5SDimitry Andric static llvm::StringRef ltrimForArgs(llvm::StringRef str) { 710b57cec5SDimitry Andric static const char *k_space_separators = " \t"; 720b57cec5SDimitry Andric return str.ltrim(k_space_separators); 730b57cec5SDimitry Andric } 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric // A helper function for SetCommandString. Parses a single argument from the 760b57cec5SDimitry Andric // command string, processing quotes and backslashes in a shell-like manner. 770b57cec5SDimitry Andric // The function returns a tuple consisting of the parsed argument, the quote 780b57cec5SDimitry Andric // char used, and the unparsed portion of the string starting at the first 790b57cec5SDimitry Andric // unqouted, unescaped whitespace character. 800b57cec5SDimitry Andric static std::tuple<std::string, char, llvm::StringRef> 810b57cec5SDimitry Andric ParseSingleArgument(llvm::StringRef command) { 820b57cec5SDimitry Andric // Argument can be split into multiple discontiguous pieces, for example: 830b57cec5SDimitry Andric // "Hello ""World" 840b57cec5SDimitry Andric // this would result in a single argument "Hello World" (without the quotes) 850b57cec5SDimitry Andric // since the quotes would be removed and there is not space between the 860b57cec5SDimitry Andric // strings. 870b57cec5SDimitry Andric std::string arg; 880b57cec5SDimitry Andric 890b57cec5SDimitry Andric // Since we can have multiple quotes that form a single command in a command 900b57cec5SDimitry Andric // like: "Hello "world'!' (which will make a single argument "Hello world!") 910b57cec5SDimitry Andric // we remember the first quote character we encounter and use that for the 920b57cec5SDimitry Andric // quote character. 930b57cec5SDimitry Andric char first_quote_char = '\0'; 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric bool arg_complete = false; 960b57cec5SDimitry Andric do { 970b57cec5SDimitry Andric // Skip over over regular characters and append them. 980b57cec5SDimitry Andric size_t regular = command.find_first_of(" \t\r\"'`\\"); 990b57cec5SDimitry Andric arg += command.substr(0, regular); 1000b57cec5SDimitry Andric command = command.substr(regular); 1010b57cec5SDimitry Andric 1020b57cec5SDimitry Andric if (command.empty()) 1030b57cec5SDimitry Andric break; 1040b57cec5SDimitry Andric 1050b57cec5SDimitry Andric char special = command.front(); 1060b57cec5SDimitry Andric command = command.drop_front(); 1070b57cec5SDimitry Andric switch (special) { 1080b57cec5SDimitry Andric case '\\': 1090b57cec5SDimitry Andric if (command.empty()) { 1100b57cec5SDimitry Andric arg += '\\'; 1110b57cec5SDimitry Andric break; 1120b57cec5SDimitry Andric } 1130b57cec5SDimitry Andric 1145ffd83dbSDimitry Andric // If the character after the backslash is not an allowed escapable 1150b57cec5SDimitry Andric // character, we leave the character sequence untouched. 1160b57cec5SDimitry Andric if (strchr(" \t\\'\"`", command.front()) == nullptr) 1170b57cec5SDimitry Andric arg += '\\'; 1180b57cec5SDimitry Andric 1190b57cec5SDimitry Andric arg += command.front(); 1200b57cec5SDimitry Andric command = command.drop_front(); 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric break; 1230b57cec5SDimitry Andric 1240b57cec5SDimitry Andric case ' ': 1250b57cec5SDimitry Andric case '\t': 1260b57cec5SDimitry Andric case '\r': 1270b57cec5SDimitry Andric // We are not inside any quotes, we just found a space after an argument. 1280b57cec5SDimitry Andric // We are done. 1290b57cec5SDimitry Andric arg_complete = true; 1300b57cec5SDimitry Andric break; 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric case '"': 1330b57cec5SDimitry Andric case '\'': 1340b57cec5SDimitry Andric case '`': 1350b57cec5SDimitry Andric // We found the start of a quote scope. 1360b57cec5SDimitry Andric if (first_quote_char == '\0') 1370b57cec5SDimitry Andric first_quote_char = special; 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric if (special == '"') 1400b57cec5SDimitry Andric command = ParseDoubleQuotes(command, arg); 1410b57cec5SDimitry Andric else { 1420b57cec5SDimitry Andric // For single quotes, we simply skip ahead to the matching quote 1430b57cec5SDimitry Andric // character (or the end of the string). 1440b57cec5SDimitry Andric size_t quoted = command.find(special); 1450b57cec5SDimitry Andric arg += command.substr(0, quoted); 1460b57cec5SDimitry Andric command = command.substr(quoted); 1470b57cec5SDimitry Andric } 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric // If we found a closing quote, skip it. 1500b57cec5SDimitry Andric if (!command.empty()) 1510b57cec5SDimitry Andric command = command.drop_front(); 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric break; 1540b57cec5SDimitry Andric } 1550b57cec5SDimitry Andric } while (!arg_complete); 1560b57cec5SDimitry Andric 1570b57cec5SDimitry Andric return std::make_tuple(arg, first_quote_char, command); 1580b57cec5SDimitry Andric } 1590b57cec5SDimitry Andric 1600b57cec5SDimitry Andric Args::ArgEntry::ArgEntry(llvm::StringRef str, char quote) : quote(quote) { 1610b57cec5SDimitry Andric size_t size = str.size(); 1620b57cec5SDimitry Andric ptr.reset(new char[size + 1]); 1630b57cec5SDimitry Andric 1640b57cec5SDimitry Andric ::memcpy(data(), str.data() ? str.data() : "", size); 1650b57cec5SDimitry Andric ptr[size] = 0; 1660b57cec5SDimitry Andric } 1670b57cec5SDimitry Andric 1680b57cec5SDimitry Andric // Args constructor 1690b57cec5SDimitry Andric Args::Args(llvm::StringRef command) { SetCommandString(command); } 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric Args::Args(const Args &rhs) { *this = rhs; } 1720b57cec5SDimitry Andric 1730b57cec5SDimitry Andric Args::Args(const StringList &list) : Args() { 1749dba64beSDimitry Andric for (const std::string &arg : list) 1759dba64beSDimitry Andric AppendArgument(arg); 1760b57cec5SDimitry Andric } 1770b57cec5SDimitry Andric 178e8d8bef9SDimitry Andric Args::Args(llvm::ArrayRef<llvm::StringRef> args) : Args() { 179e8d8bef9SDimitry Andric for (llvm::StringRef arg : args) 180e8d8bef9SDimitry Andric AppendArgument(arg); 181e8d8bef9SDimitry Andric } 182e8d8bef9SDimitry Andric 1830b57cec5SDimitry Andric Args &Args::operator=(const Args &rhs) { 1840b57cec5SDimitry Andric Clear(); 1850b57cec5SDimitry Andric 1860b57cec5SDimitry Andric m_argv.clear(); 1870b57cec5SDimitry Andric m_entries.clear(); 1880b57cec5SDimitry Andric for (auto &entry : rhs.m_entries) { 1899dba64beSDimitry Andric m_entries.emplace_back(entry.ref(), entry.quote); 1900b57cec5SDimitry Andric m_argv.push_back(m_entries.back().data()); 1910b57cec5SDimitry Andric } 1920b57cec5SDimitry Andric m_argv.push_back(nullptr); 1930b57cec5SDimitry Andric return *this; 1940b57cec5SDimitry Andric } 1950b57cec5SDimitry Andric 1960b57cec5SDimitry Andric // Destructor 197fe6060f1SDimitry Andric Args::~Args() = default; 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric void Args::Dump(Stream &s, const char *label_name) const { 2000b57cec5SDimitry Andric if (!label_name) 2010b57cec5SDimitry Andric return; 2020b57cec5SDimitry Andric 2030b57cec5SDimitry Andric int i = 0; 2040b57cec5SDimitry Andric for (auto &entry : m_entries) { 2050b57cec5SDimitry Andric s.Indent(); 2069dba64beSDimitry Andric s.Format("{0}[{1}]=\"{2}\"\n", label_name, i++, entry.ref()); 2070b57cec5SDimitry Andric } 2080b57cec5SDimitry Andric s.Format("{0}[{1}]=NULL\n", label_name, i); 2090b57cec5SDimitry Andric s.EOL(); 2100b57cec5SDimitry Andric } 2110b57cec5SDimitry Andric 2120b57cec5SDimitry Andric bool Args::GetCommandString(std::string &command) const { 2130b57cec5SDimitry Andric command.clear(); 2140b57cec5SDimitry Andric 2150b57cec5SDimitry Andric for (size_t i = 0; i < m_entries.size(); ++i) { 2160b57cec5SDimitry Andric if (i > 0) 2170b57cec5SDimitry Andric command += ' '; 218bdd1243dSDimitry Andric char quote = m_entries[i].quote; 219bdd1243dSDimitry Andric if (quote != '\0') 220bdd1243dSDimitry Andric command += quote; 2219dba64beSDimitry Andric command += m_entries[i].ref(); 222bdd1243dSDimitry Andric if (quote != '\0') 223bdd1243dSDimitry Andric command += quote; 2240b57cec5SDimitry Andric } 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric return !m_entries.empty(); 2270b57cec5SDimitry Andric } 2280b57cec5SDimitry Andric 2290b57cec5SDimitry Andric bool Args::GetQuotedCommandString(std::string &command) const { 2300b57cec5SDimitry Andric command.clear(); 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric for (size_t i = 0; i < m_entries.size(); ++i) { 2330b57cec5SDimitry Andric if (i > 0) 2340b57cec5SDimitry Andric command += ' '; 2350b57cec5SDimitry Andric 2360b57cec5SDimitry Andric if (m_entries[i].quote) { 2370b57cec5SDimitry Andric command += m_entries[i].quote; 2389dba64beSDimitry Andric command += m_entries[i].ref(); 2390b57cec5SDimitry Andric command += m_entries[i].quote; 2400b57cec5SDimitry Andric } else { 2419dba64beSDimitry Andric command += m_entries[i].ref(); 2420b57cec5SDimitry Andric } 2430b57cec5SDimitry Andric } 2440b57cec5SDimitry Andric 2450b57cec5SDimitry Andric return !m_entries.empty(); 2460b57cec5SDimitry Andric } 2470b57cec5SDimitry Andric 2480b57cec5SDimitry Andric void Args::SetCommandString(llvm::StringRef command) { 2490b57cec5SDimitry Andric Clear(); 2500b57cec5SDimitry Andric m_argv.clear(); 2510b57cec5SDimitry Andric 2520b57cec5SDimitry Andric command = ltrimForArgs(command); 2530b57cec5SDimitry Andric std::string arg; 2540b57cec5SDimitry Andric char quote; 2550b57cec5SDimitry Andric while (!command.empty()) { 2560b57cec5SDimitry Andric std::tie(arg, quote, command) = ParseSingleArgument(command); 2570b57cec5SDimitry Andric m_entries.emplace_back(arg, quote); 2580b57cec5SDimitry Andric m_argv.push_back(m_entries.back().data()); 2590b57cec5SDimitry Andric command = ltrimForArgs(command); 2600b57cec5SDimitry Andric } 2610b57cec5SDimitry Andric m_argv.push_back(nullptr); 2620b57cec5SDimitry Andric } 2630b57cec5SDimitry Andric 2640b57cec5SDimitry Andric const char *Args::GetArgumentAtIndex(size_t idx) const { 2650b57cec5SDimitry Andric if (idx < m_argv.size()) 2660b57cec5SDimitry Andric return m_argv[idx]; 2670b57cec5SDimitry Andric return nullptr; 2680b57cec5SDimitry Andric } 2690b57cec5SDimitry Andric 2700b57cec5SDimitry Andric char **Args::GetArgumentVector() { 2710b57cec5SDimitry Andric assert(!m_argv.empty()); 2720b57cec5SDimitry Andric // TODO: functions like execve and posix_spawnp exhibit undefined behavior 2730b57cec5SDimitry Andric // when argv or envp is null. So the code below is actually wrong. However, 2740b57cec5SDimitry Andric // other code in LLDB depends on it being null. The code has been acting 2750b57cec5SDimitry Andric // this way for some time, so it makes sense to leave it this way until 2760b57cec5SDimitry Andric // someone has the time to come along and fix it. 2770b57cec5SDimitry Andric return (m_argv.size() > 1) ? m_argv.data() : nullptr; 2780b57cec5SDimitry Andric } 2790b57cec5SDimitry Andric 2800b57cec5SDimitry Andric const char **Args::GetConstArgumentVector() const { 2810b57cec5SDimitry Andric assert(!m_argv.empty()); 2820b57cec5SDimitry Andric return (m_argv.size() > 1) ? const_cast<const char **>(m_argv.data()) 2830b57cec5SDimitry Andric : nullptr; 2840b57cec5SDimitry Andric } 2850b57cec5SDimitry Andric 2860b57cec5SDimitry Andric void Args::Shift() { 2870b57cec5SDimitry Andric // Don't pop the last NULL terminator from the argv array 2880b57cec5SDimitry Andric if (m_entries.empty()) 2890b57cec5SDimitry Andric return; 2900b57cec5SDimitry Andric m_argv.erase(m_argv.begin()); 2910b57cec5SDimitry Andric m_entries.erase(m_entries.begin()); 2920b57cec5SDimitry Andric } 2930b57cec5SDimitry Andric 2940b57cec5SDimitry Andric void Args::Unshift(llvm::StringRef arg_str, char quote_char) { 2950b57cec5SDimitry Andric InsertArgumentAtIndex(0, arg_str, quote_char); 2960b57cec5SDimitry Andric } 2970b57cec5SDimitry Andric 2980b57cec5SDimitry Andric void Args::AppendArguments(const Args &rhs) { 2990b57cec5SDimitry Andric assert(m_argv.size() == m_entries.size() + 1); 3000b57cec5SDimitry Andric assert(m_argv.back() == nullptr); 3010b57cec5SDimitry Andric m_argv.pop_back(); 3020b57cec5SDimitry Andric for (auto &entry : rhs.m_entries) { 3039dba64beSDimitry Andric m_entries.emplace_back(entry.ref(), entry.quote); 3040b57cec5SDimitry Andric m_argv.push_back(m_entries.back().data()); 3050b57cec5SDimitry Andric } 3060b57cec5SDimitry Andric m_argv.push_back(nullptr); 3070b57cec5SDimitry Andric } 3080b57cec5SDimitry Andric 3090b57cec5SDimitry Andric void Args::AppendArguments(const char **argv) { 3100b57cec5SDimitry Andric size_t argc = ArgvToArgc(argv); 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric assert(m_argv.size() == m_entries.size() + 1); 3130b57cec5SDimitry Andric assert(m_argv.back() == nullptr); 3140b57cec5SDimitry Andric m_argv.pop_back(); 315bdd1243dSDimitry Andric for (auto arg : llvm::ArrayRef(argv, argc)) { 3160b57cec5SDimitry Andric m_entries.emplace_back(arg, '\0'); 3170b57cec5SDimitry Andric m_argv.push_back(m_entries.back().data()); 3180b57cec5SDimitry Andric } 3190b57cec5SDimitry Andric 3200b57cec5SDimitry Andric m_argv.push_back(nullptr); 3210b57cec5SDimitry Andric } 3220b57cec5SDimitry Andric 3230b57cec5SDimitry Andric void Args::AppendArgument(llvm::StringRef arg_str, char quote_char) { 3240b57cec5SDimitry Andric InsertArgumentAtIndex(GetArgumentCount(), arg_str, quote_char); 3250b57cec5SDimitry Andric } 3260b57cec5SDimitry Andric 3270b57cec5SDimitry Andric void Args::InsertArgumentAtIndex(size_t idx, llvm::StringRef arg_str, 3280b57cec5SDimitry Andric char quote_char) { 3290b57cec5SDimitry Andric assert(m_argv.size() == m_entries.size() + 1); 3300b57cec5SDimitry Andric assert(m_argv.back() == nullptr); 3310b57cec5SDimitry Andric 3320b57cec5SDimitry Andric if (idx > m_entries.size()) 3330b57cec5SDimitry Andric return; 3340b57cec5SDimitry Andric m_entries.emplace(m_entries.begin() + idx, arg_str, quote_char); 3350b57cec5SDimitry Andric m_argv.insert(m_argv.begin() + idx, m_entries[idx].data()); 3360b57cec5SDimitry Andric } 3370b57cec5SDimitry Andric 3380b57cec5SDimitry Andric void Args::ReplaceArgumentAtIndex(size_t idx, llvm::StringRef arg_str, 3390b57cec5SDimitry Andric char quote_char) { 3400b57cec5SDimitry Andric assert(m_argv.size() == m_entries.size() + 1); 3410b57cec5SDimitry Andric assert(m_argv.back() == nullptr); 3420b57cec5SDimitry Andric 3430b57cec5SDimitry Andric if (idx >= m_entries.size()) 3440b57cec5SDimitry Andric return; 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric m_entries[idx] = ArgEntry(arg_str, quote_char); 3470b57cec5SDimitry Andric m_argv[idx] = m_entries[idx].data(); 3480b57cec5SDimitry Andric } 3490b57cec5SDimitry Andric 3500b57cec5SDimitry Andric void Args::DeleteArgumentAtIndex(size_t idx) { 3510b57cec5SDimitry Andric if (idx >= m_entries.size()) 3520b57cec5SDimitry Andric return; 3530b57cec5SDimitry Andric 3540b57cec5SDimitry Andric m_argv.erase(m_argv.begin() + idx); 3550b57cec5SDimitry Andric m_entries.erase(m_entries.begin() + idx); 3560b57cec5SDimitry Andric } 3570b57cec5SDimitry Andric 3580b57cec5SDimitry Andric void Args::SetArguments(size_t argc, const char **argv) { 3590b57cec5SDimitry Andric Clear(); 3600b57cec5SDimitry Andric 361bdd1243dSDimitry Andric auto args = llvm::ArrayRef(argv, argc); 3620b57cec5SDimitry Andric m_entries.resize(argc); 3630b57cec5SDimitry Andric m_argv.resize(argc + 1); 3640b57cec5SDimitry Andric for (size_t i = 0; i < args.size(); ++i) { 3650b57cec5SDimitry Andric char quote = 3660b57cec5SDimitry Andric ((args[i][0] == '\'') || (args[i][0] == '"') || (args[i][0] == '`')) 3670b57cec5SDimitry Andric ? args[i][0] 3680b57cec5SDimitry Andric : '\0'; 3690b57cec5SDimitry Andric 3700b57cec5SDimitry Andric m_entries[i] = ArgEntry(args[i], quote); 3710b57cec5SDimitry Andric m_argv[i] = m_entries[i].data(); 3720b57cec5SDimitry Andric } 3730b57cec5SDimitry Andric } 3740b57cec5SDimitry Andric 3750b57cec5SDimitry Andric void Args::SetArguments(const char **argv) { 3760b57cec5SDimitry Andric SetArguments(ArgvToArgc(argv), argv); 3770b57cec5SDimitry Andric } 3780b57cec5SDimitry Andric 3790b57cec5SDimitry Andric void Args::Clear() { 3800b57cec5SDimitry Andric m_entries.clear(); 3810b57cec5SDimitry Andric m_argv.clear(); 3820b57cec5SDimitry Andric m_argv.push_back(nullptr); 3830b57cec5SDimitry Andric } 3840b57cec5SDimitry Andric 385e8d8bef9SDimitry Andric std::string Args::GetShellSafeArgument(const FileSpec &shell, 386e8d8bef9SDimitry Andric llvm::StringRef unsafe_arg) { 3870b57cec5SDimitry Andric struct ShellDescriptor { 3880b57cec5SDimitry Andric ConstString m_basename; 389e8d8bef9SDimitry Andric llvm::StringRef m_escapables; 3900b57cec5SDimitry Andric }; 3910b57cec5SDimitry Andric 39281ad6265SDimitry Andric static ShellDescriptor g_Shells[] = {{ConstString("bash"), " '\"<>()&;"}, 393753f127fSDimitry Andric {ConstString("fish"), " '\"<>()&\\|;"}, 39481ad6265SDimitry Andric {ConstString("tcsh"), " '\"<>()&;"}, 395fe6060f1SDimitry Andric {ConstString("zsh"), " '\"<>()&;\\|"}, 39681ad6265SDimitry Andric {ConstString("sh"), " '\"<>()&;"}}; 3970b57cec5SDimitry Andric 3980b57cec5SDimitry Andric // safe minimal set 399e8d8bef9SDimitry Andric llvm::StringRef escapables = " '\""; 4000b57cec5SDimitry Andric 4010b57cec5SDimitry Andric if (auto basename = shell.GetFilename()) { 4020b57cec5SDimitry Andric for (const auto &Shell : g_Shells) { 4030b57cec5SDimitry Andric if (Shell.m_basename == basename) { 4040b57cec5SDimitry Andric escapables = Shell.m_escapables; 4050b57cec5SDimitry Andric break; 4060b57cec5SDimitry Andric } 4070b57cec5SDimitry Andric } 4080b57cec5SDimitry Andric } 4090b57cec5SDimitry Andric 410e8d8bef9SDimitry Andric std::string safe_arg; 411e8d8bef9SDimitry Andric safe_arg.reserve(unsafe_arg.size()); 412e8d8bef9SDimitry Andric // Add a \ before every character that needs to be escaped. 413e8d8bef9SDimitry Andric for (char c : unsafe_arg) { 414e8d8bef9SDimitry Andric if (escapables.contains(c)) 415e8d8bef9SDimitry Andric safe_arg.push_back('\\'); 416e8d8bef9SDimitry Andric safe_arg.push_back(c); 4170b57cec5SDimitry Andric } 418e8d8bef9SDimitry Andric return safe_arg; 4190b57cec5SDimitry Andric } 4200b57cec5SDimitry Andric 4210b57cec5SDimitry Andric lldb::Encoding Args::StringToEncoding(llvm::StringRef s, 4220b57cec5SDimitry Andric lldb::Encoding fail_value) { 4230b57cec5SDimitry Andric return llvm::StringSwitch<lldb::Encoding>(s) 4240b57cec5SDimitry Andric .Case("uint", eEncodingUint) 4250b57cec5SDimitry Andric .Case("sint", eEncodingSint) 4260b57cec5SDimitry Andric .Case("ieee754", eEncodingIEEE754) 4270b57cec5SDimitry Andric .Case("vector", eEncodingVector) 4280b57cec5SDimitry Andric .Default(fail_value); 4290b57cec5SDimitry Andric } 4300b57cec5SDimitry Andric 4310b57cec5SDimitry Andric uint32_t Args::StringToGenericRegister(llvm::StringRef s) { 4320b57cec5SDimitry Andric if (s.empty()) 4330b57cec5SDimitry Andric return LLDB_INVALID_REGNUM; 4340b57cec5SDimitry Andric uint32_t result = llvm::StringSwitch<uint32_t>(s) 4350b57cec5SDimitry Andric .Case("pc", LLDB_REGNUM_GENERIC_PC) 4360b57cec5SDimitry Andric .Case("sp", LLDB_REGNUM_GENERIC_SP) 4370b57cec5SDimitry Andric .Case("fp", LLDB_REGNUM_GENERIC_FP) 4380b57cec5SDimitry Andric .Cases("ra", "lr", LLDB_REGNUM_GENERIC_RA) 4390b57cec5SDimitry Andric .Case("flags", LLDB_REGNUM_GENERIC_FLAGS) 4400b57cec5SDimitry Andric .Case("arg1", LLDB_REGNUM_GENERIC_ARG1) 4410b57cec5SDimitry Andric .Case("arg2", LLDB_REGNUM_GENERIC_ARG2) 4420b57cec5SDimitry Andric .Case("arg3", LLDB_REGNUM_GENERIC_ARG3) 4430b57cec5SDimitry Andric .Case("arg4", LLDB_REGNUM_GENERIC_ARG4) 4440b57cec5SDimitry Andric .Case("arg5", LLDB_REGNUM_GENERIC_ARG5) 4450b57cec5SDimitry Andric .Case("arg6", LLDB_REGNUM_GENERIC_ARG6) 4460b57cec5SDimitry Andric .Case("arg7", LLDB_REGNUM_GENERIC_ARG7) 4470b57cec5SDimitry Andric .Case("arg8", LLDB_REGNUM_GENERIC_ARG8) 4480b57cec5SDimitry Andric .Default(LLDB_INVALID_REGNUM); 4490b57cec5SDimitry Andric return result; 4500b57cec5SDimitry Andric } 4510b57cec5SDimitry Andric 4520b57cec5SDimitry Andric void Args::EncodeEscapeSequences(const char *src, std::string &dst) { 4530b57cec5SDimitry Andric dst.clear(); 4540b57cec5SDimitry Andric if (src) { 4550b57cec5SDimitry Andric for (const char *p = src; *p != '\0'; ++p) { 4560b57cec5SDimitry Andric size_t non_special_chars = ::strcspn(p, "\\"); 4570b57cec5SDimitry Andric if (non_special_chars > 0) { 4580b57cec5SDimitry Andric dst.append(p, non_special_chars); 4590b57cec5SDimitry Andric p += non_special_chars; 4600b57cec5SDimitry Andric if (*p == '\0') 4610b57cec5SDimitry Andric break; 4620b57cec5SDimitry Andric } 4630b57cec5SDimitry Andric 4640b57cec5SDimitry Andric if (*p == '\\') { 4650b57cec5SDimitry Andric ++p; // skip the slash 4660b57cec5SDimitry Andric switch (*p) { 4670b57cec5SDimitry Andric case 'a': 4680b57cec5SDimitry Andric dst.append(1, '\a'); 4690b57cec5SDimitry Andric break; 4700b57cec5SDimitry Andric case 'b': 4710b57cec5SDimitry Andric dst.append(1, '\b'); 4720b57cec5SDimitry Andric break; 4730b57cec5SDimitry Andric case 'f': 4740b57cec5SDimitry Andric dst.append(1, '\f'); 4750b57cec5SDimitry Andric break; 4760b57cec5SDimitry Andric case 'n': 4770b57cec5SDimitry Andric dst.append(1, '\n'); 4780b57cec5SDimitry Andric break; 4790b57cec5SDimitry Andric case 'r': 4800b57cec5SDimitry Andric dst.append(1, '\r'); 4810b57cec5SDimitry Andric break; 4820b57cec5SDimitry Andric case 't': 4830b57cec5SDimitry Andric dst.append(1, '\t'); 4840b57cec5SDimitry Andric break; 4850b57cec5SDimitry Andric case 'v': 4860b57cec5SDimitry Andric dst.append(1, '\v'); 4870b57cec5SDimitry Andric break; 4880b57cec5SDimitry Andric case '\\': 4890b57cec5SDimitry Andric dst.append(1, '\\'); 4900b57cec5SDimitry Andric break; 4910b57cec5SDimitry Andric case '\'': 4920b57cec5SDimitry Andric dst.append(1, '\''); 4930b57cec5SDimitry Andric break; 4940b57cec5SDimitry Andric case '"': 4950b57cec5SDimitry Andric dst.append(1, '"'); 4960b57cec5SDimitry Andric break; 4970b57cec5SDimitry Andric case '0': 4980b57cec5SDimitry Andric // 1 to 3 octal chars 4990b57cec5SDimitry Andric { 5000b57cec5SDimitry Andric // Make a string that can hold onto the initial zero char, up to 3 5010b57cec5SDimitry Andric // octal digits, and a terminating NULL. 5020b57cec5SDimitry Andric char oct_str[5] = {'\0', '\0', '\0', '\0', '\0'}; 5030b57cec5SDimitry Andric 5040b57cec5SDimitry Andric int i; 5050b57cec5SDimitry Andric for (i = 0; (p[i] >= '0' && p[i] <= '7') && i < 4; ++i) 5060b57cec5SDimitry Andric oct_str[i] = p[i]; 5070b57cec5SDimitry Andric 5080b57cec5SDimitry Andric // We don't want to consume the last octal character since the main 5090b57cec5SDimitry Andric // for loop will do this for us, so we advance p by one less than i 5100b57cec5SDimitry Andric // (even if i is zero) 5110b57cec5SDimitry Andric p += i - 1; 5120b57cec5SDimitry Andric unsigned long octal_value = ::strtoul(oct_str, nullptr, 8); 5130b57cec5SDimitry Andric if (octal_value <= UINT8_MAX) { 5140b57cec5SDimitry Andric dst.append(1, static_cast<char>(octal_value)); 5150b57cec5SDimitry Andric } 5160b57cec5SDimitry Andric } 5170b57cec5SDimitry Andric break; 5180b57cec5SDimitry Andric 5190b57cec5SDimitry Andric case 'x': 5200b57cec5SDimitry Andric // hex number in the format 5210b57cec5SDimitry Andric if (isxdigit(p[1])) { 5220b57cec5SDimitry Andric ++p; // Skip the 'x' 5230b57cec5SDimitry Andric 5240b57cec5SDimitry Andric // Make a string that can hold onto two hex chars plus a 5250b57cec5SDimitry Andric // NULL terminator 5260b57cec5SDimitry Andric char hex_str[3] = {*p, '\0', '\0'}; 5270b57cec5SDimitry Andric if (isxdigit(p[1])) { 5280b57cec5SDimitry Andric ++p; // Skip the first of the two hex chars 5290b57cec5SDimitry Andric hex_str[1] = *p; 5300b57cec5SDimitry Andric } 5310b57cec5SDimitry Andric 5320b57cec5SDimitry Andric unsigned long hex_value = strtoul(hex_str, nullptr, 16); 5330b57cec5SDimitry Andric if (hex_value <= UINT8_MAX) 5340b57cec5SDimitry Andric dst.append(1, static_cast<char>(hex_value)); 5350b57cec5SDimitry Andric } else { 5360b57cec5SDimitry Andric dst.append(1, 'x'); 5370b57cec5SDimitry Andric } 5380b57cec5SDimitry Andric break; 5390b57cec5SDimitry Andric 5400b57cec5SDimitry Andric default: 5410b57cec5SDimitry Andric // Just desensitize any other character by just printing what came 5420b57cec5SDimitry Andric // after the '\' 5430b57cec5SDimitry Andric dst.append(1, *p); 5440b57cec5SDimitry Andric break; 5450b57cec5SDimitry Andric } 5460b57cec5SDimitry Andric } 5470b57cec5SDimitry Andric } 5480b57cec5SDimitry Andric } 5490b57cec5SDimitry Andric } 5500b57cec5SDimitry Andric 5510b57cec5SDimitry Andric void Args::ExpandEscapedCharacters(const char *src, std::string &dst) { 5520b57cec5SDimitry Andric dst.clear(); 5530b57cec5SDimitry Andric if (src) { 5540b57cec5SDimitry Andric for (const char *p = src; *p != '\0'; ++p) { 5555ffd83dbSDimitry Andric if (llvm::isPrint(*p)) 5560b57cec5SDimitry Andric dst.append(1, *p); 5570b57cec5SDimitry Andric else { 5580b57cec5SDimitry Andric switch (*p) { 5590b57cec5SDimitry Andric case '\a': 5600b57cec5SDimitry Andric dst.append("\\a"); 5610b57cec5SDimitry Andric break; 5620b57cec5SDimitry Andric case '\b': 5630b57cec5SDimitry Andric dst.append("\\b"); 5640b57cec5SDimitry Andric break; 5650b57cec5SDimitry Andric case '\f': 5660b57cec5SDimitry Andric dst.append("\\f"); 5670b57cec5SDimitry Andric break; 5680b57cec5SDimitry Andric case '\n': 5690b57cec5SDimitry Andric dst.append("\\n"); 5700b57cec5SDimitry Andric break; 5710b57cec5SDimitry Andric case '\r': 5720b57cec5SDimitry Andric dst.append("\\r"); 5730b57cec5SDimitry Andric break; 5740b57cec5SDimitry Andric case '\t': 5750b57cec5SDimitry Andric dst.append("\\t"); 5760b57cec5SDimitry Andric break; 5770b57cec5SDimitry Andric case '\v': 5780b57cec5SDimitry Andric dst.append("\\v"); 5790b57cec5SDimitry Andric break; 5800b57cec5SDimitry Andric case '\'': 5810b57cec5SDimitry Andric dst.append("\\'"); 5820b57cec5SDimitry Andric break; 5830b57cec5SDimitry Andric case '"': 5840b57cec5SDimitry Andric dst.append("\\\""); 5850b57cec5SDimitry Andric break; 5860b57cec5SDimitry Andric case '\\': 5870b57cec5SDimitry Andric dst.append("\\\\"); 5880b57cec5SDimitry Andric break; 5890b57cec5SDimitry Andric default: { 5900b57cec5SDimitry Andric // Just encode as octal 5910b57cec5SDimitry Andric dst.append("\\0"); 5920b57cec5SDimitry Andric char octal_str[32]; 5930b57cec5SDimitry Andric snprintf(octal_str, sizeof(octal_str), "%o", *p); 5940b57cec5SDimitry Andric dst.append(octal_str); 5950b57cec5SDimitry Andric } break; 5960b57cec5SDimitry Andric } 5970b57cec5SDimitry Andric } 5980b57cec5SDimitry Andric } 5990b57cec5SDimitry Andric } 6000b57cec5SDimitry Andric } 6010b57cec5SDimitry Andric 6020b57cec5SDimitry Andric std::string Args::EscapeLLDBCommandArgument(const std::string &arg, 6030b57cec5SDimitry Andric char quote_char) { 6040b57cec5SDimitry Andric const char *chars_to_escape = nullptr; 6050b57cec5SDimitry Andric switch (quote_char) { 6060b57cec5SDimitry Andric case '\0': 6070b57cec5SDimitry Andric chars_to_escape = " \t\\'\"`"; 6080b57cec5SDimitry Andric break; 6090b57cec5SDimitry Andric case '"': 6100b57cec5SDimitry Andric chars_to_escape = "$\"`\\"; 6110b57cec5SDimitry Andric break; 6120b57cec5SDimitry Andric case '`': 6130b57cec5SDimitry Andric case '\'': 6140b57cec5SDimitry Andric return arg; 6150b57cec5SDimitry Andric default: 6160b57cec5SDimitry Andric assert(false && "Unhandled quote character"); 6170b57cec5SDimitry Andric return arg; 6180b57cec5SDimitry Andric } 6190b57cec5SDimitry Andric 6200b57cec5SDimitry Andric std::string res; 6210b57cec5SDimitry Andric res.reserve(arg.size()); 6220b57cec5SDimitry Andric for (char c : arg) { 6230b57cec5SDimitry Andric if (::strchr(chars_to_escape, c)) 6240b57cec5SDimitry Andric res.push_back('\\'); 6250b57cec5SDimitry Andric res.push_back(c); 6260b57cec5SDimitry Andric } 6270b57cec5SDimitry Andric return res; 6280b57cec5SDimitry Andric } 6290b57cec5SDimitry Andric 6300b57cec5SDimitry Andric OptionsWithRaw::OptionsWithRaw(llvm::StringRef arg_string) { 6310b57cec5SDimitry Andric SetFromString(arg_string); 6320b57cec5SDimitry Andric } 6330b57cec5SDimitry Andric 6340b57cec5SDimitry Andric void OptionsWithRaw::SetFromString(llvm::StringRef arg_string) { 6350b57cec5SDimitry Andric const llvm::StringRef original_args = arg_string; 6360b57cec5SDimitry Andric 6370b57cec5SDimitry Andric arg_string = ltrimForArgs(arg_string); 6380b57cec5SDimitry Andric std::string arg; 6390b57cec5SDimitry Andric char quote; 6400b57cec5SDimitry Andric 6410b57cec5SDimitry Andric // If the string doesn't start with a dash, we just have no options and just 6420b57cec5SDimitry Andric // a raw part. 6430b57cec5SDimitry Andric if (!arg_string.startswith("-")) { 6445ffd83dbSDimitry Andric m_suffix = std::string(original_args); 6450b57cec5SDimitry Andric return; 6460b57cec5SDimitry Andric } 6470b57cec5SDimitry Andric 6480b57cec5SDimitry Andric bool found_suffix = false; 6490b57cec5SDimitry Andric while (!arg_string.empty()) { 6500b57cec5SDimitry Andric // The length of the prefix before parsing. 6510b57cec5SDimitry Andric std::size_t prev_prefix_length = original_args.size() - arg_string.size(); 6520b57cec5SDimitry Andric 6530b57cec5SDimitry Andric // Parse the next argument from the remaining string. 6540b57cec5SDimitry Andric std::tie(arg, quote, arg_string) = ParseSingleArgument(arg_string); 6550b57cec5SDimitry Andric 6560b57cec5SDimitry Andric // If we get an unquoted '--' argument, then we reached the suffix part 6570b57cec5SDimitry Andric // of the command. 6580b57cec5SDimitry Andric Args::ArgEntry entry(arg, quote); 6590b57cec5SDimitry Andric if (!entry.IsQuoted() && arg == "--") { 6600b57cec5SDimitry Andric // The remaining line is the raw suffix, and the line we parsed so far 6610b57cec5SDimitry Andric // needs to be interpreted as arguments. 6620b57cec5SDimitry Andric m_has_args = true; 6635ffd83dbSDimitry Andric m_suffix = std::string(arg_string); 6640b57cec5SDimitry Andric found_suffix = true; 6650b57cec5SDimitry Andric 6660b57cec5SDimitry Andric // The length of the prefix after parsing. 6670b57cec5SDimitry Andric std::size_t prefix_length = original_args.size() - arg_string.size(); 6680b57cec5SDimitry Andric 6690b57cec5SDimitry Andric // Take the string we know contains all the arguments and actually parse 6700b57cec5SDimitry Andric // it as proper arguments. 6710b57cec5SDimitry Andric llvm::StringRef prefix = original_args.take_front(prev_prefix_length); 6720b57cec5SDimitry Andric m_args = Args(prefix); 6730b57cec5SDimitry Andric m_arg_string = prefix; 6740b57cec5SDimitry Andric 6750b57cec5SDimitry Andric // We also record the part of the string that contains the arguments plus 6760b57cec5SDimitry Andric // the delimiter. 6770b57cec5SDimitry Andric m_arg_string_with_delimiter = original_args.take_front(prefix_length); 6780b57cec5SDimitry Andric 6790b57cec5SDimitry Andric // As the rest of the string became the raw suffix, we are done here. 6800b57cec5SDimitry Andric break; 6810b57cec5SDimitry Andric } 6820b57cec5SDimitry Andric 6830b57cec5SDimitry Andric arg_string = ltrimForArgs(arg_string); 6840b57cec5SDimitry Andric } 6850b57cec5SDimitry Andric 6860b57cec5SDimitry Andric // If we didn't find a suffix delimiter, the whole string is the raw suffix. 687e8d8bef9SDimitry Andric if (!found_suffix) 6885ffd83dbSDimitry Andric m_suffix = std::string(original_args); 6890b57cec5SDimitry Andric } 690