10b57cec5SDimitry Andric //===-- Args.cpp ------------------------------------------------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "lldb/Utility/Args.h" 100b57cec5SDimitry Andric #include "lldb/Utility/ConstString.h" 110b57cec5SDimitry Andric #include "lldb/Utility/FileSpec.h" 120b57cec5SDimitry Andric #include "lldb/Utility/Stream.h" 130b57cec5SDimitry Andric #include "lldb/Utility/StringList.h" 140b57cec5SDimitry Andric #include "llvm/ADT/StringSwitch.h" 150b57cec5SDimitry Andric 160b57cec5SDimitry Andric using namespace lldb; 170b57cec5SDimitry Andric using namespace lldb_private; 180b57cec5SDimitry Andric 190b57cec5SDimitry Andric // A helper function for argument parsing. 200b57cec5SDimitry Andric // Parses the initial part of the first argument using normal double quote 210b57cec5SDimitry Andric // rules: backslash escapes the double quote and itself. The parsed string is 220b57cec5SDimitry Andric // appended to the second argument. The function returns the unparsed portion 230b57cec5SDimitry Andric // of the string, starting at the closing quote. 240b57cec5SDimitry Andric static llvm::StringRef ParseDoubleQuotes(llvm::StringRef quoted, 250b57cec5SDimitry Andric std::string &result) { 260b57cec5SDimitry Andric // Inside double quotes, '\' and '"' are special. 270b57cec5SDimitry Andric static const char *k_escapable_characters = "\"\\"; 280b57cec5SDimitry Andric while (true) { 290b57cec5SDimitry Andric // Skip over over regular characters and append them. 
300b57cec5SDimitry Andric size_t regular = quoted.find_first_of(k_escapable_characters); 310b57cec5SDimitry Andric result += quoted.substr(0, regular); 320b57cec5SDimitry Andric quoted = quoted.substr(regular); 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric // If we have reached the end of string or the closing quote, we're done. 350b57cec5SDimitry Andric if (quoted.empty() || quoted.front() == '"') 360b57cec5SDimitry Andric break; 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric // We have found a backslash. 390b57cec5SDimitry Andric quoted = quoted.drop_front(); 400b57cec5SDimitry Andric 410b57cec5SDimitry Andric if (quoted.empty()) { 420b57cec5SDimitry Andric // A lone backslash at the end of string, let's just append it. 430b57cec5SDimitry Andric result += '\\'; 440b57cec5SDimitry Andric break; 450b57cec5SDimitry Andric } 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric // If the character after the backslash is not a whitelisted escapable 480b57cec5SDimitry Andric // character, we leave the character sequence untouched. 490b57cec5SDimitry Andric if (strchr(k_escapable_characters, quoted.front()) == nullptr) 500b57cec5SDimitry Andric result += '\\'; 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric result += quoted.front(); 530b57cec5SDimitry Andric quoted = quoted.drop_front(); 540b57cec5SDimitry Andric } 550b57cec5SDimitry Andric 560b57cec5SDimitry Andric return quoted; 570b57cec5SDimitry Andric } 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric static size_t ArgvToArgc(const char **argv) { 600b57cec5SDimitry Andric if (!argv) 610b57cec5SDimitry Andric return 0; 620b57cec5SDimitry Andric size_t count = 0; 630b57cec5SDimitry Andric while (*argv++) 640b57cec5SDimitry Andric ++count; 650b57cec5SDimitry Andric return count; 660b57cec5SDimitry Andric } 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric // Trims all whitespace that can separate command line arguments from the left 690b57cec5SDimitry Andric // side of the string. 
700b57cec5SDimitry Andric static llvm::StringRef ltrimForArgs(llvm::StringRef str) { 710b57cec5SDimitry Andric static const char *k_space_separators = " \t"; 720b57cec5SDimitry Andric return str.ltrim(k_space_separators); 730b57cec5SDimitry Andric } 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric // A helper function for SetCommandString. Parses a single argument from the 760b57cec5SDimitry Andric // command string, processing quotes and backslashes in a shell-like manner. 770b57cec5SDimitry Andric // The function returns a tuple consisting of the parsed argument, the quote 780b57cec5SDimitry Andric // char used, and the unparsed portion of the string starting at the first 790b57cec5SDimitry Andric // unqouted, unescaped whitespace character. 800b57cec5SDimitry Andric static std::tuple<std::string, char, llvm::StringRef> 810b57cec5SDimitry Andric ParseSingleArgument(llvm::StringRef command) { 820b57cec5SDimitry Andric // Argument can be split into multiple discontiguous pieces, for example: 830b57cec5SDimitry Andric // "Hello ""World" 840b57cec5SDimitry Andric // this would result in a single argument "Hello World" (without the quotes) 850b57cec5SDimitry Andric // since the quotes would be removed and there is not space between the 860b57cec5SDimitry Andric // strings. 870b57cec5SDimitry Andric std::string arg; 880b57cec5SDimitry Andric 890b57cec5SDimitry Andric // Since we can have multiple quotes that form a single command in a command 900b57cec5SDimitry Andric // like: "Hello "world'!' (which will make a single argument "Hello world!") 910b57cec5SDimitry Andric // we remember the first quote character we encounter and use that for the 920b57cec5SDimitry Andric // quote character. 930b57cec5SDimitry Andric char first_quote_char = '\0'; 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric bool arg_complete = false; 960b57cec5SDimitry Andric do { 970b57cec5SDimitry Andric // Skip over over regular characters and append them. 
980b57cec5SDimitry Andric size_t regular = command.find_first_of(" \t\r\"'`\\"); 990b57cec5SDimitry Andric arg += command.substr(0, regular); 1000b57cec5SDimitry Andric command = command.substr(regular); 1010b57cec5SDimitry Andric 1020b57cec5SDimitry Andric if (command.empty()) 1030b57cec5SDimitry Andric break; 1040b57cec5SDimitry Andric 1050b57cec5SDimitry Andric char special = command.front(); 1060b57cec5SDimitry Andric command = command.drop_front(); 1070b57cec5SDimitry Andric switch (special) { 1080b57cec5SDimitry Andric case '\\': 1090b57cec5SDimitry Andric if (command.empty()) { 1100b57cec5SDimitry Andric arg += '\\'; 1110b57cec5SDimitry Andric break; 1120b57cec5SDimitry Andric } 1130b57cec5SDimitry Andric 1140b57cec5SDimitry Andric // If the character after the backslash is not a whitelisted escapable 1150b57cec5SDimitry Andric // character, we leave the character sequence untouched. 1160b57cec5SDimitry Andric if (strchr(" \t\\'\"`", command.front()) == nullptr) 1170b57cec5SDimitry Andric arg += '\\'; 1180b57cec5SDimitry Andric 1190b57cec5SDimitry Andric arg += command.front(); 1200b57cec5SDimitry Andric command = command.drop_front(); 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric break; 1230b57cec5SDimitry Andric 1240b57cec5SDimitry Andric case ' ': 1250b57cec5SDimitry Andric case '\t': 1260b57cec5SDimitry Andric case '\r': 1270b57cec5SDimitry Andric // We are not inside any quotes, we just found a space after an argument. 1280b57cec5SDimitry Andric // We are done. 1290b57cec5SDimitry Andric arg_complete = true; 1300b57cec5SDimitry Andric break; 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric case '"': 1330b57cec5SDimitry Andric case '\'': 1340b57cec5SDimitry Andric case '`': 1350b57cec5SDimitry Andric // We found the start of a quote scope. 
1360b57cec5SDimitry Andric if (first_quote_char == '\0') 1370b57cec5SDimitry Andric first_quote_char = special; 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric if (special == '"') 1400b57cec5SDimitry Andric command = ParseDoubleQuotes(command, arg); 1410b57cec5SDimitry Andric else { 1420b57cec5SDimitry Andric // For single quotes, we simply skip ahead to the matching quote 1430b57cec5SDimitry Andric // character (or the end of the string). 1440b57cec5SDimitry Andric size_t quoted = command.find(special); 1450b57cec5SDimitry Andric arg += command.substr(0, quoted); 1460b57cec5SDimitry Andric command = command.substr(quoted); 1470b57cec5SDimitry Andric } 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric // If we found a closing quote, skip it. 1500b57cec5SDimitry Andric if (!command.empty()) 1510b57cec5SDimitry Andric command = command.drop_front(); 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric break; 1540b57cec5SDimitry Andric } 1550b57cec5SDimitry Andric } while (!arg_complete); 1560b57cec5SDimitry Andric 1570b57cec5SDimitry Andric return std::make_tuple(arg, first_quote_char, command); 1580b57cec5SDimitry Andric } 1590b57cec5SDimitry Andric 1600b57cec5SDimitry Andric Args::ArgEntry::ArgEntry(llvm::StringRef str, char quote) : quote(quote) { 1610b57cec5SDimitry Andric size_t size = str.size(); 1620b57cec5SDimitry Andric ptr.reset(new char[size + 1]); 1630b57cec5SDimitry Andric 1640b57cec5SDimitry Andric ::memcpy(data(), str.data() ? 
str.data() : "", size); 1650b57cec5SDimitry Andric ptr[size] = 0; 1660b57cec5SDimitry Andric } 1670b57cec5SDimitry Andric 1680b57cec5SDimitry Andric // Args constructor 1690b57cec5SDimitry Andric Args::Args(llvm::StringRef command) { SetCommandString(command); } 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric Args::Args(const Args &rhs) { *this = rhs; } 1720b57cec5SDimitry Andric 1730b57cec5SDimitry Andric Args::Args(const StringList &list) : Args() { 1749dba64beSDimitry Andric for (const std::string &arg : list) 1759dba64beSDimitry Andric AppendArgument(arg); 1760b57cec5SDimitry Andric } 1770b57cec5SDimitry Andric 1780b57cec5SDimitry Andric Args &Args::operator=(const Args &rhs) { 1790b57cec5SDimitry Andric Clear(); 1800b57cec5SDimitry Andric 1810b57cec5SDimitry Andric m_argv.clear(); 1820b57cec5SDimitry Andric m_entries.clear(); 1830b57cec5SDimitry Andric for (auto &entry : rhs.m_entries) { 1849dba64beSDimitry Andric m_entries.emplace_back(entry.ref(), entry.quote); 1850b57cec5SDimitry Andric m_argv.push_back(m_entries.back().data()); 1860b57cec5SDimitry Andric } 1870b57cec5SDimitry Andric m_argv.push_back(nullptr); 1880b57cec5SDimitry Andric return *this; 1890b57cec5SDimitry Andric } 1900b57cec5SDimitry Andric 1910b57cec5SDimitry Andric // Destructor 1920b57cec5SDimitry Andric Args::~Args() {} 1930b57cec5SDimitry Andric 1940b57cec5SDimitry Andric void Args::Dump(Stream &s, const char *label_name) const { 1950b57cec5SDimitry Andric if (!label_name) 1960b57cec5SDimitry Andric return; 1970b57cec5SDimitry Andric 1980b57cec5SDimitry Andric int i = 0; 1990b57cec5SDimitry Andric for (auto &entry : m_entries) { 2000b57cec5SDimitry Andric s.Indent(); 2019dba64beSDimitry Andric s.Format("{0}[{1}]=\"{2}\"\n", label_name, i++, entry.ref()); 2020b57cec5SDimitry Andric } 2030b57cec5SDimitry Andric s.Format("{0}[{1}]=NULL\n", label_name, i); 2040b57cec5SDimitry Andric s.EOL(); 2050b57cec5SDimitry Andric } 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric bool 
Args::GetCommandString(std::string &command) const { 2080b57cec5SDimitry Andric command.clear(); 2090b57cec5SDimitry Andric 2100b57cec5SDimitry Andric for (size_t i = 0; i < m_entries.size(); ++i) { 2110b57cec5SDimitry Andric if (i > 0) 2120b57cec5SDimitry Andric command += ' '; 2139dba64beSDimitry Andric command += m_entries[i].ref(); 2140b57cec5SDimitry Andric } 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric return !m_entries.empty(); 2170b57cec5SDimitry Andric } 2180b57cec5SDimitry Andric 2190b57cec5SDimitry Andric bool Args::GetQuotedCommandString(std::string &command) const { 2200b57cec5SDimitry Andric command.clear(); 2210b57cec5SDimitry Andric 2220b57cec5SDimitry Andric for (size_t i = 0; i < m_entries.size(); ++i) { 2230b57cec5SDimitry Andric if (i > 0) 2240b57cec5SDimitry Andric command += ' '; 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric if (m_entries[i].quote) { 2270b57cec5SDimitry Andric command += m_entries[i].quote; 2289dba64beSDimitry Andric command += m_entries[i].ref(); 2290b57cec5SDimitry Andric command += m_entries[i].quote; 2300b57cec5SDimitry Andric } else { 2319dba64beSDimitry Andric command += m_entries[i].ref(); 2320b57cec5SDimitry Andric } 2330b57cec5SDimitry Andric } 2340b57cec5SDimitry Andric 2350b57cec5SDimitry Andric return !m_entries.empty(); 2360b57cec5SDimitry Andric } 2370b57cec5SDimitry Andric 2380b57cec5SDimitry Andric void Args::SetCommandString(llvm::StringRef command) { 2390b57cec5SDimitry Andric Clear(); 2400b57cec5SDimitry Andric m_argv.clear(); 2410b57cec5SDimitry Andric 2420b57cec5SDimitry Andric command = ltrimForArgs(command); 2430b57cec5SDimitry Andric std::string arg; 2440b57cec5SDimitry Andric char quote; 2450b57cec5SDimitry Andric while (!command.empty()) { 2460b57cec5SDimitry Andric std::tie(arg, quote, command) = ParseSingleArgument(command); 2470b57cec5SDimitry Andric m_entries.emplace_back(arg, quote); 2480b57cec5SDimitry Andric m_argv.push_back(m_entries.back().data()); 2490b57cec5SDimitry Andric 
command = ltrimForArgs(command); 2500b57cec5SDimitry Andric } 2510b57cec5SDimitry Andric m_argv.push_back(nullptr); 2520b57cec5SDimitry Andric } 2530b57cec5SDimitry Andric 2540b57cec5SDimitry Andric size_t Args::GetArgumentCount() const { return m_entries.size(); } 2550b57cec5SDimitry Andric 2560b57cec5SDimitry Andric const char *Args::GetArgumentAtIndex(size_t idx) const { 2570b57cec5SDimitry Andric if (idx < m_argv.size()) 2580b57cec5SDimitry Andric return m_argv[idx]; 2590b57cec5SDimitry Andric return nullptr; 2600b57cec5SDimitry Andric } 2610b57cec5SDimitry Andric 2620b57cec5SDimitry Andric char **Args::GetArgumentVector() { 2630b57cec5SDimitry Andric assert(!m_argv.empty()); 2640b57cec5SDimitry Andric // TODO: functions like execve and posix_spawnp exhibit undefined behavior 2650b57cec5SDimitry Andric // when argv or envp is null. So the code below is actually wrong. However, 2660b57cec5SDimitry Andric // other code in LLDB depends on it being null. The code has been acting 2670b57cec5SDimitry Andric // this way for some time, so it makes sense to leave it this way until 2680b57cec5SDimitry Andric // someone has the time to come along and fix it. 2690b57cec5SDimitry Andric return (m_argv.size() > 1) ? m_argv.data() : nullptr; 2700b57cec5SDimitry Andric } 2710b57cec5SDimitry Andric 2720b57cec5SDimitry Andric const char **Args::GetConstArgumentVector() const { 2730b57cec5SDimitry Andric assert(!m_argv.empty()); 2740b57cec5SDimitry Andric return (m_argv.size() > 1) ? 
const_cast<const char **>(m_argv.data()) 2750b57cec5SDimitry Andric : nullptr; 2760b57cec5SDimitry Andric } 2770b57cec5SDimitry Andric 2780b57cec5SDimitry Andric void Args::Shift() { 2790b57cec5SDimitry Andric // Don't pop the last NULL terminator from the argv array 2800b57cec5SDimitry Andric if (m_entries.empty()) 2810b57cec5SDimitry Andric return; 2820b57cec5SDimitry Andric m_argv.erase(m_argv.begin()); 2830b57cec5SDimitry Andric m_entries.erase(m_entries.begin()); 2840b57cec5SDimitry Andric } 2850b57cec5SDimitry Andric 2860b57cec5SDimitry Andric void Args::Unshift(llvm::StringRef arg_str, char quote_char) { 2870b57cec5SDimitry Andric InsertArgumentAtIndex(0, arg_str, quote_char); 2880b57cec5SDimitry Andric } 2890b57cec5SDimitry Andric 2900b57cec5SDimitry Andric void Args::AppendArguments(const Args &rhs) { 2910b57cec5SDimitry Andric assert(m_argv.size() == m_entries.size() + 1); 2920b57cec5SDimitry Andric assert(m_argv.back() == nullptr); 2930b57cec5SDimitry Andric m_argv.pop_back(); 2940b57cec5SDimitry Andric for (auto &entry : rhs.m_entries) { 2959dba64beSDimitry Andric m_entries.emplace_back(entry.ref(), entry.quote); 2960b57cec5SDimitry Andric m_argv.push_back(m_entries.back().data()); 2970b57cec5SDimitry Andric } 2980b57cec5SDimitry Andric m_argv.push_back(nullptr); 2990b57cec5SDimitry Andric } 3000b57cec5SDimitry Andric 3010b57cec5SDimitry Andric void Args::AppendArguments(const char **argv) { 3020b57cec5SDimitry Andric size_t argc = ArgvToArgc(argv); 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric assert(m_argv.size() == m_entries.size() + 1); 3050b57cec5SDimitry Andric assert(m_argv.back() == nullptr); 3060b57cec5SDimitry Andric m_argv.pop_back(); 3070b57cec5SDimitry Andric for (auto arg : llvm::makeArrayRef(argv, argc)) { 3080b57cec5SDimitry Andric m_entries.emplace_back(arg, '\0'); 3090b57cec5SDimitry Andric m_argv.push_back(m_entries.back().data()); 3100b57cec5SDimitry Andric } 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric 
m_argv.push_back(nullptr); 3130b57cec5SDimitry Andric } 3140b57cec5SDimitry Andric 3150b57cec5SDimitry Andric void Args::AppendArgument(llvm::StringRef arg_str, char quote_char) { 3160b57cec5SDimitry Andric InsertArgumentAtIndex(GetArgumentCount(), arg_str, quote_char); 3170b57cec5SDimitry Andric } 3180b57cec5SDimitry Andric 3190b57cec5SDimitry Andric void Args::InsertArgumentAtIndex(size_t idx, llvm::StringRef arg_str, 3200b57cec5SDimitry Andric char quote_char) { 3210b57cec5SDimitry Andric assert(m_argv.size() == m_entries.size() + 1); 3220b57cec5SDimitry Andric assert(m_argv.back() == nullptr); 3230b57cec5SDimitry Andric 3240b57cec5SDimitry Andric if (idx > m_entries.size()) 3250b57cec5SDimitry Andric return; 3260b57cec5SDimitry Andric m_entries.emplace(m_entries.begin() + idx, arg_str, quote_char); 3270b57cec5SDimitry Andric m_argv.insert(m_argv.begin() + idx, m_entries[idx].data()); 3280b57cec5SDimitry Andric } 3290b57cec5SDimitry Andric 3300b57cec5SDimitry Andric void Args::ReplaceArgumentAtIndex(size_t idx, llvm::StringRef arg_str, 3310b57cec5SDimitry Andric char quote_char) { 3320b57cec5SDimitry Andric assert(m_argv.size() == m_entries.size() + 1); 3330b57cec5SDimitry Andric assert(m_argv.back() == nullptr); 3340b57cec5SDimitry Andric 3350b57cec5SDimitry Andric if (idx >= m_entries.size()) 3360b57cec5SDimitry Andric return; 3370b57cec5SDimitry Andric 3380b57cec5SDimitry Andric m_entries[idx] = ArgEntry(arg_str, quote_char); 3390b57cec5SDimitry Andric m_argv[idx] = m_entries[idx].data(); 3400b57cec5SDimitry Andric } 3410b57cec5SDimitry Andric 3420b57cec5SDimitry Andric void Args::DeleteArgumentAtIndex(size_t idx) { 3430b57cec5SDimitry Andric if (idx >= m_entries.size()) 3440b57cec5SDimitry Andric return; 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric m_argv.erase(m_argv.begin() + idx); 3470b57cec5SDimitry Andric m_entries.erase(m_entries.begin() + idx); 3480b57cec5SDimitry Andric } 3490b57cec5SDimitry Andric 3500b57cec5SDimitry Andric void 
Args::SetArguments(size_t argc, const char **argv) { 3510b57cec5SDimitry Andric Clear(); 3520b57cec5SDimitry Andric 3530b57cec5SDimitry Andric auto args = llvm::makeArrayRef(argv, argc); 3540b57cec5SDimitry Andric m_entries.resize(argc); 3550b57cec5SDimitry Andric m_argv.resize(argc + 1); 3560b57cec5SDimitry Andric for (size_t i = 0; i < args.size(); ++i) { 3570b57cec5SDimitry Andric char quote = 3580b57cec5SDimitry Andric ((args[i][0] == '\'') || (args[i][0] == '"') || (args[i][0] == '`')) 3590b57cec5SDimitry Andric ? args[i][0] 3600b57cec5SDimitry Andric : '\0'; 3610b57cec5SDimitry Andric 3620b57cec5SDimitry Andric m_entries[i] = ArgEntry(args[i], quote); 3630b57cec5SDimitry Andric m_argv[i] = m_entries[i].data(); 3640b57cec5SDimitry Andric } 3650b57cec5SDimitry Andric } 3660b57cec5SDimitry Andric 3670b57cec5SDimitry Andric void Args::SetArguments(const char **argv) { 3680b57cec5SDimitry Andric SetArguments(ArgvToArgc(argv), argv); 3690b57cec5SDimitry Andric } 3700b57cec5SDimitry Andric 3710b57cec5SDimitry Andric void Args::Clear() { 3720b57cec5SDimitry Andric m_entries.clear(); 3730b57cec5SDimitry Andric m_argv.clear(); 3740b57cec5SDimitry Andric m_argv.push_back(nullptr); 3750b57cec5SDimitry Andric } 3760b57cec5SDimitry Andric 3770b57cec5SDimitry Andric const char *Args::GetShellSafeArgument(const FileSpec &shell, 3780b57cec5SDimitry Andric const char *unsafe_arg, 3790b57cec5SDimitry Andric std::string &safe_arg) { 3800b57cec5SDimitry Andric struct ShellDescriptor { 3810b57cec5SDimitry Andric ConstString m_basename; 3820b57cec5SDimitry Andric const char *m_escapables; 3830b57cec5SDimitry Andric }; 3840b57cec5SDimitry Andric 3850b57cec5SDimitry Andric static ShellDescriptor g_Shells[] = {{ConstString("bash"), " '\"<>()&"}, 3860b57cec5SDimitry Andric {ConstString("tcsh"), " '\"<>()&$"}, 3870b57cec5SDimitry Andric {ConstString("sh"), " '\"<>()&"}}; 3880b57cec5SDimitry Andric 3890b57cec5SDimitry Andric // safe minimal set 3900b57cec5SDimitry Andric const char 
*escapables = " '\""; 3910b57cec5SDimitry Andric 3920b57cec5SDimitry Andric if (auto basename = shell.GetFilename()) { 3930b57cec5SDimitry Andric for (const auto &Shell : g_Shells) { 3940b57cec5SDimitry Andric if (Shell.m_basename == basename) { 3950b57cec5SDimitry Andric escapables = Shell.m_escapables; 3960b57cec5SDimitry Andric break; 3970b57cec5SDimitry Andric } 3980b57cec5SDimitry Andric } 3990b57cec5SDimitry Andric } 4000b57cec5SDimitry Andric 4010b57cec5SDimitry Andric safe_arg.assign(unsafe_arg); 4020b57cec5SDimitry Andric size_t prev_pos = 0; 4030b57cec5SDimitry Andric while (prev_pos < safe_arg.size()) { 4040b57cec5SDimitry Andric // Escape spaces and quotes 4050b57cec5SDimitry Andric size_t pos = safe_arg.find_first_of(escapables, prev_pos); 4060b57cec5SDimitry Andric if (pos != std::string::npos) { 4070b57cec5SDimitry Andric safe_arg.insert(pos, 1, '\\'); 4080b57cec5SDimitry Andric prev_pos = pos + 2; 4090b57cec5SDimitry Andric } else 4100b57cec5SDimitry Andric break; 4110b57cec5SDimitry Andric } 4120b57cec5SDimitry Andric return safe_arg.c_str(); 4130b57cec5SDimitry Andric } 4140b57cec5SDimitry Andric 4150b57cec5SDimitry Andric lldb::Encoding Args::StringToEncoding(llvm::StringRef s, 4160b57cec5SDimitry Andric lldb::Encoding fail_value) { 4170b57cec5SDimitry Andric return llvm::StringSwitch<lldb::Encoding>(s) 4180b57cec5SDimitry Andric .Case("uint", eEncodingUint) 4190b57cec5SDimitry Andric .Case("sint", eEncodingSint) 4200b57cec5SDimitry Andric .Case("ieee754", eEncodingIEEE754) 4210b57cec5SDimitry Andric .Case("vector", eEncodingVector) 4220b57cec5SDimitry Andric .Default(fail_value); 4230b57cec5SDimitry Andric } 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric uint32_t Args::StringToGenericRegister(llvm::StringRef s) { 4260b57cec5SDimitry Andric if (s.empty()) 4270b57cec5SDimitry Andric return LLDB_INVALID_REGNUM; 4280b57cec5SDimitry Andric uint32_t result = llvm::StringSwitch<uint32_t>(s) 4290b57cec5SDimitry Andric .Case("pc", 
LLDB_REGNUM_GENERIC_PC) 4300b57cec5SDimitry Andric .Case("sp", LLDB_REGNUM_GENERIC_SP) 4310b57cec5SDimitry Andric .Case("fp", LLDB_REGNUM_GENERIC_FP) 4320b57cec5SDimitry Andric .Cases("ra", "lr", LLDB_REGNUM_GENERIC_RA) 4330b57cec5SDimitry Andric .Case("flags", LLDB_REGNUM_GENERIC_FLAGS) 4340b57cec5SDimitry Andric .Case("arg1", LLDB_REGNUM_GENERIC_ARG1) 4350b57cec5SDimitry Andric .Case("arg2", LLDB_REGNUM_GENERIC_ARG2) 4360b57cec5SDimitry Andric .Case("arg3", LLDB_REGNUM_GENERIC_ARG3) 4370b57cec5SDimitry Andric .Case("arg4", LLDB_REGNUM_GENERIC_ARG4) 4380b57cec5SDimitry Andric .Case("arg5", LLDB_REGNUM_GENERIC_ARG5) 4390b57cec5SDimitry Andric .Case("arg6", LLDB_REGNUM_GENERIC_ARG6) 4400b57cec5SDimitry Andric .Case("arg7", LLDB_REGNUM_GENERIC_ARG7) 4410b57cec5SDimitry Andric .Case("arg8", LLDB_REGNUM_GENERIC_ARG8) 4420b57cec5SDimitry Andric .Default(LLDB_INVALID_REGNUM); 4430b57cec5SDimitry Andric return result; 4440b57cec5SDimitry Andric } 4450b57cec5SDimitry Andric 4460b57cec5SDimitry Andric void Args::EncodeEscapeSequences(const char *src, std::string &dst) { 4470b57cec5SDimitry Andric dst.clear(); 4480b57cec5SDimitry Andric if (src) { 4490b57cec5SDimitry Andric for (const char *p = src; *p != '\0'; ++p) { 4500b57cec5SDimitry Andric size_t non_special_chars = ::strcspn(p, "\\"); 4510b57cec5SDimitry Andric if (non_special_chars > 0) { 4520b57cec5SDimitry Andric dst.append(p, non_special_chars); 4530b57cec5SDimitry Andric p += non_special_chars; 4540b57cec5SDimitry Andric if (*p == '\0') 4550b57cec5SDimitry Andric break; 4560b57cec5SDimitry Andric } 4570b57cec5SDimitry Andric 4580b57cec5SDimitry Andric if (*p == '\\') { 4590b57cec5SDimitry Andric ++p; // skip the slash 4600b57cec5SDimitry Andric switch (*p) { 4610b57cec5SDimitry Andric case 'a': 4620b57cec5SDimitry Andric dst.append(1, '\a'); 4630b57cec5SDimitry Andric break; 4640b57cec5SDimitry Andric case 'b': 4650b57cec5SDimitry Andric dst.append(1, '\b'); 4660b57cec5SDimitry Andric break; 4670b57cec5SDimitry 
Andric case 'f': 4680b57cec5SDimitry Andric dst.append(1, '\f'); 4690b57cec5SDimitry Andric break; 4700b57cec5SDimitry Andric case 'n': 4710b57cec5SDimitry Andric dst.append(1, '\n'); 4720b57cec5SDimitry Andric break; 4730b57cec5SDimitry Andric case 'r': 4740b57cec5SDimitry Andric dst.append(1, '\r'); 4750b57cec5SDimitry Andric break; 4760b57cec5SDimitry Andric case 't': 4770b57cec5SDimitry Andric dst.append(1, '\t'); 4780b57cec5SDimitry Andric break; 4790b57cec5SDimitry Andric case 'v': 4800b57cec5SDimitry Andric dst.append(1, '\v'); 4810b57cec5SDimitry Andric break; 4820b57cec5SDimitry Andric case '\\': 4830b57cec5SDimitry Andric dst.append(1, '\\'); 4840b57cec5SDimitry Andric break; 4850b57cec5SDimitry Andric case '\'': 4860b57cec5SDimitry Andric dst.append(1, '\''); 4870b57cec5SDimitry Andric break; 4880b57cec5SDimitry Andric case '"': 4890b57cec5SDimitry Andric dst.append(1, '"'); 4900b57cec5SDimitry Andric break; 4910b57cec5SDimitry Andric case '0': 4920b57cec5SDimitry Andric // 1 to 3 octal chars 4930b57cec5SDimitry Andric { 4940b57cec5SDimitry Andric // Make a string that can hold onto the initial zero char, up to 3 4950b57cec5SDimitry Andric // octal digits, and a terminating NULL. 
4960b57cec5SDimitry Andric char oct_str[5] = {'\0', '\0', '\0', '\0', '\0'}; 4970b57cec5SDimitry Andric 4980b57cec5SDimitry Andric int i; 4990b57cec5SDimitry Andric for (i = 0; (p[i] >= '0' && p[i] <= '7') && i < 4; ++i) 5000b57cec5SDimitry Andric oct_str[i] = p[i]; 5010b57cec5SDimitry Andric 5020b57cec5SDimitry Andric // We don't want to consume the last octal character since the main 5030b57cec5SDimitry Andric // for loop will do this for us, so we advance p by one less than i 5040b57cec5SDimitry Andric // (even if i is zero) 5050b57cec5SDimitry Andric p += i - 1; 5060b57cec5SDimitry Andric unsigned long octal_value = ::strtoul(oct_str, nullptr, 8); 5070b57cec5SDimitry Andric if (octal_value <= UINT8_MAX) { 5080b57cec5SDimitry Andric dst.append(1, static_cast<char>(octal_value)); 5090b57cec5SDimitry Andric } 5100b57cec5SDimitry Andric } 5110b57cec5SDimitry Andric break; 5120b57cec5SDimitry Andric 5130b57cec5SDimitry Andric case 'x': 5140b57cec5SDimitry Andric // hex number in the format 5150b57cec5SDimitry Andric if (isxdigit(p[1])) { 5160b57cec5SDimitry Andric ++p; // Skip the 'x' 5170b57cec5SDimitry Andric 5180b57cec5SDimitry Andric // Make a string that can hold onto two hex chars plus a 5190b57cec5SDimitry Andric // NULL terminator 5200b57cec5SDimitry Andric char hex_str[3] = {*p, '\0', '\0'}; 5210b57cec5SDimitry Andric if (isxdigit(p[1])) { 5220b57cec5SDimitry Andric ++p; // Skip the first of the two hex chars 5230b57cec5SDimitry Andric hex_str[1] = *p; 5240b57cec5SDimitry Andric } 5250b57cec5SDimitry Andric 5260b57cec5SDimitry Andric unsigned long hex_value = strtoul(hex_str, nullptr, 16); 5270b57cec5SDimitry Andric if (hex_value <= UINT8_MAX) 5280b57cec5SDimitry Andric dst.append(1, static_cast<char>(hex_value)); 5290b57cec5SDimitry Andric } else { 5300b57cec5SDimitry Andric dst.append(1, 'x'); 5310b57cec5SDimitry Andric } 5320b57cec5SDimitry Andric break; 5330b57cec5SDimitry Andric 5340b57cec5SDimitry Andric default: 5350b57cec5SDimitry Andric // Just 
desensitize any other character by just printing what came 5360b57cec5SDimitry Andric // after the '\' 5370b57cec5SDimitry Andric dst.append(1, *p); 5380b57cec5SDimitry Andric break; 5390b57cec5SDimitry Andric } 5400b57cec5SDimitry Andric } 5410b57cec5SDimitry Andric } 5420b57cec5SDimitry Andric } 5430b57cec5SDimitry Andric } 5440b57cec5SDimitry Andric 5450b57cec5SDimitry Andric void Args::ExpandEscapedCharacters(const char *src, std::string &dst) { 5460b57cec5SDimitry Andric dst.clear(); 5470b57cec5SDimitry Andric if (src) { 5480b57cec5SDimitry Andric for (const char *p = src; *p != '\0'; ++p) { 5490b57cec5SDimitry Andric if (isprint(*p)) 5500b57cec5SDimitry Andric dst.append(1, *p); 5510b57cec5SDimitry Andric else { 5520b57cec5SDimitry Andric switch (*p) { 5530b57cec5SDimitry Andric case '\a': 5540b57cec5SDimitry Andric dst.append("\\a"); 5550b57cec5SDimitry Andric break; 5560b57cec5SDimitry Andric case '\b': 5570b57cec5SDimitry Andric dst.append("\\b"); 5580b57cec5SDimitry Andric break; 5590b57cec5SDimitry Andric case '\f': 5600b57cec5SDimitry Andric dst.append("\\f"); 5610b57cec5SDimitry Andric break; 5620b57cec5SDimitry Andric case '\n': 5630b57cec5SDimitry Andric dst.append("\\n"); 5640b57cec5SDimitry Andric break; 5650b57cec5SDimitry Andric case '\r': 5660b57cec5SDimitry Andric dst.append("\\r"); 5670b57cec5SDimitry Andric break; 5680b57cec5SDimitry Andric case '\t': 5690b57cec5SDimitry Andric dst.append("\\t"); 5700b57cec5SDimitry Andric break; 5710b57cec5SDimitry Andric case '\v': 5720b57cec5SDimitry Andric dst.append("\\v"); 5730b57cec5SDimitry Andric break; 5740b57cec5SDimitry Andric case '\'': 5750b57cec5SDimitry Andric dst.append("\\'"); 5760b57cec5SDimitry Andric break; 5770b57cec5SDimitry Andric case '"': 5780b57cec5SDimitry Andric dst.append("\\\""); 5790b57cec5SDimitry Andric break; 5800b57cec5SDimitry Andric case '\\': 5810b57cec5SDimitry Andric dst.append("\\\\"); 5820b57cec5SDimitry Andric break; 5830b57cec5SDimitry Andric default: { 
5840b57cec5SDimitry Andric // Just encode as octal 5850b57cec5SDimitry Andric dst.append("\\0"); 5860b57cec5SDimitry Andric char octal_str[32]; 5870b57cec5SDimitry Andric snprintf(octal_str, sizeof(octal_str), "%o", *p); 5880b57cec5SDimitry Andric dst.append(octal_str); 5890b57cec5SDimitry Andric } break; 5900b57cec5SDimitry Andric } 5910b57cec5SDimitry Andric } 5920b57cec5SDimitry Andric } 5930b57cec5SDimitry Andric } 5940b57cec5SDimitry Andric } 5950b57cec5SDimitry Andric 5960b57cec5SDimitry Andric std::string Args::EscapeLLDBCommandArgument(const std::string &arg, 5970b57cec5SDimitry Andric char quote_char) { 5980b57cec5SDimitry Andric const char *chars_to_escape = nullptr; 5990b57cec5SDimitry Andric switch (quote_char) { 6000b57cec5SDimitry Andric case '\0': 6010b57cec5SDimitry Andric chars_to_escape = " \t\\'\"`"; 6020b57cec5SDimitry Andric break; 6030b57cec5SDimitry Andric case '"': 6040b57cec5SDimitry Andric chars_to_escape = "$\"`\\"; 6050b57cec5SDimitry Andric break; 6060b57cec5SDimitry Andric case '`': 6070b57cec5SDimitry Andric case '\'': 6080b57cec5SDimitry Andric return arg; 6090b57cec5SDimitry Andric default: 6100b57cec5SDimitry Andric assert(false && "Unhandled quote character"); 6110b57cec5SDimitry Andric return arg; 6120b57cec5SDimitry Andric } 6130b57cec5SDimitry Andric 6140b57cec5SDimitry Andric std::string res; 6150b57cec5SDimitry Andric res.reserve(arg.size()); 6160b57cec5SDimitry Andric for (char c : arg) { 6170b57cec5SDimitry Andric if (::strchr(chars_to_escape, c)) 6180b57cec5SDimitry Andric res.push_back('\\'); 6190b57cec5SDimitry Andric res.push_back(c); 6200b57cec5SDimitry Andric } 6210b57cec5SDimitry Andric return res; 6220b57cec5SDimitry Andric } 6230b57cec5SDimitry Andric 6240b57cec5SDimitry Andric OptionsWithRaw::OptionsWithRaw(llvm::StringRef arg_string) { 6250b57cec5SDimitry Andric SetFromString(arg_string); 6260b57cec5SDimitry Andric } 6270b57cec5SDimitry Andric 6280b57cec5SDimitry Andric void 
OptionsWithRaw::SetFromString(llvm::StringRef arg_string) {
  const llvm::StringRef original_args = arg_string;

  arg_string = ltrimForArgs(arg_string);

  // Without a leading dash there are no options at all: everything, including
  // any leading whitespace, is the raw part.
  if (!arg_string.startswith("-")) {
    m_suffix = original_args;
    return;
  }

  std::string arg;
  char quote;
  while (!arg_string.empty()) {
    // How much of the original string lies in front of the argument that is
    // about to be parsed.
    const std::size_t consumed_before =
        original_args.size() - arg_string.size();

    // Peel off the next argument.
    std::tie(arg, quote, arg_string) = ParseSingleArgument(arg_string);

    // Only an unquoted "--" separates the options from the raw suffix.
    Args::ArgEntry entry(arg, quote);
    const bool is_delimiter = !entry.IsQuoted() && arg == "--";
    if (!is_delimiter) {
      arg_string = ltrimForArgs(arg_string);
      continue;
    }

    // Everything after the delimiter is the raw suffix, and what came before
    // it has to be interpreted as arguments.
    m_has_args = true;
    m_suffix = arg_string;

    // How much of the original string has been consumed including the
    // delimiter.
    const std::size_t consumed_after =
        original_args.size() - arg_string.size();

    // Parse the part in front of the delimiter as proper arguments.
    llvm::StringRef prefix = original_args.take_front(consumed_before);
    m_args = Args(prefix);
    m_arg_string = prefix;

    // Also remember the argument part together with the delimiter.
    m_arg_string_with_delimiter = original_args.take_front(consumed_after);
    return;
  }

  // No delimiter was found, so the whole string is the raw suffix.
  m_suffix = original_args;
}