15ffd83dbSDimitry Andric //===-- StringPrinter.cpp -------------------------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "lldb/DataFormatters/StringPrinter.h" 100b57cec5SDimitry Andric 110b57cec5SDimitry Andric #include "lldb/Core/Debugger.h" 120b57cec5SDimitry Andric #include "lldb/Core/ValueObject.h" 130b57cec5SDimitry Andric #include "lldb/Target/Language.h" 140b57cec5SDimitry Andric #include "lldb/Target/Process.h" 150b57cec5SDimitry Andric #include "lldb/Target/Target.h" 160b57cec5SDimitry Andric #include "lldb/Utility/Status.h" 170b57cec5SDimitry Andric 185ffd83dbSDimitry Andric #include "llvm/ADT/StringExtras.h" 190b57cec5SDimitry Andric #include "llvm/Support/ConvertUTF.h" 200b57cec5SDimitry Andric 21fe6060f1SDimitry Andric #include <cctype> 220b57cec5SDimitry Andric #include <locale> 230b57cec5SDimitry Andric #include <memory> 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric using namespace lldb; 260b57cec5SDimitry Andric using namespace lldb_private; 270b57cec5SDimitry Andric using namespace lldb_private::formatters; 285ffd83dbSDimitry Andric using GetPrintableElementType = StringPrinter::GetPrintableElementType; 295ffd83dbSDimitry Andric using StringElementType = StringPrinter::StringElementType; 305ffd83dbSDimitry Andric 315ffd83dbSDimitry Andric /// DecodedCharBuffer stores the decoded contents of a single character. It 325ffd83dbSDimitry Andric /// avoids managing memory on the heap by copying decoded bytes into an in-line 335ffd83dbSDimitry Andric /// buffer. 345ffd83dbSDimitry Andric class DecodedCharBuffer { 355ffd83dbSDimitry Andric public: 365ffd83dbSDimitry Andric DecodedCharBuffer(std::nullptr_t) {} 375ffd83dbSDimitry Andric 385ffd83dbSDimitry Andric DecodedCharBuffer(const uint8_t *bytes, size_t size) : m_size(size) { 395ffd83dbSDimitry Andric if (size > MaxLength) 405ffd83dbSDimitry Andric llvm_unreachable("unsupported length"); 415ffd83dbSDimitry Andric memcpy(m_data, bytes, size); 425ffd83dbSDimitry Andric } 435ffd83dbSDimitry Andric 445ffd83dbSDimitry Andric DecodedCharBuffer(const char *bytes, size_t size) 455ffd83dbSDimitry Andric : DecodedCharBuffer(reinterpret_cast<const uint8_t *>(bytes), size) {} 465ffd83dbSDimitry Andric 475ffd83dbSDimitry Andric const uint8_t *GetBytes() const { return m_data; } 485ffd83dbSDimitry Andric 495ffd83dbSDimitry Andric size_t GetSize() const { return m_size; } 505ffd83dbSDimitry Andric 515ffd83dbSDimitry Andric private: 525ffd83dbSDimitry Andric static constexpr unsigned MaxLength = 16; 535ffd83dbSDimitry Andric 545ffd83dbSDimitry Andric size_t m_size = 0; 555ffd83dbSDimitry Andric uint8_t m_data[MaxLength] = {0}; 565ffd83dbSDimitry Andric }; 575ffd83dbSDimitry Andric 585ffd83dbSDimitry Andric using EscapingHelper = 595ffd83dbSDimitry Andric std::function<DecodedCharBuffer(uint8_t *, uint8_t *, uint8_t *&)>; 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric // we define this for all values of type but only implement it for those we 620b57cec5SDimitry Andric // care about that's good because we get linker errors for any unsupported type 635ffd83dbSDimitry Andric template <StringElementType type> 645ffd83dbSDimitry Andric static DecodedCharBuffer 655ffd83dbSDimitry Andric GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, 665ffd83dbSDimitry Andric StringPrinter::EscapeStyle escape_style); 670b57cec5SDimitry Andric 685ffd83dbSDimitry Andric // Mimic isprint() for Unicode codepoints. 695ffd83dbSDimitry Andric static bool isprint32(char32_t codepoint) { 700b57cec5SDimitry Andric if (codepoint <= 0x1F || codepoint == 0x7F) // C0 710b57cec5SDimitry Andric { 720b57cec5SDimitry Andric return false; 730b57cec5SDimitry Andric } 740b57cec5SDimitry Andric if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 750b57cec5SDimitry Andric { 760b57cec5SDimitry Andric return false; 770b57cec5SDimitry Andric } 780b57cec5SDimitry Andric if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 790b57cec5SDimitry Andric { 800b57cec5SDimitry Andric return false; 810b57cec5SDimitry Andric } 820b57cec5SDimitry Andric if (codepoint == 0x200E || codepoint == 0x200F || 830b57cec5SDimitry Andric (codepoint >= 0x202A && 840b57cec5SDimitry Andric codepoint <= 0x202E)) // bidirectional text control 850b57cec5SDimitry Andric { 860b57cec5SDimitry Andric return false; 870b57cec5SDimitry Andric } 880b57cec5SDimitry Andric if (codepoint >= 0xFFF9 && 890b57cec5SDimitry Andric codepoint <= 0xFFFF) // interlinears and generally specials 900b57cec5SDimitry Andric { 910b57cec5SDimitry Andric return false; 920b57cec5SDimitry Andric } 930b57cec5SDimitry Andric return true; 940b57cec5SDimitry Andric } 950b57cec5SDimitry Andric 965ffd83dbSDimitry Andric DecodedCharBuffer attemptASCIIEscape(llvm::UTF32 c, 975ffd83dbSDimitry Andric StringPrinter::EscapeStyle escape_style) { 985ffd83dbSDimitry Andric const bool is_swift_escape_style = 995ffd83dbSDimitry Andric escape_style == StringPrinter::EscapeStyle::Swift; 1005ffd83dbSDimitry Andric switch (c) { 1010b57cec5SDimitry Andric case 0: 1025ffd83dbSDimitry Andric return {"\\0", 2}; 1030b57cec5SDimitry Andric case '\a': 1045ffd83dbSDimitry Andric return {"\\a", 2}; 1050b57cec5SDimitry Andric case '\b': 1065ffd83dbSDimitry Andric if (is_swift_escape_style) 1075ffd83dbSDimitry Andric return nullptr; 1085ffd83dbSDimitry Andric return {"\\b", 2}; 1090b57cec5SDimitry Andric case '\f': 1105ffd83dbSDimitry Andric if (is_swift_escape_style) 1115ffd83dbSDimitry Andric return nullptr; 1125ffd83dbSDimitry Andric return {"\\f", 2}; 1130b57cec5SDimitry Andric case '\n': 1145ffd83dbSDimitry Andric return {"\\n", 2}; 1150b57cec5SDimitry Andric case '\r': 1165ffd83dbSDimitry Andric return {"\\r", 2}; 1170b57cec5SDimitry Andric case '\t': 1185ffd83dbSDimitry Andric return {"\\t", 2}; 1190b57cec5SDimitry Andric case '\v': 1205ffd83dbSDimitry Andric if (is_swift_escape_style) 1215ffd83dbSDimitry Andric return nullptr; 1225ffd83dbSDimitry Andric return {"\\v", 2}; 1230b57cec5SDimitry Andric case '\"': 1245ffd83dbSDimitry Andric return {"\\\"", 2}; 1255ffd83dbSDimitry Andric case '\'': 1265ffd83dbSDimitry Andric if (is_swift_escape_style) 1275ffd83dbSDimitry Andric return {"\\'", 2}; 1285ffd83dbSDimitry Andric return nullptr; 1290b57cec5SDimitry Andric case '\\': 1305ffd83dbSDimitry Andric return {"\\\\", 2}; 1310b57cec5SDimitry Andric } 1325ffd83dbSDimitry Andric return nullptr; 1330b57cec5SDimitry Andric } 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric template <> 1365ffd83dbSDimitry Andric DecodedCharBuffer GetPrintableImpl<StringElementType::ASCII>( 1375ffd83dbSDimitry Andric uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, 1385ffd83dbSDimitry Andric StringPrinter::EscapeStyle escape_style) { 1395ffd83dbSDimitry Andric // The ASCII helper always advances 1 byte at a time. 1400b57cec5SDimitry Andric next = buffer + 1; 1415ffd83dbSDimitry Andric 1425ffd83dbSDimitry Andric DecodedCharBuffer retval = attemptASCIIEscape(*buffer, escape_style); 1435ffd83dbSDimitry Andric if (retval.GetSize()) 1440b57cec5SDimitry Andric return retval; 1455ffd83dbSDimitry Andric 1465ffd83dbSDimitry Andric // Use llvm's locale-independent isPrint(char), instead of the libc 1475ffd83dbSDimitry Andric // implementation which may give different results on different platforms. 1485ffd83dbSDimitry Andric if (llvm::isPrint(*buffer)) 1495ffd83dbSDimitry Andric return {buffer, 1}; 1505ffd83dbSDimitry Andric 1515ffd83dbSDimitry Andric unsigned escaped_len; 1525ffd83dbSDimitry Andric constexpr unsigned max_buffer_size = 7; 1535ffd83dbSDimitry Andric uint8_t data[max_buffer_size]; 1545ffd83dbSDimitry Andric switch (escape_style) { 1555ffd83dbSDimitry Andric case StringPrinter::EscapeStyle::CXX: 1565ffd83dbSDimitry Andric // Prints 4 characters, then a \0 terminator. 1575ffd83dbSDimitry Andric escaped_len = sprintf((char *)data, "\\x%02x", *buffer); 1585ffd83dbSDimitry Andric break; 1595ffd83dbSDimitry Andric case StringPrinter::EscapeStyle::Swift: 1605ffd83dbSDimitry Andric // Prints up to 6 characters, then a \0 terminator. 1615ffd83dbSDimitry Andric escaped_len = sprintf((char *)data, "\\u{%x}", *buffer); 1625ffd83dbSDimitry Andric break; 1635ffd83dbSDimitry Andric } 1645ffd83dbSDimitry Andric lldbassert(escaped_len > 0 && "unknown string escape style"); 1655ffd83dbSDimitry Andric return {data, escaped_len}; 1660b57cec5SDimitry Andric } 1670b57cec5SDimitry Andric 1685ffd83dbSDimitry Andric template <> 1695ffd83dbSDimitry Andric DecodedCharBuffer GetPrintableImpl<StringElementType::UTF8>( 1705ffd83dbSDimitry Andric uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, 1715ffd83dbSDimitry Andric StringPrinter::EscapeStyle escape_style) { 1725ffd83dbSDimitry Andric // If the utf8 encoded length is invalid (i.e., not in the closed interval 1735ffd83dbSDimitry Andric // [1;4]), or if there aren't enough bytes to print, or if the subsequence 1745ffd83dbSDimitry Andric // isn't valid utf8, fall back to printing an ASCII-escaped subsequence. 1755ffd83dbSDimitry Andric if (!llvm::isLegalUTF8Sequence(buffer, buffer_end)) 1765ffd83dbSDimitry Andric return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next, 1775ffd83dbSDimitry Andric escape_style); 1780b57cec5SDimitry Andric 1795ffd83dbSDimitry Andric // Convert the valid utf8 sequence to a utf32 codepoint. This cannot fail. 1805ffd83dbSDimitry Andric llvm::UTF32 codepoint = 0; 1815ffd83dbSDimitry Andric const llvm::UTF8 *buffer_for_conversion = buffer; 1825ffd83dbSDimitry Andric llvm::ConversionResult result = llvm::convertUTF8Sequence( 1835ffd83dbSDimitry Andric &buffer_for_conversion, buffer_end, &codepoint, llvm::strictConversion); 1845ffd83dbSDimitry Andric assert(result == llvm::conversionOK && 1855ffd83dbSDimitry Andric "Failed to convert legal utf8 sequence"); 1865ffd83dbSDimitry Andric (void)result; 1875ffd83dbSDimitry Andric 1885ffd83dbSDimitry Andric // The UTF8 helper always advances by the utf8 encoded length. 1895ffd83dbSDimitry Andric const unsigned utf8_encoded_len = buffer_for_conversion - buffer; 1900b57cec5SDimitry Andric next = buffer + utf8_encoded_len; 1910b57cec5SDimitry Andric 1925ffd83dbSDimitry Andric DecodedCharBuffer retval = attemptASCIIEscape(codepoint, escape_style); 1935ffd83dbSDimitry Andric if (retval.GetSize()) 1940b57cec5SDimitry Andric return retval; 1955ffd83dbSDimitry Andric if (isprint32(codepoint)) 1965ffd83dbSDimitry Andric return {buffer, utf8_encoded_len}; 1975ffd83dbSDimitry Andric 1985ffd83dbSDimitry Andric unsigned escaped_len; 1995ffd83dbSDimitry Andric constexpr unsigned max_buffer_size = 13; 2005ffd83dbSDimitry Andric uint8_t data[max_buffer_size]; 2015ffd83dbSDimitry Andric switch (escape_style) { 2025ffd83dbSDimitry Andric case StringPrinter::EscapeStyle::CXX: 2035ffd83dbSDimitry Andric // Prints 10 characters, then a \0 terminator. 2045ffd83dbSDimitry Andric escaped_len = sprintf((char *)data, "\\U%08x", codepoint); 2055ffd83dbSDimitry Andric break; 2065ffd83dbSDimitry Andric case StringPrinter::EscapeStyle::Swift: 2075ffd83dbSDimitry Andric // Prints up to 12 characters, then a \0 terminator. 2085ffd83dbSDimitry Andric escaped_len = sprintf((char *)data, "\\u{%x}", codepoint); 2095ffd83dbSDimitry Andric break; 2105ffd83dbSDimitry Andric } 2115ffd83dbSDimitry Andric lldbassert(escaped_len > 0 && "unknown string escape style"); 2125ffd83dbSDimitry Andric return {data, escaped_len}; 2130b57cec5SDimitry Andric } 2140b57cec5SDimitry Andric 2150b57cec5SDimitry Andric // Given a sequence of bytes, this function returns: a sequence of bytes to 2160b57cec5SDimitry Andric // actually print out + a length the following unscanned position of the buffer 2170b57cec5SDimitry Andric // is in next 2185ffd83dbSDimitry Andric static DecodedCharBuffer GetPrintable(StringElementType type, uint8_t *buffer, 2195ffd83dbSDimitry Andric uint8_t *buffer_end, uint8_t *&next, 2205ffd83dbSDimitry Andric StringPrinter::EscapeStyle escape_style) { 2215ffd83dbSDimitry Andric if (!buffer || buffer >= buffer_end) 2220b57cec5SDimitry Andric return {nullptr}; 2230b57cec5SDimitry Andric 2240b57cec5SDimitry Andric switch (type) { 2255ffd83dbSDimitry Andric case StringElementType::ASCII: 2265ffd83dbSDimitry Andric return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next, 2275ffd83dbSDimitry Andric escape_style); 2285ffd83dbSDimitry Andric case StringElementType::UTF8: 2295ffd83dbSDimitry Andric return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next, 2305ffd83dbSDimitry Andric escape_style); 2310b57cec5SDimitry Andric default: 2320b57cec5SDimitry Andric return {nullptr}; 2330b57cec5SDimitry Andric } 2340b57cec5SDimitry Andric } 2350b57cec5SDimitry Andric 2365ffd83dbSDimitry Andric static EscapingHelper 2375ffd83dbSDimitry Andric GetDefaultEscapingHelper(GetPrintableElementType elem_type, 2385ffd83dbSDimitry Andric StringPrinter::EscapeStyle escape_style) { 2390b57cec5SDimitry Andric switch (elem_type) { 2400b57cec5SDimitry Andric case GetPrintableElementType::UTF8: 2410b57cec5SDimitry Andric case GetPrintableElementType::ASCII: 2425ffd83dbSDimitry Andric return [escape_style, elem_type](uint8_t *buffer, uint8_t *buffer_end, 2435ffd83dbSDimitry Andric uint8_t *&next) -> DecodedCharBuffer { 2445ffd83dbSDimitry Andric return GetPrintable(elem_type == GetPrintableElementType::UTF8 2455ffd83dbSDimitry Andric ? StringElementType::UTF8 2465ffd83dbSDimitry Andric : StringElementType::ASCII, 2475ffd83dbSDimitry Andric buffer, buffer_end, next, escape_style); 2480b57cec5SDimitry Andric }; 2490b57cec5SDimitry Andric } 2500b57cec5SDimitry Andric llvm_unreachable("bad element type"); 2510b57cec5SDimitry Andric } 2520b57cec5SDimitry Andric 2535ffd83dbSDimitry Andric /// Read a string encoded in accordance with \tparam SourceDataType from a 2545ffd83dbSDimitry Andric /// host-side LLDB buffer, then pretty-print it to a stream using \p style. 2550b57cec5SDimitry Andric template <typename SourceDataType> 2565ffd83dbSDimitry Andric static bool DumpEncodedBufferToStream( 2575ffd83dbSDimitry Andric GetPrintableElementType style, 2580b57cec5SDimitry Andric llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, 2590b57cec5SDimitry Andric const SourceDataType *, 2600b57cec5SDimitry Andric llvm::UTF8 **, llvm::UTF8 *, 2610b57cec5SDimitry Andric llvm::ConversionFlags), 2620b57cec5SDimitry Andric const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) { 2635ffd83dbSDimitry Andric assert(dump_options.GetStream() && "need a Stream to print the string to"); 2640b57cec5SDimitry Andric Stream &stream(*dump_options.GetStream()); 2650b57cec5SDimitry Andric if (dump_options.GetPrefixToken() != nullptr) 2660b57cec5SDimitry Andric stream.Printf("%s", dump_options.GetPrefixToken()); 2670b57cec5SDimitry Andric if (dump_options.GetQuote() != 0) 2680b57cec5SDimitry Andric stream.Printf("%c", dump_options.GetQuote()); 2690b57cec5SDimitry Andric auto data(dump_options.GetData()); 2700b57cec5SDimitry Andric auto source_size(dump_options.GetSourceSize()); 2710b57cec5SDimitry Andric if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) { 2720b57cec5SDimitry Andric const int bufferSPSize = data.GetByteSize(); 2730b57cec5SDimitry Andric if (dump_options.GetSourceSize() == 0) { 2740b57cec5SDimitry Andric const int origin_encoding = 8 * sizeof(SourceDataType); 2750b57cec5SDimitry Andric source_size = bufferSPSize / (origin_encoding / 4); 2760b57cec5SDimitry Andric } 2770b57cec5SDimitry Andric 2780b57cec5SDimitry Andric const SourceDataType *data_ptr = 2790b57cec5SDimitry Andric (const SourceDataType *)data.GetDataStart(); 2800b57cec5SDimitry Andric const SourceDataType *data_end_ptr = data_ptr + source_size; 2810b57cec5SDimitry Andric 2820b57cec5SDimitry Andric const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator(); 2830b57cec5SDimitry Andric 2840b57cec5SDimitry Andric if (zero_is_terminator) { 2850b57cec5SDimitry Andric while (data_ptr < data_end_ptr) { 2860b57cec5SDimitry Andric if (!*data_ptr) { 2870b57cec5SDimitry Andric data_end_ptr = data_ptr; 2880b57cec5SDimitry Andric break; 2890b57cec5SDimitry Andric } 2900b57cec5SDimitry Andric data_ptr++; 2910b57cec5SDimitry Andric } 2920b57cec5SDimitry Andric 2930b57cec5SDimitry Andric data_ptr = (const SourceDataType *)data.GetDataStart(); 2940b57cec5SDimitry Andric } 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric lldb::DataBufferSP utf8_data_buffer_sp; 2970b57cec5SDimitry Andric llvm::UTF8 *utf8_data_ptr = nullptr; 2980b57cec5SDimitry Andric llvm::UTF8 *utf8_data_end_ptr = nullptr; 2990b57cec5SDimitry Andric 3000b57cec5SDimitry Andric if (ConvertFunction) { 3010b57cec5SDimitry Andric utf8_data_buffer_sp = 3020b57cec5SDimitry Andric std::make_shared<DataBufferHeap>(4 * bufferSPSize, 0); 3030b57cec5SDimitry Andric utf8_data_ptr = (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); 3040b57cec5SDimitry Andric utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 3050b57cec5SDimitry Andric ConvertFunction(&data_ptr, data_end_ptr, &utf8_data_ptr, 3060b57cec5SDimitry Andric utf8_data_end_ptr, llvm::lenientConversion); 3070b57cec5SDimitry Andric if (!zero_is_terminator) 3080b57cec5SDimitry Andric utf8_data_end_ptr = utf8_data_ptr; 3090b57cec5SDimitry Andric // needed because the ConvertFunction will change the value of the 3100b57cec5SDimitry Andric // data_ptr. 3110b57cec5SDimitry Andric utf8_data_ptr = 3120b57cec5SDimitry Andric (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); 3130b57cec5SDimitry Andric } else { 3140b57cec5SDimitry Andric // just copy the pointers - the cast is necessary to make the compiler 3150b57cec5SDimitry Andric // happy but this should only happen if we are reading UTF8 data 3160b57cec5SDimitry Andric utf8_data_ptr = const_cast<llvm::UTF8 *>( 3170b57cec5SDimitry Andric reinterpret_cast<const llvm::UTF8 *>(data_ptr)); 3180b57cec5SDimitry Andric utf8_data_end_ptr = const_cast<llvm::UTF8 *>( 3190b57cec5SDimitry Andric reinterpret_cast<const llvm::UTF8 *>(data_end_ptr)); 3200b57cec5SDimitry Andric } 3210b57cec5SDimitry Andric 3220b57cec5SDimitry Andric const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); 3235ffd83dbSDimitry Andric EscapingHelper escaping_callback; 3245ffd83dbSDimitry Andric if (escape_non_printables) 3250b57cec5SDimitry Andric escaping_callback = 3265ffd83dbSDimitry Andric GetDefaultEscapingHelper(style, dump_options.GetEscapeStyle()); 3270b57cec5SDimitry Andric 3280b57cec5SDimitry Andric // since we tend to accept partial data (and even partially malformed data) 3290b57cec5SDimitry Andric // we might end up with no NULL terminator before the end_ptr hence we need 3300b57cec5SDimitry Andric // to take a slower route and ensure we stay within boundaries 3310b57cec5SDimitry Andric for (; utf8_data_ptr < utf8_data_end_ptr;) { 3320b57cec5SDimitry Andric if (zero_is_terminator && !*utf8_data_ptr) 3330b57cec5SDimitry Andric break; 3340b57cec5SDimitry Andric 3350b57cec5SDimitry Andric if (escape_non_printables) { 3360b57cec5SDimitry Andric uint8_t *next_data = nullptr; 3370b57cec5SDimitry Andric auto printable = 3380b57cec5SDimitry Andric escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); 3390b57cec5SDimitry Andric auto printable_bytes = printable.GetBytes(); 3400b57cec5SDimitry Andric auto printable_size = printable.GetSize(); 3415ffd83dbSDimitry Andric 3425ffd83dbSDimitry Andric // We failed to figure out how to print this string. 3435ffd83dbSDimitry Andric if (!printable_bytes || !next_data) 3445ffd83dbSDimitry Andric return false; 3455ffd83dbSDimitry Andric 3460b57cec5SDimitry Andric for (unsigned c = 0; c < printable_size; c++) 3470b57cec5SDimitry Andric stream.Printf("%c", *(printable_bytes + c)); 3480b57cec5SDimitry Andric utf8_data_ptr = (uint8_t *)next_data; 3490b57cec5SDimitry Andric } else { 3500b57cec5SDimitry Andric stream.Printf("%c", *utf8_data_ptr); 3510b57cec5SDimitry Andric utf8_data_ptr++; 3520b57cec5SDimitry Andric } 3530b57cec5SDimitry Andric } 3540b57cec5SDimitry Andric } 3550b57cec5SDimitry Andric if (dump_options.GetQuote() != 0) 3560b57cec5SDimitry Andric stream.Printf("%c", dump_options.GetQuote()); 3570b57cec5SDimitry Andric if (dump_options.GetSuffixToken() != nullptr) 3580b57cec5SDimitry Andric stream.Printf("%s", dump_options.GetSuffixToken()); 3590b57cec5SDimitry Andric if (dump_options.GetIsTruncated()) 3600b57cec5SDimitry Andric stream.Printf("..."); 3610b57cec5SDimitry Andric return true; 3620b57cec5SDimitry Andric } 3630b57cec5SDimitry Andric 3640b57cec5SDimitry Andric lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions:: 3650b57cec5SDimitry Andric ReadStringAndDumpToStreamOptions(ValueObject &valobj) 3660b57cec5SDimitry Andric : ReadStringAndDumpToStreamOptions() { 3670b57cec5SDimitry Andric SetEscapeNonPrintables( 3680b57cec5SDimitry Andric valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 3690b57cec5SDimitry Andric } 3700b57cec5SDimitry Andric 3710b57cec5SDimitry Andric lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 3720b57cec5SDimitry Andric ReadBufferAndDumpToStreamOptions(ValueObject &valobj) 3730b57cec5SDimitry Andric : ReadBufferAndDumpToStreamOptions() { 3740b57cec5SDimitry Andric SetEscapeNonPrintables( 3750b57cec5SDimitry Andric valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 3760b57cec5SDimitry Andric } 3770b57cec5SDimitry Andric 3780b57cec5SDimitry Andric lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 3790b57cec5SDimitry Andric ReadBufferAndDumpToStreamOptions( 3800b57cec5SDimitry Andric const ReadStringAndDumpToStreamOptions &options) 3810b57cec5SDimitry Andric : ReadBufferAndDumpToStreamOptions() { 3820b57cec5SDimitry Andric SetStream(options.GetStream()); 3830b57cec5SDimitry Andric SetPrefixToken(options.GetPrefixToken()); 3840b57cec5SDimitry Andric SetSuffixToken(options.GetSuffixToken()); 3850b57cec5SDimitry Andric SetQuote(options.GetQuote()); 3860b57cec5SDimitry Andric SetEscapeNonPrintables(options.GetEscapeNonPrintables()); 3870b57cec5SDimitry Andric SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); 3885ffd83dbSDimitry Andric SetEscapeStyle(options.GetEscapeStyle()); 3890b57cec5SDimitry Andric } 3900b57cec5SDimitry Andric 3910b57cec5SDimitry Andric namespace lldb_private { 3920b57cec5SDimitry Andric 3930b57cec5SDimitry Andric namespace formatters { 3940b57cec5SDimitry Andric 3950b57cec5SDimitry Andric template <typename SourceDataType> 3965ffd83dbSDimitry Andric static bool ReadEncodedBufferAndDumpToStream( 3975ffd83dbSDimitry Andric StringElementType elem_type, 3980b57cec5SDimitry Andric const StringPrinter::ReadStringAndDumpToStreamOptions &options, 3990b57cec5SDimitry Andric llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, 4000b57cec5SDimitry Andric const SourceDataType *, 4010b57cec5SDimitry Andric llvm::UTF8 **, llvm::UTF8 *, 4020b57cec5SDimitry Andric llvm::ConversionFlags)) { 4030b57cec5SDimitry Andric assert(options.GetStream() && "need a Stream to print the string to"); 4045ffd83dbSDimitry Andric if (!options.GetStream()) 4055ffd83dbSDimitry Andric return false; 4060b57cec5SDimitry Andric 4070b57cec5SDimitry Andric if (options.GetLocation() == 0 || 4080b57cec5SDimitry Andric options.GetLocation() == LLDB_INVALID_ADDRESS) 4090b57cec5SDimitry Andric return false; 4100b57cec5SDimitry Andric 4110b57cec5SDimitry Andric lldb::ProcessSP process_sp(options.GetProcessSP()); 4120b57cec5SDimitry Andric if (!process_sp) 4130b57cec5SDimitry Andric return false; 4140b57cec5SDimitry Andric 4155ffd83dbSDimitry Andric constexpr int type_width = sizeof(SourceDataType); 4165ffd83dbSDimitry Andric constexpr int origin_encoding = 8 * type_width; 4170b57cec5SDimitry Andric if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 4180b57cec5SDimitry Andric return false; 4195ffd83dbSDimitry Andric // If not UTF8 or ASCII, conversion to UTF8 is necessary. 4200b57cec5SDimitry Andric if (origin_encoding != 8 && !ConvertFunction) 4210b57cec5SDimitry Andric return false; 4220b57cec5SDimitry Andric 4230b57cec5SDimitry Andric bool needs_zero_terminator = options.GetNeedsZeroTermination(); 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric bool is_truncated = false; 4260b57cec5SDimitry Andric const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 4270b57cec5SDimitry Andric 4285ffd83dbSDimitry Andric uint32_t sourceSize; 4295ffd83dbSDimitry Andric if (elem_type == StringElementType::ASCII && !options.GetSourceSize()) { 4305ffd83dbSDimitry Andric // FIXME: The NSString formatter sets HasSourceSize(true) when the size is 4315ffd83dbSDimitry Andric // actually unknown, as well as SetBinaryZeroIsTerminator(false). IIUC the 4325ffd83dbSDimitry Andric // C++ formatter also sets SetBinaryZeroIsTerminator(false) when it doesn't 4335ffd83dbSDimitry Andric // mean to. I don't see how this makes sense: we should fix the formatters. 4345ffd83dbSDimitry Andric // 4355ffd83dbSDimitry Andric // Until then, the behavior that's expected for ASCII strings with unknown 4365ffd83dbSDimitry Andric // lengths is to read up to the max size and then null-terminate. Do that. 4370b57cec5SDimitry Andric sourceSize = max_size; 4380b57cec5SDimitry Andric needs_zero_terminator = true; 4395ffd83dbSDimitry Andric } else if (options.HasSourceSize()) { 4405ffd83dbSDimitry Andric sourceSize = options.GetSourceSize(); 4415ffd83dbSDimitry Andric if (!options.GetIgnoreMaxLength()) { 4420b57cec5SDimitry Andric if (sourceSize > max_size) { 4430b57cec5SDimitry Andric sourceSize = max_size; 4440b57cec5SDimitry Andric is_truncated = true; 4450b57cec5SDimitry Andric } 4460b57cec5SDimitry Andric } 4475ffd83dbSDimitry Andric } else { 4485ffd83dbSDimitry Andric sourceSize = max_size; 4495ffd83dbSDimitry Andric needs_zero_terminator = true; 4505ffd83dbSDimitry Andric } 4510b57cec5SDimitry Andric 4520b57cec5SDimitry Andric const int bufferSPSize = sourceSize * type_width; 4530b57cec5SDimitry Andric lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize, 0)); 4540b57cec5SDimitry Andric 4555ffd83dbSDimitry Andric // Check if we got bytes. We never get any bytes if we have an empty 4565ffd83dbSDimitry Andric // string, but we still continue so that we end up actually printing 4575ffd83dbSDimitry Andric // an empty string (""). 4585ffd83dbSDimitry Andric if (sourceSize != 0 && !buffer_sp->GetBytes()) 4590b57cec5SDimitry Andric return false; 4600b57cec5SDimitry Andric 4610b57cec5SDimitry Andric Status error; 4620b57cec5SDimitry Andric char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 4630b57cec5SDimitry Andric 4645ffd83dbSDimitry Andric if (elem_type == StringElementType::ASCII) 4655ffd83dbSDimitry Andric process_sp->ReadCStringFromMemory(options.GetLocation(), buffer, 4665ffd83dbSDimitry Andric bufferSPSize, error); 4675ffd83dbSDimitry Andric else if (needs_zero_terminator) 4680b57cec5SDimitry Andric process_sp->ReadStringFromMemory(options.GetLocation(), buffer, 4690b57cec5SDimitry Andric bufferSPSize, error, type_width); 4700b57cec5SDimitry Andric else 4715ffd83dbSDimitry Andric process_sp->ReadMemoryFromInferior(options.GetLocation(), buffer, 4720b57cec5SDimitry Andric bufferSPSize, error); 4730b57cec5SDimitry Andric if (error.Fail()) { 4740b57cec5SDimitry Andric options.GetStream()->Printf("unable to read data"); 4750b57cec5SDimitry Andric return true; 4760b57cec5SDimitry Andric } 4770b57cec5SDimitry Andric 4780b57cec5SDimitry Andric DataExtractor data(buffer_sp, process_sp->GetByteOrder(), 4790b57cec5SDimitry Andric process_sp->GetAddressByteSize()); 4800b57cec5SDimitry Andric 4810b57cec5SDimitry Andric StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options); 4820b57cec5SDimitry Andric dump_options.SetData(data); 4830b57cec5SDimitry Andric dump_options.SetSourceSize(sourceSize); 4840b57cec5SDimitry Andric dump_options.SetIsTruncated(is_truncated); 4855ffd83dbSDimitry Andric dump_options.SetNeedsZeroTermination(needs_zero_terminator); 4865ffd83dbSDimitry Andric if (needs_zero_terminator) 4875ffd83dbSDimitry Andric dump_options.SetBinaryZeroIsTerminator(true); 4880b57cec5SDimitry Andric 4895ffd83dbSDimitry Andric GetPrintableElementType print_style = (elem_type == StringElementType::ASCII) 4905ffd83dbSDimitry Andric ? GetPrintableElementType::ASCII 4915ffd83dbSDimitry Andric : GetPrintableElementType::UTF8; 4925ffd83dbSDimitry Andric return DumpEncodedBufferToStream(print_style, ConvertFunction, dump_options); 4930b57cec5SDimitry Andric } 4940b57cec5SDimitry Andric 4950b57cec5SDimitry Andric template <> 4965ffd83dbSDimitry Andric bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF8>( 4970b57cec5SDimitry Andric const ReadStringAndDumpToStreamOptions &options) { 4985ffd83dbSDimitry Andric return ReadEncodedBufferAndDumpToStream<llvm::UTF8>(StringElementType::UTF8, 4995ffd83dbSDimitry Andric options, nullptr); 5000b57cec5SDimitry Andric } 5010b57cec5SDimitry Andric 5020b57cec5SDimitry Andric template <> 5035ffd83dbSDimitry Andric bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF16>( 5040b57cec5SDimitry Andric const ReadStringAndDumpToStreamOptions &options) { 5055ffd83dbSDimitry Andric return ReadEncodedBufferAndDumpToStream<llvm::UTF16>( 5065ffd83dbSDimitry Andric StringElementType::UTF16, options, llvm::ConvertUTF16toUTF8); 5070b57cec5SDimitry Andric } 5080b57cec5SDimitry Andric 5090b57cec5SDimitry Andric template <> 5105ffd83dbSDimitry Andric bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF32>( 5110b57cec5SDimitry Andric const ReadStringAndDumpToStreamOptions &options) { 5125ffd83dbSDimitry Andric return ReadEncodedBufferAndDumpToStream<llvm::UTF32>( 5135ffd83dbSDimitry Andric StringElementType::UTF32, options, llvm::ConvertUTF32toUTF8); 5140b57cec5SDimitry Andric } 5150b57cec5SDimitry Andric 5160b57cec5SDimitry Andric template <> 5175ffd83dbSDimitry Andric bool StringPrinter::ReadStringAndDumpToStream<StringElementType::ASCII>( 5185ffd83dbSDimitry Andric const ReadStringAndDumpToStreamOptions &options) { 5195ffd83dbSDimitry Andric return ReadEncodedBufferAndDumpToStream<char>(StringElementType::ASCII, 5205ffd83dbSDimitry Andric options, nullptr); 5215ffd83dbSDimitry Andric } 5225ffd83dbSDimitry Andric 5235ffd83dbSDimitry Andric template <> 5245ffd83dbSDimitry Andric bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF8>( 5250b57cec5SDimitry Andric const ReadBufferAndDumpToStreamOptions &options) { 5265ffd83dbSDimitry Andric return DumpEncodedBufferToStream<llvm::UTF8>(GetPrintableElementType::UTF8, 5275ffd83dbSDimitry Andric nullptr, options); 5280b57cec5SDimitry Andric } 5290b57cec5SDimitry Andric 5300b57cec5SDimitry Andric template <> 5315ffd83dbSDimitry Andric bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF16>( 5320b57cec5SDimitry Andric const ReadBufferAndDumpToStreamOptions &options) { 5335ffd83dbSDimitry Andric return DumpEncodedBufferToStream(GetPrintableElementType::UTF8, 5345ffd83dbSDimitry Andric llvm::ConvertUTF16toUTF8, options); 5355ffd83dbSDimitry Andric } 5365ffd83dbSDimitry Andric 5375ffd83dbSDimitry Andric template <> 5385ffd83dbSDimitry Andric bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF32>( 5395ffd83dbSDimitry Andric const ReadBufferAndDumpToStreamOptions &options) { 5405ffd83dbSDimitry Andric return DumpEncodedBufferToStream(GetPrintableElementType::UTF8, 5415ffd83dbSDimitry Andric llvm::ConvertUTF32toUTF8, options); 5425ffd83dbSDimitry Andric } 5435ffd83dbSDimitry Andric 5445ffd83dbSDimitry Andric template <> 5455ffd83dbSDimitry Andric bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::ASCII>( 5465ffd83dbSDimitry Andric const ReadBufferAndDumpToStreamOptions &options) { 5475ffd83dbSDimitry Andric // Treat ASCII the same as UTF8. 5485ffd83dbSDimitry Andric // 5495ffd83dbSDimitry Andric // FIXME: This is probably not the right thing to do (well, it's debatable). 5505ffd83dbSDimitry Andric // If an ASCII-encoded string happens to contain a sequence of invalid bytes 5515ffd83dbSDimitry Andric // that forms a valid UTF8 character, we'll print out that character. This is 5525ffd83dbSDimitry Andric // good if you're playing fast and loose with encodings (probably good for 5535ffd83dbSDimitry Andric // std::string users), but maybe not so good if you care about your string 5545ffd83dbSDimitry Andric // formatter respecting the semantics of your selected string encoding. In 5555ffd83dbSDimitry Andric // the latter case you'd want to see the character byte sequence ('\x..'), not 5565ffd83dbSDimitry Andric // the UTF8 character itself. 5570b57cec5SDimitry Andric return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 5580b57cec5SDimitry Andric } 5590b57cec5SDimitry Andric 5600b57cec5SDimitry Andric } // namespace formatters 5610b57cec5SDimitry Andric 5620b57cec5SDimitry Andric } // namespace lldb_private 563