15ffd83dbSDimitry Andric //===-- StringPrinter.cpp -------------------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric
90b57cec5SDimitry Andric #include "lldb/DataFormatters/StringPrinter.h"
100b57cec5SDimitry Andric
110b57cec5SDimitry Andric #include "lldb/Core/Debugger.h"
120b57cec5SDimitry Andric #include "lldb/Core/ValueObject.h"
130b57cec5SDimitry Andric #include "lldb/Target/Language.h"
140b57cec5SDimitry Andric #include "lldb/Target/Process.h"
150b57cec5SDimitry Andric #include "lldb/Target/Target.h"
160b57cec5SDimitry Andric #include "lldb/Utility/Status.h"
170b57cec5SDimitry Andric
185ffd83dbSDimitry Andric #include "llvm/ADT/StringExtras.h"
190b57cec5SDimitry Andric #include "llvm/Support/ConvertUTF.h"
200b57cec5SDimitry Andric
21fe6060f1SDimitry Andric #include <cctype>
220b57cec5SDimitry Andric #include <locale>
230b57cec5SDimitry Andric #include <memory>
240b57cec5SDimitry Andric
250b57cec5SDimitry Andric using namespace lldb;
260b57cec5SDimitry Andric using namespace lldb_private;
270b57cec5SDimitry Andric using namespace lldb_private::formatters;
285ffd83dbSDimitry Andric using GetPrintableElementType = StringPrinter::GetPrintableElementType;
295ffd83dbSDimitry Andric using StringElementType = StringPrinter::StringElementType;
305ffd83dbSDimitry Andric
315ffd83dbSDimitry Andric /// DecodedCharBuffer stores the decoded contents of a single character. It
325ffd83dbSDimitry Andric /// avoids managing memory on the heap by copying decoded bytes into an in-line
335ffd83dbSDimitry Andric /// buffer.
345ffd83dbSDimitry Andric class DecodedCharBuffer {
355ffd83dbSDimitry Andric public:
DecodedCharBuffer(std::nullptr_t)365ffd83dbSDimitry Andric DecodedCharBuffer(std::nullptr_t) {}
375ffd83dbSDimitry Andric
DecodedCharBuffer(const uint8_t * bytes,size_t size)385ffd83dbSDimitry Andric DecodedCharBuffer(const uint8_t *bytes, size_t size) : m_size(size) {
395ffd83dbSDimitry Andric if (size > MaxLength)
405ffd83dbSDimitry Andric llvm_unreachable("unsupported length");
415ffd83dbSDimitry Andric memcpy(m_data, bytes, size);
425ffd83dbSDimitry Andric }
435ffd83dbSDimitry Andric
DecodedCharBuffer(const char * bytes,size_t size)445ffd83dbSDimitry Andric DecodedCharBuffer(const char *bytes, size_t size)
455ffd83dbSDimitry Andric : DecodedCharBuffer(reinterpret_cast<const uint8_t *>(bytes), size) {}
465ffd83dbSDimitry Andric
GetBytes() const475ffd83dbSDimitry Andric const uint8_t *GetBytes() const { return m_data; }
485ffd83dbSDimitry Andric
GetSize() const495ffd83dbSDimitry Andric size_t GetSize() const { return m_size; }
505ffd83dbSDimitry Andric
515ffd83dbSDimitry Andric private:
525ffd83dbSDimitry Andric static constexpr unsigned MaxLength = 16;
535ffd83dbSDimitry Andric
545ffd83dbSDimitry Andric size_t m_size = 0;
555ffd83dbSDimitry Andric uint8_t m_data[MaxLength] = {0};
565ffd83dbSDimitry Andric };
575ffd83dbSDimitry Andric
585ffd83dbSDimitry Andric using EscapingHelper =
595ffd83dbSDimitry Andric std::function<DecodedCharBuffer(uint8_t *, uint8_t *, uint8_t *&)>;
600b57cec5SDimitry Andric
610b57cec5SDimitry Andric // we define this for all values of type but only implement it for those we
620b57cec5SDimitry Andric // care about that's good because we get linker errors for any unsupported type
635ffd83dbSDimitry Andric template <StringElementType type>
645ffd83dbSDimitry Andric static DecodedCharBuffer
655ffd83dbSDimitry Andric GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next,
665ffd83dbSDimitry Andric StringPrinter::EscapeStyle escape_style);
670b57cec5SDimitry Andric
/// Unicode counterpart of isprint(): returns true when \p codepoint does not
/// fall in any of the ranges listed below.
static bool isprint32(char32_t codepoint) {
  // C0 controls (plus DEL) and C1 controls.
  const bool is_c0_control = codepoint <= 0x1F || codepoint == 0x7F;
  const bool is_c1_control = codepoint >= 0x80 && codepoint <= 0x9F;

  // Line and paragraph separators.
  const bool is_separator = codepoint == 0x2028 || codepoint == 0x2029;

  // Bidirectional text control characters.
  const bool is_bidi_control = codepoint == 0x200E || codepoint == 0x200F ||
                               (codepoint >= 0x202A && codepoint <= 0x202E);

  // Interlinear annotation characters and the specials that follow them.
  const bool is_special = codepoint >= 0xFFF9 && codepoint <= 0xFFFF;

  return !(is_c0_control || is_c1_control || is_separator ||
           is_bidi_control || is_special);
}
950b57cec5SDimitry Andric
attemptASCIIEscape(llvm::UTF32 c,StringPrinter::EscapeStyle escape_style)965ffd83dbSDimitry Andric DecodedCharBuffer attemptASCIIEscape(llvm::UTF32 c,
975ffd83dbSDimitry Andric StringPrinter::EscapeStyle escape_style) {
985ffd83dbSDimitry Andric const bool is_swift_escape_style =
995ffd83dbSDimitry Andric escape_style == StringPrinter::EscapeStyle::Swift;
1005ffd83dbSDimitry Andric switch (c) {
1010b57cec5SDimitry Andric case 0:
1025ffd83dbSDimitry Andric return {"\\0", 2};
1030b57cec5SDimitry Andric case '\a':
1045ffd83dbSDimitry Andric return {"\\a", 2};
1050b57cec5SDimitry Andric case '\b':
1065ffd83dbSDimitry Andric if (is_swift_escape_style)
1075ffd83dbSDimitry Andric return nullptr;
1085ffd83dbSDimitry Andric return {"\\b", 2};
1090b57cec5SDimitry Andric case '\f':
1105ffd83dbSDimitry Andric if (is_swift_escape_style)
1115ffd83dbSDimitry Andric return nullptr;
1125ffd83dbSDimitry Andric return {"\\f", 2};
1130b57cec5SDimitry Andric case '\n':
1145ffd83dbSDimitry Andric return {"\\n", 2};
1150b57cec5SDimitry Andric case '\r':
1165ffd83dbSDimitry Andric return {"\\r", 2};
1170b57cec5SDimitry Andric case '\t':
1185ffd83dbSDimitry Andric return {"\\t", 2};
1190b57cec5SDimitry Andric case '\v':
1205ffd83dbSDimitry Andric if (is_swift_escape_style)
1215ffd83dbSDimitry Andric return nullptr;
1225ffd83dbSDimitry Andric return {"\\v", 2};
1230b57cec5SDimitry Andric case '\"':
1245ffd83dbSDimitry Andric return {"\\\"", 2};
1255ffd83dbSDimitry Andric case '\'':
1265ffd83dbSDimitry Andric if (is_swift_escape_style)
1275ffd83dbSDimitry Andric return {"\\'", 2};
1285ffd83dbSDimitry Andric return nullptr;
1290b57cec5SDimitry Andric case '\\':
1305ffd83dbSDimitry Andric return {"\\\\", 2};
1310b57cec5SDimitry Andric }
1325ffd83dbSDimitry Andric return nullptr;
1330b57cec5SDimitry Andric }
1340b57cec5SDimitry Andric
1350b57cec5SDimitry Andric template <>
GetPrintableImpl(uint8_t * buffer,uint8_t * buffer_end,uint8_t * & next,StringPrinter::EscapeStyle escape_style)1365ffd83dbSDimitry Andric DecodedCharBuffer GetPrintableImpl<StringElementType::ASCII>(
1375ffd83dbSDimitry Andric uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next,
1385ffd83dbSDimitry Andric StringPrinter::EscapeStyle escape_style) {
1395ffd83dbSDimitry Andric // The ASCII helper always advances 1 byte at a time.
1400b57cec5SDimitry Andric next = buffer + 1;
1415ffd83dbSDimitry Andric
1425ffd83dbSDimitry Andric DecodedCharBuffer retval = attemptASCIIEscape(*buffer, escape_style);
1435ffd83dbSDimitry Andric if (retval.GetSize())
1440b57cec5SDimitry Andric return retval;
1455ffd83dbSDimitry Andric
1465ffd83dbSDimitry Andric // Use llvm's locale-independent isPrint(char), instead of the libc
1475ffd83dbSDimitry Andric // implementation which may give different results on different platforms.
1485ffd83dbSDimitry Andric if (llvm::isPrint(*buffer))
1495ffd83dbSDimitry Andric return {buffer, 1};
1505ffd83dbSDimitry Andric
1515ffd83dbSDimitry Andric unsigned escaped_len;
1525ffd83dbSDimitry Andric constexpr unsigned max_buffer_size = 7;
1535ffd83dbSDimitry Andric uint8_t data[max_buffer_size];
1545ffd83dbSDimitry Andric switch (escape_style) {
1555ffd83dbSDimitry Andric case StringPrinter::EscapeStyle::CXX:
1565ffd83dbSDimitry Andric // Prints 4 characters, then a \0 terminator.
15706c3fb27SDimitry Andric escaped_len = snprintf((char *)data, max_buffer_size, "\\x%02x", *buffer);
1585ffd83dbSDimitry Andric break;
1595ffd83dbSDimitry Andric case StringPrinter::EscapeStyle::Swift:
1605ffd83dbSDimitry Andric // Prints up to 6 characters, then a \0 terminator.
16106c3fb27SDimitry Andric escaped_len = snprintf((char *)data, max_buffer_size, "\\u{%x}", *buffer);
1625ffd83dbSDimitry Andric break;
1635ffd83dbSDimitry Andric }
1645ffd83dbSDimitry Andric lldbassert(escaped_len > 0 && "unknown string escape style");
1655ffd83dbSDimitry Andric return {data, escaped_len};
1660b57cec5SDimitry Andric }
1670b57cec5SDimitry Andric
1685ffd83dbSDimitry Andric template <>
GetPrintableImpl(uint8_t * buffer,uint8_t * buffer_end,uint8_t * & next,StringPrinter::EscapeStyle escape_style)1695ffd83dbSDimitry Andric DecodedCharBuffer GetPrintableImpl<StringElementType::UTF8>(
1705ffd83dbSDimitry Andric uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next,
1715ffd83dbSDimitry Andric StringPrinter::EscapeStyle escape_style) {
1725ffd83dbSDimitry Andric // If the utf8 encoded length is invalid (i.e., not in the closed interval
1735ffd83dbSDimitry Andric // [1;4]), or if there aren't enough bytes to print, or if the subsequence
1745ffd83dbSDimitry Andric // isn't valid utf8, fall back to printing an ASCII-escaped subsequence.
1755ffd83dbSDimitry Andric if (!llvm::isLegalUTF8Sequence(buffer, buffer_end))
1765ffd83dbSDimitry Andric return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next,
1775ffd83dbSDimitry Andric escape_style);
1780b57cec5SDimitry Andric
1795ffd83dbSDimitry Andric // Convert the valid utf8 sequence to a utf32 codepoint. This cannot fail.
1805ffd83dbSDimitry Andric llvm::UTF32 codepoint = 0;
1815ffd83dbSDimitry Andric const llvm::UTF8 *buffer_for_conversion = buffer;
1825ffd83dbSDimitry Andric llvm::ConversionResult result = llvm::convertUTF8Sequence(
1835ffd83dbSDimitry Andric &buffer_for_conversion, buffer_end, &codepoint, llvm::strictConversion);
1845ffd83dbSDimitry Andric assert(result == llvm::conversionOK &&
1855ffd83dbSDimitry Andric "Failed to convert legal utf8 sequence");
1865f757f3fSDimitry Andric UNUSED_IF_ASSERT_DISABLED(result);
1875ffd83dbSDimitry Andric
1885ffd83dbSDimitry Andric // The UTF8 helper always advances by the utf8 encoded length.
1895ffd83dbSDimitry Andric const unsigned utf8_encoded_len = buffer_for_conversion - buffer;
1900b57cec5SDimitry Andric next = buffer + utf8_encoded_len;
1910b57cec5SDimitry Andric
1925ffd83dbSDimitry Andric DecodedCharBuffer retval = attemptASCIIEscape(codepoint, escape_style);
1935ffd83dbSDimitry Andric if (retval.GetSize())
1940b57cec5SDimitry Andric return retval;
1955ffd83dbSDimitry Andric if (isprint32(codepoint))
1965ffd83dbSDimitry Andric return {buffer, utf8_encoded_len};
1975ffd83dbSDimitry Andric
1985ffd83dbSDimitry Andric unsigned escaped_len;
1995ffd83dbSDimitry Andric constexpr unsigned max_buffer_size = 13;
2005ffd83dbSDimitry Andric uint8_t data[max_buffer_size];
2015ffd83dbSDimitry Andric switch (escape_style) {
2025ffd83dbSDimitry Andric case StringPrinter::EscapeStyle::CXX:
2035ffd83dbSDimitry Andric // Prints 10 characters, then a \0 terminator.
20406c3fb27SDimitry Andric escaped_len = snprintf((char *)data, max_buffer_size, "\\U%08x", codepoint);
2055ffd83dbSDimitry Andric break;
2065ffd83dbSDimitry Andric case StringPrinter::EscapeStyle::Swift:
2075ffd83dbSDimitry Andric // Prints up to 12 characters, then a \0 terminator.
20806c3fb27SDimitry Andric escaped_len = snprintf((char *)data, max_buffer_size, "\\u{%x}", codepoint);
2095ffd83dbSDimitry Andric break;
2105ffd83dbSDimitry Andric }
2115ffd83dbSDimitry Andric lldbassert(escaped_len > 0 && "unknown string escape style");
2125ffd83dbSDimitry Andric return {data, escaped_len};
2130b57cec5SDimitry Andric }
2140b57cec5SDimitry Andric
2150b57cec5SDimitry Andric // Given a sequence of bytes, this function returns: a sequence of bytes to
2160b57cec5SDimitry Andric // actually print out + a length the following unscanned position of the buffer
2170b57cec5SDimitry Andric // is in next
GetPrintable(StringElementType type,uint8_t * buffer,uint8_t * buffer_end,uint8_t * & next,StringPrinter::EscapeStyle escape_style)2185ffd83dbSDimitry Andric static DecodedCharBuffer GetPrintable(StringElementType type, uint8_t *buffer,
2195ffd83dbSDimitry Andric uint8_t *buffer_end, uint8_t *&next,
2205ffd83dbSDimitry Andric StringPrinter::EscapeStyle escape_style) {
2215ffd83dbSDimitry Andric if (!buffer || buffer >= buffer_end)
2220b57cec5SDimitry Andric return {nullptr};
2230b57cec5SDimitry Andric
2240b57cec5SDimitry Andric switch (type) {
2255ffd83dbSDimitry Andric case StringElementType::ASCII:
2265ffd83dbSDimitry Andric return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next,
2275ffd83dbSDimitry Andric escape_style);
2285ffd83dbSDimitry Andric case StringElementType::UTF8:
2295ffd83dbSDimitry Andric return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next,
2305ffd83dbSDimitry Andric escape_style);
2310b57cec5SDimitry Andric default:
2320b57cec5SDimitry Andric return {nullptr};
2330b57cec5SDimitry Andric }
2340b57cec5SDimitry Andric }
2350b57cec5SDimitry Andric
2365ffd83dbSDimitry Andric static EscapingHelper
GetDefaultEscapingHelper(GetPrintableElementType elem_type,StringPrinter::EscapeStyle escape_style)2375ffd83dbSDimitry Andric GetDefaultEscapingHelper(GetPrintableElementType elem_type,
2385ffd83dbSDimitry Andric StringPrinter::EscapeStyle escape_style) {
2390b57cec5SDimitry Andric switch (elem_type) {
2400b57cec5SDimitry Andric case GetPrintableElementType::UTF8:
2410b57cec5SDimitry Andric case GetPrintableElementType::ASCII:
2425ffd83dbSDimitry Andric return [escape_style, elem_type](uint8_t *buffer, uint8_t *buffer_end,
2435ffd83dbSDimitry Andric uint8_t *&next) -> DecodedCharBuffer {
2445ffd83dbSDimitry Andric return GetPrintable(elem_type == GetPrintableElementType::UTF8
2455ffd83dbSDimitry Andric ? StringElementType::UTF8
2465ffd83dbSDimitry Andric : StringElementType::ASCII,
2475ffd83dbSDimitry Andric buffer, buffer_end, next, escape_style);
2480b57cec5SDimitry Andric };
2490b57cec5SDimitry Andric }
2500b57cec5SDimitry Andric llvm_unreachable("bad element type");
2510b57cec5SDimitry Andric }
2520b57cec5SDimitry Andric
2535ffd83dbSDimitry Andric /// Read a string encoded in accordance with \tparam SourceDataType from a
2545ffd83dbSDimitry Andric /// host-side LLDB buffer, then pretty-print it to a stream using \p style.
2550b57cec5SDimitry Andric template <typename SourceDataType>
DumpEncodedBufferToStream(GetPrintableElementType style,llvm::ConversionResult (* ConvertFunction)(const SourceDataType **,const SourceDataType *,llvm::UTF8 **,llvm::UTF8 *,llvm::ConversionFlags),const StringPrinter::ReadBufferAndDumpToStreamOptions & dump_options)2565ffd83dbSDimitry Andric static bool DumpEncodedBufferToStream(
2575ffd83dbSDimitry Andric GetPrintableElementType style,
2580b57cec5SDimitry Andric llvm::ConversionResult (*ConvertFunction)(const SourceDataType **,
2590b57cec5SDimitry Andric const SourceDataType *,
2600b57cec5SDimitry Andric llvm::UTF8 **, llvm::UTF8 *,
2610b57cec5SDimitry Andric llvm::ConversionFlags),
2620b57cec5SDimitry Andric const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) {
2635ffd83dbSDimitry Andric assert(dump_options.GetStream() && "need a Stream to print the string to");
2640b57cec5SDimitry Andric Stream &stream(*dump_options.GetStream());
2650b57cec5SDimitry Andric if (dump_options.GetPrefixToken() != nullptr)
2660b57cec5SDimitry Andric stream.Printf("%s", dump_options.GetPrefixToken());
2670b57cec5SDimitry Andric if (dump_options.GetQuote() != 0)
2680b57cec5SDimitry Andric stream.Printf("%c", dump_options.GetQuote());
2690b57cec5SDimitry Andric auto data(dump_options.GetData());
2700b57cec5SDimitry Andric auto source_size(dump_options.GetSourceSize());
2710b57cec5SDimitry Andric if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) {
2720b57cec5SDimitry Andric const int bufferSPSize = data.GetByteSize();
2730b57cec5SDimitry Andric if (dump_options.GetSourceSize() == 0) {
2740b57cec5SDimitry Andric const int origin_encoding = 8 * sizeof(SourceDataType);
2750b57cec5SDimitry Andric source_size = bufferSPSize / (origin_encoding / 4);
2760b57cec5SDimitry Andric }
2770b57cec5SDimitry Andric
2780b57cec5SDimitry Andric const SourceDataType *data_ptr =
2790b57cec5SDimitry Andric (const SourceDataType *)data.GetDataStart();
2800b57cec5SDimitry Andric const SourceDataType *data_end_ptr = data_ptr + source_size;
2810b57cec5SDimitry Andric
2820b57cec5SDimitry Andric const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator();
2830b57cec5SDimitry Andric
2840b57cec5SDimitry Andric if (zero_is_terminator) {
2850b57cec5SDimitry Andric while (data_ptr < data_end_ptr) {
2860b57cec5SDimitry Andric if (!*data_ptr) {
2870b57cec5SDimitry Andric data_end_ptr = data_ptr;
2880b57cec5SDimitry Andric break;
2890b57cec5SDimitry Andric }
2900b57cec5SDimitry Andric data_ptr++;
2910b57cec5SDimitry Andric }
2920b57cec5SDimitry Andric
2930b57cec5SDimitry Andric data_ptr = (const SourceDataType *)data.GetDataStart();
2940b57cec5SDimitry Andric }
2950b57cec5SDimitry Andric
29681ad6265SDimitry Andric lldb::WritableDataBufferSP utf8_data_buffer_sp;
2970b57cec5SDimitry Andric llvm::UTF8 *utf8_data_ptr = nullptr;
2980b57cec5SDimitry Andric llvm::UTF8 *utf8_data_end_ptr = nullptr;
2990b57cec5SDimitry Andric
3000b57cec5SDimitry Andric if (ConvertFunction) {
3010b57cec5SDimitry Andric utf8_data_buffer_sp =
3020b57cec5SDimitry Andric std::make_shared<DataBufferHeap>(4 * bufferSPSize, 0);
3030b57cec5SDimitry Andric utf8_data_ptr = (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes();
3040b57cec5SDimitry Andric utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize();
3050b57cec5SDimitry Andric ConvertFunction(&data_ptr, data_end_ptr, &utf8_data_ptr,
3060b57cec5SDimitry Andric utf8_data_end_ptr, llvm::lenientConversion);
3070b57cec5SDimitry Andric if (!zero_is_terminator)
3080b57cec5SDimitry Andric utf8_data_end_ptr = utf8_data_ptr;
3090b57cec5SDimitry Andric // needed because the ConvertFunction will change the value of the
3100b57cec5SDimitry Andric // data_ptr.
3110b57cec5SDimitry Andric utf8_data_ptr =
3120b57cec5SDimitry Andric (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes();
3130b57cec5SDimitry Andric } else {
3140b57cec5SDimitry Andric // just copy the pointers - the cast is necessary to make the compiler
3150b57cec5SDimitry Andric // happy but this should only happen if we are reading UTF8 data
3160b57cec5SDimitry Andric utf8_data_ptr = const_cast<llvm::UTF8 *>(
3170b57cec5SDimitry Andric reinterpret_cast<const llvm::UTF8 *>(data_ptr));
3180b57cec5SDimitry Andric utf8_data_end_ptr = const_cast<llvm::UTF8 *>(
3190b57cec5SDimitry Andric reinterpret_cast<const llvm::UTF8 *>(data_end_ptr));
3200b57cec5SDimitry Andric }
3210b57cec5SDimitry Andric
3220b57cec5SDimitry Andric const bool escape_non_printables = dump_options.GetEscapeNonPrintables();
3235ffd83dbSDimitry Andric EscapingHelper escaping_callback;
3245ffd83dbSDimitry Andric if (escape_non_printables)
3250b57cec5SDimitry Andric escaping_callback =
3265ffd83dbSDimitry Andric GetDefaultEscapingHelper(style, dump_options.GetEscapeStyle());
3270b57cec5SDimitry Andric
3280b57cec5SDimitry Andric // since we tend to accept partial data (and even partially malformed data)
3290b57cec5SDimitry Andric // we might end up with no NULL terminator before the end_ptr hence we need
3300b57cec5SDimitry Andric // to take a slower route and ensure we stay within boundaries
3310b57cec5SDimitry Andric for (; utf8_data_ptr < utf8_data_end_ptr;) {
3320b57cec5SDimitry Andric if (zero_is_terminator && !*utf8_data_ptr)
3330b57cec5SDimitry Andric break;
3340b57cec5SDimitry Andric
3350b57cec5SDimitry Andric if (escape_non_printables) {
3360b57cec5SDimitry Andric uint8_t *next_data = nullptr;
3370b57cec5SDimitry Andric auto printable =
3380b57cec5SDimitry Andric escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data);
3390b57cec5SDimitry Andric auto printable_bytes = printable.GetBytes();
3400b57cec5SDimitry Andric auto printable_size = printable.GetSize();
3415ffd83dbSDimitry Andric
3425ffd83dbSDimitry Andric // We failed to figure out how to print this string.
3435ffd83dbSDimitry Andric if (!printable_bytes || !next_data)
3445ffd83dbSDimitry Andric return false;
3455ffd83dbSDimitry Andric
3460b57cec5SDimitry Andric for (unsigned c = 0; c < printable_size; c++)
3470b57cec5SDimitry Andric stream.Printf("%c", *(printable_bytes + c));
3480b57cec5SDimitry Andric utf8_data_ptr = (uint8_t *)next_data;
3490b57cec5SDimitry Andric } else {
3500b57cec5SDimitry Andric stream.Printf("%c", *utf8_data_ptr);
3510b57cec5SDimitry Andric utf8_data_ptr++;
3520b57cec5SDimitry Andric }
3530b57cec5SDimitry Andric }
3540b57cec5SDimitry Andric }
3550b57cec5SDimitry Andric if (dump_options.GetQuote() != 0)
3560b57cec5SDimitry Andric stream.Printf("%c", dump_options.GetQuote());
3570b57cec5SDimitry Andric if (dump_options.GetSuffixToken() != nullptr)
3580b57cec5SDimitry Andric stream.Printf("%s", dump_options.GetSuffixToken());
3590b57cec5SDimitry Andric if (dump_options.GetIsTruncated())
3600b57cec5SDimitry Andric stream.Printf("...");
3610b57cec5SDimitry Andric return true;
3620b57cec5SDimitry Andric }
3630b57cec5SDimitry Andric
3640b57cec5SDimitry Andric lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions::
ReadStringAndDumpToStreamOptions(ValueObject & valobj)3650b57cec5SDimitry Andric ReadStringAndDumpToStreamOptions(ValueObject &valobj)
3660b57cec5SDimitry Andric : ReadStringAndDumpToStreamOptions() {
3670b57cec5SDimitry Andric SetEscapeNonPrintables(
3680b57cec5SDimitry Andric valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
3690b57cec5SDimitry Andric }
3700b57cec5SDimitry Andric
3710b57cec5SDimitry Andric lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::
ReadBufferAndDumpToStreamOptions(ValueObject & valobj)3720b57cec5SDimitry Andric ReadBufferAndDumpToStreamOptions(ValueObject &valobj)
3730b57cec5SDimitry Andric : ReadBufferAndDumpToStreamOptions() {
3740b57cec5SDimitry Andric SetEscapeNonPrintables(
3750b57cec5SDimitry Andric valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
3760b57cec5SDimitry Andric }
3770b57cec5SDimitry Andric
3780b57cec5SDimitry Andric lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::
ReadBufferAndDumpToStreamOptions(const ReadStringAndDumpToStreamOptions & options)3790b57cec5SDimitry Andric ReadBufferAndDumpToStreamOptions(
3800b57cec5SDimitry Andric const ReadStringAndDumpToStreamOptions &options)
3810b57cec5SDimitry Andric : ReadBufferAndDumpToStreamOptions() {
3820b57cec5SDimitry Andric SetStream(options.GetStream());
3830b57cec5SDimitry Andric SetPrefixToken(options.GetPrefixToken());
3840b57cec5SDimitry Andric SetSuffixToken(options.GetSuffixToken());
3850b57cec5SDimitry Andric SetQuote(options.GetQuote());
3860b57cec5SDimitry Andric SetEscapeNonPrintables(options.GetEscapeNonPrintables());
3870b57cec5SDimitry Andric SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator());
3885ffd83dbSDimitry Andric SetEscapeStyle(options.GetEscapeStyle());
3890b57cec5SDimitry Andric }
3900b57cec5SDimitry Andric
3910b57cec5SDimitry Andric namespace lldb_private {
3920b57cec5SDimitry Andric
3930b57cec5SDimitry Andric namespace formatters {
3940b57cec5SDimitry Andric
3950b57cec5SDimitry Andric template <typename SourceDataType>
ReadEncodedBufferAndDumpToStream(StringElementType elem_type,const StringPrinter::ReadStringAndDumpToStreamOptions & options,llvm::ConversionResult (* ConvertFunction)(const SourceDataType **,const SourceDataType *,llvm::UTF8 **,llvm::UTF8 *,llvm::ConversionFlags))3965ffd83dbSDimitry Andric static bool ReadEncodedBufferAndDumpToStream(
3975ffd83dbSDimitry Andric StringElementType elem_type,
3980b57cec5SDimitry Andric const StringPrinter::ReadStringAndDumpToStreamOptions &options,
3990b57cec5SDimitry Andric llvm::ConversionResult (*ConvertFunction)(const SourceDataType **,
4000b57cec5SDimitry Andric const SourceDataType *,
4010b57cec5SDimitry Andric llvm::UTF8 **, llvm::UTF8 *,
4020b57cec5SDimitry Andric llvm::ConversionFlags)) {
4030b57cec5SDimitry Andric assert(options.GetStream() && "need a Stream to print the string to");
4045ffd83dbSDimitry Andric if (!options.GetStream())
4055ffd83dbSDimitry Andric return false;
4060b57cec5SDimitry Andric
4070b57cec5SDimitry Andric if (options.GetLocation() == 0 ||
4080b57cec5SDimitry Andric options.GetLocation() == LLDB_INVALID_ADDRESS)
4090b57cec5SDimitry Andric return false;
4100b57cec5SDimitry Andric
411349cc55cSDimitry Andric lldb::TargetSP target_sp = options.GetTargetSP();
412349cc55cSDimitry Andric if (!target_sp)
4130b57cec5SDimitry Andric return false;
4140b57cec5SDimitry Andric
4155ffd83dbSDimitry Andric constexpr int type_width = sizeof(SourceDataType);
4165ffd83dbSDimitry Andric constexpr int origin_encoding = 8 * type_width;
4170b57cec5SDimitry Andric if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32)
4180b57cec5SDimitry Andric return false;
4195ffd83dbSDimitry Andric // If not UTF8 or ASCII, conversion to UTF8 is necessary.
4200b57cec5SDimitry Andric if (origin_encoding != 8 && !ConvertFunction)
4210b57cec5SDimitry Andric return false;
4220b57cec5SDimitry Andric
4230b57cec5SDimitry Andric bool needs_zero_terminator = options.GetNeedsZeroTermination();
4240b57cec5SDimitry Andric
4250b57cec5SDimitry Andric bool is_truncated = false;
426349cc55cSDimitry Andric const auto max_size = target_sp->GetMaximumSizeOfStringSummary();
4270b57cec5SDimitry Andric
4285ffd83dbSDimitry Andric uint32_t sourceSize;
4295ffd83dbSDimitry Andric if (elem_type == StringElementType::ASCII && !options.GetSourceSize()) {
4305ffd83dbSDimitry Andric // FIXME: The NSString formatter sets HasSourceSize(true) when the size is
4315ffd83dbSDimitry Andric // actually unknown, as well as SetBinaryZeroIsTerminator(false). IIUC the
4325ffd83dbSDimitry Andric // C++ formatter also sets SetBinaryZeroIsTerminator(false) when it doesn't
4335ffd83dbSDimitry Andric // mean to. I don't see how this makes sense: we should fix the formatters.
4345ffd83dbSDimitry Andric //
4355ffd83dbSDimitry Andric // Until then, the behavior that's expected for ASCII strings with unknown
4365ffd83dbSDimitry Andric // lengths is to read up to the max size and then null-terminate. Do that.
4370b57cec5SDimitry Andric sourceSize = max_size;
4380b57cec5SDimitry Andric needs_zero_terminator = true;
4395ffd83dbSDimitry Andric } else if (options.HasSourceSize()) {
4405ffd83dbSDimitry Andric sourceSize = options.GetSourceSize();
4415ffd83dbSDimitry Andric if (!options.GetIgnoreMaxLength()) {
4420b57cec5SDimitry Andric if (sourceSize > max_size) {
4430b57cec5SDimitry Andric sourceSize = max_size;
4440b57cec5SDimitry Andric is_truncated = true;
4450b57cec5SDimitry Andric }
4460b57cec5SDimitry Andric }
4475ffd83dbSDimitry Andric } else {
4485ffd83dbSDimitry Andric sourceSize = max_size;
4495ffd83dbSDimitry Andric needs_zero_terminator = true;
4505ffd83dbSDimitry Andric }
4510b57cec5SDimitry Andric
4520b57cec5SDimitry Andric const int bufferSPSize = sourceSize * type_width;
45381ad6265SDimitry Andric lldb::WritableDataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize, 0));
4540b57cec5SDimitry Andric
4555ffd83dbSDimitry Andric // Check if we got bytes. We never get any bytes if we have an empty
4565ffd83dbSDimitry Andric // string, but we still continue so that we end up actually printing
4575ffd83dbSDimitry Andric // an empty string ("").
4585ffd83dbSDimitry Andric if (sourceSize != 0 && !buffer_sp->GetBytes())
4590b57cec5SDimitry Andric return false;
4600b57cec5SDimitry Andric
4610b57cec5SDimitry Andric Status error;
4620b57cec5SDimitry Andric char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes());
4630b57cec5SDimitry Andric
4645ffd83dbSDimitry Andric if (elem_type == StringElementType::ASCII)
465349cc55cSDimitry Andric target_sp->ReadCStringFromMemory(options.GetLocation(), buffer,
4665ffd83dbSDimitry Andric bufferSPSize, error);
4675ffd83dbSDimitry Andric else if (needs_zero_terminator)
468349cc55cSDimitry Andric target_sp->ReadStringFromMemory(options.GetLocation(), buffer,
4690b57cec5SDimitry Andric bufferSPSize, error, type_width);
4700b57cec5SDimitry Andric else
471349cc55cSDimitry Andric target_sp->ReadMemory(options.GetLocation(), buffer, bufferSPSize, error);
4720b57cec5SDimitry Andric if (error.Fail()) {
4730b57cec5SDimitry Andric options.GetStream()->Printf("unable to read data");
4740b57cec5SDimitry Andric return true;
4750b57cec5SDimitry Andric }
4760b57cec5SDimitry Andric
4770b57cec5SDimitry Andric StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options);
478349cc55cSDimitry Andric dump_options.SetData(
479349cc55cSDimitry Andric DataExtractor(buffer_sp, target_sp->GetArchitecture().GetByteOrder(),
480349cc55cSDimitry Andric target_sp->GetArchitecture().GetAddressByteSize()));
4810b57cec5SDimitry Andric dump_options.SetSourceSize(sourceSize);
4820b57cec5SDimitry Andric dump_options.SetIsTruncated(is_truncated);
4835ffd83dbSDimitry Andric dump_options.SetNeedsZeroTermination(needs_zero_terminator);
4845ffd83dbSDimitry Andric if (needs_zero_terminator)
4855ffd83dbSDimitry Andric dump_options.SetBinaryZeroIsTerminator(true);
4860b57cec5SDimitry Andric
4875ffd83dbSDimitry Andric GetPrintableElementType print_style = (elem_type == StringElementType::ASCII)
4885ffd83dbSDimitry Andric ? GetPrintableElementType::ASCII
4895ffd83dbSDimitry Andric : GetPrintableElementType::UTF8;
4905ffd83dbSDimitry Andric return DumpEncodedBufferToStream(print_style, ConvertFunction, dump_options);
4910b57cec5SDimitry Andric }
4920b57cec5SDimitry Andric
4930b57cec5SDimitry Andric template <>
ReadStringAndDumpToStream(const ReadStringAndDumpToStreamOptions & options)4945ffd83dbSDimitry Andric bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF8>(
4950b57cec5SDimitry Andric const ReadStringAndDumpToStreamOptions &options) {
4965ffd83dbSDimitry Andric return ReadEncodedBufferAndDumpToStream<llvm::UTF8>(StringElementType::UTF8,
4975ffd83dbSDimitry Andric options, nullptr);
4980b57cec5SDimitry Andric }
4990b57cec5SDimitry Andric
5000b57cec5SDimitry Andric template <>
ReadStringAndDumpToStream(const ReadStringAndDumpToStreamOptions & options)5015ffd83dbSDimitry Andric bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF16>(
5020b57cec5SDimitry Andric const ReadStringAndDumpToStreamOptions &options) {
5035ffd83dbSDimitry Andric return ReadEncodedBufferAndDumpToStream<llvm::UTF16>(
5045ffd83dbSDimitry Andric StringElementType::UTF16, options, llvm::ConvertUTF16toUTF8);
5050b57cec5SDimitry Andric }
5060b57cec5SDimitry Andric
5070b57cec5SDimitry Andric template <>
ReadStringAndDumpToStream(const ReadStringAndDumpToStreamOptions & options)5085ffd83dbSDimitry Andric bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF32>(
5090b57cec5SDimitry Andric const ReadStringAndDumpToStreamOptions &options) {
5105ffd83dbSDimitry Andric return ReadEncodedBufferAndDumpToStream<llvm::UTF32>(
5115ffd83dbSDimitry Andric StringElementType::UTF32, options, llvm::ConvertUTF32toUTF8);
5120b57cec5SDimitry Andric }
5130b57cec5SDimitry Andric
5140b57cec5SDimitry Andric template <>
ReadStringAndDumpToStream(const ReadStringAndDumpToStreamOptions & options)5155ffd83dbSDimitry Andric bool StringPrinter::ReadStringAndDumpToStream<StringElementType::ASCII>(
5165ffd83dbSDimitry Andric const ReadStringAndDumpToStreamOptions &options) {
5175ffd83dbSDimitry Andric return ReadEncodedBufferAndDumpToStream<char>(StringElementType::ASCII,
5185ffd83dbSDimitry Andric options, nullptr);
5195ffd83dbSDimitry Andric }
5205ffd83dbSDimitry Andric
5215ffd83dbSDimitry Andric template <>
ReadBufferAndDumpToStream(const ReadBufferAndDumpToStreamOptions & options)5225ffd83dbSDimitry Andric bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF8>(
5230b57cec5SDimitry Andric const ReadBufferAndDumpToStreamOptions &options) {
5245ffd83dbSDimitry Andric return DumpEncodedBufferToStream<llvm::UTF8>(GetPrintableElementType::UTF8,
5255ffd83dbSDimitry Andric nullptr, options);
5260b57cec5SDimitry Andric }
5270b57cec5SDimitry Andric
5280b57cec5SDimitry Andric template <>
ReadBufferAndDumpToStream(const ReadBufferAndDumpToStreamOptions & options)5295ffd83dbSDimitry Andric bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF16>(
5300b57cec5SDimitry Andric const ReadBufferAndDumpToStreamOptions &options) {
5315ffd83dbSDimitry Andric return DumpEncodedBufferToStream(GetPrintableElementType::UTF8,
5325ffd83dbSDimitry Andric llvm::ConvertUTF16toUTF8, options);
5335ffd83dbSDimitry Andric }
5345ffd83dbSDimitry Andric
5355ffd83dbSDimitry Andric template <>
ReadBufferAndDumpToStream(const ReadBufferAndDumpToStreamOptions & options)5365ffd83dbSDimitry Andric bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF32>(
5375ffd83dbSDimitry Andric const ReadBufferAndDumpToStreamOptions &options) {
5385ffd83dbSDimitry Andric return DumpEncodedBufferToStream(GetPrintableElementType::UTF8,
5395ffd83dbSDimitry Andric llvm::ConvertUTF32toUTF8, options);
5405ffd83dbSDimitry Andric }
5415ffd83dbSDimitry Andric
5425ffd83dbSDimitry Andric template <>
ReadBufferAndDumpToStream(const ReadBufferAndDumpToStreamOptions & options)5435ffd83dbSDimitry Andric bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::ASCII>(
5445ffd83dbSDimitry Andric const ReadBufferAndDumpToStreamOptions &options) {
5455ffd83dbSDimitry Andric // Treat ASCII the same as UTF8.
5465ffd83dbSDimitry Andric //
5475ffd83dbSDimitry Andric // FIXME: This is probably not the right thing to do (well, it's debatable).
5485ffd83dbSDimitry Andric // If an ASCII-encoded string happens to contain a sequence of invalid bytes
5495ffd83dbSDimitry Andric // that forms a valid UTF8 character, we'll print out that character. This is
5505ffd83dbSDimitry Andric // good if you're playing fast and loose with encodings (probably good for
5515ffd83dbSDimitry Andric // std::string users), but maybe not so good if you care about your string
5525ffd83dbSDimitry Andric // formatter respecting the semantics of your selected string encoding. In
5535ffd83dbSDimitry Andric // the latter case you'd want to see the character byte sequence ('\x..'), not
5545ffd83dbSDimitry Andric // the UTF8 character itself.
5550b57cec5SDimitry Andric return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
5560b57cec5SDimitry Andric }
5570b57cec5SDimitry Andric
5580b57cec5SDimitry Andric } // namespace formatters
5590b57cec5SDimitry Andric
5600b57cec5SDimitry Andric } // namespace lldb_private
561