15ffd83dbSDimitry Andric //===-- StringPrinter.cpp -------------------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "lldb/DataFormatters/StringPrinter.h"
100b57cec5SDimitry Andric 
110b57cec5SDimitry Andric #include "lldb/Core/Debugger.h"
120b57cec5SDimitry Andric #include "lldb/Core/ValueObject.h"
130b57cec5SDimitry Andric #include "lldb/Target/Language.h"
140b57cec5SDimitry Andric #include "lldb/Target/Process.h"
150b57cec5SDimitry Andric #include "lldb/Target/Target.h"
160b57cec5SDimitry Andric #include "lldb/Utility/Status.h"
170b57cec5SDimitry Andric 
185ffd83dbSDimitry Andric #include "llvm/ADT/StringExtras.h"
190b57cec5SDimitry Andric #include "llvm/Support/ConvertUTF.h"
200b57cec5SDimitry Andric 
21fe6060f1SDimitry Andric #include <cctype>
220b57cec5SDimitry Andric #include <locale>
230b57cec5SDimitry Andric #include <memory>
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric using namespace lldb;
260b57cec5SDimitry Andric using namespace lldb_private;
270b57cec5SDimitry Andric using namespace lldb_private::formatters;
285ffd83dbSDimitry Andric using GetPrintableElementType = StringPrinter::GetPrintableElementType;
295ffd83dbSDimitry Andric using StringElementType = StringPrinter::StringElementType;
305ffd83dbSDimitry Andric 
315ffd83dbSDimitry Andric /// DecodedCharBuffer stores the decoded contents of a single character. It
325ffd83dbSDimitry Andric /// avoids managing memory on the heap by copying decoded bytes into an in-line
335ffd83dbSDimitry Andric /// buffer.
345ffd83dbSDimitry Andric class DecodedCharBuffer {
355ffd83dbSDimitry Andric public:
DecodedCharBuffer(std::nullptr_t)365ffd83dbSDimitry Andric   DecodedCharBuffer(std::nullptr_t) {}
375ffd83dbSDimitry Andric 
DecodedCharBuffer(const uint8_t * bytes,size_t size)385ffd83dbSDimitry Andric   DecodedCharBuffer(const uint8_t *bytes, size_t size) : m_size(size) {
395ffd83dbSDimitry Andric     if (size > MaxLength)
405ffd83dbSDimitry Andric       llvm_unreachable("unsupported length");
415ffd83dbSDimitry Andric     memcpy(m_data, bytes, size);
425ffd83dbSDimitry Andric   }
435ffd83dbSDimitry Andric 
DecodedCharBuffer(const char * bytes,size_t size)445ffd83dbSDimitry Andric   DecodedCharBuffer(const char *bytes, size_t size)
455ffd83dbSDimitry Andric       : DecodedCharBuffer(reinterpret_cast<const uint8_t *>(bytes), size) {}
465ffd83dbSDimitry Andric 
GetBytes() const475ffd83dbSDimitry Andric   const uint8_t *GetBytes() const { return m_data; }
485ffd83dbSDimitry Andric 
GetSize() const495ffd83dbSDimitry Andric   size_t GetSize() const { return m_size; }
505ffd83dbSDimitry Andric 
515ffd83dbSDimitry Andric private:
525ffd83dbSDimitry Andric   static constexpr unsigned MaxLength = 16;
535ffd83dbSDimitry Andric 
545ffd83dbSDimitry Andric   size_t m_size = 0;
555ffd83dbSDimitry Andric   uint8_t m_data[MaxLength] = {0};
565ffd83dbSDimitry Andric };
575ffd83dbSDimitry Andric 
585ffd83dbSDimitry Andric using EscapingHelper =
595ffd83dbSDimitry Andric     std::function<DecodedCharBuffer(uint8_t *, uint8_t *, uint8_t *&)>;
600b57cec5SDimitry Andric 
610b57cec5SDimitry Andric // we define this for all values of type but only implement it for those we
620b57cec5SDimitry Andric // care about that's good because we get linker errors for any unsupported type
635ffd83dbSDimitry Andric template <StringElementType type>
645ffd83dbSDimitry Andric static DecodedCharBuffer
655ffd83dbSDimitry Andric GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next,
665ffd83dbSDimitry Andric                  StringPrinter::EscapeStyle escape_style);
670b57cec5SDimitry Andric 
// Unicode-aware counterpart of isprint(): returns false for the codepoint
// ranges listed below and true for everything else.
static bool isprint32(char32_t codepoint) {
  // Inclusive range test against the codepoint under inspection.
  const auto in_range = [codepoint](char32_t first, char32_t last) {
    return first <= codepoint && codepoint <= last;
  };

  // C0 controls (plus DEL) and C1 controls.
  const bool is_control =
      in_range(0x00, 0x1F) || codepoint == 0x7F || in_range(0x80, 0x9F);
  // Line and paragraph separators.
  const bool is_separator = in_range(0x2028, 0x2029);
  // Bidirectional text control.
  const bool is_bidi_control =
      in_range(0x200E, 0x200F) || in_range(0x202A, 0x202E);
  // Interlinears and generally specials.
  const bool is_special = in_range(0xFFF9, 0xFFFF);

  return !(is_control || is_separator || is_bidi_control || is_special);
}
950b57cec5SDimitry Andric 
attemptASCIIEscape(llvm::UTF32 c,StringPrinter::EscapeStyle escape_style)965ffd83dbSDimitry Andric DecodedCharBuffer attemptASCIIEscape(llvm::UTF32 c,
975ffd83dbSDimitry Andric                                      StringPrinter::EscapeStyle escape_style) {
985ffd83dbSDimitry Andric   const bool is_swift_escape_style =
995ffd83dbSDimitry Andric       escape_style == StringPrinter::EscapeStyle::Swift;
1005ffd83dbSDimitry Andric   switch (c) {
1010b57cec5SDimitry Andric   case 0:
1025ffd83dbSDimitry Andric     return {"\\0", 2};
1030b57cec5SDimitry Andric   case '\a':
1045ffd83dbSDimitry Andric     return {"\\a", 2};
1050b57cec5SDimitry Andric   case '\b':
1065ffd83dbSDimitry Andric     if (is_swift_escape_style)
1075ffd83dbSDimitry Andric       return nullptr;
1085ffd83dbSDimitry Andric     return {"\\b", 2};
1090b57cec5SDimitry Andric   case '\f':
1105ffd83dbSDimitry Andric     if (is_swift_escape_style)
1115ffd83dbSDimitry Andric       return nullptr;
1125ffd83dbSDimitry Andric     return {"\\f", 2};
1130b57cec5SDimitry Andric   case '\n':
1145ffd83dbSDimitry Andric     return {"\\n", 2};
1150b57cec5SDimitry Andric   case '\r':
1165ffd83dbSDimitry Andric     return {"\\r", 2};
1170b57cec5SDimitry Andric   case '\t':
1185ffd83dbSDimitry Andric     return {"\\t", 2};
1190b57cec5SDimitry Andric   case '\v':
1205ffd83dbSDimitry Andric     if (is_swift_escape_style)
1215ffd83dbSDimitry Andric       return nullptr;
1225ffd83dbSDimitry Andric     return {"\\v", 2};
1230b57cec5SDimitry Andric   case '\"':
1245ffd83dbSDimitry Andric     return {"\\\"", 2};
1255ffd83dbSDimitry Andric   case '\'':
1265ffd83dbSDimitry Andric     if (is_swift_escape_style)
1275ffd83dbSDimitry Andric       return {"\\'", 2};
1285ffd83dbSDimitry Andric     return nullptr;
1290b57cec5SDimitry Andric   case '\\':
1305ffd83dbSDimitry Andric     return {"\\\\", 2};
1310b57cec5SDimitry Andric   }
1325ffd83dbSDimitry Andric   return nullptr;
1330b57cec5SDimitry Andric }
1340b57cec5SDimitry Andric 
1350b57cec5SDimitry Andric template <>
GetPrintableImpl(uint8_t * buffer,uint8_t * buffer_end,uint8_t * & next,StringPrinter::EscapeStyle escape_style)1365ffd83dbSDimitry Andric DecodedCharBuffer GetPrintableImpl<StringElementType::ASCII>(
1375ffd83dbSDimitry Andric     uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next,
1385ffd83dbSDimitry Andric     StringPrinter::EscapeStyle escape_style) {
1395ffd83dbSDimitry Andric   // The ASCII helper always advances 1 byte at a time.
1400b57cec5SDimitry Andric   next = buffer + 1;
1415ffd83dbSDimitry Andric 
1425ffd83dbSDimitry Andric   DecodedCharBuffer retval = attemptASCIIEscape(*buffer, escape_style);
1435ffd83dbSDimitry Andric   if (retval.GetSize())
1440b57cec5SDimitry Andric     return retval;
1455ffd83dbSDimitry Andric 
1465ffd83dbSDimitry Andric   // Use llvm's locale-independent isPrint(char), instead of the libc
1475ffd83dbSDimitry Andric   // implementation which may give different results on different platforms.
1485ffd83dbSDimitry Andric   if (llvm::isPrint(*buffer))
1495ffd83dbSDimitry Andric     return {buffer, 1};
1505ffd83dbSDimitry Andric 
1515ffd83dbSDimitry Andric   unsigned escaped_len;
1525ffd83dbSDimitry Andric   constexpr unsigned max_buffer_size = 7;
1535ffd83dbSDimitry Andric   uint8_t data[max_buffer_size];
1545ffd83dbSDimitry Andric   switch (escape_style) {
1555ffd83dbSDimitry Andric   case StringPrinter::EscapeStyle::CXX:
1565ffd83dbSDimitry Andric     // Prints 4 characters, then a \0 terminator.
15706c3fb27SDimitry Andric     escaped_len = snprintf((char *)data, max_buffer_size, "\\x%02x", *buffer);
1585ffd83dbSDimitry Andric     break;
1595ffd83dbSDimitry Andric   case StringPrinter::EscapeStyle::Swift:
1605ffd83dbSDimitry Andric     // Prints up to 6 characters, then a \0 terminator.
16106c3fb27SDimitry Andric     escaped_len = snprintf((char *)data, max_buffer_size, "\\u{%x}", *buffer);
1625ffd83dbSDimitry Andric     break;
1635ffd83dbSDimitry Andric   }
1645ffd83dbSDimitry Andric   lldbassert(escaped_len > 0 && "unknown string escape style");
1655ffd83dbSDimitry Andric   return {data, escaped_len};
1660b57cec5SDimitry Andric }
1670b57cec5SDimitry Andric 
1685ffd83dbSDimitry Andric template <>
GetPrintableImpl(uint8_t * buffer,uint8_t * buffer_end,uint8_t * & next,StringPrinter::EscapeStyle escape_style)1695ffd83dbSDimitry Andric DecodedCharBuffer GetPrintableImpl<StringElementType::UTF8>(
1705ffd83dbSDimitry Andric     uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next,
1715ffd83dbSDimitry Andric     StringPrinter::EscapeStyle escape_style) {
1725ffd83dbSDimitry Andric   // If the utf8 encoded length is invalid (i.e., not in the closed interval
1735ffd83dbSDimitry Andric   // [1;4]), or if there aren't enough bytes to print, or if the subsequence
1745ffd83dbSDimitry Andric   // isn't valid utf8, fall back to printing an ASCII-escaped subsequence.
1755ffd83dbSDimitry Andric   if (!llvm::isLegalUTF8Sequence(buffer, buffer_end))
1765ffd83dbSDimitry Andric     return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next,
1775ffd83dbSDimitry Andric                                                       escape_style);
1780b57cec5SDimitry Andric 
1795ffd83dbSDimitry Andric   // Convert the valid utf8 sequence to a utf32 codepoint. This cannot fail.
1805ffd83dbSDimitry Andric   llvm::UTF32 codepoint = 0;
1815ffd83dbSDimitry Andric   const llvm::UTF8 *buffer_for_conversion = buffer;
1825ffd83dbSDimitry Andric   llvm::ConversionResult result = llvm::convertUTF8Sequence(
1835ffd83dbSDimitry Andric       &buffer_for_conversion, buffer_end, &codepoint, llvm::strictConversion);
1845ffd83dbSDimitry Andric   assert(result == llvm::conversionOK &&
1855ffd83dbSDimitry Andric          "Failed to convert legal utf8 sequence");
1865f757f3fSDimitry Andric   UNUSED_IF_ASSERT_DISABLED(result);
1875ffd83dbSDimitry Andric 
1885ffd83dbSDimitry Andric   // The UTF8 helper always advances by the utf8 encoded length.
1895ffd83dbSDimitry Andric   const unsigned utf8_encoded_len = buffer_for_conversion - buffer;
1900b57cec5SDimitry Andric   next = buffer + utf8_encoded_len;
1910b57cec5SDimitry Andric 
1925ffd83dbSDimitry Andric   DecodedCharBuffer retval = attemptASCIIEscape(codepoint, escape_style);
1935ffd83dbSDimitry Andric   if (retval.GetSize())
1940b57cec5SDimitry Andric     return retval;
1955ffd83dbSDimitry Andric   if (isprint32(codepoint))
1965ffd83dbSDimitry Andric     return {buffer, utf8_encoded_len};
1975ffd83dbSDimitry Andric 
1985ffd83dbSDimitry Andric   unsigned escaped_len;
1995ffd83dbSDimitry Andric   constexpr unsigned max_buffer_size = 13;
2005ffd83dbSDimitry Andric   uint8_t data[max_buffer_size];
2015ffd83dbSDimitry Andric   switch (escape_style) {
2025ffd83dbSDimitry Andric   case StringPrinter::EscapeStyle::CXX:
2035ffd83dbSDimitry Andric     // Prints 10 characters, then a \0 terminator.
20406c3fb27SDimitry Andric     escaped_len = snprintf((char *)data, max_buffer_size, "\\U%08x", codepoint);
2055ffd83dbSDimitry Andric     break;
2065ffd83dbSDimitry Andric   case StringPrinter::EscapeStyle::Swift:
2075ffd83dbSDimitry Andric     // Prints up to 12 characters, then a \0 terminator.
20806c3fb27SDimitry Andric     escaped_len = snprintf((char *)data, max_buffer_size, "\\u{%x}", codepoint);
2095ffd83dbSDimitry Andric     break;
2105ffd83dbSDimitry Andric   }
2115ffd83dbSDimitry Andric   lldbassert(escaped_len > 0 && "unknown string escape style");
2125ffd83dbSDimitry Andric   return {data, escaped_len};
2130b57cec5SDimitry Andric }
2140b57cec5SDimitry Andric 
2150b57cec5SDimitry Andric // Given a sequence of bytes, this function returns: a sequence of bytes to
2160b57cec5SDimitry Andric // actually print out + a length the following unscanned position of the buffer
2170b57cec5SDimitry Andric // is in next
GetPrintable(StringElementType type,uint8_t * buffer,uint8_t * buffer_end,uint8_t * & next,StringPrinter::EscapeStyle escape_style)2185ffd83dbSDimitry Andric static DecodedCharBuffer GetPrintable(StringElementType type, uint8_t *buffer,
2195ffd83dbSDimitry Andric                                       uint8_t *buffer_end, uint8_t *&next,
2205ffd83dbSDimitry Andric                                       StringPrinter::EscapeStyle escape_style) {
2215ffd83dbSDimitry Andric   if (!buffer || buffer >= buffer_end)
2220b57cec5SDimitry Andric     return {nullptr};
2230b57cec5SDimitry Andric 
2240b57cec5SDimitry Andric   switch (type) {
2255ffd83dbSDimitry Andric   case StringElementType::ASCII:
2265ffd83dbSDimitry Andric     return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next,
2275ffd83dbSDimitry Andric                                                       escape_style);
2285ffd83dbSDimitry Andric   case StringElementType::UTF8:
2295ffd83dbSDimitry Andric     return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next,
2305ffd83dbSDimitry Andric                                                      escape_style);
2310b57cec5SDimitry Andric   default:
2320b57cec5SDimitry Andric     return {nullptr};
2330b57cec5SDimitry Andric   }
2340b57cec5SDimitry Andric }
2350b57cec5SDimitry Andric 
2365ffd83dbSDimitry Andric static EscapingHelper
GetDefaultEscapingHelper(GetPrintableElementType elem_type,StringPrinter::EscapeStyle escape_style)2375ffd83dbSDimitry Andric GetDefaultEscapingHelper(GetPrintableElementType elem_type,
2385ffd83dbSDimitry Andric                          StringPrinter::EscapeStyle escape_style) {
2390b57cec5SDimitry Andric   switch (elem_type) {
2400b57cec5SDimitry Andric   case GetPrintableElementType::UTF8:
2410b57cec5SDimitry Andric   case GetPrintableElementType::ASCII:
2425ffd83dbSDimitry Andric     return [escape_style, elem_type](uint8_t *buffer, uint8_t *buffer_end,
2435ffd83dbSDimitry Andric                                      uint8_t *&next) -> DecodedCharBuffer {
2445ffd83dbSDimitry Andric       return GetPrintable(elem_type == GetPrintableElementType::UTF8
2455ffd83dbSDimitry Andric                               ? StringElementType::UTF8
2465ffd83dbSDimitry Andric                               : StringElementType::ASCII,
2475ffd83dbSDimitry Andric                           buffer, buffer_end, next, escape_style);
2480b57cec5SDimitry Andric     };
2490b57cec5SDimitry Andric   }
2500b57cec5SDimitry Andric   llvm_unreachable("bad element type");
2510b57cec5SDimitry Andric }
2520b57cec5SDimitry Andric 
2535ffd83dbSDimitry Andric /// Read a string encoded in accordance with \tparam SourceDataType from a
2545ffd83dbSDimitry Andric /// host-side LLDB buffer, then pretty-print it to a stream using \p style.
2550b57cec5SDimitry Andric template <typename SourceDataType>
DumpEncodedBufferToStream(GetPrintableElementType style,llvm::ConversionResult (* ConvertFunction)(const SourceDataType **,const SourceDataType *,llvm::UTF8 **,llvm::UTF8 *,llvm::ConversionFlags),const StringPrinter::ReadBufferAndDumpToStreamOptions & dump_options)2565ffd83dbSDimitry Andric static bool DumpEncodedBufferToStream(
2575ffd83dbSDimitry Andric     GetPrintableElementType style,
2580b57cec5SDimitry Andric     llvm::ConversionResult (*ConvertFunction)(const SourceDataType **,
2590b57cec5SDimitry Andric                                               const SourceDataType *,
2600b57cec5SDimitry Andric                                               llvm::UTF8 **, llvm::UTF8 *,
2610b57cec5SDimitry Andric                                               llvm::ConversionFlags),
2620b57cec5SDimitry Andric     const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) {
2635ffd83dbSDimitry Andric   assert(dump_options.GetStream() && "need a Stream to print the string to");
2640b57cec5SDimitry Andric   Stream &stream(*dump_options.GetStream());
2650b57cec5SDimitry Andric   if (dump_options.GetPrefixToken() != nullptr)
2660b57cec5SDimitry Andric     stream.Printf("%s", dump_options.GetPrefixToken());
2670b57cec5SDimitry Andric   if (dump_options.GetQuote() != 0)
2680b57cec5SDimitry Andric     stream.Printf("%c", dump_options.GetQuote());
2690b57cec5SDimitry Andric   auto data(dump_options.GetData());
2700b57cec5SDimitry Andric   auto source_size(dump_options.GetSourceSize());
2710b57cec5SDimitry Andric   if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) {
2720b57cec5SDimitry Andric     const int bufferSPSize = data.GetByteSize();
2730b57cec5SDimitry Andric     if (dump_options.GetSourceSize() == 0) {
2740b57cec5SDimitry Andric       const int origin_encoding = 8 * sizeof(SourceDataType);
2750b57cec5SDimitry Andric       source_size = bufferSPSize / (origin_encoding / 4);
2760b57cec5SDimitry Andric     }
2770b57cec5SDimitry Andric 
2780b57cec5SDimitry Andric     const SourceDataType *data_ptr =
2790b57cec5SDimitry Andric         (const SourceDataType *)data.GetDataStart();
2800b57cec5SDimitry Andric     const SourceDataType *data_end_ptr = data_ptr + source_size;
2810b57cec5SDimitry Andric 
2820b57cec5SDimitry Andric     const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator();
2830b57cec5SDimitry Andric 
2840b57cec5SDimitry Andric     if (zero_is_terminator) {
2850b57cec5SDimitry Andric       while (data_ptr < data_end_ptr) {
2860b57cec5SDimitry Andric         if (!*data_ptr) {
2870b57cec5SDimitry Andric           data_end_ptr = data_ptr;
2880b57cec5SDimitry Andric           break;
2890b57cec5SDimitry Andric         }
2900b57cec5SDimitry Andric         data_ptr++;
2910b57cec5SDimitry Andric       }
2920b57cec5SDimitry Andric 
2930b57cec5SDimitry Andric       data_ptr = (const SourceDataType *)data.GetDataStart();
2940b57cec5SDimitry Andric     }
2950b57cec5SDimitry Andric 
29681ad6265SDimitry Andric     lldb::WritableDataBufferSP utf8_data_buffer_sp;
2970b57cec5SDimitry Andric     llvm::UTF8 *utf8_data_ptr = nullptr;
2980b57cec5SDimitry Andric     llvm::UTF8 *utf8_data_end_ptr = nullptr;
2990b57cec5SDimitry Andric 
3000b57cec5SDimitry Andric     if (ConvertFunction) {
3010b57cec5SDimitry Andric       utf8_data_buffer_sp =
3020b57cec5SDimitry Andric           std::make_shared<DataBufferHeap>(4 * bufferSPSize, 0);
3030b57cec5SDimitry Andric       utf8_data_ptr = (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes();
3040b57cec5SDimitry Andric       utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize();
3050b57cec5SDimitry Andric       ConvertFunction(&data_ptr, data_end_ptr, &utf8_data_ptr,
3060b57cec5SDimitry Andric                       utf8_data_end_ptr, llvm::lenientConversion);
3070b57cec5SDimitry Andric       if (!zero_is_terminator)
3080b57cec5SDimitry Andric         utf8_data_end_ptr = utf8_data_ptr;
3090b57cec5SDimitry Andric       // needed because the ConvertFunction will change the value of the
3100b57cec5SDimitry Andric       // data_ptr.
3110b57cec5SDimitry Andric       utf8_data_ptr =
3120b57cec5SDimitry Andric           (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes();
3130b57cec5SDimitry Andric     } else {
3140b57cec5SDimitry Andric       // just copy the pointers - the cast is necessary to make the compiler
3150b57cec5SDimitry Andric       // happy but this should only happen if we are reading UTF8 data
3160b57cec5SDimitry Andric       utf8_data_ptr = const_cast<llvm::UTF8 *>(
3170b57cec5SDimitry Andric           reinterpret_cast<const llvm::UTF8 *>(data_ptr));
3180b57cec5SDimitry Andric       utf8_data_end_ptr = const_cast<llvm::UTF8 *>(
3190b57cec5SDimitry Andric           reinterpret_cast<const llvm::UTF8 *>(data_end_ptr));
3200b57cec5SDimitry Andric     }
3210b57cec5SDimitry Andric 
3220b57cec5SDimitry Andric     const bool escape_non_printables = dump_options.GetEscapeNonPrintables();
3235ffd83dbSDimitry Andric     EscapingHelper escaping_callback;
3245ffd83dbSDimitry Andric     if (escape_non_printables)
3250b57cec5SDimitry Andric       escaping_callback =
3265ffd83dbSDimitry Andric           GetDefaultEscapingHelper(style, dump_options.GetEscapeStyle());
3270b57cec5SDimitry Andric 
3280b57cec5SDimitry Andric     // since we tend to accept partial data (and even partially malformed data)
3290b57cec5SDimitry Andric     // we might end up with no NULL terminator before the end_ptr hence we need
3300b57cec5SDimitry Andric     // to take a slower route and ensure we stay within boundaries
3310b57cec5SDimitry Andric     for (; utf8_data_ptr < utf8_data_end_ptr;) {
3320b57cec5SDimitry Andric       if (zero_is_terminator && !*utf8_data_ptr)
3330b57cec5SDimitry Andric         break;
3340b57cec5SDimitry Andric 
3350b57cec5SDimitry Andric       if (escape_non_printables) {
3360b57cec5SDimitry Andric         uint8_t *next_data = nullptr;
3370b57cec5SDimitry Andric         auto printable =
3380b57cec5SDimitry Andric             escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data);
3390b57cec5SDimitry Andric         auto printable_bytes = printable.GetBytes();
3400b57cec5SDimitry Andric         auto printable_size = printable.GetSize();
3415ffd83dbSDimitry Andric 
3425ffd83dbSDimitry Andric         // We failed to figure out how to print this string.
3435ffd83dbSDimitry Andric         if (!printable_bytes || !next_data)
3445ffd83dbSDimitry Andric           return false;
3455ffd83dbSDimitry Andric 
3460b57cec5SDimitry Andric         for (unsigned c = 0; c < printable_size; c++)
3470b57cec5SDimitry Andric           stream.Printf("%c", *(printable_bytes + c));
3480b57cec5SDimitry Andric         utf8_data_ptr = (uint8_t *)next_data;
3490b57cec5SDimitry Andric       } else {
3500b57cec5SDimitry Andric         stream.Printf("%c", *utf8_data_ptr);
3510b57cec5SDimitry Andric         utf8_data_ptr++;
3520b57cec5SDimitry Andric       }
3530b57cec5SDimitry Andric     }
3540b57cec5SDimitry Andric   }
3550b57cec5SDimitry Andric   if (dump_options.GetQuote() != 0)
3560b57cec5SDimitry Andric     stream.Printf("%c", dump_options.GetQuote());
3570b57cec5SDimitry Andric   if (dump_options.GetSuffixToken() != nullptr)
3580b57cec5SDimitry Andric     stream.Printf("%s", dump_options.GetSuffixToken());
3590b57cec5SDimitry Andric   if (dump_options.GetIsTruncated())
3600b57cec5SDimitry Andric     stream.Printf("...");
3610b57cec5SDimitry Andric   return true;
3620b57cec5SDimitry Andric }
3630b57cec5SDimitry Andric 
3640b57cec5SDimitry Andric lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions::
ReadStringAndDumpToStreamOptions(ValueObject & valobj)3650b57cec5SDimitry Andric     ReadStringAndDumpToStreamOptions(ValueObject &valobj)
3660b57cec5SDimitry Andric     : ReadStringAndDumpToStreamOptions() {
3670b57cec5SDimitry Andric   SetEscapeNonPrintables(
3680b57cec5SDimitry Andric       valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
3690b57cec5SDimitry Andric }
3700b57cec5SDimitry Andric 
3710b57cec5SDimitry Andric lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::
ReadBufferAndDumpToStreamOptions(ValueObject & valobj)3720b57cec5SDimitry Andric     ReadBufferAndDumpToStreamOptions(ValueObject &valobj)
3730b57cec5SDimitry Andric     : ReadBufferAndDumpToStreamOptions() {
3740b57cec5SDimitry Andric   SetEscapeNonPrintables(
3750b57cec5SDimitry Andric       valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
3760b57cec5SDimitry Andric }
3770b57cec5SDimitry Andric 
3780b57cec5SDimitry Andric lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::
ReadBufferAndDumpToStreamOptions(const ReadStringAndDumpToStreamOptions & options)3790b57cec5SDimitry Andric     ReadBufferAndDumpToStreamOptions(
3800b57cec5SDimitry Andric         const ReadStringAndDumpToStreamOptions &options)
3810b57cec5SDimitry Andric     : ReadBufferAndDumpToStreamOptions() {
3820b57cec5SDimitry Andric   SetStream(options.GetStream());
3830b57cec5SDimitry Andric   SetPrefixToken(options.GetPrefixToken());
3840b57cec5SDimitry Andric   SetSuffixToken(options.GetSuffixToken());
3850b57cec5SDimitry Andric   SetQuote(options.GetQuote());
3860b57cec5SDimitry Andric   SetEscapeNonPrintables(options.GetEscapeNonPrintables());
3870b57cec5SDimitry Andric   SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator());
3885ffd83dbSDimitry Andric   SetEscapeStyle(options.GetEscapeStyle());
3890b57cec5SDimitry Andric }
3900b57cec5SDimitry Andric 
3910b57cec5SDimitry Andric namespace lldb_private {
3920b57cec5SDimitry Andric 
3930b57cec5SDimitry Andric namespace formatters {
3940b57cec5SDimitry Andric 
3950b57cec5SDimitry Andric template <typename SourceDataType>
ReadEncodedBufferAndDumpToStream(StringElementType elem_type,const StringPrinter::ReadStringAndDumpToStreamOptions & options,llvm::ConversionResult (* ConvertFunction)(const SourceDataType **,const SourceDataType *,llvm::UTF8 **,llvm::UTF8 *,llvm::ConversionFlags))3965ffd83dbSDimitry Andric static bool ReadEncodedBufferAndDumpToStream(
3975ffd83dbSDimitry Andric     StringElementType elem_type,
3980b57cec5SDimitry Andric     const StringPrinter::ReadStringAndDumpToStreamOptions &options,
3990b57cec5SDimitry Andric     llvm::ConversionResult (*ConvertFunction)(const SourceDataType **,
4000b57cec5SDimitry Andric                                               const SourceDataType *,
4010b57cec5SDimitry Andric                                               llvm::UTF8 **, llvm::UTF8 *,
4020b57cec5SDimitry Andric                                               llvm::ConversionFlags)) {
4030b57cec5SDimitry Andric   assert(options.GetStream() && "need a Stream to print the string to");
4045ffd83dbSDimitry Andric   if (!options.GetStream())
4055ffd83dbSDimitry Andric     return false;
4060b57cec5SDimitry Andric 
4070b57cec5SDimitry Andric   if (options.GetLocation() == 0 ||
4080b57cec5SDimitry Andric       options.GetLocation() == LLDB_INVALID_ADDRESS)
4090b57cec5SDimitry Andric     return false;
4100b57cec5SDimitry Andric 
411349cc55cSDimitry Andric   lldb::TargetSP target_sp = options.GetTargetSP();
412349cc55cSDimitry Andric   if (!target_sp)
4130b57cec5SDimitry Andric     return false;
4140b57cec5SDimitry Andric 
4155ffd83dbSDimitry Andric   constexpr int type_width = sizeof(SourceDataType);
4165ffd83dbSDimitry Andric   constexpr int origin_encoding = 8 * type_width;
4170b57cec5SDimitry Andric   if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32)
4180b57cec5SDimitry Andric     return false;
4195ffd83dbSDimitry Andric   // If not UTF8 or ASCII, conversion to UTF8 is necessary.
4200b57cec5SDimitry Andric   if (origin_encoding != 8 && !ConvertFunction)
4210b57cec5SDimitry Andric     return false;
4220b57cec5SDimitry Andric 
4230b57cec5SDimitry Andric   bool needs_zero_terminator = options.GetNeedsZeroTermination();
4240b57cec5SDimitry Andric 
4250b57cec5SDimitry Andric   bool is_truncated = false;
426349cc55cSDimitry Andric   const auto max_size = target_sp->GetMaximumSizeOfStringSummary();
4270b57cec5SDimitry Andric 
4285ffd83dbSDimitry Andric   uint32_t sourceSize;
4295ffd83dbSDimitry Andric   if (elem_type == StringElementType::ASCII && !options.GetSourceSize()) {
4305ffd83dbSDimitry Andric     // FIXME: The NSString formatter sets HasSourceSize(true) when the size is
4315ffd83dbSDimitry Andric     // actually unknown, as well as SetBinaryZeroIsTerminator(false). IIUC the
4325ffd83dbSDimitry Andric     // C++ formatter also sets SetBinaryZeroIsTerminator(false) when it doesn't
4335ffd83dbSDimitry Andric     // mean to. I don't see how this makes sense: we should fix the formatters.
4345ffd83dbSDimitry Andric     //
4355ffd83dbSDimitry Andric     // Until then, the behavior that's expected for ASCII strings with unknown
4365ffd83dbSDimitry Andric     // lengths is to read up to the max size and then null-terminate. Do that.
4370b57cec5SDimitry Andric     sourceSize = max_size;
4380b57cec5SDimitry Andric     needs_zero_terminator = true;
4395ffd83dbSDimitry Andric   } else if (options.HasSourceSize()) {
4405ffd83dbSDimitry Andric     sourceSize = options.GetSourceSize();
4415ffd83dbSDimitry Andric     if (!options.GetIgnoreMaxLength()) {
4420b57cec5SDimitry Andric       if (sourceSize > max_size) {
4430b57cec5SDimitry Andric         sourceSize = max_size;
4440b57cec5SDimitry Andric         is_truncated = true;
4450b57cec5SDimitry Andric       }
4460b57cec5SDimitry Andric     }
4475ffd83dbSDimitry Andric   } else {
4485ffd83dbSDimitry Andric     sourceSize = max_size;
4495ffd83dbSDimitry Andric     needs_zero_terminator = true;
4505ffd83dbSDimitry Andric   }
4510b57cec5SDimitry Andric 
4520b57cec5SDimitry Andric   const int bufferSPSize = sourceSize * type_width;
45381ad6265SDimitry Andric   lldb::WritableDataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize, 0));
4540b57cec5SDimitry Andric 
4555ffd83dbSDimitry Andric   // Check if we got bytes. We never get any bytes if we have an empty
4565ffd83dbSDimitry Andric   // string, but we still continue so that we end up actually printing
4575ffd83dbSDimitry Andric   // an empty string ("").
4585ffd83dbSDimitry Andric   if (sourceSize != 0 && !buffer_sp->GetBytes())
4590b57cec5SDimitry Andric     return false;
4600b57cec5SDimitry Andric 
4610b57cec5SDimitry Andric   Status error;
4620b57cec5SDimitry Andric   char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes());
4630b57cec5SDimitry Andric 
4645ffd83dbSDimitry Andric   if (elem_type == StringElementType::ASCII)
465349cc55cSDimitry Andric     target_sp->ReadCStringFromMemory(options.GetLocation(), buffer,
4665ffd83dbSDimitry Andric                                       bufferSPSize, error);
4675ffd83dbSDimitry Andric   else if (needs_zero_terminator)
468349cc55cSDimitry Andric     target_sp->ReadStringFromMemory(options.GetLocation(), buffer,
4690b57cec5SDimitry Andric                                      bufferSPSize, error, type_width);
4700b57cec5SDimitry Andric   else
471349cc55cSDimitry Andric     target_sp->ReadMemory(options.GetLocation(), buffer, bufferSPSize, error);
4720b57cec5SDimitry Andric   if (error.Fail()) {
4730b57cec5SDimitry Andric     options.GetStream()->Printf("unable to read data");
4740b57cec5SDimitry Andric     return true;
4750b57cec5SDimitry Andric   }
4760b57cec5SDimitry Andric 
4770b57cec5SDimitry Andric   StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options);
478349cc55cSDimitry Andric   dump_options.SetData(
479349cc55cSDimitry Andric       DataExtractor(buffer_sp, target_sp->GetArchitecture().GetByteOrder(),
480349cc55cSDimitry Andric                     target_sp->GetArchitecture().GetAddressByteSize()));
4810b57cec5SDimitry Andric   dump_options.SetSourceSize(sourceSize);
4820b57cec5SDimitry Andric   dump_options.SetIsTruncated(is_truncated);
4835ffd83dbSDimitry Andric   dump_options.SetNeedsZeroTermination(needs_zero_terminator);
4845ffd83dbSDimitry Andric   if (needs_zero_terminator)
4855ffd83dbSDimitry Andric     dump_options.SetBinaryZeroIsTerminator(true);
4860b57cec5SDimitry Andric 
4875ffd83dbSDimitry Andric   GetPrintableElementType print_style = (elem_type == StringElementType::ASCII)
4885ffd83dbSDimitry Andric                                             ? GetPrintableElementType::ASCII
4895ffd83dbSDimitry Andric                                             : GetPrintableElementType::UTF8;
4905ffd83dbSDimitry Andric   return DumpEncodedBufferToStream(print_style, ConvertFunction, dump_options);
4910b57cec5SDimitry Andric }
4920b57cec5SDimitry Andric 
4930b57cec5SDimitry Andric template <>
ReadStringAndDumpToStream(const ReadStringAndDumpToStreamOptions & options)4945ffd83dbSDimitry Andric bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF8>(
4950b57cec5SDimitry Andric     const ReadStringAndDumpToStreamOptions &options) {
4965ffd83dbSDimitry Andric   return ReadEncodedBufferAndDumpToStream<llvm::UTF8>(StringElementType::UTF8,
4975ffd83dbSDimitry Andric                                                       options, nullptr);
4980b57cec5SDimitry Andric }
4990b57cec5SDimitry Andric 
5000b57cec5SDimitry Andric template <>
ReadStringAndDumpToStream(const ReadStringAndDumpToStreamOptions & options)5015ffd83dbSDimitry Andric bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF16>(
5020b57cec5SDimitry Andric     const ReadStringAndDumpToStreamOptions &options) {
5035ffd83dbSDimitry Andric   return ReadEncodedBufferAndDumpToStream<llvm::UTF16>(
5045ffd83dbSDimitry Andric       StringElementType::UTF16, options, llvm::ConvertUTF16toUTF8);
5050b57cec5SDimitry Andric }
5060b57cec5SDimitry Andric 
5070b57cec5SDimitry Andric template <>
ReadStringAndDumpToStream(const ReadStringAndDumpToStreamOptions & options)5085ffd83dbSDimitry Andric bool StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF32>(
5090b57cec5SDimitry Andric     const ReadStringAndDumpToStreamOptions &options) {
5105ffd83dbSDimitry Andric   return ReadEncodedBufferAndDumpToStream<llvm::UTF32>(
5115ffd83dbSDimitry Andric       StringElementType::UTF32, options, llvm::ConvertUTF32toUTF8);
5120b57cec5SDimitry Andric }
5130b57cec5SDimitry Andric 
5140b57cec5SDimitry Andric template <>
ReadStringAndDumpToStream(const ReadStringAndDumpToStreamOptions & options)5155ffd83dbSDimitry Andric bool StringPrinter::ReadStringAndDumpToStream<StringElementType::ASCII>(
5165ffd83dbSDimitry Andric     const ReadStringAndDumpToStreamOptions &options) {
5175ffd83dbSDimitry Andric   return ReadEncodedBufferAndDumpToStream<char>(StringElementType::ASCII,
5185ffd83dbSDimitry Andric                                                 options, nullptr);
5195ffd83dbSDimitry Andric }
5205ffd83dbSDimitry Andric 
5215ffd83dbSDimitry Andric template <>
ReadBufferAndDumpToStream(const ReadBufferAndDumpToStreamOptions & options)5225ffd83dbSDimitry Andric bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF8>(
5230b57cec5SDimitry Andric     const ReadBufferAndDumpToStreamOptions &options) {
5245ffd83dbSDimitry Andric   return DumpEncodedBufferToStream<llvm::UTF8>(GetPrintableElementType::UTF8,
5255ffd83dbSDimitry Andric                                                nullptr, options);
5260b57cec5SDimitry Andric }
5270b57cec5SDimitry Andric 
5280b57cec5SDimitry Andric template <>
ReadBufferAndDumpToStream(const ReadBufferAndDumpToStreamOptions & options)5295ffd83dbSDimitry Andric bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF16>(
5300b57cec5SDimitry Andric     const ReadBufferAndDumpToStreamOptions &options) {
5315ffd83dbSDimitry Andric   return DumpEncodedBufferToStream(GetPrintableElementType::UTF8,
5325ffd83dbSDimitry Andric                                    llvm::ConvertUTF16toUTF8, options);
5335ffd83dbSDimitry Andric }
5345ffd83dbSDimitry Andric 
5355ffd83dbSDimitry Andric template <>
ReadBufferAndDumpToStream(const ReadBufferAndDumpToStreamOptions & options)5365ffd83dbSDimitry Andric bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF32>(
5375ffd83dbSDimitry Andric     const ReadBufferAndDumpToStreamOptions &options) {
5385ffd83dbSDimitry Andric   return DumpEncodedBufferToStream(GetPrintableElementType::UTF8,
5395ffd83dbSDimitry Andric                                    llvm::ConvertUTF32toUTF8, options);
5405ffd83dbSDimitry Andric }
5415ffd83dbSDimitry Andric 
5425ffd83dbSDimitry Andric template <>
ReadBufferAndDumpToStream(const ReadBufferAndDumpToStreamOptions & options)5435ffd83dbSDimitry Andric bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::ASCII>(
5445ffd83dbSDimitry Andric     const ReadBufferAndDumpToStreamOptions &options) {
5455ffd83dbSDimitry Andric   // Treat ASCII the same as UTF8.
5465ffd83dbSDimitry Andric   //
5475ffd83dbSDimitry Andric   // FIXME: This is probably not the right thing to do (well, it's debatable).
5485ffd83dbSDimitry Andric   // If an ASCII-encoded string happens to contain a sequence of invalid bytes
5495ffd83dbSDimitry Andric   // that forms a valid UTF8 character, we'll print out that character. This is
5505ffd83dbSDimitry Andric   // good if you're playing fast and loose with encodings (probably good for
5515ffd83dbSDimitry Andric   // std::string users), but maybe not so good if you care about your string
5525ffd83dbSDimitry Andric   // formatter respecting the semantics of your selected string encoding. In
5535ffd83dbSDimitry Andric   // the latter case you'd want to see the character byte sequence ('\x..'), not
5545ffd83dbSDimitry Andric   // the UTF8 character itself.
5550b57cec5SDimitry Andric   return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
5560b57cec5SDimitry Andric }
5570b57cec5SDimitry Andric 
5580b57cec5SDimitry Andric } // namespace formatters
5590b57cec5SDimitry Andric 
5600b57cec5SDimitry Andric } // namespace lldb_private
561