//===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This tablegen backend emits an efficient function to translate HTML named // character references to UTF-8 sequences. // //===----------------------------------------------------------------------===// #include "TableGenBackends.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/StringMatcher.h" #include "llvm/TableGen/TableGenBackend.h" #include using namespace llvm; /// Convert a code point to the corresponding UTF-8 sequence represented /// as a C string literal. /// /// \returns true on success. static bool translateCodePointToUTF8(unsigned CodePoint, SmallVectorImpl &CLiteral) { char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT]; char *TranslatedPtr = Translated; if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr)) return false; StringRef UTF8(Translated, TranslatedPtr - Translated); raw_svector_ostream OS(CLiteral); OS << "\""; for (size_t i = 0, e = UTF8.size(); i != e; ++i) { OS << "\\x"; OS.write_hex(static_cast(UTF8[i])); } OS << "\""; return true; } void clang::EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records, raw_ostream &OS) { std::vector Tags = Records.getAllDerivedDefinitions("NCR"); std::vector NameToUTF8; SmallString<32> CLiteral; for (std::vector::iterator I = Tags.begin(), E = Tags.end(); I != E; ++I) { Record &Tag = **I; std::string Spelling = std::string(Tag.getValueAsString("Spelling")); uint64_t CodePoint = Tag.getValueAsInt("CodePoint"); CLiteral.clear(); CLiteral.append("return "); if (!translateCodePointToUTF8(CodePoint, CLiteral)) { SrcMgr.PrintMessage(Tag.getLoc().front(), SourceMgr::DK_Error, Twine("invalid code point")); continue; } CLiteral.append(";"); StringMatcher::StringPair Match(Spelling, std::string(CLiteral.str())); NameToUTF8.push_back(Match); } emitSourceFileHeader("HTML named character reference to UTF-8 " "translation", OS); OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n" " StringRef Name) {\n"; StringMatcher("Name", NameToUTF8, OS).Emit(); OS << " return StringRef();\n" << "}\n\n"; }