xref: /src/contrib/llvm-project/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp (revision 7a6dacaca14b62ca4b74406814becb87a3fefac0)
1809500fcSDimitry Andric //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
2809500fcSDimitry Andric //
322989816SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
422989816SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
522989816SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6809500fcSDimitry Andric //
7809500fcSDimitry Andric //===----------------------------------------------------------------------===//
8809500fcSDimitry Andric //
9676fbe81SDimitry Andric // This tablegen backend emits an efficient function to translate HTML named
10809500fcSDimitry Andric // character references to UTF-8 sequences.
11809500fcSDimitry Andric //
12809500fcSDimitry Andric //===----------------------------------------------------------------------===//
13809500fcSDimitry Andric 
14519fc96cSDimitry Andric #include "TableGenBackends.h"
15809500fcSDimitry Andric #include "llvm/ADT/SmallString.h"
16809500fcSDimitry Andric #include "llvm/Support/ConvertUTF.h"
17809500fcSDimitry Andric #include "llvm/TableGen/Error.h"
18809500fcSDimitry Andric #include "llvm/TableGen/Record.h"
19809500fcSDimitry Andric #include "llvm/TableGen/StringMatcher.h"
20809500fcSDimitry Andric #include "llvm/TableGen/TableGenBackend.h"
21809500fcSDimitry Andric #include <vector>
22809500fcSDimitry Andric 
23809500fcSDimitry Andric using namespace llvm;
24809500fcSDimitry Andric 
2548675466SDimitry Andric /// Convert a code point to the corresponding UTF-8 sequence represented
26809500fcSDimitry Andric /// as a C string literal.
27809500fcSDimitry Andric ///
28809500fcSDimitry Andric /// \returns true on success.
translateCodePointToUTF8(unsigned CodePoint,SmallVectorImpl<char> & CLiteral)29809500fcSDimitry Andric static bool translateCodePointToUTF8(unsigned CodePoint,
30809500fcSDimitry Andric                                      SmallVectorImpl<char> &CLiteral) {
31809500fcSDimitry Andric   char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
32809500fcSDimitry Andric   char *TranslatedPtr = Translated;
33809500fcSDimitry Andric   if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
34809500fcSDimitry Andric     return false;
35809500fcSDimitry Andric 
36809500fcSDimitry Andric   StringRef UTF8(Translated, TranslatedPtr - Translated);
37809500fcSDimitry Andric 
38809500fcSDimitry Andric   raw_svector_ostream OS(CLiteral);
39809500fcSDimitry Andric   OS << "\"";
40809500fcSDimitry Andric   for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
41809500fcSDimitry Andric     OS << "\\x";
42809500fcSDimitry Andric     OS.write_hex(static_cast<unsigned char>(UTF8[i]));
43809500fcSDimitry Andric   }
44809500fcSDimitry Andric   OS << "\"";
45809500fcSDimitry Andric 
46809500fcSDimitry Andric   return true;
47809500fcSDimitry Andric }
48809500fcSDimitry Andric 
EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper & Records,raw_ostream & OS)49519fc96cSDimitry Andric void clang::EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
50809500fcSDimitry Andric                                                          raw_ostream &OS) {
51809500fcSDimitry Andric   std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
52809500fcSDimitry Andric   std::vector<StringMatcher::StringPair> NameToUTF8;
53809500fcSDimitry Andric   SmallString<32> CLiteral;
54809500fcSDimitry Andric   for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
55809500fcSDimitry Andric        I != E; ++I) {
56809500fcSDimitry Andric     Record &Tag = **I;
57cfca06d7SDimitry Andric     std::string Spelling = std::string(Tag.getValueAsString("Spelling"));
58809500fcSDimitry Andric     uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
59809500fcSDimitry Andric     CLiteral.clear();
60809500fcSDimitry Andric     CLiteral.append("return ");
61809500fcSDimitry Andric     if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
62809500fcSDimitry Andric       SrcMgr.PrintMessage(Tag.getLoc().front(),
63809500fcSDimitry Andric                           SourceMgr::DK_Error,
64809500fcSDimitry Andric                           Twine("invalid code point"));
65809500fcSDimitry Andric       continue;
66809500fcSDimitry Andric     }
67809500fcSDimitry Andric     CLiteral.append(";");
68809500fcSDimitry Andric 
694df029ccSDimitry Andric     StringMatcher::StringPair Match(Spelling, std::string(CLiteral));
70809500fcSDimitry Andric     NameToUTF8.push_back(Match);
71809500fcSDimitry Andric   }
72809500fcSDimitry Andric 
73b1c73532SDimitry Andric   emitSourceFileHeader("HTML named character reference to UTF-8 translation",
74b1c73532SDimitry Andric                        OS, Records);
75809500fcSDimitry Andric 
76809500fcSDimitry Andric   OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
77809500fcSDimitry Andric         "                                             StringRef Name) {\n";
78809500fcSDimitry Andric   StringMatcher("Name", NameToUTF8, OS).Emit();
79809500fcSDimitry Andric   OS << "  return StringRef();\n"
80809500fcSDimitry Andric      << "}\n\n";
81809500fcSDimitry Andric }
82