/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This tablegen backend emits an efficient function to translate HTML named |
10 | | // character references to UTF-8 sequences. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "TableGenBackends.h" |
15 | | #include "llvm/ADT/SmallString.h" |
16 | | #include "llvm/Support/ConvertUTF.h" |
17 | | #include "llvm/TableGen/Error.h" |
18 | | #include "llvm/TableGen/Record.h" |
19 | | #include "llvm/TableGen/StringMatcher.h" |
20 | | #include "llvm/TableGen/TableGenBackend.h" |
21 | | #include <vector> |
22 | | |
23 | | using namespace llvm; |
24 | | |
25 | | /// Convert a code point to the corresponding UTF-8 sequence represented |
26 | | /// as a C string literal. |
27 | | /// |
28 | | /// \returns true on success. |
29 | | static bool translateCodePointToUTF8(unsigned CodePoint, |
30 | 0 | SmallVectorImpl<char> &CLiteral) { |
31 | 0 | char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT]; |
32 | 0 | char *TranslatedPtr = Translated; |
33 | 0 | if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr)) |
34 | 0 | return false; |
35 | | |
36 | 0 | StringRef UTF8(Translated, TranslatedPtr - Translated); |
37 | |
|
38 | 0 | raw_svector_ostream OS(CLiteral); |
39 | 0 | OS << "\""; |
40 | 0 | for (size_t i = 0, e = UTF8.size(); i != e; ++i) { |
41 | 0 | OS << "\\x"; |
42 | 0 | OS.write_hex(static_cast<unsigned char>(UTF8[i])); |
43 | 0 | } |
44 | 0 | OS << "\""; |
45 | |
|
46 | 0 | return true; |
47 | 0 | } |
48 | | |
49 | | void clang::EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records, |
50 | 0 | raw_ostream &OS) { |
51 | 0 | std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR"); |
52 | 0 | std::vector<StringMatcher::StringPair> NameToUTF8; |
53 | 0 | SmallString<32> CLiteral; |
54 | 0 | for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end(); |
55 | 0 | I != E; ++I) { |
56 | 0 | Record &Tag = **I; |
57 | 0 | std::string Spelling = std::string(Tag.getValueAsString("Spelling")); |
58 | 0 | uint64_t CodePoint = Tag.getValueAsInt("CodePoint"); |
59 | 0 | CLiteral.clear(); |
60 | 0 | CLiteral.append("return "); |
61 | 0 | if (!translateCodePointToUTF8(CodePoint, CLiteral)) { |
62 | 0 | SrcMgr.PrintMessage(Tag.getLoc().front(), |
63 | 0 | SourceMgr::DK_Error, |
64 | 0 | Twine("invalid code point")); |
65 | 0 | continue; |
66 | 0 | } |
67 | 0 | CLiteral.append(";"); |
68 | |
|
69 | 0 | StringMatcher::StringPair Match(Spelling, std::string(CLiteral.str())); |
70 | 0 | NameToUTF8.push_back(Match); |
71 | 0 | } |
72 | |
|
73 | 0 | emitSourceFileHeader("HTML named character reference to UTF-8 " |
74 | 0 | "translation", OS); |
75 | |
|
76 | 0 | OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n" |
77 | 0 | " StringRef Name) {\n"; |
78 | 0 | StringMatcher("Name", NameToUTF8, OS).Emit(); |
79 | 0 | OS << " return StringRef();\n" |
80 | 0 | << "}\n\n"; |
81 | 0 | } |