Coverage Report

Created: 2023-09-21 18:56

/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/Plugins/Language/ClangCommon/ClangHighlighter.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- ClangHighlighter.cpp ----------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "ClangHighlighter.h"
10
11
#include "lldb/Host/FileSystem.h"
12
#include "lldb/Target/Language.h"
13
#include "lldb/Utility/AnsiTerminal.h"
14
#include "lldb/Utility/StreamString.h"
15
16
#include "clang/Basic/FileManager.h"
17
#include "clang/Basic/SourceManager.h"
18
#include "clang/Lex/Lexer.h"
19
#include "llvm/ADT/StringSet.h"
20
#include "llvm/Support/MemoryBuffer.h"
21
#include <optional>
22
23
using namespace lldb_private;
24
25
56.5k
/// Reports whether \p token is spelled exactly like one of the entries stored
/// in the highlighter's keyword set.
bool ClangHighlighter::isKeyword(llvm::StringRef token) const {
  return keywords.count(token) != 0;
}
28
29
8.92k
/// Fills the keyword set from the KEYWORD entries listed in
/// clang/Basic/TokenKinds.def.
ClangHighlighter::ClangHighlighter() {
  // Every KEYWORD(NAME, FLAGS) entry in the .def file expands to a single
  // insertion of the stringified NAME; the FLAGS argument is ignored here.
#define KEYWORD(NAME, FLAGS) keywords.insert(#NAME);
#include "clang/Basic/TokenKinds.def"
}
33
34
/// Determines which style should be applied to the given token.
35
/// \param highlighter
36
///     The current highlighter that should use the style.
37
/// \param token
38
///     The current token.
39
/// \param tok_str
40
///     The string in the source code the token represents.
41
/// \param options
42
///     The style we use for coloring the source code.
43
/// \param in_pp_directive
44
///     If we are currently in a preprocessor directive. NOTE: This is
45
///     passed by reference and will be updated if the current token starts
46
///     or ends a preprocessor directive.
47
/// \return
48
///     The ColorStyle that should be applied to the token.
49
static HighlightStyle::ColorStyle
50
determineClangStyle(const ClangHighlighter &highlighter,
51
                    const clang::Token &token, llvm::StringRef tok_str,
52
63.1k
                    const HighlightStyle &options, bool &in_pp_directive) {
53
63.1k
  using namespace clang;
54
55
63.1k
  if (token.is(tok::comment)) {
56
    // If we were in a preprocessor directive before, we now left it.
57
1.78k
    in_pp_directive = false;
58
1.78k
    return options.comment;
59
61.4k
  } else if (in_pp_directive || 
token.getKind() == tok::hash60.7k
) {
60
    // Let's assume that the rest of the line is a PP directive.
61
861
    in_pp_directive = true;
62
    // Preprocessor directives are hard to match, so we have to hack this in.
63
861
    return options.pp_directive;
64
60.5k
  } else if (tok::isStringLiteral(token.getKind()))
65
1.46k
    return options.string_literal;
66
59.0k
  else if (tok::isLiteral(token.getKind()))
67
2.50k
    return options.scalar_literal;
68
56.5k
  else if (highlighter.isKeyword(tok_str))
69
3.47k
    return options.keyword;
70
53.0k
  else
71
53.0k
    switch (token.getKind()) {
72
10.7k
    case tok::raw_identifier:
73
10.7k
    case tok::identifier:
74
10.7k
      return options.identifier;
75
1.12k
    case tok::l_brace:
76
2.40k
    case tok::r_brace:
77
2.40k
      return options.braces;
78
679
    case tok::l_square:
79
1.36k
    case tok::r_square:
80
1.36k
      return options.square_brackets;
81
3.55k
    case tok::l_paren:
82
7.12k
    case tok::r_paren:
83
7.12k
      return options.parentheses;
84
2.07k
    case tok::comma:
85
2.07k
      return options.comma;
86
247
    case tok::coloncolon:
87
430
    case tok::colon:
88
430
      return options.colon;
89
90
162
    case tok::amp:
91
178
    case tok::ampamp:
92
178
    case tok::ampequal:
93
900
    case tok::star:
94
904
    case tok::starequal:
95
1.09k
    case tok::plus:
96
1.50k
    case tok::plusplus:
97
1.53k
    case tok::plusequal:
98
1.57k
    case tok::minus:
99
1.74k
    case tok::arrow:
100
1.74k
    case tok::minusminus:
101
1.74k
    case tok::minusequal:
102
1.74k
    case tok::tilde:
103
1.88k
    case tok::exclaim:
104
1.88k
    case tok::exclaimequal:
105
1.89k
    case tok::slash:
106
1.89k
    case tok::slashequal:
107
1.90k
    case tok::percent:
108
1.90k
    case tok::percentequal:
109
2.05k
    case tok::less:
110
2.08k
    case tok::lessless:
111
2.09k
    case tok::lessequal:
112
2.09k
    case tok::lesslessequal:
113
2.09k
    case tok::spaceship:
114
2.19k
    case tok::greater:
115
2.19k
    case tok::greatergreater:
116
2.20k
    case tok::greaterequal:
117
2.20k
    case tok::greatergreaterequal:
118
2.43k
    case tok::caret:
119
2.43k
    case tok::caretequal:
120
2.43k
    case tok::pipe:
121
2.43k
    case tok::pipepipe:
122
2.43k
    case tok::pipeequal:
123
2.43k
    case tok::question:
124
3.99k
    case tok::equal:
125
4.01k
    case tok::equalequal:
126
4.01k
      return options.operators;
127
24.8k
    default:
128
24.8k
      break;
129
53.0k
    }
130
24.8k
  return HighlightStyle::ColorStyle();
131
63.1k
}
132
133
void ClangHighlighter::Highlight(const HighlightStyle &options,
134
                                 llvm::StringRef line,
135
                                 std::optional<size_t> cursor_pos,
136
                                 llvm::StringRef previous_lines,
137
10.0k
                                 Stream &result) const {
138
10.0k
  using namespace clang;
139
140
10.0k
  FileSystemOptions file_opts;
141
10.0k
  FileManager file_mgr(file_opts,
142
10.0k
                       FileSystem::Instance().GetVirtualFileSystem());
143
144
  // The line might end in a backslash which would cause Clang to drop the
145
  // backslash and the terminating new line. This makes sense when parsing C++,
146
  // but when highlighting we care about preserving the backslash/newline. To
147
  // not lose this information we remove the new line here so that Clang knows
148
  // this is just a single line we are highlighting. We add back the newline
149
  // after tokenizing.
150
10.0k
  llvm::StringRef line_ending = "";
151
  // There are a few legal line endings Clang recognizes and we need to
152
  // temporarily remove from the string.
153
10.0k
  if (line.consume_back("\r\n"))
154
2
    line_ending = "\r\n";
155
10.0k
  else if (line.consume_back("\n"))
156
9.97k
    line_ending = "\n";
157
31
  else if (line.consume_back("\r"))
158
1
    line_ending = "\r";
159
160
10.0k
  unsigned line_number = previous_lines.count('\n') + 1U;
161
162
  // Let's build the actual source code Clang needs and setup some utility
163
  // objects.
164
10.0k
  std::string full_source = previous_lines.str() + line.str();
165
10.0k
  llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs());
166
10.0k
  llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts(
167
10.0k
      new DiagnosticOptions());
168
10.0k
  DiagnosticsEngine diags(diag_ids, diags_opts);
169
10.0k
  clang::SourceManager SM(diags, file_mgr);
170
10.0k
  auto buf = llvm::MemoryBuffer::getMemBuffer(full_source);
171
172
10.0k
  FileID FID = SM.createFileID(buf->getMemBufferRef());
173
174
  // Let's just enable the latest ObjC and C++ which should get most tokens
175
  // right.
176
10.0k
  LangOptions Opts;
177
10.0k
  Opts.ObjC = true;
178
  // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too
179
10.0k
  Opts.CPlusPlus17 = true;
180
10.0k
  Opts.LineComment = true;
181
182
10.0k
  Lexer lex(FID, buf->getMemBufferRef(), SM, Opts);
183
  // The lexer should keep whitespace around.
184
10.0k
  lex.SetKeepWhitespaceMode(true);
185
186
  // Keeps track if we have entered a PP directive.
187
10.0k
  bool in_pp_directive = false;
188
189
  // True once we actually lexed the user provided line.
190
10.0k
  bool found_user_line = false;
191
192
  // True if we already highlighted the token under the cursor, false otherwise.
193
10.0k
  bool highlighted_cursor = false;
194
10.0k
  Token token;
195
10.0k
  bool exit = false;
196
75.1k
  while (!exit) {
197
    // Returns true if this is the last token we get from the lexer.
198
65.1k
    exit = lex.LexFromRawLexer(token);
199
200
65.1k
    bool invalid = false;
201
65.1k
    unsigned current_line_number =
202
65.1k
        SM.getSpellingLineNumber(token.getLocation(), &invalid);
203
65.1k
    if (current_line_number != line_number)
204
0
      continue;
205
65.1k
    found_user_line = true;
206
207
    // We don't need to print any tokens without a spelling line number.
208
65.1k
    if (invalid)
209
0
      continue;
210
211
    // Same as above but with the column number.
212
65.1k
    invalid = false;
213
65.1k
    unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid);
214
65.1k
    if (invalid)
215
0
      continue;
216
    // Column numbers start at 1, but indexes in our string start at 0.
217
65.1k
    --start;
218
219
    // Annotations don't have a length, so let's skip them.
220
65.1k
    if (token.isAnnotation())
221
0
      continue;
222
223
    // Extract the token string from our source code.
224
65.1k
    llvm::StringRef tok_str = line.substr(start, token.getLength());
225
226
    // If the token is just an empty string, we can skip all the work below.
227
65.1k
    if (tok_str.empty())
228
1.93k
      continue;
229
230
    // If the cursor is inside this token, we have to apply the 'selected'
231
    // highlight style before applying the actual token color.
232
63.1k
    llvm::StringRef to_print = tok_str;
233
63.1k
    StreamString storage;
234
63.1k
    auto end = start + token.getLength();
235
63.1k
    if (cursor_pos && 
end > *cursor_pos14.6k
&&
!highlighted_cursor9.35k
) {
236
1.49k
      highlighted_cursor = true;
237
1.49k
      options.selected.Apply(storage, tok_str);
238
1.49k
      to_print = storage.GetString();
239
1.49k
    }
240
241
    // See how we are supposed to highlight this token.
242
63.1k
    HighlightStyle::ColorStyle color =
243
63.1k
        determineClangStyle(*this, token, tok_str, options, in_pp_directive);
244
245
63.1k
    color.Apply(result, to_print);
246
63.1k
  }
247
248
  // Add the line ending we trimmed before tokenizing.
249
10.0k
  result << line_ending;
250
251
  // If we went over the whole file but couldn't find our own file, then
252
  // somehow our setup was wrong. When we're in release mode we just give the
253
  // user the normal line and pretend we don't know how to highlight it. In
254
  // debug mode we bail out with an assert as this should never happen.
255
10.0k
  if (!found_user_line) {
256
0
    result << line;
257
0
    assert(false && "We couldn't find the user line in the input file?");
258
0
  }
259
10.0k
}