Coverage Report

Created: 2022-01-18 06:27

/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/Plugins/Language/ClangCommon/ClangHighlighter.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- ClangHighlighter.cpp ----------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "ClangHighlighter.h"
10
11
#include "lldb/Host/FileSystem.h"
12
#include "lldb/Target/Language.h"
13
#include "lldb/Utility/AnsiTerminal.h"
14
#include "lldb/Utility/StreamString.h"
15
16
#include "clang/Basic/FileManager.h"
17
#include "clang/Basic/SourceManager.h"
18
#include "clang/Lex/Lexer.h"
19
#include "llvm/ADT/StringSet.h"
20
#include "llvm/Support/MemoryBuffer.h"
21
22
using namespace lldb_private;
23
24
74.7k
/// Returns true if \p token is one of the spellings stored in the keyword
/// set that the constructor filled in.
bool ClangHighlighter::isKeyword(llvm::StringRef token) const {
  // A set holds each spelling at most once, so a non-zero count means the
  // token is a known keyword.
  return keywords.count(token) != 0;
}
27
28
6.04k
/// Fills the keyword set with the spelling of every KEYWORD entry listed in
/// Clang's TokenKinds.def.
ClangHighlighter::ClangHighlighter() {
  // Each KEYWORD(name, flags) entry of the .def file expands to one insertion
  // of the stringified keyword name; the flags argument is not needed here.
#define KEYWORD(NAME, FLAGS) keywords.insert(#NAME);
#include "clang/Basic/TokenKinds.def"
}
32
33
/// Determines which style should be applied to the given token.
34
/// \param highlighter
35
///     The current highlighter that should use the style.
36
/// \param token
37
///     The current token.
38
/// \param tok_str
39
///     The string in the source code the token represents.
40
/// \param options
41
///     The style we use for coloring the source code.
42
/// \param in_pp_directive
43
///     If we are currently in a preprocessor directive. NOTE: This is
44
///     passed by reference and will be updated if the current token starts
45
///     or ends a preprocessor directive.
46
/// \return
47
///     The ColorStyle that should be applied to the token.
48
static HighlightStyle::ColorStyle
49
determineClangStyle(const ClangHighlighter &highlighter,
50
                    const clang::Token &token, llvm::StringRef tok_str,
51
83.6k
                    const HighlightStyle &options, bool &in_pp_directive) {
52
83.6k
  using namespace clang;
53
54
83.6k
  if (token.is(tok::comment)) {
55
    // If we were in a preprocessor directive before, we now left it.
56
2.40k
    in_pp_directive = false;
57
2.40k
    return options.comment;
58
81.2k
  } else if (in_pp_directive || 
token.getKind() == tok::hash80.4k
) {
59
    // Let's assume that the rest of the line is a PP directive.
60
1.16k
    in_pp_directive = true;
61
    // Preprocessor directives are hard to match, so we have to hack this in.
62
1.16k
    return options.pp_directive;
63
80.1k
  } else if (tok::isStringLiteral(token.getKind()))
64
2.06k
    return options.string_literal;
65
78.0k
  else if (tok::isLiteral(token.getKind()))
66
3.28k
    return options.scalar_literal;
67
74.7k
  else if (highlighter.isKeyword(tok_str))
68
4.22k
    return options.keyword;
69
70.5k
  else
70
70.5k
    switch (token.getKind()) {
71
14.4k
    case tok::raw_identifier:
72
14.4k
    case tok::identifier:
73
14.4k
      return options.identifier;
74
1.41k
    case tok::l_brace:
75
3.00k
    case tok::r_brace:
76
3.00k
      return options.braces;
77
922
    case tok::l_square:
78
1.85k
    case tok::r_square:
79
1.85k
      return options.square_brackets;
80
4.75k
    case tok::l_paren:
81
9.51k
    case tok::r_paren:
82
9.51k
      return options.parentheses;
83
2.90k
    case tok::comma:
84
2.90k
      return options.comma;
85
0
    case tok::coloncolon:
86
921
    case tok::colon:
87
921
      return options.colon;
88
89
227
    case tok::amp:
90
239
    case tok::ampamp:
91
239
    case tok::ampequal:
92
1.16k
    case tok::star:
93
1.17k
    case tok::starequal:
94
1.42k
    case tok::plus:
95
1.91k
    case tok::plusplus:
96
1.94k
    case tok::plusequal:
97
2.00k
    case tok::minus:
98
2.25k
    case tok::arrow:
99
2.25k
    case tok::minusminus:
100
2.25k
    case tok::minusequal:
101
2.25k
    case tok::tilde:
102
2.45k
    case tok::exclaim:
103
2.45k
    case tok::exclaimequal:
104
2.47k
    case tok::slash:
105
2.47k
    case tok::slashequal:
106
2.48k
    case tok::percent:
107
2.48k
    case tok::percentequal:
108
2.62k
    case tok::less:
109
2.65k
    case tok::lessless:
110
2.66k
    case tok::lessequal:
111
2.66k
    case tok::lesslessequal:
112
2.66k
    case tok::spaceship:
113
2.75k
    case tok::greater:
114
2.75k
    case tok::greatergreater:
115
2.76k
    case tok::greaterequal:
116
2.76k
    case tok::greatergreaterequal:
117
3.11k
    case tok::caret:
118
3.11k
    case tok::caretequal:
119
3.11k
    case tok::pipe:
120
3.11k
    case tok::pipepipe:
121
3.11k
    case tok::pipeequal:
122
3.11k
    case tok::question:
123
5.14k
    case tok::equal:
124
5.16k
    case tok::equalequal:
125
5.16k
      return options.operators;
126
32.7k
    default:
127
32.7k
      break;
128
70.5k
    }
129
32.7k
  return HighlightStyle::ColorStyle();
130
83.6k
}
131
132
void ClangHighlighter::Highlight(const HighlightStyle &options,
133
                                 llvm::StringRef line,
134
                                 llvm::Optional<size_t> cursor_pos,
135
                                 llvm::StringRef previous_lines,
136
13.2k
                                 Stream &result) const {
137
13.2k
  using namespace clang;
138
139
13.2k
  FileSystemOptions file_opts;
140
13.2k
  FileManager file_mgr(file_opts,
141
13.2k
                       FileSystem::Instance().GetVirtualFileSystem());
142
143
  // The line might end in a backslash which would cause Clang to drop the
144
  // backslash and the terminating new line. This makes sense when parsing C++,
145
  // but when highlighting we care about preserving the backslash/newline. To
146
  // not lose this information we remove the new line here so that Clang knows
147
  // this is just a single line we are highlighting. We add back the newline
148
  // after tokenizing.
149
13.2k
  llvm::StringRef line_ending = "";
150
  // There are a few legal line endings Clang recognizes and we need to
151
  // temporarily remove from the string.
152
13.2k
  if (line.consume_back("\r\n"))
153
2
    line_ending = "\r\n";
154
13.2k
  else if (line.consume_back("\n"))
155
13.1k
    line_ending = "\n";
156
31
  else if (line.consume_back("\r"))
157
1
    line_ending = "\r";
158
159
13.2k
  unsigned line_number = previous_lines.count('\n') + 1U;
160
161
  // Let's build the actual source code Clang needs and setup some utility
162
  // objects.
163
13.2k
  std::string full_source = previous_lines.str() + line.str();
164
13.2k
  llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs());
165
13.2k
  llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts(
166
13.2k
      new DiagnosticOptions());
167
13.2k
  DiagnosticsEngine diags(diag_ids, diags_opts);
168
13.2k
  clang::SourceManager SM(diags, file_mgr);
169
13.2k
  auto buf = llvm::MemoryBuffer::getMemBuffer(full_source);
170
171
13.2k
  FileID FID = SM.createFileID(buf->getMemBufferRef());
172
173
  // Let's just enable the latest ObjC and C++ which should get most tokens
174
  // right.
175
13.2k
  LangOptions Opts;
176
13.2k
  Opts.ObjC = true;
177
  // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too
178
13.2k
  Opts.CPlusPlus17 = true;
179
13.2k
  Opts.LineComment = true;
180
181
13.2k
  Lexer lex(FID, buf->getMemBufferRef(), SM, Opts);
182
  // The lexer should keep whitespace around.
183
13.2k
  lex.SetKeepWhitespaceMode(true);
184
185
  // Keeps track if we have entered a PP directive.
186
13.2k
  bool in_pp_directive = false;
187
188
  // True once we actually lexed the user provided line.
189
13.2k
  bool found_user_line = false;
190
191
  // True if we already highlighted the token under the cursor, false otherwise.
192
13.2k
  bool highlighted_cursor = false;
193
13.2k
  Token token;
194
13.2k
  bool exit = false;
195
99.4k
  while (!exit) {
196
    // Returns true if this is the last token we get from the lexer.
197
86.2k
    exit = lex.LexFromRawLexer(token);
198
199
86.2k
    bool invalid = false;
200
86.2k
    unsigned current_line_number =
201
86.2k
        SM.getSpellingLineNumber(token.getLocation(), &invalid);
202
86.2k
    if (current_line_number != line_number)
203
0
      continue;
204
86.2k
    found_user_line = true;
205
206
    // We don't need to print any tokens without a spelling line number.
207
86.2k
    if (invalid)
208
0
      continue;
209
210
    // Same as above but with the column number.
211
86.2k
    invalid = false;
212
86.2k
    unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid);
213
86.2k
    if (invalid)
214
0
      continue;
215
    // Column numbers start at 1, but indexes in our string start at 0.
216
86.2k
    --start;
217
218
    // Annotations don't have a length, so let's skip them.
219
86.2k
    if (token.isAnnotation())
220
0
      continue;
221
222
    // Extract the token string from our source code.
223
86.2k
    llvm::StringRef tok_str = line.substr(start, token.getLength());
224
225
    // If the token is just an empty string, we can skip all the work below.
226
86.2k
    if (tok_str.empty())
227
2.54k
      continue;
228
229
    // If the cursor is inside this token, we have to apply the 'selected'
230
    // highlight style before applying the actual token color.
231
83.6k
    llvm::StringRef to_print = tok_str;
232
83.6k
    StreamString storage;
233
83.6k
    auto end = start + token.getLength();
234
83.6k
    if (cursor_pos && 
end > *cursor_pos19.3k
&&
!highlighted_cursor12.3k
) {
235
1.97k
      highlighted_cursor = true;
236
1.97k
      options.selected.Apply(storage, tok_str);
237
1.97k
      to_print = storage.GetString();
238
1.97k
    }
239
240
    // See how we are supposed to highlight this token.
241
83.6k
    HighlightStyle::ColorStyle color =
242
83.6k
        determineClangStyle(*this, token, tok_str, options, in_pp_directive);
243
244
83.6k
    color.Apply(result, to_print);
245
83.6k
  }
246
247
  // Add the line ending we trimmed before tokenizing.
248
13.2k
  result << line_ending;
249
250
  // If we went over the whole file but couldn't find our own file, then
251
  // somehow our setup was wrong. When we're in release mode we just give the
252
  // user the normal line and pretend we don't know how to highlight it. In
253
  // debug mode we bail out with an assert as this should never happen.
254
13.2k
  if (!found_user_line) {
255
0
    result << line;
256
0
    assert(false && "We couldn't find the user line in the input file?");
257
0
  }
258
13.2k
}