Coverage Report

Created: 2021-08-24 07:12

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/include/clang/Tooling/Syntax/Tree.h
Line
Count
Source (jump to first uncovered line)
1
//===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
// Defines the basic structure of the syntax tree. There are two kinds of nodes:
9
//   - leaf nodes correspond to a token in the expanded token stream,
10
//   - tree nodes correspond to language grammar constructs.
11
//
12
// The tree is initially built from an AST. Each node of a newly built tree
13
// covers a continous subrange of expanded tokens (i.e. tokens after
14
// preprocessing), the specific tokens coverered are stored in the leaf nodes of
15
// a tree. A post-order traversal of a tree will visit leaf nodes in an order
16
// corresponding the original order of expanded tokens.
17
//
18
// This is still work in progress and highly experimental, we leave room for
19
// ourselves to completely change the design and/or implementation.
20
//===----------------------------------------------------------------------===//
21
#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
22
#define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
23
24
#include "clang/Basic/LangOptions.h"
25
#include "clang/Basic/SourceLocation.h"
26
#include "clang/Basic/SourceManager.h"
27
#include "clang/Basic/TokenKinds.h"
28
#include "clang/Tooling/Syntax/Tokens.h"
29
#include "llvm/ADT/ArrayRef.h"
30
#include "llvm/ADT/DenseMap.h"
31
#include "llvm/ADT/iterator.h"
32
#include "llvm/Support/Allocator.h"
33
#include <cstdint>
34
#include <iterator>
35
36
namespace clang {
37
namespace syntax {
38
39
/// A memory arena for syntax trees. Also tracks the underlying token buffers,
40
/// source manager, etc.
41
class Arena {
42
public:
43
  Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
44
        const TokenBuffer &Tokens);
45
46
27.5k
  const SourceManager &getSourceManager() const { return SourceMgr; }
47
0
  const LangOptions &getLangOptions() const { return LangOpts; }
48
49
  const TokenBuffer &getTokenBuffer() const;
50
73.5k
  llvm::BumpPtrAllocator &getAllocator() { return Allocator; }
51
52
private:
53
  /// Add \p Buffer to the underlying source manager, tokenize it and store the
54
  /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens
55
  /// that were not written in user code.
56
  std::pair<FileID, ArrayRef<Token>>
57
  lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
58
  friend class FactoryImpl;
59
60
private:
61
  SourceManager &SourceMgr;
62
  const LangOptions &LangOpts;
63
  const TokenBuffer &Tokens;
64
  /// IDs and storage for additional tokenized files.
65
  llvm::DenseMap<FileID, std::vector<Token>> ExtraTokens;
66
  /// Keeps all the allocated nodes and their intermediate data structures.
67
  llvm::BumpPtrAllocator Allocator;
68
};
69
70
class Tree;
71
class TreeBuilder;
72
class FactoryImpl;
73
class MutationsImpl;
74
75
enum class NodeKind : uint16_t;
76
enum class NodeRole : uint8_t;
77
78
/// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
79
/// a Tree (representing language constructrs).
80
class Node {
81
protected:
82
  /// Newly created nodes are detached from a tree, parent and sibling links are
83
  /// set when the node is added as a child to another one.
84
  Node(NodeKind Kind);
85
  /// Nodes are allocated on Arenas; the destructor is never called.
86
  ~Node() = default;
87
88
public:
89
  /// Nodes cannot simply be copied without violating tree invariants.
90
  Node(const Node &) = delete;
91
  Node &operator=(const Node &) = delete;
92
  /// Idiomatically, nodes are allocated on an Arena and never moved.
93
  Node(Node &&) = delete;
94
  Node &operator=(Node &&) = delete;
95
96
1.11M
  NodeKind getKind() const { return static_cast<NodeKind>(Kind); }
97
396k
  NodeRole getRole() const { return static_cast<NodeRole>(Role); }
98
99
  /// Whether the node is detached from a tree, i.e. does not have a parent.
100
  bool isDetached() const;
101
  /// Whether the node was created from the AST backed by the source code
102
  /// rather than added later through mutation APIs or created with factory
103
  /// functions.
104
  /// When this flag is true, all subtrees are also original.
105
  /// This flag is set to false on any modifications to the node or any of its
106
  /// subtrees, even if this simply involves swapping existing subtrees.
107
236k
  bool isOriginal() const { return Original; }
108
  /// If this function return false, the tree cannot be modified because there
109
  /// is no reasonable way to produce the corresponding textual replacements.
110
  /// This can happen when the node crosses macro expansion boundaries.
111
  ///
112
  /// Note that even if the node is not modifiable, its child nodes can be
113
  /// modifiable.
114
32.7k
  bool canModify() const { return CanModify; }
115
116
139k
  const Tree *getParent() const { return Parent; }
117
84
  Tree *getParent() { return Parent; }
118
119
339k
  const Node *getNextSibling() const { return NextSibling; }
120
59.9k
  Node *getNextSibling() { return NextSibling; }
121
36.9k
  const Node *getPreviousSibling() const { return PreviousSibling; }
122
0
  Node *getPreviousSibling() { return PreviousSibling; }
123
124
  /// Dumps the structure of a subtree. For debugging and testing purposes.
125
  std::string dump(const SourceManager &SM) const;
126
  /// Dumps the tokens forming this subtree.
127
  std::string dumpTokens(const SourceManager &SM) const;
128
129
  /// Asserts invariants on this node of the tree and its immediate children.
130
  /// Will not recurse into the subtree. No-op if NDEBUG is set.
131
  void assertInvariants() const;
132
  /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set.
133
  void assertInvariantsRecursive() const;
134
135
private:
136
  // Tree is allowed to change the Parent link and Role.
137
  friend class Tree;
138
  // TreeBuilder is allowed to set the Original and CanModify flags.
139
  friend class TreeBuilder;
140
  // MutationsImpl sets roles and CanModify flag.
141
  friend class MutationsImpl;
142
  // FactoryImpl sets CanModify flag.
143
  friend class FactoryImpl;
144
145
  void setRole(NodeRole NR);
146
147
  Tree *Parent;
148
  Node *NextSibling;
149
  Node *PreviousSibling;
150
  unsigned Kind : 16;
151
  unsigned Role : 8;
152
  unsigned Original : 1;
153
  unsigned CanModify : 1;
154
};
155
156
/// A leaf node points to a single token inside the expanded token stream.
157
class Leaf final : public Node {
158
public:
159
  Leaf(const Token *T);
160
  static bool classof(const Node *N);
161
162
126k
  const Token *getToken() const { return Tok; }
163
164
private:
165
  const Token *Tok;
166
};
167
168
/// A node that has children and represents a syntactic language construct.
169
class Tree : public Node {
170
  /// Iterator over children (common base for const/non-const).
171
  /// Not invalidated by tree mutations (holds a stable node pointer).
172
  template <typename DerivedT, typename NodeT>
173
  class ChildIteratorBase
174
      : public llvm::iterator_facade_base<DerivedT, std::forward_iterator_tag,
175
                                          NodeT> {
176
  protected:
177
    NodeT *N = nullptr;
178
    using Base = ChildIteratorBase;
179
180
  public:
181
137k
    ChildIteratorBase() = default;
clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ConstChildIterator, clang::syntax::Node const>::ChildIteratorBase()
Line
Count
Source
181
137k
    ChildIteratorBase() = default;
clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ChildIterator, clang::syntax::Node>::ChildIteratorBase()
Line
Count
Source
181
194
    ChildIteratorBase() = default;
182
137k
    explicit ChildIteratorBase(NodeT *N) : N(N) {}
clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ConstChildIterator, clang::syntax::Node const>::ChildIteratorBase(clang::syntax::Node const*)
Line
Count
Source
182
137k
    explicit ChildIteratorBase(NodeT *N) : N(N) {}
clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ChildIterator, clang::syntax::Node>::ChildIteratorBase(clang::syntax::Node*)
Line
Count
Source
182
193
    explicit ChildIteratorBase(NodeT *N) : N(N) {}
183
184
309k
    bool operator==(const DerivedT &O) const { return O.N == N; }
clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ConstChildIterator, clang::syntax::Node const>::operator==(clang::syntax::Tree::ConstChildIterator const&) const
Line
Count
Source
184
308k
    bool operator==(const DerivedT &O) const { return O.N == N; }
clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ChildIterator, clang::syntax::Node>::operator==(clang::syntax::Tree::ChildIterator const&) const
Line
Count
Source
184
1.09k
    bool operator==(const DerivedT &O) const { return O.N == N; }
185
226k
    NodeT &operator*() const { return *N; }
clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ConstChildIterator, clang::syntax::Node const>::operator*() const
Line
Count
Source
185
225k
    NodeT &operator*() const { return *N; }
clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ChildIterator, clang::syntax::Node>::operator*() const
Line
Count
Source
185
902
    NodeT &operator*() const { return *N; }
186
172k
    DerivedT &operator++() {
187
172k
      N = N->getNextSibling();
188
172k
      return *static_cast<DerivedT *>(this);
189
172k
    }
clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ConstChildIterator, clang::syntax::Node const>::operator++()
Line
Count
Source
186
171k
    DerivedT &operator++() {
187
171k
      N = N->getNextSibling();
188
171k
      return *static_cast<DerivedT *>(this);
189
171k
    }
clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ChildIterator, clang::syntax::Node>::operator++()
Line
Count
Source
186
902
    DerivedT &operator++() {
187
902
      N = N->getNextSibling();
188
902
      return *static_cast<DerivedT *>(this);
189
902
    }
190
191
    /// Truthy if valid (not past-the-end).
192
    /// This allows: if (auto It = find_if(N.children(), ...) )
193
    explicit operator bool() const { return N != nullptr; }
194
    /// The element, or nullptr if past-the-end.
195
    NodeT *asPointer() const { return N; }
196
  };
197
198
public:
199
  static bool classof(const Node *N);
200
201
63.7k
  Node *getFirstChild() { return FirstChild; }
202
138k
  const Node *getFirstChild() const { return FirstChild; }
203
0
  Node *getLastChild() { return LastChild; }
204
95.2k
  const Node *getLastChild() const { return LastChild; }
205
206
  const Leaf *findFirstLeaf() const;
207
  Leaf *findFirstLeaf() {
208
    return const_cast<Leaf *>(const_cast<const Tree *>(this)->findFirstLeaf());
209
  }
210
211
  const Leaf *findLastLeaf() const;
212
  Leaf *findLastLeaf() {
213
    return const_cast<Leaf *>(const_cast<const Tree *>(this)->findLastLeaf());
214
  }
215
216
  /// child_iterator is not invalidated by mutations.
217
  struct ChildIterator : ChildIteratorBase<ChildIterator, Node> {
218
    using Base::ChildIteratorBase;
219
  };
220
  struct ConstChildIterator
221
      : ChildIteratorBase<ConstChildIterator, const Node> {
222
    using Base::ChildIteratorBase;
223
137k
    ConstChildIterator() = default;
224
    ConstChildIterator(const ChildIterator &I) : Base(I.asPointer()) {}
225
  };
226
227
193
  llvm::iterator_range<ChildIterator> getChildren() {
228
193
    return {ChildIterator(getFirstChild()), ChildIterator()};
229
193
  }
230
137k
  llvm::iterator_range<ConstChildIterator> getChildren() const {
231
137k
    return {ConstChildIterator(getFirstChild()), ConstChildIterator()};
232
137k
  }
233
234
  /// Find the first node with a corresponding role.
235
  const Node *findChild(NodeRole R) const;
236
0
  Node *findChild(NodeRole R) {
237
0
    return const_cast<Node *>(const_cast<const Tree *>(this)->findChild(R));
238
0
  }
239
240
protected:
241
  using Node::Node;
242
243
private:
244
  /// Append \p Child to the list of children and sets the parent pointer.
245
  /// A very low-level operation that does not check any invariants, only used
246
  /// by TreeBuilder and FactoryImpl.
247
  /// EXPECTS: Role != Detached.
248
  void appendChildLowLevel(Node *Child, NodeRole Role);
249
  /// Similar but prepends.
250
  void prependChildLowLevel(Node *Child, NodeRole Role);
251
252
  /// Like the previous overloads, but does not set role for \p Child.
253
  /// EXPECTS: Child->Role != Detached
254
  void appendChildLowLevel(Node *Child);
255
  void prependChildLowLevel(Node *Child);
256
  friend class TreeBuilder;
257
  friend class FactoryImpl;
258
259
  /// Replace a range of children [Begin, End) with a list of
260
  /// new nodes starting at \p New.
261
  /// Only used by MutationsImpl to implement higher-level mutation operations.
262
  /// (!) \p New can be null to model removal of the child range.
263
  /// (!) \p End can be null to model one past the end.
264
  /// (!) \p Begin can be null to model an append.
265
  void replaceChildRangeLowLevel(Node *Begin, Node *End, Node *New);
266
  friend class MutationsImpl;
267
268
  Node *FirstChild = nullptr;
269
  Node *LastChild = nullptr;
270
};
271
272
// Provide missing non_const == const overload.
273
// iterator_facade_base requires == to be a member, but implicit conversions
274
// don't work on the LHS of a member operator.
275
inline bool operator==(const Tree::ConstChildIterator &A,
276
308k
                       const Tree::ConstChildIterator &B) {
277
308k
  return A.operator==(B);
278
308k
}
279
280
/// A list of Elements separated or terminated by a fixed token.
281
///
282
/// This type models the following grammar construct:
283
/// delimited-list(element, delimiter, termination, canBeEmpty)
284
class List : public Tree {
285
public:
286
  template <typename Element> struct ElementAndDelimiter {
287
    Element *element;
288
    Leaf *delimiter;
289
  };
290
291
  enum class TerminationKind {
292
    Terminated,
293
    MaybeTerminated,
294
    Separated,
295
  };
296
297
  using Tree::Tree;
298
  static bool classof(const Node *N);
299
  /// Returns the elements and corresponding delimiters. Missing elements
300
  /// and delimiters are represented as null pointers.
301
  ///
302
  /// For example, in a separated list:
303
  /// "a, b, c"  <=> [("a" , ","), ("b" , "," ), ("c" , null)]
304
  /// "a,  , c"  <=> [("a" , ","), (null, "," ), ("c" , null)]
305
  /// "a, b  c"  <=> [("a" , ","), ("b" , null), ("c" , null)]
306
  /// "a, b,"    <=> [("a" , ","), ("b" , "," ), (null, null)]
307
  ///
308
  /// In a terminated or maybe-terminated list:
309
  /// "a; b; c;" <=> [("a" , ";"), ("b" , ";" ), ("c" , ";" )]
310
  /// "a;  ; c;" <=> [("a" , ";"), (null, ";" ), ("c" , ";" )]
311
  /// "a; b  c;" <=> [("a" , ";"), ("b" , null), ("c" , ";" )]
312
  /// "a; b; c"  <=> [("a" , ";"), ("b" , ";" ), ("c" , null)]
313
  std::vector<ElementAndDelimiter<Node>> getElementsAsNodesAndDelimiters();
314
315
  /// Returns the elements of the list. Missing elements are represented
316
  /// as null pointers in the same way as in the return value of
317
  /// `getElementsAsNodesAndDelimiters()`.
318
  std::vector<Node *> getElementsAsNodes();
319
320
  // These can't be implemented with the information we have!
321
322
  /// Returns the appropriate delimiter for this list.
323
  ///
324
  /// Useful for discovering the correct delimiter to use when adding
325
  /// elements to empty or one-element lists.
326
  clang::tok::TokenKind getDelimiterTokenKind() const;
327
328
  TerminationKind getTerminationKind() const;
329
330
  /// Whether this list can be empty in syntactically and semantically correct
331
  /// code.
332
  ///
333
  /// This list may be empty when the source code has errors even if
334
  /// canBeEmpty() returns false.
335
  bool canBeEmpty() const;
336
};
337
338
} // namespace syntax
339
} // namespace clang
340
341
#endif