/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/include/clang/Tooling/Syntax/Tree.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // Defines the basic structure of the syntax tree. There are two kinds of nodes: |
9 | | // - leaf nodes correspond to a token in the expanded token stream, |
10 | | // - tree nodes correspond to language grammar constructs. |
11 | | // |
12 | | // The tree is initially built from an AST. Each node of a newly built tree |
13 | | // covers a continous subrange of expanded tokens (i.e. tokens after |
14 | | // preprocessing), the specific tokens coverered are stored in the leaf nodes of |
15 | | // a tree. A post-order traversal of a tree will visit leaf nodes in an order |
16 | | // corresponding the original order of expanded tokens. |
17 | | // |
18 | | // This is still work in progress and highly experimental, we leave room for |
19 | | // ourselves to completely change the design and/or implementation. |
20 | | //===----------------------------------------------------------------------===// |
21 | | #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H |
22 | | #define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H |
23 | | |
24 | | #include "clang/Basic/LangOptions.h" |
25 | | #include "clang/Basic/SourceLocation.h" |
26 | | #include "clang/Basic/SourceManager.h" |
27 | | #include "clang/Basic/TokenKinds.h" |
28 | | #include "clang/Tooling/Syntax/Tokens.h" |
29 | | #include "llvm/ADT/ArrayRef.h" |
30 | | #include "llvm/ADT/DenseMap.h" |
31 | | #include "llvm/ADT/iterator.h" |
32 | | #include "llvm/Support/Allocator.h" |
33 | | #include <cstdint> |
34 | | #include <iterator> |
35 | | |
36 | | namespace clang { |
37 | | namespace syntax { |
38 | | |
39 | | /// A memory arena for syntax trees. Also tracks the underlying token buffers, |
40 | | /// source manager, etc. |
41 | | class Arena { |
42 | | public: |
43 | | Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, |
44 | | const TokenBuffer &Tokens); |
45 | | |
46 | 27.1k | const SourceManager &getSourceManager() const { return SourceMgr; } |
47 | 0 | const LangOptions &getLangOptions() const { return LangOpts; } |
48 | | |
49 | | const TokenBuffer &getTokenBuffer() const; |
50 | 72.7k | llvm::BumpPtrAllocator &getAllocator() { return Allocator; } |
51 | | |
52 | | private: |
53 | | /// Add \p Buffer to the underlying source manager, tokenize it and store the |
54 | | /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens |
55 | | /// that were not written in user code. |
56 | | std::pair<FileID, ArrayRef<Token>> |
57 | | lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer); |
58 | | friend class FactoryImpl; |
59 | | |
60 | | private: |
61 | | SourceManager &SourceMgr; |
62 | | const LangOptions &LangOpts; |
63 | | const TokenBuffer &Tokens; |
64 | | /// IDs and storage for additional tokenized files. |
65 | | llvm::DenseMap<FileID, std::vector<Token>> ExtraTokens; |
66 | | /// Keeps all the allocated nodes and their intermediate data structures. |
67 | | llvm::BumpPtrAllocator Allocator; |
68 | | }; |
69 | | |
70 | | class Tree; |
71 | | class TreeBuilder; |
72 | | class FactoryImpl; |
73 | | class MutationsImpl; |
74 | | |
75 | | enum class NodeKind : uint16_t; |
76 | | enum class NodeRole : uint8_t; |
77 | | |
78 | | /// A node in a syntax tree. Each node is either a Leaf (representing tokens) or |
79 | | /// a Tree (representing language constructrs). |
80 | | class Node { |
81 | | protected: |
82 | | /// Newly created nodes are detached from a tree, parent and sibling links are |
83 | | /// set when the node is added as a child to another one. |
84 | | Node(NodeKind Kind); |
85 | | /// Nodes are allocated on Arenas; the destructor is never called. |
86 | | ~Node() = default; |
87 | | |
88 | | public: |
89 | | /// Nodes cannot simply be copied without violating tree invariants. |
90 | | Node(const Node &) = delete; |
91 | | Node &operator=(const Node &) = delete; |
92 | | /// Idiomatically, nodes are allocated on an Arena and never moved. |
93 | | Node(Node &&) = delete; |
94 | | Node &operator=(Node &&) = delete; |
95 | | |
96 | 1.08M | NodeKind getKind() const { return static_cast<NodeKind>(Kind); } |
97 | 391k | NodeRole getRole() const { return static_cast<NodeRole>(Role); } |
98 | | |
99 | | /// Whether the node is detached from a tree, i.e. does not have a parent. |
100 | | bool isDetached() const; |
101 | | /// Whether the node was created from the AST backed by the source code |
102 | | /// rather than added later through mutation APIs or created with factory |
103 | | /// functions. |
104 | | /// When this flag is true, all subtrees are also original. |
105 | | /// This flag is set to false on any modifications to the node or any of its |
106 | | /// subtrees, even if this simply involves swapping existing subtrees. |
107 | 232k | bool isOriginal() const { return Original; } |
108 | | /// If this function return false, the tree cannot be modified because there |
109 | | /// is no reasonable way to produce the corresponding textual replacements. |
110 | | /// This can happen when the node crosses macro expansion boundaries. |
111 | | /// |
112 | | /// Note that even if the node is not modifiable, its child nodes can be |
113 | | /// modifiable. |
114 | 32.6k | bool canModify() const { return CanModify; } |
115 | | |
116 | 138k | const Tree *getParent() const { return Parent; } |
117 | 84 | Tree *getParent() { return Parent; } |
118 | | |
119 | 336k | const Node *getNextSibling() const { return NextSibling; } |
120 | 58.1k | Node *getNextSibling() { return NextSibling; } |
121 | 36.6k | const Node *getPreviousSibling() const { return PreviousSibling; } |
122 | 0 | Node *getPreviousSibling() { return PreviousSibling; } |
123 | | |
124 | | /// Dumps the structure of a subtree. For debugging and testing purposes. |
125 | | std::string dump(const SourceManager &SM) const; |
126 | | /// Dumps the tokens forming this subtree. |
127 | | std::string dumpTokens(const SourceManager &SM) const; |
128 | | |
129 | | /// Asserts invariants on this node of the tree and its immediate children. |
130 | | /// Will not recurse into the subtree. No-op if NDEBUG is set. |
131 | | void assertInvariants() const; |
132 | | /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set. |
133 | | void assertInvariantsRecursive() const; |
134 | | |
135 | | private: |
136 | | // Tree is allowed to change the Parent link and Role. |
137 | | friend class Tree; |
138 | | // TreeBuilder is allowed to set the Original and CanModify flags. |
139 | | friend class TreeBuilder; |
140 | | // MutationsImpl sets roles and CanModify flag. |
141 | | friend class MutationsImpl; |
142 | | // FactoryImpl sets CanModify flag. |
143 | | friend class FactoryImpl; |
144 | | |
145 | | void setRole(NodeRole NR); |
146 | | |
147 | | Tree *Parent; |
148 | | Node *NextSibling; |
149 | | Node *PreviousSibling; |
150 | | unsigned Kind : 16; |
151 | | unsigned Role : 8; |
152 | | unsigned Original : 1; |
153 | | unsigned CanModify : 1; |
154 | | }; |
155 | | |
156 | | /// A leaf node points to a single token inside the expanded token stream. |
157 | | class Leaf final : public Node { |
158 | | public: |
159 | | Leaf(const Token *T); |
160 | | static bool classof(const Node *N); |
161 | | |
162 | 122k | const Token *getToken() const { return Tok; } |
163 | | |
164 | | private: |
165 | | const Token *Tok; |
166 | | }; |
167 | | |
168 | | /// A node that has children and represents a syntactic language construct. |
169 | | class Tree : public Node { |
170 | | /// Iterator over children (common base for const/non-const). |
171 | | /// Not invalidated by tree mutations (holds a stable node pointer). |
172 | | template <typename DerivedT, typename NodeT> |
173 | | class ChildIteratorBase |
174 | | : public llvm::iterator_facade_base<DerivedT, std::forward_iterator_tag, |
175 | | NodeT> { |
176 | | protected: |
177 | | NodeT *N = nullptr; |
178 | | using Base = ChildIteratorBase; |
179 | | |
180 | | public: |
181 | 134k | ChildIteratorBase() = default; clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ConstChildIterator, clang::syntax::Node const>::ChildIteratorBase() Line | Count | Source | 181 | 134k | ChildIteratorBase() = default; |
clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ChildIterator, clang::syntax::Node>::ChildIteratorBase() Line | Count | Source | 181 | 194 | ChildIteratorBase() = default; |
|
182 | 134k | explicit ChildIteratorBase(NodeT *N) : N(N) {} clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ConstChildIterator, clang::syntax::Node const>::ChildIteratorBase(clang::syntax::Node const*) Line | Count | Source | 182 | 134k | explicit ChildIteratorBase(NodeT *N) : N(N) {} |
clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ChildIterator, clang::syntax::Node>::ChildIteratorBase(clang::syntax::Node*) Line | Count | Source | 182 | 193 | explicit ChildIteratorBase(NodeT *N) : N(N) {} |
|
183 | | |
184 | 305k | bool operator==(const DerivedT &O) const { return O.N == N; } clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ConstChildIterator, clang::syntax::Node const>::operator==(clang::syntax::Tree::ConstChildIterator const&) const Line | Count | Source | 184 | 304k | bool operator==(const DerivedT &O) const { return O.N == N; } |
clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ChildIterator, clang::syntax::Node>::operator==(clang::syntax::Tree::ChildIterator const&) const Line | Count | Source | 184 | 1.09k | bool operator==(const DerivedT &O) const { return O.N == N; } |
|
185 | 222k | NodeT &operator*() const { return *N; } clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ConstChildIterator, clang::syntax::Node const>::operator*() const Line | Count | Source | 185 | 221k | NodeT &operator*() const { return *N; } |
clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ChildIterator, clang::syntax::Node>::operator*() const Line | Count | Source | 185 | 902 | NodeT &operator*() const { return *N; } |
|
186 | 170k | DerivedT &operator++() { |
187 | 170k | N = N->getNextSibling(); |
188 | 170k | return *static_cast<DerivedT *>(this); |
189 | 170k | } clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ConstChildIterator, clang::syntax::Node const>::operator++() Line | Count | Source | 186 | 169k | DerivedT &operator++() { | 187 | 169k | N = N->getNextSibling(); | 188 | 169k | return *static_cast<DerivedT *>(this); | 189 | 169k | } |
clang::syntax::Tree::ChildIteratorBase<clang::syntax::Tree::ChildIterator, clang::syntax::Node>::operator++() Line | Count | Source | 186 | 902 | DerivedT &operator++() { | 187 | 902 | N = N->getNextSibling(); | 188 | 902 | return *static_cast<DerivedT *>(this); | 189 | 902 | } |
|
190 | | |
191 | | /// Truthy if valid (not past-the-end). |
192 | | /// This allows: if (auto It = find_if(N.children(), ...) ) |
193 | | explicit operator bool() const { return N != nullptr; } |
194 | | /// The element, or nullptr if past-the-end. |
195 | | NodeT *asPointer() const { return N; } |
196 | | }; |
197 | | |
198 | | public: |
199 | | static bool classof(const Node *N); |
200 | | |
201 | 62.1k | Node *getFirstChild() { return FirstChild; } |
202 | 135k | const Node *getFirstChild() const { return FirstChild; } |
203 | 0 | Node *getLastChild() { return LastChild; } |
204 | 92.0k | const Node *getLastChild() const { return LastChild; } |
205 | | |
206 | | const Leaf *findFirstLeaf() const; |
207 | | Leaf *findFirstLeaf() { |
208 | | return const_cast<Leaf *>(const_cast<const Tree *>(this)->findFirstLeaf()); |
209 | | } |
210 | | |
211 | | const Leaf *findLastLeaf() const; |
212 | | Leaf *findLastLeaf() { |
213 | | return const_cast<Leaf *>(const_cast<const Tree *>(this)->findLastLeaf()); |
214 | | } |
215 | | |
216 | | /// child_iterator is not invalidated by mutations. |
217 | | struct ChildIterator : ChildIteratorBase<ChildIterator, Node> { |
218 | | using Base::ChildIteratorBase; |
219 | | }; |
220 | | struct ConstChildIterator |
221 | | : ChildIteratorBase<ConstChildIterator, const Node> { |
222 | | using Base::ChildIteratorBase; |
223 | 134k | ConstChildIterator() = default; |
224 | | ConstChildIterator(const ChildIterator &I) : Base(I.asPointer()) {} |
225 | | }; |
226 | | |
227 | 193 | llvm::iterator_range<ChildIterator> getChildren() { |
228 | 193 | return {ChildIterator(getFirstChild()), ChildIterator()}; |
229 | 193 | } |
230 | 134k | llvm::iterator_range<ConstChildIterator> getChildren() const { |
231 | 134k | return {ConstChildIterator(getFirstChild()), ConstChildIterator()}; |
232 | 134k | } |
233 | | |
234 | | /// Find the first node with a corresponding role. |
235 | | const Node *findChild(NodeRole R) const; |
236 | 0 | Node *findChild(NodeRole R) { |
237 | 0 | return const_cast<Node *>(const_cast<const Tree *>(this)->findChild(R)); |
238 | 0 | } |
239 | | |
240 | | protected: |
241 | | using Node::Node; |
242 | | |
243 | | private: |
244 | | /// Append \p Child to the list of children and sets the parent pointer. |
245 | | /// A very low-level operation that does not check any invariants, only used |
246 | | /// by TreeBuilder and FactoryImpl. |
247 | | /// EXPECTS: Role != Detached. |
248 | | void appendChildLowLevel(Node *Child, NodeRole Role); |
249 | | /// Similar but prepends. |
250 | | void prependChildLowLevel(Node *Child, NodeRole Role); |
251 | | |
252 | | /// Like the previous overloads, but does not set role for \p Child. |
253 | | /// EXPECTS: Child->Role != Detached |
254 | | void appendChildLowLevel(Node *Child); |
255 | | void prependChildLowLevel(Node *Child); |
256 | | friend class TreeBuilder; |
257 | | friend class FactoryImpl; |
258 | | |
259 | | /// Replace a range of children [Begin, End) with a list of |
260 | | /// new nodes starting at \p New. |
261 | | /// Only used by MutationsImpl to implement higher-level mutation operations. |
262 | | /// (!) \p New can be null to model removal of the child range. |
263 | | /// (!) \p End can be null to model one past the end. |
264 | | /// (!) \p Begin can be null to model an append. |
265 | | void replaceChildRangeLowLevel(Node *Begin, Node *End, Node *New); |
266 | | friend class MutationsImpl; |
267 | | |
268 | | Node *FirstChild = nullptr; |
269 | | Node *LastChild = nullptr; |
270 | | }; |
271 | | |
272 | | // Provide missing non_const == const overload. |
273 | | // iterator_facade_base requires == to be a member, but implicit conversions |
274 | | // don't work on the LHS of a member operator. |
275 | | inline bool operator==(const Tree::ConstChildIterator &A, |
276 | 304k | const Tree::ConstChildIterator &B) { |
277 | 304k | return A.operator==(B); |
278 | 304k | } |
279 | | |
280 | | /// A list of Elements separated or terminated by a fixed token. |
281 | | /// |
282 | | /// This type models the following grammar construct: |
283 | | /// delimited-list(element, delimiter, termination, canBeEmpty) |
284 | | class List : public Tree { |
285 | | public: |
286 | | template <typename Element> struct ElementAndDelimiter { |
287 | | Element *element; |
288 | | Leaf *delimiter; |
289 | | }; |
290 | | |
291 | | enum class TerminationKind { |
292 | | Terminated, |
293 | | MaybeTerminated, |
294 | | Separated, |
295 | | }; |
296 | | |
297 | | using Tree::Tree; |
298 | | static bool classof(const Node *N); |
299 | | /// Returns the elements and corresponding delimiters. Missing elements |
300 | | /// and delimiters are represented as null pointers. |
301 | | /// |
302 | | /// For example, in a separated list: |
303 | | /// "a, b, c" <=> [("a" , ","), ("b" , "," ), ("c" , null)] |
304 | | /// "a, , c" <=> [("a" , ","), (null, "," ), ("c" , null)] |
305 | | /// "a, b c" <=> [("a" , ","), ("b" , null), ("c" , null)] |
306 | | /// "a, b," <=> [("a" , ","), ("b" , "," ), (null, null)] |
307 | | /// |
308 | | /// In a terminated or maybe-terminated list: |
309 | | /// "a; b; c;" <=> [("a" , ";"), ("b" , ";" ), ("c" , ";" )] |
310 | | /// "a; ; c;" <=> [("a" , ";"), (null, ";" ), ("c" , ";" )] |
311 | | /// "a; b c;" <=> [("a" , ";"), ("b" , null), ("c" , ";" )] |
312 | | /// "a; b; c" <=> [("a" , ";"), ("b" , ";" ), ("c" , null)] |
313 | | std::vector<ElementAndDelimiter<Node>> getElementsAsNodesAndDelimiters(); |
314 | | |
315 | | /// Returns the elements of the list. Missing elements are represented |
316 | | /// as null pointers in the same way as in the return value of |
317 | | /// `getElementsAsNodesAndDelimiters()`. |
318 | | std::vector<Node *> getElementsAsNodes(); |
319 | | |
320 | | // These can't be implemented with the information we have! |
321 | | |
322 | | /// Returns the appropriate delimiter for this list. |
323 | | /// |
324 | | /// Useful for discovering the correct delimiter to use when adding |
325 | | /// elements to empty or one-element lists. |
326 | | clang::tok::TokenKind getDelimiterTokenKind() const; |
327 | | |
328 | | TerminationKind getTerminationKind() const; |
329 | | |
330 | | /// Whether this list can be empty in syntactically and semantically correct |
331 | | /// code. |
332 | | /// |
333 | | /// This list may be empty when the source code has errors even if |
334 | | /// canBeEmpty() returns false. |
335 | | bool canBeEmpty() const; |
336 | | }; |
337 | | |
338 | | } // namespace syntax |
339 | | } // namespace clang |
340 | | |
341 | | #endif |