/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Tooling/ASTDiff/ASTDiff.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- ASTDiff.cpp - AST differencing implementation-----------*- C++ -*- -===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file contains definitons for the AST differencing interface. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "clang/Tooling/ASTDiff/ASTDiff.h" |
14 | | #include "clang/AST/ParentMapContext.h" |
15 | | #include "clang/AST/RecursiveASTVisitor.h" |
16 | | #include "clang/Basic/SourceManager.h" |
17 | | #include "clang/Lex/Lexer.h" |
18 | | #include "llvm/ADT/PriorityQueue.h" |
19 | | |
20 | | #include <limits> |
21 | | #include <memory> |
22 | | #include <unordered_set> |
23 | | |
24 | | using namespace llvm; |
25 | | using namespace clang; |
26 | | |
27 | | namespace clang { |
28 | | namespace diff { |
29 | | |
30 | | namespace { |
31 | | /// Maps nodes of the left tree to ones on the right, and vice versa. |
32 | | class Mapping { |
33 | | public: |
34 | 6 | Mapping() = default; |
35 | | Mapping(Mapping &&Other) = default; |
36 | 6 | Mapping &operator=(Mapping &&Other) = default; |
37 | | |
38 | 6 | Mapping(size_t Size) { |
39 | 6 | SrcToDst = std::make_unique<NodeId[]>(Size); |
40 | 6 | DstToSrc = std::make_unique<NodeId[]>(Size); |
41 | 6 | } |
42 | | |
43 | 241 | void link(NodeId Src, NodeId Dst) { |
44 | 241 | SrcToDst[Src] = Dst, DstToSrc[Dst] = Src; |
45 | 241 | } |
46 | | |
47 | 2.23k | NodeId getDst(NodeId Src) const { return SrcToDst[Src]; } |
48 | 1.31k | NodeId getSrc(NodeId Dst) const { return DstToSrc[Dst]; } |
49 | 977 | bool hasSrc(NodeId Src) const { return getDst(Src).isValid(); } |
50 | 530 | bool hasDst(NodeId Dst) const { return getSrc(Dst).isValid(); } |
51 | | |
52 | | private: |
53 | | std::unique_ptr<NodeId[]> SrcToDst, DstToSrc; |
54 | | }; |
55 | | } // end anonymous namespace |
56 | | |
57 | | class ASTDiff::Impl { |
58 | | public: |
59 | | SyntaxTree::Impl &T1, &T2; |
60 | | Mapping TheMapping; |
61 | | |
62 | | Impl(SyntaxTree::Impl &T1, SyntaxTree::Impl &T2, |
63 | | const ComparisonOptions &Options); |
64 | | |
65 | | /// Matches nodes one-by-one based on their similarity. |
66 | | void computeMapping(); |
67 | | |
68 | | // Compute Change for each node based on similarity. |
69 | | void computeChangeKinds(Mapping &M); |
70 | | |
71 | | NodeId getMapped(const std::unique_ptr<SyntaxTree::Impl> &Tree, |
72 | 782 | NodeId Id) const { |
73 | 782 | if (&*Tree == &T1) |
74 | 281 | return TheMapping.getDst(Id); |
75 | 501 | assert(&*Tree == &T2 && "Invalid tree."); |
76 | 501 | return TheMapping.getSrc(Id); |
77 | 501 | } |
78 | | |
79 | | private: |
80 | | // Returns true if the two subtrees are identical. |
81 | | bool identical(NodeId Id1, NodeId Id2) const; |
82 | | |
83 | | // Returns false if the nodes must not be mached. |
84 | | bool isMatchingPossible(NodeId Id1, NodeId Id2) const; |
85 | | |
86 | | // Returns true if the nodes' parents are matched. |
87 | | bool haveSameParents(const Mapping &M, NodeId Id1, NodeId Id2) const; |
88 | | |
89 | | // Uses an optimal albeit slow algorithm to compute a mapping between two |
90 | | // subtrees, but only if both have fewer nodes than MaxSize. |
91 | | void addOptimalMapping(Mapping &M, NodeId Id1, NodeId Id2) const; |
92 | | |
93 | | // Computes the ratio of common descendants between the two nodes. |
94 | | // Descendants are only considered to be equal when they are mapped in M. |
95 | | double getJaccardSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const; |
96 | | |
97 | | // Returns the node that has the highest degree of similarity. |
98 | | NodeId findCandidate(const Mapping &M, NodeId Id1) const; |
99 | | |
100 | | // Returns a mapping of identical subtrees. |
101 | | Mapping matchTopDown() const; |
102 | | |
103 | | // Tries to match any yet unmapped nodes, in a bottom-up fashion. |
104 | | void matchBottomUp(Mapping &M) const; |
105 | | |
106 | | const ComparisonOptions &Options; |
107 | | |
108 | | friend class ZhangShashaMatcher; |
109 | | }; |
110 | | |
111 | | /// Represents the AST of a TranslationUnit. |
112 | | class SyntaxTree::Impl { |
113 | | public: |
114 | | Impl(SyntaxTree *Parent, ASTContext &AST); |
115 | | /// Constructs a tree from an AST node. |
116 | | Impl(SyntaxTree *Parent, Decl *N, ASTContext &AST); |
117 | | Impl(SyntaxTree *Parent, Stmt *N, ASTContext &AST); |
118 | | template <class T> |
119 | | Impl(SyntaxTree *Parent, |
120 | | std::enable_if_t<std::is_base_of<Stmt, T>::value, T> *Node, |
121 | | ASTContext &AST) |
122 | | : Impl(Parent, dyn_cast<Stmt>(Node), AST) {} |
123 | | template <class T> |
124 | | Impl(SyntaxTree *Parent, |
125 | | std::enable_if_t<std::is_base_of<Decl, T>::value, T> *Node, |
126 | | ASTContext &AST) |
127 | | : Impl(Parent, dyn_cast<Decl>(Node), AST) {} |
128 | | |
129 | | SyntaxTree *Parent; |
130 | | ASTContext &AST; |
131 | | PrintingPolicy TypePP; |
132 | | /// Nodes in preorder. |
133 | | std::vector<Node> Nodes; |
134 | | std::vector<NodeId> Leaves; |
135 | | // Maps preorder indices to postorder ones. |
136 | | std::vector<int> PostorderIds; |
137 | | std::vector<NodeId> NodesBfs; |
138 | | |
139 | 706 | int getSize() const { return Nodes.size(); } |
140 | 512 | NodeId getRootId() const { return 0; } |
141 | 39 | PreorderIterator begin() const { return getRootId(); } |
142 | 39 | PreorderIterator end() const { return getSize(); } |
143 | | |
144 | 40.9k | const Node &getNode(NodeId Id) const { return Nodes[Id]; } |
145 | 3.23k | Node &getMutableNode(NodeId Id) { return Nodes[Id]; } |
146 | 0 | bool isValidNodeId(NodeId Id) const { return Id >= 0 && Id < getSize(); } |
147 | 0 | void addNode(Node &N) { Nodes.push_back(N); } |
148 | | int getNumberOfDescendants(NodeId Id) const; |
149 | | bool isInSubtree(NodeId Id, NodeId SubtreeRoot) const; |
150 | | int findPositionInParent(NodeId Id, bool Shifted = false) const; |
151 | | |
152 | | std::string getRelativeName(const NamedDecl *ND, |
153 | | const DeclContext *Context) const; |
154 | | std::string getRelativeName(const NamedDecl *ND) const; |
155 | | |
156 | | std::string getNodeValue(NodeId Id) const; |
157 | | std::string getNodeValue(const Node &Node) const; |
158 | | std::string getDeclValue(const Decl *D) const; |
159 | | std::string getStmtValue(const Stmt *S) const; |
160 | | |
161 | | private: |
162 | | void initTree(); |
163 | | void setLeftMostDescendants(); |
164 | | }; |
165 | | |
166 | 320 | static bool isSpecializedNodeExcluded(const Decl *D) { return D->isImplicit(); } |
167 | 430 | static bool isSpecializedNodeExcluded(const Stmt *S) { return false; } |
168 | 9 | static bool isSpecializedNodeExcluded(CXXCtorInitializer *I) { |
169 | 9 | return !I->isWritten(); |
170 | 9 | } |
171 | | |
172 | | template <class T> |
173 | 791 | static bool isNodeExcluded(const SourceManager &SrcMgr, T *N) { |
174 | 791 | if (!N) |
175 | 5 | return true; |
176 | 786 | SourceLocation SLoc = N->getSourceRange().getBegin(); |
177 | 786 | if (SLoc.isValid()) { |
178 | | // Ignore everything from other files. |
179 | 684 | if (!SrcMgr.isInMainFile(SLoc)) |
180 | 21 | return true; |
181 | | // Ignore macros. |
182 | 663 | if (SLoc != SrcMgr.getSpellingLoc(SLoc)) |
183 | 6 | return true; |
184 | 759 | } |
185 | 759 | return isSpecializedNodeExcluded(N); |
186 | 759 | } ASTDiff.cpp:bool clang::diff::isNodeExcluded<clang::Decl>(clang::SourceManager const&, clang::Decl*) Line | Count | Source | 173 | 341 | static bool isNodeExcluded(const SourceManager &SrcMgr, T *N) { | 174 | 341 | if (!N) | 175 | 0 | return true; | 176 | 341 | SourceLocation SLoc = N->getSourceRange().getBegin(); | 177 | 341 | if (SLoc.isValid()) { | 178 | | // Ignore everything from other files. | 179 | 239 | if (!SrcMgr.isInMainFile(SLoc)) | 180 | 21 | return true; | 181 | | // Ignore macros. | 182 | 218 | if (SLoc != SrcMgr.getSpellingLoc(SLoc)) | 183 | 0 | return true; | 184 | 320 | } | 185 | 320 | return isSpecializedNodeExcluded(N); | 186 | 320 | } |
ASTDiff.cpp:bool clang::diff::isNodeExcluded<clang::CXXCtorInitializer>(clang::SourceManager const&, clang::CXXCtorInitializer*) Line | Count | Source | 173 | 9 | static bool isNodeExcluded(const SourceManager &SrcMgr, T *N) { | 174 | 9 | if (!N) | 175 | 0 | return true; | 176 | 9 | SourceLocation SLoc = N->getSourceRange().getBegin(); | 177 | 9 | if (SLoc.isValid()) { | 178 | | // Ignore everything from other files. | 179 | 9 | if (!SrcMgr.isInMainFile(SLoc)) | 180 | 0 | return true; | 181 | | // Ignore macros. | 182 | 9 | if (SLoc != SrcMgr.getSpellingLoc(SLoc)) | 183 | 0 | return true; | 184 | 9 | } | 185 | 9 | return isSpecializedNodeExcluded(N); | 186 | 9 | } |
ASTDiff.cpp:bool clang::diff::isNodeExcluded<clang::Stmt>(clang::SourceManager const&, clang::Stmt*) Line | Count | Source | 173 | 441 | static bool isNodeExcluded(const SourceManager &SrcMgr, T *N) { | 174 | 441 | if (!N) | 175 | 5 | return true; | 176 | 436 | SourceLocation SLoc = N->getSourceRange().getBegin(); | 177 | 436 | if (SLoc.isValid()) { | 178 | | // Ignore everything from other files. | 179 | 436 | if (!SrcMgr.isInMainFile(SLoc)) | 180 | 0 | return true; | 181 | | // Ignore macros. | 182 | 436 | if (SLoc != SrcMgr.getSpellingLoc(SLoc)) | 183 | 6 | return true; | 184 | 430 | } | 185 | 430 | return isSpecializedNodeExcluded(N); | 186 | 430 | } |
|
187 | | |
188 | | namespace { |
189 | | // Sets Height, Parent and Children for each node. |
190 | | struct PreorderVisitor : public RecursiveASTVisitor<PreorderVisitor> { |
191 | | int Id = 0, Depth = 0; |
192 | | NodeId Parent; |
193 | | SyntaxTree::Impl &Tree; |
194 | | |
195 | 17 | PreorderVisitor(SyntaxTree::Impl &Tree) : Tree(Tree) {} |
196 | | |
197 | 638 | template <class T> std::tuple<NodeId, NodeId> PreTraverse(T *ASTNode) { |
198 | 638 | NodeId MyId = Id; |
199 | 638 | Tree.Nodes.emplace_back(); |
200 | 638 | Node &N = Tree.getMutableNode(MyId); |
201 | 638 | N.Parent = Parent; |
202 | 638 | N.Depth = Depth; |
203 | 638 | N.ASTNode = DynTypedNode::create(*ASTNode); |
204 | 638 | assert(!N.ASTNode.getNodeKind().isNone() && |
205 | 638 | "Expected nodes to have a valid kind."); |
206 | 638 | if (Parent.isValid()) { |
207 | 621 | Node &P = Tree.getMutableNode(Parent); |
208 | 621 | P.Children.push_back(MyId); |
209 | 621 | } |
210 | 638 | Parent = MyId; |
211 | 638 | ++Id; |
212 | 638 | ++Depth; |
213 | 638 | return std::make_tuple(MyId, Tree.getNode(MyId).Parent); |
214 | 638 | } ASTDiff.cpp:std::__1::tuple<clang::diff::NodeId, clang::diff::NodeId> clang::diff::(anonymous namespace)::PreorderVisitor::PreTraverse<clang::Decl>(clang::Decl*) Line | Count | Source | 197 | 199 | template <class T> std::tuple<NodeId, NodeId> PreTraverse(T *ASTNode) { | 198 | 199 | NodeId MyId = Id; | 199 | 199 | Tree.Nodes.emplace_back(); | 200 | 199 | Node &N = Tree.getMutableNode(MyId); | 201 | 199 | N.Parent = Parent; | 202 | 199 | N.Depth = Depth; | 203 | 199 | N.ASTNode = DynTypedNode::create(*ASTNode); | 204 | 199 | assert(!N.ASTNode.getNodeKind().isNone() && | 205 | 199 | "Expected nodes to have a valid kind."); | 206 | 199 | if (Parent.isValid()) { | 207 | 182 | Node &P = Tree.getMutableNode(Parent); | 208 | 182 | P.Children.push_back(MyId); | 209 | 182 | } | 210 | 199 | Parent = MyId; | 211 | 199 | ++Id; | 212 | 199 | ++Depth; | 213 | 199 | return std::make_tuple(MyId, Tree.getNode(MyId).Parent); | 214 | 199 | } |
ASTDiff.cpp:std::__1::tuple<clang::diff::NodeId, clang::diff::NodeId> clang::diff::(anonymous namespace)::PreorderVisitor::PreTraverse<clang::CXXCtorInitializer>(clang::CXXCtorInitializer*) Line | Count | Source | 197 | 9 | template <class T> std::tuple<NodeId, NodeId> PreTraverse(T *ASTNode) { | 198 | 9 | NodeId MyId = Id; | 199 | 9 | Tree.Nodes.emplace_back(); | 200 | 9 | Node &N = Tree.getMutableNode(MyId); | 201 | 9 | N.Parent = Parent; | 202 | 9 | N.Depth = Depth; | 203 | 9 | N.ASTNode = DynTypedNode::create(*ASTNode); | 204 | 9 | assert(!N.ASTNode.getNodeKind().isNone() && | 205 | 9 | "Expected nodes to have a valid kind."); | 206 | 9 | if (Parent.isValid()) { | 207 | 9 | Node &P = Tree.getMutableNode(Parent); | 208 | 9 | P.Children.push_back(MyId); | 209 | 9 | } | 210 | 9 | Parent = MyId; | 211 | 9 | ++Id; | 212 | 9 | ++Depth; | 213 | 9 | return std::make_tuple(MyId, Tree.getNode(MyId).Parent); | 214 | 9 | } |
ASTDiff.cpp:std::__1::tuple<clang::diff::NodeId, clang::diff::NodeId> clang::diff::(anonymous namespace)::PreorderVisitor::PreTraverse<clang::Stmt>(clang::Stmt*) Line | Count | Source | 197 | 430 | template <class T> std::tuple<NodeId, NodeId> PreTraverse(T *ASTNode) { | 198 | 430 | NodeId MyId = Id; | 199 | 430 | Tree.Nodes.emplace_back(); | 200 | 430 | Node &N = Tree.getMutableNode(MyId); | 201 | 430 | N.Parent = Parent; | 202 | 430 | N.Depth = Depth; | 203 | 430 | N.ASTNode = DynTypedNode::create(*ASTNode); | 204 | 430 | assert(!N.ASTNode.getNodeKind().isNone() && | 205 | 430 | "Expected nodes to have a valid kind."); | 206 | 430 | if (Parent.isValid()) { | 207 | 430 | Node &P = Tree.getMutableNode(Parent); | 208 | 430 | P.Children.push_back(MyId); | 209 | 430 | } | 210 | 430 | Parent = MyId; | 211 | 430 | ++Id; | 212 | 430 | ++Depth; | 213 | 430 | return std::make_tuple(MyId, Tree.getNode(MyId).Parent); | 214 | 430 | } |
|
215 | 638 | void PostTraverse(std::tuple<NodeId, NodeId> State) { |
216 | 638 | NodeId MyId, PreviousParent; |
217 | 638 | std::tie(MyId, PreviousParent) = State; |
218 | 638 | assert(MyId.isValid() && "Expecting to only traverse valid nodes."); |
219 | 638 | Parent = PreviousParent; |
220 | 638 | --Depth; |
221 | 638 | Node &N = Tree.getMutableNode(MyId); |
222 | 638 | N.RightMostDescendant = Id - 1; |
223 | 638 | assert(N.RightMostDescendant >= 0 && |
224 | 638 | N.RightMostDescendant < Tree.getSize() && |
225 | 638 | "Rightmost descendant must be a valid tree node."); |
226 | 638 | if (N.isLeaf()) |
227 | 282 | Tree.Leaves.push_back(MyId); |
228 | 638 | N.Height = 1; |
229 | 638 | for (NodeId Child : N.Children) |
230 | 621 | N.Height = std::max(N.Height, 1 + Tree.getNode(Child).Height); |
231 | 638 | } |
232 | 341 | bool TraverseDecl(Decl *D) { |
233 | 341 | if (isNodeExcluded(Tree.AST.getSourceManager(), D)) |
234 | 142 | return true; |
235 | 199 | auto SavedState = PreTraverse(D); |
236 | 199 | RecursiveASTVisitor<PreorderVisitor>::TraverseDecl(D); |
237 | 199 | PostTraverse(SavedState); |
238 | 199 | return true; |
239 | 199 | } |
240 | 441 | bool TraverseStmt(Stmt *S) { |
241 | 441 | if (auto *E = dyn_cast_or_null<Expr>(S)) |
242 | 228 | S = E->IgnoreImplicit(); |
243 | 441 | if (isNodeExcluded(Tree.AST.getSourceManager(), S)) |
244 | 11 | return true; |
245 | 430 | auto SavedState = PreTraverse(S); |
246 | 430 | RecursiveASTVisitor<PreorderVisitor>::TraverseStmt(S); |
247 | 430 | PostTraverse(SavedState); |
248 | 430 | return true; |
249 | 430 | } |
250 | 9 | bool TraverseType(QualType T) { return true; } |
251 | 9 | bool TraverseConstructorInitializer(CXXCtorInitializer *Init) { |
252 | 9 | if (isNodeExcluded(Tree.AST.getSourceManager(), Init)) |
253 | 0 | return true; |
254 | 9 | auto SavedState = PreTraverse(Init); |
255 | 9 | RecursiveASTVisitor<PreorderVisitor>::TraverseConstructorInitializer(Init); |
256 | 9 | PostTraverse(SavedState); |
257 | 9 | return true; |
258 | 9 | } |
259 | | }; |
260 | | } // end anonymous namespace |
261 | | |
262 | | SyntaxTree::Impl::Impl(SyntaxTree *Parent, ASTContext &AST) |
263 | 17 | : Parent(Parent), AST(AST), TypePP(AST.getLangOpts()) { |
264 | 17 | TypePP.AnonymousTagLocations = false; |
265 | 17 | } |
266 | | |
267 | | SyntaxTree::Impl::Impl(SyntaxTree *Parent, Decl *N, ASTContext &AST) |
268 | 17 | : Impl(Parent, AST) { |
269 | 17 | PreorderVisitor PreorderWalker(*this); |
270 | 17 | PreorderWalker.TraverseDecl(N); |
271 | 17 | initTree(); |
272 | 17 | } Unexecuted instantiation: clang::diff::SyntaxTree::Impl::Impl(clang::diff::SyntaxTree*, clang::Decl*, clang::ASTContext&) clang::diff::SyntaxTree::Impl::Impl(clang::diff::SyntaxTree*, clang::Decl*, clang::ASTContext&) Line | Count | Source | 268 | 17 | : Impl(Parent, AST) { | 269 | 17 | PreorderVisitor PreorderWalker(*this); | 270 | 17 | PreorderWalker.TraverseDecl(N); | 271 | 17 | initTree(); | 272 | 17 | } |
|
273 | | |
274 | | SyntaxTree::Impl::Impl(SyntaxTree *Parent, Stmt *N, ASTContext &AST) |
275 | 0 | : Impl(Parent, AST) { |
276 | 0 | PreorderVisitor PreorderWalker(*this); |
277 | 0 | PreorderWalker.TraverseStmt(N); |
278 | 0 | initTree(); |
279 | 0 | } Unexecuted instantiation: clang::diff::SyntaxTree::Impl::Impl(clang::diff::SyntaxTree*, clang::Stmt*, clang::ASTContext&) Unexecuted instantiation: clang::diff::SyntaxTree::Impl::Impl(clang::diff::SyntaxTree*, clang::Stmt*, clang::ASTContext&) |
280 | | |
281 | | static std::vector<NodeId> getSubtreePostorder(const SyntaxTree::Impl &Tree, |
282 | 23 | NodeId Root) { |
283 | 23 | std::vector<NodeId> Postorder; |
284 | 639 | std::function<void(NodeId)> Traverse = [&](NodeId Id) { |
285 | 639 | const Node &N = Tree.getNode(Id); |
286 | 639 | for (NodeId Child : N.Children) |
287 | 616 | Traverse(Child); |
288 | 639 | Postorder.push_back(Id); |
289 | 639 | }; |
290 | 23 | Traverse(Root); |
291 | 23 | return Postorder; |
292 | 23 | } |
293 | | |
294 | | static std::vector<NodeId> getSubtreeBfs(const SyntaxTree::Impl &Tree, |
295 | 17 | NodeId Root) { |
296 | 17 | std::vector<NodeId> Ids; |
297 | 17 | size_t Expanded = 0; |
298 | 17 | Ids.push_back(Root); |
299 | 655 | while (Expanded < Ids.size()) |
300 | 638 | for (NodeId Child : Tree.getNode(Ids[Expanded++]).Children) |
301 | 621 | Ids.push_back(Child); |
302 | 17 | return Ids; |
303 | 17 | } |
304 | | |
305 | 17 | void SyntaxTree::Impl::initTree() { |
306 | 17 | setLeftMostDescendants(); |
307 | 17 | int PostorderId = 0; |
308 | 17 | PostorderIds.resize(getSize()); |
309 | 638 | std::function<void(NodeId)> PostorderTraverse = [&](NodeId Id) { |
310 | 638 | for (NodeId Child : getNode(Id).Children) |
311 | 621 | PostorderTraverse(Child); |
312 | 638 | PostorderIds[Id] = PostorderId; |
313 | 638 | ++PostorderId; |
314 | 638 | }; |
315 | 17 | PostorderTraverse(getRootId()); |
316 | 17 | NodesBfs = getSubtreeBfs(*this, getRootId()); |
317 | 17 | } |
318 | | |
319 | 17 | void SyntaxTree::Impl::setLeftMostDescendants() { |
320 | 282 | for (NodeId Leaf : Leaves) { |
321 | 282 | getMutableNode(Leaf).LeftMostDescendant = Leaf; |
322 | 282 | NodeId Parent, Cur = Leaf; |
323 | 638 | while ((Parent = getNode(Cur).Parent).isValid() && |
324 | 621 | getNode(Parent).Children[0] == Cur) { |
325 | 356 | Cur = Parent; |
326 | 356 | getMutableNode(Cur).LeftMostDescendant = Leaf; |
327 | 356 | } |
328 | 282 | } |
329 | 17 | } |
330 | | |
331 | 110 | int SyntaxTree::Impl::getNumberOfDescendants(NodeId Id) const { |
332 | 110 | return getNode(Id).RightMostDescendant - Id + 1; |
333 | 110 | } |
334 | | |
335 | 196 | bool SyntaxTree::Impl::isInSubtree(NodeId Id, NodeId SubtreeRoot) const { |
336 | 196 | return Id >= SubtreeRoot && Id <= getNode(SubtreeRoot).RightMostDescendant141 ; |
337 | 196 | } |
338 | | |
339 | 922 | int SyntaxTree::Impl::findPositionInParent(NodeId Id, bool Shifted) const { |
340 | 922 | NodeId Parent = getNode(Id).Parent; |
341 | 922 | if (Parent.isInvalid()) |
342 | 21 | return 0; |
343 | 901 | const auto &Siblings = getNode(Parent).Children; |
344 | 901 | int Position = 0; |
345 | 1.74k | for (size_t I = 0, E = Siblings.size(); I < E; ++I840 ) { |
346 | 1.74k | if (Shifted) |
347 | 1.65k | Position += getNode(Siblings[I]).Shift; |
348 | 1.74k | if (Siblings[I] == Id) { |
349 | 901 | Position += I; |
350 | 901 | return Position; |
351 | 901 | } |
352 | 1.74k | } |
353 | 901 | llvm_unreachable0 ("Node not found in parent's children."); |
354 | 901 | } |
355 | | |
356 | | // Returns the qualified name of ND. If it is subordinate to Context, |
357 | | // then the prefix of the latter is removed from the returned value. |
358 | | std::string |
359 | | SyntaxTree::Impl::getRelativeName(const NamedDecl *ND, |
360 | 958 | const DeclContext *Context) const { |
361 | 958 | std::string Val = ND->getQualifiedNameAsString(); |
362 | 958 | std::string ContextPrefix; |
363 | 958 | if (!Context) |
364 | 0 | return Val; |
365 | 958 | if (auto *Namespace = dyn_cast<NamespaceDecl>(Context)) |
366 | 411 | ContextPrefix = Namespace->getQualifiedNameAsString(); |
367 | 547 | else if (auto *Record = dyn_cast<RecordDecl>(Context)) |
368 | 155 | ContextPrefix = Record->getQualifiedNameAsString(); |
369 | 392 | else if (AST.getLangOpts().CPlusPlus11) |
370 | 392 | if (auto *Tag = dyn_cast<TagDecl>(Context)) |
371 | 0 | ContextPrefix = Tag->getQualifiedNameAsString(); |
372 | | // Strip the qualifier, if Val refers to something in the current scope. |
373 | | // But leave one leading ':' in place, so that we know that this is a |
374 | | // relative path. |
375 | 958 | if (!ContextPrefix.empty() && StringRef(Val).startswith(ContextPrefix)566 ) |
376 | 460 | Val = Val.substr(ContextPrefix.size() + 1); |
377 | 958 | return Val; |
378 | 958 | } |
379 | | |
380 | 746 | std::string SyntaxTree::Impl::getRelativeName(const NamedDecl *ND) const { |
381 | 746 | return getRelativeName(ND, ND->getDeclContext()); |
382 | 746 | } |
383 | | |
384 | | static const DeclContext *getEnclosingDeclContext(ASTContext &AST, |
385 | 212 | const Stmt *S) { |
386 | 846 | while (S) { |
387 | 846 | const auto &Parents = AST.getParents(*S); |
388 | 846 | if (Parents.empty()) |
389 | 0 | return nullptr; |
390 | 846 | const auto &P = Parents[0]; |
391 | 846 | if (const auto *D = P.get<Decl>()) |
392 | 212 | return D->getDeclContext(); |
393 | 634 | S = P.get<Stmt>(); |
394 | 634 | } |
395 | 0 | return nullptr; |
396 | 212 | } |
397 | | |
398 | | static std::string getInitializerValue(const CXXCtorInitializer *Init, |
399 | 15 | const PrintingPolicy &TypePP) { |
400 | 15 | if (Init->isAnyMemberInitializer()) |
401 | 5 | return std::string(Init->getAnyMember()->getName()); |
402 | 10 | if (Init->isBaseInitializer()) |
403 | 5 | return QualType(Init->getBaseClass(), 0).getAsString(TypePP); |
404 | 5 | if (Init->isDelegatingInitializer()) |
405 | 5 | return Init->getTypeSourceInfo()->getType().getAsString(TypePP); |
406 | 0 | llvm_unreachable("Unknown initializer type"); |
407 | 0 | } |
408 | | |
409 | 3.70k | std::string SyntaxTree::Impl::getNodeValue(NodeId Id) const { |
410 | 3.70k | return getNodeValue(getNode(Id)); |
411 | 3.70k | } |
412 | | |
413 | 3.84k | std::string SyntaxTree::Impl::getNodeValue(const Node &N) const { |
414 | 3.84k | const DynTypedNode &DTN = N.ASTNode; |
415 | 3.84k | if (auto *S = DTN.get<Stmt>()) |
416 | 3.04k | return getStmtValue(S); |
417 | 802 | if (auto *D = DTN.get<Decl>()) |
418 | 787 | return getDeclValue(D); |
419 | 15 | if (auto *Init = DTN.get<CXXCtorInitializer>()) |
420 | 15 | return getInitializerValue(Init, TypePP); |
421 | 0 | llvm_unreachable("Fatal: unhandled AST node.\n"); |
422 | 0 | } |
423 | | |
424 | 787 | std::string SyntaxTree::Impl::getDeclValue(const Decl *D) const { |
425 | 787 | std::string Value; |
426 | 787 | if (auto *V = dyn_cast<ValueDecl>(D)) |
427 | 623 | return getRelativeName(V) + "(" + V->getType().getAsString(TypePP) + ")"; |
428 | 164 | if (auto *N = dyn_cast<NamedDecl>(D)) |
429 | 112 | Value += getRelativeName(N) + ";"; |
430 | 164 | if (auto *T = dyn_cast<TypedefNameDecl>(D)) |
431 | 26 | return Value + T->getUnderlyingType().getAsString(TypePP) + ";"; |
432 | 138 | if (auto *T = dyn_cast<TypeDecl>(D)) |
433 | 35 | if (T->getTypeForDecl()) |
434 | 35 | Value += |
435 | 35 | T->getTypeForDecl()->getCanonicalTypeInternal().getAsString(TypePP) + |
436 | 35 | ";"; |
437 | 138 | if (auto *U = dyn_cast<UsingDirectiveDecl>(D)) |
438 | 5 | return std::string(U->getNominatedNamespace()->getName()); |
439 | 133 | if (auto *A = dyn_cast<AccessSpecDecl>(D)) { |
440 | 7 | CharSourceRange Range(A->getSourceRange(), false); |
441 | 7 | return std::string( |
442 | 7 | Lexer::getSourceText(Range, AST.getSourceManager(), AST.getLangOpts())); |
443 | 7 | } |
444 | 126 | return Value; |
445 | 126 | } |
446 | | |
447 | 3.04k | std::string SyntaxTree::Impl::getStmtValue(const Stmt *S) const { |
448 | 3.04k | if (auto *U = dyn_cast<UnaryOperator>(S)) |
449 | 8 | return std::string(UnaryOperator::getOpcodeStr(U->getOpcode())); |
450 | 3.03k | if (auto *B = dyn_cast<BinaryOperator>(S)) |
451 | 487 | return std::string(B->getOpcodeStr()); |
452 | 2.55k | if (auto *M = dyn_cast<MemberExpr>(S)) |
453 | 11 | return getRelativeName(M->getMemberDecl()); |
454 | 2.53k | if (auto *I = dyn_cast<IntegerLiteral>(S)) { |
455 | 893 | SmallString<256> Str; |
456 | 893 | I->getValue().toString(Str, /*Radix=*/10, /*Signed=*/false); |
457 | 893 | return std::string(Str.str()); |
458 | 893 | } |
459 | 1.64k | if (auto *F = dyn_cast<FloatingLiteral>(S)) { |
460 | 5 | SmallString<256> Str; |
461 | 5 | F->getValue().toString(Str); |
462 | 5 | return std::string(Str.str()); |
463 | 5 | } |
464 | 1.64k | if (auto *D = dyn_cast<DeclRefExpr>(S)) |
465 | 212 | return getRelativeName(D->getDecl(), getEnclosingDeclContext(AST, S)); |
466 | 1.42k | if (auto *String = dyn_cast<StringLiteral>(S)) |
467 | 100 | return std::string(String->getString()); |
468 | 1.32k | if (auto *B = dyn_cast<CXXBoolLiteralExpr>(S)) |
469 | 5 | return B->getValue() ? "true" : "false"0 ; |
470 | 1.32k | return ""; |
471 | 1.32k | } |
472 | | |
473 | | /// Identifies a node in a subtree by its postorder offset, starting at 1. |
474 | | struct SNodeId { |
475 | | int Id = 0; |
476 | | |
477 | 12.5k | explicit SNodeId(int Id) : Id(Id) {} |
478 | 955 | explicit SNodeId() = default; |
479 | | |
480 | 1.70M | operator int() const { return Id; } |
481 | 69.8k | SNodeId &operator++() { return ++Id, *this; } |
482 | 784 | SNodeId &operator--() { return --Id, *this; } |
483 | 10.8k | SNodeId operator+(int Other) const { return SNodeId(Id + Other); } |
484 | | }; |
485 | | |
486 | | class Subtree { |
487 | | private: |
488 | | /// The parent tree. |
489 | | const SyntaxTree::Impl &Tree; |
490 | | /// Maps SNodeIds to original ids. |
491 | | std::vector<NodeId> RootIds; |
492 | | /// Maps subtree nodes to their leftmost descendants wtihin the subtree. |
493 | | std::vector<SNodeId> LeftMostDescendants; |
494 | | |
495 | | public: |
496 | | std::vector<SNodeId> KeyRoots; |
497 | | |
498 | 18 | Subtree(const SyntaxTree::Impl &Tree, NodeId SubtreeRoot) : Tree(Tree) { |
499 | 18 | RootIds = getSubtreePostorder(Tree, SubtreeRoot); |
500 | 18 | int NumLeaves = setLeftMostDescendants(); |
501 | 18 | computeKeyRoots(NumLeaves); |
502 | 18 | } |
503 | 152k | int getSize() const { return RootIds.size(); } |
504 | 25.3k | NodeId getIdInRoot(SNodeId Id) const { |
505 | 25.3k | assert(Id > 0 && Id <= getSize() && "Invalid subtree node index."); |
506 | 25.3k | return RootIds[Id - 1]; |
507 | 25.3k | } |
508 | 392 | const Node &getNode(SNodeId Id) const { |
509 | 392 | return Tree.getNode(getIdInRoot(Id)); |
510 | 392 | } |
511 | 126k | SNodeId getLeftMostDescendant(SNodeId Id) const { |
512 | 126k | assert(Id > 0 && Id <= getSize() && "Invalid subtree node index."); |
513 | 126k | return LeftMostDescendants[Id - 1]; |
514 | 126k | } |
515 | | /// Returns the postorder index of the leftmost descendant in the subtree. |
516 | 784 | NodeId getPostorderOffset() const { |
517 | 784 | return Tree.PostorderIds[getIdInRoot(SNodeId(1))]; |
518 | 784 | } |
519 | 2.25k | std::string getNodeValue(SNodeId Id) const { |
520 | 2.25k | return Tree.getNodeValue(getIdInRoot(Id)); |
521 | 2.25k | } |
522 | | |
523 | | private: |
524 | | /// Returns the number of leafs in the subtree. |
525 | 18 | int setLeftMostDescendants() { |
526 | 18 | int NumLeaves = 0; |
527 | 18 | LeftMostDescendants.resize(getSize()); |
528 | 410 | for (int I = 0; I < getSize(); ++I392 ) { |
529 | 392 | SNodeId SI(I + 1); |
530 | 392 | const Node &N = getNode(SI); |
531 | 392 | NumLeaves += N.isLeaf(); |
532 | 392 | assert(I == Tree.PostorderIds[getIdInRoot(SI)] - getPostorderOffset() && |
533 | 392 | "Postorder traversal in subtree should correspond to traversal in " |
534 | 392 | "the root tree by a constant offset."); |
535 | 392 | LeftMostDescendants[I] = SNodeId(Tree.PostorderIds[N.LeftMostDescendant] - |
536 | 392 | getPostorderOffset()); |
537 | 392 | } |
538 | 18 | return NumLeaves; |
539 | 18 | } |
540 | 18 | void computeKeyRoots(int Leaves) { |
541 | 18 | KeyRoots.resize(Leaves); |
542 | 18 | std::unordered_set<int> Visited; |
543 | 18 | int K = Leaves - 1; |
544 | 410 | for (SNodeId I(getSize()); I > 0; --I392 ) { |
545 | 392 | SNodeId LeftDesc = getLeftMostDescendant(I); |
546 | 392 | if (Visited.count(LeftDesc)) |
547 | 237 | continue; |
548 | 155 | assert(K >= 0 && "K should be non-negative"); |
549 | 155 | KeyRoots[K] = I; |
550 | 155 | Visited.insert(LeftDesc); |
551 | 155 | --K; |
552 | 155 | } |
553 | 18 | } |
554 | | }; |
555 | | |
556 | | /// Implementation of Zhang and Shasha's Algorithm for tree edit distance. |
557 | | /// Computes an optimal mapping between two trees using only insertion, |
558 | | /// deletion and update as edit actions (similar to the Levenshtein distance). |
559 | | class ZhangShashaMatcher { |
560 | | const ASTDiff::Impl &DiffImpl; |
561 | | Subtree S1; |
562 | | Subtree S2; |
563 | | std::unique_ptr<std::unique_ptr<double[]>[]> TreeDist, ForestDist; |
564 | | |
565 | | public: |
566 | | ZhangShashaMatcher(const ASTDiff::Impl &DiffImpl, const SyntaxTree::Impl &T1, |
567 | | const SyntaxTree::Impl &T2, NodeId Id1, NodeId Id2) |
568 | 9 | : DiffImpl(DiffImpl), S1(T1, Id1), S2(T2, Id2) { |
569 | 9 | TreeDist = std::make_unique<std::unique_ptr<double[]>[]>( |
570 | 9 | size_t(S1.getSize()) + 1); |
571 | 9 | ForestDist = std::make_unique<std::unique_ptr<double[]>[]>( |
572 | 9 | size_t(S1.getSize()) + 1); |
573 | 215 | for (int I = 0, E = S1.getSize() + 1; I < E; ++I206 ) { |
574 | 206 | TreeDist[I] = std::make_unique<double[]>(size_t(S2.getSize()) + 1); |
575 | 206 | ForestDist[I] = std::make_unique<double[]>(size_t(S2.getSize()) + 1); |
576 | 206 | } |
577 | 9 | } |
578 | | |
579 | 9 | std::vector<std::pair<NodeId, NodeId>> getMatchingNodes() { |
580 | 9 | std::vector<std::pair<NodeId, NodeId>> Matches; |
581 | 9 | std::vector<std::pair<SNodeId, SNodeId>> TreePairs; |
582 | | |
583 | 9 | computeTreeDist(); |
584 | | |
585 | 9 | bool RootNodePair = true; |
586 | | |
587 | 9 | TreePairs.emplace_back(SNodeId(S1.getSize()), SNodeId(S2.getSize())); |
588 | | |
589 | 77 | while (!TreePairs.empty()) { |
590 | 68 | SNodeId LastRow, LastCol, FirstRow, FirstCol, Row, Col; |
591 | 68 | std::tie(LastRow, LastCol) = TreePairs.back(); |
592 | 68 | TreePairs.pop_back(); |
593 | | |
594 | 68 | if (!RootNodePair) { |
595 | 59 | computeForestDist(LastRow, LastCol); |
596 | 59 | } |
597 | | |
598 | 68 | RootNodePair = false; |
599 | | |
600 | 68 | FirstRow = S1.getLeftMostDescendant(LastRow); |
601 | 68 | FirstCol = S2.getLeftMostDescendant(LastCol); |
602 | | |
603 | 68 | Row = LastRow; |
604 | 68 | Col = LastCol; |
605 | | |
606 | 347 | while (Row > FirstRow || Col > FirstCol68 ) { |
607 | 279 | if (Row > FirstRow && |
608 | 279 | ForestDist[Row - 1][Col] + 1 == ForestDist[Row][Col]) { |
609 | 25 | --Row; |
610 | 254 | } else if (Col > FirstCol && |
611 | 254 | ForestDist[Row][Col - 1] + 1 == ForestDist[Row][Col]) { |
612 | 23 | --Col; |
613 | 231 | } else { |
614 | 231 | SNodeId LMD1 = S1.getLeftMostDescendant(Row); |
615 | 231 | SNodeId LMD2 = S2.getLeftMostDescendant(Col); |
616 | 231 | if (LMD1 == S1.getLeftMostDescendant(LastRow) && |
617 | 172 | LMD2 == S2.getLeftMostDescendant(LastCol)) { |
618 | 172 | NodeId Id1 = S1.getIdInRoot(Row); |
619 | 172 | NodeId Id2 = S2.getIdInRoot(Col); |
620 | 172 | assert(DiffImpl.isMatchingPossible(Id1, Id2) && |
621 | 172 | "These nodes must not be matched."); |
622 | 172 | Matches.emplace_back(Id1, Id2); |
623 | 172 | --Row; |
624 | 172 | --Col; |
625 | 59 | } else { |
626 | 59 | TreePairs.emplace_back(Row, Col); |
627 | 59 | Row = LMD1; |
628 | 59 | Col = LMD2; |
629 | 59 | } |
630 | 231 | } |
631 | 279 | } |
632 | 68 | } |
633 | 9 | return Matches; |
634 | 9 | } |
635 | | |
636 | | private: |
637 | | /// We use a simple cost model for edit actions, which seems good enough. |
638 | | /// Simple cost model for edit actions. This seems to make the matching |
639 | | /// algorithm perform reasonably well. |
640 | | /// The values range between 0 and 1, or infinity if this edit action should |
641 | | /// always be avoided. |
642 | | static constexpr double DeletionCost = 1; |
643 | | static constexpr double InsertionCost = 1; |
644 | | |
645 | 10.5k | double getUpdateCost(SNodeId Id1, SNodeId Id2) { |
646 | 10.5k | if (!DiffImpl.isMatchingPossible(S1.getIdInRoot(Id1), S2.getIdInRoot(Id2))) |
647 | 9.45k | return std::numeric_limits<double>::max(); |
648 | 1.12k | return S1.getNodeValue(Id1) != S2.getNodeValue(Id2); |
649 | 1.12k | } |
650 | | |
651 | 9 | void computeTreeDist() { |
652 | 9 | for (SNodeId Id1 : S1.KeyRoots) |
653 | 80 | for (SNodeId Id2 : S2.KeyRoots) |
654 | 1.59k | computeForestDist(Id1, Id2); |
655 | 9 | } |
656 | | |
657 | 1.65k | void computeForestDist(SNodeId Id1, SNodeId Id2) { |
658 | 1.65k | assert(Id1 > 0 && Id2 > 0 && "Expecting offsets greater than 0."); |
659 | 1.65k | SNodeId LMD1 = S1.getLeftMostDescendant(Id1); |
660 | 1.65k | SNodeId LMD2 = S2.getLeftMostDescendant(Id2); |
661 | | |
662 | 1.65k | ForestDist[LMD1][LMD2] = 0; |
663 | 10.8k | for (SNodeId D1 = LMD1 + 1; D1 <= Id1; ++D19.24k ) { |
664 | 9.24k | ForestDist[D1][LMD2] = ForestDist[D1 - 1][LMD2] + DeletionCost; |
665 | 69.8k | for (SNodeId D2 = LMD2 + 1; D2 <= Id2; ++D260.6k ) { |
666 | 60.6k | ForestDist[LMD1][D2] = ForestDist[LMD1][D2 - 1] + InsertionCost; |
667 | 60.6k | SNodeId DLMD1 = S1.getLeftMostDescendant(D1); |
668 | 60.6k | SNodeId DLMD2 = S2.getLeftMostDescendant(D2); |
669 | 60.6k | if (DLMD1 == LMD1 && DLMD2 == LMD225.5k ) { |
670 | 10.5k | double UpdateCost = getUpdateCost(D1, D2); |
671 | 10.5k | ForestDist[D1][D2] = |
672 | 10.5k | std::min({ForestDist[D1 - 1][D2] + DeletionCost, |
673 | 10.5k | ForestDist[D1][D2 - 1] + InsertionCost, |
674 | 10.5k | ForestDist[D1 - 1][D2 - 1] + UpdateCost}); |
675 | 10.5k | TreeDist[D1][D2] = ForestDist[D1][D2]; |
676 | 50.0k | } else { |
677 | 50.0k | ForestDist[D1][D2] = |
678 | 50.0k | std::min({ForestDist[D1 - 1][D2] + DeletionCost, |
679 | 50.0k | ForestDist[D1][D2 - 1] + InsertionCost, |
680 | 50.0k | ForestDist[DLMD1][DLMD2] + TreeDist[D1][D2]}); |
681 | 50.0k | } |
682 | 60.6k | } |
683 | 9.24k | } |
684 | 1.65k | } |
685 | | }; |
686 | | |
687 | 23.7k | ASTNodeKind Node::getType() const { return ASTNode.getNodeKind(); } |
688 | | |
689 | 588 | StringRef Node::getTypeLabel() const { return getType().asStringRef(); } |
690 | | |
691 | 7 | llvm::Optional<std::string> Node::getQualifiedIdentifier() const { |
692 | 7 | if (auto *ND = ASTNode.get<NamedDecl>()) { |
693 | 4 | if (ND->getDeclName().isIdentifier()) |
694 | 4 | return ND->getQualifiedNameAsString(); |
695 | 3 | } |
696 | 3 | return llvm::None; |
697 | 3 | } |
698 | | |
699 | 7 | llvm::Optional<StringRef> Node::getIdentifier() const { |
700 | 7 | if (auto *ND = ASTNode.get<NamedDecl>()) { |
701 | 4 | if (ND->getDeclName().isIdentifier()) |
702 | 4 | return ND->getName(); |
703 | 3 | } |
704 | 3 | return llvm::None; |
705 | 3 | } |
706 | | |
707 | | namespace { |
708 | | // Compares nodes by their depth. |
709 | | struct HeightLess { |
710 | | const SyntaxTree::Impl &Tree; |
711 | 12 | HeightLess(const SyntaxTree::Impl &Tree) : Tree(Tree) {} |
712 | 876 | bool operator()(NodeId Id1, NodeId Id2) const { |
713 | 876 | return Tree.getNode(Id1).Height < Tree.getNode(Id2).Height; |
714 | 876 | } |
715 | | }; |
716 | | } // end anonymous namespace |
717 | | |
718 | | namespace { |
719 | | // Priority queue for nodes, sorted descendingly by their height. |
720 | | class PriorityList { |
721 | | const SyntaxTree::Impl &Tree; |
722 | | HeightLess Cmp; |
723 | | std::vector<NodeId> Container; |
724 | | PriorityQueue<NodeId, std::vector<NodeId>, HeightLess> List; |
725 | | |
726 | | public: |
727 | | PriorityList(const SyntaxTree::Impl &Tree) |
728 | 12 | : Tree(Tree), Cmp(Tree), List(Cmp, Container) {} |
729 | | |
730 | 247 | void push(NodeId id) { List.push(id); } |
731 | | |
732 | 58 | std::vector<NodeId> pop() { |
733 | 58 | int Max = peekMax(); |
734 | 58 | std::vector<NodeId> Result; |
735 | 58 | if (Max == 0) |
736 | 0 | return Result; |
737 | 211 | while (58 peekMax() == Max) { |
738 | 153 | Result.push_back(List.top()); |
739 | 153 | List.pop(); |
740 | 153 | } |
741 | | // TODO this is here to get a stable output, not a good heuristic |
742 | 58 | llvm::sort(Result); |
743 | 58 | return Result; |
744 | 58 | } |
745 | 343 | int peekMax() const { |
746 | 343 | if (List.empty()) |
747 | 22 | return 0; |
748 | 321 | return Tree.getNode(List.top()).Height; |
749 | 321 | } |
750 | 121 | void open(NodeId Id) { |
751 | 121 | for (NodeId Child : Tree.getNode(Id).Children) |
752 | 235 | push(Child); |
753 | 121 | } |
754 | | }; |
755 | | } // end anonymous namespace |
756 | | |
757 | 477 | bool ASTDiff::Impl::identical(NodeId Id1, NodeId Id2) const { |
758 | 477 | const Node &N1 = T1.getNode(Id1); |
759 | 477 | const Node &N2 = T2.getNode(Id2); |
760 | 477 | if (N1.Children.size() != N2.Children.size() || |
761 | 344 | !isMatchingPossible(Id1, Id2) || |
762 | 258 | T1.getNodeValue(Id1) != T2.getNodeValue(Id2)) |
763 | 257 | return false; |
764 | 358 | for (size_t Id = 0, E = N1.Children.size(); 220 Id < E; ++Id138 ) |
765 | 201 | if (!identical(N1.Children[Id], N2.Children[Id])) |
766 | 63 | return false; |
767 | 157 | return true; |
768 | 220 | } |
769 | | |
770 | 11.6k | bool ASTDiff::Impl::isMatchingPossible(NodeId Id1, NodeId Id2) const { |
771 | 11.6k | return Options.isMatchingAllowed(T1.getNode(Id1), T2.getNode(Id2)); |
772 | 11.6k | } |
773 | | |
774 | | bool ASTDiff::Impl::haveSameParents(const Mapping &M, NodeId Id1, |
775 | 482 | NodeId Id2) const { |
776 | 482 | NodeId P1 = T1.getNode(Id1).Parent; |
777 | 482 | NodeId P2 = T2.getNode(Id2).Parent; |
778 | 482 | return (P1.isInvalid() && P2.isInvalid()10 ) || |
779 | 472 | (P1.isValid() && P2.isValid() && M.getDst(P1) == P2); |
780 | 482 | } |
781 | | |
782 | | void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1, |
783 | 16 | NodeId Id2) const { |
784 | 16 | if (std::max(T1.getNumberOfDescendants(Id1), T2.getNumberOfDescendants(Id2)) > |
785 | 16 | Options.MaxSize) |
786 | 7 | return; |
787 | 9 | ZhangShashaMatcher Matcher(*this, T1, T2, Id1, Id2); |
788 | 9 | std::vector<std::pair<NodeId, NodeId>> R = Matcher.getMatchingNodes(); |
789 | 172 | for (const auto &Tuple : R) { |
790 | 172 | NodeId Src = Tuple.first; |
791 | 172 | NodeId Dst = Tuple.second; |
792 | 172 | if (!M.hasSrc(Src) && !M.hasDst(Dst)84 ) |
793 | 84 | M.link(Src, Dst); |
794 | 172 | } |
795 | 9 | } |
796 | | |
797 | | double ASTDiff::Impl::getJaccardSimilarity(const Mapping &M, NodeId Id1, |
798 | 31 | NodeId Id2) const { |
799 | 31 | int CommonDescendants = 0; |
800 | 31 | const Node &N1 = T1.getNode(Id1); |
801 | | // Count the common descendants, excluding the subtree root. |
802 | 252 | for (NodeId Src = Id1 + 1; Src <= N1.RightMostDescendant; ++Src221 ) { |
803 | 221 | NodeId Dst = M.getDst(Src); |
804 | 221 | CommonDescendants += int(Dst.isValid() && T2.isInSubtree(Dst, Id2)196 ); |
805 | 221 | } |
806 | | // We need to subtract 1 to get the number of descendants excluding the root. |
807 | 31 | double Denominator = T1.getNumberOfDescendants(Id1) - 1 + |
808 | 31 | T2.getNumberOfDescendants(Id2) - 1 - CommonDescendants; |
809 | | // CommonDescendants is less than the size of one subtree. |
810 | 31 | assert(Denominator >= 0 && "Expected non-negative denominator."); |
811 | 31 | if (Denominator == 0) |
812 | 0 | return 0; |
813 | 31 | return CommonDescendants / Denominator; |
814 | 31 | } |
815 | | |
816 | 13 | NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const { |
817 | 13 | NodeId Candidate; |
818 | 13 | double HighestSimilarity = 0.0; |
819 | 504 | for (NodeId Id2 : T2) { |
820 | 504 | if (!isMatchingPossible(Id1, Id2)) |
821 | 438 | continue; |
822 | 66 | if (M.hasDst(Id2)) |
823 | 35 | continue; |
824 | 31 | double Similarity = getJaccardSimilarity(M, Id1, Id2); |
825 | 31 | if (Similarity >= Options.MinSimilarity && Similarity > HighestSimilarity12 ) { |
826 | 12 | HighestSimilarity = Similarity; |
827 | 12 | Candidate = Id2; |
828 | 12 | } |
829 | 31 | } |
830 | 13 | return Candidate; |
831 | 13 | } |
832 | | |
833 | 5 | void ASTDiff::Impl::matchBottomUp(Mapping &M) const { |
834 | 5 | std::vector<NodeId> Postorder = getSubtreePostorder(T1, T1.getRootId()); |
835 | 247 | for (NodeId Id1 : Postorder) { |
836 | 247 | if (Id1 == T1.getRootId() && !M.hasSrc(T1.getRootId())5 && |
837 | 4 | !M.hasDst(T2.getRootId())) { |
838 | 4 | if (isMatchingPossible(T1.getRootId(), T2.getRootId())) { |
839 | 4 | M.link(T1.getRootId(), T2.getRootId()); |
840 | 4 | addOptimalMapping(M, T1.getRootId(), T2.getRootId()); |
841 | 4 | } |
842 | 4 | break; |
843 | 4 | } |
844 | 243 | bool Matched = M.hasSrc(Id1); |
845 | 243 | const Node &N1 = T1.getNode(Id1); |
846 | 243 | bool MatchedChildren = llvm::any_of( |
847 | 182 | N1.Children, [&](NodeId Child) { return M.hasSrc(Child); }); |
848 | 243 | if (Matched || !MatchedChildren127 ) |
849 | 230 | continue; |
850 | 13 | NodeId Id2 = findCandidate(M, Id1); |
851 | 13 | if (Id2.isValid()) { |
852 | 12 | M.link(Id1, Id2); |
853 | 12 | addOptimalMapping(M, Id1, Id2); |
854 | 12 | } |
855 | 13 | } |
856 | 5 | } |
857 | | |
858 | 6 | Mapping ASTDiff::Impl::matchTopDown() const { |
859 | 6 | PriorityList L1(T1); |
860 | 6 | PriorityList L2(T2); |
861 | | |
862 | 6 | Mapping M(T1.getSize() + T2.getSize()); |
863 | | |
864 | 6 | L1.push(T1.getRootId()); |
865 | 6 | L2.push(T2.getRootId()); |
866 | | |
867 | 6 | int Max1, Max2; |
868 | 37 | while (std::min(Max1 = L1.peekMax(), Max2 = L2.peekMax()) > |
869 | 31 | Options.MinHeight) { |
870 | 31 | if (Max1 > Max2) { |
871 | 0 | for (NodeId Id : L1.pop()) |
872 | 0 | L1.open(Id); |
873 | 0 | continue; |
874 | 0 | } |
875 | 31 | if (Max2 > Max1) { |
876 | 4 | for (NodeId Id : L2.pop()) |
877 | 4 | L2.open(Id); |
878 | 4 | continue; |
879 | 4 | } |
880 | 27 | std::vector<NodeId> H1, H2; |
881 | 27 | H1 = L1.pop(); |
882 | 27 | H2 = L2.pop(); |
883 | 75 | for (NodeId Id1 : H1) { |
884 | 276 | for (NodeId Id2 : H2) { |
885 | 276 | if (identical(Id1, Id2) && !M.hasSrc(Id1)19 && !M.hasDst(Id2)17 ) { |
886 | 157 | for (int I = 0, E = T1.getNumberOfDescendants(Id1); I < E; ++I141 ) |
887 | 141 | M.link(Id1 + I, Id2 + I); |
888 | 16 | } |
889 | 276 | } |
890 | 75 | } |
891 | 75 | for (NodeId Id1 : H1) { |
892 | 75 | if (!M.hasSrc(Id1)) |
893 | 59 | L1.open(Id1); |
894 | 75 | } |
895 | 74 | for (NodeId Id2 : H2) { |
896 | 74 | if (!M.hasDst(Id2)) |
897 | 58 | L2.open(Id2); |
898 | 74 | } |
899 | 27 | } |
900 | 6 | return M; |
901 | 6 | } |
902 | | |
903 | | ASTDiff::Impl::Impl(SyntaxTree::Impl &T1, SyntaxTree::Impl &T2, |
904 | | const ComparisonOptions &Options) |
905 | 6 | : T1(T1), T2(T2), Options(Options) { |
906 | 6 | computeMapping(); |
907 | 6 | computeChangeKinds(TheMapping); |
908 | 6 | } |
909 | | |
910 | 6 | void ASTDiff::Impl::computeMapping() { |
911 | 6 | TheMapping = matchTopDown(); |
912 | 6 | if (Options.StopAfterTopDown) |
913 | 1 | return; |
914 | 5 | matchBottomUp(TheMapping); |
915 | 5 | } |
916 | | |
917 | 6 | void ASTDiff::Impl::computeChangeKinds(Mapping &M) { |
918 | 281 | for (NodeId Id1 : T1) { |
919 | 281 | if (!M.hasSrc(Id1)) { |
920 | 40 | T1.getMutableNode(Id1).Change = Delete; |
921 | 40 | T1.getMutableNode(Id1).Shift -= 1; |
922 | 40 | } |
923 | 281 | } |
924 | 285 | for (NodeId Id2 : T2) { |
925 | 285 | if (!M.hasDst(Id2)) { |
926 | 44 | T2.getMutableNode(Id2).Change = Insert; |
927 | 44 | T2.getMutableNode(Id2).Shift -= 1; |
928 | 44 | } |
929 | 285 | } |
930 | 281 | for (NodeId Id1 : T1.NodesBfs) { |
931 | 281 | NodeId Id2 = M.getDst(Id1); |
932 | 281 | if (Id2.isInvalid()) |
933 | 40 | continue; |
934 | 241 | if (!haveSameParents(M, Id1, Id2) || |
935 | 218 | T1.findPositionInParent(Id1, true) != |
936 | 25 | T2.findPositionInParent(Id2, true)) { |
937 | 25 | T1.getMutableNode(Id1).Shift -= 1; |
938 | 25 | T2.getMutableNode(Id2).Shift -= 1; |
939 | 25 | } |
940 | 241 | } |
941 | 285 | for (NodeId Id2 : T2.NodesBfs) { |
942 | 285 | NodeId Id1 = M.getSrc(Id2); |
943 | 285 | if (Id1.isInvalid()) |
944 | 44 | continue; |
945 | 241 | Node &N1 = T1.getMutableNode(Id1); |
946 | 241 | Node &N2 = T2.getMutableNode(Id2); |
947 | 241 | if (Id1.isInvalid()) |
948 | 0 | continue; |
949 | 241 | if (!haveSameParents(M, Id1, Id2) || |
950 | 218 | T1.findPositionInParent(Id1, true) != |
951 | 23 | T2.findPositionInParent(Id2, true)) { |
952 | 23 | N1.Change = N2.Change = Move; |
953 | 23 | } |
954 | 241 | if (T1.getNodeValue(Id1) != T2.getNodeValue(Id2)) { |
955 | 18 | N1.Change = N2.Change = (N1.Change == Move ? UpdateMove4 : Update); |
956 | 22 | } |
957 | 241 | } |
958 | 6 | } |
959 | | |
960 | | ASTDiff::ASTDiff(SyntaxTree &T1, SyntaxTree &T2, |
961 | | const ComparisonOptions &Options) |
962 | 6 | : DiffImpl(std::make_unique<Impl>(*T1.TreeImpl, *T2.TreeImpl, Options)) {} |
963 | | |
964 | 6 | ASTDiff::~ASTDiff() = default; |
965 | | |
966 | 782 | NodeId ASTDiff::getMapped(const SyntaxTree &SourceTree, NodeId Id) const { |
967 | 782 | return DiffImpl->getMapped(SourceTree.TreeImpl, Id); |
968 | 782 | } |
969 | | |
970 | | SyntaxTree::SyntaxTree(ASTContext &AST) |
971 | | : TreeImpl(std::make_unique<SyntaxTree::Impl>( |
972 | 17 | this, AST.getTranslationUnitDecl(), AST)) {} |
973 | | |
974 | 17 | SyntaxTree::~SyntaxTree() = default; |
975 | | |
976 | 139 | const ASTContext &SyntaxTree::getASTContext() const { return TreeImpl->AST; } |
977 | | |
978 | 1.09k | const Node &SyntaxTree::getNode(NodeId Id) const { |
979 | 1.09k | return TreeImpl->getNode(Id); |
980 | 1.09k | } |
981 | | |
982 | 0 | int SyntaxTree::getSize() const { return TreeImpl->getSize(); } |
983 | 142 | NodeId SyntaxTree::getRootId() const { return TreeImpl->getRootId(); } |
984 | 14 | SyntaxTree::PreorderIterator SyntaxTree::begin() const { |
985 | 14 | return TreeImpl->begin(); |
986 | 14 | } |
987 | 14 | SyntaxTree::PreorderIterator SyntaxTree::end() const { return TreeImpl->end(); } |
988 | | |
989 | 50 | int SyntaxTree::findPositionInParent(NodeId Id) const { |
990 | 50 | return TreeImpl->findPositionInParent(Id); |
991 | 50 | } |
992 | | |
993 | | std::pair<unsigned, unsigned> |
994 | 146 | SyntaxTree::getSourceRangeOffsets(const Node &N) const { |
995 | 146 | const SourceManager &SrcMgr = TreeImpl->AST.getSourceManager(); |
996 | 146 | SourceRange Range = N.ASTNode.getSourceRange(); |
997 | 146 | SourceLocation BeginLoc = Range.getBegin(); |
998 | 146 | SourceLocation EndLoc = Lexer::getLocForEndOfToken( |
999 | 146 | Range.getEnd(), /*Offset=*/0, SrcMgr, TreeImpl->AST.getLangOpts()); |
1000 | 146 | if (auto *ThisExpr = N.ASTNode.get<CXXThisExpr>()) { |
1001 | 0 | if (ThisExpr->isImplicit()) |
1002 | 0 | EndLoc = BeginLoc; |
1003 | 0 | } |
1004 | 146 | unsigned Begin = SrcMgr.getFileOffset(SrcMgr.getExpansionLoc(BeginLoc)); |
1005 | 146 | unsigned End = SrcMgr.getFileOffset(SrcMgr.getExpansionLoc(EndLoc)); |
1006 | 146 | return {Begin, End}; |
1007 | 146 | } |
1008 | | |
1009 | 451 | std::string SyntaxTree::getNodeValue(NodeId Id) const { |
1010 | 451 | return TreeImpl->getNodeValue(Id); |
1011 | 451 | } |
1012 | | |
1013 | 146 | std::string SyntaxTree::getNodeValue(const Node &N) const { |
1014 | 146 | return TreeImpl->getNodeValue(N); |
1015 | 146 | } |
1016 | | |
1017 | | } // end namespace diff |
1018 | | } // end namespace clang |