/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/MachineOutliner.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | /// |
10 | | /// \file |
11 | | /// Replaces repeated sequences of instructions with function calls. |
12 | | /// |
13 | | /// This works by placing every instruction from every basic block in a |
14 | | /// suffix tree, and repeatedly querying that tree for repeated sequences of |
15 | | /// instructions. If a sequence of instructions appears often, then it ought |
16 | | /// to be beneficial to pull out into a function. |
17 | | /// |
18 | | /// The MachineOutliner communicates with a given target using hooks defined in |
19 | | /// TargetInstrInfo.h. The target supplies the outliner with information on how |
20 | | /// a specific sequence of instructions should be outlined. This information |
21 | | /// is used to deduce the number of instructions necessary to |
22 | | /// |
23 | | /// * Create an outlined function |
24 | | /// * Call that outlined function |
25 | | /// |
26 | | /// Targets must implement |
27 | | /// * getOutliningCandidateInfo |
28 | | /// * insertOutlinerEpilogue |
29 | | /// * insertOutlinedCall |
30 | | /// * insertOutlinerPrologue |
31 | | /// * isFunctionSafeToOutlineFrom |
32 | | /// |
33 | | /// in order to make use of the MachineOutliner. |
34 | | /// |
35 | | /// This was originally presented at the 2016 LLVM Developers' Meeting in the |
36 | | /// talk "Reducing Code Size Using Outlining". For a high-level overview of |
37 | | /// how this pass works, the talk is available on YouTube at |
38 | | /// |
39 | | /// https://www.youtube.com/watch?v=yorld-WSOeU |
40 | | /// |
41 | | /// The slides for the talk are available at |
42 | | /// |
43 | | /// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf |
44 | | /// |
45 | | /// The talk provides an overview of how the outliner finds candidates and |
46 | | /// ultimately outlines them. It describes how the main data structure for this |
47 | | /// pass, the suffix tree, is queried and purged for candidates. It also gives |
48 | | /// a simplified suffix tree construction algorithm for suffix trees based off |
49 | | /// of the algorithm actually used here, Ukkonen's algorithm. |
50 | | /// |
51 | | /// For the original RFC for this pass, please see |
52 | | /// |
53 | | /// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html |
54 | | /// |
55 | | /// For more information on the suffix tree data structure, please see |
56 | | /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf |
57 | | /// |
58 | | //===----------------------------------------------------------------------===// |
59 | | #include "llvm/ADT/DenseMap.h" |
60 | | #include "llvm/ADT/Statistic.h" |
61 | | #include "llvm/ADT/Twine.h" |
62 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
63 | | #include "llvm/CodeGen/MachineFunction.h" |
64 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
65 | | #include "llvm/CodeGen/MachineModuleInfo.h" |
66 | | #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" |
67 | | #include "llvm/CodeGen/Passes.h" |
68 | | #include "llvm/IR/IRBuilder.h" |
69 | | #include "llvm/Support/Allocator.h" |
70 | | #include "llvm/Support/Debug.h" |
71 | | #include "llvm/Support/raw_ostream.h" |
72 | | #include "llvm/Target/TargetInstrInfo.h" |
73 | | #include "llvm/Target/TargetMachine.h" |
74 | | #include "llvm/Target/TargetRegisterInfo.h" |
75 | | #include "llvm/Target/TargetSubtargetInfo.h" |
76 | | #include <functional> |
77 | | #include <map> |
78 | | #include <sstream> |
79 | | #include <tuple> |
80 | | #include <vector> |
81 | | |
82 | 4 | #define DEBUG_TYPE "machine-outliner" |
83 | | |
84 | | using namespace llvm; |
85 | | using namespace ore; |
86 | | |
87 | | STATISTIC(NumOutlined, "Number of candidates outlined"); |
88 | | STATISTIC(FunctionsCreated, "Number of functions created"); |
89 | | |
90 | | namespace { |
91 | | |
92 | | /// \brief An individual sequence of instructions to be replaced with a call to |
93 | | /// an outlined function. |
94 | | struct Candidate { |
95 | | |
96 | | /// Set to false if the candidate overlapped with another candidate. |
97 | | bool InCandidateList = true; |
98 | | |
99 | | /// The start index of this \p Candidate. |
100 | | unsigned StartIdx; |
101 | | |
102 | | /// The number of instructions in this \p Candidate. |
103 | | unsigned Len; |
104 | | |
105 | | /// The index of this \p Candidate's \p OutlinedFunction in the list of |
106 | | /// \p OutlinedFunctions. |
107 | | unsigned FunctionIdx; |
108 | | |
109 | | /// Contains all target-specific information for this \p Candidate. |
110 | | TargetInstrInfo::MachineOutlinerInfo MInfo; |
111 | | |
112 | | /// \brief The number of instructions that would be saved by outlining every |
113 | | /// candidate of this type. |
114 | | /// |
115 | | /// This is a fixed value which is not updated during the candidate pruning |
116 | | /// process. It is only used for deciding which candidate to keep if two |
117 | | /// candidates overlap. The true benefit is stored in the OutlinedFunction |
118 | | /// for some given candidate. |
119 | | unsigned Benefit = 0; |
120 | | |
121 | | Candidate(unsigned StartIdx, unsigned Len, unsigned FunctionIdx) |
122 | 59 | : StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx) {} |
123 | | |
124 | 0 | Candidate() {} |
125 | | |
126 | | /// \brief Used to ensure that \p Candidates are outlined in an order that |
127 | | /// preserves the start and end indices of other \p Candidates. |
128 | 223 | bool operator<(const Candidate &RHS) const { return StartIdx > RHS.StartIdx; } |
129 | | }; |
130 | | |
131 | | /// \brief The information necessary to create an outlined function for some |
132 | | /// class of candidate. |
133 | | struct OutlinedFunction { |
134 | | |
135 | | /// The actual outlined function created. |
136 | | /// This is initialized after we go through and create the actual function. |
137 | | MachineFunction *MF = nullptr; |
138 | | |
139 | | /// A number assigned to this function which appears at the end of its name. |
140 | | unsigned Name; |
141 | | |
142 | | /// The number of candidates for this OutlinedFunction. |
143 | | unsigned OccurrenceCount = 0; |
144 | | |
145 | | /// \brief The sequence of integers corresponding to the instructions in this |
146 | | /// function. |
147 | | std::vector<unsigned> Sequence; |
148 | | |
149 | | /// The number of instructions this function would save. |
150 | | unsigned Benefit = 0; |
151 | | |
152 | | /// Contains all target-specific information for this \p OutlinedFunction. |
153 | | TargetInstrInfo::MachineOutlinerInfo MInfo; |
154 | | |
155 | | OutlinedFunction(unsigned Name, unsigned OccurrenceCount, |
156 | | const std::vector<unsigned> &Sequence, unsigned Benefit, |
157 | | TargetInstrInfo::MachineOutlinerInfo &MInfo) |
158 | | : Name(Name), OccurrenceCount(OccurrenceCount), Sequence(Sequence), |
159 | 23 | Benefit(Benefit), MInfo(MInfo) {} |
160 | | }; |
161 | | |
162 | | /// Represents an undefined index in the suffix tree. |
163 | | const unsigned EmptyIdx = -1; |
164 | | |
165 | | /// A node in a suffix tree which represents a substring or suffix. |
166 | | /// |
167 | | /// Each node has either no children or at least two children, with the root |
168 | | /// being a exception in the empty tree. |
169 | | /// |
170 | | /// Children are represented as a map between unsigned integers and nodes. If |
171 | | /// a node N has a child M on unsigned integer k, then the mapping represented |
172 | | /// by N is a proper prefix of the mapping represented by M. Note that this, |
173 | | /// although similar to a trie is somewhat different: each node stores a full |
174 | | /// substring of the full mapping rather than a single character state. |
175 | | /// |
176 | | /// Each internal node contains a pointer to the internal node representing |
177 | | /// the same string, but with the first character chopped off. This is stored |
178 | | /// in \p Link. Each leaf node stores the start index of its respective |
179 | | /// suffix in \p SuffixIdx. |
180 | | struct SuffixTreeNode { |
181 | | |
182 | | /// The children of this node. |
183 | | /// |
184 | | /// A child existing on an unsigned integer implies that from the mapping |
185 | | /// represented by the current node, there is a way to reach another |
186 | | /// mapping by tacking that character on the end of the current string. |
187 | | DenseMap<unsigned, SuffixTreeNode *> Children; |
188 | | |
189 | | /// A flag set to false if the node has been pruned from the tree. |
190 | | bool IsInTree = true; |
191 | | |
192 | | /// The start index of this node's substring in the main string. |
193 | | unsigned StartIdx = EmptyIdx; |
194 | | |
195 | | /// The end index of this node's substring in the main string. |
196 | | /// |
197 | | /// Every leaf node must have its \p EndIdx incremented at the end of every |
198 | | /// step in the construction algorithm. To avoid having to update O(N) |
199 | | /// nodes individually at the end of every step, the end index is stored |
200 | | /// as a pointer. |
201 | | unsigned *EndIdx = nullptr; |
202 | | |
203 | | /// For leaves, the start index of the suffix represented by this node. |
204 | | /// |
205 | | /// For all other nodes, this is ignored. |
206 | | unsigned SuffixIdx = EmptyIdx; |
207 | | |
208 | | /// \brief For internal nodes, a pointer to the internal node representing |
209 | | /// the same sequence with the first character chopped off. |
210 | | /// |
211 | | /// This acts as a shortcut in Ukkonen's algorithm. One of the things that |
212 | | /// Ukkonen's algorithm does to achieve linear-time construction is |
213 | | /// keep track of which node the next insert should be at. This makes each |
214 | | /// insert O(1), and there are a total of O(N) inserts. The suffix link |
215 | | /// helps with inserting children of internal nodes. |
216 | | /// |
217 | | /// Say we add a child to an internal node with associated mapping S. The |
218 | | /// next insertion must be at the node representing S - its first character. |
219 | | /// This is given by the way that we iteratively build the tree in Ukkonen's |
220 | | /// algorithm. The main idea is to look at the suffixes of each prefix in the |
221 | | /// string, starting with the longest suffix of the prefix, and ending with |
222 | | /// the shortest. Therefore, if we keep pointers between such nodes, we can |
223 | | /// move to the next insertion point in O(1) time. If we don't, then we'd |
224 | | /// have to query from the root, which takes O(N) time. This would make the |
225 | | /// construction algorithm O(N^2) rather than O(N). |
226 | | SuffixTreeNode *Link = nullptr; |
227 | | |
228 | | /// The parent of this node. Every node except for the root has a parent. |
229 | | SuffixTreeNode *Parent = nullptr; |
230 | | |
231 | | /// The number of times this node's string appears in the tree. |
232 | | /// |
233 | | /// This is equal to the number of leaf children of the string. It represents |
234 | | /// the number of suffixes that the node's string is a prefix of. |
235 | | unsigned OccurrenceCount = 0; |
236 | | |
237 | | /// The length of the string formed by concatenating the edge labels from the |
238 | | /// root to this node. |
239 | | unsigned ConcatLen = 0; |
240 | | |
241 | | /// Returns true if this node is a leaf. |
242 | 60 | bool isLeaf() const { return SuffixIdx != EmptyIdx; } |
243 | | |
244 | | /// Returns true if this node is the root of its owning \p SuffixTree. |
245 | 1.67k | bool isRoot() const { return StartIdx == EmptyIdx; } |
246 | | |
247 | | /// Return the number of elements in the substring associated with this node. |
248 | 748 | size_t size() const { |
249 | 748 | |
250 | 748 | // Is it the root? If so, it's the empty string so return 0. |
251 | 748 | if (isRoot()) |
252 | 0 | return 0; |
253 | 748 | |
254 | 748 | assert(*EndIdx != EmptyIdx && "EndIdx is undefined!"); |
255 | 748 | |
256 | 748 | // Size = the number of elements in the string. |
257 | 748 | // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1. |
258 | 748 | return *EndIdx - StartIdx + 1; |
259 | 748 | } |
260 | | |
261 | | SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link, |
262 | | SuffixTreeNode *Parent) |
263 | 278 | : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {} |
264 | | |
265 | 0 | SuffixTreeNode() {} |
266 | | }; |
267 | | |
268 | | /// A data structure for fast substring queries. |
269 | | /// |
270 | | /// Suffix trees represent the suffixes of their input strings in their leaves. |
271 | | /// A suffix tree is a type of compressed trie structure where each node |
272 | | /// represents an entire substring rather than a single character. Each leaf |
273 | | /// of the tree is a suffix. |
274 | | /// |
275 | | /// A suffix tree can be seen as a type of state machine where each state is a |
276 | | /// substring of the full string. The tree is structured so that, for a string |
277 | | /// of length N, there are exactly N leaves in the tree. This structure allows |
278 | | /// us to quickly find repeated substrings of the input string. |
279 | | /// |
280 | | /// In this implementation, a "string" is a vector of unsigned integers. |
281 | | /// These integers may result from hashing some data type. A suffix tree can |
282 | | /// contain 1 or many strings, which can then be queried as one large string. |
283 | | /// |
284 | | /// The suffix tree is implemented using Ukkonen's algorithm for linear-time |
285 | | /// suffix tree construction. Ukkonen's algorithm is explained in more detail |
286 | | /// in the paper by Esko Ukkonen "On-line construction of suffix trees. The |
287 | | /// paper is available at |
288 | | /// |
289 | | /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf |
290 | | class SuffixTree { |
291 | | public: |
292 | | /// Stores each leaf node in the tree. |
293 | | /// |
294 | | /// This is used for finding outlining candidates. |
295 | | std::vector<SuffixTreeNode *> LeafVector; |
296 | | |
297 | | /// Each element is an integer representing an instruction in the module. |
298 | | ArrayRef<unsigned> Str; |
299 | | |
300 | | private: |
301 | | /// Maintains each node in the tree. |
302 | | SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator; |
303 | | |
304 | | /// The root of the suffix tree. |
305 | | /// |
306 | | /// The root represents the empty string. It is maintained by the |
307 | | /// \p NodeAllocator like every other node in the tree. |
308 | | SuffixTreeNode *Root = nullptr; |
309 | | |
310 | | /// Maintains the end indices of the internal nodes in the tree. |
311 | | /// |
312 | | /// Each internal node is guaranteed to never have its end index change |
313 | | /// during the construction algorithm; however, leaves must be updated at |
314 | | /// every step. Therefore, we need to store leaf end indices by reference |
315 | | /// to avoid updating O(N) leaves at every step of construction. Thus, |
316 | | /// every internal node must be allocated its own end index. |
317 | | BumpPtrAllocator InternalEndIdxAllocator; |
318 | | |
319 | | /// The end index of each leaf in the tree. |
320 | | unsigned LeafEndIdx = -1; |
321 | | |
322 | | /// \brief Helper struct which keeps track of the next insertion point in |
323 | | /// Ukkonen's algorithm. |
324 | | struct ActiveState { |
325 | | /// The next node to insert at. |
326 | | SuffixTreeNode *Node; |
327 | | |
328 | | /// The index of the first character in the substring currently being added. |
329 | | unsigned Idx = EmptyIdx; |
330 | | |
331 | | /// The length of the substring we have to add at the current step. |
332 | | unsigned Len = 0; |
333 | | }; |
334 | | |
335 | | /// \brief The point the next insertion will take place at in the |
336 | | /// construction algorithm. |
337 | | ActiveState Active; |
338 | | |
339 | | /// Allocate a leaf node and add it to the tree. |
340 | | /// |
341 | | /// \param Parent The parent of this node. |
342 | | /// \param StartIdx The start index of this node's associated string. |
343 | | /// \param Edge The label on the edge leaving \p Parent to this node. |
344 | | /// |
345 | | /// \returns A pointer to the allocated leaf node. |
346 | | SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, unsigned StartIdx, |
347 | 226 | unsigned Edge) { |
348 | 226 | |
349 | 226 | assert(StartIdx <= LeafEndIdx && "String can't start after it ends!"); |
350 | 226 | |
351 | 226 | SuffixTreeNode *N = new (NodeAllocator.Allocate()) |
352 | 226 | SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr, &Parent); |
353 | 226 | Parent.Children[Edge] = N; |
354 | 226 | |
355 | 226 | return N; |
356 | 226 | } |
357 | | |
358 | | /// Allocate an internal node and add it to the tree. |
359 | | /// |
360 | | /// \param Parent The parent of this node. Only null when allocating the root. |
361 | | /// \param StartIdx The start index of this node's associated string. |
362 | | /// \param EndIdx The end index of this node's associated string. |
363 | | /// \param Edge The label on the edge leaving \p Parent to this node. |
364 | | /// |
365 | | /// \returns A pointer to the allocated internal node. |
366 | | SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, unsigned StartIdx, |
367 | 52 | unsigned EndIdx, unsigned Edge) { |
368 | 52 | |
369 | 52 | assert(StartIdx <= EndIdx && "String can't start after it ends!"); |
370 | 52 | assert(!(!Parent && StartIdx != EmptyIdx) && |
371 | 52 | "Non-root internal nodes must have parents!"); |
372 | 52 | |
373 | 52 | unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx); |
374 | 52 | SuffixTreeNode *N = new (NodeAllocator.Allocate()) |
375 | 52 | SuffixTreeNode(StartIdx, E, Root, Parent); |
376 | 52 | if (Parent) |
377 | 45 | Parent->Children[Edge] = N; |
378 | 52 | |
379 | 52 | return N; |
380 | 52 | } |
381 | | |
382 | | /// \brief Set the suffix indices of the leaves to the start indices of their |
383 | | /// respective suffixes. Also stores each leaf in \p LeafVector at its |
384 | | /// respective suffix index. |
385 | | /// |
386 | | /// \param[in] CurrNode The node currently being visited. |
387 | | /// \param CurrIdx The current index of the string being visited. |
388 | 278 | void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrIdx) { |
389 | 278 | |
390 | 226 | bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot(); |
391 | 278 | |
392 | 278 | // Store the length of the concatenation of all strings from the root to |
393 | 278 | // this node. |
394 | 278 | if (!CurrNode.isRoot()278 ) { |
395 | 271 | if (CurrNode.ConcatLen == 0) |
396 | 271 | CurrNode.ConcatLen = CurrNode.size(); |
397 | 271 | |
398 | 271 | if (CurrNode.Parent) |
399 | 271 | CurrNode.ConcatLen += CurrNode.Parent->ConcatLen; |
400 | 271 | } |
401 | 278 | |
402 | 278 | // Traverse the tree depth-first. |
403 | 271 | for (auto &ChildPair : CurrNode.Children) { |
404 | 271 | assert(ChildPair.second && "Node had a null child!"); |
405 | 271 | setSuffixIndices(*ChildPair.second, CurrIdx + ChildPair.second->size()); |
406 | 271 | } |
407 | 278 | |
408 | 278 | // Is this node a leaf? |
409 | 278 | if (IsLeaf278 ) { |
410 | 226 | // If yes, give it a suffix index and bump its parent's occurrence count. |
411 | 226 | CurrNode.SuffixIdx = Str.size() - CurrIdx; |
412 | 226 | assert(CurrNode.Parent && "CurrNode had no parent!"); |
413 | 226 | CurrNode.Parent->OccurrenceCount++; |
414 | 226 | |
415 | 226 | // Store the leaf in the leaf vector for pruning later. |
416 | 226 | LeafVector[CurrNode.SuffixIdx] = &CurrNode; |
417 | 226 | } |
418 | 278 | } |
419 | | |
420 | | /// \brief Construct the suffix tree for the prefix of the input ending at |
421 | | /// \p EndIdx. |
422 | | /// |
423 | | /// Used to construct the full suffix tree iteratively. At the end of each |
424 | | /// step, the constructed suffix tree is either a valid suffix tree, or a |
425 | | /// suffix tree with implicit suffixes. At the end of the final step, the |
426 | | /// suffix tree is a valid tree. |
427 | | /// |
428 | | /// \param EndIdx The end index of the current prefix in the main string. |
429 | | /// \param SuffixesToAdd The number of suffixes that must be added |
430 | | /// to complete the suffix tree at the current phase. |
431 | | /// |
432 | | /// \returns The number of suffixes that have not been added at the end of |
433 | | /// this step. |
434 | 226 | unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd) { |
435 | 226 | SuffixTreeNode *NeedsLink = nullptr; |
436 | 226 | |
437 | 478 | while (SuffixesToAdd > 0478 ) { |
438 | 317 | |
439 | 317 | // Are we waiting to add anything other than just the last character? |
440 | 317 | if (Active.Len == 0317 ) { |
441 | 222 | // If not, then say the active index is the end index. |
442 | 222 | Active.Idx = EndIdx; |
443 | 222 | } |
444 | 317 | |
445 | 317 | assert(Active.Idx <= EndIdx && "Start index can't be after end index!"); |
446 | 317 | |
447 | 317 | // The first character in the current substring we're looking at. |
448 | 317 | unsigned FirstChar = Str[Active.Idx]; |
449 | 317 | |
450 | 317 | // Have we inserted anything starting with FirstChar at the current node? |
451 | 317 | if (Active.Node->Children.count(FirstChar) == 0317 ) { |
452 | 181 | // If not, then we can just insert a leaf and move too the next step. |
453 | 181 | insertLeaf(*Active.Node, EndIdx, FirstChar); |
454 | 181 | |
455 | 181 | // The active node is an internal node, and we visited it, so it must |
456 | 181 | // need a link if it doesn't have one. |
457 | 181 | if (NeedsLink181 ) { |
458 | 19 | NeedsLink->Link = Active.Node; |
459 | 19 | NeedsLink = nullptr; |
460 | 19 | } |
461 | 317 | } else { |
462 | 136 | // There's a match with FirstChar, so look for the point in the tree to |
463 | 136 | // insert a new node. |
464 | 136 | SuffixTreeNode *NextNode = Active.Node->Children[FirstChar]; |
465 | 136 | |
466 | 136 | unsigned SubstringLen = NextNode->size(); |
467 | 136 | |
468 | 136 | // Is the current suffix we're trying to insert longer than the size of |
469 | 136 | // the child we want to move to? |
470 | 136 | if (Active.Len >= SubstringLen136 ) { |
471 | 26 | // If yes, then consume the characters we've seen and move to the next |
472 | 26 | // node. |
473 | 26 | Active.Idx += SubstringLen; |
474 | 26 | Active.Len -= SubstringLen; |
475 | 26 | Active.Node = NextNode; |
476 | 26 | continue; |
477 | 26 | } |
478 | 110 | |
479 | 110 | // Otherwise, the suffix we're trying to insert must be contained in the |
480 | 110 | // next node we want to move to. |
481 | 110 | unsigned LastChar = Str[EndIdx]; |
482 | 110 | |
483 | 110 | // Is the string we're trying to insert a substring of the next node? |
484 | 110 | if (Str[NextNode->StartIdx + Active.Len] == LastChar110 ) { |
485 | 65 | // If yes, then we're done for this step. Remember our insertion point |
486 | 65 | // and move to the next end index. At this point, we have an implicit |
487 | 65 | // suffix tree. |
488 | 65 | if (NeedsLink && 65 !Active.Node->isRoot()2 ) { |
489 | 0 | NeedsLink->Link = Active.Node; |
490 | 0 | NeedsLink = nullptr; |
491 | 0 | } |
492 | 65 | |
493 | 65 | Active.Len++; |
494 | 65 | break; |
495 | 65 | } |
496 | 45 | |
497 | 45 | // The string we're trying to insert isn't a substring of the next node, |
498 | 45 | // but matches up to a point. Split the node. |
499 | 45 | // |
500 | 45 | // For example, say we ended our search at a node n and we're trying to |
501 | 45 | // insert ABD. Then we'll create a new node s for AB, reduce n to just |
502 | 45 | // representing C, and insert a new leaf node l to represent d. This |
503 | 45 | // allows us to ensure that if n was a leaf, it remains a leaf. |
504 | 45 | // |
505 | 45 | // | ABC ---split---> | AB |
506 | 45 | // n s |
507 | 45 | // C / \ D |
508 | 45 | // n l |
509 | 45 | |
510 | 45 | // The node s from the diagram |
511 | 45 | SuffixTreeNode *SplitNode = |
512 | 45 | insertInternalNode(Active.Node, NextNode->StartIdx, |
513 | 45 | NextNode->StartIdx + Active.Len - 1, FirstChar); |
514 | 45 | |
515 | 45 | // Insert the new node representing the new substring into the tree as |
516 | 45 | // a child of the split node. This is the node l from the diagram. |
517 | 45 | insertLeaf(*SplitNode, EndIdx, LastChar); |
518 | 45 | |
519 | 45 | // Make the old node a child of the split node and update its start |
520 | 45 | // index. This is the node n from the diagram. |
521 | 45 | NextNode->StartIdx += Active.Len; |
522 | 45 | NextNode->Parent = SplitNode; |
523 | 45 | SplitNode->Children[Str[NextNode->StartIdx]] = NextNode; |
524 | 45 | |
525 | 45 | // SplitNode is an internal node, update the suffix link. |
526 | 45 | if (NeedsLink) |
527 | 24 | NeedsLink->Link = SplitNode; |
528 | 136 | |
529 | 136 | NeedsLink = SplitNode; |
530 | 136 | } |
531 | 317 | |
532 | 317 | // We've added something new to the tree, so there's one less suffix to |
533 | 317 | // add. |
534 | 226 | SuffixesToAdd--; |
535 | 226 | |
536 | 226 | if (Active.Node->isRoot()226 ) { |
537 | 200 | if (Active.Len > 0200 ) { |
538 | 39 | Active.Len--; |
539 | 39 | Active.Idx = EndIdx - SuffixesToAdd + 1; |
540 | 39 | } |
541 | 226 | } else { |
542 | 26 | // Start the next phase at the next smallest suffix. |
543 | 26 | Active.Node = Active.Node->Link; |
544 | 26 | } |
545 | 317 | } |
546 | 226 | |
547 | 226 | return SuffixesToAdd; |
548 | 226 | } |
549 | | |
550 | | public: |
551 | | /// Construct a suffix tree from a sequence of unsigned integers. |
552 | | /// |
553 | | /// \param Str The string to construct the suffix tree for. |
554 | 7 | SuffixTree(const std::vector<unsigned> &Str) : Str(Str) { |
555 | 7 | Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0); |
556 | 7 | Root->IsInTree = true; |
557 | 7 | Active.Node = Root; |
558 | 7 | LeafVector = std::vector<SuffixTreeNode *>(Str.size()); |
559 | 7 | |
560 | 7 | // Keep track of the number of suffixes we have to add of the current |
561 | 7 | // prefix. |
562 | 7 | unsigned SuffixesToAdd = 0; |
563 | 7 | Active.Node = Root; |
564 | 7 | |
565 | 7 | // Construct the suffix tree iteratively on each prefix of the string. |
566 | 7 | // PfxEndIdx is the end index of the current prefix. |
567 | 7 | // End is one past the last element in the string. |
568 | 233 | for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; |
569 | 226 | PfxEndIdx++226 ) { |
570 | 226 | SuffixesToAdd++; |
571 | 226 | LeafEndIdx = PfxEndIdx; // Extend each of the leaves. |
572 | 226 | SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd); |
573 | 226 | } |
574 | 7 | |
575 | 7 | // Set the suffix indices of each leaf. |
576 | 7 | assert(Root && "Root node can't be nullptr!"); |
577 | 7 | setSuffixIndices(*Root, 0); |
578 | 7 | } |
579 | | }; |
580 | | |
581 | | /// \brief Maps \p MachineInstrs to unsigned integers and stores the mappings. |
582 | | struct InstructionMapper { |
583 | | |
584 | | /// \brief The next available integer to assign to a \p MachineInstr that |
585 | | /// cannot be outlined. |
586 | | /// |
587 | | /// Set to -3 for compatability with \p DenseMapInfo<unsigned>. |
588 | | unsigned IllegalInstrNumber = -3; |
589 | | |
590 | | /// \brief The next available integer to assign to a \p MachineInstr that can |
591 | | /// be outlined. |
592 | | unsigned LegalInstrNumber = 0; |
593 | | |
594 | | /// Correspondence from \p MachineInstrs to unsigned integers. |
595 | | DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait> |
596 | | InstructionIntegerMap; |
597 | | |
598 | | /// Corresponcence from unsigned integers to \p MachineInstrs. |
599 | | /// Inverse of \p InstructionIntegerMap. |
600 | | DenseMap<unsigned, MachineInstr *> IntegerInstructionMap; |
601 | | |
602 | | /// The vector of unsigned integers that the module is mapped to. |
603 | | std::vector<unsigned> UnsignedVec; |
604 | | |
605 | | /// \brief Stores the location of the instruction associated with the integer |
606 | | /// at index i in \p UnsignedVec for each index i. |
607 | | std::vector<MachineBasicBlock::iterator> InstrList; |
608 | | |
609 | | /// \brief Maps \p *It to a legal integer. |
610 | | /// |
611 | | /// Updates \p InstrList, \p UnsignedVec, \p InstructionIntegerMap, |
612 | | /// \p IntegerInstructionMap, and \p LegalInstrNumber. |
613 | | /// |
614 | | /// \returns The integer that \p *It was mapped to. |
615 | 114 | unsigned mapToLegalUnsigned(MachineBasicBlock::iterator &It) { |
616 | 114 | |
617 | 114 | // Get the integer for this instruction or give it the current |
618 | 114 | // LegalInstrNumber. |
619 | 114 | InstrList.push_back(It); |
620 | 114 | MachineInstr &MI = *It; |
621 | 114 | bool WasInserted; |
622 | 114 | DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator |
623 | 114 | ResultIt; |
624 | 114 | std::tie(ResultIt, WasInserted) = |
625 | 114 | InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber)); |
626 | 114 | unsigned MINumber = ResultIt->second; |
627 | 114 | |
628 | 114 | // There was an insertion. |
629 | 114 | if (WasInserted114 ) { |
630 | 49 | LegalInstrNumber++; |
631 | 49 | IntegerInstructionMap.insert(std::make_pair(MINumber, &MI)); |
632 | 49 | } |
633 | 114 | |
634 | 114 | UnsignedVec.push_back(MINumber); |
635 | 114 | |
636 | 114 | // Make sure we don't overflow or use any integers reserved by the DenseMap. |
637 | 114 | if (LegalInstrNumber >= IllegalInstrNumber) |
638 | 0 | report_fatal_error("Instruction mapping overflow!"); |
639 | 114 | |
640 | 114 | assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && |
641 | 114 | "Tried to assign DenseMap tombstone or empty key to instruction."); |
642 | 114 | assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && |
643 | 114 | "Tried to assign DenseMap tombstone or empty key to instruction."); |
644 | 114 | |
645 | 114 | return MINumber; |
646 | 114 | } |
647 | | |
648 | | /// Maps \p *It to an illegal integer. |
649 | | /// |
650 | | /// Updates \p InstrList, \p UnsignedVec, and \p IllegalInstrNumber. |
651 | | /// |
652 | | /// \returns The integer that \p *It was mapped to. |
653 | 89 | unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It) { |
654 | 89 | unsigned MINumber = IllegalInstrNumber; |
655 | 89 | |
656 | 89 | InstrList.push_back(It); |
657 | 89 | UnsignedVec.push_back(IllegalInstrNumber); |
658 | 89 | IllegalInstrNumber--; |
659 | 89 | |
660 | 89 | assert(LegalInstrNumber < IllegalInstrNumber && |
661 | 89 | "Instruction mapping overflow!"); |
662 | 89 | |
663 | 89 | assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && |
664 | 89 | "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); |
665 | 89 | |
666 | 89 | assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && |
667 | 89 | "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); |
668 | 89 | |
669 | 89 | return MINumber; |
670 | 89 | } |
671 | | |
672 | | /// \brief Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds |
673 | | /// and appends it to \p UnsignedVec and \p InstrList. |
674 | | /// |
675 | | /// Two instructions are assigned the same integer if they are identical. |
676 | | /// If an instruction is deemed unsafe to outline, then it will be assigned an |
677 | | /// unique integer. The resulting mapping is placed into a suffix tree and |
678 | | /// queried for candidates. |
679 | | /// |
680 | | /// \param MBB The \p MachineBasicBlock to be translated into integers. |
681 | | /// \param TRI \p TargetRegisterInfo for the module. |
682 | | /// \param TII \p TargetInstrInfo for the module. |
683 | | void convertToUnsignedVec(MachineBasicBlock &MBB, |
684 | | const TargetRegisterInfo &TRI, |
685 | 23 | const TargetInstrInfo &TII) { |
686 | 227 | for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et; |
687 | 204 | It++204 ) { |
688 | 204 | |
689 | 204 | // Keep track of where this instruction is in the module. |
690 | 204 | switch (TII.getOutliningType(*It)) { |
691 | 89 | case TargetInstrInfo::MachineOutlinerInstrType::Illegal: |
692 | 89 | mapToIllegalUnsigned(It); |
693 | 89 | break; |
694 | 204 | |
695 | 114 | case TargetInstrInfo::MachineOutlinerInstrType::Legal: |
696 | 114 | mapToLegalUnsigned(It); |
697 | 114 | break; |
698 | 204 | |
699 | 1 | case TargetInstrInfo::MachineOutlinerInstrType::Invisible: |
700 | 1 | break; |
701 | 204 | } |
702 | 204 | } |
703 | 23 | |
704 | 23 | // After we're done every insertion, uniquely terminate this part of the |
705 | 23 | // "string". This makes sure we won't match across basic block or function |
706 | 23 | // boundaries since the "end" is encoded uniquely and thus appears in no |
707 | 23 | // repeated substring. |
708 | 23 | InstrList.push_back(MBB.end()); |
709 | 23 | UnsignedVec.push_back(IllegalInstrNumber); |
710 | 23 | IllegalInstrNumber--; |
711 | 23 | } |
712 | | |
713 | 7 | InstructionMapper() { |
714 | 7 | // Make sure that the implementation of DenseMapInfo<unsigned> hasn't |
715 | 7 | // changed. |
716 | 7 | assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 && |
717 | 7 | "DenseMapInfo<unsigned>'s empty key isn't -1!"); |
718 | 7 | assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 && |
719 | 7 | "DenseMapInfo<unsigned>'s tombstone key isn't -2!"); |
720 | 7 | } |
721 | | }; |
722 | | |
723 | | /// \brief An interprocedural pass which finds repeated sequences of |
724 | | /// instructions and replaces them with calls to functions. |
725 | | /// |
726 | | /// Each instruction is mapped to an unsigned integer and placed in a string. |
727 | | /// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree |
728 | | /// is then repeatedly queried for repeated sequences of instructions. Each |
729 | | /// non-overlapping repeated sequence is then placed in its own |
730 | | /// \p MachineFunction and each instance is then replaced with a call to that |
731 | | /// function. |
732 | | struct MachineOutliner : public ModulePass { |
733 | | |
734 | | static char ID; |
735 | | |
736 | 1 | StringRef getPassName() const override { return "Machine Outliner"; } |
737 | | |
738 | 7 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
739 | 7 | AU.addRequired<MachineModuleInfo>(); |
740 | 7 | AU.addPreserved<MachineModuleInfo>(); |
741 | 7 | AU.setPreservesAll(); |
742 | 7 | ModulePass::getAnalysisUsage(AU); |
743 | 7 | } |
744 | | |
745 | 7 | MachineOutliner() : ModulePass(ID) { |
746 | 7 | initializeMachineOutlinerPass(*PassRegistry::getPassRegistry()); |
747 | 7 | } |
748 | | |
749 | | /// Find all repeated substrings that satisfy the outlining cost model. |
750 | | /// |
751 | | /// If a substring appears at least twice, then it must be represented by |
752 | | /// an internal node which appears in at least two suffixes. Each suffix is |
753 | | /// represented by a leaf node. To do this, we visit each internal node in |
754 | | /// the tree, using the leaf children of each internal node. If an internal |
755 | | /// node represents a beneficial substring, then we use each of its leaf |
756 | | /// children to find the locations of its substring. |
757 | | /// |
758 | | /// \param ST A suffix tree to query. |
759 | | /// \param TII TargetInstrInfo for the target. |
760 | | /// \param Mapper Contains outlining mapping information. |
761 | | /// \param[out] CandidateList Filled with candidates representing each |
762 | | /// beneficial substring. |
763 | | /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions each |
764 | | /// type of candidate. |
765 | | /// |
766 | | /// \returns The length of the longest candidate found. |
767 | | unsigned findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, |
768 | | InstructionMapper &Mapper, |
769 | | std::vector<Candidate> &CandidateList, |
770 | | std::vector<OutlinedFunction> &FunctionList); |
771 | | |
772 | | /// \brief Replace the sequences of instructions represented by the |
773 | | /// \p Candidates in \p CandidateList with calls to \p MachineFunctions |
774 | | /// described in \p FunctionList. |
775 | | /// |
776 | | /// \param M The module we are outlining from. |
777 | | /// \param CandidateList A list of candidates to be outlined. |
778 | | /// \param FunctionList A list of functions to be inserted into the module. |
779 | | /// \param Mapper Contains the instruction mappings for the module. |
780 | | bool outline(Module &M, const ArrayRef<Candidate> &CandidateList, |
781 | | std::vector<OutlinedFunction> &FunctionList, |
782 | | InstructionMapper &Mapper); |
783 | | |
784 | | /// Creates a function for \p OF and inserts it into the module. |
785 | | MachineFunction *createOutlinedFunction(Module &M, const OutlinedFunction &OF, |
786 | | InstructionMapper &Mapper); |
787 | | |
788 | | /// Find potential outlining candidates and store them in \p CandidateList. |
789 | | /// |
790 | | /// For each type of potential candidate, also build an \p OutlinedFunction |
791 | | /// struct containing the information to build the function for that |
792 | | /// candidate. |
793 | | /// |
794 | | /// \param[out] CandidateList Filled with outlining candidates for the module. |
795 | | /// \param[out] FunctionList Filled with functions corresponding to each type |
796 | | /// of \p Candidate. |
797 | | /// \param ST The suffix tree for the module. |
798 | | /// \param TII TargetInstrInfo for the module. |
799 | | /// |
800 | | /// \returns The length of the longest candidate found. 0 if there are none. |
801 | | unsigned buildCandidateList(std::vector<Candidate> &CandidateList, |
802 | | std::vector<OutlinedFunction> &FunctionList, |
803 | | SuffixTree &ST, InstructionMapper &Mapper, |
804 | | const TargetInstrInfo &TII); |
805 | | |
806 | | /// \brief Remove any overlapping candidates that weren't handled by the |
807 | | /// suffix tree's pruning method. |
808 | | /// |
809 | | /// Pruning from the suffix tree doesn't necessarily remove all overlaps. |
810 | | /// If a short candidate is chosen for outlining, then a longer candidate |
811 | | /// which has that short candidate as a suffix is chosen, the tree's pruning |
812 | | /// method will not find it. Thus, we need to prune before outlining as well. |
813 | | /// |
814 | | /// \param[in,out] CandidateList A list of outlining candidates. |
815 | | /// \param[in,out] FunctionList A list of functions to be outlined. |
816 | | /// \param Mapper Contains instruction mapping info for outlining. |
817 | | /// \param MaxCandidateLen The length of the longest candidate. |
818 | | /// \param TII TargetInstrInfo for the module. |
819 | | void pruneOverlaps(std::vector<Candidate> &CandidateList, |
820 | | std::vector<OutlinedFunction> &FunctionList, |
821 | | InstructionMapper &Mapper, unsigned MaxCandidateLen, |
822 | | const TargetInstrInfo &TII); |
823 | | |
824 | | /// Construct a suffix tree on the instructions in \p M and outline repeated |
825 | | /// strings from that tree. |
826 | | bool runOnModule(Module &M) override; |
827 | | }; |
828 | | |
829 | | } // Anonymous namespace. |
830 | | |
831 | | char MachineOutliner::ID = 0; |
832 | | |
833 | | namespace llvm { |
834 | 6 | ModulePass *createMachineOutlinerPass() { return new MachineOutliner(); } |
835 | | } // namespace llvm |
836 | | |
837 | | INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false, |
838 | | false) |
839 | | |
840 | | unsigned |
841 | | MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, |
842 | | InstructionMapper &Mapper, |
843 | | std::vector<Candidate> &CandidateList, |
844 | 7 | std::vector<OutlinedFunction> &FunctionList) { |
845 | 7 | CandidateList.clear(); |
846 | 7 | FunctionList.clear(); |
847 | 7 | unsigned FnIdx = 0; |
848 | 7 | unsigned MaxLen = 0; |
849 | 7 | |
850 | 7 | // FIXME: Visit internal nodes instead of leaves. |
851 | 226 | for (SuffixTreeNode *Leaf : ST.LeafVector) { |
852 | 226 | assert(Leaf && "Leaves in LeafVector cannot be null!"); |
853 | 226 | if (!Leaf->IsInTree) |
854 | 32 | continue; |
855 | 194 | |
856 | 226 | assert(Leaf->Parent && "All leaves must have parents!"); |
857 | 194 | SuffixTreeNode &Parent = *(Leaf->Parent); |
858 | 194 | |
859 | 194 | // If it doesn't appear enough, or we already outlined from it, skip it. |
860 | 194 | if (Parent.OccurrenceCount < 2 || 194 Parent.isRoot()194 || !Parent.IsInTree70 ) |
861 | 124 | continue; |
862 | 70 | |
863 | 70 | // Figure out if this candidate is beneficial. |
864 | 70 | unsigned StringLen = Leaf->ConcatLen - (unsigned)Leaf->size(); |
865 | 70 | |
866 | 70 | // Too short to be beneficial; skip it. |
867 | 70 | // FIXME: This isn't necessarily true for, say, X86. If we factor in |
868 | 70 | // instruction lengths we need more information than this. |
869 | 70 | if (StringLen < 2) |
870 | 43 | continue; |
871 | 27 | |
872 | 27 | // If this is a beneficial class of candidate, then every one is stored in |
873 | 27 | // this vector. |
874 | 27 | std::vector<Candidate> CandidatesForRepeatedSeq; |
875 | 27 | |
876 | 27 | // Describes the start and end point of each candidate. This allows the |
877 | 27 | // target to infer some information about each occurrence of each repeated |
878 | 27 | // sequence. |
879 | 27 | // FIXME: CandidatesForRepeatedSeq and this should be combined. |
880 | 27 | std::vector< |
881 | 27 | std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>> |
882 | 27 | RepeatedSequenceLocs; |
883 | 27 | |
884 | 27 | // Figure out the call overhead for each instance of the sequence. |
885 | 65 | for (auto &ChildPair : Parent.Children) { |
886 | 65 | SuffixTreeNode *M = ChildPair.second; |
887 | 65 | |
888 | 65 | if (M && 65 M->IsInTree65 && M->isLeaf()60 ) { |
889 | 59 | // Each sequence is over [StartIt, EndIt]. |
890 | 59 | MachineBasicBlock::iterator StartIt = Mapper.InstrList[M->SuffixIdx]; |
891 | 59 | MachineBasicBlock::iterator EndIt = |
892 | 59 | Mapper.InstrList[M->SuffixIdx + StringLen - 1]; |
893 | 59 | |
894 | 59 | CandidatesForRepeatedSeq.emplace_back(M->SuffixIdx, StringLen, FnIdx); |
895 | 59 | RepeatedSequenceLocs.emplace_back(std::make_pair(StartIt, EndIt)); |
896 | 59 | |
897 | 59 | // Never visit this leaf again. |
898 | 59 | M->IsInTree = false; |
899 | 59 | } |
900 | 65 | } |
901 | 27 | |
902 | 27 | unsigned SequenceOverhead = StringLen; |
903 | 27 | TargetInstrInfo::MachineOutlinerInfo MInfo = |
904 | 27 | TII.getOutlininingCandidateInfo(RepeatedSequenceLocs); |
905 | 27 | |
906 | 27 | unsigned OutliningCost = |
907 | 27 | (MInfo.CallOverhead * Parent.OccurrenceCount) + MInfo.FrameOverhead; |
908 | 27 | unsigned NotOutliningCost = SequenceOverhead * Parent.OccurrenceCount; |
909 | 27 | |
910 | 27 | // Is it better to outline this candidate than not? |
911 | 27 | if (NotOutliningCost <= OutliningCost27 ) { |
912 | 4 | // Outlining this candidate would take more instructions than not |
913 | 4 | // outlining. |
914 | 4 | // Emit a remark explaining why we didn't outline this candidate. |
915 | 4 | std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator> C = |
916 | 4 | RepeatedSequenceLocs[0]; |
917 | 4 | MachineOptimizationRemarkEmitter MORE( |
918 | 4 | *(C.first->getParent()->getParent()), nullptr); |
919 | 4 | MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper", |
920 | 4 | C.first->getDebugLoc(), |
921 | 4 | C.first->getParent()); |
922 | 4 | R << "Did not outline " << NV("Length", StringLen) << " instructions" |
923 | 4 | << " from " << NV("NumOccurrences", RepeatedSequenceLocs.size()) |
924 | 4 | << " locations." |
925 | 4 | << " Instructions from outlining all occurrences (" |
926 | 4 | << NV("OutliningCost", OutliningCost) << ")" |
927 | 4 | << " >= Unoutlined instruction count (" |
928 | 4 | << NV("NotOutliningCost", NotOutliningCost) << ")" |
929 | 4 | << " (Also found at: "; |
930 | 4 | |
931 | 4 | // Tell the user the other places the candidate was found. |
932 | 10 | for (unsigned i = 1, e = RepeatedSequenceLocs.size(); i < e10 ; i++6 ) { |
933 | 6 | R << NV((Twine("OtherStartLoc") + Twine(i)).str(), |
934 | 6 | RepeatedSequenceLocs[i].first->getDebugLoc()); |
935 | 6 | if (i != e - 1) |
936 | 2 | R << ", "; |
937 | 6 | } |
938 | 4 | |
939 | 4 | R << ")"; |
940 | 4 | MORE.emit(R); |
941 | 4 | |
942 | 4 | // Move to the next candidate. |
943 | 4 | continue; |
944 | 4 | } |
945 | 23 | |
946 | 23 | unsigned Benefit = NotOutliningCost - OutliningCost; |
947 | 23 | |
948 | 23 | if (StringLen > MaxLen) |
949 | 5 | MaxLen = StringLen; |
950 | 23 | |
951 | 23 | // At this point, the candidate class is seen as beneficial. Set their |
952 | 23 | // benefit values and save them in the candidate list. |
953 | 49 | for (Candidate &C : CandidatesForRepeatedSeq) { |
954 | 49 | C.Benefit = Benefit; |
955 | 49 | C.MInfo = MInfo; |
956 | 49 | CandidateList.push_back(C); |
957 | 49 | } |
958 | 23 | |
959 | 23 | // Save the function for the new candidate sequence. |
960 | 23 | std::vector<unsigned> CandidateSequence; |
961 | 106 | for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen106 ; i++83 ) |
962 | 83 | CandidateSequence.push_back(ST.Str[i]); |
963 | 226 | |
964 | 226 | FunctionList.emplace_back(FnIdx, CandidatesForRepeatedSeq.size(), |
965 | 226 | CandidateSequence, Benefit, MInfo); |
966 | 226 | |
967 | 226 | // Move to the next function. |
968 | 226 | FnIdx++; |
969 | 226 | Parent.IsInTree = false; |
970 | 226 | } |
971 | 7 | |
972 | 7 | return MaxLen; |
973 | 7 | } |
974 | | |
975 | | void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList, |
976 | | std::vector<OutlinedFunction> &FunctionList, |
977 | | InstructionMapper &Mapper, |
978 | | unsigned MaxCandidateLen, |
979 | 7 | const TargetInstrInfo &TII) { |
980 | 7 | // TODO: Experiment with interval trees or other interval-checking structures |
981 | 7 | // to lower the time complexity of this function. |
982 | 7 | // TODO: Can we do better than the simple greedy choice? |
983 | 7 | // Check for overlaps in the range. |
984 | 7 | // This is O(MaxCandidateLen * CandidateList.size()). |
985 | 56 | for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et; |
986 | 49 | It++49 ) { |
987 | 49 | Candidate &C1 = *It; |
988 | 49 | OutlinedFunction &F1 = FunctionList[C1.FunctionIdx]; |
989 | 49 | |
990 | 49 | // If we removed this candidate, skip it. |
991 | 49 | if (!C1.InCandidateList) |
992 | 7 | continue; |
993 | 42 | |
994 | 42 | // Is it still worth it to outline C1? |
995 | 42 | if (42 F1.Benefit < 1 || 42 F1.OccurrenceCount < 240 ) { |
996 | 9 | assert(F1.OccurrenceCount > 0 && |
997 | 9 | "Can't remove OutlinedFunction with no occurrences!"); |
998 | 9 | F1.OccurrenceCount--; |
999 | 9 | C1.InCandidateList = false; |
1000 | 9 | continue; |
1001 | 9 | } |
1002 | 33 | |
1003 | 33 | // The minimum start index of any candidate that could overlap with this |
1004 | 33 | // one. |
1005 | 33 | unsigned FarthestPossibleIdx = 0; |
1006 | 33 | |
1007 | 33 | // Either the index is 0, or it's at most MaxCandidateLen indices away. |
1008 | 33 | if (C1.StartIdx > MaxCandidateLen) |
1009 | 30 | FarthestPossibleIdx = C1.StartIdx - MaxCandidateLen; |
1010 | 33 | |
1011 | 33 | // Compare against the candidates in the list that start at at most |
1012 | 33 | // FarthestPossibleIdx indices away from C1. There are at most |
1013 | 33 | // MaxCandidateLen of these. |
1014 | 41 | for (auto Sit = It + 1; Sit != Et41 ; Sit++8 ) { |
1015 | 36 | Candidate &C2 = *Sit; |
1016 | 36 | OutlinedFunction &F2 = FunctionList[C2.FunctionIdx]; |
1017 | 36 | |
1018 | 36 | // Is this candidate too far away to overlap? |
1019 | 36 | if (C2.StartIdx < FarthestPossibleIdx) |
1020 | 10 | break; |
1021 | 26 | |
1022 | 26 | // Did we already remove this candidate in a previous step? |
1023 | 26 | if (26 !C2.InCandidateList26 ) |
1024 | 0 | continue; |
1025 | 26 | |
1026 | 26 | // Is the function beneficial to outline? |
1027 | 26 | if (26 F2.OccurrenceCount < 2 || 26 F2.Benefit < 119 ) { |
1028 | 7 | // If not, remove this candidate and move to the next one. |
1029 | 7 | assert(F2.OccurrenceCount > 0 && |
1030 | 7 | "Can't remove OutlinedFunction with no occurrences!"); |
1031 | 7 | F2.OccurrenceCount--; |
1032 | 7 | C2.InCandidateList = false; |
1033 | 7 | continue; |
1034 | 7 | } |
1035 | 19 | |
1036 | 19 | unsigned C2End = C2.StartIdx + C2.Len - 1; |
1037 | 19 | |
1038 | 19 | // Do C1 and C2 overlap? |
1039 | 19 | // |
1040 | 19 | // Not overlapping: |
1041 | 19 | // High indices... [C1End ... C1Start][C2End ... C2Start] ...Low indices |
1042 | 19 | // |
1043 | 19 | // We sorted our candidate list so C2Start <= C1Start. We know that |
1044 | 19 | // C2End > C2Start since each candidate has length >= 2. Therefore, all we |
1045 | 19 | // have to check is C2End < C2Start to see if we overlap. |
1046 | 19 | if (C2End < C1.StartIdx) |
1047 | 1 | continue; |
1048 | 18 | |
1049 | 18 | // C1 and C2 overlap. |
1050 | 18 | // We need to choose the better of the two. |
1051 | 18 | // |
1052 | 18 | // Approximate this by picking the one which would have saved us the |
1053 | 18 | // most instructions before any pruning. |
1054 | 18 | if (18 C1.Benefit >= C2.Benefit18 ) { |
1055 | 0 |
|
1056 | 0 | // C1 is better, so remove C2 and update C2's OutlinedFunction to |
1057 | 0 | // reflect the removal. |
1058 | 0 | assert(F2.OccurrenceCount > 0 && |
1059 | 0 | "Can't remove OutlinedFunction with no occurrences!"); |
1060 | 0 | F2.OccurrenceCount--; |
1061 | 0 |
|
1062 | 0 | // Remove the call overhead from the removed sequence. |
1063 | 0 | F2.Benefit += C2.MInfo.CallOverhead; |
1064 | 0 |
|
1065 | 0 | // Add back one instance of the sequence. |
1066 | 0 | if (F2.Sequence.size() > F2.Benefit) |
1067 | 0 | F2.Benefit = 0; |
1068 | 0 | else |
1069 | 0 | F2.Benefit -= F2.Sequence.size(); |
1070 | 0 |
|
1071 | 0 | C2.InCandidateList = false; |
1072 | 0 |
|
1073 | 0 | DEBUG(dbgs() << "- Removed C2. \n"; |
1074 | 0 | dbgs() << "--- Num fns left for C2: " << F2.OccurrenceCount |
1075 | 0 | << "\n"; |
1076 | 0 | dbgs() << "--- C2's benefit: " << F2.Benefit << "\n";); |
1077 | 0 |
|
1078 | 18 | } else { |
1079 | 18 | // C2 is better, so remove C1 and update C1's OutlinedFunction to |
1080 | 18 | // reflect the removal. |
1081 | 18 | assert(F1.OccurrenceCount > 0 && |
1082 | 18 | "Can't remove OutlinedFunction with no occurrences!"); |
1083 | 18 | F1.OccurrenceCount--; |
1084 | 18 | |
1085 | 18 | // Remove the call overhead from the removed sequence. |
1086 | 18 | F1.Benefit += C1.MInfo.CallOverhead; |
1087 | 18 | |
1088 | 18 | // Add back one instance of the sequence. |
1089 | 18 | if (F1.Sequence.size() > F1.Benefit) |
1090 | 0 | F1.Benefit = 0; |
1091 | 18 | else |
1092 | 18 | F1.Benefit -= F1.Sequence.size(); |
1093 | 18 | |
1094 | 18 | C1.InCandidateList = false; |
1095 | 18 | |
1096 | 18 | DEBUG(dbgs() << "- Removed C1. \n"; |
1097 | 18 | dbgs() << "--- Num fns left for C1: " << F1.OccurrenceCount |
1098 | 18 | << "\n"; |
1099 | 18 | dbgs() << "--- C1's benefit: " << F1.Benefit << "\n";); |
1100 | 18 | |
1101 | 18 | // C1 is out, so we don't have to compare it against anyone else. |
1102 | 18 | break; |
1103 | 18 | } |
1104 | 36 | } |
1105 | 49 | } |
1106 | 7 | } |
1107 | | |
1108 | | unsigned |
1109 | | MachineOutliner::buildCandidateList(std::vector<Candidate> &CandidateList, |
1110 | | std::vector<OutlinedFunction> &FunctionList, |
1111 | | SuffixTree &ST, InstructionMapper &Mapper, |
1112 | 7 | const TargetInstrInfo &TII) { |
1113 | 7 | |
1114 | 7 | std::vector<unsigned> CandidateSequence; // Current outlining candidate. |
1115 | 7 | unsigned MaxCandidateLen = 0; // Length of the longest candidate. |
1116 | 7 | |
1117 | 7 | MaxCandidateLen = |
1118 | 7 | findCandidates(ST, TII, Mapper, CandidateList, FunctionList); |
1119 | 7 | |
1120 | 7 | // Sort the candidates in decending order. This will simplify the outlining |
1121 | 7 | // process when we have to remove the candidates from the mapping by |
1122 | 7 | // allowing us to cut them out without keeping track of an offset. |
1123 | 7 | std::stable_sort(CandidateList.begin(), CandidateList.end()); |
1124 | 7 | |
1125 | 7 | return MaxCandidateLen; |
1126 | 7 | } |
1127 | | |
1128 | | MachineFunction * |
1129 | | MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, |
1130 | 7 | InstructionMapper &Mapper) { |
1131 | 7 | |
1132 | 7 | // Create the function name. This should be unique. For now, just hash the |
1133 | 7 | // module name and include it in the function name plus the number of this |
1134 | 7 | // function. |
1135 | 7 | std::ostringstream NameStream; |
1136 | 7 | NameStream << "OUTLINED_FUNCTION_" << OF.Name; |
1137 | 7 | |
1138 | 7 | // Create the function using an IR-level function. |
1139 | 7 | LLVMContext &C = M.getContext(); |
1140 | 7 | Function *F = dyn_cast<Function>( |
1141 | 7 | M.getOrInsertFunction(NameStream.str(), Type::getVoidTy(C))); |
1142 | 7 | assert(F && "Function was null!"); |
1143 | 7 | |
1144 | 7 | // NOTE: If this is linkonceodr, then we can take advantage of linker deduping |
1145 | 7 | // which gives us better results when we outline from linkonceodr functions. |
1146 | 7 | F->setLinkage(GlobalValue::PrivateLinkage); |
1147 | 7 | F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); |
1148 | 7 | |
1149 | 7 | BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); |
1150 | 7 | IRBuilder<> Builder(EntryBB); |
1151 | 7 | Builder.CreateRetVoid(); |
1152 | 7 | |
1153 | 7 | MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>(); |
1154 | 7 | MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); |
1155 | 7 | MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock(); |
1156 | 7 | const TargetSubtargetInfo &STI = MF.getSubtarget(); |
1157 | 7 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
1158 | 7 | |
1159 | 7 | // Insert the new function into the module. |
1160 | 7 | MF.insert(MF.begin(), &MBB); |
1161 | 7 | |
1162 | 7 | TII.insertOutlinerPrologue(MBB, MF, OF.MInfo); |
1163 | 7 | |
1164 | 7 | // Copy over the instructions for the function using the integer mappings in |
1165 | 7 | // its sequence. |
1166 | 32 | for (unsigned Str : OF.Sequence) { |
1167 | 32 | MachineInstr *NewMI = |
1168 | 32 | MF.CloneMachineInstr(Mapper.IntegerInstructionMap.find(Str)->second); |
1169 | 32 | NewMI->dropMemRefs(); |
1170 | 32 | |
1171 | 32 | // Don't keep debug information for outlined instructions. |
1172 | 32 | // FIXME: This means outlined functions are currently undebuggable. |
1173 | 32 | NewMI->setDebugLoc(DebugLoc()); |
1174 | 32 | MBB.insert(MBB.end(), NewMI); |
1175 | 32 | } |
1176 | 7 | |
1177 | 7 | TII.insertOutlinerEpilogue(MBB, MF, OF.MInfo); |
1178 | 7 | |
1179 | 7 | return &MF; |
1180 | 7 | } |
1181 | | |
1182 | | bool MachineOutliner::outline(Module &M, |
1183 | | const ArrayRef<Candidate> &CandidateList, |
1184 | | std::vector<OutlinedFunction> &FunctionList, |
1185 | 7 | InstructionMapper &Mapper) { |
1186 | 7 | |
1187 | 7 | bool OutlinedSomething = false; |
1188 | 7 | // Replace the candidates with calls to their respective outlined functions. |
1189 | 49 | for (const Candidate &C : CandidateList) { |
1190 | 49 | |
1191 | 49 | // Was the candidate removed during pruneOverlaps? |
1192 | 49 | if (!C.InCandidateList) |
1193 | 34 | continue; |
1194 | 15 | |
1195 | 15 | // If not, then look at its OutlinedFunction. |
1196 | 15 | OutlinedFunction &OF = FunctionList[C.FunctionIdx]; |
1197 | 15 | |
1198 | 15 | // Was its OutlinedFunction made unbeneficial during pruneOverlaps? |
1199 | 15 | if (OF.OccurrenceCount < 2 || 15 OF.Benefit < 115 ) |
1200 | 0 | continue; |
1201 | 15 | |
1202 | 15 | // If not, then outline it. |
1203 | 15 | assert(C.StartIdx < Mapper.InstrList.size() && "Candidate out of bounds!"); |
1204 | 15 | MachineBasicBlock *MBB = (*Mapper.InstrList[C.StartIdx]).getParent(); |
1205 | 15 | MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.StartIdx]; |
1206 | 15 | unsigned EndIdx = C.StartIdx + C.Len - 1; |
1207 | 15 | |
1208 | 15 | assert(EndIdx < Mapper.InstrList.size() && "Candidate out of bounds!"); |
1209 | 15 | MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx]; |
1210 | 15 | assert(EndIt != MBB->end() && "EndIt out of bounds!"); |
1211 | 15 | |
1212 | 15 | EndIt++; // Erase needs one past the end index. |
1213 | 15 | |
1214 | 15 | // Does this candidate have a function yet? |
1215 | 15 | if (!OF.MF15 ) { |
1216 | 7 | OF.MF = createOutlinedFunction(M, OF, Mapper); |
1217 | 7 | FunctionsCreated++; |
1218 | 7 | } |
1219 | 49 | |
1220 | 49 | MachineFunction *MF = OF.MF; |
1221 | 49 | const TargetSubtargetInfo &STI = MF->getSubtarget(); |
1222 | 49 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
1223 | 49 | |
1224 | 49 | // Insert a call to the new function and erase the old sequence. |
1225 | 49 | TII.insertOutlinedCall(M, *MBB, StartIt, *MF, C.MInfo); |
1226 | 49 | StartIt = Mapper.InstrList[C.StartIdx]; |
1227 | 49 | MBB->erase(StartIt, EndIt); |
1228 | 49 | |
1229 | 49 | OutlinedSomething = true; |
1230 | 49 | |
1231 | 49 | // Statistics. |
1232 | 49 | NumOutlined++; |
1233 | 49 | } |
1234 | 7 | |
1235 | 7 | DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";); |
1236 | 7 | |
1237 | 7 | return OutlinedSomething; |
1238 | 7 | } |
1239 | | |
1240 | 7 | bool MachineOutliner::runOnModule(Module &M) { |
1241 | 7 | |
1242 | 7 | // Is there anything in the module at all? |
1243 | 7 | if (M.empty()) |
1244 | 0 | return false; |
1245 | 7 | |
1246 | 7 | MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>(); |
1247 | 7 | const TargetSubtargetInfo &STI = |
1248 | 7 | MMI.getOrCreateMachineFunction(*M.begin()).getSubtarget(); |
1249 | 7 | const TargetRegisterInfo *TRI = STI.getRegisterInfo(); |
1250 | 7 | const TargetInstrInfo *TII = STI.getInstrInfo(); |
1251 | 7 | |
1252 | 7 | InstructionMapper Mapper; |
1253 | 7 | |
1254 | 7 | // Build instruction mappings for each function in the module. |
1255 | 22 | for (Function &F : M) { |
1256 | 22 | MachineFunction &MF = MMI.getOrCreateMachineFunction(F); |
1257 | 22 | |
1258 | 22 | // Is the function empty? Safe to outline from? |
1259 | 22 | if (F.empty() || 22 !TII->isFunctionSafeToOutlineFrom(MF)16 ) |
1260 | 6 | continue; |
1261 | 16 | |
1262 | 16 | // If it is, look at each MachineBasicBlock in the function. |
1263 | 16 | for (MachineBasicBlock &MBB : MF) 16 { |
1264 | 23 | |
1265 | 23 | // Is there anything in MBB? |
1266 | 23 | if (MBB.empty()) |
1267 | 0 | continue; |
1268 | 23 | |
1269 | 23 | // If yes, map it. |
1270 | 23 | Mapper.convertToUnsignedVec(MBB, *TRI, *TII); |
1271 | 23 | } |
1272 | 22 | } |
1273 | 7 | |
1274 | 7 | // Construct a suffix tree, use it to find candidates, and then outline them. |
1275 | 7 | SuffixTree ST(Mapper.UnsignedVec); |
1276 | 7 | std::vector<Candidate> CandidateList; |
1277 | 7 | std::vector<OutlinedFunction> FunctionList; |
1278 | 7 | |
1279 | 7 | // Find all of the outlining candidates. |
1280 | 7 | unsigned MaxCandidateLen = |
1281 | 7 | buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII); |
1282 | 7 | |
1283 | 7 | // Remove candidates that overlap with other candidates. |
1284 | 7 | pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII); |
1285 | 7 | |
1286 | 7 | // Outline each of the candidates and return true if something was outlined. |
1287 | 7 | return outline(M, CandidateList, FunctionList, Mapper); |
1288 | 7 | } |