Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/MachineOutliner.cpp
Line
Count
Source (jump to first uncovered line)
1
//===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// Replaces repeated sequences of instructions with function calls.
11
///
12
/// This works by placing every instruction from every basic block in a
13
/// suffix tree, and repeatedly querying that tree for repeated sequences of
14
/// instructions. If a sequence of instructions appears often, then it ought
15
/// to be beneficial to pull out into a function.
16
///
17
/// The MachineOutliner communicates with a given target using hooks defined in
18
/// TargetInstrInfo.h. The target supplies the outliner with information on how
19
/// a specific sequence of instructions should be outlined. This information
20
/// is used to deduce the number of instructions necessary to
21
///
22
/// * Create an outlined function
23
/// * Call that outlined function
24
///
25
/// Targets must implement
26
///   * getOutliningCandidateInfo
27
///   * buildOutlinedFrame
28
///   * insertOutlinedCall
29
///   * isFunctionSafeToOutlineFrom
30
///
31
/// in order to make use of the MachineOutliner.
32
///
33
/// This was originally presented at the 2016 LLVM Developers' Meeting in the
34
/// talk "Reducing Code Size Using Outlining". For a high-level overview of
35
/// how this pass works, the talk is available on YouTube at
36
///
37
/// https://www.youtube.com/watch?v=yorld-WSOeU
38
///
39
/// The slides for the talk are available at
40
///
41
/// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
42
///
43
/// The talk provides an overview of how the outliner finds candidates and
44
/// ultimately outlines them. It describes how the main data structure for this
45
/// pass, the suffix tree, is queried and purged for candidates. It also gives
46
/// a simplified suffix tree construction algorithm for suffix trees based off
47
/// of the algorithm actually used here, Ukkonen's algorithm.
48
///
49
/// For the original RFC for this pass, please see
50
///
51
/// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
52
///
53
/// For more information on the suffix tree data structure, please see
54
/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
55
///
56
//===----------------------------------------------------------------------===//
57
#include "llvm/CodeGen/MachineOutliner.h"
58
#include "llvm/ADT/DenseMap.h"
59
#include "llvm/ADT/Statistic.h"
60
#include "llvm/ADT/Twine.h"
61
#include "llvm/CodeGen/MachineFunction.h"
62
#include "llvm/CodeGen/MachineModuleInfo.h"
63
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
64
#include "llvm/CodeGen/MachineRegisterInfo.h"
65
#include "llvm/CodeGen/Passes.h"
66
#include "llvm/CodeGen/TargetInstrInfo.h"
67
#include "llvm/CodeGen/TargetSubtargetInfo.h"
68
#include "llvm/IR/DIBuilder.h"
69
#include "llvm/IR/IRBuilder.h"
70
#include "llvm/IR/Mangler.h"
71
#include "llvm/Support/Allocator.h"
72
#include "llvm/Support/CommandLine.h"
73
#include "llvm/Support/Debug.h"
74
#include "llvm/Support/raw_ostream.h"
75
#include <functional>
76
#include <tuple>
77
#include <vector>
78
79
53
#define DEBUG_TYPE "machine-outliner"
80
81
using namespace llvm;
82
using namespace ore;
83
using namespace outliner;
84
85
STATISTIC(NumOutlined, "Number of candidates outlined");
86
STATISTIC(FunctionsCreated, "Number of functions created");
87
88
// Set to true if the user wants the outliner to run on linkonceodr linkage
89
// functions. This is false by default because the linker can dedupe linkonceodr
90
// functions. Since the outliner is confined to a single module (modulo LTO),
91
// this is off by default. It should, however, be the default behaviour in
92
// LTO.
93
static cl::opt<bool> EnableLinkOnceODROutlining(
94
    "enable-linkonceodr-outlining",
95
    cl::Hidden,
96
    cl::desc("Enable the machine outliner on linkonceodr functions"),
97
    cl::init(false));
98
99
namespace {
100
101
/// Represents an undefined index in the suffix tree.
102
const unsigned EmptyIdx = -1;
103
104
/// A node in a suffix tree which represents a substring or suffix.
105
///
106
/// Each node has either no children or at least two children, with the root
107
/// being an exception in the empty tree.
108
///
109
/// Children are represented as a map between unsigned integers and nodes. If
110
/// a node N has a child M on unsigned integer k, then the mapping represented
111
/// by N is a proper prefix of the mapping represented by M. Note that this,
112
/// although similar to a trie, is somewhat different: each node stores a full
113
/// substring of the full mapping rather than a single character state.
114
///
115
/// Each internal node contains a pointer to the internal node representing
116
/// the same string, but with the first character chopped off. This is stored
117
/// in \p Link. Each leaf node stores the start index of its respective
118
/// suffix in \p SuffixIdx.
119
struct SuffixTreeNode {
120
121
  /// The children of this node.
122
  ///
123
  /// A child existing on an unsigned integer implies that from the mapping
124
  /// represented by the current node, there is a way to reach another
125
  /// mapping by tacking that character on the end of the current string.
126
  DenseMap<unsigned, SuffixTreeNode *> Children;
127
128
  /// The start index of this node's substring in the main string.
129
  unsigned StartIdx = EmptyIdx;
130
131
  /// The end index of this node's substring in the main string.
132
  ///
133
  /// Every leaf node must have its \p EndIdx incremented at the end of every
134
  /// step in the construction algorithm. To avoid having to update O(N)
135
  /// nodes individually at the end of every step, the end index is stored
136
  /// as a pointer.
137
  unsigned *EndIdx = nullptr;
138
139
  /// For leaves, the start index of the suffix represented by this node.
140
  ///
141
  /// For all other nodes, this is ignored.
142
  unsigned SuffixIdx = EmptyIdx;
143
144
  /// For internal nodes, a pointer to the internal node representing
145
  /// the same sequence with the first character chopped off.
146
  ///
147
  /// This acts as a shortcut in Ukkonen's algorithm. One of the things that
148
  /// Ukkonen's algorithm does to achieve linear-time construction is
149
  /// keep track of which node the next insert should be at. This makes each
150
  /// insert O(1), and there are a total of O(N) inserts. The suffix link
151
  /// helps with inserting children of internal nodes.
152
  ///
153
  /// Say we add a child to an internal node with associated mapping S. The
154
  /// next insertion must be at the node representing S - its first character.
155
  /// This is given by the way that we iteratively build the tree in Ukkonen's
156
  /// algorithm. The main idea is to look at the suffixes of each prefix in the
157
  /// string, starting with the longest suffix of the prefix, and ending with
158
  /// the shortest. Therefore, if we keep pointers between such nodes, we can
159
  /// move to the next insertion point in O(1) time. If we don't, then we'd
160
  /// have to query from the root, which takes O(N) time. This would make the
161
  /// construction algorithm O(N^2) rather than O(N).
162
  SuffixTreeNode *Link = nullptr;
163
164
  /// The length of the string formed by concatenating the edge labels from the
165
  /// root to this node.
166
  unsigned ConcatLen = 0;
167
168
  /// Returns true if this node is a leaf.
169
2.18k
  bool isLeaf() const { return SuffixIdx != EmptyIdx; }
170
171
  /// Returns true if this node is the root of its owning \p SuffixTree.
172
24.2k
  bool isRoot() const { return StartIdx == EmptyIdx; }
173
174
  /// Return the number of elements in the substring associated with this node.
175
3.58k
  size_t size() const {
176
3.58k
177
3.58k
    // Is it the root? If so, it's the empty string so return 0.
178
3.58k
    if (isRoot())
179
0
      return 0;
180
3.58k
181
3.58k
    assert(*EndIdx != EmptyIdx && "EndIdx is undefined!");
182
3.58k
183
3.58k
    // Size = the number of elements in the string.
184
3.58k
    // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1.
185
3.58k
    return *EndIdx - StartIdx + 1;
186
3.58k
  }
187
188
  SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link)
189
10.5k
      : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link) {}
190
191
0
  SuffixTreeNode() {}
192
};
193
194
/// A data structure for fast substring queries.
195
///
196
/// Suffix trees represent the suffixes of their input strings in their leaves.
197
/// A suffix tree is a type of compressed trie structure where each node
198
/// represents an entire substring rather than a single character. Each leaf
199
/// of the tree is a suffix.
200
///
201
/// A suffix tree can be seen as a type of state machine where each state is a
202
/// substring of the full string. The tree is structured so that, for a string
203
/// of length N, there are exactly N leaves in the tree. This structure allows
204
/// us to quickly find repeated substrings of the input string.
205
///
206
/// In this implementation, a "string" is a vector of unsigned integers.
207
/// These integers may result from hashing some data type. A suffix tree can
208
/// contain 1 or many strings, which can then be queried as one large string.
209
///
210
/// The suffix tree is implemented using Ukkonen's algorithm for linear-time
211
/// suffix tree construction. Ukkonen's algorithm is explained in more detail
212
/// in the paper by Esko Ukkonen "On-line construction of suffix trees". The
213
/// paper is available at
214
///
215
/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
216
class SuffixTree {
217
public:
218
  /// Each element is an integer representing an instruction in the module.
219
  ArrayRef<unsigned> Str;
220
221
  /// A repeated substring in the tree.
222
  struct RepeatedSubstring {
223
    /// The length of the string.
224
    unsigned Length;
225
226
    /// The start indices of each occurrence.
227
    std::vector<unsigned> StartIndices;
228
  };
229
230
private:
231
  /// Maintains each node in the tree.
232
  SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator;
233
234
  /// The root of the suffix tree.
235
  ///
236
  /// The root represents the empty string. It is maintained by the
237
  /// \p NodeAllocator like every other node in the tree.
238
  SuffixTreeNode *Root = nullptr;
239
240
  /// Maintains the end indices of the internal nodes in the tree.
241
  ///
242
  /// Each internal node is guaranteed to never have its end index change
243
  /// during the construction algorithm; however, leaves must be updated at
244
  /// every step. Therefore, we need to store leaf end indices by reference
245
  /// to avoid updating O(N) leaves at every step of construction. Thus,
246
  /// every internal node must be allocated its own end index.
247
  BumpPtrAllocator InternalEndIdxAllocator;
248
249
  /// The end index of each leaf in the tree.
250
  unsigned LeafEndIdx = -1;
251
252
  /// Helper struct which keeps track of the next insertion point in
253
  /// Ukkonen's algorithm.
254
  struct ActiveState {
255
    /// The next node to insert at.
256
    SuffixTreeNode *Node;
257
258
    /// The index of the first character in the substring currently being added.
259
    unsigned Idx = EmptyIdx;
260
261
    /// The length of the substring we have to add at the current step.
262
    unsigned Len = 0;
263
  };
264
265
  /// The point the next insertion will take place at in the
266
  /// construction algorithm.
267
  ActiveState Active;
268
269
  /// Allocate a leaf node and add it to the tree.
270
  ///
271
  /// \param Parent The parent of this node.
272
  /// \param StartIdx The start index of this node's associated string.
273
  /// \param Edge The label on the edge leaving \p Parent to this node.
274
  ///
275
  /// \returns A pointer to the allocated leaf node.
276
  SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, unsigned StartIdx,
277
1.75k
                             unsigned Edge) {
278
1.75k
279
1.75k
    assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
280
1.75k
281
1.75k
    SuffixTreeNode *N = new (NodeAllocator.Allocate())
282
1.75k
        SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr);
283
1.75k
    Parent.Children[Edge] = N;
284
1.75k
285
1.75k
    return N;
286
1.75k
  }
287
288
  /// Allocate an internal node and add it to the tree.
289
  ///
290
  /// \param Parent The parent of this node. Only null when allocating the root.
291
  /// \param StartIdx The start index of this node's associated string.
292
  /// \param EndIdx The end index of this node's associated string.
293
  /// \param Edge The label on the edge leaving \p Parent to this node.
294
  ///
295
  /// \returns A pointer to the allocated internal node.
296
  SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, unsigned StartIdx,
297
8.81k
                                     unsigned EndIdx, unsigned Edge) {
298
8.81k
299
8.81k
    assert(StartIdx <= EndIdx && "String can't start after it ends!");
300
8.81k
    assert(!(!Parent && StartIdx != EmptyIdx) &&
301
8.81k
           "Non-root internal nodes must have parents!");
302
8.81k
303
8.81k
    unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
304
8.81k
    SuffixTreeNode *N = new (NodeAllocator.Allocate())
305
8.81k
        SuffixTreeNode(StartIdx, E, Root);
306
8.81k
    if (Parent)
307
432
      Parent->Children[Edge] = N;
308
8.81k
309
8.81k
    return N;
310
8.81k
  }
311
312
  /// Set the suffix indices of the leaves to the start indices of their
313
  /// respective suffixes.
314
  ///
315
  /// \param[in] CurrNode The node currently being visited.
316
  /// \param CurrNodeLen The concatenation of all node sizes from the root to
317
  /// this node. Used to produce suffix indices.
318
10.5k
  void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrNodeLen) {
319
10.5k
320
10.5k
    bool IsLeaf = CurrNode.Children.size() == 0 && 
!CurrNode.isRoot()10.0k
;
321
10.5k
322
10.5k
    // Store the concatenation of lengths down from the root.
323
10.5k
    CurrNode.ConcatLen = CurrNodeLen;
324
10.5k
    // Traverse the tree depth-first.
325
10.5k
    for (auto &ChildPair : CurrNode.Children) {
326
2.18k
      assert(ChildPair.second && "Node had a null child!");
327
2.18k
      setSuffixIndices(*ChildPair.second,
328
2.18k
                       CurrNodeLen + ChildPair.second->size());
329
2.18k
    }
330
10.5k
331
10.5k
    // Is this node a leaf? If it is, give it a suffix index.
332
10.5k
    if (IsLeaf)
333
1.75k
      CurrNode.SuffixIdx = Str.size() - CurrNodeLen;
334
10.5k
  }
335
336
  /// Construct the suffix tree for the prefix of the input ending at
337
  /// \p EndIdx.
338
  ///
339
  /// Used to construct the full suffix tree iteratively. At the end of each
340
  /// step, the constructed suffix tree is either a valid suffix tree, or a
341
  /// suffix tree with implicit suffixes. At the end of the final step, the
342
  /// suffix tree is a valid tree.
343
  ///
344
  /// \param EndIdx The end index of the current prefix in the main string.
345
  /// \param SuffixesToAdd The number of suffixes that must be added
346
  /// to complete the suffix tree at the current phase.
347
  ///
348
  /// \returns The number of suffixes that have not been added at the end of
349
  /// this step.
350
1.75k
  unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd) {
351
1.75k
    SuffixTreeNode *NeedsLink = nullptr;
352
1.75k
353
3.76k
    while (SuffixesToAdd > 0) {
354
2.72k
355
2.72k
      // Are we waiting to add anything other than just the last character?
356
2.72k
      if (Active.Len == 0) {
357
1.70k
        // If not, then say the active index is the end index.
358
1.70k
        Active.Idx = EndIdx;
359
1.70k
      }
360
2.72k
361
2.72k
      assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
362
2.72k
363
2.72k
      // The first character in the current substring we're looking at.
364
2.72k
      unsigned FirstChar = Str[Active.Idx];
365
2.72k
366
2.72k
      // Have we inserted anything starting with FirstChar at the current node?
367
2.72k
      if (Active.Node->Children.count(FirstChar) == 0) {
368
1.32k
        // If not, then we can just insert a leaf and move to the next step.
369
1.32k
        insertLeaf(*Active.Node, EndIdx, FirstChar);
370
1.32k
371
1.32k
        // The active node is an internal node, and we visited it, so it must
372
1.32k
        // need a link if it doesn't have one.
373
1.32k
        if (NeedsLink) {
374
160
          NeedsLink->Link = Active.Node;
375
160
          NeedsLink = nullptr;
376
160
        }
377
1.39k
      } else {
378
1.39k
        // There's a match with FirstChar, so look for the point in the tree to
379
1.39k
        // insert a new node.
380
1.39k
        SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
381
1.39k
382
1.39k
        unsigned SubstringLen = NextNode->size();
383
1.39k
384
1.39k
        // Is the current suffix we're trying to insert longer than the size of
385
1.39k
        // the child we want to move to?
386
1.39k
        if (Active.Len >= SubstringLen) {
387
253
          // If yes, then consume the characters we've seen and move to the next
388
253
          // node.
389
253
          Active.Idx += SubstringLen;
390
253
          Active.Len -= SubstringLen;
391
253
          Active.Node = NextNode;
392
253
          continue;
393
253
        }
394
1.14k
395
1.14k
        // Otherwise, the suffix we're trying to insert must be contained in the
396
1.14k
        // next node we want to move to.
397
1.14k
        unsigned LastChar = Str[EndIdx];
398
1.14k
399
1.14k
        // Is the string we're trying to insert a substring of the next node?
400
1.14k
        if (Str[NextNode->StartIdx + Active.Len] == LastChar) {
401
713
          // If yes, then we're done for this step. Remember our insertion point
402
713
          // and move to the next end index. At this point, we have an implicit
403
713
          // suffix tree.
404
713
          if (NeedsLink && 
!Active.Node->isRoot()18
) {
405
0
            NeedsLink->Link = Active.Node;
406
0
            NeedsLink = nullptr;
407
0
          }
408
713
409
713
          Active.Len++;
410
713
          break;
411
713
        }
412
432
413
432
        // The string we're trying to insert isn't a substring of the next node,
414
432
        // but matches up to a point. Split the node.
415
432
        //
416
432
        // For example, say we ended our search at a node n and we're trying to
417
432
        // insert ABD. Then we'll create a new node s for AB, reduce n to just
418
432
        // representing C, and insert a new leaf node l to represent D. This
419
432
        // allows us to ensure that if n was a leaf, it remains a leaf.
420
432
        //
421
432
        //   | ABC  ---split--->  | AB
422
432
        //   n                    s
423
432
        //                     C / \ D
424
432
        //                      n   l
425
432
426
432
        // The node s from the diagram
427
432
        SuffixTreeNode *SplitNode =
428
432
            insertInternalNode(Active.Node, NextNode->StartIdx,
429
432
                               NextNode->StartIdx + Active.Len - 1, FirstChar);
430
432
431
432
        // Insert the new node representing the new substring into the tree as
432
432
        // a child of the split node. This is the node l from the diagram.
433
432
        insertLeaf(*SplitNode, EndIdx, LastChar);
434
432
435
432
        // Make the old node a child of the split node and update its start
436
432
        // index. This is the node n from the diagram.
437
432
        NextNode->StartIdx += Active.Len;
438
432
        SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
439
432
440
432
        // SplitNode is an internal node, update the suffix link.
441
432
        if (NeedsLink)
442
254
          NeedsLink->Link = SplitNode;
443
432
444
432
        NeedsLink = SplitNode;
445
432
      }
446
2.72k
447
2.72k
      // We've added something new to the tree, so there's one less suffix to
448
2.72k
      // add.
449
2.72k
      SuffixesToAdd--;
450
1.75k
451
1.75k
      if (Active.Node->isRoot()) {
452
1.42k
        if (Active.Len > 0) {
453
376
          Active.Len--;
454
376
          Active.Idx = EndIdx - SuffixesToAdd + 1;
455
376
        }
456
1.42k
      } else {
457
337
        // Start the next phase at the next smallest suffix.
458
337
        Active.Node = Active.Node->Link;
459
337
      }
460
1.75k
    }
461
1.75k
462
1.75k
    return SuffixesToAdd;
463
1.75k
  }
464
465
public:
466
  /// Construct a suffix tree from a sequence of unsigned integers.
467
  ///
468
  /// \param Str The string to construct the suffix tree for.
469
8.38k
  SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
470
8.38k
    Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
471
8.38k
    Active.Node = Root;
472
8.38k
473
8.38k
    // Keep track of the number of suffixes we have to add of the current
474
8.38k
    // prefix.
475
8.38k
    unsigned SuffixesToAdd = 0;
476
8.38k
    Active.Node = Root;
477
8.38k
478
8.38k
    // Construct the suffix tree iteratively on each prefix of the string.
479
8.38k
    // PfxEndIdx is the end index of the current prefix.
480
8.38k
    // End is one past the last element in the string.
481
10.1k
    for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End;
482
8.38k
         
PfxEndIdx++1.75k
) {
483
1.75k
      SuffixesToAdd++;
484
1.75k
      LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
485
1.75k
      SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
486
1.75k
    }
487
8.38k
488
8.38k
    // Set the suffix indices of each leaf.
489
8.38k
    assert(Root && "Root node can't be nullptr!");
490
8.38k
    setSuffixIndices(*Root, 0);
491
8.38k
  }
492
493
494
  /// Iterator for finding all repeated substrings in the suffix tree.
495
  struct RepeatedSubstringIterator {
496
    private:
497
    /// The current node we're visiting.
498
    SuffixTreeNode *N = nullptr;
499
500
    /// The repeated substring associated with this node.
501
    RepeatedSubstring RS;
502
503
    /// The nodes left to visit.
504
    std::vector<SuffixTreeNode *> ToVisit;
505
506
    /// The minimum length of a repeated substring to find.
507
    /// Since we're outlining, we want at least two instructions in the range.
508
    /// FIXME: This may not be true for targets like X86 which support many
509
    /// instruction lengths.
510
    const unsigned MinLength = 2;
511
512
    /// Move the iterator to the next repeated substring.
513
8.63k
    void advance() {
514
8.63k
      // Clear the current state. If we're at the end of the range, then this
515
8.63k
      // is the state we want to be in.
516
8.63k
      RS = RepeatedSubstring();
517
8.63k
      N = nullptr;
518
8.63k
519
8.63k
      // Each leaf node represents a repeat of a string.
520
8.63k
      std::vector<SuffixTreeNode *> LeafChildren;
521
8.63k
522
8.63k
      // Continue visiting nodes until we find one which repeats more than once.
523
17.1k
      while (!ToVisit.empty()) {
524
8.81k
        SuffixTreeNode *Curr = ToVisit.back();
525
8.81k
        ToVisit.pop_back();
526
8.81k
        LeafChildren.clear();
527
8.81k
528
8.81k
        // Keep track of the length of the string associated with the node. If
529
8.81k
        // it's too short, we'll quit.
530
8.81k
        unsigned Length = Curr->ConcatLen;
531
8.81k
532
8.81k
        // Iterate over each child, saving internal nodes for visiting, and
533
8.81k
        // leaf nodes in LeafChildren. Internal nodes represent individual
534
8.81k
        // strings, which may repeat.
535
8.81k
        for (auto &ChildPair : Curr->Children) {
536
2.18k
          // Save all of this node's children for processing.
537
2.18k
          if (!ChildPair.second->isLeaf())
538
432
            ToVisit.push_back(ChildPair.second);
539
1.75k
540
1.75k
          // It's not an internal node, so it must be a leaf. If we have a
541
1.75k
          // long enough string, then save the leaf children.
542
1.75k
          else if (Length >= MinLength)
543
636
            LeafChildren.push_back(ChildPair.second);
544
2.18k
        }
545
8.81k
546
8.81k
        // The root never represents a repeated substring. If we're looking at
547
8.81k
        // that, then skip it.
548
8.81k
        if (Curr->isRoot())
549
8.38k
          continue;
550
432
551
432
        // Do we have any repeated substrings?
552
432
        if (LeafChildren.size() >= 2) {
553
251
          // Yes. Update the state to reflect this, and then bail out.
554
251
          N = Curr;
555
251
          RS.Length = Length;
556
251
          for (SuffixTreeNode *Leaf : LeafChildren)
557
627
            RS.StartIndices.push_back(Leaf->SuffixIdx);
558
251
          break;
559
251
        }
560
432
      }
561
8.63k
562
8.63k
      // At this point, either RS is an empty RepeatedSubstring, or it was
563
8.63k
      // set in the above loop. Similarly, N is either nullptr, or the node
564
8.63k
      // associated with RS.
565
8.63k
    }
566
567
  public:
568
    /// Return the current repeated substring.
569
251
    RepeatedSubstring &operator*() { return RS; }
570
571
251
    RepeatedSubstringIterator &operator++() {
572
251
      advance();
573
251
      return *this;
574
251
    }
575
576
0
    RepeatedSubstringIterator operator++(int I) {
577
0
      RepeatedSubstringIterator It(*this);
578
0
      advance();
579
0
      return It;
580
0
    }
581
582
8.63k
    bool operator==(const RepeatedSubstringIterator &Other) {
583
8.63k
      return N == Other.N;
584
8.63k
    }
585
8.63k
    bool operator!=(const RepeatedSubstringIterator &Other) {
586
8.63k
      return !(*this == Other);
587
8.63k
    }
588
589
16.7k
    RepeatedSubstringIterator(SuffixTreeNode *N) : N(N) {
590
16.7k
      // Do we have a non-null node?
591
16.7k
      if (N) {
592
8.38k
        // Yes. At the first step, we need to visit all of N's children.
593
8.38k
        // Note: This means that we visit N last.
594
8.38k
        ToVisit.push_back(N);
595
8.38k
        advance();
596
8.38k
      }
597
16.7k
    }
598
};
599
600
  typedef RepeatedSubstringIterator iterator;
601
8.38k
  iterator begin() { return iterator(Root); }
602
8.38k
  iterator end() { return iterator(nullptr); }
603
};
604
605
/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
606
struct InstructionMapper {
607
608
  /// The next available integer to assign to a \p MachineInstr that
609
  /// cannot be outlined.
610
  ///
611
  /// Set to -3 for compatibility with \p DenseMapInfo<unsigned>.
612
  unsigned IllegalInstrNumber = -3;
613
614
  /// The next available integer to assign to a \p MachineInstr that can
615
  /// be outlined.
616
  unsigned LegalInstrNumber = 0;
617
618
  /// Correspondence from \p MachineInstrs to unsigned integers.
619
  DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>
620
      InstructionIntegerMap;
621
622
  /// Correspondence between \p MachineBasicBlocks and target-defined flags.
623
  DenseMap<MachineBasicBlock *, unsigned> MBBFlagsMap;
624
625
  /// The vector of unsigned integers that the module is mapped to.
626
  std::vector<unsigned> UnsignedVec;
627
628
  /// Stores the location of the instruction associated with the integer
629
  /// at index i in \p UnsignedVec for each index i.
630
  std::vector<MachineBasicBlock::iterator> InstrList;
631
632
  // Set if we added an illegal number in the previous step.
633
  // Since each illegal number is unique, we only need one of them between
634
  // each range of legal numbers. This lets us make sure we don't add more
635
  // than one illegal number per range.
636
  bool AddedIllegalLastTime = false;
637
638
  /// Maps \p *It to a legal integer.
639
  ///
640
  /// Updates \p CanOutlineWithPrevInstr, \p HaveLegalRange, \p InstrListForMBB,
641
  /// \p UnsignedVecForMBB, \p InstructionIntegerMap, and \p LegalInstrNumber.
642
  ///
643
  /// \returns The integer that \p *It was mapped to.
644
  unsigned mapToLegalUnsigned(
645
      MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
646
      bool &HaveLegalRange, unsigned &NumLegalInBlock,
647
      std::vector<unsigned> &UnsignedVecForMBB,
648
1.51k
      std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
649
1.51k
    // We added something legal, so we should unset the AddedIllegalLastTime
650
1.51k
    // flag.
651
1.51k
    AddedIllegalLastTime = false;
652
1.51k
653
1.51k
    // If we have at least two adjacent legal instructions (which may have
654
1.51k
    // invisible instructions in between), remember that.
655
1.51k
    if (CanOutlineWithPrevInstr)
656
1.19k
      HaveLegalRange = true;
657
1.51k
    CanOutlineWithPrevInstr = true;
658
1.51k
659
1.51k
    // Keep track of the number of legal instructions we insert.
660
1.51k
    NumLegalInBlock++;
661
1.51k
662
1.51k
    // Get the integer for this instruction or give it the current
663
1.51k
    // LegalInstrNumber.
664
1.51k
    InstrListForMBB.push_back(It);
665
1.51k
    MachineInstr &MI = *It;
666
1.51k
    bool WasInserted;
667
1.51k
    DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
668
1.51k
        ResultIt;
669
1.51k
    std::tie(ResultIt, WasInserted) =
670
1.51k
        InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
671
1.51k
    unsigned MINumber = ResultIt->second;
672
1.51k
673
1.51k
    // There was an insertion.
674
1.51k
    if (WasInserted)
675
785
      LegalInstrNumber++;
676
1.51k
677
1.51k
    UnsignedVecForMBB.push_back(MINumber);
678
1.51k
679
1.51k
    // Make sure we don't overflow or use any integers reserved by the DenseMap.
680
1.51k
    if (LegalInstrNumber >= IllegalInstrNumber)
681
0
      report_fatal_error("Instruction mapping overflow!");
682
1.51k
683
1.51k
    assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
684
1.51k
           "Tried to assign DenseMap tombstone or empty key to instruction.");
685
1.51k
    assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
686
1.51k
           "Tried to assign DenseMap tombstone or empty key to instruction.");
687
1.51k
688
1.51k
    return MINumber;
689
1.51k
  }
690
691
  /// Maps \p *It to an illegal integer.
692
  ///
693
  /// Updates \p InstrListForMBB, \p UnsignedVecForMBB, and \p
694
  /// IllegalInstrNumber.
695
  ///
696
  /// \returns The integer that \p *It was mapped to.
697
  unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It,
698
  bool &CanOutlineWithPrevInstr, std::vector<unsigned> &UnsignedVecForMBB,
699
521
  std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
700
521
    // Can't outline an illegal instruction. Set the flag.
701
521
    CanOutlineWithPrevInstr = false;
702
521
703
521
    // Only add one illegal number per range of legal numbers.
704
521
    if (AddedIllegalLastTime)
705
200
      return IllegalInstrNumber;
706
321
707
321
    // Remember that we added an illegal number last time.
708
321
    AddedIllegalLastTime = true;
709
321
    unsigned MINumber = IllegalInstrNumber;
710
321
711
321
    InstrListForMBB.push_back(It);
712
321
    UnsignedVecForMBB.push_back(IllegalInstrNumber);
713
321
    IllegalInstrNumber--;
714
321
715
321
    assert(LegalInstrNumber < IllegalInstrNumber &&
716
321
           "Instruction mapping overflow!");
717
321
718
321
    assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
719
321
           "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
720
321
721
321
    assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
722
321
           "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
723
321
724
321
    return MINumber;
725
321
  }
726
727
  /// Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
728
  /// and appends it to \p UnsignedVec and \p InstrList.
729
  ///
730
  /// Two instructions are assigned the same integer if they are identical.
731
  /// If an instruction is deemed unsafe to outline, then it will be assigned a
732
  /// unique integer. The resulting mapping is placed into a suffix tree and
733
  /// queried for candidates.
734
  ///
735
  /// \param MBB The \p MachineBasicBlock to be translated into integers.
736
  /// \param TII \p TargetInstrInfo for the function.
737
  void convertToUnsignedVec(MachineBasicBlock &MBB,
738
232
                            const TargetInstrInfo &TII) {
739
232
    unsigned Flags = 0;
740
232
741
232
    // Don't even map in this case.
742
232
    if (!TII.isMBBSafeToOutlineFrom(MBB, Flags))
743
3
      return;
744
229
745
229
    // Store info for the MBB for later outlining.
746
229
    MBBFlagsMap[&MBB] = Flags;
747
229
748
229
    MachineBasicBlock::iterator It = MBB.begin();
749
229
750
229
    // The number of instructions in this block that will be considered for
751
229
    // outlining.
752
229
    unsigned NumLegalInBlock = 0;
753
229
754
229
    // True if we have at least two legal instructions which aren't separated
755
229
    // by an illegal instruction.
756
229
    bool HaveLegalRange = false;
757
229
758
229
    // True if we can perform outlining given the last mapped (non-invisible)
759
229
    // instruction. This lets us know if we have a legal range.
760
229
    bool CanOutlineWithPrevInstr = false;
761
229
762
229
    // FIXME: Should this all just be handled in the target, rather than using
763
229
    // repeated calls to getOutliningType?
764
229
    std::vector<unsigned> UnsignedVecForMBB;
765
229
    std::vector<MachineBasicBlock::iterator> InstrListForMBB;
766
229
767
2.04k
    for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; 
It++1.82k
) {
768
1.82k
      // Keep track of where this instruction is in the module.
769
1.82k
      switch (TII.getOutliningType(It, Flags)) {
770
1.82k
      case InstrType::Illegal:
771
298
        mapToIllegalUnsigned(It, CanOutlineWithPrevInstr,
772
298
                             UnsignedVecForMBB, InstrListForMBB);
773
298
        break;
774
1.82k
775
1.82k
      case InstrType::Legal:
776
1.49k
        mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
777
1.49k
                           NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
778
1.49k
        break;
779
1.82k
780
1.82k
      case InstrType::LegalTerminator:
781
22
        mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
782
22
                           NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
783
22
        // The instruction also acts as a terminator, so we have to record that
784
22
        // in the string.
785
22
        mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
786
22
        InstrListForMBB);
787
22
        break;
788
1.82k
789
1.82k
      case InstrType::Invisible:
790
8
        // Normally this is set by mapTo(Blah)Unsigned, but we just want to
791
8
        // skip this instruction. So, unset the flag here.
792
8
        AddedIllegalLastTime = false;
793
8
        break;
794
1.82k
      }
795
1.82k
    }
796
229
797
229
    // Are there enough legal instructions in the block for outlining to be
798
229
    // possible?
799
229
    if (HaveLegalRange) {
800
201
      // After we're done with every insertion, uniquely terminate this part of the
801
201
      // "string". This makes sure we won't match across basic block or function
802
201
      // boundaries since the "end" is encoded uniquely and thus appears in no
803
201
      // repeated substring.
804
201
      mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
805
201
      InstrListForMBB);
806
201
      InstrList.insert(InstrList.end(), InstrListForMBB.begin(),
807
201
                       InstrListForMBB.end());
808
201
      UnsignedVec.insert(UnsignedVec.end(), UnsignedVecForMBB.begin(),
809
201
                         UnsignedVecForMBB.end());
810
201
    }
811
229
  }
812
813
8.38k
  InstructionMapper() {
814
8.38k
    // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
815
8.38k
    // changed.
816
8.38k
    assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
817
8.38k
           "DenseMapInfo<unsigned>'s empty key isn't -1!");
818
8.38k
    assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
819
8.38k
           "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
820
8.38k
  }
821
};
822
823
/// An interprocedural pass which finds repeated sequences of
824
/// instructions and replaces them with calls to functions.
825
///
826
/// Each instruction is mapped to an unsigned integer and placed in a string.
827
/// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
828
/// is then repeatedly queried for repeated sequences of instructions. Each
829
/// non-overlapping repeated sequence is then placed in its own
830
/// \p MachineFunction and each instance is then replaced with a call to that
831
/// function.
832
struct MachineOutliner : public ModulePass {
833
834
  static char ID;
835
836
  /// Set to true if the outliner should consider functions with
837
  /// linkonce_odr linkage.
838
  bool OutlineFromLinkOnceODRs = false;
839
840
  /// Set to true if the outliner should run on all functions in the module
841
  /// considered safe for outlining.
842
  /// Set to true by default for compatibility with llc's -run-pass option.
843
  /// Set when the pass is constructed in TargetPassConfig.
844
  bool RunOnAllFunctions = true;
845
846
8.61k
  StringRef getPassName() const override { return "Machine Outliner"; }
847
848
8.60k
  void getAnalysisUsage(AnalysisUsage &AU) const override {
849
8.60k
    AU.addRequired<MachineModuleInfo>();
850
8.60k
    AU.addPreserved<MachineModuleInfo>();
851
8.60k
    AU.setPreservesAll();
852
8.60k
    ModulePass::getAnalysisUsage(AU);
853
8.60k
  }
854
855
8.63k
  MachineOutliner() : ModulePass(ID) {
856
8.63k
    initializeMachineOutlinerPass(*PassRegistry::getPassRegistry());
857
8.63k
  }
858
859
  /// Remark output explaining that not outlining a set of candidates would be
860
  /// better than outlining that set.
861
  void emitNotOutliningCheaperRemark(
862
      unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
863
      OutlinedFunction &OF);
864
865
  /// Remark output explaining that a function was outlined.
866
  void emitOutlinedFunctionRemark(OutlinedFunction &OF);
867
868
  /// Find all repeated substrings that satisfy the outlining cost model by
869
  /// constructing a suffix tree.
870
  ///
871
  /// If a substring appears at least twice, then it must be represented by
872
  /// an internal node which appears in at least two suffixes. Each suffix
873
  /// is represented by a leaf node. To do this, we visit each internal node
874
  /// in the tree, using the leaf children of each internal node. If an
875
  /// internal node represents a beneficial substring, then we use each of
876
  /// its leaf children to find the locations of its substring.
877
  ///
878
  /// \param Mapper Contains outlining mapping information.
879
  /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
880
  /// for each type of candidate.
881
  void findCandidates(InstructionMapper &Mapper,
882
                      std::vector<OutlinedFunction> &FunctionList);
883
884
  /// Replace the sequences of instructions represented by \p OutlinedFunctions
885
  /// with calls to functions.
886
  ///
887
  /// \param M The module we are outlining from.
888
  /// \param FunctionList A list of functions to be inserted into the module.
889
  /// \param Mapper Contains the instruction mappings for the module.
890
  bool outline(Module &M, std::vector<OutlinedFunction> &FunctionList,
891
               InstructionMapper &Mapper);
892
893
  /// Creates a function for \p OF and inserts it into the module.
894
  MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF,
895
                                          InstructionMapper &Mapper,
896
                                          unsigned Name);
897
898
  /// Construct a suffix tree on the instructions in \p M and outline repeated
899
  /// strings from that tree.
900
  bool runOnModule(Module &M) override;
901
902
  /// Return a DISubprogram for OF if one exists, and null otherwise. Helper
903
  /// function for remark emission.
904
45
  DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
905
45
    DISubprogram *SP;
906
45
    for (const Candidate &C : OF.Candidates)
907
115
      if (C.getMF() && (SP = C.getMF()->getFunction().getSubprogram()))
908
8
        return SP;
909
45
    
return nullptr37
;
910
45
  }
911
912
  /// Populate an \p InstructionMapper with instruction-to-integer mappings.
913
  /// These are used to construct a suffix tree.
914
  void populateMapper(InstructionMapper &Mapper, Module &M,
915
                      MachineModuleInfo &MMI);
916
917
  /// Initialize information necessary to output a size remark.
918
  /// FIXME: This should be handled by the pass manager, not the outliner.
919
  /// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy
920
  /// pass manager.
921
  void initSizeRemarkInfo(
922
      const Module &M, const MachineModuleInfo &MMI,
923
      StringMap<unsigned> &FunctionToInstrCount);
924
925
  /// Emit the remark.
926
  // FIXME: This should be handled by the pass manager, not the outliner.
927
  void emitInstrCountChangedRemark(
928
      const Module &M, const MachineModuleInfo &MMI,
929
      const StringMap<unsigned> &FunctionToInstrCount);
930
};
931
} // Anonymous namespace.
932
933
char MachineOutliner::ID = 0;
934
935
namespace llvm {
936
8.62k
ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions) {
937
8.62k
  MachineOutliner *OL = new MachineOutliner();
938
8.62k
  OL->RunOnAllFunctions = RunOnAllFunctions;
939
8.62k
  return OL;
940
8.62k
}
941
942
} // namespace llvm
943
944
INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
945
                false)
946
947
void MachineOutliner::emitNotOutliningCheaperRemark(
948
    unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
949
82
    OutlinedFunction &OF) {
950
82
  // FIXME: Right now, we arbitrarily choose some Candidate from the
951
82
  // OutlinedFunction. This isn't necessarily fixed, nor does it have to be.
952
82
  // We should probably sort these by function name or something to make sure
953
82
  // the remarks are stable.
954
82
  Candidate &C = CandidatesForRepeatedSeq.front();
955
82
  MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
956
82
  MORE.emit([&]() {
957
8
    MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
958
8
                                      C.front()->getDebugLoc(), C.getMBB());
959
8
    R << "Did not outline " << NV("Length", StringLen) << " instructions"
960
8
      << " from " << NV("NumOccurrences", CandidatesForRepeatedSeq.size())
961
8
      << " locations."
962
8
      << " Bytes from outlining all occurrences ("
963
8
      << NV("OutliningCost", OF.getOutliningCost()) << ")"
964
8
      << " >= Unoutlined instruction bytes ("
965
8
      << NV("NotOutliningCost", OF.getNotOutlinedCost()) << ")"
966
8
      << " (Also found at: ";
967
8
968
8
    // Tell the user the other places the candidate was found.
969
20
    for (unsigned i = 1, e = CandidatesForRepeatedSeq.size(); i < e; 
i++12
) {
970
12
      R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
971
12
              CandidatesForRepeatedSeq[i].front()->getDebugLoc());
972
12
      if (i != e - 1)
973
4
        R << ", ";
974
12
    }
975
8
976
8
    R << ")";
977
8
    return R;
978
8
  });
979
82
}
980
981
45
void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
982
45
  MachineBasicBlock *MBB = &*OF.MF->begin();
983
45
  MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
984
45
  MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
985
45
                              MBB->findDebugLoc(MBB->begin()), MBB);
986
45
  R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by "
987
45
    << "outlining " << NV("Length", OF.getNumInstrs()) << " instructions "
988
45
    << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
989
45
    << " locations. "
990
45
    << "(Found at: ";
991
45
992
45
  // Tell the user every place the candidate was found.
993
170
  for (size_t i = 0, e = OF.Candidates.size(); i < e; 
i++125
) {
994
125
995
125
    R << NV((Twine("StartLoc") + Twine(i)).str(),
996
125
            OF.Candidates[i].front()->getDebugLoc());
997
125
    if (i != e - 1)
998
80
      R << ", ";
999
125
  }
1000
45
1001
45
  R << ")";
1002
45
1003
45
  MORE.emit(R);
1004
45
}
1005
1006
void
1007
MachineOutliner::findCandidates(InstructionMapper &Mapper,
1008
8.38k
                                std::vector<OutlinedFunction> &FunctionList) {
1009
8.38k
  FunctionList.clear();
1010
8.38k
  SuffixTree ST(Mapper.UnsignedVec);
1011
8.38k
1012
8.38k
  // First, find all of the repeated substrings in the tree of minimum length
1013
8.38k
  // 2.
1014
8.38k
  std::vector<Candidate> CandidatesForRepeatedSeq;
1015
8.63k
  for (auto It = ST.begin(), Et = ST.end(); It != Et; 
++It251
) {
1016
251
    CandidatesForRepeatedSeq.clear();
1017
251
    SuffixTree::RepeatedSubstring RS = *It;
1018
251
    unsigned StringLen = RS.Length;
1019
627
    for (const unsigned &StartIdx : RS.StartIndices) {
1020
627
      unsigned EndIdx = StartIdx + StringLen - 1;
1021
627
      // Trick: Discard some candidates that would be incompatible with the
1022
627
      // ones we've already found for this sequence. This will save us some
1023
627
      // work in candidate selection.
1024
627
      //
1025
627
      // If two candidates overlap, then we can't outline them both. This
1026
627
      // happens when we have candidates that look like, say
1027
627
      //
1028
627
      // AA (where each "A" is an instruction).
1029
627
      //
1030
627
      // We might have some portion of the module that looks like this:
1031
627
      // AAAAAA (6 A's)
1032
627
      //
1033
627
      // In this case, there are 5 different copies of "AA" in this range, but
1034
627
      // at most 3 can be outlined. If only outlining 3 of these is going to
1035
627
      // be unbeneficial, then we ought to not bother.
1036
627
      //
1037
627
      // Note that two things DON'T overlap when they look like this:
1038
627
      // start1...end1 .... start2...end2
1039
627
      // That is, one must either
1040
627
      // * End before the other starts
1041
627
      // * Start after the other ends
1042
627
      if (std::all_of(
1043
627
              CandidatesForRepeatedSeq.begin(), CandidatesForRepeatedSeq.end(),
1044
627
              [&StartIdx, &EndIdx](const Candidate &C) {
1045
574
                return (EndIdx < C.getStartIdx() || 
StartIdx > C.getEndIdx()230
);
1046
627
              })) {
1047
627
        // It doesn't overlap with anything, so we can outline it.
1048
627
        // Each sequence is over [StartIt, EndIt].
1049
627
        // Save the candidate and its location.
1050
627
1051
627
        MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
1052
627
        MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
1053
627
        MachineBasicBlock *MBB = StartIt->getParent();
1054
627
1055
627
        CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt,
1056
627
                                              EndIt, MBB, FunctionList.size(),
1057
627
                                              Mapper.MBBFlagsMap[MBB]);
1058
627
      }
1059
627
    }
1060
251
1061
251
    // We've found something we might want to outline.
1062
251
    // Create an OutlinedFunction to store it and check if it'd be beneficial
1063
251
    // to outline.
1064
251
    if (CandidatesForRepeatedSeq.size() < 2)
1065
0
      continue;
1066
251
1067
251
    // Arbitrarily choose a TII from the first candidate.
1068
251
    // FIXME: Should getOutliningCandidateInfo move to TargetMachine?
1069
251
    const TargetInstrInfo *TII =
1070
251
        CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo();
1071
251
1072
251
    OutlinedFunction OF =
1073
251
        TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq);
1074
251
1075
251
    // If we deleted too many candidates, then there's nothing worth outlining.
1076
251
    // FIXME: This should take target-specified instruction sizes into account.
1077
251
    if (OF.Candidates.size() < 2)
1078
2
      continue;
1079
249
1080
249
    // Is it better to outline this candidate than not?
1081
249
    if (OF.getBenefit() < 1) {
1082
82
      emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, OF);
1083
82
      continue;
1084
82
    }
1085
167
1086
167
    FunctionList.push_back(OF);
1087
167
  }
1088
8.38k
}
1089
1090
MachineFunction *
1091
MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF,
1092
                                        InstructionMapper &Mapper,
1093
45
                                        unsigned Name) {
1094
45
1095
45
  // Create the function name. This should be unique.
1096
45
  // FIXME: We should have a better naming scheme. This should be stable,
1097
45
  // regardless of changes to the outliner's cost model/traversal order.
1098
45
  std::string FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str();
1099
45
1100
45
  // Create the function using an IR-level function.
1101
45
  LLVMContext &C = M.getContext();
1102
45
  Function *F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
1103
45
                                 Function::ExternalLinkage, FunctionName, M);
1104
45
1105
45
  // NOTE: If this is linkonceodr, then we can take advantage of linker deduping
1106
45
  // which gives us better results when we outline from linkonceodr functions.
1107
45
  F->setLinkage(GlobalValue::InternalLinkage);
1108
45
  F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1109
45
1110
45
  // FIXME: Set nounwind, so we don't generate eh_frame? Haven't verified it's
1111
45
  // necessary.
1112
45
1113
45
  // Set optsize/minsize, so we don't insert padding between outlined
1114
45
  // functions.
1115
45
  F->addFnAttr(Attribute::OptimizeForSize);
1116
45
  F->addFnAttr(Attribute::MinSize);
1117
45
1118
45
  // Include target features from an arbitrary candidate for the outlined
1119
45
  // function. This makes sure the outlined function knows what kinds of
1120
45
  // instructions are going into it. This is fine, since all parent functions
1121
45
  // must necessarily support the instructions that are in the outlined region.
1122
45
  Candidate &FirstCand = OF.Candidates.front();
1123
45
  const Function &ParentFn = FirstCand.getMF()->getFunction();
1124
45
  if (ParentFn.hasFnAttribute("target-features"))
1125
4
    F->addFnAttr(ParentFn.getFnAttribute("target-features"));
1126
45
1127
45
  BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
1128
45
  IRBuilder<> Builder(EntryBB);
1129
45
  Builder.CreateRetVoid();
1130
45
1131
45
  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
1132
45
  MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
1133
45
  MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
1134
45
  const TargetSubtargetInfo &STI = MF.getSubtarget();
1135
45
  const TargetInstrInfo &TII = *STI.getInstrInfo();
1136
45
1137
45
  // Insert the new function into the module.
1138
45
  MF.insert(MF.begin(), &MBB);
1139
45
1140
310
  for (auto I = FirstCand.front(), E = std::next(FirstCand.back()); I != E;
1141
265
       ++I) {
1142
265
    MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
1143
265
    NewMI->dropMemRefs(MF);
1144
265
1145
265
    // Don't keep debug information for outlined instructions.
1146
265
    NewMI->setDebugLoc(DebugLoc());
1147
265
    MBB.insert(MBB.end(), NewMI);
1148
265
  }
1149
45
1150
45
  TII.buildOutlinedFrame(MBB, MF, OF);
1151
45
1152
45
  // Outlined functions shouldn't preserve liveness.
1153
45
  MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
1154
45
  MF.getRegInfo().freezeReservedRegs(MF);
1155
45
1156
45
  // If there's a DISubprogram associated with this outlined function, then
1157
45
  // emit debug info for the outlined function.
1158
45
  if (DISubprogram *SP = getSubprogramOrNull(OF)) {
1159
8
    // We have a DISubprogram. Get its DICompileUnit.
1160
8
    DICompileUnit *CU = SP->getUnit();
1161
8
    DIBuilder DB(M, true, CU);
1162
8
    DIFile *Unit = SP->getFile();
1163
8
    Mangler Mg;
1164
8
    // Get the mangled name of the function for the linkage name.
1165
8
    std::string Dummy;
1166
8
    llvm::raw_string_ostream MangledNameStream(Dummy);
1167
8
    Mg.getNameWithPrefix(MangledNameStream, F, false);
1168
8
1169
8
    DISubprogram *OutlinedSP = DB.createFunction(
1170
8
        Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
1171
8
        Unit /* File */,
1172
8
        0 /* Line 0 is reserved for compiler-generated code. */,
1173
8
        DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */
1174
8
        0, /* Line 0 is reserved for compiler-generated code. */
1175
8
        DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
1176
8
        /* Outlined code is optimized code by definition. */
1177
8
        DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
1178
8
1179
8
    // Don't add any new variables to the subprogram.
1180
8
    DB.finalizeSubprogram(OutlinedSP);
1181
8
1182
8
    // Attach subprogram to the function.
1183
8
    F->setSubprogram(OutlinedSP);
1184
8
    // We're done with the DIBuilder.
1185
8
    DB.finalize();
1186
8
  }
1187
45
1188
45
  return &MF;
1189
45
}
1190
1191
bool MachineOutliner::outline(Module &M,
1192
                              std::vector<OutlinedFunction> &FunctionList,
1193
8.38k
                              InstructionMapper &Mapper) {
1194
8.38k
1195
8.38k
  bool OutlinedSomething = false;
1196
8.38k
1197
8.38k
  // Number to append to the current outlined function.
1198
8.38k
  unsigned OutlinedFunctionNum = 0;
1199
8.38k
1200
8.38k
  // Sort by benefit. The most beneficial functions should be outlined first.
1201
8.38k
  llvm::stable_sort(FunctionList, [](const OutlinedFunction &LHS,
1202
8.38k
                                     const OutlinedFunction &RHS) {
1203
375
    return LHS.getBenefit() > RHS.getBenefit();
1204
375
  });
1205
8.38k
1206
8.38k
  // Walk over each function, outlining them as we go along. Functions are
1207
8.38k
  // outlined greedily, based off the sort above.
1208
8.38k
  for (OutlinedFunction &OF : FunctionList) {
1209
167
    // If we outlined something that overlapped with a candidate in a previous
1210
167
    // step, then we can't outline from it.
1211
416
    erase_if(OF.Candidates, [&Mapper](Candidate &C) {
1212
416
      return std::any_of(
1213
416
          Mapper.UnsignedVec.begin() + C.getStartIdx(),
1214
416
          Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
1215
1.01k
          [](unsigned I) { return (I == static_cast<unsigned>(-1)); });
1216
416
    });
1217
167
1218
167
    // If we made it unbeneficial to outline this function, skip it.
1219
167
    if (OF.getBenefit() < 1)
1220
122
      continue;
1221
45
1222
45
    // It's beneficial. Create the function and outline its sequence's
1223
45
    // occurrences.
1224
45
    OF.MF = createOutlinedFunction(M, OF, Mapper, OutlinedFunctionNum);
1225
45
    emitOutlinedFunctionRemark(OF);
1226
45
    FunctionsCreated++;
1227
45
    OutlinedFunctionNum++; // Created a function, move to the next name.
1228
45
    MachineFunction *MF = OF.MF;
1229
45
    const TargetSubtargetInfo &STI = MF->getSubtarget();
1230
45
    const TargetInstrInfo &TII = *STI.getInstrInfo();
1231
45
1232
45
    // Replace occurrences of the sequence with calls to the new function.
1233
125
    for (Candidate &C : OF.Candidates) {
1234
125
      MachineBasicBlock &MBB = *C.getMBB();
1235
125
      MachineBasicBlock::iterator StartIt = C.front();
1236
125
      MachineBasicBlock::iterator EndIt = C.back();
1237
125
1238
125
      // Insert the call.
1239
125
      auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *MF, C);
1240
125
1241
125
      // If the caller tracks liveness, then we need to make sure that
1242
125
      // anything we outline doesn't break liveness assumptions. The outlined
1243
125
      // functions themselves currently don't track liveness, but we should
1244
125
      // make sure that the ranges we yank things out of aren't wrong.
1245
125
      if (MBB.getParent()->getProperties().hasProperty(
1246
125
              MachineFunctionProperties::Property::TracksLiveness)) {
1247
125
        // Helper lambda for adding implicit def operands to the call
1248
125
        // instruction. It also updates call site information for moved
1249
125
        // code.
1250
825
        auto CopyDefsAndUpdateCalls = [&CallInst](MachineInstr &MI) {
1251
2.76k
          for (MachineOperand &MOP : MI.operands()) {
1252
2.76k
            // Skip over anything that isn't a register.
1253
2.76k
            if (!MOP.isReg())
1254
1.11k
              continue;
1255
1.65k
1256
1.65k
            // If it's a def, add it to the call instruction.
1257
1.65k
            if (MOP.isDef())
1258
614
              CallInst->addOperand(MachineOperand::CreateReg(
1259
614
                  MOP.getReg(), true, /* isDef = true */
1260
614
                  true /* isImp = true */));
1261
1.65k
          }
1262
825
          if (MI.isCall())
1263
147
            MI.getMF()->updateCallSiteInfo(&MI);
1264
825
        };
1265
125
        // Copy over the defs in the outlined range.
1266
125
        // First inst in outlined range <-- Anything that's defined in this
1267
125
        // ...                           .. range has to be added as an implicit
1268
125
        // Last inst in outlined range  <-- def to the call
1269
125
        // instruction. Also remove call site information for outlined block
1270
125
        // of code.
1271
125
        std::for_each(CallInst, std::next(EndIt), CopyDefsAndUpdateCalls);
1272
125
      }
1273
125
1274
125
      // Erase from the point after where the call was inserted up to, and
1275
125
      // including, the final instruction in the sequence.
1276
125
      // Erase needs one past the end, so we need std::next there too.
1277
125
      MBB.erase(std::next(StartIt), std::next(EndIt));
1278
125
1279
125
      // Keep track of what we removed by marking them all as -1.
1280
125
      std::for_each(Mapper.UnsignedVec.begin() + C.getStartIdx(),
1281
125
                    Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
1282
677
                    [](unsigned &I) { I = static_cast<unsigned>(-1); });
1283
125
      OutlinedSomething = true;
1284
125
1285
125
      // Statistics.
1286
125
      NumOutlined++;
1287
125
    }
1288
45
  }
1289
8.38k
1290
8.38k
  LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
1291
8.38k
1292
8.38k
  return OutlinedSomething;
1293
8.38k
}
1294
1295
void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
1296
8.38k
                                     MachineModuleInfo &MMI) {
1297
8.38k
  // Build instruction mappings for each function in the module. Start by
1298
8.38k
  // iterating over each Function in M.
1299
393k
  for (Function &F : M) {
1300
393k
1301
393k
    // If there's nothing in F, then there's no reason to try and outline from
1302
393k
    // it.
1303
393k
    if (F.empty())
1304
136k
      continue;
1305
257k
1306
257k
    // There's something in F. Check if it has a MachineFunction associated with
1307
257k
    // it.
1308
257k
    MachineFunction *MF = MMI.getMachineFunction(F);
1309
257k
1310
257k
    // If it doesn't, then there's nothing to outline from. Move to the next
1311
257k
    // Function.
1312
257k
    if (!MF)
1313
0
      continue;
1314
257k
1315
257k
    const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1316
257k
1317
257k
    if (!RunOnAllFunctions && 
!TII->shouldOutlineFromFunctionByDefault(*MF)257k
)
1318
256k
      continue;
1319
175
1320
175
    // We have a MachineFunction. Ask the target if it's suitable for outlining.
1321
175
    // If it isn't, then move on to the next Function in the module.
1322
175
    if (!TII->isFunctionSafeToOutlineFrom(*MF, OutlineFromLinkOnceODRs))
1323
10
      continue;
1324
165
1325
165
    // We have a function suitable for outlining. Iterate over every
1326
165
    // MachineBasicBlock in MF and try to map its instructions to a list of
1327
165
    // unsigned integers.
1328
270
    
for (MachineBasicBlock &MBB : *MF)165
{
1329
270
      // If there isn't anything in MBB, then there's no point in outlining from
1330
270
      // it.
1331
270
      // If there are fewer than 2 instructions in the MBB, then it can't ever
1332
270
      // contain something worth outlining.
1333
270
      // FIXME: This should be based off of the maximum size in B of an outlined
1334
270
      // call versus the size in B of the MBB.
1335
270
      if (MBB.empty() || MBB.size() < 2)
1336
38
        continue;
1337
232
1338
232
      // Check if MBB could be the target of an indirect branch. If it is, then
1339
232
      // we don't want to outline from it.
1340
232
      if (MBB.hasAddressTaken())
1341
0
        continue;
1342
232
1343
232
      // MBB is suitable for outlining. Map it to a list of unsigneds.
1344
232
      Mapper.convertToUnsignedVec(MBB, *TII);
1345
232
    }
1346
165
  }
1347
8.38k
}
1348
1349
void MachineOutliner::initSizeRemarkInfo(
1350
    const Module &M, const MachineModuleInfo &MMI,
1351
1
    StringMap<unsigned> &FunctionToInstrCount) {
1352
1
  // Collect instruction counts for every function. We'll use this to emit
1353
1
  // per-function size remarks later.
1354
1
  for (const Function &F : M) {
1355
1
    MachineFunction *MF = MMI.getMachineFunction(F);
1356
1
1357
1
    // We only care about MI counts here. If there's no MachineFunction at this
1358
1
    // point, then there won't be after the outliner runs, so let's move on.
1359
1
    if (!MF)
1360
0
      continue;
1361
1
    FunctionToInstrCount[F.getName().str()] = MF->getInstructionCount();
1362
1
  }
1363
1
}
1364
1365
void MachineOutliner::emitInstrCountChangedRemark(
1366
    const Module &M, const MachineModuleInfo &MMI,
1367
1
    const StringMap<unsigned> &FunctionToInstrCount) {
1368
1
  // Iterate over each function in the module and emit remarks.
1369
1
  // Note that we won't miss anything by doing this, because the outliner never
1370
1
  // deletes functions.
1371
2
  for (const Function &F : M) {
1372
2
    MachineFunction *MF = MMI.getMachineFunction(F);
1373
2
1374
2
    // The outliner never deletes functions. If we don't have a MF here, then we
1375
2
    // didn't have one prior to outlining either.
1376
2
    if (!MF)
1377
0
      continue;
1378
2
1379
2
    std::string Fname = F.getName();
1380
2
    unsigned FnCountAfter = MF->getInstructionCount();
1381
2
    unsigned FnCountBefore = 0;
1382
2
1383
2
    // Check if the function was recorded before.
1384
2
    auto It = FunctionToInstrCount.find(Fname);
1385
2
1386
2
    // Did we have a previously-recorded size? If yes, then set FnCountBefore
1387
2
    // to that.
1388
2
    if (It != FunctionToInstrCount.end())
1389
1
      FnCountBefore = It->second;
1390
2
1391
2
    // Compute the delta and emit a remark if there was a change.
1392
2
    int64_t FnDelta = static_cast<int64_t>(FnCountAfter) -
1393
2
                      static_cast<int64_t>(FnCountBefore);
1394
2
    if (FnDelta == 0)
1395
0
      continue;
1396
2
1397
2
    MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
1398
2
    MORE.emit([&]() {
1399
2
      MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
1400
2
                                          DiagnosticLocation(),
1401
2
                                          &MF->front());
1402
2
      R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner")
1403
2
        << ": Function: "
1404
2
        << DiagnosticInfoOptimizationBase::Argument("Function", F.getName())
1405
2
        << ": MI instruction count changed from "
1406
2
        << DiagnosticInfoOptimizationBase::Argument("MIInstrsBefore",
1407
2
                                                    FnCountBefore)
1408
2
        << " to "
1409
2
        << DiagnosticInfoOptimizationBase::Argument("MIInstrsAfter",
1410
2
                                                    FnCountAfter)
1411
2
        << "; Delta: "
1412
2
        << DiagnosticInfoOptimizationBase::Argument("Delta", FnDelta);
1413
2
      return R;
1414
2
    });
1415
2
  }
1416
1
}
1417
1418
8.59k
/// Pass entry point: map the module's instructions, find outlining
/// candidates, and outline them.
///
/// \returns true if anything was outlined, false otherwise (including when
/// \p M contains no functions).
bool MachineOutliner::runOnModule(Module &M) {
  // An empty module has nothing to outline.
  if (M.empty())
    return false;

  MachineModuleInfo &ModuleInfo = getAnalysis<MachineModuleInfo>();

  // If the user passed -enable-machine-outliner=always or
  // -enable-machine-outliner, the pass will run on all functions in the module.
  // Otherwise, if the target supports default outlining, it will run on all
  // functions deemed by the target to be worth outlining from by default. Tell
  // the user how the outliner is running.
  LLVM_DEBUG(dbgs() << "Machine Outliner: Running on "
                    << (RunOnAllFunctions ? "all functions"
                                          : "target-default functions")
                    << "\n");

  // Honor the user's choice about outlining from linkonceodrs.
  OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;

  // Build the instruction mappings that the suffix tree is queried over.
  InstructionMapper InstrMapper;
  populateMapper(InstrMapper, M, ModuleInfo);

  // Collect every outlining candidate.
  std::vector<OutlinedFunction> Candidates;
  findCandidates(InstrMapper, Candidates);

  // If size remarks were requested, snapshot the MI count of every function
  // before outlining so it can be compared with the count afterwards.
  // FIXME: This shouldn't be in the outliner at all; it should ultimately be
  // the pass manager's responsibility.
  // This could pretty easily be placed in outline instead, but because we
  // really ultimately *don't* want this here, it's done like this for now
  // instead.
  const bool ShouldEmitSizeRemarks = M.shouldEmitInstrCountChangedRemark();
  StringMap<unsigned> InstrCountBefore;
  if (ShouldEmitSizeRemarks)
    initSizeRemarkInfo(M, ModuleInfo, InstrCountBefore);

  // Outline the candidates.
  const bool Changed = outline(M, Candidates, InstrMapper);

  // If we outlined something, we definitely changed the MI count of the
  // module. If size remarks were requested, output them.
  // FIXME: This should be in the pass manager.
  if (Changed && ShouldEmitSizeRemarks)
    emitInstrCountChangedRemark(M, ModuleInfo, InstrCountBefore);

  return Changed;
}