Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- SpeculateAroundPHIs.cpp --------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
10
#include "llvm/ADT/PostOrderIterator.h"
11
#include "llvm/ADT/Sequence.h"
12
#include "llvm/ADT/SetVector.h"
13
#include "llvm/ADT/Statistic.h"
14
#include "llvm/Analysis/TargetTransformInfo.h"
15
#include "llvm/Analysis/ValueTracking.h"
16
#include "llvm/IR/BasicBlock.h"
17
#include "llvm/IR/IRBuilder.h"
18
#include "llvm/IR/Instructions.h"
19
#include "llvm/IR/IntrinsicInst.h"
20
#include "llvm/Support/Debug.h"
21
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
22
23
using namespace llvm;
24
25
#define DEBUG_TYPE "spec-phis"
26
27
STATISTIC(NumPHIsSpeculated, "Number of PHI nodes we speculated around");
28
STATISTIC(NumEdgesSplit,
29
          "Number of critical edges which were split for speculation");
30
STATISTIC(NumSpeculatedInstructions,
31
          "Number of instructions we speculated around the PHI nodes");
32
STATISTIC(NumNewRedundantInstructions,
33
          "Number of new, redundant instructions inserted");
34
35
/// Check whether speculating the users of a PHI node around the PHI
36
/// will be safe.
37
///
38
/// This checks both that all of the users are safe and also that all of their
39
/// operands are either recursively safe or already available along an incoming
40
/// edge to the PHI.
41
///
42
/// This routine caches both all the safe nodes explored in `PotentialSpecSet`
43
/// and the chain of nodes that definitively reach any unsafe node in
44
/// `UnsafeSet`. By preserving these between repeated calls to this routine for
45
/// PHIs in the same basic block, the exploration here can be reused. However,
46
/// these caches must no be reused for PHIs in a different basic block as they
47
/// reflect what is available along incoming edges.
48
static bool
49
isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT,
50
                          SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
51
51
                          SmallPtrSetImpl<Instruction *> &UnsafeSet) {
52
51
  auto *PhiBB = PN.getParent();
53
51
  SmallPtrSet<Instruction *, 4> Visited;
54
51
  SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack;
55
51
56
51
  // Walk each user of the PHI node.
57
75
  for (Use &U : PN.uses()) {
58
75
    auto *UI = cast<Instruction>(U.getUser());
59
75
60
75
    // Ensure the use post-dominates the PHI node. This ensures that, in the
61
75
    // absence of unwinding, the use will actually be reached.
62
75
    // FIXME: We use a blunt hammer of requiring them to be in the same basic
63
75
    // block. We should consider using actual post-dominance here in the
64
75
    // future.
65
75
    if (UI->getParent() != PhiBB) {
66
3
      LLVM_DEBUG(dbgs() << "  Unsafe: use in a different BB: " << *UI << "\n");
67
3
      return false;
68
3
    }
69
72
70
72
    if (auto CS = ImmutableCallSite(UI)) {
71
3
      if (CS.isConvergent() || 
CS.cannotDuplicate()2
) {
72
2
        LLVM_DEBUG(dbgs() << "  Unsafe: convergent "
73
2
                   "callsite cannot de duplicated: " << *UI << '\n');
74
2
        return false;
75
2
      }
76
70
    }
77
70
78
70
    // FIXME: This check is much too conservative. We're not going to move these
79
70
    // instructions onto new dynamic paths through the program unless there is
80
70
    // a call instruction between the use and the PHI node. And memory isn't
81
70
    // changing unless there is a store in that same sequence. We should
82
70
    // probably change this to do at least a limited scan of the intervening
83
70
    // instructions and allow handling stores in easily proven safe cases.
84
70
    if (mayBeMemoryDependent(*UI)) {
85
2
      LLVM_DEBUG(dbgs() << "  Unsafe: can't speculate use: " << *UI << "\n");
86
2
      return false;
87
2
    }
88
68
89
68
    // Now do a depth-first search of everything these users depend on to make
90
68
    // sure they are transitively safe. This is a depth-first search, but we
91
68
    // check nodes in preorder to minimize the amount of checking.
92
68
    Visited.insert(UI);
93
68
    DFSStack.push_back({UI, UI->value_op_begin()});
94
73
    do {
95
73
      User::value_op_iterator OpIt;
96
73
      std::tie(UI, OpIt) = DFSStack.pop_back_val();
97
73
98
217
      while (OpIt != UI->value_op_end()) {
99
144
        auto *OpI = dyn_cast<Instruction>(*OpIt);
100
144
        // Increment to the next operand for whenever we continue.
101
144
        ++OpIt;
102
144
        // No need to visit non-instructions, which can't form dependencies.
103
144
        if (!OpI)
104
61
          continue;
105
83
106
83
        // Now do the main pre-order checks that this operand is a viable
107
83
        // dependency of something we want to speculate.
108
83
109
83
        // First do a few checks for instructions that won't require
110
83
        // speculation at all because they are trivially available on the
111
83
        // incoming edge (either through dominance or through an incoming value
112
83
        // to a PHI).
113
83
        //
114
83
        // The cases in the current block will be trivially dominated by the
115
83
        // edge.
116
83
        auto *ParentBB = OpI->getParent();
117
83
        if (ParentBB == PhiBB) {
118
80
          if (isa<PHINode>(OpI)) {
119
72
            // We can trivially map through phi nodes in the same block.
120
72
            continue;
121
72
          }
122
3
        } else if (DT.dominates(ParentBB, PhiBB)) {
123
3
          // Instructions from dominating blocks are already available.
124
3
          continue;
125
3
        }
126
8
127
8
        // Once we know that we're considering speculating the operand, check
128
8
        // if we've already explored this subgraph and found it to be safe.
129
8
        if (PotentialSpecSet.count(OpI))
130
3
          continue;
131
5
132
5
        // If we've already explored this subgraph and found it unsafe, bail.
133
5
        // If when we directly test whether this is safe it fails, bail.
134
5
        if (UnsafeSet.count(OpI) || ParentBB != PhiBB ||
135
5
            mayBeMemoryDependent(*OpI)) {
136
0
          LLVM_DEBUG(dbgs() << "  Unsafe: can't speculate transitive use: "
137
0
                            << *OpI << "\n");
138
0
          // Record the stack of instructions which reach this node as unsafe
139
0
          // so we prune subsequent searches.
140
0
          UnsafeSet.insert(OpI);
141
0
          for (auto &StackPair : DFSStack) {
142
0
            Instruction *I = StackPair.first;
143
0
            UnsafeSet.insert(I);
144
0
          }
145
0
          return false;
146
0
        }
147
5
148
5
        // Skip any operands we're already recursively checking.
149
5
        if (!Visited.insert(OpI).second)
150
0
          continue;
151
5
152
5
        // Push onto the stack and descend. We can directly continue this
153
5
        // loop when ascending.
154
5
        DFSStack.push_back({UI, OpIt});
155
5
        UI = OpI;
156
5
        OpIt = OpI->value_op_begin();
157
5
      }
158
73
159
73
      // This node and all its operands are safe. Go ahead and cache that for
160
73
      // reuse later.
161
73
      PotentialSpecSet.insert(UI);
162
73
163
73
      // Continue with the next node on the stack.
164
73
    } while (!DFSStack.empty());
165
68
  }
166
51
167
#ifndef NDEBUG
168
  // Every visited operand should have been marked as safe for speculation at
169
  // this point. Verify this and return success.
170
  for (auto *I : Visited)
171
    assert(PotentialSpecSet.count(I) &&
172
           "Failed to mark a visited instruction as safe!");
173
#endif
174
44
  return true;
175
51
}
176
177
/// Check whether, in isolation, a given PHI node is both safe and profitable
178
/// to speculate users around.
179
///
180
/// This handles checking whether there are any constant operands to a PHI
181
/// which could represent a useful speculation candidate, whether the users of
182
/// the PHI are safe to speculate including all their transitive dependencies,
183
/// and whether after speculation there will be some cost savings (profit) to
184
/// folding the operands into the users of the PHI node. Returns true if both
185
/// safe and profitable with relevant cost savings updated in the map and with
186
/// an update to the `PotentialSpecSet`. Returns false if either safety or
187
/// profitability are absent. Some new entries may be made to the
188
/// `PotentialSpecSet` even when this routine returns false, but they remain
189
/// conservatively correct.
190
///
191
/// The profitability check here is a local one, but it checks this in an
192
/// interesting way. Beyond checking that the total cost of materializing the
193
/// constants will be less than the cost of folding them into their users, it
194
/// also checks that no one incoming constant will have a higher cost when
195
/// folded into its users rather than materialized. This higher cost could
196
/// result in a dynamic *path* that is more expensive even when the total cost
197
/// is lower. Currently, all of the interesting cases where this optimization
198
/// should fire are ones where it is a no-loss operation in this sense. If we
199
/// ever want to be more aggressive here, we would need to balance the
200
/// different incoming edges' cost by looking at their respective
201
/// probabilities.
202
static bool isSafeAndProfitableToSpeculateAroundPHI(
203
    PHINode &PN, SmallDenseMap<PHINode *, int, 16> &CostSavingsMap,
204
    SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
205
    SmallPtrSetImpl<Instruction *> &UnsafeSet, DominatorTree &DT,
206
88
    TargetTransformInfo &TTI) {
207
88
  // First see whether there is any cost savings to speculating around this
208
88
  // PHI, and build up a map of the constant inputs to how many times they
209
88
  // occur.
210
88
  bool NonFreeMat = false;
211
88
  struct CostsAndCount {
212
88
    int MatCost = TargetTransformInfo::TCC_Free;
213
88
    int FoldedCost = TargetTransformInfo::TCC_Free;
214
88
    int Count = 0;
215
88
  };
216
88
  SmallDenseMap<ConstantInt *, CostsAndCount, 16> CostsAndCounts;
217
88
  SmallPtrSet<BasicBlock *, 16> IncomingConstantBlocks;
218
179
  for (int i : llvm::seq<int>(0, PN.getNumIncomingValues())) {
219
179
    auto *IncomingC = dyn_cast<ConstantInt>(PN.getIncomingValue(i));
220
179
    if (!IncomingC)
221
85
      continue;
222
94
223
94
    // Only visit each incoming edge with a constant input once.
224
94
    if (!IncomingConstantBlocks.insert(PN.getIncomingBlock(i)).second)
225
0
      continue;
226
94
227
94
    auto InsertResult = CostsAndCounts.insert({IncomingC, {}});
228
94
    // Count how many edges share a given incoming costant.
229
94
    ++InsertResult.first->second.Count;
230
94
    // Only compute the cost the first time we see a particular constant.
231
94
    if (!InsertResult.second)
232
2
      continue;
233
92
234
92
    int &MatCost = InsertResult.first->second.MatCost;
235
92
    MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType());
236
92
    NonFreeMat |= MatCost != TTI.TCC_Free;
237
92
  }
238
88
  if (!NonFreeMat) {
239
37
    LLVM_DEBUG(dbgs() << "    Free: " << PN << "\n");
240
37
    // No profit in free materialization.
241
37
    return false;
242
37
  }
243
51
244
51
  // Now check that the uses of this PHI can actually be speculated,
245
51
  // otherwise we'll still have to materialize the PHI value.
246
51
  if (!isSafeToSpeculatePHIUsers(PN, DT, PotentialSpecSet, UnsafeSet)) {
247
7
    LLVM_DEBUG(dbgs() << "    Unsafe PHI: " << PN << "\n");
248
7
    return false;
249
7
  }
250
44
251
44
  // Compute how much (if any) savings are available by speculating around this
252
44
  // PHI.
253
68
  
for (Use &U : PN.uses())44
{
254
68
    auto *UserI = cast<Instruction>(U.getUser());
255
68
    // Now check whether there is any savings to folding the incoming constants
256
68
    // into this use.
257
68
    unsigned Idx = U.getOperandNo();
258
68
259
68
    // If we have a binary operator that is commutative, an actual constant
260
68
    // operand would end up on the RHS, so pretend the use of the PHI is on the
261
68
    // RHS.
262
68
    //
263
68
    // Technically, this is a bit weird if *both* operands are PHIs we're
264
68
    // speculating. But if that is the case, giving an "optimistic" cost isn't
265
68
    // a bad thing because after speculation it will constant fold. And
266
68
    // moreover, such cases should likely have been constant folded already by
267
68
    // some other pass, so we shouldn't worry about "modeling" them terribly
268
68
    // accurately here. Similarly, if the other operand is a constant, it still
269
68
    // seems fine to be "optimistic" in our cost modeling, because when the
270
68
    // incoming operand from the PHI node is also a constant, we will end up
271
68
    // constant folding.
272
68
    if (UserI->isBinaryOp() && 
UserI->isCommutative()46
&&
Idx != 146
)
273
23
      // Assume we will commute the constant to the RHS to be canonical.
274
23
      Idx = 1;
275
68
276
68
    // Get the intrinsic ID if this user is an intrinsic.
277
68
    Intrinsic::ID IID = Intrinsic::not_intrinsic;
278
68
    if (auto *UserII = dyn_cast<IntrinsicInst>(UserI))
279
1
      IID = UserII->getIntrinsicID();
280
68
281
92
    for (auto &IncomingConstantAndCostsAndCount : CostsAndCounts) {
282
92
      ConstantInt *IncomingC = IncomingConstantAndCostsAndCount.first;
283
92
      int MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
284
92
      int &FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost;
285
92
      if (IID)
286
0
        FoldedCost += TTI.getIntImmCost(IID, Idx, IncomingC->getValue(),
287
0
                                        IncomingC->getType());
288
92
      else
289
92
        FoldedCost +=
290
92
            TTI.getIntImmCost(UserI->getOpcode(), Idx, IncomingC->getValue(),
291
92
                              IncomingC->getType());
292
92
293
92
      // If we accumulate more folded cost for this incoming constant than
294
92
      // materialized cost, then we'll regress any edge with this constant so
295
92
      // just bail. We're only interested in cases where folding the incoming
296
92
      // constants is at least break-even on all paths.
297
92
      if (FoldedCost > MatCost) {
298
1
        LLVM_DEBUG(dbgs() << "  Not profitable to fold imm: " << *IncomingC
299
1
                          << "\n"
300
1
                             "    Materializing cost:    "
301
1
                          << MatCost
302
1
                          << "\n"
303
1
                             "    Accumulated folded cost: "
304
1
                          << FoldedCost << "\n");
305
1
        return false;
306
1
      }
307
92
    }
308
68
  }
309
44
310
44
  // Compute the total cost savings afforded by this PHI node.
311
44
  int TotalMatCost = TTI.TCC_Free, TotalFoldedCost = TTI.TCC_Free;
312
63
  for (auto IncomingConstantAndCostsAndCount : CostsAndCounts) {
313
63
    int MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
314
63
    int FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost;
315
63
    int Count = IncomingConstantAndCostsAndCount.second.Count;
316
63
317
63
    TotalMatCost += MatCost * Count;
318
63
    TotalFoldedCost += FoldedCost * Count;
319
63
  }
320
43
  assert(TotalFoldedCost <= TotalMatCost && "If each constant's folded cost is "
321
43
                                            "less that its materialized cost, "
322
43
                                            "the sum must be as well.");
323
43
324
43
  LLVM_DEBUG(dbgs() << "    Cost savings " << (TotalMatCost - TotalFoldedCost)
325
43
                    << ": " << PN << "\n");
326
43
  CostSavingsMap[&PN] = TotalMatCost - TotalFoldedCost;
327
43
  return true;
328
44
}
329
330
/// Simple helper to walk all the users of a list of phis depth first, and call
331
/// a visit function on each one in post-order.
332
///
333
/// All of the PHIs should be in the same basic block, and this is primarily
334
/// used to make a single depth-first walk across their collective users
335
/// without revisiting any subgraphs. Callers should provide a fast, idempotent
336
/// callable to test whether a node has been visited and the more important
337
/// callable to actually visit a particular node.
338
///
339
/// Depth-first and postorder here refer to the *operand* graph -- we start
340
/// from a collection of users of PHI nodes and walk "up" the operands
341
/// depth-first.
342
template <typename IsVisitedT, typename VisitT>
343
static void visitPHIUsersAndDepsInPostOrder(ArrayRef<PHINode *> PNs,
344
                                            IsVisitedT IsVisited,
345
48
                                            VisitT Visit) {
346
48
  SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack;
347
48
  for (auto *PN : PNs)
348
79
    
for (Use &U : PN->uses())56
{
349
79
      auto *UI = cast<Instruction>(U.getUser());
350
79
      if (IsVisited(UI))
351
2
        // Already visited this user, continue across the roots.
352
2
        continue;
353
77
354
77
      // Otherwise, walk the operand graph depth-first and visit each
355
77
      // dependency in postorder.
356
77
      DFSStack.push_back({UI, UI->value_op_begin()});
357
83
      do {
358
83
        User::value_op_iterator OpIt;
359
83
        std::tie(UI, OpIt) = DFSStack.pop_back_val();
360
245
        while (OpIt != UI->value_op_end()) {
361
162
          auto *OpI = dyn_cast<Instruction>(*OpIt);
362
162
          // Increment to the next operand for whenever we continue.
363
162
          ++OpIt;
364
162
          // No need to visit non-instructions, which can't form dependencies,
365
162
          // or instructions outside of our potential dependency set that we
366
162
          // were given. Finally, if we've already visited the node, continue
367
162
          // to the next.
368
162
          if (!OpI || 
IsVisited(OpI)99
)
369
156
            continue;
370
6
371
6
          // Push onto the stack and descend. We can directly continue this
372
6
          // loop when ascending.
373
6
          DFSStack.push_back({UI, OpIt});
374
6
          UI = OpI;
375
6
          OpIt = OpI->value_op_begin();
376
6
        }
377
83
378
83
        // Finished visiting children, visit this node.
379
83
        assert(!IsVisited(UI) && "Should not have already visited a node!");
380
83
        Visit(UI);
381
83
      } while (!DFSStack.empty());
382
77
    }
383
48
}
SpeculateAroundPHIs.cpp:void visitPHIUsersAndDepsInPostOrder<findProfitablePHIs(llvm::ArrayRef<llvm::PHINode*>, llvm::SmallDenseMap<llvm::PHINode*, int, 16u, llvm::DenseMapInfo<llvm::PHINode*>, llvm::detail::DenseMapPair<llvm::PHINode*, int> > const&, llvm::SmallPtrSetImpl<llvm::Instruction*> const&, int, llvm::DominatorTree&, llvm::TargetTransformInfo&)::$_1, findProfitablePHIs(llvm::ArrayRef<llvm::PHINode*>, llvm::SmallDenseMap<llvm::PHINode*, int, 16u, llvm::DenseMapInfo<llvm::PHINode*>, llvm::detail::DenseMapPair<llvm::PHINode*, int> > const&, llvm::SmallPtrSetImpl<llvm::Instruction*> const&, int, llvm::DominatorTree&, llvm::TargetTransformInfo&)::$_0>(llvm::ArrayRef<llvm::PHINode*>, findProfitablePHIs(llvm::ArrayRef<llvm::PHINode*>, llvm::SmallDenseMap<llvm::PHINode*, int, 16u, llvm::DenseMapInfo<llvm::PHINode*>, llvm::detail::DenseMapPair<llvm::PHINode*, int> > const&, llvm::SmallPtrSetImpl<llvm::Instruction*> const&, int, llvm::DominatorTree&, llvm::TargetTransformInfo&)::$_1, findProfitablePHIs(llvm::ArrayRef<llvm::PHINode*>, llvm::SmallDenseMap<llvm::PHINode*, int, 16u, llvm::DenseMapInfo<llvm::PHINode*>, llvm::detail::DenseMapPair<llvm::PHINode*, int> > const&, llvm::SmallPtrSetImpl<llvm::Instruction*> const&, int, llvm::DominatorTree&, llvm::TargetTransformInfo&)::$_0)
Line
Count
Source
345
35
                                            VisitT Visit) {
346
35
  SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack;
347
35
  for (auto *PN : PNs)
348
62
    
for (Use &U : PN->uses())39
{
349
62
      auto *UI = cast<Instruction>(U.getUser());
350
62
      if (IsVisited(UI))
351
2
        // Already visited this user, continue across the roots.
352
2
        continue;
353
60
354
60
      // Otherwise, walk the operand graph depth-first and visit each
355
60
      // dependency in postorder.
356
60
      DFSStack.push_back({UI, UI->value_op_begin()});
357
64
      do {
358
64
        User::value_op_iterator OpIt;
359
64
        std::tie(UI, OpIt) = DFSStack.pop_back_val();
360
190
        while (OpIt != UI->value_op_end()) {
361
126
          auto *OpI = dyn_cast<Instruction>(*OpIt);
362
126
          // Increment to the next operand for whenever we continue.
363
126
          ++OpIt;
364
126
          // No need to visit non-instructions, which can't form dependencies,
365
126
          // or instructions outside of our potential dependency set that we
366
126
          // were given. Finally, if we've already visited the node, continue
367
126
          // to the next.
368
126
          if (!OpI || 
IsVisited(OpI)72
)
369
122
            continue;
370
4
371
4
          // Push onto the stack and descend. We can directly continue this
372
4
          // loop when ascending.
373
4
          DFSStack.push_back({UI, OpIt});
374
4
          UI = OpI;
375
4
          OpIt = OpI->value_op_begin();
376
4
        }
377
64
378
64
        // Finished visiting children, visit this node.
379
64
        assert(!IsVisited(UI) && "Should not have already visited a node!");
380
64
        Visit(UI);
381
64
      } while (!DFSStack.empty());
382
60
    }
383
35
}
SpeculateAroundPHIs.cpp:void visitPHIUsersAndDepsInPostOrder<speculatePHIs(llvm::ArrayRef<llvm::PHINode*>, llvm::SmallPtrSetImpl<llvm::Instruction*>&, llvm::SmallSetVector<llvm::BasicBlock*, 16u>&, llvm::DominatorTree&)::$_2, speculatePHIs(llvm::ArrayRef<llvm::PHINode*>, llvm::SmallPtrSetImpl<llvm::Instruction*>&, llvm::SmallSetVector<llvm::BasicBlock*, 16u>&, llvm::DominatorTree&)::$_3>(llvm::ArrayRef<llvm::PHINode*>, speculatePHIs(llvm::ArrayRef<llvm::PHINode*>, llvm::SmallPtrSetImpl<llvm::Instruction*>&, llvm::SmallSetVector<llvm::BasicBlock*, 16u>&, llvm::DominatorTree&)::$_2, speculatePHIs(llvm::ArrayRef<llvm::PHINode*>, llvm::SmallPtrSetImpl<llvm::Instruction*>&, llvm::SmallSetVector<llvm::BasicBlock*, 16u>&, llvm::DominatorTree&)::$_3)
Line
Count
Source
345
13
                                            VisitT Visit) {
346
13
  SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack;
347
13
  for (auto *PN : PNs)
348
17
    for (Use &U : PN->uses()) {
349
17
      auto *UI = cast<Instruction>(U.getUser());
350
17
      if (IsVisited(UI))
351
0
        // Already visited this user, continue across the roots.
352
0
        continue;
353
17
354
17
      // Otherwise, walk the operand graph depth-first and visit each
355
17
      // dependency in postorder.
356
17
      DFSStack.push_back({UI, UI->value_op_begin()});
357
19
      do {
358
19
        User::value_op_iterator OpIt;
359
19
        std::tie(UI, OpIt) = DFSStack.pop_back_val();
360
55
        while (OpIt != UI->value_op_end()) {
361
36
          auto *OpI = dyn_cast<Instruction>(*OpIt);
362
36
          // Increment to the next operand for whenever we continue.
363
36
          ++OpIt;
364
36
          // No need to visit non-instructions, which can't form dependencies,
365
36
          // or instructions outside of our potential dependency set that we
366
36
          // were given. Finally, if we've already visited the node, continue
367
36
          // to the next.
368
36
          if (!OpI || 
IsVisited(OpI)27
)
369
34
            continue;
370
2
371
2
          // Push onto the stack and descend. We can directly continue this
372
2
          // loop when ascending.
373
2
          DFSStack.push_back({UI, OpIt});
374
2
          UI = OpI;
375
2
          OpIt = OpI->value_op_begin();
376
2
        }
377
19
378
19
        // Finished visiting children, visit this node.
379
19
        assert(!IsVisited(UI) && "Should not have already visited a node!");
380
19
        Visit(UI);
381
19
      } while (!DFSStack.empty());
382
17
    }
383
13
}
384
385
/// Find profitable PHIs to speculate.
386
///
387
/// For a PHI node to be profitable, we need the cost of speculating its users
388
/// (and their dependencies) to not exceed the savings of folding the PHI's
389
/// constant operands into the speculated users.
390
///
391
/// Computing this is surprisingly challenging. Because users of two different
392
/// PHI nodes can depend on each other or on common other instructions, it may
393
/// be profitable to speculate two PHI nodes together even though neither one
394
/// in isolation is profitable. The straightforward way to find all the
395
/// profitable PHIs would be to check each combination of PHIs' cost, but this
396
/// is exponential in complexity.
397
///
398
/// Even if we assume that we only care about cases where we can consider each
399
/// PHI node in isolation (rather than considering cases where none are
400
/// profitable in isolation but some subset are profitable as a set), we still
401
/// have a challenge. The obvious way to find all individually profitable PHIs
402
/// is to iterate until reaching a fixed point, but this will be quadratic in
403
/// complexity. =/
404
///
405
/// This code currently uses a linear-to-compute order for a greedy approach.
406
/// It won't find cases where a set of PHIs must be considered together, but it
407
/// handles most cases of order dependence without quadratic iteration. The
408
/// specific order used is the post-order across the operand DAG. When the last
409
/// user of a PHI is visited in this postorder walk, we check it for
410
/// profitability.
411
///
412
/// There is an orthogonal extra complexity to all of this: computing the cost
413
/// itself can easily become a linear computation making everything again (at
414
/// best) quadratic. Using a postorder over the operand graph makes it
415
/// particularly easy to avoid this through dynamic programming. As we do the
416
/// postorder walk, we build the transitive cost of that subgraph. It is also
417
/// straightforward to then update these costs when we mark a PHI for
418
/// speculation so that subsequent PHIs don't re-pay the cost of already
419
/// speculated instructions.
420
static SmallVector<PHINode *, 16>
421
findProfitablePHIs(ArrayRef<PHINode *> PNs,
422
                   const SmallDenseMap<PHINode *, int, 16> &CostSavingsMap,
423
                   const SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
424
35
                   int NumPreds, DominatorTree &DT, TargetTransformInfo &TTI) {
425
35
  SmallVector<PHINode *, 16> SpecPNs;
426
35
427
35
  // First, establish a reverse mapping from immediate users of the PHI nodes
428
35
  // to the nodes themselves, and count how many users each PHI node has in
429
35
  // a way we can update while processing them.
430
35
  SmallDenseMap<Instruction *, TinyPtrVector<PHINode *>, 16> UserToPNMap;
431
35
  SmallDenseMap<PHINode *, int, 16> PNUserCountMap;
432
35
  SmallPtrSet<Instruction *, 16> UserSet;
433
39
  for (auto *PN : PNs) {
434
39
    assert(UserSet.empty() && "Must start with an empty user set!");
435
39
    for (Use &U : PN->uses())
436
62
      UserSet.insert(cast<Instruction>(U.getUser()));
437
39
    PNUserCountMap[PN] = UserSet.size();
438
39
    for (auto *UI : UserSet)
439
62
      UserToPNMap.insert({UI, {}}).first->second.push_back(PN);
440
39
    UserSet.clear();
441
39
  }
442
35
443
35
  // Now do a DFS across the operand graph of the users, computing cost as we
444
35
  // go and when all costs for a given PHI are known, checking that PHI for
445
35
  // profitability.
446
35
  SmallDenseMap<Instruction *, int, 16> SpecCostMap;
447
35
  visitPHIUsersAndDepsInPostOrder(
448
35
      PNs,
449
35
      /*IsVisited*/
450
134
      [&](Instruction *I) {
451
134
        // We consider anything that isn't potentially speculated to be
452
134
        // "visited" as it is already handled. Similarly, anything that *is*
453
134
        // potentially speculated but for which we have an entry in our cost
454
134
        // map, we're done.
455
134
        return !PotentialSpecSet.count(I) || 
SpecCostMap.count(I)68
;
456
134
      },
457
35
      /*Visit*/
458
64
      [&](Instruction *I) {
459
64
        // We've fully visited the operands, so sum their cost with this node
460
64
        // and update the cost map.
461
64
        int Cost = TTI.TCC_Free;
462
64
        for (Value *OpV : I->operand_values())
463
126
          if (auto *OpI = dyn_cast<Instruction>(OpV)) {
464
72
            auto CostMapIt = SpecCostMap.find(OpI);
465
72
            if (CostMapIt != SpecCostMap.end())
466
6
              Cost += CostMapIt->second;
467
72
          }
468
64
        Cost += TTI.getUserCost(I);
469
64
        bool Inserted = SpecCostMap.insert({I, Cost}).second;
470
64
        (void)Inserted;
471
64
        assert(Inserted && "Must not re-insert a cost during the DFS!");
472
64
473
64
        // Now check if this node had a corresponding PHI node using it. If so,
474
64
        // we need to decrement the outstanding user count for it.
475
64
        auto UserPNsIt = UserToPNMap.find(I);
476
64
        if (UserPNsIt == UserToPNMap.end())
477
2
          return;
478
62
        auto &UserPNs = UserPNsIt->second;
479
62
        auto UserPNsSplitIt = std::stable_partition(
480
62
            UserPNs.begin(), UserPNs.end(), [&](PHINode *UserPN) {
481
62
              int &PNUserCount = PNUserCountMap.find(UserPN)->second;
482
62
              assert(
483
62
                  PNUserCount > 0 &&
484
62
                  "Should never re-visit a PN after its user count hits zero!");
485
62
              --PNUserCount;
486
62
              return PNUserCount != 0;
487
62
            });
488
62
489
62
        // FIXME: Rather than one at a time, we should sum the savings as the
490
62
        // cost will be completely shared.
491
62
        SmallVector<Instruction *, 16> SpecWorklist;
492
62
        for (auto *PN : llvm::make_range(UserPNsSplitIt, UserPNs.end())) {
493
39
          int SpecCost = TTI.TCC_Free;
494
39
          for (Use &U : PN->uses())
495
62
            SpecCost +=
496
62
                SpecCostMap.find(cast<Instruction>(U.getUser()))->second;
497
39
          SpecCost *= (NumPreds - 1);
498
39
          // When the user count of a PHI node hits zero, we should check its
499
39
          // profitability. If profitable, we should mark it for speculation
500
39
          // and zero out the cost of everything it depends on.
501
39
          int CostSavings = CostSavingsMap.find(PN)->second;
502
39
          if (SpecCost > CostSavings) {
503
22
            LLVM_DEBUG(dbgs() << "  Not profitable, speculation cost: " << *PN
504
22
                              << "\n"
505
22
                                 "    Cost savings:     "
506
22
                              << CostSavings
507
22
                              << "\n"
508
22
                                 "    Speculation cost: "
509
22
                              << SpecCost << "\n");
510
22
            continue;
511
22
          }
512
17
513
17
          // We're going to speculate this user-associated PHI. Copy it out and
514
17
          // add its users to the worklist to update their cost.
515
17
          SpecPNs.push_back(PN);
516
17
          for (Use &U : PN->uses()) {
517
17
            auto *UI = cast<Instruction>(U.getUser());
518
17
            auto CostMapIt = SpecCostMap.find(UI);
519
17
            if (CostMapIt->second == 0)
520
0
              continue;
521
17
            // Zero out this cost entry to avoid duplicates.
522
17
            CostMapIt->second = 0;
523
17
            SpecWorklist.push_back(UI);
524
17
          }
525
17
        }
526
62
527
62
        // Now walk all the operands of the users in the worklist transitively
528
62
        // to zero out all the memoized costs.
529
79
        while (!SpecWorklist.empty()) {
530
17
          Instruction *SpecI = SpecWorklist.pop_back_val();
531
17
          assert(SpecCostMap.find(SpecI)->second == 0 &&
532
17
                 "Didn't zero out a cost!");
533
17
534
17
          // Walk the operands recursively to zero out their cost as well.
535
34
          for (auto *OpV : SpecI->operand_values()) {
536
34
            auto *OpI = dyn_cast<Instruction>(OpV);
537
34
            if (!OpI)
538
8
              continue;
539
26
            auto CostMapIt = SpecCostMap.find(OpI);
540
26
            if (CostMapIt == SpecCostMap.end() || 
CostMapIt->second == 05
)
541
26
              continue;
542
0
            CostMapIt->second = 0;
543
0
            SpecWorklist.push_back(OpI);
544
0
          }
545
17
        }
546
62
      });
547
35
548
35
  return SpecPNs;
549
35
}
550
551
/// Speculate users around a set of PHI nodes.
552
///
553
/// This routine does the actual speculation around a set of PHI nodes where we
554
/// have determined this to be both safe and profitable.
555
///
556
/// This routine handles any spliting of critical edges necessary to create
557
/// a safe block to speculate into as well as cloning the instructions and
558
/// rewriting all uses.
559
static void speculatePHIs(ArrayRef<PHINode *> SpecPNs,
560
                          SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
561
                          SmallSetVector<BasicBlock *, 16> &PredSet,
562
13
                          DominatorTree &DT) {
563
13
  LLVM_DEBUG(dbgs() << "  Speculating around " << SpecPNs.size() << " PHIs!\n");
564
13
  NumPHIsSpeculated += SpecPNs.size();
565
13
566
13
  // Split any critical edges so that we have a block to hoist into.
567
13
  auto *ParentBB = SpecPNs[0]->getParent();
568
13
  SmallVector<BasicBlock *, 16> SpecPreds;
569
13
  SpecPreds.reserve(PredSet.size());
570
26
  for (auto *PredBB : PredSet) {
571
26
    auto *NewPredBB = SplitCriticalEdge(
572
26
        PredBB, ParentBB,
573
26
        CriticalEdgeSplittingOptions(&DT).setMergeIdenticalEdges());
574
26
    if (NewPredBB) {
575
3
      ++NumEdgesSplit;
576
3
      LLVM_DEBUG(dbgs() << "  Split critical edge from: " << PredBB->getName()
577
3
                        << "\n");
578
3
      SpecPreds.push_back(NewPredBB);
579
23
    } else {
580
23
      assert(PredBB->getSingleSuccessor() == ParentBB &&
581
23
             "We need a non-critical predecessor to speculate into.");
582
23
      assert(!isa<InvokeInst>(PredBB->getTerminator()) &&
583
23
             "Cannot have a non-critical invoke!");
584
23
585
23
      // Already non-critical, use existing pred.
586
23
      SpecPreds.push_back(PredBB);
587
23
    }
588
26
  }
589
13
590
13
  SmallPtrSet<Instruction *, 16> SpecSet;
591
13
  SmallVector<Instruction *, 16> SpecList;
592
13
  visitPHIUsersAndDepsInPostOrder(SpecPNs,
593
13
                                  /*IsVisited*/
594
44
                                  [&](Instruction *I) {
595
44
                                    // This is visited if we don't need to
596
44
                                    // speculate it or we already have
597
44
                                    // speculated it.
598
44
                                    return !PotentialSpecSet.count(I) ||
599
44
                                           
SpecSet.count(I)23
;
600
44
                                  },
601
13
                                  /*Visit*/
602
19
                                  [&](Instruction *I) {
603
19
                                    // All operands scheduled, schedule this
604
19
                                    // node.
605
19
                                    SpecSet.insert(I);
606
19
                                    SpecList.push_back(I);
607
19
                                  });
608
13
609
13
  int NumSpecInsts = SpecList.size() * SpecPreds.size();
610
13
  int NumRedundantInsts = NumSpecInsts - SpecList.size();
611
13
  LLVM_DEBUG(dbgs() << "  Inserting " << NumSpecInsts
612
13
                    << " speculated instructions, " << NumRedundantInsts
613
13
                    << " redundancies\n");
614
13
  NumSpeculatedInstructions += NumSpecInsts;
615
13
  NumNewRedundantInstructions += NumRedundantInsts;
616
13
617
13
  // Each predecessor is numbered by its index in `SpecPreds`, so for each
618
13
  // instruction we speculate, the speculated instruction is stored in that
619
13
  // index of the vector associated with the original instruction. We also
620
13
  // store the incoming values for each predecessor from any PHIs used.
621
13
  SmallDenseMap<Instruction *, SmallVector<Value *, 2>, 16> SpeculatedValueMap;
622
13
623
13
  // Inject the synthetic mappings to rewrite PHIs to the appropriate incoming
624
13
  // value. This handles both the PHIs we are speculating around and any other
625
13
  // PHIs that happen to be used.
626
13
  for (auto *OrigI : SpecList)
627
36
    
for (auto *OpV : OrigI->operand_values())19
{
628
36
      auto *OpPN = dyn_cast<PHINode>(OpV);
629
36
      if (!OpPN || 
OpPN->getParent() != ParentBB19
)
630
18
        continue;
631
18
632
18
      auto InsertResult = SpeculatedValueMap.insert({OpPN, {}});
633
18
      if (!InsertResult.second)
634
0
        continue;
635
18
636
18
      auto &SpeculatedVals = InsertResult.first->second;
637
18
638
18
      // Populating our structure for mapping is particularly annoying because
639
18
      // finding an incoming value for a particular predecessor block in a PHI
640
18
      // node is a linear time operation! To avoid quadratic behavior, we build
641
18
      // a map for this PHI node's incoming values and then translate it into
642
18
      // the more compact representation used below.
643
18
      SmallDenseMap<BasicBlock *, Value *, 16> IncomingValueMap;
644
18
      for (int i : llvm::seq<int>(0, OpPN->getNumIncomingValues()))
645
36
        IncomingValueMap[OpPN->getIncomingBlock(i)] = OpPN->getIncomingValue(i);
646
18
647
18
      for (auto *PredBB : SpecPreds)
648
36
        SpeculatedVals.push_back(IncomingValueMap.find(PredBB)->second);
649
18
    }
650
13
651
13
  // Speculate into each predecessor.
652
26
  for (int PredIdx : llvm::seq<int>(0, SpecPreds.size())) {
653
26
    auto *PredBB = SpecPreds[PredIdx];
654
26
    assert(PredBB->getSingleSuccessor() == ParentBB &&
655
26
           "We need a non-critical predecessor to speculate into.");
656
26
657
38
    for (auto *OrigI : SpecList) {
658
38
      auto *NewI = OrigI->clone();
659
38
      NewI->setName(Twine(OrigI->getName()) + "." + Twine(PredIdx));
660
38
      NewI->insertBefore(PredBB->getTerminator());
661
38
662
38
      // Rewrite all the operands to the previously speculated instructions.
663
38
      // Because we're walking in-order, the defs must precede the uses and we
664
38
      // should already have these mappings.
665
72
      for (Use &U : NewI->operands()) {
666
72
        auto *OpI = dyn_cast<Instruction>(U.get());
667
72
        if (!OpI)
668
18
          continue;
669
54
        auto MapIt = SpeculatedValueMap.find(OpI);
670
54
        if (MapIt == SpeculatedValueMap.end())
671
6
          continue;
672
48
        const auto &SpeculatedVals = MapIt->second;
673
48
        assert(SpeculatedVals[PredIdx] &&
674
48
               "Must have a speculated value for this predecessor!");
675
48
        assert(SpeculatedVals[PredIdx]->getType() == OpI->getType() &&
676
48
               "Speculated value has the wrong type!");
677
48
678
48
        // Rewrite the use to this predecessor's speculated instruction.
679
48
        U.set(SpeculatedVals[PredIdx]);
680
48
      }
681
38
682
38
      // Commute instructions which now have a constant in the LHS but not the
683
38
      // RHS.
684
38
      if (NewI->isBinaryOp() && 
NewI->isCommutative()32
&&
685
38
          
isa<Constant>(NewI->getOperand(0))32
&&
686
38
          
!isa<Constant>(NewI->getOperand(1))3
)
687
2
        NewI->getOperandUse(0).swap(NewI->getOperandUse(1));
688
38
689
38
      SpeculatedValueMap[OrigI].push_back(NewI);
690
38
      assert(SpeculatedValueMap[OrigI][PredIdx] == NewI &&
691
38
             "Mismatched speculated instruction index!");
692
38
    }
693
26
  }
694
13
695
13
  // Walk the speculated instruction list and if they have uses, insert a PHI
696
13
  // for them from the speculated versions, and replace the uses with the PHI.
697
13
  // Then erase the instructions as they have been fully speculated. The walk
698
13
  // needs to be in reverse so that we don't think there are users when we'll
699
13
  // actually eventually remove them later.
700
13
  IRBuilder<> IRB(SpecPNs[0]);
701
19
  for (auto *OrigI : llvm::reverse(SpecList)) {
702
19
    // Check if we need a PHI for any remaining users and if so, insert it.
703
19
    if (!OrigI->use_empty()) {
704
13
      auto *SpecIPN = IRB.CreatePHI(OrigI->getType(), SpecPreds.size(),
705
13
                                    Twine(OrigI->getName()) + ".phi");
706
13
      // Add the incoming values we speculated.
707
13
      auto &SpeculatedVals = SpeculatedValueMap.find(OrigI)->second;
708
13
      for (int PredIdx : llvm::seq<int>(0, SpecPreds.size()))
709
26
        SpecIPN->addIncoming(SpeculatedVals[PredIdx], SpecPreds[PredIdx]);
710
13
711
13
      // And replace the uses with the PHI node.
712
13
      OrigI->replaceAllUsesWith(SpecIPN);
713
13
    }
714
19
715
19
    // It is important to immediately erase this so that it stops using other
716
19
    // instructions. This avoids inserting needless PHIs of them.
717
19
    OrigI->eraseFromParent();
718
19
  }
719
13
720
13
  // All of the uses of the speculated phi nodes should be removed at this
721
13
  // point, so erase them.
722
17
  for (auto *SpecPN : SpecPNs) {
723
17
    assert(SpecPN->use_empty() && "All users should have been speculated!");
724
17
    SpecPN->eraseFromParent();
725
17
  }
726
13
}
727
728
/// Try to speculate around a series of PHIs from a single basic block.
729
///
730
/// This routine checks whether any of these PHIs are profitable to speculate
731
/// users around. If safe and profitable, it does the speculation. It returns
732
/// true when at least some speculation occurs.
733
static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs,
734
75
                               DominatorTree &DT, TargetTransformInfo &TTI) {
735
75
  LLVM_DEBUG(dbgs() << "Evaluating phi nodes for speculation:\n");
736
75
737
75
  // Savings in cost from speculating around a PHI node.
738
75
  SmallDenseMap<PHINode *, int, 16> CostSavingsMap;
739
75
740
75
  // Remember the set of instructions that are candidates for speculation so
741
75
  // that we can quickly walk things within that space. This prunes out
742
75
  // instructions already available along edges, etc.
743
75
  SmallPtrSet<Instruction *, 16> PotentialSpecSet;
744
75
745
75
  // Remember the set of instructions that are (transitively) unsafe to
746
75
  // speculate into the incoming edges of this basic block. This avoids
747
75
  // recomputing them for each PHI node we check. This set is specific to this
748
75
  // block though as things are pruned out of it based on what is available
749
75
  // along incoming edges.
750
75
  SmallPtrSet<Instruction *, 16> UnsafeSet;
751
75
752
75
  // For each PHI node in this block, check whether there are immediate folding
753
75
  // opportunities from speculation, and whether that speculation will be
754
75
  // valid. This determise the set of safe PHIs to speculate.
755
75
  PNs.erase(llvm::remove_if(PNs,
756
88
                            [&](PHINode *PN) {
757
88
                              return !isSafeAndProfitableToSpeculateAroundPHI(
758
88
                                  *PN, CostSavingsMap, PotentialSpecSet,
759
88
                                  UnsafeSet, DT, TTI);
760
88
                            }),
761
75
            PNs.end());
762
75
  // If no PHIs were profitable, skip.
763
75
  if (PNs.empty()) {
764
36
    LLVM_DEBUG(dbgs() << "  No safe and profitable PHIs found!\n");
765
36
    return false;
766
36
  }
767
39
768
39
  // We need to know how much speculation will cost which is determined by how
769
39
  // many incoming edges will need a copy of each speculated instruction.
770
39
  SmallSetVector<BasicBlock *, 16> PredSet;
771
74
  for (auto *PredBB : PNs[0]->blocks()) {
772
74
    if (!PredSet.insert(PredBB))
773
0
      continue;
774
74
775
74
    // We cannot speculate when a predecessor is an indirect branch.
776
74
    // FIXME: We also can't reliably create a non-critical edge block for
777
74
    // speculation if the predecessor is an invoke. This doesn't seem
778
74
    // fundamental and we should probably be splitting critical edges
779
74
    // differently.
780
74
    if (isa<IndirectBrInst>(PredBB->getTerminator()) ||
781
74
        
isa<InvokeInst>(PredBB->getTerminator())73
) {
782
4
      LLVM_DEBUG(dbgs() << "  Invalid: predecessor terminator: "
783
4
                        << PredBB->getName() << "\n");
784
4
      return false;
785
4
    }
786
74
  }
787
39
  
if (35
PredSet.size() < 235
) {
788
0
    LLVM_DEBUG(dbgs() << "  Unimportant: phi with only one predecessor\n");
789
0
    return false;
790
0
  }
791
35
792
35
  SmallVector<PHINode *, 16> SpecPNs = findProfitablePHIs(
793
35
      PNs, CostSavingsMap, PotentialSpecSet, PredSet.size(), DT, TTI);
794
35
  if (SpecPNs.empty())
795
22
    // Nothing to do.
796
22
    return false;
797
13
798
13
  speculatePHIs(SpecPNs, PotentialSpecSet, PredSet, DT);
799
13
  return true;
800
13
}
801
802
PreservedAnalyses SpeculateAroundPHIsPass::run(Function &F,
803
880
                                               FunctionAnalysisManager &AM) {
804
880
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
805
880
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
806
880
807
880
  bool Changed = false;
808
1.09k
  for (auto *BB : ReversePostOrderTraversal<Function *>(&F)) {
809
1.09k
    SmallVector<PHINode *, 16> PNs;
810
1.09k
    auto BBI = BB->begin();
811
1.18k
    while (auto *PN = dyn_cast<PHINode>(&*BBI)) {
812
88
      PNs.push_back(PN);
813
88
      ++BBI;
814
88
    }
815
1.09k
816
1.09k
    if (PNs.empty())
817
1.01k
      continue;
818
75
819
75
    Changed |= tryToSpeculatePHIs(PNs, DT, TTI);
820
75
  }
821
880
822
880
  if (!Changed)
823
867
    return PreservedAnalyses::all();
824
13
825
13
  PreservedAnalyses PA;
826
13
  return PA;
827
13
}