Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Scalar/JumpThreading.cpp
Line | Count | Source
1
//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file implements the Jump Threading pass.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/Transforms/Scalar/JumpThreading.h"
15
#include "llvm/ADT/DenseMap.h"
16
#include "llvm/ADT/DenseSet.h"
17
#include "llvm/ADT/Optional.h"
18
#include "llvm/ADT/STLExtras.h"
19
#include "llvm/ADT/SmallPtrSet.h"
20
#include "llvm/ADT/SmallVector.h"
21
#include "llvm/ADT/Statistic.h"
22
#include "llvm/Analysis/AliasAnalysis.h"
23
#include "llvm/Analysis/BlockFrequencyInfo.h"
24
#include "llvm/Analysis/BranchProbabilityInfo.h"
25
#include "llvm/Analysis/CFG.h"
26
#include "llvm/Analysis/ConstantFolding.h"
27
#include "llvm/Analysis/GlobalsModRef.h"
28
#include "llvm/Analysis/InstructionSimplify.h"
29
#include "llvm/Analysis/LazyValueInfo.h"
30
#include "llvm/Analysis/Loads.h"
31
#include "llvm/Analysis/LoopInfo.h"
32
#include "llvm/Analysis/TargetLibraryInfo.h"
33
#include "llvm/Analysis/ValueTracking.h"
34
#include "llvm/IR/BasicBlock.h"
35
#include "llvm/IR/CFG.h"
36
#include "llvm/IR/Constant.h"
37
#include "llvm/IR/ConstantRange.h"
38
#include "llvm/IR/Constants.h"
39
#include "llvm/IR/DataLayout.h"
40
#include "llvm/IR/Dominators.h"
41
#include "llvm/IR/Function.h"
42
#include "llvm/IR/InstrTypes.h"
43
#include "llvm/IR/Instruction.h"
44
#include "llvm/IR/Instructions.h"
45
#include "llvm/IR/IntrinsicInst.h"
46
#include "llvm/IR/Intrinsics.h"
47
#include "llvm/IR/LLVMContext.h"
48
#include "llvm/IR/MDBuilder.h"
49
#include "llvm/IR/Metadata.h"
50
#include "llvm/IR/Module.h"
51
#include "llvm/IR/PassManager.h"
52
#include "llvm/IR/PatternMatch.h"
53
#include "llvm/IR/Type.h"
54
#include "llvm/IR/Use.h"
55
#include "llvm/IR/User.h"
56
#include "llvm/IR/Value.h"
57
#include "llvm/Pass.h"
58
#include "llvm/Support/BlockFrequency.h"
59
#include "llvm/Support/BranchProbability.h"
60
#include "llvm/Support/Casting.h"
61
#include "llvm/Support/CommandLine.h"
62
#include "llvm/Support/Debug.h"
63
#include "llvm/Support/raw_ostream.h"
64
#include "llvm/Transforms/Scalar.h"
65
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
66
#include "llvm/Transforms/Utils/Cloning.h"
67
#include "llvm/Transforms/Utils/Local.h"
68
#include "llvm/Transforms/Utils/SSAUpdater.h"
69
#include "llvm/Transforms/Utils/ValueMapper.h"
70
#include <algorithm>
71
#include <cassert>
72
#include <cstddef>
73
#include <cstdint>
74
#include <iterator>
75
#include <memory>
76
#include <utility>
77
78
using namespace llvm;
79
using namespace jumpthreading;
80
81
#define DEBUG_TYPE "jump-threading"
82
83
STATISTIC(NumThreads, "Number of jumps threaded");
84
STATISTIC(NumFolds,   "Number of terminators folded");
85
STATISTIC(NumDupes,   "Number of branch blocks duplicated to eliminate phi");
86
87
static cl::opt<unsigned>
88
BBDuplicateThreshold("jump-threading-threshold",
89
          cl::desc("Max block size to duplicate for jump threading"),
90
          cl::init(6), cl::Hidden);
91
92
static cl::opt<unsigned>
93
ImplicationSearchThreshold(
94
  "jump-threading-implication-search-threshold",
95
  cl::desc("The number of predecessors to search for a stronger "
96
           "condition to use to thread over a weaker condition"),
97
  cl::init(3), cl::Hidden);
98
99
static cl::opt<bool> PrintLVIAfterJumpThreading(
100
    "print-lvi-after-jump-threading",
101
    cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
102
    cl::Hidden);
103
104
namespace {
105
106
  /// This pass performs 'jump threading', which looks at blocks that have
107
  /// multiple predecessors and multiple successors.  If one or more of the
108
  /// predecessors of the block can be proven to always jump to one of the
109
  /// successors, we forward the edge from the predecessor to the successor by
110
  /// duplicating the contents of this block.
111
  ///
112
  /// An example of when this can occur is code like this:
113
  ///
114
  ///   if () { ...
115
  ///     X = 4;
116
  ///   }
117
  ///   if (X < 3) {
118
  ///
119
  /// In this case, the unconditional branch at the end of the first if can be
120
  /// revectored to the false side of the second if.
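  ///
  /// Illustrative sketch (editor's addition, not part of the original source):
  /// after threading, the predecessor that set X = 4 no longer falls through
  /// to the re-test of X; conceptually the code becomes:
  ///
  ///   if () { ...
  ///     X = 4;
  ///     goto IfFalse;          // threaded edge, the "X < 3" test is skipped
  ///   }
  ///   if (X < 3) { ... } else { IfFalse: ... }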
121
  class JumpThreading : public FunctionPass {
122
    JumpThreadingPass Impl;
123
124
  public:
125
    static char ID; // Pass identification
126
127
34.9k
    JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) {
128
34.9k
      initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
129
34.9k
    }
130
131
    bool runOnFunction(Function &F) override;
132
133
34.9k
    void getAnalysisUsage(AnalysisUsage &AU) const override {
134
34.9k
      AU.addRequired<DominatorTreeWrapperPass>();
135
34.9k
      AU.addPreserved<DominatorTreeWrapperPass>();
136
34.9k
      AU.addRequired<AAResultsWrapperPass>();
137
34.9k
      AU.addRequired<LazyValueInfoWrapperPass>();
138
34.9k
      AU.addPreserved<LazyValueInfoWrapperPass>();
139
34.9k
      AU.addPreserved<GlobalsAAWrapperPass>();
140
34.9k
      AU.addRequired<TargetLibraryInfoWrapperPass>();
141
34.9k
    }
142
143
1.03M
    void releaseMemory() override { Impl.releaseMemory(); }
144
  };
145
146
} // end anonymous namespace
147
148
char JumpThreading::ID = 0;
149
150
41.8k
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
151
41.8k
                "Jump Threading", false, false)
152
41.8k
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
153
41.8k
INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
154
41.8k
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
155
41.8k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
156
41.8k
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
157
                "Jump Threading", false, false)
158
159
// Public interface to the Jump Threading pass
160
34.9k
FunctionPass *llvm::createJumpThreadingPass(int Threshold) {
161
34.9k
  return new JumpThreading(Threshold);
162
34.9k
}
163
164
35.0k
JumpThreadingPass::JumpThreadingPass(int T) {
165
18.4E
  BBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
166
35.0k
}
167
168
// Update branch probability information according to conditional
169
// branch probability. This is usually made possible for cloned branches
170
// in inline instances by the context specific profile in the caller.
171
// For instance,
172
//
173
//  [Block PredBB]
174
//  [Branch PredBr]
175
//  if (t) {
176
//     Block A;
177
//  } else {
178
//     Block B;
179
//  }
180
//
181
//  [Block BB]
182
//  cond = PN([true, %A], [..., %B]); // PHI node
183
//  [Branch CondBr]
184
//  if (cond) {
185
//    ...  // P(cond == true) = 1%
186
//  }
187
//
188
//  Here we know that when block A is taken, cond must be true, which means
189
//      P(cond == true | A) = 1
190
//
191
//  Given that P(cond == true) = P(cond == true | A) * P(A) +
192
//                               P(cond == true | B) * P(B)
193
//  we get
194
//     P(cond == true ) = P(A) + P(cond == true | B) * P(B)
195
//
196
//  which gives us:
197
//     P(A) <= P(cond == true), i.e.
198
//     P(t == true) <= P(cond == true)
199
//
200
//  In other words, if we know P(cond == true), we know that P(t == true)
201
//  cannot be greater than 1%.
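//
//  Illustrative numeric check (editor's sketch, assumed weights, not from the
//  original file): with extractProfMetadata() giving TrueWeight = 1 and
//  FalseWeight = 99, P(cond == true) = 1 / (1 + 99) = 1%. Since block A always
//  produces cond == true, P(A) <= P(cond == true), so the weight attached to
//  the PredBr edge into A below can be at most BranchProbability(1, 100).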
202
16.9k
static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
203
16.9k
  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
204
16.9k
  if (!CondBr)
205
0
    return;
206
16.9k
207
16.9k
  BranchProbability BP;
208
16.9k
  uint64_t TrueWeight, FalseWeight;
209
16.9k
  if (!CondBr->extractProfMetadata(TrueWeight, FalseWeight))
210
16.7k
    return;
211
169
212
169
  // Returns the outgoing edge of the dominating predecessor block
213
169
  // that leads to the PhiNode's incoming block:
214
169
  auto GetPredOutEdge =
215
169
      [](BasicBlock *IncomingBB,
216
177
         BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
217
177
    auto *PredBB = IncomingBB;
218
177
    auto *SuccBB = PhiBB;
219
177
    while (true) {
220
177
      BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
221
177
      if (PredBr && PredBr->isConditional())
222
177
        return {PredBB, SuccBB};
223
0
      auto *SinglePredBB = PredBB->getSinglePredecessor();
224
0
      if (!SinglePredBB)
225
0
        return {nullptr, nullptr};
226
0
      SuccBB = PredBB;
227
0
      PredBB = SinglePredBB;
228
0
    }
229
177
  };
230
169
231
515
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
232
346
    Value *PhiOpnd = PN->getIncomingValue(i);
233
346
    ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
234
346
235
346
    if (!CI || !CI->getType()->isIntegerTy(1))
236
169
      continue;
237
177
238
177
    BP = (CI->isOne() ? BranchProbability::getBranchProbability(
                            TrueWeight, TrueWeight + FalseWeight)
                      : BranchProbability::getBranchProbability(
                            FalseWeight, TrueWeight + FalseWeight));
242
177
243
177
    auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
244
177
    if (!PredOutEdge.first)
245
0
      return;
246
177
247
177
    BasicBlock *PredBB = PredOutEdge.first;
248
177
    BranchInst *PredBr = cast<BranchInst>(PredBB->getTerminator());
249
177
250
177
    uint64_t PredTrueWeight, PredFalseWeight;
251
177
    // FIXME: We currently only set the profile data when it is missing.
252
177
    // With PGO, this can be used to refine even existing profile data with
253
177
    // context information. This needs to be done after more performance
254
177
    // testing.
255
177
    if (PredBr->extractProfMetadata(PredTrueWeight, PredFalseWeight))
256
18
      continue;
257
159
258
159
    // We can not infer anything useful when BP >= 50%, because BP is the
259
159
    // upper bound probability value.
260
159
    if (BP >= BranchProbability(50, 100))
261
135
      continue;
262
24
263
24
    SmallVector<uint32_t, 2> Weights;
264
24
    if (PredBr->getSuccessor(0) == PredOutEdge.second) {
265
24
      Weights.push_back(BP.getNumerator());
266
24
      Weights.push_back(BP.getCompl().getNumerator());
267
24
    } else {
268
0
      Weights.push_back(BP.getCompl().getNumerator());
269
0
      Weights.push_back(BP.getNumerator());
270
0
    }
271
346
    PredBr->setMetadata(LLVMContext::MD_prof,
272
346
                        MDBuilder(PredBr->getParent()->getContext())
273
346
                            .createBranchWeights(Weights));
274
346
  }
275
16.9k
}
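// Editor's illustrative note (assumed example, not in the original source):
// for an incoming value of `true` with BP = 1%, the setMetadata call above
// attaches profile metadata of roughly this IR form to the predecessor branch:
//   br i1 %t, label %A, label %B, !prof !0
//   !0 = !{!"branch_weights", i32 N, i32 M}
// where N is BP.getNumerator() and M is BP.getCompl().getNumerator().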
276
277
/// runOnFunction - Toplevel algorithm.
278
1.03M
bool JumpThreading::runOnFunction(Function &F) {
279
1.03M
  if (skipFunction(F))
280
116
    return false;
281
1.03M
  auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
282
1.03M
  // Get DT analysis before LVI. When LVI is initialized it conditionally adds
283
1.03M
  // DT if it's available.
284
1.03M
  auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
285
1.03M
  auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
286
1.03M
  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
287
1.03M
  std::unique_ptr<BlockFrequencyInfo> BFI;
288
1.03M
  std::unique_ptr<BranchProbabilityInfo> BPI;
289
1.03M
  bool HasProfileData = F.getEntryCount().hasValue();
290
1.03M
  if (HasProfileData) {
291
36
    LoopInfo LI{DominatorTree(F)};
292
36
    BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
293
36
    BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
294
36
  }
295
1.03M
296
1.03M
  bool Changed = Impl.runImpl(F, TLI, LVI, AA, DT, HasProfileData,
297
1.03M
                              std::move(BFI), std::move(BPI));
298
1.03M
  if (PrintLVIAfterJumpThreading) {
299
4
    dbgs() << "LVI for function '" << F.getName() << "':\n";
300
4
    LVI->printLVI(F, *DT, dbgs());
301
4
  }
302
1.03M
  return Changed;
303
1.03M
}
304
305
PreservedAnalyses JumpThreadingPass::run(Function &F,
306
249
                                         FunctionAnalysisManager &AM) {
307
249
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
308
249
  // Get DT analysis before LVI. When LVI is initialized it conditionally adds
309
249
  // DT if it's available.
310
249
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
311
249
  auto &LVI = AM.getResult<LazyValueAnalysis>(F);
312
249
  auto &AA = AM.getResult<AAManager>(F);
313
249
314
249
  std::unique_ptr<BlockFrequencyInfo> BFI;
315
249
  std::unique_ptr<BranchProbabilityInfo> BPI;
316
249
  bool HasProfileData = F.getEntryCount().hasValue();
317
249
  if (HasProfileData) {
318
10
    LoopInfo LI{DominatorTree(F)};
319
10
    BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
320
10
    BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
321
10
  }
322
249
323
249
  bool Changed = runImpl(F, &TLI, &LVI, &AA, &DT, HasProfileData,
324
249
                         std::move(BFI), std::move(BPI));
325
249
326
249
  if (!Changed)
327
229
    return PreservedAnalyses::all();
328
20
  PreservedAnalyses PA;
329
20
  PA.preserve<GlobalsAA>();
330
20
  PA.preserve<DominatorTreeAnalysis>();
331
20
  PA.preserve<LazyValueAnalysis>();
332
20
  return PA;
333
20
}
334
335
bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
336
                                LazyValueInfo *LVI_, AliasAnalysis *AA_,
337
                                DominatorTree *DT_, bool HasProfileData_,
338
                                std::unique_ptr<BlockFrequencyInfo> BFI_,
339
1.03M
                                std::unique_ptr<BranchProbabilityInfo> BPI_) {
340
1.03M
  DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
341
1.03M
  TLI = TLI_;
342
1.03M
  LVI = LVI_;
343
1.03M
  AA = AA_;
344
1.03M
  DT = DT_;
345
1.03M
  BFI.reset();
346
1.03M
  BPI.reset();
347
1.03M
  // When profile data is available, we need to update edge weights after
348
1.03M
  // successful jump threading, which requires both BPI and BFI being available.
349
1.03M
  HasProfileData = HasProfileData_;
350
1.03M
  auto *GuardDecl = F.getParent()->getFunction(
351
1.03M
      Intrinsic::getName(Intrinsic::experimental_guard));
352
23
  HasGuards = GuardDecl && !GuardDecl->use_empty();
353
1.03M
  if (HasProfileData) {
354
46
    BPI = std::move(BPI_);
355
46
    BFI = std::move(BFI_);
356
46
  }
357
1.03M
358
1.03M
  // Remove unreachable blocks from the function, as they may result in an infinite
359
1.03M
  // loop. We do threading if we found something profitable. Jump threading a
360
1.03M
  // branch can create other opportunities. If these opportunities form a cycle
361
1.03M
  // i.e. if any jump threading is undoing previous threading in the path, then
362
1.03M
  // we will loop forever. We take care of this issue by not jump threading for
363
1.03M
  // back edges. This works for normal cases but not for unreachable blocks as
364
1.03M
  // they may have a cycle with no back edge.
365
1.03M
  bool EverChanged = false;
366
1.03M
  EverChanged |= removeUnreachableBlocks(F, LVI, DT);
367
1.03M
  FindLoopHeaders(F);
368
1.03M
369
1.03M
  bool Changed;
370
1.18M
  do {
371
1.18M
    Changed = false;
372
13.7M
    for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
373
12.5M
      BasicBlock *BB = &*I;
374
12.5M
      // Thread all of the branches we can over this block.
375
12.6M
      while (ProcessBlock(BB))
376
112k
        Changed = true;
377
12.5M
378
12.5M
      ++I;
379
12.5M
380
12.5M
      // If the block is trivially dead, zap it.  This eliminates the successor
381
12.5M
      // edges which simplifies the CFG.
382
12.5M
      if (pred_empty(BB) &&
          BB != &BB->getParent()->getEntryBlock()) {
384
1.24k
        DEBUG(dbgs() << "  JT: Deleting dead block '" << BB->getName()
385
1.24k
              << "' with terminator: " << *BB->getTerminator() << '\n');
386
1.24k
        LoopHeaders.erase(BB);
387
1.24k
        LVI->eraseBlock(BB);
388
1.24k
        DeleteDeadBlock(BB);
389
1.24k
        Changed = true;
390
1.24k
        continue;
391
1.24k
      }
392
12.5M
393
12.5M
      BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
394
12.5M
395
12.5M
      // Can't thread an unconditional jump, but if the block is "almost
396
12.5M
      // empty", we can replace uses of it with uses of the successor and make
397
12.5M
      // this dead.
398
12.5M
      // We should not eliminate the loop header or latch either, because
399
12.5M
      // eliminating a loop header or latch might later prevent LoopSimplify
400
12.5M
      // from transforming nested loops into simplified form. We will rely on
401
12.5M
      // later passes in backend to clean up empty blocks.
402
12.5M
      if (BI && BI->isUnconditional() &&
          BB != &BB->getParent()->getEntryBlock() &&
          // If the terminator is the only non-phi instruction, try to nuke it.
          BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB) &&
          !LoopHeaders.count(BI->getSuccessor(0))) {
407
373k
        // FIXME: It is always conservatively correct to drop the info
408
373k
        // for a block even if it doesn't get erased.  This isn't totally
409
373k
        // awesome, but it allows us to use AssertingVH to prevent nasty
410
373k
        // dangling pointer issues within LazyValueInfo.
411
373k
        LVI->eraseBlock(BB);
412
373k
        if (TryToSimplifyUncondBranchFromEmptyBlock(BB, DT))
413
327k
          Changed = true;
414
373k
      }
415
12.5M
    }
416
1.18M
    EverChanged |= Changed;
417
1.18M
  } while (Changed);
418
1.03M
419
1.03M
  LoopHeaders.clear();
420
1.03M
  return EverChanged;
421
1.03M
}
422
423
// Replace uses of Cond with ToVal when safe to do so. If all uses are
424
// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
425
// because we may incorrectly replace uses when guards/assumes are uses of
426
// `Cond` and we used the guards/assume to reason about the `Cond` value
427
// at the end of block. RAUW unconditionally replaces all uses
428
// including the guards/assumes themselves and the uses before the
429
// guard/assume.
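//
// Editor's illustrative sketch (hypothetical IR, not from the original file):
//   %c = icmp eq i32 %x, 0
//   call void @llvm.assume(i1 %c)   ; only from here on is %c known true
//   br i1 %c, ...
// Blindly RAUW'ing %c with `true` would also rewrite the assume operand (and
// any use above it), destroying the very fact LVI relied on; hence the code
// below only replaces uses guaranteed to execute after the reasoning point.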
430
209
static void ReplaceFoldableUses(Instruction *Cond, Value *ToVal) {
431
209
  assert(Cond->getType() == ToVal->getType());
432
209
  auto *BB = Cond->getParent();
433
209
  // We can unconditionally replace all uses in non-local blocks (i.e. uses
434
209
  // strictly dominated by BB), since LVI information is true from the
435
209
  // terminator of BB.
436
209
  replaceNonLocalUsesWith(Cond, ToVal);
437
533
  for (Instruction &I : reverse(*BB)) {
438
533
    // Reached the Cond whose uses we are trying to replace, so there are no
439
533
    // more uses.
440
533
    if (&I == Cond)
441
198
      break;
442
335
    // We only replace uses in instructions that are guaranteed to reach the end
443
335
    // of BB, where we know Cond is ToVal.
444
335
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
445
11
      break;
446
324
    I.replaceUsesOfWith(Cond, ToVal);
447
324
  }
448
209
  if (Cond->use_empty() && !Cond->mayHaveSideEffects())
449
201
    Cond->eraseFromParent();
450
209
}
451
452
/// Return the cost of duplicating a piece of this block from first non-phi
453
/// and before StopAt instruction to thread across it. Stop scanning the block
454
/// when exceeding the threshold. If duplication is impossible, returns ~0U.
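///
/// Editor's illustrative example (assumed numbers, not part of the source):
/// with the default -jump-threading-threshold=6, a block holding two ordinary
/// instructions (cost 1 each) plus one non-intrinsic call (cost 4) sums to
/// Size = 6; if the block ends in a switch, the Bonus of 6 is first added to
/// the threshold and then subtracted from Size at the end, so such a block is
/// still considered cheap enough to duplicate.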
455
static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
456
                                             Instruction *StopAt,
457
60.7k
                                             unsigned Threshold) {
458
60.7k
  assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
459
60.7k
  /// Ignore PHI nodes, these will be flattened when duplication happens.
460
60.7k
  BasicBlock::const_iterator I(BB->getFirstNonPHI());
461
60.7k
462
60.7k
  // FIXME: THREADING will delete values that are just used to compute the
463
60.7k
  // branch, so they shouldn't count against the duplication cost.
464
60.7k
465
60.7k
  unsigned Bonus = 0;
466
60.7k
  if (BB->getTerminator() == StopAt) {
467
60.7k
    // Threading through a switch statement is particularly profitable.  If this
468
60.7k
    // block ends in a switch, decrease its cost to make it more likely to
469
60.7k
    // happen.
470
60.7k
    if (isa<SwitchInst>(StopAt))
471
1.39k
      Bonus = 6;
472
60.7k
473
60.7k
    // The same holds for indirect branches, but slightly more so.
474
60.7k
    if (isa<IndirectBrInst>(StopAt))
475
1
      Bonus = 8;
476
60.7k
  }
477
60.7k
478
60.7k
  // Bump the threshold up so the early exit from the loop doesn't skip the
479
60.7k
  // terminator-based Size adjustment at the end.
480
60.7k
  Threshold += Bonus;
481
60.7k
482
60.7k
  // Sum up the cost of each instruction until we get to the terminator.  Don't
483
60.7k
  // include the terminator because the copy won't include it.
484
60.7k
  unsigned Size = 0;
485
204k
  for (; &*I != StopAt; ++I) {
486
158k
487
158k
    // Stop scanning the block if we've reached the threshold.
488
158k
    if (Size > Threshold)
489
15.1k
      return Size;
490
143k
491
143k
    // Debugger intrinsics don't incur code size.
492
143k
    if (isa<DbgInfoIntrinsic>(I))
      continue;
493
143k
494
143k
    // If this is a pointer->pointer bitcast, it is free.
495
143k
    if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
496
1.79k
      continue;
497
141k
498
141k
    // Bail out if this instruction gives back a token type, it is not possible
499
141k
    // to duplicate it if it is used outside this BB.
500
141k
    if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
501
0
      return ~0U;
502
141k
503
141k
    // All other instructions count for at least one unit.
504
141k
    ++Size;
505
141k
506
141k
    // Calls are more expensive.  If they are non-intrinsic calls, we model them
507
141k
    // as having cost of 4.  If they are a non-vector intrinsic, we model them
508
141k
    // as having cost of 2 total, and if they are a vector intrinsic, we model
509
141k
    // them as having cost 1.
510
141k
    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
511
25.2k
      if (CI->cannotDuplicate() || CI->isConvergent())
512
25.2k
        // Blocks with NoDuplicate are modelled as having infinite cost, so they
513
25.2k
        // are never duplicated.
514
6
        return ~0U;
515
25.1k
      else if (!isa<IntrinsicInst>(CI))
516
12.8k
        Size += 3;
517
12.3k
      else if (!CI->getType()->isVectorTy())
518
12.3k
        Size += 1;
519
25.2k
    }
520
158k
  }
521
60.7k
522
45.6k
  return Size > Bonus ? Size - Bonus : 0;
523
60.7k
}
524
525
/// FindLoopHeaders - We do not want jump threading to turn proper loop
526
/// structures into irreducible loops.  Doing this breaks up the loop nesting
527
/// hierarchy and pessimizes later transformations.  To prevent this from
528
/// happening, we first have to find the loop headers.  Here we approximate this
529
/// by finding targets of backedges in the CFG.
530
///
531
/// Note that there definitely are cases when we want to allow threading of
532
/// edges across a loop header.  For example, threading a jump from outside the
533
/// loop (the preheader) to an exit block of the loop is definitely profitable.
534
/// It is also almost always profitable to thread backedges from within the loop
535
/// to exit blocks, and is often profitable to thread backedges to other blocks
536
/// within the loop (forming a nested loop).  This simple analysis is not rich
537
/// enough to track all of these properties and keep it up-to-date as the CFG
538
/// mutates, so we don't allow any of these transformations.
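///
/// Editor's illustrative sketch (hypothetical CFG, not from the original):
/// given edges  entry -> header, header -> body, body -> header (back edge),
/// FindFunctionBackedges() reports the pair (body, header), so `header` is
/// recorded in LoopHeaders and later LoopHeaders.count(header) queries keep
/// the pass from threading an edge that would split this natural loop apart.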
539
1.03M
void JumpThreadingPass::FindLoopHeaders(Function &F) {
540
1.03M
  SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
541
1.03M
  FindFunctionBackedges(F, Edges);
542
1.03M
543
1.03M
  for (const auto &Edge : Edges)
544
678k
    LoopHeaders.insert(Edge.second);
545
1.03M
}
546
547
/// getKnownConstant - Helper method to determine if we can thread over a
548
/// terminator with the given value as its condition, and if so what value to
549
/// use for that. What kind of value this is depends on whether we want an
550
/// integer or a block address, but an undef is always accepted.
551
/// Returns null if Val is null or not an appropriate constant.
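///
/// Editor's usage sketch (assumed values, not part of the original file):
///   getKnownConstant(ConstantInt::getTrue(Ctx), WantInteger)  -> that i1 true
///   getKnownConstant(UndefValue::get(Ty), WantBlockAddress)   -> the undef
///   getKnownConstant(someNonConstantValue, WantInteger)       -> nullptr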
552
18.3M
static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
553
18.3M
  if (!Val)
554
3.64M
    return nullptr;
555
14.6M
556
14.6M
  // Undef is "known" enough.
557
14.6M
  if (UndefValue *U = dyn_cast<UndefValue>(Val))
558
24
    return U;
559
14.6M
560
14.6M
  if (Preference == WantBlockAddress)
561
30
    return dyn_cast<BlockAddress>(Val->stripPointerCasts());
562
14.6M
563
14.6M
  return dyn_cast<ConstantInt>(Val);
564
14.6M
}
565
566
/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
567
/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
568
/// in any of our predecessors.  If so, return the known list of value and pred
569
/// BB in the result vector.
570
///
571
/// This returns true if there were any known values.
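///
/// Editor's illustrative example (hypothetical IR, not from the source): for
///   %cond = phi i1 [ true, %A ], [ %x, %B ]
/// queried in %BB, the Result vector would at least contain the pair
/// (i1 true, %A); an entry for %B is only added if LVI can prove a constant
/// for %x along the edge %B -> %BB.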
572
bool JumpThreadingPass::ComputeValueKnownInPredecessors(
573
    Value *V, BasicBlock *BB, PredValueInfo &Result,
574
8.15M
    ConstantPreference Preference, Instruction *CxtI) {
575
8.15M
  // This method walks up use-def chains recursively.  Because of this, we could
576
8.15M
  // get into an infinite loop going around loops in the use-def chain.  To
577
8.15M
  // prevent this, keep track of what (value, block) pairs we've already visited
578
8.15M
  // and terminate the search if we loop back to them
579
8.15M
  if (!RecursionSet.insert(std::make_pair(V, BB)).second)
580
10
    return false;
581
8.15M
582
8.15M
  // An RAII help to remove this pair from the recursion set once the recursion
583
8.15M
  // stack pops back out again.
584
8.15M
  RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
585
8.15M
586
8.15M
  // If V is a constant, then it is known in all predecessors.
587
8.15M
  if (Constant *KC = getKnownConstant(V, Preference)) {
588
95
    for (BasicBlock *Pred : predecessors(BB))
589
146
      Result.push_back(std::make_pair(KC, Pred));
590
95
591
95
    return !Result.empty();
592
95
  }
593
8.15M
594
8.15M
  // If V is a non-instruction value, or an instruction in a different block,
595
8.15M
  // then it can't be derived from a PHI.
596
8.15M
  Instruction *I = dyn_cast<Instruction>(V);
597
8.15M
  if (!I || I->getParent() != BB) {
598
466k
599
466k
    // Okay, if this is a live-in value, see if it has a known value at the end
600
466k
    // of any of our predecessors.
601
466k
    //
602
466k
    // FIXME: This should be an edge property, not a block end property.
603
466k
    /// TODO: Per PR2563, we could infer value range information about a
604
466k
    /// predecessor based on its terminator.
605
466k
    //
606
466k
    // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
607
466k
    // "I" is a non-local compare-with-a-constant instruction.  This would be
608
466k
    // able to handle value inequalities better, for example if the compare is
609
466k
    // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
610
466k
    // Perhaps getConstantOnEdge should be smart enough to do this?
611
466k
612
605k
    for (BasicBlock *P : predecessors(BB)) {
613
605k
      // If the value is known by LazyValueInfo to be a constant in a
614
605k
      // predecessor, use that information to try to thread this block.
615
605k
      Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
616
605k
      if (Constant *KC = getKnownConstant(PredCst, Preference))
617
37.5k
        Result.push_back(std::make_pair(KC, P));
618
605k
    }
619
466k
620
466k
    return !Result.empty();
621
466k
  }
622
7.69M
623
7.69M
  /// If I is a PHI node, then we know the incoming values for any constants.
624
7.69M
  if (PHINode *PN = dyn_cast<PHINode>(I)) {
625
68.4k
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
626
49.6k
      Value *InVal = PN->getIncomingValue(i);
627
49.6k
      if (Constant *KC = getKnownConstant(InVal, Preference)) {
628
22.6k
        Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
629
49.6k
      } else {
630
27.0k
        Constant *CI = LVI->getConstantOnEdge(InVal,
631
27.0k
                                              PN->getIncomingBlock(i),
632
27.0k
                                              BB, CxtI);
633
27.0k
        if (Constant *KC = getKnownConstant(CI, Preference))
634
1.38k
          Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
635
27.0k
      }
636
49.6k
    }
637
18.8k
638
18.8k
    return !Result.empty();
639
18.8k
  }
640
7.67M
641
7.67M
  // Handle Cast instructions.  Only see through Cast when the source operand is
642
7.67M
  // PHI or Cmp and the source type is i1, to save compilation time.
643
7.67M
  if (CastInst *CI = dyn_cast<CastInst>(I)) {
644
102k
    Value *Source = CI->getOperand(0);
645
102k
    if (!Source->getType()->isIntegerTy(1))
646
90.3k
      return false;
647
12.4k
    if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))
648
560
      return false;
649
11.9k
    ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI);
650
11.9k
    if (Result.empty())
651
11.8k
      return false;
652
91
653
91
    // Convert the known values.
654
91
    for (auto &R : Result)
655
203
      R.first = ConstantExpr::getCast(CI->getOpcode(), R.first, CI->getType());
656
102k
657
102k
    return true;
658
102k
  }
659
7.56M
660
7.56M
  PredValueInfoTy LHSVals, RHSVals;
661
7.56M
662
7.56M
  // Handle some boolean conditions.
663
7.56M
  if (I->getType()->getPrimitiveSizeInBits() == 1) {
664
5.29M
    assert(Preference == WantInteger && "One-bit non-integer type?");
665
5.29M
    // X | true -> true
666
5.29M
    // X & false -> false
667
5.29M
    if (I->getOpcode() == Instruction::Or ||
        I->getOpcode() == Instruction::And) {
669
310k
      ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
670
310k
                                      WantInteger, CxtI);
671
310k
      ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
672
310k
                                      WantInteger, CxtI);
673
310k
674
310k
      if (LHSVals.empty() && RHSVals.empty())
675
281k
        return false;
676
28.9k
677
28.9k
      ConstantInt *InterestingVal;
678
28.9k
      if (I->getOpcode() == Instruction::Or)
679
24.6k
        InterestingVal = ConstantInt::getTrue(I->getContext());
680
28.9k
      else
681
4.28k
        InterestingVal = ConstantInt::getFalse(I->getContext());
682
28.9k
683
28.9k
      SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
684
28.9k
685
28.9k
      // Scan for the sentinel.  If we find an undef, force it to the
686
28.9k
      // interesting value: x|undef -> true and x&undef -> false.
687
28.9k
      for (const auto &LHSVal : LHSVals)
688
10.8k
        if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
689
2.35k
          Result.emplace_back(InterestingVal, LHSVal.second);
690
2.35k
          LHSKnownBBs.insert(LHSVal.second);
691
2.35k
        }
692
28.9k
      for (const auto &RHSVal : RHSVals)
693
32.4k
        if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
694
6.48k
          // If we already inferred a value for this block on the LHS, don't
695
6.48k
          // re-add it.
696
6.48k
          if (!LHSKnownBBs.count(RHSVal.second))
697
6.21k
            Result.emplace_back(InterestingVal, RHSVal.second);
698
32.4k
        }
699
310k
700
310k
      return !Result.empty();
701
310k
    }
702
4.98M
703
4.98M
    // Handle the NOT form of XOR.
704
4.98M
    if (I->getOpcode() == Instruction::Xor &&
        isa<ConstantInt>(I->getOperand(1)) &&
        cast<ConstantInt>(I->getOperand(1))->isOne()) {
707
4.89k
      ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
708
4.89k
                                      WantInteger, CxtI);
709
4.89k
      if (Result.empty())
710
4.57k
        return false;
711
319
712
319
      // Invert the known values.
713
319
      for (auto &R : Result)
714
440
        R.first = ConstantExpr::getNot(R.first);
715
4.89k
716
4.89k
      return true;
717
4.89k
    }
718
5.29M
719
5.29M
  // Try to simplify some other binary operator values.
720
2.27M
  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
721
365k
    assert(Preference != WantBlockAddress
722
365k
            && "A binary operator creating a block address?");
723
365k
    if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
724
173k
      PredValueInfoTy LHSVals;
725
173k
      ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
726
173k
                                      WantInteger, CxtI);
727
173k
728
173k
      // Try to use constant folding to simplify the binary operator.
729
2.03k
      for (const auto &LHSVal : LHSVals) {
730
2.03k
        Constant *V = LHSVal.first;
731
2.03k
        Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
732
2.03k
733
2.03k
        if (Constant *KC = getKnownConstant(Folded, WantInteger))
734
2.03k
          Result.push_back(std::make_pair(KC, LHSVal.second));
735
2.03k
      }
736
173k
    }
737
2.27M
738
2.27M
    return !Result.empty();
739
2.27M
  }
740
6.88M
741
6.88M
  // Handle compare with phi operand, where the PHI is defined in this block.
742
6.88M
  
if (CmpInst *6.88M
Cmp6.88M
= dyn_cast<CmpInst>(I)) {
743
4.87M
    assert(Preference == WantInteger && "Compares only produce integers");
744
4.87M
    Type *CmpType = Cmp->getType();
745
4.87M
    Value *CmpLHS = Cmp->getOperand(0);
746
4.87M
    Value *CmpRHS = Cmp->getOperand(1);
747
4.87M
    CmpInst::Predicate Pred = Cmp->getPredicate();
748
4.87M
749
4.87M
    PHINode *PN = dyn_cast<PHINode>(CmpLHS);
750
4.87M
    if (PN && PN->getParent() == BB) {
751
176k
      const DataLayout &DL = PN->getModule()->getDataLayout();
752
176k
      // We can do this simplification if any comparisons fold to true or false.
753
176k
      // See if any do.
754
572k
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
755
395k
        BasicBlock *PredBB = PN->getIncomingBlock(i);
756
395k
        Value *LHS = PN->getIncomingValue(i);
757
395k
        Value *RHS = CmpRHS->DoPHITranslation(BB, PredBB);
758
395k
759
395k
        Value *Res = SimplifyCmpInst(Pred, LHS, RHS, {DL});
760
395k
        if (!Res) {
761
328k
          if (!isa<Constant>(RHS))
762
87.3k
            continue;
763
240k
764
240k
          LazyValueInfo::Tristate
765
240k
            ResT = LVI->getPredicateOnEdge(Pred, LHS,
766
240k
                                           cast<Constant>(RHS), PredBB, BB,
767
240k
                                           CxtI ? CxtI : Cmp);
768
240k
          if (ResT == LazyValueInfo::Unknown)
769
219k
            continue;
770
21.9k
          Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
771
21.9k
        }
772
395k
773
89.4k
        if (Constant *KC = getKnownConstant(Res, WantInteger))
774
87.5k
          Result.push_back(std::make_pair(KC, PredBB));
775
395k
      }
776
176k
777
176k
      return !Result.empty();
778
176k
    }
779
4.69M
780
4.69M
    // If comparing a live-in value against a constant, see if we know the
781
4.69M
    // live-in value on any predecessors.
782
4.69M
    if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
783
3.66M
      Constant *CmpConst = cast<Constant>(CmpRHS);
784
3.66M
785
3.66M
      if (!isa<Instruction>(CmpLHS) ||
          cast<Instruction>(CmpLHS)->getParent() != BB) {
787
1.45M
        for (BasicBlock *P : predecessors(BB)) {
788
1.45M
          // If the value is known by LazyValueInfo to be a constant in a
789
1.45M
          // predecessor, use that information to try to thread this block.
790
1.45M
          LazyValueInfo::Tristate Res =
791
1.45M
            LVI->getPredicateOnEdge(Pred, CmpLHS,
792
1.45M
                                    CmpConst, P, BB, CxtI ? CxtI : Cmp);
793
1.45M
          if (Res == LazyValueInfo::Unknown)
794
1.44M
            continue;
795
9.45k
796
9.45k
          Constant *ResC = ConstantInt::get(CmpType, Res);
797
9.45k
          Result.push_back(std::make_pair(ResC, P));
798
9.45k
        }
799
1.43M
800
1.43M
        return !Result.empty();
801
1.43M
      }
802
2.22M
803
2.22M
      // InstCombine can fold some forms of constant range checks into
804
2.22M
      // (icmp (add (x, C1)), C2). See if we have such a thing with
805
2.22M
      // x as a live-in.
806
2.22M
      {
807
2.22M
        using namespace PatternMatch;
808
2.22M
809
2.22M
        Value *AddLHS;
810
2.22M
        ConstantInt *AddConst;
811
2.22M
        if (isa<ConstantInt>(CmpConst) &&
            match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
813
66.6k
          if (!isa<Instruction>(AddLHS) ||
              cast<Instruction>(AddLHS)->getParent() != BB) {
815
58.3k
            for (BasicBlock *P : predecessors(BB)) {
816
58.3k
              // If the value is known by LazyValueInfo to be a ConstantRange in
817
58.3k
              // a predecessor, use that information to try to thread this
818
58.3k
              // block.
819
58.3k
              ConstantRange CR = LVI->getConstantRangeOnEdge(
820
58.3k
                  AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
821
58.3k
              // Propagate the range through the addition.
822
58.3k
              CR = CR.add(AddConst->getValue());
823
58.3k
824
58.3k
              // Get the range where the compare returns true.
825
58.3k
              ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(
826
58.3k
                  Pred, cast<ConstantInt>(CmpConst)->getValue());
827
58.3k
828
58.3k
              Constant *ResC;
829
58.3k
              if (CmpRange.contains(CR))
830
442
                ResC = ConstantInt::getTrue(CmpType);
831
57.8k
              else if (CmpRange.inverse().contains(CR))
832
993
                ResC = ConstantInt::getFalse(CmpType);
833
57.8k
              else
834
56.8k
                continue;
835
1.43k
836
1.43k
              Result.push_back(std::make_pair(ResC, P));
837
1.43k
            }
838
39.1k
839
39.1k
            return !Result.empty();
840
39.1k
          }
841
2.19M
        }
842
2.19M
      }
843
2.19M
844
2.19M
      // Try to find a constant value for the LHS of a comparison,
845
2.19M
      // and evaluate it statically if we can.
846
2.19M
      PredValueInfoTy LHSVals;
847
2.19M
      ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
848
2.19M
                                      WantInteger, CxtI);
849
2.19M
850
2.23k
      for (const auto &LHSVal : LHSVals) {
851
2.23k
        Constant *V = LHSVal.first;
852
2.23k
        Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst);
853
2.23k
        if (Constant *KC = getKnownConstant(Folded, WantInteger))
854
2.23k
          Result.push_back(std::make_pair(KC, LHSVal.second));
855
2.23k
      }
856
3.66M
857
3.66M
      return !Result.empty();
858
3.66M
    }
859
4.87M
  }
860
3.05M
861
3.05M
  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
862
21.3k
    // Handle select instructions where at least one operand is a known constant
863
21.3k
    // and we can figure out the condition value for any predecessor block.
864
21.3k
    Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
865
21.3k
    Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
866
21.3k
    PredValueInfoTy Conds;
867
21.3k
    if ((TrueVal || FalseVal) &&
868
16.7k
        ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
869
21.3k
                                        WantInteger, CxtI)) {
870
560
      for (auto &C : Conds) {
871
560
        Constant *Cond = C.first;
872
560
873
560
        // Figure out what value to use for the condition.
874
560
        bool KnownCond;
875
560
        if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
876
560
          // A known boolean.
877
560
          KnownCond = CI->isOne();
878
560
        } else {
879
0
          assert(isa<UndefValue>(Cond) && "Unexpected condition value");
880
0
          // Either operand will do, so be sure to pick the one that's a known
881
0
          // constant.
882
0
          // FIXME: Do this more cleverly if both values are known constants?
883
0
          KnownCond = (TrueVal != nullptr);
884
0
        }
885
560
886
560
        // See if the select has a known constant value for this predecessor.
887
560
        if (Constant *Val = KnownCond ? TrueVal : FalseVal)
888
448
          Result.push_back(std::make_pair(Val, C.second));
889
560
      }
890
274
891
274
      return !Result.empty();
892
274
    }
893
3.05M
  }
894
3.05M
895
3.05M
  // If all else fails, see if LVI can figure out a constant value for us.
896
3.05M
  Constant *CI = LVI->getConstant(V, BB, CxtI);
897
3.05M
  if (Constant *KC = getKnownConstant(CI, Preference)) {
898
0
    for (BasicBlock *Pred : predecessors(BB))
899
0
      Result.push_back(std::make_pair(KC, Pred));
900
0
  }
901
8.15M
902
8.15M
  return !Result.empty();
903
8.15M
}
904
905
/// GetBestDestForBranchOnUndef - If we determine that the specified block ends
906
/// in an undefined jump, decide which block is best to revector to.
907
///
908
/// Since we can pick an arbitrary destination, we pick the successor with the
909
/// fewest predecessors.  This should reduce the in-degree of the others.
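///
/// Editor's illustrative example (assumed counts, not from the original):
/// for  br i1 undef, label %L, label %R  where %L has three predecessors and
/// %R has one, this returns the index of %R, and the caller then rewrites the
/// terminator into an unconditional branch to %R.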
910
28
static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
911
28
  TerminatorInst *BBTerm = BB->getTerminator();
912
28
  unsigned MinSucc = 0;
913
28
  BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
914
28
  // Compute the successor with the minimum number of predecessors.
915
28
  unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
916
56
  for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
917
28
    TestBB = BBTerm->getSuccessor(i);
918
28
    unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
919
28
    if (NumPreds < MinNumPreds) {
920
16
      MinSucc = i;
921
16
      MinNumPreds = NumPreds;
922
16
    }
923
28
  }
924
28
925
28
  return MinSucc;
926
28
}
927
928
48.0k
static bool hasAddressTakenAndUsed(BasicBlock *BB) {
929
48.0k
  if (!BB->hasAddressTaken())
    return false;
930
8
931
8
  // If the block has its address taken, it may be a tree of dead constants
932
8
  // hanging off of it.  These shouldn't keep the block alive.
933
8
  BlockAddress *BA = BlockAddress::get(BB);
934
8
  BA->removeDeadConstantUsers();
935
8
  return !BA->use_empty();
936
8
}
937
938
/// ProcessBlock - If there are any predecessors whose control can be threaded
939
/// through to a successor, transform them now.
940
12.6M
bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
941
12.6M
  // If the block is trivially dead, just return and let the caller nuke it.
942
12.6M
  // This simplifies other transformations.
943
12.6M
  if (pred_empty(BB) &&
944
1.18M
      BB != &BB->getParent()->getEntryBlock())
945
1.24k
    return false;
946
12.6M
947
12.6M
  // If this block has a single predecessor, and if that pred has a single
948
12.6M
  // successor, merge the blocks.  This encourages recursive jump threading
949
12.6M
  // because now the condition in this block can be threaded through
950
12.6M
  // predecessors of our predecessor block.
951
12.6M
  if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
952
7.56M
    const TerminatorInst *TI = SinglePred->getTerminator();
953
7.56M
    if (!TI->isExceptional() && TI->getNumSuccessors() == 1 &&
        SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
955
48.0k
      // If SinglePred was a loop header, BB becomes one.
956
48.0k
      if (LoopHeaders.erase(SinglePred))
957
819
        LoopHeaders.insert(BB);
958
48.0k
959
48.0k
      LVI->eraseBlock(SinglePred);
960
48.0k
      MergeBasicBlockIntoOnlyPred(BB, DT);
961
48.0k
962
48.0k
      // Now that BB is merged into SinglePred (i.e. SinglePred Code followed by
963
48.0k
      // BB code within one basic block `BB`), we need to invalidate the LVI
964
48.0k
      // information associated with BB, because the LVI information need not be
965
48.0k
      // true for all of BB after the merge. For example,
966
48.0k
      // Before the merge, LVI info and code is as follows:
967
48.0k
      // SinglePred: <LVI info1 for %p val>
968
48.0k
      // %y = use of %p
969
48.0k
      // call @exit() // need not transfer execution to successor.
970
48.0k
      // assume(%p) // from this point on %p is true
971
48.0k
      // br label %BB
972
48.0k
      // BB: <LVI info2 for %p val, i.e. %p is true>
973
48.0k
      // %x = use of %p
974
48.0k
      // br label exit
975
48.0k
      //
976
48.0k
      // Note that this LVI info for blocks BB and SinglePred is correct for %p
977
48.0k
      // (info2 and info1 respectively). After the merge and the deletion of the
978
48.0k
      // LVI info1 for SinglePred, we have the following code:
979
48.0k
      // BB: <LVI info2 for %p val>
980
48.0k
      // %y = use of %p
981
48.0k
      // call @exit()
982
48.0k
      // assume(%p)
983
48.0k
      // %x = use of %p <-- LVI info2 is correct from here onwards.
984
48.0k
      // br label exit
985
48.0k
      // LVI info2 for BB is incorrect at the beginning of BB.
986
48.0k
987
48.0k
      // Invalidate LVI information for BB if the LVI is not provably true for
988
48.0k
      // all of BB.
989
48.0k
      if (any_of(*BB, [](Instruction &I) {
990
411k
            return !isGuaranteedToTransferExecutionToSuccessor(&I);
991
411k
          }))
992
14.3k
        LVI->eraseBlock(BB);
993
48.0k
      return true;
994
48.0k
    }
995
12.6M
  }
996
12.6M
997
12.6M
  if (TryToUnfoldSelectInCurrBB(BB))
998
1.63k
    return true;
999
12.6M
1000
12.6M
  // Look if we can propagate guards to predecessors.
1001
12.6M
  if (HasGuards && ProcessGuards(BB))
1002
2
    return true;
1003
12.6M
1004
12.6M
  // What kind of constant we're looking for.
1005
12.6M
  ConstantPreference Preference = WantInteger;
1006
12.6M
1007
12.6M
  // Look to see if the terminator is a conditional branch, switch or indirect
1008
12.6M
  // branch, if not we can't thread it.
1009
12.6M
  Value *Condition;
1010
12.6M
  Instruction *Terminator = BB->getTerminator();
1011
12.6M
  if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
1012
11.1M
    // Can't thread an unconditional jump.
1013
11.1M
    if (BI->isUnconditional())
      return false;
1014
6.17M
    Condition = BI->getCondition();
1015
12.6M
  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
1016
118k
    Condition = SI->getCondition();
1017
1.50M
  } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
1018
18
    // Can't thread indirect branch with no successors.
1019
18
    if (IB->getNumSuccessors() == 0)
      return false;
1020
17
    Condition = IB->getAddress()->stripPointerCasts();
1021
17
    Preference = WantBlockAddress;
1022
1.38M
  } else {
1023
1.38M
    return false; // Must be an invoke.
1024
1.38M
  }
1025
6.29M
1026
6.29M
  // Run constant folding to see if we can reduce the condition to a simple
1027
6.29M
  // constant.
1028
6.29M
  if (Instruction *I = dyn_cast<Instruction>(Condition)) {
1029
6.28M
    Value *SimpleVal =
1030
6.28M
        ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI);
1031
6.28M
    if (SimpleVal) {
1032
3.61k
      I->replaceAllUsesWith(SimpleVal);
1033
3.61k
      if (isInstructionTriviallyDead(I, TLI))
1034
3.61k
        I->eraseFromParent();
1035
3.61k
      Condition = SimpleVal;
1036
3.61k
    }
1037
6.28M
  }
1038
6.29M
1039
6.29M
  // If the terminator is branching on an undef, we can pick any of the
1040
6.29M
  // successors to branch to.  Let GetBestDestForJumpOnUndef decide.
1041
6.29M
  if (isa<UndefValue>(Condition)) {
1042
18
    unsigned BestSucc = GetBestDestForJumpOnUndef(BB);
1043
18
    std::vector<DominatorTree::UpdateType> Updates;
1044
18
1045
18
    // Fold the branch/switch.
1046
18
    TerminatorInst *BBTerm = BB->getTerminator();
1047
54
    for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1048
36
      if (i == BestSucc)
        continue;
1049
18
      BasicBlock *Succ = BBTerm->getSuccessor(i);
1050
18
      Succ->removePredecessor(BB, true);
1051
18
      if (Succ == BB)
1052
0
        continue;
1053
18
      DominatorTree::UpdateType UT = {DominatorTree::Delete, BB, Succ};
1054
18
      // Make sure to remove a DT edge exactly once and not an edge to itself.
1055
18
      if (std::find(Updates.begin(), Updates.end(), UT) == Updates.end())
1056
18
        Updates.push_back(UT);
1057
36
    }
1058
18
1059
18
    DEBUG(dbgs() << "  In block '" << BB->getName()
1060
18
          << "' folding undef terminator: " << *BBTerm << '\n');
1061
18
    BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
1062
18
    BBTerm->eraseFromParent();
1063
18
    DT->applyUpdates(Updates);
1064
18
    return true;
1065
18
  }
1066
6.29M
1067
6.29M
  // If the terminator of this block is branching on a constant, simplify the
1068
6.29M
  // terminator to an unconditional branch.  This can occur due to threading in
1069
6.29M
  // other blocks.
1070
6.29M
  if (getKnownConstant(Condition, Preference)) {
1071
5.08k
    DEBUG(dbgs() << "  In block '" << BB->getName()
1072
5.08k
          << "' folding terminator: " << *BB->getTerminator() << '\n');
1073
5.08k
    ++NumFolds;
1074
5.08k
    ConstantFoldTerminator(BB, true, nullptr, DT);
1075
5.08k
    return true;
1076
5.08k
  }
1077
6.28M
1078
6.28M
  Instruction *CondInst = dyn_cast<Instruction>(Condition);
1079
6.28M
1080
6.28M
  // All the rest of our checks depend on the condition being an instruction.
1081
6.28M
  if (!CondInst) {
1082
7.99k
    // FIXME: Unify this with code below.
1083
7.99k
    if (ProcessThreadableEdges(Condition, BB, Preference, Terminator))
1084
50
      return true;
1085
7.94k
    return false;
1086
7.94k
  }
1087
6.28M
1088
6.28M
  if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
1089
5.62M
    // If we're branching on a conditional, LVI might be able to determine
1090
5.62M
    // its value at the branch instruction.  We only handle comparisons
1091
5.62M
    // against a constant at this time.
1092
5.62M
    // TODO: This should be extended to handle switches as well.
1093
5.62M
    BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
1094
5.62M
    Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
1095
5.62M
    if (CondBr && CondConst) {
1096
4.26M
      // We should have returned as soon as we turn a conditional branch to
1097
4.26M
      // unconditional, because it's no longer interesting as far as jump
1098
4.26M
      // threading is concerned.
1099
4.26M
      assert(CondBr->isConditional() && "Threading on unconditional terminator");
1100
4.26M
1101
4.26M
      LazyValueInfo::Tristate Ret =
1102
4.26M
        LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
1103
4.26M
                            CondConst, CondBr);
1104
4.26M
      if (Ret != LazyValueInfo::Unknown) {
1105
10.1k
        unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0;
        unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1;
1107
10.1k
        BasicBlock *ToRemoveSucc = CondBr->getSuccessor(ToRemove);
1108
10.1k
        ToRemoveSucc->removePredecessor(BB, true);
1109
10.1k
        BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
1110
10.1k
        CondBr->eraseFromParent();
1111
10.1k
        if (BB != ToRemoveSucc)
1112
10.1k
          DT->deleteEdge(BB, ToRemoveSucc);
1113
10.1k
        if (CondCmp->use_empty())
1114
8.13k
          CondCmp->eraseFromParent();
1115
10.1k
        // We can safely replace *some* uses of the CondInst if it has
1116
10.1k
        // exactly one value as returned by LVI. RAUW is incorrect in the
1117
10.1k
        // presence of guards and assumes, that have the `Cond` as the use. This
1118
10.1k
        // is because we use the guards/assume to reason about the `Cond` value
1119
10.1k
        // at the end of block, but RAUW unconditionally replaces all uses
1120
10.1k
        // including the guards/assumes themselves and the uses before the
1121
10.1k
        // guard/assume.
1122
2.03k
        else if (CondCmp->getParent() == BB) {
1123
189
          auto *CI = Ret == LazyValueInfo::True ?
1124
49
            ConstantInt::getTrue(CondCmp->getType()) :
1125
140
            ConstantInt::getFalse(CondCmp->getType());
1126
2.03k
          ReplaceFoldableUses(CondCmp, CI);
1127
2.03k
        }
1128
10.1k
        return true;
1129
10.1k
      }
1130
4.25M
1131
4.25M
      // We did not manage to simplify this branch, try to see whether
1132
4.25M
      // CondCmp depends on a known phi-select pattern.
1133
4.25M
      if (TryToUnfoldSelect(CondCmp, BB))
1134
496
        return true;
1135
6.27M
    }
1136
5.62M
  }
1137
6.27M
1138
6.27M
  // Check for some cases that are worth simplifying.  Right now we want to look
1139
6.27M
  // for loads that are used by a switch or by the condition for the branch.  If
1140
6.27M
  // we see one, check to see if it's partially redundant.  If so, insert a PHI
1141
6.27M
  // which can then be used to thread the values.
1142
6.27M
  Value *SimplifyValue = CondInst;
1143
6.27M
  if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
1144
5.61M
    if (isa<Constant>(CondCmp->getOperand(1)))
1145
4.25M
      SimplifyValue = CondCmp->getOperand(0);
1146
6.27M
1147
6.27M
  // TODO: There are other places where load PRE would be profitable, such as
1148
6.27M
  // more complex comparisons.
1149
6.27M
  if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
1150
1.78M
    if (SimplifyPartiallyRedundantLoad(LI))
1151
2.11k
      return true;
1152
6.26M
1153
6.26M
  // Before threading, try to propagate profile data backwards:
1154
6.26M
  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
    if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1156
16.9k
      updatePredecessorProfileMetadata(PN, BB);
1157
6.26M
1158
6.26M
  // Handle a variety of cases where we are branching on something derived from
1159
6.26M
  // a PHI node in the current block.  If we can prove that any predecessors
1160
6.26M
  // compute a predictable value based on a PHI node, thread those predecessors.
1161
6.26M
  if (ProcessThreadableEdges(CondInst, BB, Preference, Terminator))
1162
44.4k
    return true;
1163
6.22M
1164
6.22M
  // If this is an otherwise-unfoldable branch on a phi node in the current
1165
6.22M
  // block, see if we can simplify.
1166
6.22M
  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
    if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1168
12.1k
      return ProcessBranchOnPHI(PN);
1169
6.21M
1170
6.21M
  // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
1171
6.21M
  if (CondInst->getOpcode() == Instruction::Xor &&
      CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1173
964
    return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
1174
6.21M
1175
6.21M
  // Search for a stronger dominating condition that can be used to simplify a
1176
6.21M
  // conditional branch leaving BB.
1177
6.21M
  
  if (ProcessImpliedCondition(BB))
1178
41
    return true;
1179
6.21M
1180
6.21M
  return false;
1181
6.21M
}
1182
1183
6.21M
bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) {
1184
6.21M
  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
1185
6.21M
  if (!BI || !BI->isConditional())
1186
110k
    return false;
1187
6.10M
1188
6.10M
  Value *Cond = BI->getCondition();
1189
6.10M
  BasicBlock *CurrentBB = BB;
1190
6.10M
  BasicBlock *CurrentPred = BB->getSinglePredecessor();
1191
6.10M
  unsigned Iter = 0;
1192
6.10M
1193
6.10M
  auto &DL = BB->getModule()->getDataLayout();
1194
6.10M
1195
12.2M
  while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
1196
6.31M
    auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
1197
6.31M
    if (!PBI || !PBI->isConditional())
1198
170k
      return false;
1199
6.14M
    
    if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
1200
0
      return false;
1201
6.14M
1202
6.14M
    bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
1203
6.14M
    Optional<bool> Implication =
1204
6.14M
        isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
1205
6.14M
    if (Implication) {
1206
41
      BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
1207
41
      RemoveSucc->removePredecessor(BB);
1208
41
      BranchInst::Create(BI->getSuccessor(*Implication ? 0 : 1), BI);
1209
41
      BI->eraseFromParent();
1210
41
      if (BB != RemoveSucc)
1211
41
        DT->deleteEdge(BB, RemoveSucc);
1212
41
      return true;
1213
41
    }
1214
6.14M
    CurrentBB = CurrentPred;
1215
6.14M
    CurrentPred = CurrentBB->getSinglePredecessor();
1216
6.14M
  }
1217
6.10M
1218
5.93M
  return false;
1219
6.21M
}
1220
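To make ProcessImpliedCondition concrete, here is a minimal LLVM IR sketch (illustrative only, not part of the measured source; all block and value names are invented):

    pred:
      %c1 = icmp slt i32 %a, 5
      br i1 %c1, label %bb, label %other
    bb:                                        ; single predecessor is pred
      %c2 = icmp slt i32 %a, 10
      br i1 %c2, label %then, label %else

On the edge pred->bb the condition %c1 holds, and (%a < 5) implies (%a < 10), so the conditional branch in bb is replaced by an unconditional 'br label %then' and the now-dead edge to %else is removed, matching the removePredecessor/deleteEdge steps in the code above.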
1221
/// Return true if Op is an instruction defined in the given block.
1222
935k
static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB) {
1223
935k
  if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1224
810k
    
    if (OpInst->getParent() == BB)
1225
668k
      return true;
1226
266k
  return false;
1227
266k
}
1228
1229
/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
1230
/// load instruction, eliminate it by replacing it with a PHI node.  This is an
1231
/// important optimization that encourages jump threading, and needs to be run
1232
/// interlaced with other jump threading tasks.
1233
1.78M
bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
1234
1.78M
  // Don't hack volatile and ordered loads.
1235
1.78M
  if (!LI->isUnordered()) return false;
1236
1.78M
1237
1.78M
  // If the load is defined in a block with exactly one predecessor, it can't be
1238
1.78M
  // partially redundant.
1239
1.78M
  BasicBlock *LoadBB = LI->getParent();
1240
1.78M
  if (LoadBB->getSinglePredecessor())
1241
848k
    return false;
1242
936k
1243
936k
  // If the load is defined in an EH pad, it can't be partially redundant,
1244
936k
  // because the edges between the invoke and the EH pad cannot have other
1245
936k
  // instructions between them.
1246
936k
  
  if (LoadBB->isEHPad())
1247
740
    return false;
1248
935k
1249
935k
  Value *LoadedPtr = LI->getOperand(0);
1250
935k
1251
935k
  // If the loaded operand is defined in the LoadBB and it's not a phi,
1252
935k
  // it can't be available in predecessors.
1253
935k
  if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
1254
638k
    return false;
1255
296k
1256
296k
  // Scan a few instructions up from the load, to see if it is obviously live at
1257
296k
  // the entry to its block.
1258
296k
  BasicBlock::iterator BBIt(LI);
1259
296k
  bool IsLoadCSE;
1260
296k
  if (Value *AvailableVal = FindAvailableLoadedValue(
1261
48
          LI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
1262
48
    // If the value of the load is locally available within the block, just use
1263
48
    // it.  This frequently occurs for reg2mem'd allocas.
1264
48
1265
48
    if (IsLoadCSE) {
1266
2
      LoadInst *NLI = cast<LoadInst>(AvailableVal);
1267
2
      combineMetadataForCSE(NLI, LI);
1268
2
    };
1269
48
1270
48
    // If the returned value is the load itself, replace with an undef. This can
1271
48
    // only happen in dead loops.
1272
48
    if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
1273
48
    if (AvailableVal->getType() != LI->getType())
1274
40
      AvailableVal =
1275
40
          CastInst::CreateBitOrPointerCast(AvailableVal, LI->getType(), "", LI);
1276
48
    LI->replaceAllUsesWith(AvailableVal);
1277
48
    LI->eraseFromParent();
1278
48
    return true;
1279
48
  }
1280
296k
1281
296k
  // Otherwise, if we scanned the whole block and got to the top of the block,
1282
296k
  // we know the block is locally transparent to the load.  If not, something
1283
296k
  // might clobber its value.
1284
296k
  
  if (BBIt != LoadBB->begin())
1285
97.5k
    return false;
1286
199k
1287
199k
  // If all of the loads and stores that feed the value have the same AA tags,
1288
199k
  // then we can propagate them onto any newly inserted loads.
1289
199k
  AAMDNodes AATags;
1290
199k
  LI->getAAMetadata(AATags);
1291
199k
1292
199k
  SmallPtrSet<BasicBlock*, 8> PredsScanned;
1293
199k
1294
199k
  using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;
1295
199k
1296
199k
  AvailablePredsTy AvailablePreds;
1297
199k
  BasicBlock *OneUnavailablePred = nullptr;
1298
199k
  SmallVector<LoadInst*, 8> CSELoads;
1299
199k
1300
199k
  // If we got here, the loaded value is transparent through to the start of the
1301
199k
  // block.  Check to see if it is available in any of the predecessor blocks.
1302
502k
  for (BasicBlock *PredBB : predecessors(LoadBB)) {
1303
502k
    // If we already scanned this predecessor, skip it.
1304
502k
    if (!PredsScanned.insert(PredBB).second)
1305
1.62k
      continue;
1306
501k
1307
501k
    BBIt = PredBB->end();
1308
501k
    unsigned NumScanedInst = 0;
1309
501k
    Value *PredAvailable = nullptr;
1310
501k
    // NOTE: We don't CSE load that is volatile or anything stronger than
1311
501k
    // unordered, that should have been checked when we entered the function.
1312
501k
    assert(LI->isUnordered() && "Attempting to CSE volatile or atomic loads");
1313
501k
    // If this is a load on a phi pointer, phi-translate it and search
1314
501k
    // for available load/store to the pointer in predecessors.
1315
501k
    Value *Ptr = LoadedPtr->DoPHITranslation(LoadBB, PredBB);
1316
501k
    PredAvailable = FindAvailablePtrLoadStore(
1317
501k
        Ptr, LI->getType(), LI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
1318
501k
        AA, &IsLoadCSE, &NumScanedInst);
1319
501k
1320
501k
    // If PredBB has a single predecessor, continue scanning through the
1321
501k
    // single predecessor.
1322
501k
    BasicBlock *SinglePredBB = PredBB;
1323
691k
    while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
1324
501k
           NumScanedInst < DefMaxInstsToScan) {
1325
190k
      SinglePredBB = SinglePredBB->getSinglePredecessor();
1326
190k
      if (SinglePredBB) {
1327
96.0k
        BBIt = SinglePredBB->end();
1328
96.0k
        PredAvailable = FindAvailablePtrLoadStore(
1329
96.0k
            Ptr, LI->getType(), LI->isAtomic(), SinglePredBB, BBIt,
1330
96.0k
            (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
1331
96.0k
            &NumScanedInst);
1332
96.0k
      }
1333
190k
    }
1334
501k
1335
501k
    if (!PredAvailable) {
1336
498k
      OneUnavailablePred = PredBB;
1337
498k
      continue;
1338
498k
    }
1339
2.66k
1340
2.66k
    
    if (IsLoadCSE)
1341
1.70k
      CSELoads.push_back(cast<LoadInst>(PredAvailable));
1342
502k
1343
502k
    // If so, this load is partially redundant.  Remember this info so that we
1344
502k
    // can create a PHI node.
1345
502k
    AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
1346
502k
  }
1347
199k
1348
199k
  // If the loaded value isn't available in any predecessor, it isn't partially
1349
199k
  // redundant.
1350
199k
  if (AvailablePreds.empty()) return false;
1351
2.06k
1352
2.06k
  // Okay, the loaded value is available in at least one (and maybe all!)
1353
2.06k
  // predecessors.  If the value is unavailable in more than one unique
1354
2.06k
  // predecessor, we want to insert a merge block for those common predecessors.
1355
2.06k
  // This ensures that we only have to insert one reload, thus not increasing
1356
2.06k
  // code size.
1357
2.06k
  BasicBlock *UnavailablePred = nullptr;
1358
2.06k
1359
2.06k
  // If there is exactly one predecessor where the value is unavailable, the
1360
2.06k
  // already computed 'OneUnavailablePred' block is it.  If it ends in an
1361
2.06k
  // unconditional branch, we know that it isn't a critical edge.
1362
2.06k
  if (PredsScanned.size() == AvailablePreds.size()+1 &&
1363
2.06k
      
      OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
1364
651
    UnavailablePred = OneUnavailablePred;
1365
2.06k
  } else if (PredsScanned.size() != AvailablePreds.size()) {
1366
1.18k
    // Otherwise, we had multiple unavailable predecessors or we had a critical
1367
1.18k
    // edge from the one.
1368
1.18k
    SmallVector<BasicBlock*, 8> PredsToSplit;
1369
1.18k
    SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
1370
1.18k
1371
1.18k
    for (const auto &AvailablePred : AvailablePreds)
1372
1.48k
      AvailablePredSet.insert(AvailablePred.first);
1373
1.18k
1374
1.18k
    // Add all the unavailable predecessors to the PredsToSplit list.
1375
4.20k
    for (BasicBlock *P : predecessors(LoadBB)) {
1376
4.20k
      // If the predecessor is an indirect goto, we can't split the edge.
1377
4.20k
      if (isa<IndirectBrInst>(P->getTerminator()))
1378
1
        return false;
1379
4.19k
1380
4.19k
      
      if (!AvailablePredSet.count(P))
1381
2.68k
        PredsToSplit.push_back(P);
1382
4.20k
    }
1383
1.18k
1384
1.18k
    // Split them out to their own block.
1385
1.18k
    UnavailablePred = SplitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
1386
1.18k
  }
1387
2.06k
1388
2.06k
  // If the value isn't available in all predecessors, then there will be
1389
2.06k
  // exactly one where it isn't available.  Insert a load on that edge and add
1390
2.06k
  // it to the AvailablePreds list.
1391
2.06k
  
  if (UnavailablePred) {
1392
1.83k
    assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
1393
1.83k
           "Can't handle critical edge here!");
1394
1.83k
    LoadInst *NewVal = new LoadInst(
1395
1.83k
        LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
1396
1.83k
        LI->getName() + ".pr", false, LI->getAlignment(), LI->getOrdering(),
1397
1.83k
        LI->getSyncScopeID(), UnavailablePred->getTerminator());
1398
1.83k
    NewVal->setDebugLoc(LI->getDebugLoc());
1399
1.83k
    if (AATags)
1400
1.69k
      NewVal->setAAMetadata(AATags);
1401
1.83k
1402
1.83k
    AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
1403
1.83k
  }
1404
2.06k
1405
2.06k
  // Now we know that each predecessor of this block has a value in
1406
2.06k
  // AvailablePreds, sort them for efficient access as we're walking the preds.
1407
2.06k
  array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
1408
2.06k
1409
2.06k
  // Create a PHI node at the start of the block for the PRE'd load value.
1410
2.06k
  pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
1411
2.06k
  PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
1412
2.06k
                                &LoadBB->front());
1413
2.06k
  PN->takeName(LI);
1414
2.06k
  PN->setDebugLoc(LI->getDebugLoc());
1415
2.06k
1416
2.06k
  // Insert new entries into the PHI for each predecessor.  A single block may
1417
2.06k
  // have multiple entries here.
1418
6.61k
  for (pred_iterator PI = PB; PI != PE; ++PI) {
1419
4.55k
    BasicBlock *P = *PI;
1420
4.55k
    AvailablePredsTy::iterator I =
1421
4.55k
      std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
1422
4.55k
                       std::make_pair(P, (Value*)nullptr));
1423
4.55k
1424
4.55k
    assert(I != AvailablePreds.end() && I->first == P &&
1425
4.55k
           "Didn't find entry for predecessor!");
1426
4.55k
1427
4.55k
    // If we have an available predecessor but it requires casting, insert the
1428
4.55k
    // cast in the predecessor and use the cast. Note that we have to update the
1429
4.55k
    // AvailablePreds vector as we go so that all of the PHI entries for this
1430
4.55k
    // predecessor use the same bitcast.
1431
4.55k
    Value *&PredV = I->second;
1432
4.55k
    if (PredV->getType() != LI->getType())
1433
66
      PredV = CastInst::CreateBitOrPointerCast(PredV, LI->getType(), "",
1434
66
                                               P->getTerminator());
1435
4.55k
1436
4.55k
    PN->addIncoming(PredV, I->first);
1437
4.55k
  }
1438
2.06k
1439
1.70k
  for (LoadInst *PredLI : CSELoads) {
1440
1.70k
    combineMetadataForCSE(PredLI, LI);
1441
1.70k
  }
1442
2.06k
1443
2.06k
  LI->replaceAllUsesWith(PN);
1444
2.06k
  LI->eraseFromParent();
1445
2.06k
1446
2.06k
  return true;
1447
1.78M
}
1448
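As a hedged illustration of the partial redundancy the function above looks for (a sketch, not taken from the report; the names are invented):

    pred1:                                     ; the loaded value is available here
      store i32 %v, i32* %p
      br label %bb
    pred2:                                     ; not available here
      br label %bb
    bb:
      %x = load i32, i32* %p

The load in bb is partially redundant: a reload %x.pr is inserted at the end of pred2 and the original load is replaced by a PHI such as '%x = phi i32 [ %v, %pred1 ], [ %x.pr, %pred2 ]', which the later jump-threading steps can then exploit.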
1449
/// FindMostPopularDest - The specified list contains multiple possible
1450
/// threadable destinations.  Pick the one that occurs the most frequently in
1451
/// the list.
1452
static BasicBlock *
1453
FindMostPopularDest(BasicBlock *BB,
1454
                    const SmallVectorImpl<std::pair<BasicBlock *,
1455
19.5k
                                          BasicBlock *>> &PredToDestList) {
1456
19.5k
  assert(!PredToDestList.empty());
1457
19.5k
1458
19.5k
  // Determine popularity.  If there are multiple possible destinations, we
1459
19.5k
  // explicitly choose to ignore 'undef' destinations.  We prefer to thread
1460
19.5k
  // blocks with known and real destinations to threading undef.  We'll handle
1461
19.5k
  // them later if interesting.
1462
19.5k
  DenseMap<BasicBlock*, unsigned> DestPopularity;
1463
19.5k
  for (const auto &PredToDest : PredToDestList)
1464
65.2k
    
    if (PredToDest.second)
1465
65.2k
      DestPopularity[PredToDest.second]++;
1466
19.5k
1467
19.5k
  // Find the most popular dest.
1468
19.5k
  DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
1469
19.5k
  BasicBlock *MostPopularDest = DPI->first;
1470
19.5k
  unsigned Popularity = DPI->second;
1471
19.5k
  SmallVector<BasicBlock*, 4> SamePopularity;
1472
19.5k
1473
39.5k
  for (++DPI; DPI != DestPopularity.end(); ++DPI) {
1474
19.9k
    // If the popularity of this entry isn't higher than the popularity we've
1475
19.9k
    // seen so far, ignore it.
1476
19.9k
    if (DPI->second < Popularity)
1477
5.89k
      ; // ignore.
1478
14.0k
    else if (DPI->second == Popularity) {
1479
9.37k
      // If it is the same as what we've seen so far, keep track of it.
1480
9.37k
      SamePopularity.push_back(DPI->first);
1481
14.0k
    } else {
1482
4.70k
      // If it is more popular, remember it.
1483
4.70k
      SamePopularity.clear();
1484
4.70k
      MostPopularDest = DPI->first;
1485
4.70k
      Popularity = DPI->second;
1486
4.70k
    }
1487
19.9k
  }
1488
19.5k
1489
19.5k
  // Okay, now we know the most popular destination.  If there is more than one
1490
19.5k
  // destination, we need to determine one.  This is arbitrary, but we need
1491
19.5k
  // to make a deterministic decision.  Pick the first one that appears in the
1492
19.5k
  // successor list.
1493
19.5k
  if (!SamePopularity.empty()) {
1494
9.16k
    SamePopularity.push_back(MostPopularDest);
1495
9.16k
    TerminatorInst *TI = BB->getTerminator();
1496
9.16k
    for (unsigned i = 0; ; ++i) {
1497
9.77k
      assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
1498
9.77k
1499
9.77k
      if (!is_contained(SamePopularity, TI->getSuccessor(i)))
1500
615
        continue;
1501
9.16k
1502
9.16k
      MostPopularDest = TI->getSuccessor(i);
1503
9.16k
      break;
1504
9.16k
    }
1505
9.16k
  }
1506
19.5k
1507
19.5k
  // Okay, we have finally picked the most popular destination.
1508
19.5k
  return MostPopularDest;
1509
19.5k
}
1510
1511
bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
1512
                                               ConstantPreference Preference,
1513
6.27M
                                               Instruction *CxtI) {
1514
6.27M
  // If threading this would thread across a loop header, don't even try to
1515
6.27M
  // thread the edge.
1516
6.27M
  if (LoopHeaders.count(BB))
1517
1.13M
    return false;
1518
5.13M
1519
5.13M
  PredValueInfoTy PredValues;
1520
5.13M
  if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference, CxtI))
1521
5.07M
    return false;
1522
62.7k
1523
5.13M
  assert(!PredValues.empty() &&
1524
62.7k
         "ComputeValueKnownInPredecessors returned true with no values");
1525
62.7k
1526
62.7k
  DEBUG(dbgs() << "IN BB: " << *BB;
1527
62.7k
        for (const auto &PredValue : PredValues) {
1528
62.7k
          dbgs() << "  BB '" << BB->getName() << "': FOUND condition = "
1529
62.7k
            << *PredValue.first
1530
62.7k
            << " for pred '" << PredValue.second->getName() << "'.\n";
1531
62.7k
        });
1532
62.7k
1533
62.7k
  // Decide what we want to thread through.  Convert our list of known values to
1534
62.7k
  // a list of known destinations for each pred.  This also discards duplicate
1535
62.7k
  // predecessors and keeps track of the undefined inputs (which are represented
1536
62.7k
  // as a null dest in the PredToDestList).
1537
62.7k
  SmallPtrSet<BasicBlock*, 16> SeenPreds;
1538
62.7k
  SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
1539
62.7k
1540
62.7k
  BasicBlock *OnlyDest = nullptr;
1541
62.7k
  BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
1542
62.7k
  Constant *OnlyVal = nullptr;
1543
62.7k
  Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
1544
62.7k
1545
62.7k
  unsigned PredWithKnownDest = 0;
1546
125k
  for (const auto &PredValue : PredValues) {
1547
125k
    BasicBlock *Pred = PredValue.second;
1548
125k
    if (!SeenPreds.insert(Pred).second)
1549
1.07k
      continue;  // Duplicate predecessor entry.
1550
124k
1551
124k
    Constant *Val = PredValue.first;
1552
124k
1553
124k
    BasicBlock *DestBB;
1554
124k
    if (isa<UndefValue>(Val))
1555
12
      DestBB = nullptr;
1556
124k
    else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
1557
119k
      assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1558
119k
      DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
1559
124k
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
1560
5.21k
      assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1561
5.21k
      DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
1562
5.21k
    } else {
1563
3
      assert(isa<IndirectBrInst>(BB->getTerminator())
1564
3
              && "Unexpected terminator");
1565
3
      assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
1566
3
      DestBB = cast<BlockAddress>(Val)->getBasicBlock();
1567
3
    }
1568
124k
1569
124k
    // If we have exactly one destination, remember it for efficiency below.
1570
124k
    if (PredToDestList.empty()) {
1571
62.7k
      OnlyDest = DestBB;
1572
62.7k
      OnlyVal = Val;
1573
124k
    } else {
1574
61.5k
      if (OnlyDest != DestBB)
1575
38.9k
        OnlyDest = MultipleDestSentinel;
1576
61.5k
      // It is possible that we have the same destination but a different value, e.g. the default
1577
61.5k
      // case in switchinst.
1578
61.5k
      if (Val != OnlyVal)
1579
39.0k
        OnlyVal = MultipleVal;
1580
61.5k
    }
1581
124k
1582
124k
    // We know where this predecessor is going.
1583
124k
    ++PredWithKnownDest;
1584
124k
1585
124k
    // If the predecessor ends with an indirect goto, we can't change its
1586
124k
    // destination.
1587
124k
    if (isa<IndirectBrInst>(Pred->getTerminator()))
1588
0
      continue;
1589
124k
1590
124k
    PredToDestList.push_back(std::make_pair(Pred, DestBB));
1591
124k
  }
1592
62.7k
1593
62.7k
  // If all edges were unthreadable, we fail.
1594
62.7k
  if (PredToDestList.empty())
1595
0
    return false;
1596
62.7k
1597
62.7k
  // If all the predecessors go to a single known successor, we want to fold,
1598
62.7k
  // not thread. By doing so, we do not need to duplicate the current block and
1599
62.7k
  // also miss potential opportunities in case we don't/can't duplicate.
1600
62.7k
  
  if (OnlyDest && OnlyDest != MultipleDestSentinel) {
1601
43.1k
    if (PredWithKnownDest ==
1602
43.1k
        (size_t)std::distance(pred_begin(BB), pred_end(BB))) {
1603
1.62k
      std::vector<DominatorTree::UpdateType> Updates;
1604
1.62k
      bool SeenFirstBranchToOnlyDest = false;
1605
3.33k
      for (BasicBlock *SuccBB : successors(BB)) {
1606
3.33k
        if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {
1607
1.62k
          SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
1608
3.33k
        } else {
1609
1.70k
          SuccBB->removePredecessor(BB, true); // This is unreachable successor.
1610
1.70k
          if (SuccBB != OnlyDest && SuccBB != BB) {
1611
1.67k
            DominatorTree::UpdateType UT = {DominatorTree::Delete, BB, SuccBB};
1612
1.67k
            // Make sure to remove a DT edge exactly once.
1613
1.67k
            if (std::find(Updates.begin(), Updates.end(), UT) == Updates.end())
1614
1.67k
              Updates.push_back(UT);
1615
1.67k
          }
1616
1.70k
        }
1617
3.33k
      }
1618
1.62k
1619
1.62k
      // Finally update the terminator.
1620
1.62k
      TerminatorInst *Term = BB->getTerminator();
1621
1.62k
      BranchInst::Create(OnlyDest, Term);
1622
1.62k
      Term->eraseFromParent();
1623
1.62k
      DT->applyUpdates(Updates);
1624
1.62k
1625
1.62k
      // If the condition is now dead due to the removal of the old terminator,
1626
1.62k
      // erase it.
1627
1.62k
      if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
1628
1.61k
        if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
1629
360
          CondInst->eraseFromParent();
1630
1.61k
        // We can safely replace *some* uses of the CondInst if it has
1631
1.61k
        // exactly one value as returned by LVI. RAUW is incorrect in the
1632
1.61k
        // presence of guards and assumes, that have the `Cond` as the use. This
1633
1.61k
        // is because we use the guards/assume to reason about the `Cond` value
1634
1.61k
        // at the end of block, but RAUW unconditionally replaces all uses
1635
1.61k
        // including the guards/assumes themselves and the uses before the
1636
1.61k
        // guard/assume.
1637
1.25k
        else if (OnlyVal && OnlyVal != MultipleVal &&
1638
1.25k
                 CondInst->getParent() == BB)
1639
20
          ReplaceFoldableUses(CondInst, OnlyVal);
1640
1.61k
      }
1641
1.62k
      return true;
1642
1.62k
    }
1643
61.0k
  }
1644
61.0k
1645
61.0k
  // Determine which is the most common successor.  If we have many inputs and
1646
61.0k
  // this block is a switch, we want to start by threading the batch that goes
1647
61.0k
  // to the most popular destination first.  If we only know about one
1648
61.0k
  // threadable destination (the common case) we can avoid this.
1649
61.0k
  BasicBlock *MostPopularDest = OnlyDest;
1650
61.0k
1651
61.0k
  if (MostPopularDest == MultipleDestSentinel)
1652
19.5k
    MostPopularDest = FindMostPopularDest(BB, PredToDestList);
1653
61.0k
1654
61.0k
  // Now that we know what the most popular destination is, factor all
1655
61.0k
  // predecessors that will jump to it into a single predecessor.
1656
61.0k
  SmallVector<BasicBlock*, 16> PredsToFactor;
1657
61.0k
  for (const auto &PredToDest : PredToDestList)
1658
122k
    
    if (PredToDest.second == MostPopularDest) {
1659
99.9k
      BasicBlock *Pred = PredToDest.first;
1660
99.9k
1661
99.9k
      // This predecessor may be a switch or something else that has multiple
1662
99.9k
      // edges to the block.  Factor each of these edges by listing them
1663
99.9k
      // according to # occurrences in PredsToFactor.
1664
99.9k
      for (BasicBlock *Succ : successors(Pred))
1665
168k
        
        if (Succ == BB)
1666
100k
          PredsToFactor.push_back(Pred);
1667
122k
    }
1668
61.0k
1669
61.0k
  // If the threadable edges are branching on an undefined value, we get to pick
1670
61.0k
  // the destination that these predecessors should get to.
1671
61.0k
  if (!MostPopularDest)
1672
10
    MostPopularDest = BB->getTerminator()->
1673
10
                            getSuccessor(GetBestDestForJumpOnUndef(BB));
1674
6.27M
1675
6.27M
  // Ok, try to thread it!
1676
6.27M
  return ThreadEdge(BB, PredsToFactor, MostPopularDest);
1677
6.27M
}
1678
1679
/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
1680
/// a PHI node in the current block.  See if there are any simplifications we
1681
/// can do based on inputs to the phi node.
1682
12.1k
bool JumpThreadingPass::ProcessBranchOnPHI(PHINode *PN) {
1683
12.1k
  BasicBlock *BB = PN->getParent();
1684
12.1k
1685
12.1k
  // TODO: We could make use of this to do it once for blocks with common PHI
1686
12.1k
  // values.
1687
12.1k
  SmallVector<BasicBlock*, 1> PredBBs;
1688
12.1k
  PredBBs.resize(1);
1689
12.1k
1690
12.1k
  // If any of the predecessor blocks end in an unconditional branch, we can
1691
12.1k
  // *duplicate* the conditional branch into that block in order to further
1692
12.1k
  // encourage jump threading and to eliminate cases where we have branch on a
1693
12.1k
  // phi of an icmp (branch on icmp is much better).
1694
35.4k
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1695
23.7k
    BasicBlock *PredBB = PN->getIncomingBlock(i);
1696
23.7k
    if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
1697
23.2k
      
      if (PredBr->isUnconditional()) {
1698
10.4k
        PredBBs[0] = PredBB;
1699
10.4k
        // Try to duplicate BB into PredBB.
1700
10.4k
        if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
1701
438
          return true;
1702
23.2k
      }
1703
23.7k
  }
1704
12.1k
1705
11.7k
  return false;
1706
12.1k
}
1707
1708
/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
1709
/// a xor instruction in the current block.  See if there are any
1710
/// simplifications we can do based on inputs to the xor.
1711
964
bool JumpThreadingPass::ProcessBranchOnXOR(BinaryOperator *BO) {
1712
964
  BasicBlock *BB = BO->getParent();
1713
964
1714
964
  // If either the LHS or RHS of the xor is a constant, don't do this
1715
964
  // optimization.
1716
964
  if (isa<ConstantInt>(BO->getOperand(0)) ||
1717
964
      isa<ConstantInt>(BO->getOperand(1)))
1718
284
    return false;
1719
680
1720
680
  // If the first instruction in BB isn't a phi, we won't be able to infer
1721
680
  // anything special about any particular predecessor.
1722
680
  
  if (!isa<PHINode>(BB->front()))
1723
491
    return false;
1724
189
1725
189
  // If this BB is a landing pad, we won't be able to split the edge into it.
1726
189
  
  if (BB->isEHPad())
1727
1
    return false;
1728
188
1729
188
  // If we have a xor as the branch input to this block, and we know that the
1730
188
  // LHS or RHS of the xor in any predecessor is true/false, then we can clone
1731
188
  // the condition into the predecessor and fix that value to true, saving some
1732
188
  // logical ops on that path and encouraging other paths to simplify.
1733
188
  //
1734
188
  // This copies something like this:
1735
188
  //
1736
188
  //  BB:
1737
188
  //    %X = phi i1 [1],  [%X']
1738
188
  //    %Y = icmp eq i32 %A, %B
1739
188
  //    %Z = xor i1 %X, %Y
1740
188
  //    br i1 %Z, ...
1741
188
  //
1742
188
  // Into:
1743
188
  //  BB':
1744
188
  //    %Y = icmp ne i32 %A, %B
1745
188
  //    br i1 %Y, ...
1746
188
1747
188
  PredValueInfoTy XorOpValues;
1748
188
  bool isLHS = true;
1749
188
  if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
1750
188
                                       WantInteger, BO)) {
1751
181
    assert(XorOpValues.empty());
1752
181
    if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
1753
181
                                         WantInteger, BO))
1754
178
      return false;
1755
3
    isLHS = false;
1756
3
  }
1757
188
1758
10
  assert(!XorOpValues.empty() &&
1759
10
         "ComputeValueKnownInPredecessors returned true with no values");
1760
10
1761
10
  // Scan the information to see which is most popular: true or false.  The
1762
10
  // predecessors can be of the set true, false, or undef.
1763
10
  unsigned NumTrue = 0, NumFalse = 0;
1764
14
  for (const auto &XorOpValue : XorOpValues) {
1765
14
    if (isa<UndefValue>(XorOpValue.first))
1766
14
      // Ignore undefs for the count.
1767
2
      continue;
1768
12
    
    if (cast<ConstantInt>(XorOpValue.first)->isZero())
1769
6
      ++NumFalse;
1770
12
    else
1771
6
      ++NumTrue;
1772
14
  }
1773
10
1774
10
  // Determine which value to split on, true, false, or undef if neither.
1775
10
  ConstantInt *SplitVal = nullptr;
1776
10
  if (NumTrue > NumFalse)
1777
4
    SplitVal = ConstantInt::getTrue(BB->getContext());
1778
6
  else if (NumTrue != 0 || NumFalse != 0)
1779
5
    SplitVal = ConstantInt::getFalse(BB->getContext());
1780
10
1781
10
  // Collect all of the blocks that this can be folded into so that we can
1782
10
  // factor this once and clone it once.
1783
10
  SmallVector<BasicBlock*, 8> BlocksToFoldInto;
1784
14
  for (const auto &XorOpValue : XorOpValues) {
1785
14
    if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
1786
0
      continue;
1787
14
1788
14
    BlocksToFoldInto.push_back(XorOpValue.second);
1789
14
  }
1790
10
1791
10
  // If we inferred a value for all of the predecessors, then duplication won't
1792
10
  // help us.  However, we can just replace the LHS or RHS with the constant.
1793
10
  if (BlocksToFoldInto.size() ==
1794
10
      cast<PHINode>(BB->front()).getNumIncomingValues()) {
1795
1
    if (!SplitVal) {
1796
1
      // If all preds provide undef, just nuke the xor, because it is undef too.
1797
1
      BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
1798
1
      BO->eraseFromParent();
1799
1
    } else if (SplitVal->isZero()) {
1800
0
      // If all preds provide 0, replace the xor with the other input.
1801
0
      BO->replaceAllUsesWith(BO->getOperand(isLHS));
1802
0
      BO->eraseFromParent();
1803
0
    } else {
1804
0
      // If all preds provide 1, set the computed value to 1.
1805
0
      BO->setOperand(!isLHS, SplitVal);
1806
0
    }
1807
1
1808
1
    return true;
1809
1
  }
1810
9
1811
9
  // Try to duplicate BB into PredBB.
1812
9
  return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
1813
9
}
1814
1815
/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
1816
/// predecessor to the PHIBB block.  If it has PHI nodes, add entries for
1817
/// NewPred using the entries from OldPred (suitably mapped).
1818
static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
1819
                                            BasicBlock *OldPred,
1820
                                            BasicBlock *NewPred,
1821
43.7k
                                     DenseMap<Instruction*, Value*> &ValueMap) {
1822
43.7k
  for (BasicBlock::iterator PNI = PHIBB->begin();
1823
65.2k
       PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) {
1824
21.4k
    // Ok, we have a PHI node.  Figure out what the incoming value was for the
1825
21.4k
    // DestBlock.
1826
21.4k
    Value *IV = PN->getIncomingValueForBlock(OldPred);
1827
21.4k
1828
21.4k
    // Remap the value if necessary.
1829
21.4k
    if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
1830
11.5k
      DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
1831
11.5k
      if (I != ValueMap.end())
1832
7.72k
        IV = I->second;
1833
11.5k
    }
1834
21.4k
1835
21.4k
    PN->addIncoming(IV, NewPred);
1836
21.4k
  }
1837
43.7k
}
1838
1839
/// ThreadEdge - We have decided that it is safe and profitable to factor the
1840
/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
1841
/// across BB.  Transform the IR to reflect this change.
1842
bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
1843
                                   const SmallVectorImpl<BasicBlock *> &PredBBs,
1844
61.0k
                                   BasicBlock *SuccBB) {
1845
61.0k
  // If threading to the same block as we come from, we would infinite loop.
1846
61.0k
  if (SuccBB == BB) {
1847
0
    DEBUG(dbgs() << "  Not threading across BB '" << BB->getName()
1848
0
          << "' - would thread to self!\n");
1849
0
    return false;
1850
0
  }
1851
61.0k
1852
61.0k
  // If threading this would thread across a loop header, don't thread the edge.
1853
61.0k
  // See the comments above FindLoopHeaders for justifications and caveats.
1854
61.0k
  
  if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
1855
1.82k
    DEBUG({
1856
1.82k
      bool BBIsHeader = LoopHeaders.count(BB);
1857
1.82k
      bool SuccIsHeader = LoopHeaders.count(SuccBB);
1858
1.82k
      dbgs() << "  Not threading across "
1859
1.82k
          << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
1860
1.82k
          << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
1861
1.82k
          << SuccBB->getName() << "' - it might create an irreducible loop!\n";
1862
1.82k
    });
1863
1.82k
    return false;
1864
1.82k
  }
1865
59.2k
1866
59.2k
  unsigned JumpThreadCost =
1867
59.2k
      getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
1868
59.2k
  if (JumpThreadCost > BBDupThreshold) {
1869
16.4k
    DEBUG(dbgs() << "  Not threading BB '" << BB->getName()
1870
16.4k
          << "' - Cost is too high: " << JumpThreadCost << "\n");
1871
16.4k
    return false;
1872
16.4k
  }
1873
42.8k
1874
42.8k
  // And finally, do it!  Start by factoring the predecessors if needed.
1875
42.8k
  BasicBlock *PredBB;
1876
42.8k
  if (PredBBs.size() == 1)
1877
28.8k
    PredBB = PredBBs[0];
1878
13.9k
  else {
1879
13.9k
    DEBUG(dbgs() << "  Factoring out " << PredBBs.size()
1880
13.9k
          << " common predecessors.\n");
1881
13.9k
    PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
1882
13.9k
  }
1883
42.8k
1884
42.8k
  // And finally, do it!
1885
42.8k
  DEBUG(dbgs() << "  Threading edge from '" << PredBB->getName() << "' to '"
1886
42.8k
        << SuccBB->getName() << "' with cost: " << JumpThreadCost
1887
42.8k
        << ", across block:\n    "
1888
42.8k
        << *BB << "\n");
1889
42.8k
1890
42.8k
  LVI->threadEdge(PredBB, BB, SuccBB);
1891
42.8k
1892
42.8k
  // We are going to have to map operands from the original BB block to the new
1893
42.8k
  // copy of the block 'NewBB'.  If there are PHI nodes in BB, evaluate them to
1894
42.8k
  // account for entry from PredBB.
1895
42.8k
  DenseMap<Instruction*, Value*> ValueMapping;
1896
42.8k
1897
42.8k
  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
1898
42.8k
                                         BB->getName()+".thread",
1899
42.8k
                                         BB->getParent(), BB);
1900
42.8k
  NewBB->moveAfter(PredBB);
1901
42.8k
1902
42.8k
  // Set the block frequency of NewBB.
1903
42.8k
  if (HasProfileData) {
1904
3
    auto NewBBFreq =
1905
3
        BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
1906
3
    BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
1907
3
  }
1908
42.8k
1909
42.8k
  BasicBlock::iterator BI = BB->begin();
1910
95.4k
  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
1911
52.5k
    ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
1912
42.8k
1913
42.8k
  // Clone the non-phi instructions of BB into NewBB, keeping track of the
1914
42.8k
  // mapping and using it to remap operands in the cloned instructions.
1915
89.6k
  for (; !isa<TerminatorInst>(BI); ++BI) {
1916
46.8k
    Instruction *New = BI->clone();
1917
46.8k
    New->setName(BI->getName());
1918
46.8k
    NewBB->getInstList().push_back(New);
1919
46.8k
    ValueMapping[&*BI] = New;
1920
46.8k
1921
46.8k
    // Remap operands to patch up intra-block references.
1922
141k
    for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
1923
95.0k
      
      if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
1924
53.9k
        DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
1925
53.9k
        if (I != ValueMapping.end())
1926
35.8k
          New->setOperand(i, I->second);
1927
95.0k
      }
1928
46.8k
  }
1929
42.8k
1930
42.8k
  // We didn't copy the terminator from BB over to NewBB, because there is now
1931
42.8k
  // an unconditional jump to SuccBB.  Insert the unconditional jump.
1932
42.8k
  BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
1933
42.8k
  NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
1934
42.8k
1935
42.8k
  // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
1936
42.8k
  // PHI nodes for NewBB now.
1937
42.8k
  AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
1938
42.8k
1939
42.8k
  // If there were values defined in BB that are used outside the block, then we
1940
42.8k
  // now have to update all uses of the value to use either the original value,
1941
42.8k
  // the cloned value, or some PHI derived value.  This can require arbitrary
1942
42.8k
  // PHI insertion, which we are prepared to do; clean these up now.
1943
42.8k
  SSAUpdater SSAUpdate;
1944
42.8k
  SmallVector<Use*, 16> UsesToRename;
1945
142k
  for (Instruction &I : *BB) {
1946
142k
    // Scan all uses of this instruction to see if it is used outside of its
1947
142k
    // block, and if so, record them in UsesToRename.
1948
157k
    for (Use &U : I.uses()) {
1949
157k
      Instruction *User = cast<Instruction>(U.getUser());
1950
157k
      if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1951
35.7k
        if (UserPN->getIncomingBlock(U) == BB)
1952
17.2k
          continue;
1953
121k
      } else if (User->getParent() == BB)
1954
74.5k
        continue;
1955
65.3k
1956
65.3k
      UsesToRename.push_back(&U);
1957
65.3k
    }
1958
142k
    // If there are no uses outside the block, we're done with this instruction.
1959
142k
    if (UsesToRename.empty())
1960
113k
      continue;
1961
28.5k
1962
28.5k
    
    DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
1963
28.5k
1964
28.5k
    // We found a use of I outside of BB.  Rename all uses of I that are outside
1965
28.5k
    // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks
1966
28.5k
    // with the two values we know.
1967
28.5k
    SSAUpdate.Initialize(I.getType(), I.getName());
1968
28.5k
    SSAUpdate.AddAvailableValue(BB, &I);
1969
28.5k
    SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
1970
28.5k
1971
93.9k
    while (!UsesToRename.empty())
1972
65.3k
      SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
1973
28.5k
    DEBUG(dbgs() << "\n");
1974
142k
  }
1975
42.8k
1976
42.8k
  // Ok, NewBB is good to go.  Update the terminator of PredBB to jump to
1977
42.8k
  // NewBB instead of BB.  This eliminates predecessors from BB, which requires
1978
42.8k
  // us to simplify any PHI nodes in BB.
1979
42.8k
  TerminatorInst *PredTerm = PredBB->getTerminator();
1980
104k
  for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
1981
61.5k
    
    if (PredTerm->getSuccessor(i) == BB) {
1982
42.8k
      BB->removePredecessor(PredBB, true);
1983
42.8k
      PredTerm->setSuccessor(i, NewBB);
1984
42.8k
    }
1985
61.0k
1986
61.0k
  DT->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB},
1987
61.0k
                    {DominatorTree::Insert, PredBB, NewBB},
1988
61.0k
                    {DominatorTree::Delete, PredBB, BB}});
1989
61.0k
1990
61.0k
  // At this point, the IR is fully up to date and consistent.  Do a quick scan
1991
61.0k
  // over the new instructions and zap any that are constants or dead.  This
1992
61.0k
  // frequently happens because of phi translation.
1993
61.0k
  SimplifyInstructionsInBlock(NewBB, TLI);
1994
61.0k
1995
61.0k
  // Update the edge weight from BB to SuccBB, which should be less than before.
1996
61.0k
  UpdateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
1997
61.0k
1998
61.0k
  // Threaded an edge!
1999
61.0k
  ++NumThreads;
2000
61.0k
  return true;
2001
61.0k
}
2002
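A minimal before/after sketch of what ThreadEdge produces (illustrative only; the names are invented and not taken from the report):

    pred:
      br label %bb
    bb:
      %p = phi i1 [ true, %pred ], [ %q, %other ]
      br i1 %p, label %succ, label %exit

Threading the edge pred->bb to succ clones the non-PHI body of bb into a new block bb.thread, retargets pred at bb.thread, and terminates bb.thread with 'br label %succ'; the original bb keeps serving the %other predecessor, and SSAUpdater repairs any uses of bb's values outside the block.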
2003
/// Create a new basic block that will be the predecessor of BB and successor of
2004
/// all blocks in Preds. When profile data is available, update the frequency of
2005
/// this new block.
2006
BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,
2007
                                               ArrayRef<BasicBlock *> Preds,
2008
15.1k
                                               const char *Suffix) {
2009
15.1k
  // Collect the frequencies of all predecessors of BB, which will be used to
2010
15.1k
  // update the edge weight on BB->SuccBB.
2011
15.1k
  BlockFrequency PredBBFreq(0);
2012
15.1k
  if (HasProfileData)
2013
1
    for (auto Pred : Preds)
2014
2
      PredBBFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB);
2015
15.1k
2016
15.1k
  BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix, DT);
2017
15.1k
2018
15.1k
  // Set the block frequency of the newly created PredBB, which is the sum of
2019
15.1k
  // frequencies of Preds.
2020
15.1k
  if (HasProfileData)
2021
1
    BFI->setBlockFreq(PredBB, PredBBFreq.getFrequency());
2022
15.1k
  return PredBB;
2023
15.1k
}
2024
2025
3
bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
2026
3
  const TerminatorInst *TI = BB->getTerminator();
2027
3
  assert(TI->getNumSuccessors() > 1 && "not a split");
2028
3
2029
3
  MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
2030
3
  if (!WeightsNode)
2031
2
    return false;
2032
1
2033
1
  MDString *MDName = cast<MDString>(WeightsNode->getOperand(0));
2034
1
  if (MDName->getString() != "branch_weights")
2035
0
    return false;
2036
1
2037
1
  // Ensure there are weights for all of the successors. Note that the first
2038
1
  // operand to the metadata node is a name, not a weight.
2039
1
  return WeightsNode->getNumOperands() == TI->getNumSuccessors() + 1;
2040
1
}
2041
2042
/// Update the block frequency of BB and branch weight and the metadata on the
2043
/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
2044
/// Freq(PredBB->BB) / Freq(BB->SuccBB).
2045
void JumpThreadingPass::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
2046
                                                     BasicBlock *BB,
2047
                                                     BasicBlock *NewBB,
2048
42.8k
                                                     BasicBlock *SuccBB) {
2049
42.8k
  if (!HasProfileData)
2050
42.8k
    return;
2051
3
2052
42.8k
  assert(BFI && BPI && "BFI & BPI should have been created here");
2053
3
2054
3
  // As the edge from PredBB to BB is deleted, we have to update the block
2055
3
  // frequency of BB.
2056
3
  auto BBOrigFreq = BFI->getBlockFreq(BB);
2057
3
  auto NewBBFreq = BFI->getBlockFreq(NewBB);
2058
3
  auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
2059
3
  auto BBNewFreq = BBOrigFreq - NewBBFreq;
2060
3
  BFI->setBlockFreq(BB, BBNewFreq.getFrequency());
2061
3
2062
3
  // Collect updated outgoing edges' frequencies from BB and use them to update
2063
3
  // edge probabilities.
2064
3
  SmallVector<uint64_t, 4> BBSuccFreq;
2065
6
  for (BasicBlock *Succ : successors(BB)) {
2066
6
    auto SuccFreq = (Succ == SuccBB)
2067
3
                        ? BB2SuccBBFreq - NewBBFreq
2068
3
                        : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
2069
6
    BBSuccFreq.push_back(SuccFreq.getFrequency());
2070
6
  }
2071
3
2072
3
  uint64_t MaxBBSuccFreq =
2073
3
      *std::max_element(BBSuccFreq.begin(), BBSuccFreq.end());
2074
3
2075
3
  SmallVector<BranchProbability, 4> BBSuccProbs;
2076
3
  if (MaxBBSuccFreq == 0)
2077
1
    BBSuccProbs.assign(BBSuccFreq.size(),
2078
1
                       {1, static_cast<unsigned>(BBSuccFreq.size())});
2079
2
  else {
2080
2
    for (uint64_t Freq : BBSuccFreq)
2081
4
      BBSuccProbs.push_back(
2082
4
          BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
2083
2
    // Normalize edge probabilities so that they sum up to one.
2084
2
    BranchProbability::normalizeProbabilities(BBSuccProbs.begin(),
2085
2
                                              BBSuccProbs.end());
2086
2
  }
2087
3
2088
3
  // Update edge probabilities in BPI.
2089
9
  for (int I = 0, E = BBSuccProbs.size(); I < E; I++)
2090
6
    BPI->setEdgeProbability(BB, I, BBSuccProbs[I]);
2091
3
2092
3
  // Update the profile metadata as well.
2093
3
  //
2094
3
  // Don't do this if the profile of the transformed blocks was statically
2095
3
  // estimated.  (This could occur despite the function having an entry
2096
3
  // frequency in completely cold parts of the CFG.)
2097
3
  //
2098
3
  // In this case we don't want to suggest to subsequent passes that the
2099
3
  // calculated weights are fully consistent.  Consider this graph:
2100
3
  //
2101
3
  //                 check_1
2102
3
  //             50% /  |
2103
3
  //             eq_1   | 50%
2104
3
  //                 \  |
2105
3
  //                 check_2
2106
3
  //             50% /  |
2107
3
  //             eq_2   | 50%
2108
3
  //                 \  |
2109
3
  //                 check_3
2110
3
  //             50% /  |
2111
3
  //             eq_3   | 50%
2112
3
  //                 \  |
2113
3
  //
2114
3
  // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
2115
3
  // the overall probabilities are inconsistent; the total probability that the
2116
3
  // value is either 1, 2 or 3 is 150%.
2117
3
  //
2118
3
  // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
2119
3
  // becomes 0%.  This is even worse if the edge whose probability becomes 0% is
2120
3
  // the loop exit edge.  Then based solely on static estimation we would assume
2121
3
  // the loop was extremely hot.
2122
3
  //
2123
3
  // FIXME this locally as well so that BPI and BFI are consistent as well.  We
2124
3
  // shouldn't make edges extremely likely or unlikely based solely on static
2125
3
  // estimation.
2126
3
  if (BBSuccProbs.size() >= 2 && doesBlockHaveProfileData(BB)) {
2127
1
    SmallVector<uint32_t, 4> Weights;
2128
1
    for (auto Prob : BBSuccProbs)
2129
2
      Weights.push_back(Prob.getNumerator());
2130
1
2131
1
    auto TI = BB->getTerminator();
2132
1
    TI->setMetadata(
2133
1
        LLVMContext::MD_prof,
2134
1
        MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights));
2135
1
  }
2136
42.8k
}
2137
2138
/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
2139
/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
2140
/// If we can duplicate the contents of BB up into PredBB do so now, this
2141
/// improves the odds that the branch will be on an analyzable instruction like
2142
/// a compare.
2143
bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
2144
10.4k
    BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
2145
10.4k
  assert(!PredBBs.empty() && "Can't handle an empty set");
2146
10.4k
2147
10.4k
  // If BB is a loop header, then duplicating this block outside the loop would
2148
10.4k
  // cause us to transform this into an irreducible loop, don't do this.
2149
10.4k
  // See the comments above FindLoopHeaders for justifications and caveats.
2150
10.4k
  if (LoopHeaders.count(BB)) {
2151
8.96k
    DEBUG(dbgs() << "  Not duplicating loop header '" << BB->getName()
2152
8.96k
          << "' into predecessor block '" << PredBBs[0]->getName()
2153
8.96k
          << "' - it might create an irreducible loop!\n");
2154
8.96k
    return false;
2155
8.96k
  }
2156
1.53k
2157
1.53k
  unsigned DuplicationCost =
2158
1.53k
      getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
2159
1.53k
  if (DuplicationCost > BBDupThreshold) {
2160
1.08k
    DEBUG(dbgs() << "  Not duplicating BB '" << BB->getName()
2161
1.08k
          << "' - Cost is too high: " << DuplicationCost << "\n");
2162
1.08k
    return false;
2163
1.08k
  }
2164
443
2165
443
  // And finally, do it!  Start by factoring the predecessors if needed.
2166
443
  BasicBlock *PredBB;
2167
443
  if (PredBBs.size() == 1)
2168
440
    PredBB = PredBBs[0];
2169
3
  else {
2170
3
    DEBUG(dbgs() << "  Factoring out " << PredBBs.size()
2171
3
          << " common predecessors.\n");
2172
3
    PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
2173
3
  }
2174
443
2175
443
  // Okay, we decided to do this!  Clone all the instructions in BB onto the end
2176
443
  // of PredBB.
2177
443
  DEBUG(dbgs() << "  Duplicating block '" << BB->getName() << "' into end of '"
2178
443
        << PredBB->getName() << "' to eliminate branch on phi.  Cost: "
2179
443
        << DuplicationCost << " block is:" << *BB << "\n");
2180
443
2181
443
  // Unless PredBB ends with an unconditional branch, split the edge so that we
2182
443
  // can just clone the bits from BB into the end of the new PredBB.
2183
443
  BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2184
443
2185
443
  if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
2186
2
    PredBB = SplitEdge(PredBB, BB, DT);
2187
2
    OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
2188
2
  }
2189
443
2190
443
  // We are going to have to map operands from the original BB block into the
2191
443
  // PredBB block.  Evaluate PHI nodes in BB.
2192
443
  DenseMap<Instruction*, Value*> ValueMapping;
2193
443
2194
443
  BasicBlock::iterator BI = BB->begin();
2195
1.74k
  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
2196
1.30k
    ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
2197
443
  // Clone the non-phi instructions of BB into PredBB, keeping track of the
2198
443
  // mapping and using it to remap operands in the cloned instructions.
2199
999
  for (; BI != BB->end(); ++BI) {
2200
556
    Instruction *New = BI->clone();
2201
556
2202
556
    // Remap operands to patch up intra-block references.
2203
2.08k
    for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2204
1.52k
      
      if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2205
571
        DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
2206
571
        if (I != ValueMapping.end())
2207
539
          New->setOperand(i, I->second);
2208
1.52k
      }
2209
556
2210
556
    // If this instruction can be simplified after the operands are updated,
2211
556
    // just use the simplified value instead.  This frequently happens due to
2212
556
    // phi translation.
2213
556
    if (Value *IV = SimplifyInstruction(
2214
556
            New,
2215
6
            {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
2216
6
      ValueMapping[&*BI] = IV;
2217
6
      if (!New->mayHaveSideEffects()) {
2218
6
        New->deleteValue();
2219
6
        New = nullptr;
2220
6
      }
2221
556
    } else {
2222
550
      ValueMapping[&*BI] = New;
2223
550
    }
2224
556
    if (New) {
2225
550
      // Otherwise, insert the new instruction into the block.
2226
550
      New->setName(BI->getName());
2227
550
      PredBB->getInstList().insert(OldPredBranch->getIterator(), New);
2228
550
    }
2229
556
  }
2230
443
2231
443
  // Check to see if the targets of the branch had PHI nodes. If so, we need to
2232
443
  // add entries to the PHI nodes for branch from PredBB now.
2233
443
  BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
2234
443
  AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
2235
443
                                  ValueMapping);
2236
443
  AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
2237
443
                                  ValueMapping);
2238
443
2239
443
  // If there were values defined in BB that are used outside the block, then we
2240
443
  // now have to update all uses of the value to use either the original value,
2241
443
  // the cloned value, or some PHI derived value.  This can require arbitrary
2242
443
  // PHI insertion, which we are prepared to do; clean these up now.
2243
443
  SSAUpdater SSAUpdate;
2244
443
  SmallVector<Use*, 16> UsesToRename;
2245
1.86k
  for (Instruction &I : *BB) {
2246
1.86k
    // Scan all uses of this instruction to see if it is used outside of its
2247
1.86k
    // block, and if so, record them in UsesToRename.
2248
2.23k
    for (Use &U : I.uses()) {
2249
2.23k
      Instruction *User = cast<Instruction>(U.getUser());
2250
2.23k
      if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
2251
1.34k
        if (UserPN->getIncomingBlock(U) == BB)
2252
1.07k
          continue;
2253
887
      } else if (User->getParent() == BB)
2254
539
        continue;
2255
620
2256
620
      UsesToRename.push_back(&U);
2257
620
    }
2258
1.86k
2259
1.86k
    // If there are no uses outside the block, we're done with this instruction.
2260
1.86k
    if (UsesToRename.empty())
2261
1.48k
      continue;
2262
375
2263
375
    
    DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
2264
375
2265
375
    // We found a use of I outside of BB.  Rename all uses of I that are outside
2266
375
    // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks
2267
375
    // with the two values we know.
2268
375
    SSAUpdate.Initialize(I.getType(), I.getName());
2269
375
    SSAUpdate.AddAvailableValue(BB, &I);
2270
375
    SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&I]);
2271
375
2272
995
    while (!UsesToRename.empty())
2273
620
      SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
2274
375
    DEBUG(dbgs() << "\n");
2275
1.86k
  }
2276
443
2277
443
  // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
2278
443
  // that we nuked.
2279
443
  BB->removePredecessor(PredBB, true);
2280
443
2281
443
  // Remove the unconditional branch at the end of the PredBB block.
2282
443
  OldPredBranch->eraseFromParent();
2283
443
  if (BB != PredBB)
2284
443
    DT->deleteEdge(PredBB, BB);
2285
10.4k
2286
10.4k
  ++NumDupes;
2287
10.4k
  return true;
2288
10.4k
}
2289
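A small sketch of the duplication performed above (assumed shapes, not from the report; all names are invented):

    pred:                                      ; ends in an unconditional branch
      br label %bb
    bb:
      %x = phi i1 [ true, %pred ], [ %y, %other ]
      br i1 %x, label %t, label %f

Cloning bb's body into pred maps %x to the incoming value true, so pred now ends in a branch on a constant that folds to 'br label %t', while bb continues to handle the %other predecessor.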
2290
/// TryToUnfoldSelect - Look for blocks of the form
2291
/// bb1:
2292
///   %a = select
2293
///   br bb2
2294
///
2295
/// bb2:
2296
///   %p = phi [%a, %bb1] ...
2297
///   %c = icmp %p
2298
///   br i1 %c
2299
///
2300
/// And expand the select into a branch structure if one of its arms allows %c
2301
/// to be folded. This later enables threading from bb1 over bb2.
2302
4.25M
bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
2303
4.25M
  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2304
4.25M
  PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
2305
4.25M
  Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
2306
4.25M
2307
4.25M
  if (!CondBr || !CondBr->isConditional() || !CondLHS ||
2308
390k
      CondLHS->getParent() != BB)
2309
4.03M
    return false;
2310
218k
2311
685k
  
  for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
2312
466k
    BasicBlock *Pred = CondLHS->getIncomingBlock(I);
2313
466k
    SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));
2314
466k
2315
466k
    // Look if one of the incoming values is a select in the corresponding
2316
466k
    // predecessor.
2317
466k
    if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
2318
464k
      continue;
2319
2.59k
2320
2.59k
    BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2321
2.59k
    if (!PredTerm || !PredTerm->isUnconditional())
2322
1.73k
      continue;
2323
866
2324
866
    // Now check if one of the select values would allow us to constant fold the
2325
866
    // terminator in BB. We don't do the transform if both sides fold, those
2326
866
    // cases will be threaded in any case.
2327
866
    LazyValueInfo::Tristate LHSFolds =
2328
866
        LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
2329
866
                                CondRHS, Pred, BB, CondCmp);
2330
866
    LazyValueInfo::Tristate RHSFolds =
2331
866
        LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
2332
866
                                CondRHS, Pred, BB, CondCmp);
2333
866
    if ((LHSFolds != LazyValueInfo::Unknown ||
2334
311
         RHSFolds != LazyValueInfo::Unknown) &&
2335
866
        LHSFolds != RHSFolds) {
2336
496
      // Expand the select.
2337
496
      //
2338
496
      // Pred --
2339
496
      //  |    v
2340
496
      //  |  NewBB
2341
496
      //  |    |
2342
496
      //  |-----
2343
496
      //  v
2344
496
      // BB
2345
496
      BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
2346
496
                                             BB->getParent(), BB);
2347
496
      // Move the unconditional branch to NewBB.
2348
496
      PredTerm->removeFromParent();
2349
496
      NewBB->getInstList().insert(NewBB->end(), PredTerm);
2350
496
      DT->insertEdge(NewBB, BB);
2351
496
      // Create a conditional branch and update PHI nodes.
2352
496
      BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
2353
496
      CondLHS->setIncomingValue(I, SI->getFalseValue());
2354
496
      CondLHS->addIncoming(SI->getTrueValue(), NewBB);
2355
496
      // The select is now dead.
2356
496
      SI->eraseFromParent();
2357
496
2358
496
      DT->insertEdge(Pred, NewBB);
2359
496
      // Update any other PHI nodes in BB.
2360
496
      for (BasicBlock::iterator BI = BB->begin();
2361
1.33k
           PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
2362
840
        if (Phi != CondLHS)
2363
344
          Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
2364
496
      return true;
2365
496
    }
2366
466k
  }
2367
218k
  return false;
2368
4.25M
}
2369
2370
/// TryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
2371
/// same BB in the form
2372
/// bb:
2373
///   %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
2374
///   %s = select %p, trueval, falseval
2375
///
2376
/// or
2377
///
2378
/// bb:
2379
///   %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
2380
///   %c = cmp %p, 0
2381
///   %s = select %c, trueval, falseval
2382
///
2383
/// And expand the select into a branch structure. This later enables
2384
/// jump-threading over bb in this pass.
2385
///
2386
/// Using a similar approach to SimplifyCFG::FoldCondBranchOnPHI(), unfold the
2387
/// select if the associated PHI has at least one constant.  If the unfolded
2388
/// select is not jump-threaded, it will be folded again in the later
2389
/// optimizations.
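A sketch of the unfolded result for the first form above (the %bb.then and %bb.tail block names and the trueval/falseval operands are illustrative): SplitBlockAndInsertIfThen splits bb at the select and branches on the PHI, and the select itself is replaced by a new PHI of its two operands.

  bb:
    %p = phi [false, %bb1], [true, %bb2], ...
    br i1 %p, label %bb.then, label %bb.tail
  bb.then:
    br label %bb.tail
  bb.tail:
    %s = phi [trueval, %bb.then], [falseval, %bb]
    ...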
2390
12.6M
bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
2391
12.6M
  // If threading this would thread across a loop header, don't thread the edge.
2392
12.6M
  // See the comments above FindLoopHeaders for justifications and caveats.
2393
12.6M
  if (LoopHeaders.count(BB))
2394
1.24M
    return false;
2395
11.3M
2396
11.3M
  for (BasicBlock::iterator BI = BB->begin();
2397
13.1M
       PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2398
1.78M
    // Look for a Phi having at least one constant incoming value.
2399
1.78M
    if (llvm::all_of(PN->incoming_values(),
2400
4.80M
                     [](Value *V) { return !isa<ConstantInt>(V); }))
2401
1.23M
      continue;
2402
546k
2403
546k
    auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
2404
6.15k
      // Check if SI is in BB and uses V as its condition.
2405
6.15k
      if (SI->getParent() != BB)
2406
2.79k
        return false;
2407
3.35k
      Value *Cond = SI->getCondition();
2408
3.35k
      return (Cond && Cond == V && Cond->getType()->isIntegerTy(1));
2409
6.15k
    };
2410
546k
2411
546k
    SelectInst *SI = nullptr;
2412
798k
    for (Use &U : PN->uses()) {
2413
798k
      if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
2414
124k
        // Look for an ICmp in BB that compares PN with a constant and is the
2415
124k
        // condition of a Select.
2416
124k
        if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
2417
45.7k
            isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
2418
30.8k
          if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
2419
775
            if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
2420
757
              SI = SelectI;
2421
757
              break;
2422
757
            }
2423
674k
      } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
2424
5.38k
        // Look for a Select in BB that uses PN as its condition.
2425
5.38k
        if (isUnfoldCandidate(SelectI, U.get())) {
2426
876
          SI = SelectI;
2427
876
          break;
2428
876
        }
2429
546k
      }
2430
798k
    }
2431
546k
2432
546k
    if (!SI)
2433
545k
      continue;
2434
1.63k
    // Expand the select.
2435
1.63k
    TerminatorInst *Term =
2436
1.63k
        SplitBlockAndInsertIfThen(SI->getCondition(), SI, false, nullptr, DT);
2437
1.63k
    PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
2438
1.63k
    NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
2439
1.63k
    NewPN->addIncoming(SI->getFalseValue(), BB);
2440
1.63k
    SI->replaceAllUsesWith(NewPN);
2441
1.63k
    SI->eraseFromParent();
2442
1.63k
    return true;
2443
1.63k
  }
2444
11.3M
  return false;
2445
12.6M
}
2446
2447
/// Try to propagate a guard from the current BB into one of its predecessors
2448
/// in case another branch of execution implies that the condition of this
2449
/// guard is always true. Currently we only process the simplest case that
2450
/// looks like:
2451
///
2452
/// Start:
2453
///   %cond = ...
2454
///   br i1 %cond, label %T1, label %F1
2455
/// T1:
2456
///   br label %Merge
2457
/// F1:
2458
///   br label %Merge
2459
/// Merge:
2460
///   %condGuard = ...
2461
///   call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
2462
///
2463
/// And cond either implies condGuard or !condGuard. In this case all the
2464
/// instructions before the guard can be duplicated in both branches, and the
2465
/// guard is then threaded to one of them.
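A sketch of the outcome when %cond implies %condGuard (the block suffixes and value names are illustrative, not the pass's actual naming): the instructions before the guard are copied into a split block on each incoming edge, their copies are merged back with a PHI, and the guard itself survives only on the edge where the implication is not established.

  T1:
    br label %Merge.unguarded
  Merge.unguarded:                ; %cond is known true here, so the guard is dropped
    %condGuard.u = ...
    br label %Merge
  F1:
    br label %Merge.guarded
  Merge.guarded:
    %condGuard.g = ...
    call void(i1, ...) @llvm.experimental.guard( i1 %condGuard.g )[ "deopt"() ]
    br label %Merge
  Merge:
    %condGuard = phi i1 [ %condGuard.u, %Merge.unguarded ], [ %condGuard.g, %Merge.guarded ]
    ...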
2466
143
bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) {
2467
143
  using namespace PatternMatch;
2468
143
2469
143
  // We only want to deal with two predecessors.
2470
143
  BasicBlock *Pred1, *Pred2;
2471
143
  auto PI = pred_begin(BB), PE = pred_end(BB);
2472
143
  if (PI == PE)
2473
65
    return false;
2474
78
  Pred1 = *PI++;
2475
78
  if (PI == PE)
2476
47
    return false;
2477
31
  Pred2 = *PI++;
2478
31
  if (PI != PE)
2479
5
    return false;
2480
26
  if (Pred1 == Pred2)
2481
4
    return false;
2482
22
2483
22
  // Try to thread one of the guards of the block.
2484
22
  // TODO: Look up deeper than to immediate predecessor?
2485
22
  auto *Parent = Pred1->getSinglePredecessor();
2486
22
  if (!Parent || Parent != Pred2->getSinglePredecessor())
2487
12
    return false;
2488
10
2489
10
  if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
2490
10
    for (auto &I : *BB)
2491
34
      if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
2492
4
        if (ThreadGuard(BB, cast<IntrinsicInst>(&I), BI))
2493
2
          return true;
2494
8
2495
8
  return false;
2496
8
}
2497
2498
/// Try to propagate the guard from BB which is the lower block of a diamond
2499
/// to one of its branches, in case the diamond's condition implies the guard's
2500
/// condition.
2501
bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard,
2502
4
                                    BranchInst *BI) {
2503
4
  assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
2504
4
  assert(BI->isConditional() && "Unconditional branch has 2 successors?");
2505
4
  Value *GuardCond = Guard->getArgOperand(0);
2506
4
  Value *BranchCond = BI->getCondition();
2507
4
  BasicBlock *TrueDest = BI->getSuccessor(0);
2508
4
  BasicBlock *FalseDest = BI->getSuccessor(1);
2509
4
2510
4
  auto &DL = BB->getModule()->getDataLayout();
2511
4
  bool TrueDestIsSafe = false;
2512
4
  bool FalseDestIsSafe = false;
2513
4
2514
4
  // True dest is safe if BranchCond => GuardCond.
2515
4
  auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
2516
4
  if (Impl && *Impl)
2517
1
    TrueDestIsSafe = true;
2518
3
  else {
2519
3
    // False dest is safe if !BranchCond => GuardCond.
2520
3
    Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
2521
3
    if (Impl && *Impl)
2522
1
      FalseDestIsSafe = true;
2523
3
  }
2524
4
2525
4
  if (!TrueDestIsSafe && !FalseDestIsSafe)
2526
2
    return false;
2527
2
2528
2
  BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
2529
2
  BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
2530
2
2531
2
  ValueToValueMapTy UnguardedMapping, GuardedMapping;
2532
2
  Instruction *AfterGuard = Guard->getNextNode();
2533
2
  unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold);
2534
2
  if (Cost > BBDupThreshold)
2535
0
    return false;
2536
2
  // Duplicate all instructions before the guard and the guard itself to the
2537
2
  // branch where implication is not proved.
2538
2
  BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween(
2539
2
      BB, PredGuardedBlock, AfterGuard, GuardedMapping);
2540
2
  assert(GuardedBlock && "Could not create the guarded block?");
2541
2
  // Duplicate all instructions before the guard in the unguarded branch.
2542
2
  // Since we have successfully duplicated the guarded block and this block
2543
2
  // has fewer instructions, we expect it to succeed.
2544
2
  BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween(
2545
2
      BB, PredUnguardedBlock, Guard, UnguardedMapping);
2546
2
  assert(UnguardedBlock && "Could not create the unguarded block?");
2547
2
  DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
2548
2
               << GuardedBlock->getName() << "\n");
2549
2
  // DuplicateInstructionsInSplitBetween inserts a new block, BB.split, between
2550
2
  // PredBB and BB. We need to perform two inserts and one delete in DT for each
2551
2
  // of the above calls.
2552
2
  DT->applyUpdates({// Guarded block split.
2553
2
                    {DominatorTree::Delete, PredGuardedBlock, BB},
2554
2
                    {DominatorTree::Insert, PredGuardedBlock, GuardedBlock},
2555
2
                    {DominatorTree::Insert, GuardedBlock, BB},
2556
2
                    // Unguarded block split.
2557
2
                    {DominatorTree::Delete, PredUnguardedBlock, BB},
2558
2
                    {DominatorTree::Insert, PredUnguardedBlock, UnguardedBlock},
2559
2
                    {DominatorTree::Insert, UnguardedBlock, BB}});
2560
2
2561
2
  // Some instructions before the guard may still have uses. For them, we need
2562
2
  // to create Phi nodes merging their copies in both guarded and unguarded
2563
2
  // branches. Those instructions that have no uses can be just removed.
2564
2
  SmallVector<Instruction *, 4> ToRemove;
2565
10
  for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
2566
8
    if (!isa<PHINode>(&*BI))
2567
6
      ToRemove.push_back(&*BI);
2568
2
2569
2
  Instruction *InsertionPoint = &*BB->getFirstInsertionPt();
2570
2
  assert(InsertionPoint && "Empty block?");
2571
2
  // Substitute with Phis & remove.
2572
6
  for (auto *Inst : reverse(ToRemove)) {
2573
6
    if (!Inst->use_empty()) {
2574
2
      PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
2575
2
      NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
2576
2
      NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
2577
2
      NewPN->insertBefore(InsertionPoint);
2578
2
      Inst->replaceAllUsesWith(NewPN);
2579
2
    }
2580
6
    Inst->eraseFromParent();
2581
6
  }
2582
4
  return true;
2583
4
}