Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Peephole optimize the CFG.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "llvm/ADT/APInt.h"
14
#include "llvm/ADT/ArrayRef.h"
15
#include "llvm/ADT/DenseMap.h"
16
#include "llvm/ADT/Optional.h"
17
#include "llvm/ADT/STLExtras.h"
18
#include "llvm/ADT/SetOperations.h"
19
#include "llvm/ADT/SetVector.h"
20
#include "llvm/ADT/SmallPtrSet.h"
21
#include "llvm/ADT/SmallVector.h"
22
#include "llvm/ADT/Statistic.h"
23
#include "llvm/ADT/StringRef.h"
24
#include "llvm/Analysis/AssumptionCache.h"
25
#include "llvm/Analysis/ConstantFolding.h"
26
#include "llvm/Analysis/EHPersonalities.h"
27
#include "llvm/Analysis/InstructionSimplify.h"
28
#include "llvm/Analysis/MemorySSA.h"
29
#include "llvm/Analysis/MemorySSAUpdater.h"
30
#include "llvm/Analysis/TargetTransformInfo.h"
31
#include "llvm/Analysis/ValueTracking.h"
32
#include "llvm/IR/Attributes.h"
33
#include "llvm/IR/BasicBlock.h"
34
#include "llvm/IR/CFG.h"
35
#include "llvm/IR/CallSite.h"
36
#include "llvm/IR/Constant.h"
37
#include "llvm/IR/ConstantRange.h"
38
#include "llvm/IR/Constants.h"
39
#include "llvm/IR/DataLayout.h"
40
#include "llvm/IR/DerivedTypes.h"
41
#include "llvm/IR/Function.h"
42
#include "llvm/IR/GlobalValue.h"
43
#include "llvm/IR/GlobalVariable.h"
44
#include "llvm/IR/IRBuilder.h"
45
#include "llvm/IR/InstrTypes.h"
46
#include "llvm/IR/Instruction.h"
47
#include "llvm/IR/Instructions.h"
48
#include "llvm/IR/IntrinsicInst.h"
49
#include "llvm/IR/Intrinsics.h"
50
#include "llvm/IR/LLVMContext.h"
51
#include "llvm/IR/MDBuilder.h"
52
#include "llvm/IR/Metadata.h"
53
#include "llvm/IR/Module.h"
54
#include "llvm/IR/NoFolder.h"
55
#include "llvm/IR/Operator.h"
56
#include "llvm/IR/PatternMatch.h"
57
#include "llvm/IR/Type.h"
58
#include "llvm/IR/Use.h"
59
#include "llvm/IR/User.h"
60
#include "llvm/IR/Value.h"
61
#include "llvm/Support/Casting.h"
62
#include "llvm/Support/CommandLine.h"
63
#include "llvm/Support/Debug.h"
64
#include "llvm/Support/ErrorHandling.h"
65
#include "llvm/Support/KnownBits.h"
66
#include "llvm/Support/MathExtras.h"
67
#include "llvm/Support/raw_ostream.h"
68
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
69
#include "llvm/Transforms/Utils/Local.h"
70
#include "llvm/Transforms/Utils/ValueMapper.h"
71
#include <algorithm>
72
#include <cassert>
73
#include <climits>
74
#include <cstddef>
75
#include <cstdint>
76
#include <iterator>
77
#include <map>
78
#include <set>
79
#include <tuple>
80
#include <utility>
81
#include <vector>
82
83
using namespace llvm;
84
using namespace PatternMatch;
85
86
#define DEBUG_TYPE "simplifycfg"
87
88
// Chosen as 2 so as to be cheap, but still to have enough power to fold
89
// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
90
// To catch this, we need to fold a compare and a select, hence '2' being the
91
// minimum reasonable default.
92
static cl::opt<unsigned> PHINodeFoldingThreshold(
93
    "phi-node-folding-threshold", cl::Hidden, cl::init(2),
94
    cl::desc(
95
        "Control the amount of phi node folding to perform (default = 2)"));
96
97
static cl::opt<bool> DupRet(
98
    "simplifycfg-dup-ret", cl::Hidden, cl::init(false),
99
    cl::desc("Duplicate return instructions into unconditional branches"));
100
101
static cl::opt<bool>
102
    SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
103
               cl::desc("Sink common instructions down to the end block"));
104
105
static cl::opt<bool> HoistCondStores(
106
    "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
107
    cl::desc("Hoist conditional stores if an unconditional store precedes"));
108
109
static cl::opt<bool> MergeCondStores(
110
    "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
111
    cl::desc("Hoist conditional stores even if an unconditional store does not "
112
             "precede - hoist multiple conditional stores into a single "
113
             "predicated store"));
114
115
static cl::opt<bool> MergeCondStoresAggressively(
116
    "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
117
    cl::desc("When merging conditional stores, do so even if the resultant "
118
             "basic blocks are unlikely to be if-converted as a result"));
119
120
static cl::opt<bool> SpeculateOneExpensiveInst(
121
    "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
122
    cl::desc("Allow exactly one expensive instruction to be speculatively "
123
             "executed"));
124
125
static cl::opt<unsigned> MaxSpeculationDepth(
126
    "max-speculation-depth", cl::Hidden, cl::init(10),
127
    cl::desc("Limit maximum recursion depth when calculating costs of "
128
             "speculatively executed instructions"));
129
130
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
131
STATISTIC(NumLinearMaps,
132
          "Number of switch instructions turned into linear mapping");
133
STATISTIC(NumLookupTables,
134
          "Number of switch instructions turned into lookup tables");
135
STATISTIC(
136
    NumLookupTablesHoles,
137
    "Number of switch instructions turned into lookup tables (holes checked)");
138
STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
139
STATISTIC(NumSinkCommons,
140
          "Number of common instructions sunk down to the end block");
141
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
142
143
namespace {
144
145
// The first field contains the value that the switch produces when a certain
146
// case group is selected, and the second field is a vector containing the
147
// cases composing the case group.
148
using SwitchCaseResultVectorTy =
149
    SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>;
150
151
// The first field contains the phi node that generates a result of the switch
152
// and the second field contains the value generated for a certain case in the
153
// switch for that PHI.
154
using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
155
156
/// ValueEqualityComparisonCase - Represents a case of a switch.
157
struct ValueEqualityComparisonCase {
158
  ConstantInt *Value;
159
  BasicBlock *Dest;
160
161
  ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
162
134k
      : Value(Value), Dest(Dest) {}
163
164
44.1k
  bool operator<(ValueEqualityComparisonCase RHS) const {
165
44.1k
    // Comparing pointers is ok as we only rely on the order for uniquing.
166
44.1k
    return Value < RHS.Value;
167
44.1k
  }
168
169
66.5k
  bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
170
};
171
172
class SimplifyCFGOpt {
173
  const TargetTransformInfo &TTI;
174
  const DataLayout &DL;
175
  SmallPtrSetImpl<BasicBlock *> *LoopHeaders;
176
  const SimplifyCFGOptions &Options;
177
  bool Resimplify;
178
179
  Value *isValueEqualityComparison(Instruction *TI);
180
  BasicBlock *GetValueEqualityComparisonCases(
181
      Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
182
  bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
183
                                                     BasicBlock *Pred,
184
                                                     IRBuilder<> &Builder);
185
  bool FoldValueComparisonIntoPredecessors(Instruction *TI,
186
                                           IRBuilder<> &Builder);
187
188
  bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
189
  bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
190
  bool SimplifySingleResume(ResumeInst *RI);
191
  bool SimplifyCommonResume(ResumeInst *RI);
192
  bool SimplifyCleanupReturn(CleanupReturnInst *RI);
193
  bool SimplifyUnreachable(UnreachableInst *UI);
194
  bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
195
  bool SimplifyIndirectBr(IndirectBrInst *IBI);
196
  bool SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
197
  bool SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
198
199
  bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
200
                                             IRBuilder<> &Builder);
201
202
public:
203
  SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
204
                 SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
205
                 const SimplifyCFGOptions &Opts)
206
32.9M
      : TTI(TTI), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {}
207
208
  bool run(BasicBlock *BB);
209
  bool simplifyOnce(BasicBlock *BB);
210
211
  // Helper to set Resimplify and return change indication.
212
103k
  bool requestResimplify() {
213
103k
    Resimplify = true;
214
103k
    return true;
215
103k
  }
216
};
217
218
} // end anonymous namespace
219
220
/// Return true if it is safe to merge these two
221
/// terminator instructions together.
222
static bool
223
SafeToMergeTerminators(Instruction *SI1, Instruction *SI2,
224
2.23M
                       SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
225
2.23M
  if (SI1 == SI2)
226
0
    return false; // Can't merge with self!
227
2.23M
228
2.23M
  // It is not safe to merge these two switch instructions if they have a common
229
2.23M
  // successor, and if that successor has a PHI node, and if *that* PHI node has
230
2.23M
  // conflicting incoming values from the two switch blocks.
231
2.23M
  BasicBlock *SI1BB = SI1->getParent();
232
2.23M
  BasicBlock *SI2BB = SI2->getParent();
233
2.23M
234
2.23M
  SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
235
2.23M
  bool Fail = false;
236
2.23M
  for (BasicBlock *Succ : successors(SI2BB))
237
4.51M
    if (SI1Succs.count(Succ))
238
191k
      
for (BasicBlock::iterator BBI = Succ->begin(); 101k
isa<PHINode>(BBI);
++BBI90.3k
) {
239
90.3k
        PHINode *PN = cast<PHINode>(BBI);
240
90.3k
        if (PN->getIncomingValueForBlock(SI1BB) !=
241
90.3k
            PN->getIncomingValueForBlock(SI2BB)) {
242
75.1k
          if (FailBlocks)
243
299
            FailBlocks->insert(Succ);
244
75.1k
          Fail = true;
245
75.1k
        }
246
90.3k
      }
247
2.23M
248
2.23M
  return !Fail;
249
2.23M
}
250
251
/// Return true if it is safe and profitable to merge these two terminator
252
/// instructions together, where SI1 is an unconditional branch. PhiNodes will
253
/// store all PHI nodes in common successors.
254
static bool
255
isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2,
256
                                Instruction *Cond,
257
120
                                SmallVectorImpl<PHINode *> &PhiNodes) {
258
120
  if (SI1 == SI2)
259
0
    return false; // Can't merge with self!
260
120
  assert(SI1->isUnconditional() && SI2->isConditional());
261
120
262
120
  // We fold the unconditional branch if we can easily update all PHI nodes in
263
120
  // common successors:
264
120
  // 1> We have a constant incoming value for the conditional branch;
265
120
  // 2> We have "Cond" as the incoming value for the unconditional branch;
266
120
  // 3> SI2->getCondition() and Cond have same operands.
267
120
  CmpInst *Ci2 = dyn_cast<CmpInst>(SI2->getCondition());
268
120
  if (!Ci2)
269
65
    return false;
270
55
  if (!(Cond->getOperand(0) == Ci2->getOperand(0) &&
271
55
        
Cond->getOperand(1) == Ci2->getOperand(1)30
) &&
272
55
      
!(54
Cond->getOperand(0) == Ci2->getOperand(1)54
&&
273
54
        
Cond->getOperand(1) == Ci2->getOperand(0)1
))
274
53
    return false;
275
2
276
2
  BasicBlock *SI1BB = SI1->getParent();
277
2
  BasicBlock *SI2BB = SI2->getParent();
278
2
  SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
279
2
  for (BasicBlock *Succ : successors(SI2BB))
280
4
    if (SI1Succs.count(Succ))
281
3
      
for (BasicBlock::iterator BBI = Succ->begin(); 2
isa<PHINode>(BBI);
++BBI1
) {
282
2
        PHINode *PN = cast<PHINode>(BBI);
283
2
        if (PN->getIncomingValueForBlock(SI1BB) != Cond ||
284
2
            !isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB)))
285
1
          return false;
286
1
        PhiNodes.push_back(PN);
287
1
      }
288
2
  
return true1
;
289
2
}
290
291
/// Update PHI nodes in Succ to indicate that there will now be entries in it
292
/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
293
/// will be the same as those coming in from ExistPred, an existing predecessor
294
/// of Succ.
295
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
296
                                  BasicBlock *ExistPred,
297
65.1k
                                  MemorySSAUpdater *MSSAU = nullptr) {
298
65.1k
  for (PHINode &PN : Succ->phis())
299
13.0k
    PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
300
65.1k
  if (MSSAU)
301
0
    if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
302
0
      MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
303
65.1k
}
304
305
/// Compute an abstract "cost" of speculating the given instruction,
306
/// which is assumed to be safe to speculate. TCC_Free means cheap,
307
/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
308
/// expensive.
309
static unsigned ComputeSpeculationCost(const User *I,
310
2.00M
                                       const TargetTransformInfo &TTI) {
311
2.00M
  assert(isSafeToSpeculativelyExecute(I) &&
312
2.00M
         "Instruction is not safe to speculatively execute!");
313
2.00M
  return TTI.getUserCost(I);
314
2.00M
}
315
316
/// If we have a merge point of an "if condition" as accepted above,
317
/// return true if the specified value dominates the block.  We
318
/// don't handle the true generality of domination here, just a special case
319
/// which works well enough for us.
320
///
321
/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
322
/// see if V (which must be an instruction) and its recursive operands
323
/// that do not dominate BB have a combined cost lower than CostRemaining and
324
/// are non-trapping.  If both are true, the instruction is inserted into the
325
/// set and true is returned.
326
///
327
/// The cost for most non-trapping instructions is defined as 1 except for
328
/// Select whose cost is 2.
329
///
330
/// After this function returns, CostRemaining is decreased by the cost of
331
/// V plus its non-dominating operands.  If that cost is greater than
332
/// CostRemaining, false is returned and CostRemaining is undefined.
333
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
334
                                SmallPtrSetImpl<Instruction *> &AggressiveInsts,
335
                                unsigned &CostRemaining,
336
                                const TargetTransformInfo &TTI,
337
2.31M
                                unsigned Depth = 0) {
338
2.31M
  // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
339
2.31M
  // so limit the recursion depth.
340
2.31M
  // TODO: While this recursion limit does prevent pathological behavior, it
341
2.31M
  // would be better to track visited instructions to avoid cycles.
342
2.31M
  if (Depth == MaxSpeculationDepth)
343
4
    return false;
344
2.31M
345
2.31M
  Instruction *I = dyn_cast<Instruction>(V);
346
2.31M
  if (!I) {
347
529k
    // Non-instructions all dominate instructions, but not all constantexprs
348
529k
    // can be executed unconditionally.
349
529k
    if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
350
15.5k
      if (C->canTrap())
351
5
        return false;
352
529k
    return true;
353
529k
  }
354
1.79M
  BasicBlock *PBB = I->getParent();
355
1.79M
356
1.79M
  // We don't want to allow weird loops that might have the "if condition" in
357
1.79M
  // the bottom of this block.
358
1.79M
  if (PBB == BB)
359
3
    return false;
360
1.79M
361
1.79M
  // If this instruction is defined in a block that contains an unconditional
362
1.79M
  // branch to BB, then it must be in the 'conditional' part of the "if
363
1.79M
  // statement".  If not, it definitely dominates the region.
364
1.79M
  BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
365
1.79M
  if (!BI || 
BI->isConditional()1.78M
||
BI->getSuccessor(0) != BB1.33M
)
366
466k
    return true;
367
1.32M
368
1.32M
  // If we have seen this instruction before, don't count it again.
369
1.32M
  if (AggressiveInsts.count(I))
370
2.53k
    return true;
371
1.32M
372
1.32M
  // Okay, it looks like the instruction IS in the "condition".  Check to
373
1.32M
  // see if it's a cheap instruction to unconditionally compute, and if it
374
1.32M
  // only uses stuff defined outside of the condition.  If so, hoist it out.
375
1.32M
  if (!isSafeToSpeculativelyExecute(I))
376
633k
    return false;
377
687k
378
687k
  unsigned Cost = ComputeSpeculationCost(I, TTI);
379
687k
380
687k
  // Allow exactly one instruction to be speculated regardless of its cost
381
687k
  // (as long as it is safe to do so).
382
687k
  // This is intended to flatten the CFG even if the instruction is a division
383
687k
  // or other expensive operation. The speculation of an expensive instruction
384
687k
  // is expected to be undone in CodeGenPrepare if the speculation has not
385
687k
  // enabled further IR optimizations.
386
687k
  if (Cost > CostRemaining &&
387
687k
      
(71.9k
!SpeculateOneExpensiveInst71.9k
||
!AggressiveInsts.empty()71.9k
||
Depth > 039.8k
))
388
69.1k
    return false;
389
618k
390
618k
  // Avoid unsigned wrap.
391
618k
  CostRemaining = (Cost > CostRemaining) ? 
02.77k
:
CostRemaining - Cost615k
;
392
618k
393
618k
  // Okay, we can only really hoist these out if their operands do
394
618k
  // not take us over the cost threshold.
395
1.18M
  for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; 
++i563k
)
396
860k
    if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI,
397
860k
                             Depth + 1))
398
296k
      return false;
399
618k
  // Okay, it's safe to do this!  Remember this instruction.
400
618k
  AggressiveInsts.insert(I);
401
321k
  return true;
402
618k
}
403
404
/// Extract ConstantInt from value, looking through IntToPtr
405
/// and PointerNullValue. Return NULL if value is not a constant int.
406
33.0M
static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
407
33.0M
  // Normal constant int.
408
33.0M
  ConstantInt *CI = dyn_cast<ConstantInt>(V);
409
33.0M
  if (CI || 
!isa<Constant>(V)15.6M
||
!V->getType()->isPointerTy()9.19M
)
410
23.8M
    return CI;
411
9.19M
412
9.19M
  // This is some kind of pointer constant. Turn it into a pointer-sized
413
9.19M
  // ConstantInt if possible.
414
9.19M
  IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
415
9.19M
416
9.19M
  // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
417
9.19M
  if (isa<ConstantPointerNull>(V))
418
9.16M
    return ConstantInt::get(PtrTy, 0);
419
35.9k
420
35.9k
  // IntToPtr const int.
421
35.9k
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
422
27.2k
    if (CE->getOpcode() == Instruction::IntToPtr)
423
16.3k
      if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
424
16.3k
        // The constant is very likely to have the right type already.
425
16.3k
        if (CI->getType() == PtrTy)
426
16.3k
          return CI;
427
26
        else
428
26
          return cast<ConstantInt>(
429
26
              ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
430
19.6k
      }
431
19.6k
  return nullptr;
432
19.6k
}
433
434
namespace {
435
436
/// Given a chain of or (||) or and (&&) comparison of a value against a
437
/// constant, this will try to recover the information required for a switch
438
/// structure.
439
/// It will depth-first traverse the chain of comparison, seeking for patterns
440
/// like %a == 12 or %a < 4 and combine them to produce a set of integer
441
/// representing the different cases for the switch.
442
/// Note that if the chain is composed of '||' it will build the set of elements
443
/// that matches the comparisons (i.e. any of this value validate the chain)
444
/// while for a chain of '&&' it will build the set elements that make the test
445
/// fail.
446
struct ConstantComparesGatherer {
447
  const DataLayout &DL;
448
449
  /// Value found for the switch comparison
450
  Value *CompValue = nullptr;
451
452
  /// Extra clause to be checked before the switch
453
  Value *Extra = nullptr;
454
455
  /// Set of integers to match in switch
456
  SmallVector<ConstantInt *, 8> Vals;
457
458
  /// Number of comparisons matched in the and/or chain
459
  unsigned UsedICmps = 0;
460
461
  /// Construct and compute the result for the comparison instruction Cond
462
15.5M
  ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
463
15.5M
    gather(Cond);
464
15.5M
  }
465
466
  ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
467
  ConstantComparesGatherer &
468
  operator=(const ConstantComparesGatherer &) = delete;
469
470
private:
471
  /// Try to set the current value used for the comparison, it succeeds only if
472
  /// it wasn't set before or if the new value is the same as the old one
473
978k
  bool setValueOnce(Value *NewVal) {
474
978k
    if (CompValue && 
CompValue != NewVal67.7k
)
475
67.0k
      return false;
476
911k
    CompValue = NewVal;
477
911k
    return (CompValue != nullptr);
478
911k
  }
479
480
  /// Try to match Instruction "I" as a comparison against a constant and
481
  /// populates the array Vals with the set of values that match (or do not
482
  /// match depending on isEQ).
483
  /// Return false on failure. On success, the Value the comparison matched
484
  /// against is placed in CompValue.
485
  /// If CompValue is already set, the function is expected to fail if a match
486
  /// is found but the value compared to is different.
487
16.3M
  bool matchInstruction(Instruction *I, bool isEQ) {
488
16.3M
    // If this is an icmp against a constant, handle this as one of the cases.
489
16.3M
    ICmpInst *ICI;
490
16.3M
    ConstantInt *C;
491
16.3M
    if (!((ICI = dyn_cast<ICmpInst>(I)) &&
492
16.3M
          
(C = GetConstantInt(I->getOperand(1), DL))15.4M
)) {
493
5.16M
      return false;
494
5.16M
    }
495
11.2M
496
11.2M
    Value *RHSVal;
497
11.2M
    const APInt *RHSC;
498
11.2M
499
11.2M
    // Pattern match a special case
500
11.2M
    // (x & ~2^z) == y --> x == y || x == y|2^z
501
11.2M
    // This undoes a transformation done by instcombine to fuse 2 compares.
502
11.2M
    if (ICI->getPredicate() == (isEQ ? 
ICmpInst::ICMP_EQ327k
:
ICmpInst::ICMP_NE10.9M
)) {
503
782k
      // It's a little bit hard to see why the following transformations are
504
782k
      // correct. Here is a CVC3 program to verify them for 64-bit values:
505
782k
506
782k
      /*
507
782k
         ONE  : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
508
782k
         x    : BITVECTOR(64);
509
782k
         y    : BITVECTOR(64);
510
782k
         z    : BITVECTOR(64);
511
782k
         mask : BITVECTOR(64) = BVSHL(ONE, z);
512
782k
         QUERY( (y & ~mask = y) =>
513
782k
                ((x & ~mask = y) <=> (x = y OR x = (y |  mask)))
514
782k
         );
515
782k
         QUERY( (y |  mask = y) =>
516
782k
                ((x |  mask = y) <=> (x = y OR x = (y & ~mask)))
517
782k
         );
518
782k
      */
519
782k
520
782k
      // Please note that each pattern must be a dual implication (<--> or
521
782k
      // iff). One directional implication can create spurious matches. If the
522
782k
      // implication is only one-way, an unsatisfiable condition on the left
523
782k
      // side can imply a satisfiable condition on the right side. Dual
524
782k
      // implication ensures that satisfiable conditions are transformed to
525
782k
      // other satisfiable conditions and unsatisfiable conditions are
526
782k
      // transformed to other unsatisfiable conditions.
527
782k
528
782k
      // Here is a concrete example of a unsatisfiable condition on the left
529
782k
      // implying a satisfiable condition on the right:
530
782k
      //
531
782k
      // mask = (1 << z)
532
782k
      // (x & ~mask) == y  --> (x == y || x == (y | mask))
533
782k
      //
534
782k
      // Substituting y = 3, z = 0 yields:
535
782k
      // (x & -2) == 3 --> (x == 3 || x == 2)
536
782k
537
782k
      // Pattern match a special case:
538
782k
      /*
539
782k
        QUERY( (y & ~mask = y) =>
540
782k
               ((x & ~mask = y) <=> (x = y OR x = (y |  mask)))
541
782k
        );
542
782k
      */
543
782k
      if (match(ICI->getOperand(0),
544
782k
                m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
545
37.7k
        APInt Mask = ~*RHSC;
546
37.7k
        if (Mask.isPowerOf2() && 
(C->getValue() & ~Mask) == C->getValue()3.93k
) {
547
3.92k
          // If we already have a value for the switch, it has to match!
548
3.92k
          if (!setValueOnce(RHSVal))
549
2
            return false;
550
3.92k
551
3.92k
          Vals.push_back(C);
552
3.92k
          Vals.push_back(
553
3.92k
              ConstantInt::get(C->getContext(),
554
3.92k
                               C->getValue() | Mask));
555
3.92k
          UsedICmps++;
556
3.92k
          return true;
557
3.92k
        }
558
37.7k
      }
559
778k
560
778k
      // Pattern match a special case:
561
778k
      /*
562
778k
        QUERY( (y |  mask = y) =>
563
778k
               ((x |  mask = y) <=> (x = y OR x = (y & ~mask)))
564
778k
        );
565
778k
      */
566
778k
      if (match(ICI->getOperand(0),
567
778k
                m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
568
205
        APInt Mask = *RHSC;
569
205
        if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
570
202
          // If we already have a value for the switch, it has to match!
571
202
          if (!setValueOnce(RHSVal))
572
23
            return false;
573
179
574
179
          Vals.push_back(C);
575
179
          Vals.push_back(ConstantInt::get(C->getContext(),
576
179
                                          C->getValue() & ~Mask));
577
179
          UsedICmps++;
578
179
          return true;
579
179
        }
580
205
      }
581
778k
582
778k
      // If we already have a value for the switch, it has to match!
583
778k
      if (!setValueOnce(ICI->getOperand(0)))
584
66.6k
        return false;
585
711k
586
711k
      UsedICmps++;
587
711k
      Vals.push_back(C);
588
711k
      return ICI->getOperand(0);
589
711k
    }
590
10.4M
591
10.4M
    // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
592
10.4M
    ConstantRange Span = ConstantRange::makeAllowedICmpRegion(
593
10.4M
        ICI->getPredicate(), C->getValue());
594
10.4M
595
10.4M
    // Shift the range if the compare is fed by an add. This is the range
596
10.4M
    // compare idiom as emitted by instcombine.
597
10.4M
    Value *CandidateVal = I->getOperand(0);
598
10.4M
    if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
599
531k
      Span = Span.subtract(*RHSC);
600
531k
      CandidateVal = RHSVal;
601
531k
    }
602
10.4M
603
10.4M
    // If this is an and/!= check, then we are looking to build the set of
604
10.4M
    // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
605
10.4M
    // x != 0 && x != 1.
606
10.4M
    if (!isEQ)
607
10.2M
      Span = Span.inverse();
608
10.4M
609
10.4M
    // If there are a ton of values, we don't want to make a ginormous switch.
610
10.4M
    if (Span.isSizeLargerThan(8) || 
Span.isEmptySet()196k
) {
611
10.2M
      return false;
612
10.2M
    }
613
196k
614
196k
    // If we already have a value for the switch, it has to match!
615
196k
    if (!setValueOnce(CandidateVal))
616
427
      return false;
617
195k
618
195k
    // Add all values from the range to the set
619
625k
    
for (APInt Tmp = Span.getLower(); 195k
Tmp != Span.getUpper();
++Tmp429k
)
620
429k
      Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
621
195k
622
195k
    UsedICmps++;
623
195k
    return true;
624
195k
  }
625
626
  /// Given a potentially 'or'd or 'and'd together collection of icmp
627
  /// eq/ne/lt/gt instructions that compare a value against a constant, extract
628
  /// the value being compared, and stick the list constants into the Vals
629
  /// vector.
630
  /// One "Extra" case is allowed to differ from the other.
631
15.5M
  void gather(Value *V) {
632
15.5M
    Instruction *I = dyn_cast<Instruction>(V);
633
15.5M
    bool isEQ = (I->getOpcode() == Instruction::Or);
634
15.5M
635
15.5M
    // Keep a stack (SmallVector for efficiency) for depth-first traversal
636
15.5M
    SmallVector<Value *, 8> DFT;
637
15.5M
    SmallPtrSet<Value *, 8> Visited;
638
15.5M
639
15.5M
    // Initialize
640
15.5M
    Visited.insert(V);
641
15.5M
    DFT.push_back(V);
642
15.5M
643
32.2M
    while (!DFT.empty()) {
644
17.2M
      V = DFT.pop_back_val();
645
17.2M
646
17.2M
      if (Instruction *I = dyn_cast<Instruction>(V)) {
647
17.2M
        // If it is a || (or && depending on isEQ), process the operands.
648
17.2M
        if (I->getOpcode() == (isEQ ? 
Instruction::Or892k
:
Instruction::And16.4M
)) {
649
894k
          if (Visited.insert(I->getOperand(1)).second)
650
894k
            DFT.push_back(I->getOperand(1));
651
894k
          if (Visited.insert(I->getOperand(0)).second)
652
894k
            DFT.push_back(I->getOperand(0));
653
894k
          continue;
654
894k
        }
655
16.3M
656
16.3M
        // Try to match the current instruction
657
16.3M
        if (matchInstruction(I, isEQ))
658
911k
          // Match succeed, continue the loop
659
911k
          continue;
660
15.4M
      }
661
15.4M
662
15.4M
      // One element of the sequence of || (or &&) could not be match as a
663
15.4M
      // comparison against the same value as the others.
664
15.4M
      // We allow only one "Extra" case to be checked before the switch
665
15.4M
      if (!Extra) {
666
14.8M
        Extra = V;
667
14.8M
        continue;
668
14.8M
      }
669
609k
      // Failed to parse a proper sequence, abort now
670
609k
      CompValue = nullptr;
671
609k
      break;
672
609k
    }
673
15.5M
  }
674
};
675
676
} // end anonymous namespace
677
678
static void EraseTerminatorAndDCECond(Instruction *TI,
679
22.1k
                                      MemorySSAUpdater *MSSAU = nullptr) {
680
22.1k
  Instruction *Cond = nullptr;
681
22.1k
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
682
8.37k
    Cond = dyn_cast<Instruction>(SI->getCondition());
683
13.7k
  } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
684
13.7k
    if (BI->isConditional())
685
13.6k
      Cond = dyn_cast<Instruction>(BI->getCondition());
686
13.7k
  } else 
if (IndirectBrInst *10
IBI10
= dyn_cast<IndirectBrInst>(TI)) {
687
10
    Cond = dyn_cast<Instruction>(IBI->getAddress());
688
10
  }
689
22.1k
690
22.1k
  TI->eraseFromParent();
691
22.1k
  if (Cond)
692
15.3k
    RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU);
693
22.1k
}
694
695
/// Return true if the specified terminator checks
696
/// to see if a value is equal to constant integer value.
697
25.4M
Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
698
25.4M
  Value *CV = nullptr;
699
25.4M
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
700
638k
    // Do not permit merging of large switch instructions into their
701
638k
    // predecessors unless there is only one predecessor.
702
638k
    if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
703
630k
      CV = SI->getCondition();
704
24.8M
  } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
705
24.7M
    if (BI->isConditional() && 
BI->getCondition()->hasOneUse()24.7M
)
706
23.7M
      if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
707
21.8M
        if (ICI->isEquality() && 
GetConstantInt(ICI->getOperand(1), DL)17.5M
)
708
15.2M
          CV = ICI->getOperand(0);
709
21.8M
      }
710
25.4M
711
25.4M
  // Unwrap any lossless ptrtoint cast.
712
25.4M
  if (CV) {
713
15.9M
    if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
714
935
      Value *Ptr = PTII->getPointerOperand();
715
935
      if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
716
925
        CV = Ptr;
717
935
    }
718
15.9M
  }
719
25.4M
  return CV;
720
25.4M
}
721
722
/// Given a value comparison instruction,
723
/// decode all of the 'cases' that it represents and return the 'default' block.
724
BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
725
64.6k
    Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
726
64.6k
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
727
14.7k
    Cases.reserve(SI->getNumCases());
728
14.7k
    for (auto Case : SI->cases())
729
84.3k
      Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
730
84.3k
                                                  Case.getCaseSuccessor()));
731
14.7k
    return SI->getDefaultDest();
732
14.7k
  }
733
49.9k
734
49.9k
  BranchInst *BI = cast<BranchInst>(TI);
735
49.9k
  ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
736
49.9k
  BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
737
49.9k
  Cases.push_back(ValueEqualityComparisonCase(
738
49.9k
      GetConstantInt(ICI->getOperand(1), DL), Succ));
739
49.9k
  return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
740
49.9k
}
741
742
/// Given a vector of bb/value pairs, remove any entries
743
/// in the list that match the specified block.
744
static void
745
EliminateBlockCases(BasicBlock *BB,
746
38.9k
                    std::vector<ValueEqualityComparisonCase> &Cases) {
747
38.9k
  Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
748
38.9k
}
749
750
/// Return true if there are any keys in C1 that exist in C2 as well.
751
static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
752
18.7k
                          std::vector<ValueEqualityComparisonCase> &C2) {
753
18.7k
  std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
754
18.7k
755
18.7k
  // Make V1 be smaller than V2.
756
18.7k
  if (V1->size() > V2->size())
757
4.35k
    std::swap(V1, V2);
758
18.7k
759
18.7k
  if (V1->empty())
760
0
    return false;
761
18.7k
  if (V1->size() == 1) {
762
18.7k
    // Just scan V2.
763
18.7k
    ConstantInt *TheVal = (*V1)[0].Value;
764
47.4k
    for (unsigned i = 0, e = V2->size(); i != e; 
++i28.7k
)
765
30.2k
      if (TheVal == (*V2)[i].Value)
766
1.52k
        return true;
767
18.7k
  }
768
18.7k
769
18.7k
  // Otherwise, just sort both lists and compare element by element.
770
18.7k
  array_pod_sort(V1->begin(), V1->end());
771
17.2k
  array_pod_sort(V2->begin(), V2->end());
772
17.2k
  unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
773
42.5k
  while (i1 != e1 && 
i2 != e232.7k
) {
774
25.2k
    if ((*V1)[i1].Value == (*V2)[i2].Value)
775
5
      return true;
776
25.2k
    if ((*V1)[i1].Value < (*V2)[i2].Value)
777
9.91k
      ++i1;
778
15.3k
    else
779
15.3k
      ++i2;
780
25.2k
  }
781
17.2k
  
return false17.2k
;
782
17.2k
}
783
784
// Set branch weights on SwitchInst. This sets the metadata if there is at
785
// least one non-zero weight.
786
101
static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights) {
787
101
  // Check that there is at least one non-zero weight. Otherwise, pass
788
101
  // nullptr to setMetadata which will erase the existing metadata.
789
101
  MDNode *N = nullptr;
790
102
  if (
llvm::any_of(Weights, [](uint32_t W) 101
{ return W != 0; }))
791
101
    N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights);
792
101
  SI->setMetadata(LLVMContext::MD_prof, N);
793
101
}
794
795
// Similar to the above, but for branch and select instructions that take
796
// exactly 2 weights.
797
static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
798
523
                             uint32_t FalseWeight) {
799
523
  assert(isa<BranchInst>(I) || isa<SelectInst>(I));
800
523
  // Check that there is at least one non-zero weight. Otherwise, pass
801
523
  // nullptr to setMetadata which will erase the existing metadata.
802
523
  MDNode *N = nullptr;
803
523
  if (TrueWeight || 
FalseWeight58
)
804
523
    N = MDBuilder(I->getParent()->getContext())
805
523
            .createBranchWeights(TrueWeight, FalseWeight);
806
523
  I->setMetadata(LLVMContext::MD_prof, N);
807
523
}
808
809
/// If TI is known to be a terminator instruction and its block is known to
810
/// only have a single predecessor block, check to see if that predecessor is
811
/// also a value comparison with the same value, and if that comparison
812
/// determines the outcome of this comparison. If so, simplify TI. This does a
813
/// very limited form of jump threading.
814
bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
815
3.90M
    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
816
3.90M
  Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
817
3.90M
  if (!PredVal)
818
1.11M
    return false; // Not a value comparison in predecessor.
819
2.79M
820
2.79M
  Value *ThisVal = isValueEqualityComparison(TI);
821
2.79M
  assert(ThisVal && "This isn't a value comparison!!");
822
2.79M
  if (ThisVal != PredVal)
823
2.77M
    return false; // Different predicates.
824
19.4k
825
19.4k
  // TODO: Preserve branch weight metadata, similarly to how
826
19.4k
  // FoldValueComparisonIntoPredecessors preserves it.
827
19.4k
828
19.4k
  // Find out information about when control will move from Pred to TI's block.
829
19.4k
  std::vector<ValueEqualityComparisonCase> PredCases;
830
19.4k
  BasicBlock *PredDef =
831
19.4k
      GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
832
19.4k
  EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
833
19.4k
834
19.4k
  // Find information about how control leaves this block.
835
19.4k
  std::vector<ValueEqualityComparisonCase> ThisCases;
836
19.4k
  BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
837
19.4k
  EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
838
19.4k
839
19.4k
  // If TI's block is the default block from Pred's comparison, potentially
840
19.4k
  // simplify TI based on this knowledge.
841
19.4k
  if (PredDef == TI->getParent()) {
842
18.7k
    // If we are here, we know that the value is none of those cases listed in
843
18.7k
    // PredCases.  If there are any cases in ThisCases that are in PredCases, we
844
18.7k
    // can simplify TI.
845
18.7k
    if (!ValuesOverlap(PredCases, ThisCases))
846
17.2k
      return false;
847
1.53k
848
1.53k
    if (isa<BranchInst>(TI)) {
849
1.52k
      // Okay, one of the successors of this condbr is dead.  Convert it to a
850
1.52k
      // uncond br.
851
1.52k
      assert(ThisCases.size() == 1 && "Branch can only have one case!");
852
1.52k
      // Insert the new branch.
853
1.52k
      Instruction *NI = Builder.CreateBr(ThisDef);
854
1.52k
      (void)NI;
855
1.52k
856
1.52k
      // Remove PHI node entries for the dead edge.
857
1.52k
      ThisCases[0].Dest->removePredecessor(TI->getParent());
858
1.52k
859
1.52k
      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
860
1.52k
                        << "Through successor TI: " << *TI << "Leaving: " << *NI
861
1.52k
                        << "\n");
862
1.52k
863
1.52k
      EraseTerminatorAndDCECond(TI);
864
1.52k
      return true;
865
1.52k
    }
866
12
867
12
    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
868
12
    // Okay, TI has cases that are statically dead, prune them away.
869
12
    SmallPtrSet<Constant *, 16> DeadCases;
870
50
    for (unsigned i = 0, e = PredCases.size(); i != e; 
++i38
)
871
38
      DeadCases.insert(PredCases[i].Value);
872
12
873
12
    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
874
12
                      << "Through successor TI: " << *TI);
875
12
876
88
    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
877
76
      --i;
878
76
      if (DeadCases.count(i->getCaseValue())) {
879
25
        i->getCaseSuccessor()->removePredecessor(TI->getParent());
880
25
        SI.removeCase(i);
881
25
      }
882
76
    }
883
12
    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
884
12
    return true;
885
12
  }
886
677
887
677
  // Otherwise, TI's block must correspond to some matched value.  Find out
888
677
  // which value (or set of values) this is.
889
677
  ConstantInt *TIV = nullptr;
890
677
  BasicBlock *TIBB = TI->getParent();
891
11.9k
  for (unsigned i = 0, e = PredCases.size(); i != e; 
++i11.2k
)
892
11.2k
    if (PredCases[i].Dest == TIBB) {
893
677
      if (TIV)
894
0
        return false; // Cannot handle multiple values coming to this block.
895
677
      TIV = PredCases[i].Value;
896
677
    }
897
677
  assert(TIV && "No edge from pred to succ?");
898
677
899
677
  // Okay, we found the one constant that our value can be if we get into TI's
900
677
  // BB.  Find out which successor will unconditionally be branched to.
901
677
  BasicBlock *TheRealDest = nullptr;
902
6.22k
  for (unsigned i = 0, e = ThisCases.size(); i != e; 
++i5.54k
)
903
5.59k
    if (ThisCases[i].Value == TIV) {
904
46
      TheRealDest = ThisCases[i].Dest;
905
46
      break;
906
46
    }
907
677
908
677
  // If not handled by any explicit cases, it is handled by the default case.
909
677
  if (!TheRealDest)
910
631
    TheRealDest = ThisDef;
911
677
912
677
  // Remove PHI node entries for dead edges.
913
677
  BasicBlock *CheckEdge = TheRealDest;
914
677
  for (BasicBlock *Succ : successors(TIBB))
915
6.37k
    if (Succ != CheckEdge)
916
5.69k
      Succ->removePredecessor(TIBB);
917
677
    else
918
677
      CheckEdge = nullptr;
919
677
920
677
  // Insert the new branch.
921
677
  Instruction *NI = Builder.CreateBr(TheRealDest);
922
677
  (void)NI;
923
677
924
677
  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
925
677
                    << "Through successor TI: " << *TI << "Leaving: " << *NI
926
677
                    << "\n");
927
677
928
677
  EraseTerminatorAndDCECond(TI);
929
677
  return true;
930
677
}
931
932
namespace {
933
934
/// This class implements a stable ordering of constant
935
/// integers that does not depend on their address.  This is important for
936
/// applications that sort ConstantInt's to ensure uniqueness.
937
struct ConstantIntOrdering {
938
222k
  bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
939
222k
    return LHS->getValue().ult(RHS->getValue());
940
222k
  }
941
};
942
943
} // end anonymous namespace
944
945
static int ConstantIntSortPredicate(ConstantInt *const *P1,
946
145k
                                    ConstantInt *const *P2) {
947
145k
  const ConstantInt *LHS = *P1;
948
145k
  const ConstantInt *RHS = *P2;
949
145k
  if (LHS == RHS)
950
9
    return 0;
951
145k
  return LHS->getValue().ult(RHS->getValue()) ? 
152.8k
:
-192.1k
;
952
145k
}
953
954
29.8k
static inline bool HasBranchWeights(const Instruction *I) {
955
29.8k
  MDNode *ProfMD = I->getMetadata(LLVMContext::MD_prof);
956
29.8k
  if (ProfMD && 
ProfMD->getOperand(0)158
)
957
158
    if (MDString *MDS = dyn_cast<MDString>(ProfMD->getOperand(0)))
958
158
      return MDS->getString().equals("branch_weights");
959
29.6k
960
29.6k
  return false;
961
29.6k
}
962
963
/// Get Weights of a given terminator, the default weight is at the front
964
/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
965
/// metadata.
966
static void GetBranchWeights(Instruction *TI,
967
158
                             SmallVectorImpl<uint64_t> &Weights) {
968
158
  MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
969
158
  assert(MD);
970
489
  for (unsigned i = 1, e = MD->getNumOperands(); i < e; 
++i331
) {
971
331
    ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i));
972
331
    Weights.push_back(CI->getValue().getZExtValue());
973
331
  }
974
158
975
158
  // If TI is a conditional eq, the default case is the false case,
976
158
  // and the corresponding branch-weight data is at index 2. We swap the
977
158
  // default weight to be the first entry.
978
158
  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
979
147
    assert(Weights.size() == 2);
980
147
    ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
981
147
    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
982
140
      std::swap(Weights.front(), Weights.back());
983
147
  }
984
158
}
985
986
/// Keep halving the weights until all can fit in uint32_t.
987
622
static void FitWeights(MutableArrayRef<uint64_t> Weights) {
988
622
  uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
989
622
  if (Max > UINT_MAX) {
990
61
    unsigned Offset = 32 - countLeadingZeros(Max);
991
61
    for (uint64_t &I : Weights)
992
122
      I >>= Offset;
993
61
  }
994
622
}
995
996
/// The specified terminator is a value equality comparison instruction
997
/// (either a switch or a branch on "X == c").
998
/// See if any of the predecessors of the terminator block are value comparisons
999
/// on the same value.  If so, and if safe to do so, fold them together.
1000
bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
1001
1.53M
                                                         IRBuilder<> &Builder) {
1002
1.53M
  BasicBlock *BB = TI->getParent();
1003
1.53M
  Value *CV = isValueEqualityComparison(TI); // CondVal
1004
1.53M
  assert(CV && "Not a comparison?");
1005
1.53M
  bool Changed = false;
1006
1.53M
1007
1.53M
  SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1008
2.96M
  while (!Preds.empty()) {
1009
1.43M
    BasicBlock *Pred = Preds.pop_back_val();
1010
1.43M
1011
1.43M
    // See if the predecessor is a comparison with the same value.
1012
1.43M
    Instruction *PTI = Pred->getTerminator();
1013
1.43M
    Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1014
1.43M
1015
1.43M
    if (PCV == CV && 
TI != PTI12.8k
) {
1016
12.8k
      SmallSetVector<BasicBlock*, 4> FailBlocks;
1017
12.8k
      if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
1018
288
        for (auto *Succ : FailBlocks) {
1019
288
          if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split"))
1020
0
            return false;
1021
288
        }
1022
288
      }
1023
12.8k
1024
12.8k
      // Figure out which 'cases' to copy from SI to PSI.
1025
12.8k
      std::vector<ValueEqualityComparisonCase> BBCases;
1026
12.8k
      BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
1027
12.8k
1028
12.8k
      std::vector<ValueEqualityComparisonCase> PredCases;
1029
12.8k
      BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
1030
12.8k
1031
12.8k
      // Based on whether the default edge from PTI goes to BB or not, fill in
1032
12.8k
      // PredCases and PredDefault with the new switch cases we would like to
1033
12.8k
      // build.
1034
12.8k
      SmallVector<BasicBlock *, 8> NewSuccessors;
1035
12.8k
1036
12.8k
      // Update the branch weight metadata along the way
1037
12.8k
      SmallVector<uint64_t, 8> Weights;
1038
12.8k
      bool PredHasWeights = HasBranchWeights(PTI);
1039
12.8k
      bool SuccHasWeights = HasBranchWeights(TI);
1040
12.8k
1041
12.8k
      if (PredHasWeights) {
1042
72
        GetBranchWeights(PTI, Weights);
1043
72
        // branch-weight metadata is inconsistent here.
1044
72
        if (Weights.size() != 1 + PredCases.size())
1045
0
          PredHasWeights = SuccHasWeights = false;
1046
12.7k
      } else if (SuccHasWeights)
1047
29
        // If there are no predecessor weights but there are successor weights,
1048
29
        // populate Weights with 1, which will later be scaled to the sum of
1049
29
        // successor's weights
1050
29
        Weights.assign(1 + PredCases.size(), 1);
1051
12.8k
1052
12.8k
      SmallVector<uint64_t, 8> SuccWeights;
1053
12.8k
      if (SuccHasWeights) {
1054
84
        GetBranchWeights(TI, SuccWeights);
1055
84
        // branch-weight metadata is inconsistent here.
1056
84
        if (SuccWeights.size() != 1 + BBCases.size())
1057
0
          PredHasWeights = SuccHasWeights = false;
1058
12.7k
      } else if (PredHasWeights)
1059
17
        SuccWeights.assign(1 + BBCases.size(), 1);
1060
12.8k
1061
12.8k
      if (PredDefault == BB) {
1062
12.0k
        // If this is the default destination from PTI, only the edges in TI
1063
12.0k
        // that don't occur in PTI, or that branch to BB will be activated.
1064
12.0k
        std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1065
59.9k
        for (unsigned i = 0, e = PredCases.size(); i != e; 
++i47.9k
)
1066
47.9k
          if (PredCases[i].Dest != BB)
1067
47.9k
            PTIHandled.insert(PredCases[i].Value);
1068
1
          else {
1069
1
            // The default destination is BB, we don't need explicit targets.
1070
1
            std::swap(PredCases[i], PredCases.back());
1071
1
1072
1
            if (PredHasWeights || SuccHasWeights) {
1073
0
              // Increase weight for the default case.
1074
0
              Weights[0] += Weights[i + 1];
1075
0
              std::swap(Weights[i + 1], Weights.back());
1076
0
              Weights.pop_back();
1077
0
            }
1078
1
1079
1
            PredCases.pop_back();
1080
1
            --i;
1081
1
            --e;
1082
1
          }
1083
12.0k
1084
12.0k
        // Reconstruct the new switch statement we will be building.
1085
12.0k
        if (PredDefault != BBDefault) {
1086
12.0k
          PredDefault->removePredecessor(Pred);
1087
12.0k
          PredDefault = BBDefault;
1088
12.0k
          NewSuccessors.push_back(BBDefault);
1089
12.0k
        }
1090
12.0k
1091
12.0k
        unsigned CasesFromPred = Weights.size();
1092
12.0k
        uint64_t ValidTotalSuccWeight = 0;
1093
25.3k
        for (unsigned i = 0, e = BBCases.size(); i != e; 
++i13.2k
)
1094
13.2k
          if (!PTIHandled.count(BBCases[i].Value) &&
1095
13.2k
              
BBCases[i].Dest != BBDefault13.1k
) {
1096
13.1k
            PredCases.push_back(BBCases[i]);
1097
13.1k
            NewSuccessors.push_back(BBCases[i].Dest);
1098
13.1k
            if (SuccHasWeights || 
PredHasWeights13.1k
) {
1099
95
              // The default weight is at index 0, so weight for the ith case
1100
95
              // should be at index i+1. Scale the cases from successor by
1101
95
              // PredDefaultWeight (Weights[0]).
1102
95
              Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1103
95
              ValidTotalSuccWeight += SuccWeights[i + 1];
1104
95
            }
1105
13.1k
          }
1106
12.0k
1107
12.0k
        if (SuccHasWeights || 
PredHasWeights11.9k
) {
1108
95
          ValidTotalSuccWeight += SuccWeights[0];
1109
95
          // Scale the cases from predecessor by ValidTotalSuccWeight.
1110
201
          for (unsigned i = 1; i < CasesFromPred; 
++i106
)
1111
106
            Weights[i] *= ValidTotalSuccWeight;
1112
95
          // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1113
95
          Weights[0] *= SuccWeights[0];
1114
95
        }
1115
12.0k
      } else {
1116
821
        // If this is not the default destination from PSI, only the edges
1117
821
        // in SI that occur in PSI with a destination of BB will be
1118
821
        // activated.
1119
821
        std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1120
821
        std::map<ConstantInt *, uint64_t> WeightsForHandled;
1121
4.84k
        for (unsigned i = 0, e = PredCases.size(); i != e; 
++i4.02k
)
1122
4.02k
          if (PredCases[i].Dest == BB) {
1123
820
            PTIHandled.insert(PredCases[i].Value);
1124
820
1125
820
            if (PredHasWeights || 
SuccHasWeights818
) {
1126
6
              WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1127
6
              std::swap(Weights[i + 1], Weights.back());
1128
6
              Weights.pop_back();
1129
6
            }
1130
820
1131
820
            std::swap(PredCases[i], PredCases.back());
1132
820
            PredCases.pop_back();
1133
820
            --i;
1134
820
            --e;
1135
820
          }
1136
821
1137
821
        // Okay, now we know which constants were sent to BB from the
1138
821
        // predecessor.  Figure out where they will all go now.
1139
3.32k
        for (unsigned i = 0, e = BBCases.size(); i != e; 
++i2.50k
)
1140
2.50k
          if (PTIHandled.count(BBCases[i].Value)) {
1141
609
            // If this is one we are capable of getting...
1142
609
            if (PredHasWeights || 
SuccHasWeights608
)
1143
3
              Weights.push_back(WeightsForHandled[BBCases[i].Value]);
1144
609
            PredCases.push_back(BBCases[i]);
1145
609
            NewSuccessors.push_back(BBCases[i].Dest);
1146
609
            PTIHandled.erase(
1147
609
                BBCases[i].Value); // This constant is taken care of
1148
609
          }
1149
821
1150
821
        // If there are any constants vectored to BB that TI doesn't handle,
1151
821
        // they must go to the default destination of TI.
1152
821
        for (ConstantInt *I : PTIHandled) {
1153
211
          if (PredHasWeights || 
SuccHasWeights210
)
1154
3
            Weights.push_back(WeightsForHandled[I]);
1155
211
          PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1156
211
          NewSuccessors.push_back(BBDefault);
1157
211
        }
1158
821
      }
1159
12.8k
1160
12.8k
      // Okay, at this point, we know which new successor Pred will get.  Make
1161
12.8k
      // sure we update the number of entries in the PHI nodes for these
1162
12.8k
      // successors.
1163
12.8k
      for (BasicBlock *NewSuccessor : NewSuccessors)
1164
26.0k
        AddPredecessorToBlock(NewSuccessor, Pred, BB);
1165
12.8k
1166
12.8k
      Builder.SetInsertPoint(PTI);
1167
12.8k
      // Convert pointer to int before we switch.
1168
12.8k
      if (CV->getType()->isPointerTy()) {
1169
97
        CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()),
1170
97
                                    "magicptr");
1171
97
      }
1172
12.8k
1173
12.8k
      // Now that the successors are updated, create the new Switch instruction.
1174
12.8k
      SwitchInst *NewSI =
1175
12.8k
          Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1176
12.8k
      NewSI->setDebugLoc(PTI->getDebugLoc());
1177
12.8k
      for (ValueEqualityComparisonCase &V : PredCases)
1178
65.1k
        NewSI->addCase(V.Value, V.Dest);
1179
12.8k
1180
12.8k
      if (PredHasWeights || 
SuccHasWeights12.7k
) {
1181
101
        // Halve the weights if any of them cannot fit in an uint32_t
1182
101
        FitWeights(Weights);
1183
101
1184
101
        SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1185
101
1186
101
        setBranchWeights(NewSI, MDWeights);
1187
101
      }
1188
12.8k
1189
12.8k
      EraseTerminatorAndDCECond(PTI);
1190
12.8k
1191
12.8k
      // Okay, last check.  If BB is still a successor of PSI, then we must
1192
12.8k
      // have an infinite loop case.  If so, add an infinitely looping block
1193
12.8k
      // to handle the case to preserve the behavior of the code.
1194
12.8k
      BasicBlock *InfLoopBlock = nullptr;
1195
90.8k
      for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; 
++i78.0k
)
1196
78.0k
        if (NewSI->getSuccessor(i) == BB) {
1197
1
          if (!InfLoopBlock) {
1198
1
            // Insert it at the end of the function, because it's either code,
1199
1
            // or it won't matter if it's hot. :)
1200
1
            InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop",
1201
1
                                              BB->getParent());
1202
1
            BranchInst::Create(InfLoopBlock, InfLoopBlock);
1203
1
          }
1204
1
          NewSI->setSuccessor(i, InfLoopBlock);
1205
1
        }
1206
12.8k
1207
12.8k
      Changed = true;
1208
12.8k
    }
1209
1.43M
  }
1210
1.53M
  return Changed;
1211
1.53M
}
1212
1213
// If we would need to insert a select that uses the value of this invoke
1214
// (comments in HoistThenElseCodeToIf explain why we would need to do this), we
1215
// can't hoist the invoke, as there is nowhere to put the select in this case.
1216
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
1217
6
                                Instruction *I1, Instruction *I2) {
1218
10
  for (BasicBlock *Succ : successors(BB1)) {
1219
10
    for (const PHINode &PN : Succ->phis()) {
1220
4
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
1221
4
      Value *BB2V = PN.getIncomingValueForBlock(BB2);
1222
4
      if (BB1V != BB2V && (BB1V == I1 || 
BB2V == I22
)) {
1223
2
        return false;
1224
2
      }
1225
4
    }
1226
10
  }
1227
6
  
return true4
;
1228
6
}
1229
1230
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I);
1231
1232
/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
1233
/// in the two blocks up into the branch block. The caller of this function
1234
/// guarantees that BI's block dominates BB1 and BB2.
1235
static bool HoistThenElseCodeToIf(BranchInst *BI,
1236
3.74M
                                  const TargetTransformInfo &TTI) {
1237
3.74M
  // This does very trivial matching, with limited scanning, to find identical
1238
3.74M
  // instructions in the two blocks.  In particular, we don't want to get into
1239
3.74M
  // O(M*N) situations here where M and N are the sizes of BB1 and BB2.  As
1240
3.74M
  // such, we currently just scan for obviously identical instructions in an
1241
3.74M
  // identical order.
1242
3.74M
  BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
1243
3.74M
  BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
1244
3.74M
1245
3.74M
  BasicBlock::iterator BB1_Itr = BB1->begin();
1246
3.74M
  BasicBlock::iterator BB2_Itr = BB2->begin();
1247
3.74M
1248
3.74M
  Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++;
1249
3.74M
  // Skip debug info if it is not identical.
1250
3.74M
  DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
1251
3.74M
  DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
1252
3.74M
  if (!DBI1 || 
!DBI28
||
!DBI1->isIdenticalToWhenDefined(DBI2)6
) {
1253
3.74M
    while (isa<DbgInfoIntrinsic>(I1))
1254
10
      I1 = &*BB1_Itr++;
1255
3.74M
    while (isa<DbgInfoIntrinsic>(I2))
1256
7
      I2 = &*BB2_Itr++;
1257
3.74M
  }
1258
3.74M
  // FIXME: Can we define a safety predicate for CallBr?
1259
3.74M
  if (isa<PHINode>(I1) || 
!I1->isIdenticalToWhenDefined(I2)3.70M
||
1260
3.74M
      
(30.7k
isa<InvokeInst>(I1)30.7k
&&
!isSafeToHoistInvoke(BB1, BB2, I1, I2)4
) ||
1261
3.74M
      
isa<CallBrInst>(I1)30.7k
)
1262
3.70M
    return false;
1263
30.7k
1264
30.7k
  BasicBlock *BIParent = BI->getParent();
1265
30.7k
1266
30.7k
  bool Changed = false;
1267
65.8k
  do {
1268
65.8k
    // If we are hoisting the terminator instruction, don't move one (making a
1269
65.8k
    // broken BB), instead clone it, and remove BI.
1270
65.8k
    if (I1->isTerminator())
1271
5.31k
      goto HoistTerminator;
1272
60.5k
1273
60.5k
    // If we're going to hoist a call, make sure that the two instructions we're
1274
60.5k
    // commoning/hoisting are both marked with musttail, or neither of them is
1275
60.5k
    // marked as such. Otherwise, we might end up in a situation where we hoist
1276
60.5k
    // from a block where the terminator is a `ret` to a block where the terminator
1277
60.5k
    // is a `br`, and `musttail` calls expect to be followed by a return.
1278
60.5k
    auto *C1 = dyn_cast<CallInst>(I1);
1279
60.5k
    auto *C2 = dyn_cast<CallInst>(I2);
1280
60.5k
    if (C1 && 
C25.42k
)
1281
5.42k
      if (C1->isMustTailCall() != C2->isMustTailCall())
1282
1
        return Changed;
1283
60.5k
1284
60.5k
    if (!TTI.isProfitableToHoist(I1) || 
!TTI.isProfitableToHoist(I2)60.5k
)
1285
3
      return Changed;
1286
60.5k
1287
60.5k
    if (isa<DbgInfoIntrinsic>(I1) || 
isa<DbgInfoIntrinsic>(I2)60.5k
) {
1288
2
      assert (isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2));
1289
2
      // The debug location is an integral part of a debug info intrinsic
1290
2
      // and can't be separated from it or replaced.  Instead of attempting
1291
2
      // to merge locations, simply hoist both copies of the intrinsic.
1292
2
      BIParent->getInstList().splice(BI->getIterator(),
1293
2
                                     BB1->getInstList(), I1);
1294
2
      BIParent->getInstList().splice(BI->getIterator(),
1295
2
                                     BB2->getInstList(), I2);
1296
2
      Changed = true;
1297
60.5k
    } else {
1298
60.5k
      // For a normal instruction, we just move one to right before the branch,
1299
60.5k
      // then replace all uses of the other with the first.  Finally, we remove
1300
60.5k
      // the now redundant second instruction.
1301
60.5k
      BIParent->getInstList().splice(BI->getIterator(),
1302
60.5k
                                     BB1->getInstList(), I1);
1303
60.5k
      if (!I2->use_empty())
1304
53.8k
        I2->replaceAllUsesWith(I1);
1305
60.5k
      I1->andIRFlags(I2);
1306
60.5k
      unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
1307
60.5k
                             LLVMContext::MD_range,
1308
60.5k
                             LLVMContext::MD_fpmath,
1309
60.5k
                             LLVMContext::MD_invariant_load,
1310
60.5k
                             LLVMContext::MD_nonnull,
1311
60.5k
                             LLVMContext::MD_invariant_group,
1312
60.5k
                             LLVMContext::MD_align,
1313
60.5k
                             LLVMContext::MD_dereferenceable,
1314
60.5k
                             LLVMContext::MD_dereferenceable_or_null,
1315
60.5k
                             LLVMContext::MD_mem_parallel_loop_access,
1316
60.5k
                             LLVMContext::MD_access_group};
1317
60.5k
      combineMetadata(I1, I2, KnownIDs, true);
1318
60.5k
1319
60.5k
      // I1 and I2 are being combined into a single instruction.  Its debug
1320
60.5k
      // location is the merged locations of the original instructions.
1321
60.5k
      I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
1322
60.5k
1323
60.5k
      I2->eraseFromParent();
1324
60.5k
      Changed = true;
1325
60.5k
    }
1326
60.5k
1327
60.5k
    I1 = &*BB1_Itr++;
1328
60.5k
    I2 = &*BB2_Itr++;
1329
60.5k
    // Skip debug info if it is not identical.
1330
60.5k
    DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
1331
60.5k
    DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
1332
60.5k
    if (!DBI1 || 
!DBI22
||
!DBI1->isIdenticalToWhenDefined(DBI2)2
) {
1333
60.5k
      while (isa<DbgInfoIntrinsic>(I1))
1334
0
        I1 = &*BB1_Itr++;
1335
60.5k
      while (isa<DbgInfoIntrinsic>(I2))
1336
0
        I2 = &*BB2_Itr++;
1337
60.5k
    }
1338
60.5k
  } while (I1->isIdenticalToWhenDefined(I2));
1339
30.7k
1340
30.7k
  
return true25.4k
;
1341
5.31k
1342
5.31k
HoistTerminator:
1343
5.31k
  // It may not be possible to hoist an invoke.
1344
5.31k
  // FIXME: Can we define a safety predicate for CallBr?
1345
5.31k
  if (isa<InvokeInst>(I1) && 
!isSafeToHoistInvoke(BB1, BB2, I1, I2)2
)
1346
0
    return Changed;
1347
5.31k
1348
5.31k
  // TODO: callbr hoisting currently disabled pending further study.
1349
5.31k
  if (isa<CallBrInst>(I1))
1350
0
    return Changed;
1351
5.31k
1352
5.31k
  for (BasicBlock *Succ : successors(BB1)) {
1353
5.59k
    for (PHINode &PN : Succ->phis()) {
1354
5.59k
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
1355
5.59k
      Value *BB2V = PN.getIncomingValueForBlock(BB2);
1356
5.59k
      if (BB1V == BB2V)
1357
632
        continue;
1358
4.96k
1359
4.96k
      // Check for passingValueIsAlwaysUndefined here because we would rather
1360
4.96k
      // eliminate undefined control flow then converting it to a select.
1361
4.96k
      if (passingValueIsAlwaysUndefined(BB1V, &PN) ||
1362
4.96k
          
passingValueIsAlwaysUndefined(BB2V, &PN)4.95k
)
1363
1
        return Changed;
1364
4.95k
1365
4.95k
      if (isa<ConstantExpr>(BB1V) && 
!isSafeToSpeculativelyExecute(BB1V)64
)
1366
0
        return Changed;
1367
4.95k
      if (isa<ConstantExpr>(BB2V) && 
!isSafeToSpeculativelyExecute(BB2V)59
)
1368
1
        return Changed;
1369
4.95k
    }
1370
5.30k
  }
1371
5.31k
1372
5.31k
  // Okay, it is safe to hoist the terminator.
1373
5.31k
  Instruction *NT = I1->clone();
1374
5.31k
  BIParent->getInstList().insert(BI->getIterator(), NT);
1375
5.31k
  if (!NT->getType()->isVoidTy()) {
1376
0
    I1->replaceAllUsesWith(NT);
1377
0
    I2->replaceAllUsesWith(NT);
1378
0
    NT->takeName(I1);
1379
0
  }
1380
5.31k
1381
5.31k
  // Ensure terminator gets a debug location, even an unknown one, in case
1382
5.31k
  // it involves inlinable calls.
1383
5.31k
  NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
1384
5.31k
1385
5.31k
  // PHIs created below will adopt NT's merged DebugLoc.
1386
5.31k
  IRBuilder<NoFolder> Builder(NT);
1387
5.31k
1388
5.31k
  // Hoisting one of the terminators from our successor is a great thing.
1389
5.31k
  // Unfortunately, the successors of the if/else blocks may have PHI nodes in
1390
5.31k
  // them.  If they do, all PHI entries for BB1/BB2 must agree for all PHI
1391
5.31k
  // nodes, so we insert select instruction to compute the final result.
1392
5.31k
  std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
1393
5.31k
  for (BasicBlock *Succ : successors(BB1)) {
1394
5.59k
    for (PHINode &PN : Succ->phis()) {
1395
5.59k
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
1396
5.59k
      Value *BB2V = PN.getIncomingValueForBlock(BB2);
1397
5.59k
      if (BB1V == BB2V)
1398
632
        continue;
1399
4.95k
1400
4.95k
      // These values do not agree.  Insert a select instruction before NT
1401
4.95k
      // that determines the right value.
1402
4.95k
      SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
1403
4.95k
      if (!SI)
1404
4.91k
        SI = cast<SelectInst>(
1405
4.91k
            Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
1406
4.91k
                                 BB1V->getName() + "." + BB2V->getName(), BI));
1407
4.95k
1408
4.95k
      // Make the PHI node use the select for all incoming values for BB1/BB2
1409
21.3k
      for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; 
++i16.3k
)
1410
16.3k
        if (PN.getIncomingBlock(i) == BB1 || 
PN.getIncomingBlock(i) == BB211.4k
)
1411
9.91k
          PN.setIncomingValue(i, SI);
1412
4.95k
    }
1413
5.30k
  }
1414
5.31k
1415
5.31k
  // Update any PHI nodes in our new successors.
1416
5.31k
  for (BasicBlock *Succ : successors(BB1))
1417
5.30k
    AddPredecessorToBlock(Succ, BIParent, BB1);
1418
5.31k
1419
5.31k
  EraseTerminatorAndDCECond(BI);
1420
5.31k
  return true;
1421
5.31k
}
1422
1423
// All instructions in Insts belong to different blocks that all unconditionally
1424
// branch to a common successor. Analyze each instruction and return true if it
1425
// would be possible to sink them into their successor, creating one common
1426
// instruction instead. For every value that would be required to be provided by
1427
// PHI node (because an operand varies in each input block), add to PHIOperands.
1428
static bool canSinkInstructions(
1429
    ArrayRef<Instruction *> Insts,
1430
305k
    DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
1431
305k
  // Prune out obviously bad instructions to move. Each instruction must have
1432
305k
  // exactly zero or one use, and we check later that use is by a single, common
1433
305k
  // PHI instruction in the successor.
1434
305k
  bool HasUse = !Insts.front()->user_empty();
1435
777k
  for (auto *I : Insts) {
1436
777k
    // These instructions may change or break semantics if moved.
1437
777k
    if (isa<PHINode>(I) || 
I->isEHPad()774k
||
isa<AllocaInst>(I)768k
||
1438
777k
        
I->getType()->isTokenTy()768k
)
1439
8.51k
      return false;
1440
768k
1441
768k
    // Conservatively return false if I is an inline-asm instruction. Sinking
1442
768k
    // and merging inline-asm instructions can potentially create arguments
1443
768k
    // that cannot satisfy the inline-asm constraints.
1444
768k
    if (const auto *C = dyn_cast<CallBase>(I))
1445
436k
      if (C->isInlineAsm())
1446
1.20k
        return false;
1447
767k
1448
767k
    // Each instruction must have zero or one use.
1449
767k
    if (HasUse && 
!I->hasOneUse()444k
)
1450
27.7k
      return false;
1451
739k
    if (!HasUse && 
!I->user_empty()323k
)
1452
43.3k
      return false;
1453
739k
  }
1454
305k
1455
305k
  const Instruction *I0 = Insts.front();
1456
225k
  for (auto *I : Insts)
1457
475k
    if (!I->isSameOperationAs(I0))
1458
121k
      return false;
1459
225k
1460
225k
  // All instructions in Insts are known to be the same opcode. If they have a
1461
225k
  // use, check that the only user is a PHI or in the same block as the
1462
225k
  // instruction, because if a user is in the same block as an instruction we're
1463
225k
  // contemplating sinking, it must already be determined to be sinkable.
1464
225k
  
if (103k
HasUse103k
) {
1465
65.7k
    auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
1466
65.7k
    auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
1467
138k
    if (
!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool 65.7k
{
1468
138k
          auto *U = cast<Instruction>(*I->user_begin());
1469
138k
          return (PNUse &&
1470
138k
                  
PNUse->getParent() == Succ57.5k
&&
1471
138k
                  
PNUse->getIncomingValueForBlock(I->getParent()) == I57.5k
) ||
1472
138k
                 
U->getParent() == I->getParent()81.9k
;
1473
138k
        }))
1474
946
      return false;
1475
102k
  }
1476
102k
1477
102k
  // Because SROA can't handle speculating stores of selects, try not
1478
102k
  // to sink loads or stores of allocas when we'd have to create a PHI for
1479
102k
  // the address operand. Also, because it is likely that loads or stores
1480
102k
  // of allocas will disappear when Mem2Reg/SROA is run, don't sink them.
1481
102k
  // This can cause code churn which can have unintended consequences down
1482
102k
  // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
1483
102k
  // FIXME: This is a workaround for a deficiency in SROA - see
1484
102k
  // https://llvm.org/bugs/show_bug.cgi?id=30188
1485
102k
  if (isa<StoreInst>(I0) && 
any_of(Insts, [](const Instruction *I) 15.9k
{
1486
35.4k
        return isa<AllocaInst>(I->getOperand(1));
1487
35.4k
      }))
1488
155
    return false;
1489
102k
  if (isa<LoadInst>(I0) && 
any_of(Insts, [](const Instruction *I) 3.25k
{
1490
7.06k
        return isa<AllocaInst>(I->getOperand(0));
1491
7.06k
      }))
1492
63
    return false;
1493
102k
1494
352k
  
for (unsigned OI = 0, OE = I0->getNumOperands(); 102k
OI != OE;
++OI250k
) {
1495
263k
    if (I0->getOperand(OI)->getType()->isTokenTy())
1496
0
      // Don't touch any operand of token type.
1497
0
      return false;
1498
263k
1499
548k
    
auto SameAsI0 = [&I0, OI](const Instruction *I) 263k
{
1500
548k
      assert(I->getNumOperands() == I0->getNumOperands());
1501
548k
      return I->getOperand(OI) == I0->getOperand(OI);
1502
548k
    };
1503
263k
    if (!all_of(Insts, SameAsI0)) {
1504
151k
      if (!canReplaceOperandWithVariable(I0, OI))
1505
12.1k
        // We can't create a PHI from this GEP.
1506
12.1k
        return false;
1507
139k
      // Don't create indirect calls! The called value is the final operand.
1508
139k
      if (isa<CallBase>(I0) && 
OI == OE - 181.4k
) {
1509
1.35k
        // FIXME: if the call was *already* indirect, we should do this.
1510
1.35k
        return false;
1511
1.35k
      }
1512
137k
      for (auto *I : Insts)
1513
292k
        PHIOperands[I].push_back(I->getOperand(OI));
1514
137k
    }
1515
263k
  }
1516
102k
  
return true88.6k
;
1517
102k
}
1518
1519
// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last
1520
// instruction of every block in Blocks to their common successor, commoning
1521
// into one instruction.
1522
32.4k
static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
1523
32.4k
  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
1524
32.4k
1525
32.4k
  // canSinkLastInstruction returning true guarantees that every block has at
1526
32.4k
  // least one non-terminator instruction.
1527
32.4k
  SmallVector<Instruction*,4> Insts;
1528
74.3k
  for (auto *BB : Blocks) {
1529
74.3k
    Instruction *I = BB->getTerminator();
1530
74.3k
    do {
1531
74.3k
      I = I->getPrevNode();
1532
74.3k
    } while (isa<DbgInfoIntrinsic>(I) && 
I != &BB->front()8
);
1533
74.3k
    if (!isa<DbgInfoIntrinsic>(I))
1534
74.3k
      Insts.push_back(I);
1535
74.3k
  }
1536
32.4k
1537
32.4k
  // The only checking we need to do now is that all users of all instructions
1538
32.4k
  // are the same PHI node. canSinkLastInstruction should have checked this but
1539
32.4k
  // it is slightly over-aggressive - it gets confused by commutative instructions
1540
32.4k
  // so double-check it here.
1541
32.4k
  Instruction *I0 = Insts.front();
1542
32.4k
  if (!I0->user_empty()) {
1543
21.3k
    auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
1544
46.9k
    if (
!all_of(Insts, [&PNUse](const Instruction *I) -> bool 21.3k
{
1545
46.9k
          auto *U = cast<Instruction>(*I->user_begin());
1546
46.9k
          return U == PNUse;
1547
46.9k
        }))
1548
35
      return false;
1549
32.4k
  }
1550
32.4k
1551
32.4k
  // We don't need to do any more checking here; canSinkLastInstruction should
1552
32.4k
  // have done it all for us.
1553
32.4k
  SmallVector<Value*, 4> NewOperands;
1554
117k
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; 
++O84.6k
) {
1555
84.6k
    // This check is different to that in canSinkLastInstruction. There, we
1556
84.6k
    // cared about the global view once simplifycfg (and instcombine) have
1557
84.6k
    // completed - it takes into account PHIs that become trivially
1558
84.6k
    // simplifiable.  However here we need a more local view; if an operand
1559
84.6k
    // differs we create a PHI and rely on instcombine to clean up the very
1560
84.6k
    // small mess we may make.
1561
181k
    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
1562
181k
      return I->getOperand(O) != I0->getOperand(O);
1563
181k
    });
1564
84.6k
    if (!NeedPHI) {
1565
48.8k
      NewOperands.push_back(I0->getOperand(O));
1566
48.8k
      continue;
1567
48.8k
    }
1568
35.8k
1569
35.8k
    // Create a new PHI in the successor block and populate it.
1570
35.8k
    auto *Op = I0->getOperand(O);
1571
35.8k
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
1572
35.8k
    auto *PN = PHINode::Create(Op->getType(), Insts.size(),
1573
35.8k
                               Op->getName() + ".sink", &BBEnd->front());
1574
35.8k
    for (auto *I : Insts)
1575
79.1k
      PN->addIncoming(I->getOperand(O), I->getParent());
1576
35.8k
    NewOperands.push_back(PN);
1577
35.8k
  }
1578
32.4k
1579
32.4k
  // Arbitrarily use I0 as the new "common" instruction; remap its operands
1580
32.4k
  // and move it to the start of the successor block.
1581
117k
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; 
++O84.6k
)
1582
84.6k
    I0->getOperandUse(O).set(NewOperands[O]);
1583
32.4k
  I0->moveBefore(&*BBEnd->getFirstInsertionPt());
1584
32.4k
1585
32.4k
  // Update metadata and IR flags, and merge debug locations.
1586
32.4k
  for (auto *I : Insts)
1587
74.2k
    if (I != I0) {
1588
41.8k
      // The debug location for the "common" instruction is the merged locations
1589
41.8k
      // of all the commoned instructions.  We start with the original location
1590
41.8k
      // of the "common" instruction and iteratively merge each location in the
1591
41.8k
      // loop below.
1592
41.8k
      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
1593
41.8k
      // However, as N-way merge for CallInst is rare, so we use simplified API
1594
41.8k
      // instead of using complex API for N-way merge.
1595
41.8k
      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
1596
41.8k
      combineMetadataForCSE(I0, I, true);
1597
41.8k
      I0->andIRFlags(I);
1598
41.8k
    }
1599
32.4k
1600
32.4k
  if (!I0->user_empty()) {
1601
21.3k
    // canSinkLastInstruction checked that all instructions were used by
1602
21.3k
    // one and only one PHI node. Find that now, RAUW it to our common
1603
21.3k
    // instruction and nuke it.
1604
21.3k
    auto *PN = cast<PHINode>(*I0->user_begin());
1605
21.3k
    PN->replaceAllUsesWith(I0);
1606
21.3k
    PN->eraseFromParent();
1607
21.3k
  }
1608
32.4k
1609
32.4k
  // Finally nuke all instructions apart from the common instruction.
1610
32.4k
  for (auto *I : Insts)
1611
74.2k
    if (I != I0)
1612
41.8k
      I->eraseFromParent();
1613
32.4k
1614
32.4k
  return true;
1615
32.4k
}
1616
1617
namespace {
1618
1619
  // LockstepReverseIterator - Iterates through instructions
1620
  // in a set of blocks in reverse order from the first non-terminator.
1621
  // For example (assume all blocks have size n):
1622
  //   LockstepReverseIterator I([B1, B2, B3]);
1623
  //   *I-- = [B1[n], B2[n], B3[n]];
1624
  //   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
1625
  //   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
1626
  //   ...
1627
  class LockstepReverseIterator {
1628
    ArrayRef<BasicBlock*> Blocks;
1629
    SmallVector<Instruction*,4> Insts;
1630
    bool Fail;
1631
1632
  public:
1633
240k
    LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
1634
240k
      reset();
1635
240k
    }
1636
1637
302k
    void reset() {
1638
302k
      Fail = false;
1639
302k
      Insts.clear();
1640
841k
      for (auto *BB : Blocks) {
1641
841k
        Instruction *Inst = BB->getTerminator();
1642
841k
        for (Inst = Inst->getPrevNode(); Inst && 
isa<DbgInfoIntrinsic>(Inst)839k
;)
1643
13
          Inst = Inst->getPrevNode();
1644
841k
        if (!Inst) {
1645
1.90k
          // Block wasn't big enough.
1646
1.90k
          Fail = true;
1647
1.90k
          return;
1648
1.90k
        }
1649
839k
        Insts.push_back(Inst);
1650
839k
      }
1651
302k
    }
1652
1653
328k
    bool isValid() const {
1654
328k
      return !Fail;
1655
328k
    }
1656
1657
98.7k
    void operator--() {
1658
98.7k
      if (Fail)
1659
0
        return;
1660
194k
      
for (auto *&Inst : Insts)98.7k
{
1661
194k
        for (Inst = Inst->getPrevNode(); Inst && 
isa<DbgInfoIntrinsic>(Inst)164k
;)
1662
0
          Inst = Inst->getPrevNode();
1663
194k
        // Already at beginning of block.
1664
194k
        if (!Inst) {
1665
29.4k
          Fail = true;
1666
29.4k
          return;
1667
29.4k
        }
1668
194k
      }
1669
98.7k
    }
1670
1671
570k
    ArrayRef<Instruction*> operator * () const {
1672
570k
      return Insts;
1673
570k
    }
1674
  };
1675
1676
} // end anonymous namespace
1677
1678
/// Check whether BB's predecessors end with unconditional branches. If it is
1679
/// true, sink any common code from the predecessors to BB.
1680
/// We also allow one predecessor to end with conditional branch (but no more
1681
/// than one).
1682
6.83M
static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
1683
6.83M
  // We support two situations:
1684
6.83M
  //   (1) all incoming arcs are unconditional
1685
6.83M
  //   (2) one incoming arc is conditional
1686
6.83M
  //
1687
6.83M
  // (2) is very common in switch defaults and
1688
6.83M
  // else-if patterns;
1689
6.83M
  //
1690
6.83M
  //   if (a) f(1);
1691
6.83M
  //   else if (b) f(2);
1692
6.83M
  //
1693
6.83M
  // produces:
1694
6.83M
  //
1695
6.83M
  //       [if]
1696
6.83M
  //      /    \
1697
6.83M
  //    [f(1)] [if]
1698
6.83M
  //      |     | \
1699
6.83M
  //      |     |  |
1700
6.83M
  //      |  [f(2)]|
1701
6.83M
  //       \    | /
1702
6.83M
  //        [ end ]
1703
6.83M
  //
1704
6.83M
  // [end] has two unconditional predecessor arcs and one conditional. The
1705
6.83M
  // conditional refers to the implicit empty 'else' arc. This conditional
1706
6.83M
  // arc can also be caused by an empty default block in a switch.
1707
6.83M
  //
1708
6.83M
  // In this case, we attempt to sink code from all *unconditional* arcs.
1709
6.83M
  // If we can sink instructions from these arcs (determined during the scan
1710
6.83M
  // phase below) we insert a common successor for all unconditional arcs and
1711
6.83M
  // connect that to [end], to enable sinking:
1712
6.83M
  //
1713
6.83M
  //       [if]
1714
6.83M
  //      /    \
1715
6.83M
  //    [x(1)] [if]
1716
6.83M
  //      |     | \
1717
6.83M
  //      |     |  \
1718
6.83M
  //      |  [x(2)] |
1719
6.83M
  //       \   /    |
1720
6.83M
  //   [sink.split] |
1721
6.83M
  //         \     /
1722
6.83M
  //         [ end ]
1723
6.83M
  //
1724
6.83M
  SmallVector<BasicBlock*,4> UnconditionalPreds;
1725
6.83M
  Instruction *Cond = nullptr;
1726
8.84M
  for (auto *B : predecessors(BB)) {
1727
8.84M
    auto *T = B->getTerminator();
1728
8.84M
    if (isa<BranchInst>(T) && 
cast<BranchInst>(T)->isUnconditional()8.45M
)
1729
1.85M
      UnconditionalPreds.push_back(B);
1730
6.98M
    else if ((isa<BranchInst>(T) || 
isa<SwitchInst>(T)381k
) &&
!Cond6.83M
)
1731
5.87M
      Cond = T;
1732
1.11M
    else
1733
1.11M
      return false;
1734
8.84M
  }
1735
6.83M
  
if (5.72M
UnconditionalPreds.size() < 25.72M
)
1736
5.47M
    return false;
1737
240k
1738
240k
  bool Changed = false;
1739
240k
  // We take a two-step approach to tail sinking. First we scan from the end of
1740
240k
  // each block upwards in lockstep. If the n'th instruction from the end of each
1741
240k
  // block can be sunk, those instructions are added to ValuesToSink and we
1742
240k
  // carry on. If we can sink an instruction but need to PHI-merge some operands
1743
240k
  // (because they're not identical in each instruction) we add these to
1744
240k
  // PHIOperands.
1745
240k
  unsigned ScanIdx = 0;
1746
240k
  SmallPtrSet<Value*,4> InstructionsToSink;
1747
240k
  DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
1748
240k
  LockstepReverseIterator LRI(UnconditionalPreds);
1749
328k
  while (LRI.isValid() &&
1750
328k
         
canSinkInstructions(*LRI, PHIOperands)305k
) {
1751
88.6k
    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
1752
88.6k
                      << "\n");
1753
88.6k
    InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
1754
88.6k
    ++ScanIdx;
1755
88.6k
    --LRI;
1756
88.6k
  }
1757
240k
1758
240k
  auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
1759
72.3k
    unsigned NumPHIdValues = 0;
1760
72.3k
    for (auto *I : *LRI)
1761
160k
      for (auto *V : PHIOperands[I])
1762
193k
        if (InstructionsToSink.count(V) == 0)
1763
147k
          ++NumPHIdValues;
1764
72.3k
    LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
1765
72.3k
    unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
1766
72.3k
    if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
1767
932
        NumPHIInsts++;
1768
72.3k
1769
72.3k
    return NumPHIInsts <= 1;
1770
72.3k
  };
1771
240k
1772
240k
  if (ScanIdx > 0 && 
Cond38.9k
) {
1773
15.5k
    // Check if we would actually sink anything first! This mutates the CFG and
1774
15.5k
    // adds an extra block. The goal in doing this is to allow instructions that
1775
15.5k
    // couldn't be sunk before to be sunk - obviously, speculatable instructions
1776
15.5k
    // (such as trunc, add) can be sunk and predicated already. So we check that
1777
15.5k
    // we're going to sink at least one non-speculatable instruction.
1778
15.5k
    LRI.reset();
1779
15.5k
    unsigned Idx = 0;
1780
15.5k
    bool Profitable = false;
1781
25.6k
    while (ProfitableToSinkInstruction(LRI) && 
Idx < ScanIdx24.2k
) {
1782
15.4k
      if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
1783
5.26k
        Profitable = true;
1784
5.26k
        break;
1785
5.26k
      }
1786
10.1k
      --LRI;
1787
10.1k
      ++Idx;
1788
10.1k
    }
1789
15.5k
    if (!Profitable)
1790
10.2k
      return false;
1791
5.26k
1792
5.26k
    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
1793
5.26k
    // We have a conditional edge and we're going to sink some instructions.
1794
5.26k
    // Insert a new block postdominating all blocks we're going to sink from.
1795
5.26k
    if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split"))
1796
0
      // Edges couldn't be split.
1797
0
      return false;
1798
5.26k
    Changed = true;
1799
5.26k
  }
1800
240k
1801
240k
  // Now that we've analyzed all potential sinking candidates, perform the
1802
240k
  // actual sink. We iteratively sink the last non-terminator of the source
1803
240k
  // blocks into their common successor unless doing so would require too
1804
240k
  // many PHI instructions to be generated (currently only one PHI is allowed
1805
240k
  // per sunk instruction).
1806
240k
  //
1807
240k
  // We can use InstructionsToSink to discount values needing PHI-merging that will
1808
240k
  // actually be sunk in a later iteration. This allows us to be more
1809
240k
  // aggressive in what we sink. This does allow a false positive where we
1810
240k
  // sink presuming a later value will also be sunk, but stop half way through
1811
240k
  // and never actually sink it which means we produce more PHIs than intended.
1812
240k
  // This is unlikely in practice though.
1813
262k
  
for (unsigned SinkIdx = 0; 229k
SinkIdx != ScanIdx;
++SinkIdx32.4k
) {
1814
46.6k
    LLVM_DEBUG(dbgs() << "SINK: Sink: "
1815
46.6k
                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
1816
46.6k
                      << "\n");
1817
46.6k
1818
46.6k
    // Because we've sunk every instruction in turn, the current instruction to
1819
46.6k
    // sink is always at index 0.
1820
46.6k
    LRI.reset();
1821
46.6k
    if (!ProfitableToSinkInstruction(LRI)) {
1822
14.1k
      // Too many PHIs would be created.
1823
14.1k
      LLVM_DEBUG(
1824
14.1k
          dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
1825
14.1k
      break;
1826
14.1k
    }
1827
32.4k
1828
32.4k
    if (!sinkLastInstruction(UnconditionalPreds))
1829
35
      return Changed;
1830
32.4k
    NumSinkCommons++;
1831
32.4k
    Changed = true;
1832
32.4k
  }
1833
229k
  
return Changed229k
;
1834
229k
}
1835
1836
/// Determine if we can hoist sink a sole store instruction out of a
1837
/// conditional block.
1838
///
1839
/// We are looking for code like the following:
1840
///   BrBB:
1841
///     store i32 %add, i32* %arrayidx2
1842
///     ... // No other stores or function calls (we could be calling a memory
1843
///     ... // function).
1844
///     %cmp = icmp ult %x, %y
1845
///     br i1 %cmp, label %EndBB, label %ThenBB
1846
///   ThenBB:
1847
///     store i32 %add5, i32* %arrayidx2
1848
///     br label EndBB
1849
///   EndBB:
1850
///     ...
1851
///   We are going to transform this into:
1852
///   BrBB:
1853
///     store i32 %add, i32* %arrayidx2
1854
///     ... //
1855
///     %cmp = icmp ult %x, %y
1856
///     %add.add5 = select i1 %cmp, i32 %add, %add5
1857
///     store i32 %add.add5, i32* %arrayidx2
1858
///     ...
1859
///
1860
/// \return The pointer to the value of the previous store if the store can be
1861
///         hoisted into the predecessor block. 0 otherwise.
1862
static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
1863
1.33M
                                     BasicBlock *StoreBB, BasicBlock *EndBB) {
1864
1.33M
  StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
1865
1.33M
  if (!StoreToHoist)
1866
1.11M
    return nullptr;
1867
217k
1868
217k
  // Volatile or atomic.
1869
217k
  if (!StoreToHoist->isSimple())
1870
5.61k
    return nullptr;
1871
211k
1872
211k
  Value *StorePtr = StoreToHoist->getPointerOperand();
1873
211k
1874
211k
  // Look for a store to the same pointer in BrBB.
1875
211k
  unsigned MaxNumInstToLookAt = 9;
1876
930k
  for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug())) {
1877
930k
    if (!MaxNumInstToLookAt)
1878
29.0k
      break;
1879
901k
    --MaxNumInstToLookAt;
1880
901k
1881
901k
    // Could be calling an instruction that affects memory like free().
1882
901k
    if (CurI.mayHaveSideEffects() && 
!isa<StoreInst>(CurI)90.7k
)
1883
52.6k
      return nullptr;
1884
848k
1885
848k
    if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
1886
38.1k
      // Found the previous store make sure it stores to the same location.
1887
38.1k
      if (SI->getPointerOperand() == StorePtr)
1888
1.37k
        // Found the previous store, return its value operand.
1889
1.37k
        return SI->getValueOperand();
1890
36.7k
      return nullptr; // Unknown store.
1891
36.7k
    }
1892
848k
  }
1893
211k
1894
211k
  
return nullptr121k
;
1895
211k
}
1896
1897
/// Speculate a conditional basic block flattening the CFG.
1898
///
1899
/// Note that this is a very risky transform currently. Speculating
1900
/// instructions like this is most often not desirable. Instead, there is an MI
1901
/// pass which can do it with full awareness of the resource constraints.
1902
/// However, some cases are "obvious" and we should do directly. An example of
1903
/// this is speculating a single, reasonably cheap instruction.
1904
///
1905
/// There is only one distinct advantage to flattening the CFG at the IR level:
1906
/// it makes very common but simplistic optimizations such as are common in
1907
/// instcombine and the DAG combiner more powerful by removing CFG edges and
1908
/// modeling their effects with easier to reason about SSA value graphs.
1909
///
1910
///
1911
/// An illustration of this transform is turning this IR:
1912
/// \code
1913
///   BB:
1914
///     %cmp = icmp ult %x, %y
1915
///     br i1 %cmp, label %EndBB, label %ThenBB
1916
///   ThenBB:
1917
///     %sub = sub %x, %y
1918
///     br label BB2
1919
///   EndBB:
1920
///     %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ]
1921
///     ...
1922
/// \endcode
1923
///
1924
/// Into this IR:
1925
/// \code
1926
///   BB:
1927
///     %cmp = icmp ult %x, %y
1928
///     %sub = sub %x, %y
1929
///     %cond = select i1 %cmp, 0, %sub
1930
///     ...
1931
/// \endcode
1932
///
1933
/// \returns true if the conditional block is removed.
1934
static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
1935
2.70M
                                   const TargetTransformInfo &TTI) {
1936
2.70M
  // Be conservative for now. FP select instruction can often be expensive.
1937
2.70M
  Value *BrCond = BI->getCondition();
1938
2.70M
  if (isa<FCmpInst>(BrCond))
1939
29.8k
    return false;
1940
2.67M
1941
2.67M
  BasicBlock *BB = BI->getParent();
1942
2.67M
  BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
1943
2.67M
1944
2.67M
  // If ThenBB is actually on the false edge of the conditional branch, remember
1945
2.67M
  // to swap the select operands later.
1946
2.67M
  bool Invert = false;
1947
2.67M
  if (ThenBB != BI->getSuccessor(0)) {
1948
1.43M
    assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
1949
1.43M
    Invert = true;
1950
1.43M
  }
1951
2.67M
  assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
1952
2.67M
1953
2.67M
  // Keep a count of how many times instructions are used within ThenBB when
1954
2.67M
  // they are candidates for sinking into ThenBB. Specifically:
1955
2.67M
  // - They are defined in BB, and
1956
2.67M
  // - They have no side effects, and
1957
2.67M
  // - All of their uses are in ThenBB.
1958
2.67M
  SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
1959
2.67M
1960
2.67M
  SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
1961
2.67M
1962
2.67M
  unsigned SpeculationCost = 0;
1963
2.67M
  Value *SpeculatedStoreValue = nullptr;
1964
2.67M
  StoreInst *SpeculatedStore = nullptr;
1965
2.67M
  for (BasicBlock::iterator BBI = ThenBB->begin(),
1966
2.67M
                            BBE = std::prev(ThenBB->end());
1967
3.99M
       BBI != BBE; 
++BBI1.31M
) {
1968
3.96M
    Instruction *I = &*BBI;
1969
3.96M
    // Skip debug info.
1970
3.96M
    if (isa<DbgInfoIntrinsic>(I)) {
1971
3
      SpeculatedDbgIntrinsics.push_back(I);
1972
3
      continue;
1973
3
    }
1974
3.96M
1975
3.96M
    // Only speculatively execute a single instruction (not counting the
1976
3.96M
    // terminator) for now.
1977
3.96M
    ++SpeculationCost;
1978
3.96M
    if (SpeculationCost > 1)
1979
1.31M
      return false;
1980
2.65M
1981
2.65M
    // Don't hoist the instruction if it's unsafe or expensive.
1982
2.65M
    if (!isSafeToSpeculativelyExecute(I) &&
1983
2.65M
        
!(1.33M
HoistCondStores1.33M
&& (SpeculatedStoreValue = isSafeToSpeculateStore(
1984
1.33M
                                  I, BB, ThenBB, EndBB))))
1985
1.33M
      return false;
1986
1.32M
    if (!SpeculatedStoreValue &&
1987
1.32M
        ComputeSpeculationCost(I, TTI) >
1988
1.31M
            PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
1989
3.50k
      return false;
1990
1.31M
1991
1.31M
    // Store the store speculation candidate.
1992
1.31M
    if (SpeculatedStoreValue)
1993
1.37k
      SpeculatedStore = cast<StoreInst>(I);
1994
1.31M
1995
1.31M
    // Do not hoist the instruction if any of its operands are defined but not
1996
1.31M
    // used in BB. The transformation will prevent the operand from
1997
1.31M
    // being sunk into the use block.
1998
4.25M
    for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; 
++i2.94M
) {
1999
2.94M
      Instruction *OpI = dyn_cast<Instruction>(*i);
2000
2.94M
      if (!OpI || 
OpI->getParent() != BB1.17M
||
OpI->mayHaveSideEffects()454k
)
2001
2.53M
        continue; // Not a candidate for sinking.
2002
401k
2003
401k
      ++SinkCandidateUseCounts[OpI];
2004
401k
    }
2005
1.31M
  }
2006
2.67M
2007
2.67M
  // Consider any sink candidates which are only used in ThenBB as costs for
2008
2.67M
  // speculation. Note, while we iterate over a DenseMap here, we are summing
2009
2.67M
  // and so iteration order isn't significant.
2010
2.67M
  for (SmallDenseMap<Instruction *, unsigned, 4>::iterator
2011
25.6k
           I = SinkCandidateUseCounts.begin(),
2012
25.6k
           E = SinkCandidateUseCounts.end();
2013
27.8k
       I != E; 
++I2.16k
)
2014
2.17k
    if (I->first->hasNUses(I->second)) {
2015
13
      ++SpeculationCost;
2016
13
      if (SpeculationCost > 1)
2017
13
        return false;
2018
13
    }
2019
25.6k
2020
25.6k
  // Check that the PHI nodes can be converted to selects.
2021
25.6k
  bool HaveRewritablePHIs = false;
2022
26.9k
  for (PHINode &PN : EndBB->phis()) {
2023
26.9k
    Value *OrigV = PN.getIncomingValueForBlock(BB);
2024
26.9k
    Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
2025
26.9k
2026
26.9k
    // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
2027
26.9k
    // Skip PHIs which are trivial.
2028
26.9k
    if (ThenV == OrigV)
2029
14.3k
      continue;
2030
12.6k
2031
12.6k
    // Don't convert to selects if we could remove undefined behavior instead.
2032
12.6k
    if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
2033
12.6k
        
passingValueIsAlwaysUndefined(ThenV, &PN)12.6k
)
2034
4
      return false;
2035
12.6k
2036
12.6k
    HaveRewritablePHIs = true;
2037
12.6k
    ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
2038
12.6k
    ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
2039
12.6k
    if (!OrigCE && 
!ThenCE12.3k
)
2040
12.3k
      continue; // Known safe and cheap.
2041
305
2042
305
    if ((ThenCE && 
!isSafeToSpeculativelyExecute(ThenCE)132
) ||
2043
305
        
(302
OrigCE302
&&
!isSafeToSpeculativelyExecute(OrigCE)224
))
2044
5
      return false;
2045
300
    unsigned OrigCost = OrigCE ? 
ComputeSpeculationCost(OrigCE, TTI)222
:
078
;
2046
300
    unsigned ThenCost = ThenCE ? 
ComputeSpeculationCost(ThenCE, TTI)129
:
0171
;
2047
300
    unsigned MaxCost =
2048
300
        2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
2049
300
    if (OrigCost + ThenCost > MaxCost)
2050
0
      return false;
2051
300
2052
300
    // Account for the cost of an unfolded ConstantExpr which could end up
2053
300
    // getting expanded into Instructions.
2054
300
    // FIXME: This doesn't account for how many operations are combined in the
2055
300
    // constant expression.
2056
300
    ++SpeculationCost;
2057
300
    if (SpeculationCost > 1)
2058
149
      return false;
2059
300
  }
2060
25.6k
2061
25.6k
  // If there are no PHIs to process, bail early. This helps ensure idempotence
2062
25.6k
  // as well.
2063
25.6k
  
if (25.5k
!HaveRewritablePHIs25.5k
&&
!(14.4k
HoistCondStores14.4k
&&
SpeculatedStoreValue14.4k
))
2064
13.7k
    return false;
2065
11.7k
2066
11.7k
  // If we get here, we can hoist the instruction and if-convert.
2067
11.7k
  LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
2068
11.7k
2069
11.7k
  // Insert a select of the value of the speculated store.
2070
11.7k
  if (SpeculatedStoreValue) {
2071
662
    IRBuilder<NoFolder> Builder(BI);
2072
662
    Value *TrueV = SpeculatedStore->getValueOperand();
2073
662
    Value *FalseV = SpeculatedStoreValue;
2074
662
    if (Invert)
2075
122
      std::swap(TrueV, FalseV);
2076
662
    Value *S = Builder.CreateSelect(
2077
662
        BrCond, TrueV, FalseV, "spec.store.select", BI);
2078
662
    SpeculatedStore->setOperand(0, S);
2079
662
    SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
2080
662
                                         SpeculatedStore->getDebugLoc());
2081
662
  }
2082
11.7k
2083
11.7k
  // Metadata can be dependent on the condition we are hoisting above.
2084
11.7k
  // Conservatively strip all metadata on the instruction.
2085
11.7k
  for (auto &I : *ThenBB)
2086
16.3k
    I.dropUnknownNonDebugMetadata();
2087
11.7k
2088
11.7k
  // Hoist the instructions.
2089
11.7k
  BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(),
2090
11.7k
                           ThenBB->begin(), std::prev(ThenBB->end()));
2091
11.7k
2092
11.7k
  // Insert selects and rewrite the PHI operands.
2093
11.7k
  IRBuilder<NoFolder> Builder(BI);
2094
13.3k
  for (PHINode &PN : EndBB->phis()) {
2095
13.3k
    unsigned OrigI = PN.getBasicBlockIndex(BB);
2096
13.3k
    unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
2097
13.3k
    Value *OrigV = PN.getIncomingValue(OrigI);
2098
13.3k
    Value *ThenV = PN.getIncomingValue(ThenI);
2099
13.3k
2100
13.3k
    // Skip PHIs which are trivial.
2101
13.3k
    if (OrigV == ThenV)
2102
857
      continue;
2103
12.4k
2104
12.4k
    // Create a select whose true value is the speculatively executed value and
2105
12.4k
    // false value is the preexisting value. Swap them if the branch
2106
12.4k
    // destinations were inverted.
2107
12.4k
    Value *TrueV = ThenV, *FalseV = OrigV;
2108
12.4k
    if (Invert)
2109
3.95k
      std::swap(TrueV, FalseV);
2110
12.4k
    Value *V = Builder.CreateSelect(
2111
12.4k
        BrCond, TrueV, FalseV, "spec.select", BI);
2112
12.4k
    PN.setIncomingValue(OrigI, V);
2113
12.4k
    PN.setIncomingValue(ThenI, V);
2114
12.4k
  }
2115
11.7k
2116
11.7k
  // Remove speculated dbg intrinsics.
2117
11.7k
  // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
2118
11.7k
  // dbg value for the different flows and inserting it after the select.
2119
11.7k
  for (Instruction *I : SpeculatedDbgIntrinsics)
2120
1
    I->eraseFromParent();
2121
11.7k
2122
11.7k
  ++NumSpeculations;
2123
11.7k
  return true;
2124
11.7k
}
2125
2126
/// Return true if we can thread a branch across this block.
2127
47.9k
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
2128
47.9k
  unsigned Size = 0;
2129
47.9k
2130
105k
  for (Instruction &I : BB->instructionsWithoutDebug()) {
2131
105k
    if (Size > 10)
2132
657
      return false; // Don't clone large BB's.
2133
104k
    ++Size;
2134
104k
2135
104k
    // We can only support instructions that do not define values that are
2136
104k
    // live outside of the current basic block.
2137
104k
    for (User *U : I.users()) {
2138
84.8k
      Instruction *UI = cast<Instruction>(U);
2139
84.8k
      if (UI->getParent() != BB || 
isa<PHINode>(UI)53.8k
)
2140
30.9k
        return false;
2141
84.8k
    }
2142
104k
2143
104k
    // Looks ok, continue checking.
2144
104k
  }
2145
47.9k
2146
47.9k
  
return true16.3k
;
2147
47.9k
}
2148
2149
/// If we have a conditional branch on a PHI node value that is defined in the
2150
/// same block as the branch and if any PHI entries are constants, thread edges
2151
/// corresponding to that entry to be branches to their ultimate destination.
2152
static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
2153
62.3k
                                AssumptionCache *AC) {
2154
62.3k
  BasicBlock *BB = BI->getParent();
2155
62.3k
  PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
2156
62.3k
  // NOTE: we currently cannot transform this case if the PHI node is used
2157
62.3k
  // outside of the block.
2158
62.3k
  if (!PN || 
PN->getParent() != BB57.0k
||
!PN->hasOneUse()56.8k
)
2159
7.37k
    return false;
2160
54.9k
2161
54.9k
  // Degenerate case of a single entry PHI.
2162
54.9k
  if (PN->getNumIncomingValues() == 1) {
2163
11.6k
    FoldSingleEntryPHINodes(PN->getParent());
2164
11.6k
    return true;
2165
11.6k
  }
2166
43.3k
2167
43.3k
  // Now we know that this block has multiple preds and two succs.
2168
43.3k
  if (!BlockIsSimpleEnoughToThreadThrough(BB))
2169
27.7k
    return false;
2170
15.6k
2171
15.6k
  // Can't fold blocks that contain noduplicate or convergent calls.
2172
35.1k
  
if (15.6k
any_of(*BB, [](const Instruction &I) 15.6k
{
2173
35.1k
        const CallInst *CI = dyn_cast<CallInst>(&I);
2174
35.1k
        return CI && 
(2.26k
CI->cannotDuplicate()2.26k
||
CI->isConvergent()2.26k
);
2175
35.1k
      }))
2176
25
    return false;
2177
15.5k
2178
15.5k
  // Okay, this is a simple enough basic block.  See if any phi values are
2179
15.5k
  // constants.
2180
42.4k
  
for (unsigned i = 0, e = PN->getNumIncomingValues(); 15.5k
i != e;
++i26.8k
) {
2181
34.1k
    ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
2182
34.1k
    if (!CB || 
!CB->getType()->isIntegerTy(1)7.30k
)
2183
26.8k
      continue;
2184
7.30k
2185
7.30k
    // Okay, we now know that all edges from PredBB should be revectored to
2186
7.30k
    // branch to RealDest.
2187
7.30k
    BasicBlock *PredBB = PN->getIncomingBlock(i);
2188
7.30k
    BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
2189
7.30k
2190
7.30k
    if (RealDest == BB)
2191
29
      continue; // Skip self loops.
2192
7.27k
    // Skip if the predecessor's terminator is an indirect branch.
2193
7.27k
    if (isa<IndirectBrInst>(PredBB->getTerminator()))
2194
6
      continue;
2195
7.27k
2196
7.27k
    // The dest block might have PHI nodes, other predecessors and other
2197
7.27k
    // difficult cases.  Instead of being smart about this, just insert a new
2198
7.27k
    // block that jumps to the destination block, effectively splitting
2199
7.27k
    // the edge we are about to create.
2200
7.27k
    BasicBlock *EdgeBB =
2201
7.27k
        BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge",
2202
7.27k
                           RealDest->getParent(), RealDest);
2203
7.27k
    BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB);
2204
7.27k
    CritEdgeBranch->setDebugLoc(BI->getDebugLoc());
2205
7.27k
2206
7.27k
    // Update PHI nodes.
2207
7.27k
    AddPredecessorToBlock(RealDest, EdgeBB, BB);
2208
7.27k
2209
7.27k
    // BB may have instructions that are being threaded over.  Clone these
2210
7.27k
    // instructions into EdgeBB.  We know that there will be no uses of the
2211
7.27k
    // cloned instructions outside of EdgeBB.
2212
7.27k
    BasicBlock::iterator InsertPt = EdgeBB->begin();
2213
7.27k
    DenseMap<Value *, Value *> TranslateMap; // Track translated values.
2214
17.3k
    for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; 
++BBI10.1k
) {
2215
10.1k
      if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
2216
7.50k
        TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
2217
7.50k
        continue;
2218
7.50k
      }
2219
2.61k
      // Clone the instruction.
2220
2.61k
      Instruction *N = BBI->clone();
2221
2.61k
      if (BBI->hasName())
2222
3
        N->setName(BBI->getName() + ".c");
2223
2.61k
2224
2.61k
      // Update operands due to translation.
2225
8.89k
      for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; 
++i6.28k
) {
2226
6.28k
        DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(*i);
2227
6.28k
        if (PI != TranslateMap.end())
2228
1.14k
          *i = PI->second;
2229
6.28k
      }
2230
2.61k
2231
2.61k
      // Check for trivial simplification.
2232
2.61k
      if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
2233
53
        if (!BBI->use_empty())
2234
53
          TranslateMap[&*BBI] = V;
2235
53
        if (!N->mayHaveSideEffects()) {
2236
53
          N->deleteValue(); // Instruction folded away, don't need actual inst
2237
53
          N = nullptr;
2238
53
        }
2239
2.55k
      } else {
2240
2.55k
        if (!BBI->use_empty())
2241
882
          TranslateMap[&*BBI] = N;
2242
2.55k
      }
2243
2.61k
      // Insert the new instruction into its new home.
2244
2.61k
      if (N)
2245
2.55k
        EdgeBB->getInstList().insert(InsertPt, N);
2246
2.61k
2247
2.61k
      // Register the new instruction with the assumption cache if necessary.
2248
2.61k
      if (auto *II = dyn_cast_or_null<IntrinsicInst>(N))
2249
1.09k
        if (II->getIntrinsicID() == Intrinsic::assume)
2250
2
          AC->registerAssumption(II);
2251
2.61k
    }
2252
7.27k
2253
7.27k
    // Loop over all of the edges from PredBB to BB, changing them to branch
2254
7.27k
    // to EdgeBB instead.
2255
7.27k
    Instruction *PredBBTI = PredBB->getTerminator();
2256
20.5k
    for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; 
++i13.2k
)
2257
13.2k
      if (PredBBTI->getSuccessor(i) == BB) {
2258
7.33k
        BB->removePredecessor(PredBB);
2259
7.33k
        PredBBTI->setSuccessor(i, EdgeBB);
2260
7.33k
      }
2261
7.27k
2262
7.27k
    // Recurse, simplifying any other constants.
2263
7.27k
    return FoldCondBranchOnPHI(BI, DL, AC) || 
true6.04k
;
2264
7.27k
  }
2265
15.5k
2266
15.5k
  
return false8.31k
;
2267
15.5k
}
2268
2269
/// Given a BB that starts with the specified two-entry PHI node,
2270
/// see if we can eliminate it.
2271
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
2272
4.66M
                                const DataLayout &DL) {
2273
4.66M
  // Ok, this is a two entry PHI node.  Check to see if this is a simple "if
2274
4.66M
  // statement", which has a very simple dominance structure.  Basically, we
2275
4.66M
  // are trying to find the condition that is being branched on, which
2276
4.66M
  // subsequently causes this merge to happen.  We really want control
2277
4.66M
  // dependence information for this check, but simplifycfg can't keep it up
2278
4.66M
  // to date, and this catches most of the cases we care about anyway.
2279
4.66M
  BasicBlock *BB = PN->getParent();
2280
4.66M
  const Function *Fn = BB->getParent();
2281
4.66M
  if (Fn && Fn->hasFnAttribute(Attribute::OptForFuzzing))
2282
1
    return false;
2283
4.66M
2284
4.66M
  BasicBlock *IfTrue, *IfFalse;
2285
4.66M
  Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
2286
4.66M
  if (!IfCond ||
2287
4.66M
      // Don't bother if the branch will be constant folded trivially.
2288
4.66M
      
isa<ConstantInt>(IfCond)885k
)
2289
3.77M
    return false;
2290
885k
2291
885k
  // Okay, we found that we can merge this two-entry phi node into a select.
2292
885k
  // Doing so would require us to fold *all* two entry phi nodes in this block.
2293
885k
  // At some point this becomes non-profitable (particularly if the target
2294
885k
  // doesn't support cmov's).  Only do this transformation if there are two or
2295
885k
  // fewer PHI nodes in this block.
2296
885k
  unsigned NumPhis = 0;
2297
1.91M
  for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); 
++NumPhis, ++I1.03M
)
2298
1.04M
    if (NumPhis > 2)
2299
6.55k
      return false;
2300
885k
2301
885k
  // Loop over the PHI's seeing if we can promote them all to select
2302
885k
  // instructions.  While we are at it, keep track of the instructions
2303
885k
  // that need to be moved to the dominating block.
2304
885k
  SmallPtrSet<Instruction *, 4> AggressiveInsts;
2305
878k
  unsigned MaxCostVal0 = PHINodeFoldingThreshold,
2306
878k
           MaxCostVal1 = PHINodeFoldingThreshold;
2307
878k
  MaxCostVal0 *= TargetTransformInfo::TCC_Basic;
2308
878k
  MaxCostVal1 *= TargetTransformInfo::TCC_Basic;
2309
878k
2310
1.13M
  for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
2311
963k
    PHINode *PN = cast<PHINode>(II++);
2312
963k
    if (Value *V = SimplifyInstruction(PN, {DL, PN})) {
2313
2.05k
      PN->replaceAllUsesWith(V);
2314
2.05k
      PN->eraseFromParent();
2315
2.05k
      continue;
2316
2.05k
    }
2317
961k
2318
961k
    if (!DominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
2319
961k
                             MaxCostVal0, TTI) ||
2320
961k
        !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
2321
498k
                             MaxCostVal1, TTI))
2322
702k
      return false;
2323
961k
  }
2324
878k
2325
878k
  // If we folded the first phi, PN dangles at this point.  Refresh it.  If
2326
878k
  // we ran out of PHIs then we simplified them all.
2327
878k
  PN = dyn_cast<PHINode>(BB->begin());
2328
175k
  if (!PN)
2329
1.19k
    return true;
2330
174k
2331
174k
  // Don't fold i1 branches on PHIs which contain binary operators.  These can
2332
174k
  // often be turned into switches and other things.
2333
174k
  if (PN->getType()->isIntegerTy(1) &&
2334
174k
      
(9.01k
isa<BinaryOperator>(PN->getIncomingValue(0))9.01k
||
2335
9.01k
       
isa<BinaryOperator>(PN->getIncomingValue(1))9.00k
||
2336
9.01k
       
isa<BinaryOperator>(IfCond)8.88k
))
2337
1.30k
    return false;
2338
173k
2339
173k
  // If all PHI nodes are promotable, check to make sure that all instructions
2340
173k
  // in the predecessor blocks can be promoted as well. If not, we won't be able
2341
173k
  // to get rid of the control flow, so it's not worth promoting to select
2342
173k
  // instructions.
2343
173k
  BasicBlock *DomBlock = nullptr;
2344
173k
  BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
2345
173k
  BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
2346
173k
  if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
2347
25.5k
    IfBlock1 = nullptr;
2348
147k
  } else {
2349
147k
    DomBlock = *pred_begin(IfBlock1);
2350
241k
    for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); 
++I93.6k
)
2351
223k
      if (!AggressiveInsts.count(&*I) && 
!isa<DbgInfoIntrinsic>(I)129k
) {
2352
129k
        // This is not an aggressive instruction that we can promote.
2353
129k
        // Because of this, we won't be able to get rid of the control flow, so
2354
129k
        // the xform is not worth it.
2355
129k
        return false;
2356
129k
      }
2357
147k
  }
2358
173k
2359
173k
  
if (44.0k
cast<BranchInst>(IfBlock2->getTerminator())->isConditional()44.0k
) {
2360
7.59k
    IfBlock2 = nullptr;
2361
36.4k
  } else {
2362
36.4k
    DomBlock = *pred_begin(IfBlock2);
2363
49.6k
    for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); 
++I13.2k
)
2364
39.4k
      if (!AggressiveInsts.count(&*I) && 
!isa<DbgInfoIntrinsic>(I)26.2k
) {
2365
26.1k
        // This is not an aggressive instruction that we can promote.
2366
26.1k
        // Because of this, we won't be able to get rid of the control flow, so
2367
26.1k
        // the xform is not worth it.
2368
26.1k
        return false;
2369
26.1k
      }
2370
36.4k
  }
2371
44.0k
2372
44.0k
  
LLVM_DEBUG17.8k
(dbgs() << "FOUND IF CONDITION! " << *IfCond
2373
17.8k
                    << "  T: " << IfTrue->getName()
2374
17.8k
                    << "  F: " << IfFalse->getName() << "\n");
2375
17.8k
2376
17.8k
  // If we can still promote the PHI nodes after this gauntlet of tests,
2377
17.8k
  // do all of the PHI's now.
2378
17.8k
  Instruction *InsertPt = DomBlock->getTerminator();
2379
17.8k
  IRBuilder<NoFolder> Builder(InsertPt);
2380
17.8k
2381
17.8k
  // Move all 'aggressive' instructions, which are defined in the
2382
17.8k
  // conditional parts of the if's up to the dominating block.
2383
17.8k
  if (IfBlock1)
2384
16.0k
    hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock1);
2385
17.8k
  if (IfBlock2)
2386
10.2k
    hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock2);
2387
17.8k
2388
37.3k
  while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
2389
19.5k
    // Change the PHI node into a select instruction.
2390
19.5k
    Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
2391
19.5k
    Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
2392
19.5k
2393
19.5k
    Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", InsertPt);
2394
19.5k
    PN->replaceAllUsesWith(Sel);
2395
19.5k
    Sel->takeName(PN);
2396
19.5k
    PN->eraseFromParent();
2397
19.5k
  }
2398
17.8k
2399
17.8k
  // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
2400
17.8k
  // has been flattened.  Change DomBlock to jump directly to our new block to
2401
17.8k
  // avoid other simplifycfg's kicking in on the diamond.
2402
17.8k
  Instruction *OldTI = DomBlock->getTerminator();
2403
17.8k
  Builder.SetInsertPoint(OldTI);
2404
17.8k
  Builder.CreateBr(BB);
2405
17.8k
  OldTI->eraseFromParent();
2406
17.8k
  return true;
2407
44.0k
}
2408
2409
/// If we found a conditional branch that goes to two returning blocks,
2410
/// try to merge them together into one return,
2411
/// introducing a select if the return values disagree.
2412
static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
2413
29.3k
                                           IRBuilder<> &Builder) {
2414
29.3k
  assert(BI->isConditional() && "Must be a conditional branch");
2415
29.3k
  BasicBlock *TrueSucc = BI->getSuccessor(0);
2416
29.3k
  BasicBlock *FalseSucc = BI->getSuccessor(1);
2417
29.3k
  ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
2418
29.3k
  ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
2419
29.3k
2420
29.3k
  // Check to ensure both blocks are empty (just a return) or optionally empty
2421
29.3k
  // with PHI nodes.  If there are other instructions, merging would cause extra
2422
29.3k
  // computation on one path or the other.
2423
29.3k
  if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator())
2424
14.3k
    return false;
2425
15.0k
  if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator())
2426
14.1k
    return false;
2427
913
2428
913
  Builder.SetInsertPoint(BI);
2429
913
  // Okay, we found a branch that is going to two return nodes.  If
2430
913
  // there is no return value for this function, just change the
2431
913
  // branch into a return.
2432
913
  if (FalseRet->getNumOperands() == 0) {
2433
12
    TrueSucc->removePredecessor(BI->getParent());
2434
12
    FalseSucc->removePredecessor(BI->getParent());
2435
12
    Builder.CreateRetVoid();
2436
12
    EraseTerminatorAndDCECond(BI);
2437
12
    return true;
2438
12
  }
2439
901
2440
901
  // Otherwise, figure out what the true and false return values are
2441
901
  // so we can insert a new select instruction.
2442
901
  Value *TrueValue = TrueRet->getReturnValue();
2443
901
  Value *FalseValue = FalseRet->getReturnValue();
2444
901
2445
901
  // Unwrap any PHI nodes in the return blocks.
2446
901
  if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
2447
887
    if (TVPN->getParent() == TrueSucc)
2448
887
      TrueValue = TVPN->getIncomingValueForBlock(BI->getParent());
2449
901
  if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
2450
887
    if (FVPN->getParent() == FalseSucc)
2451
887
      FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
2452
901
2453
901
  // In order for this transformation to be safe, we must be able to
2454
901
  // unconditionally execute both operands to the return.  This is
2455
901
  // normally the case, but we could have a potentially-trapping
2456
901
  // constant expression that prevents this transformation from being
2457
901
  // safe.
2458
901
  if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue))
2459
2
    if (TCV->canTrap())
2460
2
      return false;
2461
899
  if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
2462
0
    if (FCV->canTrap())
2463
0
      return false;
2464
899
2465
899
  // Okay, we collected all the mapped values and checked them for sanity, and
2466
899
  // defined to really do this transformation.  First, update the CFG.
2467
899
  TrueSucc->removePredecessor(BI->getParent());
2468
899
  FalseSucc->removePredecessor(BI->getParent());
2469
899
2470
899
  // Insert select instructions where needed.
2471
899
  Value *BrCond = BI->getCondition();
2472
899
  if (TrueValue) {
2473
899
    // Insert a select if the results differ.
2474
899
    if (TrueValue == FalseValue || 
isa<UndefValue>(FalseValue)2
) {
2475
897
    } else 
if (2
isa<UndefValue>(TrueValue)2
) {
2476
0
      TrueValue = FalseValue;
2477
2
    } else {
2478
2
      TrueValue =
2479
2
          Builder.CreateSelect(BrCond, TrueValue, FalseValue, "retval", BI);
2480
2
    }
2481
899
  }
2482
899
2483
899
  Value *RI =
2484
899
      !TrueValue ? 
Builder.CreateRetVoid()0
: Builder.CreateRet(TrueValue);
2485
899
2486
899
  (void)RI;
2487
899
2488
899
  LLVM_DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
2489
899
                    << "\n  " << *BI << "NewRet = " << *RI << "TRUEBLOCK: "
2490
899
                    << *TrueSucc << "FALSEBLOCK: " << *FalseSucc);
2491
899
2492
899
  EraseTerminatorAndDCECond(BI);
2493
899
2494
899
  return true;
2495
899
}
2496
2497
/// Return true if the given instruction is available
2498
/// in its predecessor block. If yes, the instruction will be removed.
2499
2.69M
static bool tryCSEWithPredecessor(Instruction *Inst, BasicBlock *PB) {
2500
2.69M
  if (!isa<BinaryOperator>(Inst) && 
!isa<CmpInst>(Inst)2.55M
)
2501
2.55M
    return false;
2502
785k
  
for (Instruction &I : *PB)139k
{
2503
785k
    Instruction *PBI = &I;
2504
785k
    // Check whether Inst and PBI generate the same value.
2505
785k
    if (Inst->isIdenticalTo(PBI)) {
2506
44
      Inst->replaceAllUsesWith(PBI);
2507
44
      Inst->eraseFromParent();
2508
44
      return true;
2509
44
    }
2510
785k
  }
2511
139k
  
return false139k
;
2512
139k
}
2513
2514
/// Return true if either PBI or BI has branch weight available, and store
2515
/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
2516
/// not have branch weight, use 1:1 as its weight.
2517
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
2518
                                   uint64_t &PredTrueWeight,
2519
                                   uint64_t &PredFalseWeight,
2520
                                   uint64_t &SuccTrueWeight,
2521
26.4k
                                   uint64_t &SuccFalseWeight) {
2522
26.4k
  bool PredHasWeights =
2523
26.4k
      PBI->extractProfMetadata(PredTrueWeight, PredFalseWeight);
2524
26.4k
  bool SuccHasWeights =
2525
26.4k
      BI->extractProfMetadata(SuccTrueWeight, SuccFalseWeight);
2526
26.4k
  if (PredHasWeights || 
SuccHasWeights25.9k
) {
2527
518
    if (!PredHasWeights)
2528
33
      PredTrueWeight = PredFalseWeight = 1;
2529
518
    if (!SuccHasWeights)
2530
414
      SuccTrueWeight = SuccFalseWeight = 1;
2531
518
    return true;
2532
25.8k
  } else {
2533
25.8k
    return false;
2534
25.8k
  }
2535
26.4k
}
2536
2537
/// If this basic block is simple enough, and if a predecessor branches to us
2538
/// and one of our successors, fold the block into the predecessor and use
2539
/// logical operations to pick the right destination.
2540
bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
2541
26.0M
                                  unsigned BonusInstThreshold) {
2542
26.0M
  BasicBlock *BB = BI->getParent();
2543
26.0M
2544
26.0M
  const unsigned PredCount = pred_size(BB);
2545
26.0M
2546
26.0M
  Instruction *Cond = nullptr;
2547
26.0M
  if (BI->isConditional())
2548
15.5M
    Cond = dyn_cast<Instruction>(BI->getCondition());
2549
10.4M
  else {
2550
10.4M
    // For unconditional branch, check for a simple CFG pattern, where
2551
10.4M
    // BB has a single predecessor and BB's successor is also its predecessor's
2552
10.4M
    // successor. If such pattern exists, check for CSE between BB and its
2553
10.4M
    // predecessor.
2554
10.4M
    if (BasicBlock *PB = BB->getSinglePredecessor())
2555
8.16M
      if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator()))
2556
7.59M
        if (PBI->isConditional() &&
2557
7.59M
            
(7.59M
BI->getSuccessor(0) == PBI->getSuccessor(0)7.59M
||
2558
7.59M
             
BI->getSuccessor(0) == PBI->getSuccessor(1)6.14M
)) {
2559
2.70M
          for (auto I = BB->instructionsWithoutDebug().begin(),
2560
2.70M
                    E = BB->instructionsWithoutDebug().end();
2561
2.70M
               I != E;) {
2562
2.70M
            Instruction *Curr = &*I++;
2563
2.70M
            if (isa<CmpInst>(Curr)) {
2564
13.0k
              Cond = Curr;
2565
13.0k
              break;
2566
13.0k
            }
2567
2.69M
            // Quit if we can't remove this instruction.
2568
2.69M
            if (!tryCSEWithPredecessor(Curr, PB))
2569
2.69M
              return false;
2570
2.69M
          }
2571
2.70M
        }
2572
10.4M
2573
10.4M
    
if (7.77M
!Cond7.77M
)
2574
7.76M
      return false;
2575
15.5M
  }
2576
15.5M
2577
15.5M
  if (!Cond || 
(15.5M
!isa<CmpInst>(Cond)15.5M
&&
!isa<BinaryOperator>(Cond)1.49M
) ||
2578
15.5M
      
Cond->getParent() != BB14.9M
||
!Cond->hasOneUse()14.2M
)
2579
1.45M
    return false;
2580
14.0M
2581
14.0M
  // Make sure the instruction after the condition is the cond branch.
2582
14.0M
  BasicBlock::iterator CondIt = ++Cond->getIterator();
2583
14.0M
2584
14.0M
  // Ignore dbg intrinsics.
2585
14.0M
  while (isa<DbgInfoIntrinsic>(CondIt))
2586
4
    ++CondIt;
2587
14.0M
2588
14.0M
  if (&*CondIt != BI)
2589
568k
    return false;
2590
13.5M
2591
13.5M
  // Only allow this transformation if computing the condition doesn't involve
2592
13.5M
  // too many instructions and these involved instructions can be executed
2593
13.5M
  // unconditionally. We denote all involved instructions except the condition
2594
13.5M
  // as "bonus instructions", and only allow this transformation when the
2595
13.5M
  // number of the bonus instructions we'll need to create when cloning into
2596
13.5M
  // each predecessor does not exceed a certain threshold.
2597
13.5M
  unsigned NumBonusInsts = 0;
2598
16.8M
  for (auto I = BB->begin(); Cond != &*I; 
++I3.31M
) {
2599
14.2M
    // Ignore dbg intrinsics.
2600
14.2M
    if (isa<DbgInfoIntrinsic>(I))
2601
24
      continue;
2602
14.2M
    if (!I->hasOneUse() || 
!isSafeToSpeculativelyExecute(&*I)7.79M
)
2603
9.15M
      return false;
2604
5.12M
    // I has only one use and can be executed unconditionally.
2605
5.12M
    Instruction *User = dyn_cast<Instruction>(I->user_back());
2606
5.12M
    if (User == nullptr || User->getParent() != BB)
2607
98.3k
      return false;
2608
5.02M
    // I is used in the same BB. Since BI uses Cond and doesn't have more slots
2609
5.02M
    // to use any other instruction, User must be an instruction between next(I)
2610
5.02M
    // and Cond.
2611
5.02M
2612
5.02M
    // Account for the cost of duplicating this instruction into each
2613
5.02M
    // predecessor.
2614
5.02M
    NumBonusInsts += PredCount;
2615
5.02M
    // Early exits once we reach the limit.
2616
5.02M
    if (NumBonusInsts > BonusInstThreshold)
2617
1.71M
      return false;
2618
5.02M
  }
2619
13.5M
2620
13.5M
  // Cond is known to be a compare or binary operator.  Check to make sure that
2621
13.5M
  // neither operand is a potentially-trapping constant expression.
2622
13.5M
  
if (ConstantExpr *2.56M
CE2.56M
= dyn_cast<ConstantExpr>(Cond->getOperand(0)))
2623
15
    if (CE->canTrap())
2624
1
      return false;
2625
2.56M
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
2626
2.38k
    if (CE->canTrap())
2627
0
      return false;
2628
2.56M
2629
2.56M
  // Finally, don't infinitely unroll conditional loops.
2630
2.56M
  BasicBlock *TrueDest = BI->getSuccessor(0);
2631
2.56M
  BasicBlock *FalseDest = (BI->isConditional()) ? 
BI->getSuccessor(1)2.56M
:
nullptr120
;
2632
2.56M
  if (TrueDest == BB || 
FalseDest == BB2.56M
)
2633
21
    return false;
2634
2.56M
2635
4.94M
  
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); 2.56M
PI != E;
++PI2.38M
) {
2636
2.40M
    BasicBlock *PredBlock = *PI;
2637
2.40M
    BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
2638
2.40M
2639
2.40M
    // Check that we have two conditional branches.  If there is a PHI node in
2640
2.40M
    // the common successor, verify that the same value flows in from both
2641
2.40M
    // blocks.
2642
2.40M
    SmallVector<PHINode *, 4> PHIs;
2643
2.40M
    if (!PBI || 
PBI->isUnconditional()2.35M
||
2644
2.40M
        
(2.22M
BI->isConditional()2.22M
&&
!SafeToMergeTerminators(BI, PBI)2.22M
) ||
2645
2.40M
        
(2.15M
!BI->isConditional()2.15M
&&
2646
2.15M
         
!isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs)120
))
2647
249k
      continue;
2648
2.15M
2649
2.15M
    // Determine if the two branches share a common destination.
2650
2.15M
    Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd;
2651
2.15M
    bool InvertPredCond = false;
2652
2.15M
2653
2.15M
    if (BI->isConditional()) {
2654
2.15M
      if (PBI->getSuccessor(0) == TrueDest) {
2655
9.12k
        Opc = Instruction::Or;
2656
2.14M
      } else if (PBI->getSuccessor(1) == FalseDest) {
2657
6.36k
        Opc = Instruction::And;
2658
2.14M
      } else if (PBI->getSuccessor(0) == FalseDest) {
2659
8.13k
        Opc = Instruction::And;
2660
8.13k
        InvertPredCond = true;
2661
2.13M
      } else if (PBI->getSuccessor(1) == TrueDest) {
2662
1.93k
        Opc = Instruction::Or;
2663
1.93k
        InvertPredCond = true;
2664
2.13M
      } else {
2665
2.13M
        continue;
2666
2.13M
      }
2667
1
    } else {
2668
1
      if (PBI->getSuccessor(0) != TrueDest && 
PBI->getSuccessor(1) != TrueDest0
)
2669
0
        continue;
2670
25.5k
    }
2671
25.5k
2672
25.5k
    LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
2673
25.5k
    IRBuilder<> Builder(PBI);
2674
25.5k
2675
25.5k
    // If we need to invert the condition in the pred block to match, do so now.
2676
25.5k
    if (InvertPredCond) {
2677
10.0k
      Value *NewCond = PBI->getCondition();
2678
10.0k
2679
10.0k
      if (NewCond->hasOneUse() && 
isa<CmpInst>(NewCond)9.75k
) {
2680
9.17k
        CmpInst *CI = cast<CmpInst>(NewCond);
2681
9.17k
        CI->setPredicate(CI->getInversePredicate());
2682
9.17k
      } else {
2683
887
        NewCond =
2684
887
            Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
2685
887
      }
2686
10.0k
2687
10.0k
      PBI->setCondition(NewCond);
2688
10.0k
      PBI->swapSuccessors();
2689
10.0k
    }
2690
25.5k
2691
25.5k
    // If we have bonus instructions, clone them into the predecessor block.
2692
25.5k
    // Note that there may be multiple predecessor blocks, so we cannot move
2693
25.5k
    // bonus instructions to a predecessor block.
2694
25.5k
    ValueToValueMapTy VMap; // maps original values to cloned values
2695
25.5k
    // We already make sure Cond is the last instruction before BI. Therefore,
2696
25.5k
    // all instructions before Cond other than DbgInfoIntrinsic are bonus
2697
25.5k
    // instructions.
2698
37.0k
    for (auto BonusInst = BB->begin(); Cond != &*BonusInst; 
++BonusInst11.5k
) {
2699
11.5k
      if (isa<DbgInfoIntrinsic>(BonusInst))
2700
0
        continue;
2701
11.5k
      Instruction *NewBonusInst = BonusInst->clone();
2702
11.5k
      RemapInstruction(NewBonusInst, VMap,
2703
11.5k
                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
2704
11.5k
      VMap[&*BonusInst] = NewBonusInst;
2705
11.5k
2706
11.5k
      // If we moved a load, we cannot any longer claim any knowledge about
2707
11.5k
      // its potential value. The previous information might have been valid
2708
11.5k
      // only given the branch precondition.
2709
11.5k
      // For an analogous reason, we must also drop all the metadata whose
2710
11.5k
      // semantics we don't understand.
2711
11.5k
      NewBonusInst->dropUnknownNonDebugMetadata();
2712
11.5k
2713
11.5k
      PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst);
2714
11.5k
      NewBonusInst->takeName(&*BonusInst);
2715
11.5k
      BonusInst->setName(BonusInst->getName() + ".old");
2716
11.5k
    }
2717
25.5k
2718
25.5k
    // Clone Cond into the predecessor basic block, and or/and the
2719
25.5k
    // two conditions together.
2720
25.5k
    Instruction *CondInPred = Cond->clone();
2721
25.5k
    RemapInstruction(CondInPred, VMap,
2722
25.5k
                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
2723
25.5k
    PredBlock->getInstList().insert(PBI->getIterator(), CondInPred);
2724
25.5k
    CondInPred->takeName(Cond);
2725
25.5k
    Cond->setName(CondInPred->getName() + ".old");
2726
25.5k
2727
25.5k
    if (BI->isConditional()) {
2728
25.5k
      Instruction *NewCond = cast<Instruction>(
2729
25.5k
          Builder.CreateBinOp(Opc, PBI->getCondition(), CondInPred, "or.cond"));
2730
25.5k
      PBI->setCondition(NewCond);
2731
25.5k
2732
25.5k
      uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
2733
25.5k
      bool HasWeights =
2734
25.5k
          extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
2735
25.5k
                                 SuccTrueWeight, SuccFalseWeight);
2736
25.5k
      SmallVector<uint64_t, 8> NewWeights;
2737
25.5k
2738
25.5k
      if (PBI->getSuccessor(0) == BB) {
2739
14.4k
        if (HasWeights) {
2740
187
          // PBI: br i1 %x, BB, FalseDest
2741
187
          // BI:  br i1 %y, TrueDest, FalseDest
2742
187
          // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
2743
187
          NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
2744
187
          // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
2745
187
          //               TrueWeight for PBI * FalseWeight for BI.
2746
187
          // We assume that total weights of a BranchInst can fit into 32 bits.
2747
187
          // Therefore, we will not have overflow using 64-bit arithmetic.
2748
187
          NewWeights.push_back(PredFalseWeight *
2749
187
                                   (SuccFalseWeight + SuccTrueWeight) +
2750
187
                               PredTrueWeight * SuccFalseWeight);
2751
187
        }
2752
14.4k
        AddPredecessorToBlock(TrueDest, PredBlock, BB, MSSAU);
2753
14.4k
        PBI->setSuccessor(0, TrueDest);
2754
14.4k
      }
2755
25.5k
      if (PBI->getSuccessor(1) == BB) {
2756
11.0k
        if (HasWeights) {
2757
327
          // PBI: br i1 %x, TrueDest, BB
2758
327
          // BI:  br i1 %y, TrueDest, FalseDest
2759
327
          // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
2760
327
          //              FalseWeight for PBI * TrueWeight for BI.
2761
327
          NewWeights.push_back(PredTrueWeight *
2762
327
                                   (SuccFalseWeight + SuccTrueWeight) +
2763
327
                               PredFalseWeight * SuccTrueWeight);
2764
327
          // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
2765
327
          NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
2766
327
        }
2767
11.0k
        AddPredecessorToBlock(FalseDest, PredBlock, BB, MSSAU);
2768
11.0k
        PBI->setSuccessor(1, FalseDest);
2769
11.0k
      }
2770
25.5k
      if (NewWeights.size() == 2) {
2771
514
        // Halve the weights if any of them cannot fit in an uint32_t
2772
514
        FitWeights(NewWeights);
2773
514
2774
514
        SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),
2775
514
                                           NewWeights.end());
2776
514
        setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
2777
514
      } else
2778
25.0k
        PBI->setMetadata(LLVMContext::MD_prof, nullptr);
2779
25.5k
    } else {
2780
1
      // Update PHI nodes in the common successors.
2781
2
      for (unsigned i = 0, e = PHIs.size(); i != e; 
++i1
) {
2782
1
        ConstantInt *PBI_C = cast<ConstantInt>(
2783
1
            PHIs[i]->getIncomingValueForBlock(PBI->getParent()));
2784
1
        assert(PBI_C->getType()->isIntegerTy(1));
2785
1
        Instruction *MergedCond = nullptr;
2786
1
        if (PBI->getSuccessor(0) == TrueDest) {
2787
1
          // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value)
2788
1
          // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value)
2789
1
          //       is false: !PBI_Cond and BI_Value
2790
1
          Instruction *NotCond = cast<Instruction>(
2791
1
              Builder.CreateNot(PBI->getCondition(), "not.cond"));
2792
1
          MergedCond = cast<Instruction>(
2793
1
               Builder.CreateBinOp(Instruction::And, NotCond, CondInPred,
2794
1
                                   "and.cond"));
2795
1
          if (PBI_C->isOne())
2796
0
            MergedCond = cast<Instruction>(Builder.CreateBinOp(
2797
0
                Instruction::Or, PBI->getCondition(), MergedCond, "or.cond"));
2798
1
        } else {
2799
0
          // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C)
2800
0
          // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond)
2801
0
          //       is false: PBI_Cond and BI_Value
2802
0
          MergedCond = cast<Instruction>(Builder.CreateBinOp(
2803
0
              Instruction::And, PBI->getCondition(), CondInPred, "and.cond"));
2804
0
          if (PBI_C->isOne()) {
2805
0
            Instruction *NotCond = cast<Instruction>(
2806
0
                Builder.CreateNot(PBI->getCondition(), "not.cond"));
2807
0
            MergedCond = cast<Instruction>(Builder.CreateBinOp(
2808
0
                Instruction::Or, NotCond, MergedCond, "or.cond"));
2809
0
          }
2810
0
        }
2811
1
        // Update PHI Node.
2812
1
  PHIs[i]->setIncomingValueForBlock(PBI->getParent(), MergedCond);
2813
1
      }
2814
1
2815
1
      // PBI is changed to branch to TrueDest below. Remove itself from
2816
1
      // potential phis from all other successors.
2817
1
      if (MSSAU)
2818
0
        MSSAU->changeCondBranchToUnconditionalTo(PBI, TrueDest);
2819
1
2820
1
      // Change PBI from Conditional to Unconditional.
2821
1
      BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI);
2822
1
      EraseTerminatorAndDCECond(PBI, MSSAU);
2823
1
      PBI = New_PBI;
2824
1
    }
2825
25.5k
2826
25.5k
    // If BI was a loop latch, it may have had associated loop metadata.
2827
25.5k
    // We need to copy it to the new latch, that is, PBI.
2828
25.5k
    if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
2829
15
      PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
2830
25.5k
2831
25.5k
    // TODO: If BB is reachable from all paths through PredBlock, then we
2832
25.5k
    // could replace PBI's branch probabilities with BI's.
2833
25.5k
2834
25.5k
    // Copy any debug value intrinsics into the end of PredBlock.
2835
25.5k
    for (Instruction &I : *BB)
2836
62.6k
      if (isa<DbgInfoIntrinsic>(I))
2837
0
        I.clone()->insertBefore(PBI);
2838
25.5k
2839
25.5k
    return true;
2840
25.5k
  }
2841
2.56M
  
return false2.54M
;
2842
2.56M
}
2843
2844
// If there is only one store in BB1 and BB2, return it, otherwise return
2845
// nullptr.
2846
2.07k
static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
2847
2.07k
  StoreInst *S = nullptr;
2848
4.08k
  for (auto *BB : {BB1, BB2}) {
2849
4.08k
    if (!BB)
2850
1.95k
      continue;
2851
2.13k
    for (auto &I : *BB)
2852
4.60k
      if (auto *SI = dyn_cast<StoreInst>(&I)) {
2853
2.26k
        if (S)
2854
187
          // Multiple stores seen.
2855
187
          return nullptr;
2856
2.07k
        else
2857
2.07k
          S = SI;
2858
2.26k
      }
2859
2.13k
  }
2860
2.07k
  
return S1.88k
;
2861
2.07k
}
2862
2863
static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
2864
118
                                              Value *AlternativeV = nullptr) {
2865
118
  // PHI is going to be a PHI node that allows the value V that is defined in
2866
118
  // BB to be referenced in BB's only successor.
2867
118
  //
2868
118
  // If AlternativeV is nullptr, the only value we care about in PHI is V. It
2869
118
  // doesn't matter to us what the other operand is (it'll never get used). We
2870
118
  // could just create a new PHI with an undef incoming value, but that could
2871
118
  // increase register pressure if EarlyCSE/InstCombine can't fold it with some
2872
118
  // other PHI. So here we directly look for some PHI in BB's successor with V
2873
118
  // as an incoming operand. If we find one, we use it, else we create a new
2874
118
  // one.
2875
118
  //
2876
118
  // If AlternativeV is not nullptr, we care about both incoming values in PHI.
2877
118
  // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
2878
118
  // where OtherBB is the single other predecessor of BB's only successor.
2879
118
  PHINode *PHI = nullptr;
2880
118
  BasicBlock *Succ = BB->getSingleSuccessor();
2881
118
2882
123
  for (auto I = Succ->begin(); isa<PHINode>(I); 
++I5
)
2883
33
    if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
2884
28
      PHI = cast<PHINode>(I);
2885
28
      if (!AlternativeV)
2886
14
        break;
2887
14
2888
14
      assert(Succ->hasNPredecessors(2));
2889
14
      auto PredI = pred_begin(Succ);
2890
14
      BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : 
*PredI0
;
2891
14
      if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
2892
14
        break;
2893
0
      PHI = nullptr;
2894
0
    }
2895
118
  if (PHI)
2896
28
    return PHI;
2897
90
2898
90
  // If V is not an instruction defined in BB, just return it.
2899
90
  if (!AlternativeV &&
2900
90
      
(45
!isa<Instruction>(V)45
||
cast<Instruction>(V)->getParent() != BB20
))
2901
41
    return V;
2902
49
2903
49
  PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front());
2904
49
  PHI->addIncoming(V, BB);
2905
49
  for (BasicBlock *PredBB : predecessors(Succ))
2906
98
    if (PredBB != BB)
2907
49
      PHI->addIncoming(
2908
49
          AlternativeV ? 
AlternativeV45
:
UndefValue::get(V->getType())4
, PredBB);
2909
49
  return PHI;
2910
49
}
2911
2912
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
2913
                                           BasicBlock *QTB, BasicBlock *QFB,
2914
                                           BasicBlock *PostBB, Value *Address,
2915
                                           bool InvertPCond, bool InvertQCond,
2916
33.9k
                                           const DataLayout &DL) {
2917
38.8k
  auto IsaBitcastOfPointerType = [](const Instruction &I) {
2918
38.8k
    return Operator::getOpcode(&I) == Instruction::BitCast &&
2919
38.8k
           
I.getType()->isPointerTy()6.80k
;
2920
38.8k
  };
2921
33.9k
2922
33.9k
  // If we're not in aggressive mode, we only optimize if we have some
2923
33.9k
  // confidence that by optimizing we'll allow P and/or Q to be if-converted.
2924
66.4k
  auto IsWorthwhile = [&](BasicBlock *BB) {
2925
66.4k
    if (!BB)
2926
30.4k
      return true;
2927
35.9k
    // Heuristic: if the block can be if-converted/phi-folded and the
2928
35.9k
    // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
2929
35.9k
    // thread this store.
2930
35.9k
    unsigned N = 0;
2931
60.8k
    for (auto &I : BB->instructionsWithoutDebug()) {
2932
60.8k
      // Cheap instructions viable for folding.
2933
60.8k
      if (isa<BinaryOperator>(I) || 
isa<GetElementPtrInst>(I)55.6k
||
2934
60.8k
          
isa<StoreInst>(I)52.5k
)
2935
18.0k
        ++N;
2936
42.7k
      // Free instructions.
2937
42.7k
      else if (I.isTerminator() || 
IsaBitcastOfPointerType(I)38.8k
)
2938
10.6k
        continue;
2939
32.0k
      else
2940
32.0k
        return false;
2941
60.8k
    }
2942
35.9k
    // The store we want to merge is counted in N, so add 1 to make sure
2943
35.9k
    // we're counting the instructions that would be left.
2944
35.9k
    
return N <= (PHINodeFoldingThreshold + 1)3.88k
;
2945
35.9k
  };
2946
33.9k
2947
33.9k
  if (!MergeCondStoresAggressively &&
2948
33.9k
      (!IsWorthwhile(PTB) || 
!IsWorthwhile(PFB)29.2k
||
!IsWorthwhile(QTB)1.64k
||
2949
33.9k
       
!IsWorthwhile(QFB)1.61k
))
2950
32.9k
    return false;
2951
1.03k
2952
1.03k
  // For every pointer, there must be exactly two stores, one coming from
2953
1.03k
  // PTB or PFB, and the other from QTB or QFB. We don't support more than one
2954
1.03k
  // store (to any address) in PTB,PFB or QTB,QFB.
2955
1.03k
  // FIXME: We could relax this restriction with a bit more work and performance
2956
1.03k
  // testing.
2957
1.03k
  StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
2958
1.03k
  StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
2959
1.03k
  if (!PStore || 
!QStore918
)
2960
139
    return false;
2961
899
2962
899
  // Now check the stores are compatible.
2963
899
  if (!QStore->isUnordered() || !PStore->isUnordered())
2964
0
    return false;
2965
899
2966
899
  // Check that sinking the store won't cause program behavior changes. Sinking
2967
899
  // the store out of the Q blocks won't change any behavior as we're sinking
2968
899
  // from a block to its unconditional successor. But we're moving a store from
2969
899
  // the P blocks down through the middle block (QBI) and past both QFB and QTB.
2970
899
  // So we need to check that there are no aliasing loads or stores in
2971
899
  // QBI, QTB and QFB. We also need to check there are no conflicting memory
2972
899
  // operations between PStore and the end of its parent block.
2973
899
  //
2974
899
  // The ideal way to do this is to query AliasAnalysis, but we don't
2975
899
  // preserve AA currently so that is dangerous. Be super safe and just
2976
899
  // check there are no other memory operations at all.
2977
899
  for (auto &I : *QFB->getSinglePredecessor())
2978
1.98k
    if (I.mayReadOrWriteMemory())
2979
840
      return false;
2980
899
  
for (auto &I : *QFB)59
2981
145
    if (&I != QStore && 
I.mayReadOrWriteMemory()86
)
2982
0
      return false;
2983
59
  if (QTB)
2984
1
    for (auto &I : *QTB)
2985
2
      if (&I != QStore && I.mayReadOrWriteMemory())
2986
0
        return false;
2987
59
  for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
2988
177
       I != E; 
++I118
)
2989
118
    if (&*I != PStore && 
I->mayReadOrWriteMemory()59
)
2990
0
      return false;
2991
59
2992
59
  // If PostBB has more than two predecessors, we need to split it so we can
2993
59
  // sink the store.
2994
59
  if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
2995
6
    // We know that QFB's only successor is PostBB. And QFB has a single
2996
6
    // predecessor. If QTB exists, then its only successor is also PostBB.
2997
6
    // If QTB does not exist, then QFB's only predecessor has a conditional
2998
6
    // branch to QFB and PostBB.
2999
6
    BasicBlock *TruePred = QTB ? 
QTB0
: QFB->getSinglePredecessor();
3000
6
    BasicBlock *NewBB = SplitBlockPredecessors(PostBB, { QFB, TruePred},
3001
6
                                               "condstore.split");
3002
6
    if (!NewBB)
3003
0
      return false;
3004
6
    PostBB = NewBB;
3005
6
  }
3006
59
3007
59
  // OK, we're going to sink the stores to PostBB. The store has to be
3008
59
  // conditional though, so first create the predicate.
3009
59
  Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
3010
59
                     ->getCondition();
3011
59
  Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
3012
59
                     ->getCondition();
3013
59
3014
59
  Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(),
3015
59
                                                PStore->getParent());
3016
59
  Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(),
3017
59
                                                QStore->getParent(), PPHI);
3018
59
3019
59
  IRBuilder<> QB(&*PostBB->getFirstInsertionPt());
3020
59
3021
59
  Value *PPred = PStore->getParent() == PTB ? 
PCond0
: QB.CreateNot(PCond);
3022
59
  Value *QPred = QStore->getParent() == QTB ? 
QCond0
: QB.CreateNot(QCond);
3023
59
3024
59
  if (InvertPCond)
3025
46
    PPred = QB.CreateNot(PPred);
3026
59
  if (InvertQCond)
3027
29
    QPred = QB.CreateNot(QPred);
3028
59
  Value *CombinedPred = QB.CreateOr(PPred, QPred);
3029
59
3030
59
  auto *T =
3031
59
      SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), false);
3032
59
  QB.SetInsertPoint(T);
3033
59
  StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
3034
59
  AAMDNodes AAMD;
3035
59
  PStore->getAAMetadata(AAMD, /*Merge=*/false);
3036
59
  PStore->getAAMetadata(AAMD, /*Merge=*/true);
3037
59
  SI->setAAMetadata(AAMD);
3038
59
  unsigned PAlignment = PStore->getAlignment();
3039
59
  unsigned QAlignment = QStore->getAlignment();
3040
59
  unsigned TypeAlignment =
3041
59
      DL.getABITypeAlignment(SI->getValueOperand()->getType());
3042
59
  unsigned MinAlignment;
3043
59
  unsigned MaxAlignment;
3044
59
  std::tie(MinAlignment, MaxAlignment) = std::minmax(PAlignment, QAlignment);
3045
59
  // Choose the minimum alignment. If we could prove both stores execute, we
3046
59
  // could use biggest one.  In this case, though, we only know that one of the
3047
59
  // stores executes.  And we don't know it's safe to take the alignment from a
3048
59
  // store that doesn't execute.
3049
59
  if (MinAlignment != 0) {
3050
49
    // Choose the minimum of all non-zero alignments.
3051
49
    SI->setAlignment(MinAlignment);
3052
49
  } else 
if (10
MaxAlignment != 010
) {
3053
4
    // Choose the minimal alignment between the non-zero alignment and the ABI
3054
4
    // default alignment for the type of the stored value.
3055
4
    SI->setAlignment(std::min(MaxAlignment, TypeAlignment));
3056
6
  } else {
3057
6
    // If both alignments are zero, use ABI default alignment for the type of
3058
6
    // the stored value.
3059
6
    SI->setAlignment(TypeAlignment);
3060
6
  }
3061
59
3062
59
  QStore->eraseFromParent();
3063
59
  PStore->eraseFromParent();
3064
59
3065
59
  return true;
3066
59
}
3067
3068
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
3069
19.4M
                                   const DataLayout &DL) {
3070
19.4M
  // The intention here is to find diamonds or triangles (see below) where each
3071
19.4M
  // conditional block contains a store to the same address. Both of these
3072
19.4M
  // stores are conditional, so they can't be unconditionally sunk. But it may
3073
19.4M
  // be profitable to speculatively sink the stores into one merged store at the
3074
19.4M
  // end, and predicate the merged store on the union of the two conditions of
3075
19.4M
  // PBI and QBI.
3076
19.4M
  //
3077
19.4M
  // This can reduce the number of stores executed if both of the conditions are
3078
19.4M
  // true, and can allow the blocks to become small enough to be if-converted.
3079
19.4M
  // This optimization will also chain, so that ladders of test-and-set
3080
19.4M
  // sequences can be if-converted away.
3081
19.4M
  //
3082
19.4M
  // We only deal with simple diamonds or triangles:
3083
19.4M
  //
3084
19.4M
  //     PBI       or      PBI        or a combination of the two
3085
19.4M
  //    /   \               | \
3086
19.4M
  //   PTB  PFB             |  PFB
3087
19.4M
  //    \   /               | /
3088
19.4M
  //     QBI                QBI
3089
19.4M
  //    /  \                | \
3090
19.4M
  //   QTB  QFB             |  QFB
3091
19.4M
  //    \  /                | /
3092
19.4M
  //    PostBB            PostBB
3093
19.4M
  //
3094
19.4M
  // We model triangles as a type of diamond with a nullptr "true" block.
3095
19.4M
  // Triangles are canonicalized so that the fallthrough edge is represented by
3096
19.4M
  // a true condition, as in the diagram above.
3097
19.4M
  BasicBlock *PTB = PBI->getSuccessor(0);
3098
19.4M
  BasicBlock *PFB = PBI->getSuccessor(1);
3099
19.4M
  BasicBlock *QTB = QBI->getSuccessor(0);
3100
19.4M
  BasicBlock *QFB = QBI->getSuccessor(1);
3101
19.4M
  BasicBlock *PostBB = QFB->getSingleSuccessor();
3102
19.4M
3103
19.4M
  // Make sure we have a good guess for PostBB. If QTB's only successor is
3104
19.4M
  // QFB, then QFB is a better PostBB.
3105
19.4M
  if (QTB->getSingleSuccessor() == QFB)
3106
1.40M
    PostBB = QFB;
3107
19.4M
3108
19.4M
  // If we couldn't find a good PostBB, stop.
3109
19.4M
  if (!PostBB)
3110
12.0M
    return false;
3111
7.38M
3112
7.38M
  bool InvertPCond = false, InvertQCond = false;
3113
7.38M
  // Canonicalize fallthroughs to the true branches.
3114
7.38M
  if (PFB == QBI->getParent()) {
3115
2.71M
    std::swap(PFB, PTB);
3116
2.71M
    InvertPCond = true;
3117
2.71M
  }
3118
7.38M
  if (QFB == PostBB) {
3119
1.41M
    std::swap(QFB, QTB);
3120
1.41M
    InvertQCond = true;
3121
1.41M
  }
3122
7.38M
3123
7.38M
  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
3124
7.38M
  // and QFB may not. Model fallthroughs as a nullptr block.
3125
7.38M
  if (PTB == QBI->getParent())
3126
5.52M
    PTB = nullptr;
3127
7.38M
  if (QTB == PostBB)
3128
3.20M
    QTB = nullptr;
3129
7.38M
3130
7.38M
  // Legality bailouts. We must have at least the non-fallthrough blocks and
3131
7.38M
  // the post-dominating block, and the non-fallthroughs must only have one
3132
7.38M
  // predecessor.
3133
8.46M
  auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
3134
8.46M
    return BB->getSinglePredecessor() == P && 
BB->getSingleSuccessor() == S5.49M
;
3135
8.46M
  };
3136
7.38M
  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
3137
7.38M
      
!HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB)835k
)
3138
6.59M
    return false;
3139
784k
  if ((PTB && 
!HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())72.5k
) ||
3140
784k
      
(783k
QTB783k
&&
!HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)171k
))
3141
107k
    return false;
3142
676k
  if (!QBI->getParent()->hasNUses(2))
3143
95.2k
    return false;
3144
581k
3145
581k
  // OK, this is a sequence of two diamonds or triangles.
3146
581k
  // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
3147
581k
  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
3148
1.16M
  for (auto *BB : {PTB, PFB}) {
3149
1.16M
    if (!BB)
3150
532k
      continue;
3151
630k
    for (auto &I : *BB)
3152
3.34M
      if (StoreInst *SI = dyn_cast<StoreInst>(&I))
3153
540k
        PStoreAddresses.insert(SI->getPointerOperand());
3154
630k
  }
3155
1.16M
  for (auto *BB : {QTB, QFB}) {
3156
1.16M
    if (!BB)
3157
521k
      continue;
3158
642k
    for (auto &I : *BB)
3159
3.32M
      if (StoreInst *SI = dyn_cast<StoreInst>(&I))
3160
549k
        QStoreAddresses.insert(SI->getPointerOperand());
3161
642k
  }
3162
581k
3163
581k
  set_intersect(PStoreAddresses, QStoreAddresses);
3164
581k
  // set_intersect mutates PStoreAddresses in place. Rename it here to make it
3165
581k
  // clear what it contains.
3166
581k
  auto &CommonAddresses = PStoreAddresses;
3167
581k
3168
581k
  bool Changed = false;
3169
581k
  for (auto *Address : CommonAddresses)
3170
33.9k
    Changed |= mergeConditionalStoreToAddress(
3171
33.9k
        PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL);
3172
581k
  return Changed;
3173
581k
}
3174
3175
/// If we have a conditional branch as a predecessor of another block,
3176
/// this function tries to simplify it.  We know
3177
/// that PBI and BI are both conditional branches, and BI is in one of the
3178
/// successor blocks of PBI - PBI branches to BI.
3179
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
3180
15.1M
                                           const DataLayout &DL) {
3181
15.1M
  assert(PBI->isConditional() && BI->isConditional());
3182
15.1M
  BasicBlock *BB = BI->getParent();
3183
15.1M
3184
15.1M
  // If this block ends with a branch instruction, and if there is a
3185
15.1M
  // predecessor that ends on a branch of the same condition, make
3186
15.1M
  // this conditional branch redundant.
3187
15.1M
  if (PBI->getCondition() == BI->getCondition() &&
3188
15.1M
      
PBI->getSuccessor(0) != PBI->getSuccessor(1)4.61k
) {
3189
4.60k
    // Okay, the outcome of this conditional branch is statically
3190
4.60k
    // knowable.  If this block had a single pred, handle specially.
3191
4.60k
    if (BB->getSinglePredecessor()) {
3192
0
      // Turn this into a branch on constant.
3193
0
      bool CondIsTrue = PBI->getSuccessor(0) == BB;
3194
0
      BI->setCondition(
3195
0
          ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
3196
0
      return true; // Nuke the branch on constant.
3197
0
    }
3198
4.60k
3199
4.60k
    // Otherwise, if there are multiple predecessors, insert a PHI that merges
3200
4.60k
    // in the constant and simplify the block result.  Subsequent passes of
3201
4.60k
    // simplifycfg will thread the block.
3202
4.60k
    if (BlockIsSimpleEnoughToThreadThrough(BB)) {
3203
706
      pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
3204
706
      PHINode *NewPN = PHINode::Create(
3205
706
          Type::getInt1Ty(BB->getContext()), std::distance(PB, PE),
3206
706
          BI->getCondition()->getName() + ".pr", &BB->front());
3207
706
      // Okay, we're going to insert the PHI node.  Since PBI is not the only
3208
706
      // predecessor, compute the PHI'd conditional value for all of the preds.
3209
706
      // Any predecessor where the condition is not computable we keep symbolic.
3210
2.61k
      for (pred_iterator PI = PB; PI != PE; 
++PI1.90k
) {
3211
1.90k
        BasicBlock *P = *PI;
3212
1.90k
        if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && 
PBI != BI1.88k
&&
3213
1.90k
            
PBI->isConditional()1.85k
&&
PBI->getCondition() == BI->getCondition()1.40k
&&
3214
1.90k
            
PBI->getSuccessor(0) != PBI->getSuccessor(1)1.08k
) {
3215
1.08k
          bool CondIsTrue = PBI->getSuccessor(0) == BB;
3216
1.08k
          NewPN->addIncoming(
3217
1.08k
              ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue),
3218
1.08k
              P);
3219
1.08k
        } else {
3220
823
          NewPN->addIncoming(BI->getCondition(), P);
3221
823
        }
3222
1.90k
      }
3223
706
3224
706
      BI->setCondition(NewPN);
3225
706
      return true;
3226
706
    }
3227
15.1M
  }
3228
15.1M
3229
15.1M
  if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
3230
2.32k
    if (CE->canTrap())
3231
1
      return false;
3232
15.1M
3233
15.1M
  // If both branches are conditional and both contain stores to the same
3234
15.1M
  // address, remove the stores from the conditionals and create a conditional
3235
15.1M
  // merged store at the end.
3236
15.1M
  if (MergeCondStores && mergeConditionalStores(PBI, BI, DL))
3237
58
    return true;
3238
15.1M
3239
15.1M
  // If this is a conditional branch in an empty block, and if any
3240
15.1M
  // predecessors are a conditional branch to one of our destinations,
3241
15.1M
  // fold the conditions into logical ops and one cond br.
3242
15.1M
3243
15.1M
  // Ignore dbg intrinsics.
3244
15.1M
  if (&*BB->instructionsWithoutDebug().begin() != BI)
3245
14.7M
    return false;
3246
429k
3247
429k
  int PBIOp, BIOp;
3248
429k
  if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3249
361
    PBIOp = 0;
3250
361
    BIOp = 0;
3251
429k
  } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3252
174
    PBIOp = 0;
3253
174
    BIOp = 1;
3254
428k
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3255
168
    PBIOp = 1;
3256
168
    BIOp = 0;
3257
428k
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3258
229
    PBIOp = 1;
3259
229
    BIOp = 1;
3260
428k
  } else {
3261
428k
    return false;
3262
428k
  }
3263
932
3264
932
  // Check to make sure that the other destination of this branch
3265
932
  // isn't BB itself.  If so, this is an infinite loop that will
3266
932
  // keep getting unwound.
3267
932
  if (PBI->getSuccessor(PBIOp) == BB)
3268
7
    return false;
3269
925
3270
925
  // Do not perform this transformation if it would require
3271
925
  // insertion of a large number of select instructions. For targets
3272
925
  // without predication/cmovs, this is a big pessimization.
3273
925
3274
925
  // Also do not perform this transformation if any phi node in the common
3275
925
  // destination block can trap when reached by BB or PBB (PR17073). In that
3276
925
  // case, it would be unsafe to hoist the operation into a select instruction.
3277
925
3278
925
  BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
3279
925
  unsigned NumPhis = 0;
3280
1.72k
  for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
3281
925
       
++II, ++NumPhis799
) {
3282
867
    if (NumPhis > 2) // Disable this xform.
3283
66
      return false;
3284
801
3285
801
    PHINode *PN = cast<PHINode>(II);
3286
801
    Value *BIV = PN->getIncomingValueForBlock(BB);
3287
801
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV))
3288
1
      if (CE->canTrap())
3289
1
        return false;
3290
800
3291
800
    unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
3292
800
    Value *PBIV = PN->getIncomingValue(PBBIdx);
3293
800
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV))
3294
1
      if (CE->canTrap())
3295
1
        return false;
3296
800
  }
3297
925
3298
925
  // Finally, if everything is ok, fold the branches to logical ops.
3299
925
  BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
3300
857
3301
857
  LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
3302
857
                    << "AND: " << *BI->getParent());
3303
857
3304
857
  // If OtherDest *is* BB, then BB is a basic block with a single conditional
3305
857
  // branch in it, where one edge (OtherDest) goes back to itself but the other
3306
857
  // exits.  We don't *know* that the program avoids the infinite loop
3307
857
  // (even though that seems likely).  If we do this xform naively, we'll end up
3308
857
  // recursively unpeeling the loop.  Since we know that (after the xform is
3309
857
  // done) that the block *is* infinite if reached, we just make it an obviously
3310
857
  // infinite loop with no cond branch.
3311
857
  if (OtherDest == BB) {
3312
1
    // Insert it at the end of the function, because it's either code,
3313
1
    // or it won't matter if it's hot. :)
3314
1
    BasicBlock *InfLoopBlock =
3315
1
        BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
3316
1
    BranchInst::Create(InfLoopBlock, InfLoopBlock);
3317
1
    OtherDest = InfLoopBlock;
3318
1
  }
3319
857
3320
857
  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
3321
857
3322
857
  // BI may have other predecessors.  Because of this, we leave
3323
857
  // it alone, but modify PBI.
3324
857
3325
857
  // Make sure we get to CommonDest on True&True directions.
3326
857
  Value *PBICond = PBI->getCondition();
3327
857
  IRBuilder<NoFolder> Builder(PBI);
3328
857
  if (PBIOp)
3329
368
    PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
3330
857
3331
857
  Value *BICond = BI->getCondition();
3332
857
  if (BIOp)
3333
375
    BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
3334
857
3335
857
  // Merge the conditions.
3336
857
  Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge");
3337
857
3338
857
  // Modify PBI to branch on the new condition to the new dests.
3339
857
  PBI->setCondition(Cond);
3340
857
  PBI->setSuccessor(0, CommonDest);
3341
857
  PBI->setSuccessor(1, OtherDest);
3342
857
3343
857
  // Update branch weight for PBI.
3344
857
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
3345
857
  uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
3346
857
  bool HasWeights =
3347
857
      extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
3348
857
                             SuccTrueWeight, SuccFalseWeight);
3349
857
  if (HasWeights) {
3350
4
    PredCommon = PBIOp ? 
PredFalseWeight2
:
PredTrueWeight2
;
3351
4
    PredOther = PBIOp ? 
PredTrueWeight2
:
PredFalseWeight2
;
3352
4
    SuccCommon = BIOp ? 
SuccFalseWeight2
:
SuccTrueWeight2
;
3353
4
    SuccOther = BIOp ? 
SuccTrueWeight2
:
SuccFalseWeight2
;
3354
4
    // The weight to CommonDest should be PredCommon * SuccTotal +
3355
4
    //                                    PredOther * SuccCommon.
3356
4
    // The weight to OtherDest should be PredOther * SuccOther.
3357
4
    uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
3358
4
                                  PredOther * SuccCommon,
3359
4
                              PredOther * SuccOther};
3360
4
    // Halve the weights if any of them cannot fit in an uint32_t
3361
4
    FitWeights(NewWeights);
3362
4
3363
4
    setBranchWeights(PBI, NewWeights[0], NewWeights[1]);
3364
4
  }
3365
857
3366
857
  // OtherDest may have phi nodes.  If so, add an entry from PBI's
3367
857
  // block that are identical to the entries for BI's block.
3368
857
  AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
3369
857
3370
857
  // We know that the CommonDest already had an edge from PBI to
3371
857
  // it.  If it has PHIs though, the PHIs may have different
3372
857
  // entries for BB and PBI's BB.  If so, insert a select to make
3373
857
  // them agree.
3374
857
  for (PHINode &PN : CommonDest->phis()) {
3375
601
    Value *BIV = PN.getIncomingValueForBlock(BB);
3376
601
    unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
3377
601
    Value *PBIV = PN.getIncomingValue(PBBIdx);
3378
601
    if (BIV != PBIV) {
3379
259
      // Insert a select in PBI to pick the right value.
3380
259
      SelectInst *NV = cast<SelectInst>(
3381
259
          Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
3382
259
      PN.setIncomingValue(PBBIdx, NV);
3383
259
      // Although the select has the same condition as PBI, the original branch
3384
259
      // weights for PBI do not apply to the new select because the select's
3385
259
      // 'logical' edges are incoming edges of the phi that is eliminated, not
3386
259
      // the outgoing edges of PBI.
3387
259
      if (HasWeights) {
3388
3
        uint64_t PredCommon = PBIOp ? 
PredFalseWeight2
:
PredTrueWeight1
;
3389
3
        uint64_t PredOther = PBIOp ? 
PredTrueWeight2
:
PredFalseWeight1
;
3390
3
        uint64_t SuccCommon = BIOp ? 
SuccFalseWeight2
:
SuccTrueWeight1
;
3391
3
        uint64_t SuccOther = BIOp ? 
SuccTrueWeight2
:
SuccFalseWeight1
;
3392
3
        // The weight to PredCommonDest should be PredCommon * SuccTotal.
3393
3
        // The weight to PredOtherDest should be PredOther * SuccCommon.
3394
3
        uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
3395
3
                                  PredOther * SuccCommon};
3396
3
3397
3
        FitWeights(NewWeights);
3398
3
3399
3
        setBranchWeights(NV, NewWeights[0], NewWeights[1]);
3400
3
      }
3401
259
    }
3402
601
  }
3403
857
3404
857
  LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
3405
857
  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
3406
857
3407
857
  // This basic block is probably dead.  We know it has at least
3408
857
  // one fewer predecessor.
3409
857
  return true;
3410
925
}
3411
3412
// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
3413
// true or to FalseBB if Cond is false.
3414
// Takes care of updating the successors and removing the old terminator.
3415
// Also makes sure not to introduce new successors by assuming that edges to
3416
// non-successor TrueBBs and FalseBBs aren't reachable.
3417
static bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
3418
                                       BasicBlock *TrueBB, BasicBlock *FalseBB,
3419
                                       uint32_t TrueWeight,
3420
20
                                       uint32_t FalseWeight) {
3421
20
  // Remove any superfluous successor edges from the CFG.
3422
20
  // First, figure out which successors to preserve.
3423
20
  // If TrueBB and FalseBB are equal, only try to preserve one copy of that
3424
20
  // successor.
3425
20
  BasicBlock *KeepEdge1 = TrueBB;
3426
20
  BasicBlock *KeepEdge2 = TrueBB != FalseBB ? 
FalseBB15
:
nullptr5
;
3427
20
3428
20
  // Then remove the rest.
3429
65
  for (BasicBlock *Succ : successors(OldTerm)) {
3430
65
    // Make sure only to keep exactly one copy of each edge.
3431
65
    if (Succ == KeepEdge1)
3432
18
      KeepEdge1 = nullptr;
3433
47
    else if (Succ == KeepEdge2)
3434
14
      KeepEdge2 = nullptr;
3435
33
    else
3436
33
      Succ->removePredecessor(OldTerm->getParent(),
3437
33
                              /*KeepOneInputPHIs=*/true);
3438
65
  }
3439
20
3440
20
  IRBuilder<> Builder(OldTerm);
3441
20
  Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
3442
20
3443
20
  // Insert an appropriate new terminator.
3444
20
  if (!KeepEdge1 && 
!KeepEdge218
) {
3445
18
    if (TrueBB == FalseBB)
3446
4
      // We were only looking for one successor, and it was present.
3447
4
      // Create an unconditional branch to it.
3448
4
      Builder.CreateBr(TrueBB);
3449
14
    else {
3450
14
      // We found both of the successors we were looking for.
3451
14
      // Create a conditional branch sharing the condition of the select.
3452
14
      BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
3453
14
      if (TrueWeight != FalseWeight)
3454
1
        setBranchWeights(NewBI, TrueWeight, FalseWeight);
3455
14
    }
3456
18
  } else 
if (2
KeepEdge12
&&
(2
KeepEdge22
||
TrueBB == FalseBB1
)) {
3457
2
    // Neither of the selected blocks were successors, so this
3458
2
    // terminator must be unreachable.
3459
2
    new UnreachableInst(OldTerm->getContext(), OldTerm);
3460
2
  } else {
3461
0
    // One of the selected values was a successor, but the other wasn't.
3462
0
    // Insert an unconditional branch to the one that was found;
3463
0
    // the edge to the one that wasn't must be unreachable.
3464
0
    if (!KeepEdge1)
3465
0
      // Only TrueBB was found.
3466
0
      Builder.CreateBr(TrueBB);
3467
0
    else
3468
0
      // Only FalseBB was found.
3469
0
      Builder.CreateBr(FalseBB);
3470
0
  }
3471
20
3472
20
  EraseTerminatorAndDCECond(OldTerm);
3473
20
  return true;
3474
20
}
3475
3476
// Replaces
3477
//   (switch (select cond, X, Y)) on constant X, Y
3478
// with a branch - conditional if X and Y lead to distinct BBs,
3479
// unconditional otherwise.
3480
1.72k
static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
3481
1.72k
  // Check for constant integer values in the select.
3482
1.72k
  ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
3483
1.72k
  ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
3484
1.72k
  if (!TrueVal || 
!FalseVal391
)
3485
1.70k
    return false;
3486
16
3487
16
  // Find the relevant condition and destinations.
3488
16
  Value *Condition = Select->getCondition();
3489
16
  BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
3490
16
  BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
3491
16
3492
16
  // Get weight for TrueBB and FalseBB.
3493
16
  uint32_t TrueWeight = 0, FalseWeight = 0;
3494
16
  SmallVector<uint64_t, 8> Weights;
3495
16
  bool HasWeights = HasBranchWeights(SI);
3496
16
  if (HasWeights) {
3497
1
    GetBranchWeights(SI, Weights);
3498
1
    if (Weights.size() == 1 + SI->getNumCases()) {
3499
1
      TrueWeight =
3500
1
          (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
3501
1
      FalseWeight =
3502
1
          (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
3503
1
    }
3504
1
  }
3505
16
3506
16
  // Perform the actual simplification.
3507
16
  return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
3508
16
                                    FalseWeight);
3509
16
}
3510
3511
// Replaces
3512
//   (indirectbr (select cond, blockaddress(@fn, BlockA),
3513
//                             blockaddress(@fn, BlockB)))
3514
// with
3515
//   (br cond, BlockA, BlockB).
3516
6
static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
3517
6
  // Check that both operands of the select are block addresses.
3518
6
  BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
3519
6
  BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
3520
6
  if (!TBA || 
!FBA4
)
3521
2
    return false;
3522
4
3523
4
  // Extract the actual blocks.
3524
4
  BasicBlock *TrueBB = TBA->getBasicBlock();
3525
4
  BasicBlock *FalseBB = FBA->getBasicBlock();
3526
4
3527
4
  // Perform the actual simplification.
3528
4
  return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
3529
4
                                    0);
3530
4
}
3531
3532
/// This is called when we find an icmp instruction
3533
/// (a seteq/setne with a constant) as the only instruction in a
3534
/// block that ends with an uncond branch.  We are looking for a very specific
3535
/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified.  In
3536
/// this case, we merge the first two "or's of icmp" into a switch, but then the
3537
/// default value goes to an uncond block with a seteq in it, we get something
3538
/// like:
3539
///
3540
///   switch i8 %A, label %DEFAULT [ i8 1, label %end    i8 2, label %end ]
3541
/// DEFAULT:
3542
///   %tmp = icmp eq i8 %A, 92
3543
///   br label %end
3544
/// end:
3545
///   ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
3546
///
3547
/// We prefer to split the edge to 'end' so that there is a true/false entry to
3548
/// the PHI, merging the third icmp into the switch.
3549
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
3550
2.98k
    ICmpInst *ICI, IRBuilder<> &Builder) {
3551
2.98k
  BasicBlock *BB = ICI->getParent();
3552
2.98k
3553
2.98k
  // If the block has any PHIs in it or the icmp has multiple uses, it is too
3554
2.98k
  // complex.
3555
2.98k
  if (isa<PHINode>(BB->begin()) || 
!ICI->hasOneUse()2.50k
)
3556
536
    return false;
3557
2.45k
3558
2.45k
  Value *V = ICI->getOperand(0);
3559
2.45k
  ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
3560
2.45k
3561
2.45k
  // The pattern we're looking for is where our only predecessor is a switch on
3562
2.45k
  // 'V' and this block is the default case for the switch.  In this case we can
3563
2.45k
  // fold the compared value into the switch to simplify things.
3564
2.45k
  BasicBlock *Pred = BB->getSinglePredecessor();
3565
2.45k
  if (!Pred || 
!isa<SwitchInst>(Pred->getTerminator())2.25k
)
3566
2.20k
    return false;
3567
247
3568
247
  SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
3569
247
  if (SI->getCondition() != V)
3570
80
    return false;
3571
167
3572
167
  // If BB is reachable on a non-default case, then we simply know the value of
3573
167
  // V in this block.  Substitute it and constant fold the icmp instruction
3574
167
  // away.
3575
167
  if (SI->getDefaultDest() != BB) {
3576
0
    ConstantInt *VVal = SI->findCaseDest(BB);
3577
0
    assert(VVal && "Should have a unique destination value");
3578
0
    ICI->setOperand(0, VVal);
3579
0
3580
0
    if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) {
3581
0
      ICI->replaceAllUsesWith(V);
3582
0
      ICI->eraseFromParent();
3583
0
    }
3584
0
    // BB is now empty, so it is likely to simplify away.
3585
0
    return requestResimplify();
3586
0
  }
3587
167
3588
167
  // Ok, the block is reachable from the default dest.  If the constant we're
3589
167
  // comparing exists in one of the other edges, then we can constant fold ICI
3590
167
  // and zap it.
3591
167
  if (SI->findCaseValue(Cst) != SI->case_default()) {
3592
3
    Value *V;
3593
3
    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
3594
3
      V = ConstantInt::getFalse(BB->getContext());
3595
0
    else
3596
0
      V = ConstantInt::getTrue(BB->getContext());
3597
3
3598
3
    ICI->replaceAllUsesWith(V);
3599
3
    ICI->eraseFromParent();
3600
3
    // BB is now empty, so it is likely to simplify away.
3601
3
    return requestResimplify();
3602
3
  }
3603
164
3604
164
  // The use of the icmp has to be in the 'end' block, by the only PHI node in
3605
164
  // the block.
3606
164
  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
3607
164
  PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
3608
164
  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
3609
164
      isa<PHINode>(++BasicBlock::iterator(PHIUse)))
3610
0
    return false;
3611
164
3612
164
  // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
3613
164
  // true in the PHI.
3614
164
  Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
3615
164
  Constant *NewCst = ConstantInt::getFalse(BB->getContext());
3616
164
3617
164
  if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
3618
129
    std::swap(DefaultCst, NewCst);
3619
164
3620
164
  // Replace ICI (which is used by the PHI for the default value) with true or
3621
164
  // false depending on if it is EQ or NE.
3622
164
  ICI->replaceAllUsesWith(DefaultCst);
3623
164
  ICI->eraseFromParent();
3624
164
3625
164
  // Okay, the switch goes to this block on a default value.  Add an edge from
3626
164
  // the switch to the merge point on the compared value.
3627
164
  BasicBlock *NewBB =
3628
164
      BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
3629
164
  {
3630
164
    SwitchInstProfUpdateWrapper SIW(*SI);
3631
164
    auto W0 = SIW.getSuccessorWeight(0);
3632
164
    SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
3633
164
    if (W0) {
3634
1
      NewW = ((uint64_t(*W0) + 1) >> 1);
3635
1
      SIW.setSuccessorWeight(0, *NewW);
3636
1
    }
3637
164
    SIW.addCase(Cst, NewBB, NewW);
3638
164
  }
3639
164
3640
164
  // NewBB branches to the phi block, add the uncond branch and the phi entry.
3641
164
  Builder.SetInsertPoint(NewBB);
3642
164
  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
3643
164
  Builder.CreateBr(SuccBlock);
3644
164
  PHIUse->addIncoming(NewCst, NewBB);
3645
164
  return true;
3646
164
}
3647
3648
/// The specified branch is a conditional branch.
3649
/// Check to see if it is branching on an or/and chain of icmp instructions, and
3650
/// fold it into a switch instruction if so.
3651
static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
3652
15.5M
                                      const DataLayout &DL) {
3653
15.5M
  Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
3654
15.5M
  if (!Cond)
3655
11.1k
    return false;
3656
15.5M
3657
15.5M
  // Change br (X == 0 | X == 1), T, F into a switch instruction.
3658
15.5M
  // If this is a bunch of seteq's or'd together, or if it's a bunch of
3659
15.5M
  // 'setne's and'ed together, collect them.
3660
15.5M
3661
15.5M
  // Try to gather values from a chain of and/or to be turned into a switch
3662
15.5M
  ConstantComparesGatherer ConstantCompare(Cond, DL);
3663
15.5M
  // Unpack the result
3664
15.5M
  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
3665
15.5M
  Value *CompVal = ConstantCompare.CompValue;
3666
15.5M
  unsigned UsedICmps = ConstantCompare.UsedICmps;
3667
15.5M
  Value *ExtraCase = ConstantCompare.Extra;
3668
15.5M
3669
15.5M
  // If we didn't have a multiply compared value, fail.
3670
15.5M
  if (!CompVal)
3671
14.6M
    return false;
3672
891k
3673
891k
  // Avoid turning single icmps into a switch.
3674
891k
  if (UsedICmps <= 1)
3675
891k
    return false;
3676
508
3677
508
  bool TrueWhenEqual = (Cond->getOpcode() == Instruction::Or);
3678
508
3679
508
  // There might be duplicate constants in the list, which the switch
3680
508
  // instruction can't handle, remove them now.
3681
508
  array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
3682
508
  Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
3683
508
3684
508
  // If Extra was used, we require at least two switch values to do the
3685
508
  // transformation.  A switch with one value is just a conditional branch.
3686
508
  if (ExtraCase && 
Values.size() < 224
)
3687
0
    return false;
3688
508
3689
508
  // TODO: Preserve branch weight metadata, similarly to how
3690
508
  // FoldValueComparisonIntoPredecessors preserves it.
3691
508
3692
508
  // Figure out which block is which destination.
3693
508
  BasicBlock *DefaultBB = BI->getSuccessor(1);
3694
508
  BasicBlock *EdgeBB = BI->getSuccessor(0);
3695
508
  if (!TrueWhenEqual)
3696
133
    std::swap(DefaultBB, EdgeBB);
3697
508
3698
508
  BasicBlock *BB = BI->getParent();
3699
508
3700
508
  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
3701
508
                    << " cases into SWITCH.  BB is:\n"
3702
508
                    << *BB);
3703
508
3704
508
  // If there are any extra values that couldn't be folded into the switch
3705
508
  // then we evaluate them with an explicit branch first.  Split the block
3706
508
  // right before the condbr to handle it.
3707
508
  if (ExtraCase) {
3708
24
    BasicBlock *NewBB =
3709
24
        BB->splitBasicBlock(BI->getIterator(), "switch.early.test");
3710
24
    // Remove the uncond branch added to the old block.
3711
24
    Instruction *OldTI = BB->getTerminator();
3712
24
    Builder.SetInsertPoint(OldTI);
3713
24
3714
24
    if (TrueWhenEqual)
3715
21
      Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
3716
3
    else
3717
3
      Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
3718
24
3719
24
    OldTI->eraseFromParent();
3720
24
3721
24
    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
3722
24
    // for the edge we just added.
3723
24
    AddPredecessorToBlock(EdgeBB, BB, NewBB);
3724
24
3725
24
    LLVM_DEBUG(dbgs() << "  ** 'icmp' chain unhandled condition: " << *ExtraCase
3726
24
                      << "\nEXTRABB = " << *BB);
3727
24
    BB = NewBB;
3728
24
  }
3729
508
3730
508
  Builder.SetInsertPoint(BI);
3731
508
  // Convert pointer to int before we switch.
3732
508
  if (CompVal->getType()->isPointerTy()) {
3733
6
    CompVal = Builder.CreatePtrToInt(
3734
6
        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
3735
6
  }
3736
508
3737
508
  // Create the new switch instruction now.
3738
508
  SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
3739
508
3740
508
  // Add all of the 'cases' to the switch instruction.
3741
2.03k
  for (unsigned i = 0, e = Values.size(); i != e; 
++i1.52k
)
3742
1.52k
    New->addCase(Values[i], EdgeBB);
3743
508
3744
508
  // We added edges from PI to the EdgeBB.  As such, if there were any
3745
508
  // PHI nodes in EdgeBB, they need entries to be added corresponding to
3746
508
  // the number of edges added.
3747
638
  for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); 
++BBI130
) {
3748
130
    PHINode *PN = cast<PHINode>(BBI);
3749
130
    Value *InVal = PN->getIncomingValueForBlock(BB);
3750
511
    for (unsigned i = 0, e = Values.size() - 1; i != e; 
++i381
)
3751
381
      PN->addIncoming(InVal, BB);
3752
130
  }
3753
508
3754
508
  // Erase the old branch instruction.
3755
508
  EraseTerminatorAndDCECond(BI);
3756
508
3757
508
  LLVM_DEBUG(dbgs() << "  ** 'icmp' chain result is:\n" << *BB << '\n');
3758
508
  return true;
3759
508
}
3760
3761
53.6k
bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
3762
53.6k
  if (isa<PHINode>(RI->getValue()))
3763
2.08k
    return SimplifyCommonResume(RI);
3764
51.5k
  else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
3765
51.5k
           
RI->getValue() == RI->getParent()->getFirstNonPHI()21.2k
)
3766
18.4k
    // The resume must unwind the exception that caused control to branch here.
3767
18.4k
    return SimplifySingleResume(RI);
3768
33.1k
3769
33.1k
  return false;
3770
33.1k
}
3771
3772
// Simplify resume that is shared by several landing pads (phi of landing pad).
3773
2.08k
bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
3774
2.08k
  BasicBlock *BB = RI->getParent();
3775
2.08k
3776
2.08k
  // Check that there are no other instructions except for debug intrinsics
3777
2.08k
  // between the phi of landing pads (RI->getValue()) and resume instruction.
3778
2.08k
  BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(),
3779
2.08k
                       E = RI->getIterator();
3780
2.08k
  while (++I != E)
3781
2.01k
    if (!isa<DbgInfoIntrinsic>(I))
3782
2.01k
      return false;
3783
2.08k
3784
2.08k
  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
3785
69
  auto *PhiLPInst = cast<PHINode>(RI->getValue());
3786
69
3787
69
  // Check incoming blocks to see if any of them are trivial.
3788
237
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
3789
168
       Idx++) {
3790
168
    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
3791
168
    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
3792
168
3793
168
    // If the block has other successors, we can not delete it because
3794
168
    // it has other dependents.
3795
168
    if (IncomingBB->getUniqueSuccessor() != BB)
3796
59
      continue;
3797
109
3798
109
    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
3799
109
    // Not the landing pad that caused the control to branch here.
3800
109
    if (IncomingValue != LandingPad)
3801
48
      continue;
3802
61
3803
61
    bool isTrivial = true;
3804
61
3805
61
    I = IncomingBB->getFirstNonPHI()->getIterator();
3806
61
    E = IncomingBB->getTerminator()->getIterator();
3807
61
    while (++I != E)
3808
3
      if (!isa<DbgInfoIntrinsic>(I)) {
3809
3
        isTrivial = false;
3810
3
        break;
3811
3
      }
3812
61
3813
61
    if (isTrivial)
3814
58
      TrivialUnwindBlocks.insert(IncomingBB);
3815
61
  }
3816
69
3817
69
  // If no trivial unwind blocks, don't do any simplifications.
3818
69
  if (TrivialUnwindBlocks.empty())
3819
30
    return false;
3820
39
3821
39
  // Turn all invokes that unwind here into calls.
3822
57
  
for (auto *TrivialBB : TrivialUnwindBlocks)39
{
3823
57
    // Blocks that will be simplified should be removed from the phi node.
3824
57
    // Note there could be multiple edges to the resume block, and we need
3825
57
    // to remove them all.
3826
115
    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
3827
58
      BB->removePredecessor(TrivialBB, true);
3828
57
3829
57
    for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB);
3830
161
         PI != PE;) {
3831
104
      BasicBlock *Pred = *PI++;
3832
104
      removeUnwindEdge(Pred);
3833
104
    }
3834
57
3835
57
    // In each SimplifyCFG run, only the current processed block can be erased.
3836
57
    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
3837
57
    // of erasing TrivialBB, we only remove the branch to the common resume
3838
57
    // block so that we can later erase the resume block since it has no
3839
57
    // predecessors.
3840
57
    TrivialBB->getTerminator()->eraseFromParent();
3841
57
    new UnreachableInst(RI->getContext(), TrivialBB);
3842
57
  }
3843
39
3844
39
  // Delete the resume block if all its predecessors have been removed.
3845
39
  if (pred_empty(BB))
3846
10
    BB->eraseFromParent();
3847
39
3848
39
  return !TrivialUnwindBlocks.empty();
3849
39
}
3850
3851
// Simplify resume that is only used by a single (non-phi) landing pad.
3852
18.4k
bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
3853
18.4k
  BasicBlock *BB = RI->getParent();
3854
18.4k
  LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI());
3855
18.4k
  assert(RI->getValue() == LPInst &&
3856
18.4k
         "Resume must unwind the exception that caused control to here");
3857
18.4k
3858
18.4k
  // Check that there are no other instructions except for debug intrinsics.
3859
18.4k
  BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator();
3860
18.4k
  while (++I != E)
3861
18.2k
    if (!isa<DbgInfoIntrinsic>(I))
3862
18.2k
      return false;
3863
18.4k
3864
18.4k
  // Turn all invokes that unwind here into calls and delete the basic block.
3865
18.4k
  
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); 108
PI != PE285
;) {
3866
177
    BasicBlock *Pred = *PI++;
3867
177
    removeUnwindEdge(Pred);
3868
177
  }
3869
108
3870
108
  // The landingpad is now unreachable.  Zap it.
3871
108
  if (LoopHeaders)
3872
108
    LoopHeaders->erase(BB);
3873
108
  BB->eraseFromParent();
3874
108
  return true;
3875
18.4k
}
3876
3877
77
static bool removeEmptyCleanup(CleanupReturnInst *RI) {
3878
77
  // If this is a trivial cleanup pad that executes no instructions, it can be
3879
77
  // eliminated.  If the cleanup pad continues to the caller, any predecessor
3880
77
  // that is an EH pad will be updated to continue to the caller and any
3881
77
  // predecessor that terminates with an invoke instruction will have its invoke
3882
77
  // instruction converted to a call instruction.  If the cleanup pad being
3883
77
  // simplified does not continue to the caller, each predecessor will be
3884
77
  // updated to continue to the unwind destination of the cleanup pad being
3885
77
  // simplified.
3886
77
  BasicBlock *BB = RI->getParent();
3887
77
  CleanupPadInst *CPInst = RI->getCleanupPad();
3888
77
  if (CPInst->getParent() != BB)
3889
8
    // This isn't an empty cleanup.
3890
8
    return false;
3891
69
3892
69
  // We cannot kill the pad if it has multiple uses.  This typically arises
3893
69
  // from unreachable basic blocks.
3894
69
  if (!CPInst->hasOneUse())
3895
19
    return false;
3896
50
3897
50
  // Check that there are no other instructions except for benign intrinsics.
3898
50
  BasicBlock::iterator I = CPInst->getIterator(), E = RI->getIterator();
3899
51
  while (++I != E) {
3900
37
    auto *II = dyn_cast<IntrinsicInst>(I);
3901
37
    if (!II)
3902
27
      return false;
3903
10
3904
10
    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
3905
10
    switch (IntrinsicID) {
3906
10
    case Intrinsic::dbg_declare:
3907
1
    case Intrinsic::dbg_value:
3908
1
    case Intrinsic::dbg_label:
3909
1
    case Intrinsic::lifetime_end:
3910
1
      break;
3911
9
    default:
3912
9
      return false;
3913
10
    }
3914
10
  }
3915
50
3916
50
  // If the cleanup return we are simplifying unwinds to the caller, this will
3917
50
  // set UnwindDest to nullptr.
3918
50
  BasicBlock *UnwindDest = RI->getUnwindDest();
3919
14
  Instruction *DestEHPad = UnwindDest ? 
UnwindDest->getFirstNonPHI()7
:
nullptr7
;
3920
14
3921
14
  // We're about to remove BB from the control flow.  Before we do, sink any
3922
14
  // PHINodes into the unwind destination.  Doing this before changing the
3923
14
  // control flow avoids some potentially slow checks, since we can currently
3924
14
  // be certain that UnwindDest and BB have no common predecessors (since they
3925
14
  // are both EH pads).
3926
14
  if (UnwindDest) {
3927
7
    // First, go through the PHI nodes in UnwindDest and update any nodes that
3928
7
    // reference the block we are removing
3929
7
    for (BasicBlock::iterator I = UnwindDest->begin(),
3930
7
                              IE = DestEHPad->getIterator();
3931
10
         I != IE; 
++I3
) {
3932
3
      PHINode *DestPN = cast<PHINode>(I);
3933
3
3934
3
      int Idx = DestPN->getBasicBlockIndex(BB);
3935
3
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
3936
3
      assert(Idx != -1);
3937
3
      // This PHI node has an incoming value that corresponds to a control
3938
3
      // path through the cleanup pad we are removing.  If the incoming
3939
3
      // value is in the cleanup pad, it must be a PHINode (because we
3940
3
      // verified above that the block is otherwise empty).  Otherwise, the
3941
3
      // value is either a constant or a value that dominates the cleanup
3942
3
      // pad being removed.
3943
3
      //
3944
3
      // Because BB and UnwindDest are both EH pads, all of their
3945
3
      // predecessors must unwind to these blocks, and since no instruction
3946
3
      // can have multiple unwind destinations, there will be no overlap in
3947
3
      // incoming blocks between SrcPN and DestPN.
3948
3
      Value *SrcVal = DestPN->getIncomingValue(Idx);
3949
3
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
3950
3
3951
3
      // Remove the entry for the block we are deleting.
3952
3
      DestPN->removeIncomingValue(Idx, false);
3953
3
3954
3
      if (SrcPN && 
SrcPN->getParent() == BB1
) {
3955
1
        // If the incoming value was a PHI node in the cleanup pad we are
3956
1
        // removing, we need to merge that PHI node's incoming values into
3957
1
        // DestPN.
3958
1
        for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues();
3959
3
             SrcIdx != SrcE; 
++SrcIdx2
) {
3960
2
          DestPN->addIncoming(SrcPN->getIncomingValue(SrcIdx),
3961
2
                              SrcPN->getIncomingBlock(SrcIdx));
3962
2
        }
3963
2
      } else {
3964
2
        // Otherwise, the incoming value came from above BB and
3965
2
        // so we can just reuse it.  We must associate all of BB's
3966
2
        // predecessors with this value.
3967
2
        for (auto *pred : predecessors(BB)) {
3968
2
          DestPN->addIncoming(SrcVal, pred);
3969
2
        }
3970
2
      }
3971
3
    }
3972
7
3973
7
    // Sink any remaining PHI nodes directly into UnwindDest.
3974
7
    Instruction *InsertPt = DestEHPad;
3975
7
    for (BasicBlock::iterator I = BB->begin(),
3976
7
                              IE = BB->getFirstNonPHI()->getIterator();
3977
10
         I != IE;) {
3978
3
      // The iterator must be incremented here because the instructions are
3979
3
      // being moved to another block.
3980
3
      PHINode *PN = cast<PHINode>(I++);
3981
3
      if (PN->use_empty())
3982
1
        // If the PHI node has no uses, just leave it.  It will be erased
3983
1
        // when we erase BB below.
3984
1
        continue;
3985
2
3986
2
      // Otherwise, sink this PHI node into UnwindDest.
3987
2
      // Any predecessors to UnwindDest which are not already represented
3988
2
      // must be back edges which inherit the value from the path through
3989
2
      // BB.  In this case, the PHI value must reference itself.
3990
2
      for (auto *pred : predecessors(UnwindDest))
3991
3
        if (pred != BB)
3992
1
          PN->addIncoming(PN, pred);
3993
2
      PN->moveBefore(InsertPt);
3994
2
    }
3995
7
  }
3996
14
3997
32
  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
3998
18
    // The iterator must be updated here because we are removing this pred.
3999
18
    BasicBlock *PredBB = *PI++;
4000
18
    if (UnwindDest == nullptr) {
4001
8
      removeUnwindEdge(PredBB);
4002
10
    } else {
4003
10
      Instruction *TI = PredBB->getTerminator();
4004
10
      TI->replaceUsesOfWith(BB, UnwindDest);
4005
10
    }
4006
18
  }
4007
14
4008
14
  // The cleanup pad is now unreachable.  Zap it.
4009
14
  BB->eraseFromParent();
4010
14
  return true;
4011
50
}
4012
4013
// Try to merge two cleanuppads together.
4014
82
static bool mergeCleanupPad(CleanupReturnInst *RI) {
4015
82
  // Skip any cleanuprets which unwind to caller, there is nothing to merge
4016
82
  // with.
4017
82
  BasicBlock *UnwindDest = RI->getUnwindDest();
4018
82
  if (!UnwindDest)
4019
44
    return false;
4020
38
4021
38
  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
4022
38
  // be safe to merge without code duplication.
4023
38
  if (UnwindDest->getSinglePredecessor() != RI->getParent())
4024
25
    return false;
4025
13
4026
13
  // Verify that our cleanuppad's unwind destination is another cleanuppad.
4027
13
  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
4028
13
  if (!SuccessorCleanupPad)
4029
8
    return false;
4030
5
4031
5
  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
4032
5
  // Replace any uses of the successor cleanupad with the predecessor pad
4033
5
  // The only cleanuppad uses should be this cleanupret, it's cleanupret and
4034
5
  // funclet bundle operands.
4035
5
  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
4036
5
  // Remove the old cleanuppad.
4037
5
  SuccessorCleanupPad->eraseFromParent();
4038
5
  // Now, we simply replace the cleanupret with a branch to the unwind
4039
5
  // destination.
4040
5
  BranchInst::Create(UnwindDest, RI->getParent());
4041
5
  RI->eraseFromParent();
4042
5
4043
5
  return true;
4044
5
}
4045
4046
83
bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) {
4047
83
  // It is possible to transiantly have an undef cleanuppad operand because we
4048
83
  // have deleted some, but not all, dead blocks.
4049
83
  // Eventually, this block will be deleted.
4050
83
  if (isa<UndefValue>(RI->getOperand(0)))
4051
1
    return false;
4052
82
4053
82
  if (mergeCleanupPad(RI))
4054
5
    return true;
4055
77
4056
77
  if (removeEmptyCleanup(RI))
4057
14
    return true;
4058
63
4059
63
  return false;
4060
63
}
4061
4062
4.18M
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
4063
4.18M
  BasicBlock *BB = RI->getParent();
4064
4.18M
  if (!BB->getFirstNonPHIOrDbg()->isTerminator())
4065
2.81M
    return false;
4066
1.36M
4067
1.36M
  // Find predecessors that end with branches.
4068
1.36M
  SmallVector<BasicBlock *, 8> UncondBranchPreds;
4069
1.36M
  SmallVector<BranchInst *, 8> CondBranchPreds;
4070
5.91M
  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; 
++PI4.54M
) {
4071
4.54M
    BasicBlock *P = *PI;
4072
4.54M
    Instruction *PTI = P->getTerminator();
4073
4.54M
    if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
4074
4.43M
      if (BI->isUnconditional())
4075
2.48M
        UncondBranchPreds.push_back(P);
4076
1.94M
      else
4077
1.94M
        CondBranchPreds.push_back(BI);
4078
4.43M
    }
4079
4.54M
  }
4080
1.36M
4081
1.36M
  // If we found some, do the transformation!
4082
1.36M
  if (!UncondBranchPreds.empty() && 
DupRet852k
) {
4083
0
    while (!UncondBranchPreds.empty()) {
4084
0
      BasicBlock *Pred = UncondBranchPreds.pop_back_val();
4085
0
      LLVM_DEBUG(dbgs() << "FOLDING: " << *BB
4086
0
                        << "INTO UNCOND BRANCH PRED: " << *Pred);
4087
0
      (void)FoldReturnIntoUncondBranch(RI, BB, Pred);
4088
0
    }
4089
0
4090
0
    // If we eliminated all predecessors of the block, delete the block now.
4091
0
    if (pred_empty(BB)) {
4092
0
      // We know there are no successors, so just nuke the block.
4093
0
      if (LoopHeaders)
4094
0
        LoopHeaders->erase(BB);
4095
0
      BB->eraseFromParent();
4096
0
    }
4097
0
4098
0
    return true;
4099
0
  }
4100
1.36M
4101
1.36M
  // Check out all of the conditional branches going to this return
4102
1.36M
  // instruction.  If any of them just select between returns, change the
4103
1.36M
  // branch itself into a select/return pair.
4104
3.31M
  
while (1.36M
!CondBranchPreds.empty()) {
4105
1.94M
    BranchInst *BI = CondBranchPreds.pop_back_val();
4106
1.94M
4107
1.94M
    // Check to see if the non-BB successor is also a return block.
4108
1.94M
    if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
4109
1.94M
        
isa<ReturnInst>(BI->getSuccessor(1)->getTerminator())1.14M
&&
4110
1.94M
        
SimplifyCondBranchToTwoReturns(BI, Builder)29.3k
)
4111
911
      return true;
4112
1.94M
  }
4113
1.36M
  
return false1.36M
;
4114
1.36M
}
4115
4116
463k
bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
4117
463k
  BasicBlock *BB = UI->getParent();
4118
463k
4119
463k
  bool Changed = false;
4120
463k
4121
463k
  // If there are any instructions immediately before the unreachable that can
4122
463k
  // be removed, do so.
4123
463k
  while (UI->getIterator() != BB->begin()) {
4124
442k
    BasicBlock::iterator BBI = UI->getIterator();
4125
442k
    --BBI;
4126
442k
    // Do not delete instructions that can have side effects which might cause
4127
442k
    // the unreachable to not be reachable; specifically, calls and volatile
4128
442k
    // operations may have this effect.
4129
442k
    if (isa<CallInst>(BBI) && 
!isa<DbgInfoIntrinsic>(BBI)442k
)
4130
442k
      break;
4131
131
4132
131
    if (BBI->mayHaveSideEffects()) {
4133
36
      if (auto *SI = dyn_cast<StoreInst>(BBI)) {
4134
27
        if (SI->isVolatile())
4135
6
          break;
4136
9
      } else if (auto *LI = dyn_cast<LoadInst>(BBI)) {
4137
2
        if (LI->isVolatile())
4138
2
          break;
4139
7
      } else if (auto *RMWI = dyn_cast<AtomicRMWInst>(BBI)) {
4140
1
        if (RMWI->isVolatile())
4141
1
          break;
4142
6
      } else if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) {
4143
1
        if (CXI->isVolatile())
4144
1
          break;
4145
5
      } else if (isa<CatchPadInst>(BBI)) {
4146
5
        // A catchpad may invoke exception object constructors and such, which
4147
5
        // in some languages can be arbitrary code, so be conservative by
4148
5
        // default.
4149
5
        // For CoreCLR, it just involves a type test, so can be removed.
4150
5
        if (classifyEHPersonality(BB->getParent()->getPersonalityFn()) !=
4151
5
            EHPersonality::CoreCLR)
4152
0
          break;
4153
0
      } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) &&
4154
0
                 !isa<LandingPadInst>(BBI)) {
4155
0
        break;
4156
0
      }
4157
121
      // Note that deleting LandingPad's here is in fact okay, although it
4158
121
      // involves a bit of subtle reasoning. If this inst is a LandingPad,
4159
121
      // all the predecessors of this block will be the unwind edges of Invokes,
4160
121
      // and we can therefore guarantee this block will be erased.
4161
121
    }
4162
121
4163
121
    // Delete this instruction (any uses are guaranteed to be dead)
4164
121
    if (!BBI->use_empty())
4165
0
      BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
4166
121
    BBI->eraseFromParent();
4167
121
    Changed = true;
4168
121
  }
4169
463k
4170
463k
  // If the unreachable instruction is the first in the block, take a gander
4171
463k
  // at all of the predecessors of this instruction, and simplify them.
4172
463k
  if (&BB->front() != UI)
4173
442k
    return Changed;
4174
21.2k
4175
21.2k
  SmallVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
4176
42.8k
  for (unsigned i = 0, e = Preds.size(); i != e; 
++i21.6k
) {
4177
21.6k
    Instruction *TI = Preds[i]->getTerminator();
4178
21.6k
    IRBuilder<> Builder(TI);
4179
21.6k
    if (auto *BI = dyn_cast<BranchInst>(TI)) {
4180
170
      if (BI->isUnconditional()) {
4181
9
        if (BI->getSuccessor(0) == BB) {
4182
9
          new UnreachableInst(TI->getContext(), TI);
4183
9
          TI->eraseFromParent();
4184
9
          Changed = true;
4185
9
        }
4186
161
      } else {
4187
161
        Value* Cond = BI->getCondition();
4188
161
        if (BI->getSuccessor(0) == BB) {
4189
94
          Builder.CreateAssumption(Builder.CreateNot(Cond));
4190
94
          Builder.CreateBr(BI->getSuccessor(1));
4191
94
          EraseTerminatorAndDCECond(BI);
4192
94
        } else 
if (67
BI->getSuccessor(1) == BB67
) {
4193
67
          Builder.CreateAssumption(Cond);
4194
67
          Builder.CreateBr(BI->getSuccessor(0));
4195
67
          EraseTerminatorAndDCECond(BI);
4196
67
          Changed = true;
4197
67
        }
4198
161
      }
4199
21.4k
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
4200
1.11k
      SwitchInstProfUpdateWrapper SU(*SI);
4201
5.30k
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
4202
4.19k
        if (i->getCaseSuccessor() != BB) {
4203
4.06k
          ++i;
4204
4.06k
          continue;
4205
4.06k
        }
4206
126
        BB->removePredecessor(SU->getParent());
4207
126
        i = SU.removeCase(i);
4208
126
        e = SU->case_end();
4209
126
        Changed = true;
4210
126
      }
4211
20.3k
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
4212
20.3k
      if (II->getUnwindDest() == BB) {
4213
3
        removeUnwindEdge(TI->getParent());
4214
3
        Changed = true;
4215
3
      }
4216
20.3k
    } else 
if (auto *10
CSI10
= dyn_cast<CatchSwitchInst>(TI)) {
4217
6
      if (CSI->getUnwindDest() == BB) {
4218
1
        removeUnwindEdge(TI->getParent());
4219
1
        Changed = true;
4220
1
        continue;
4221
1
      }
4222
5
4223
5
      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
4224
5
                                             E = CSI->handler_end();
4225
13
           I != E; 
++I8
) {
4226
8
        if (*I == BB) {
4227
5
          CSI->removeHandler(I);
4228
5
          --I;
4229
5
          --E;
4230
5
          Changed = true;
4231
5
        }
4232
8
      }
4233
5
      if (CSI->getNumHandlers() == 0) {
4234
2
        BasicBlock *CatchSwitchBB = CSI->getParent();
4235
2
        if (CSI->hasUnwindDest()) {
4236
1
          // Redirect preds to the unwind dest
4237
1
          CatchSwitchBB->replaceAllUsesWith(CSI->getUnwindDest());
4238
1
        } else {
4239
1
          // Rewrite all preds to unwind to caller (or from invoke to call).
4240
1
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(CatchSwitchBB));
4241
1
          for (BasicBlock *EHPred : EHPreds)
4242
3
            removeUnwindEdge(EHPred);
4243
1
        }
4244
2
        // The catchswitch is no longer reachable.
4245
2
        new UnreachableInst(CSI->getContext(), CSI);
4246
2
        CSI->eraseFromParent();
4247
2
        Changed = true;
4248
2
      }
4249
5
    } else 
if (4
isa<CleanupReturnInst>(TI)4
) {
4250
1
      new UnreachableInst(TI->getContext(), TI);
4251
1
      TI->eraseFromParent();
4252
1
      Changed = true;
4253
1
    }
4254
21.6k
  }
4255
21.2k
4256
21.2k
  // If this block is now dead, remove it.
4257
21.2k
  if (pred_empty(BB) && 
BB != &BB->getParent()->getEntryBlock()533
) {
4258
271
    // We know there are no successors, so just nuke the block.
4259
271
    if (LoopHeaders)
4260
241
      LoopHeaders->erase(BB);
4261
271
    BB->eraseFromParent();
4262
271
    return true;
4263
271
  }
4264
20.9k
4265
20.9k
  return Changed;
4266
20.9k
}
4267
4268
84.0k
static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
4269
84.0k
  assert(Cases.size() >= 1);
4270
84.0k
4271
84.0k
  array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
4272
89.6k
  for (size_t I = 1, E = Cases.size(); I != E; 
++I5.57k
) {
4273
85.5k
    if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
4274
79.9k
      return false;
4275
85.5k
  }
4276
84.0k
  
return true4.07k
;
4277
84.0k
}
4278
4279
/// Turn a switch with two reachable destinations into an integer range
4280
/// comparison and branch.
4281
276k
static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
4282
276k
  assert(SI->getNumCases() > 1 && "Degenerate switch?");
4283
276k
4284
276k
  bool HasDefault =
4285
276k
      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
4286
276k
4287
276k
  // Partition the cases into two sets with different destinations.
4288
276k
  BasicBlock *DestA = HasDefault ? 
SI->getDefaultDest()272k
:
nullptr3.97k
;
4289
276k
  BasicBlock *DestB = nullptr;
4290
276k
  SmallVector<ConstantInt *, 16> CasesA;
4291
276k
  SmallVector<ConstantInt *, 16> CasesB;
4292
276k
4293
595k
  for (auto Case : SI->cases()) {
4294
595k
    BasicBlock *Dest = Case.getCaseSuccessor();
4295
595k
    if (!DestA)
4296
3.97k
      DestA = Dest;
4297
595k
    if (Dest == DestA) {
4298
4.06k
      CasesA.push_back(Case.getCaseValue());
4299
4.06k
      continue;
4300
4.06k
    }
4301
591k
    if (!DestB)
4302
276k
      DestB = Dest;
4303
591k
    if (Dest == DestB) {
4304
398k
      CasesB.push_back(Case.getCaseValue());
4305
398k
      continue;
4306
398k
    }
4307
192k
    return false; // More than two destinations.
4308
192k
  }
4309
276k
4310
276k
  assert(DestA && DestB &&
4311
83.9k
         "Single-destination switch should have been folded.");
4312
83.9k
  assert(DestA != DestB);
4313
83.9k
  assert(DestB != SI->getDefaultDest());
4314
83.9k
  assert(!CasesB.empty() && "There must be non-default cases.");
4315
83.9k
  assert(!CasesA.empty() || HasDefault);
4316
83.9k
4317
83.9k
  // Figure out if one of the sets of cases form a contiguous range.
4318
83.9k
  SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
4319
83.9k
  BasicBlock *ContiguousDest = nullptr;
4320
83.9k
  BasicBlock *OtherDest = nullptr;
4321
83.9k
  if (!CasesA.empty() && 
CasesAreContiguous(CasesA)2.99k
) {
4322
2.92k
    ContiguousCases = &CasesA;
4323
2.92k
    ContiguousDest = DestA;
4324
2.92k
    OtherDest = DestB;
4325
81.0k
  } else if (CasesAreContiguous(CasesB)) {
4326
1.15k
    ContiguousCases = &CasesB;
4327
1.15k
    ContiguousDest = DestB;
4328
1.15k
    OtherDest = DestA;
4329
1.15k
  } else
4330
79.8k
    return false;
4331
4.07k
4332
4.07k
  // Start building the compare and branch.
4333
4.07k
4334
4.07k
  Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
4335
4.07k
  Constant *NumCases =
4336
4.07k
      ConstantInt::get(Offset->getType(), ContiguousCases->size());
4337
4.07k
4338
4.07k
  Value *Sub = SI->getCondition();
4339
4.07k
  if (!Offset->isNullValue())
4340
1.94k
    Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
4341
4.07k
4342
4.07k
  Value *Cmp;
4343
4.07k
  // If NumCases overflowed, then all possible values jump to the successor.
4344
4.07k
  if (NumCases->isNullValue() && 
!ContiguousCases->empty()1
)
4345
1
    Cmp = ConstantInt::getTrue(SI->getContext());
4346
4.07k
  else
4347
4.07k
    Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
4348
4.07k
  BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
4349
4.07k
4350
4.07k
  // Update weight for the newly-created conditional branch.
4351
4.07k
  if (HasBranchWeights(SI)) {
4352
1
    SmallVector<uint64_t, 8> Weights;
4353
1
    GetBranchWeights(SI, Weights);
4354
1
    if (Weights.size() == 1 + SI->getNumCases()) {
4355
1
      uint64_t TrueWeight = 0;
4356
1
      uint64_t FalseWeight = 0;
4357
5
      for (size_t I = 0, E = Weights.size(); I != E; 
++I4
) {
4358
4
        if (SI->getSuccessor(I) == ContiguousDest)
4359
3
          TrueWeight += Weights[I];
4360
1
        else
4361
1
          FalseWeight += Weights[I];
4362
4
      }
4363
1
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
4364
0
        TrueWeight /= 2;
4365
0
        FalseWeight /= 2;
4366
0
      }
4367
1
      setBranchWeights(NewBI, TrueWeight, FalseWeight);
4368
1
    }
4369
1
  }
4370
4.07k
4371
4.07k
  // Prune obsolete incoming values off the successors' PHI nodes.
4372
4.28k
  for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); 
++BBI204
) {
4373
204
    unsigned PreviousEdges = ContiguousCases->size();
4374
204
    if (ContiguousDest == SI->getDefaultDest())
4375
0
      ++PreviousEdges;
4376
367
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; 
++I163
)
4377
163
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
4378
204
  }
4379
4.29k
  for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); 
++BBI223
) {
4380
223
    unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
4381
223
    if (OtherDest == SI->getDefaultDest())
4382
143
      ++PreviousEdges;
4383
225
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; 
++I2
)
4384
2
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
4385
223
  }
4386
4.07k
4387
4.07k
  // Drop the switch.
4388
4.07k
  SI->eraseFromParent();
4389
4.07k
4390
4.07k
  return true;
4391
4.07k
}
4392
4393
/// Compute masked bits for the condition of a switch
4394
/// and use it to remove dead cases.
4395
static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
4396
272k
                                     const DataLayout &DL) {
4397
272k
  Value *Cond = SI->getCondition();
4398
272k
  unsigned Bits = Cond->getType()->getIntegerBitWidth();
4399
272k
  KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
4400
272k
4401
272k
  // We can also eliminate cases by determining that their values are outside of
4402
272k
  // the limited range of the condition based on how many significant (non-sign)
4403
272k
  // bits are in the condition value.
4404
272k
  unsigned ExtraSignBits = ComputeNumSignBits(Cond, DL, 0, AC, SI) - 1;
4405
272k
  unsigned MaxSignificantBitsInCond = Bits - ExtraSignBits;
4406
272k
4407
272k
  // Gather dead cases.
4408
272k
  SmallVector<ConstantInt *, 8> DeadCases;
4409
901k
  for (auto &Case : SI->cases()) {
4410
901k
    const APInt &CaseVal = Case.getCaseValue()->getValue();
4411
901k
    if (Known.Zero.intersects(CaseVal) || 
!Known.One.isSubsetOf(CaseVal)901k
||
4412
901k
        
(CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)901k
) {
4413
268
      DeadCases.push_back(Case.getCaseValue());
4414
268
      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
4415
268
                        << " is dead.\n");
4416
268
    }
4417
901k
  }
4418
272k
4419
272k
  // If we can prove that the cases must cover all possible values, the
4420
272k
  // default destination becomes dead and we can remove it.  If we know some
4421
272k
  // of the bits in the value, we can use that to more precisely compute the
4422
272k
  // number of possible unique case values.
4423
272k
  bool HasDefault =
4424
272k
      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
4425
272k
  const unsigned NumUnknownBits =
4426
272k
      Bits - (Known.Zero | Known.One).countPopulation();
4427
272k
  assert(NumUnknownBits <= Bits);
4428
272k
  if (HasDefault && 
DeadCases.empty()271k
&&
4429
272k
      
NumUnknownBits < 64271k
/* avoid overflow */ &&
4430
272k
      
SI->getNumCases() == (1ULL << NumUnknownBits)268k
) {
4431
124
    LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
4432
124
    BasicBlock *NewDefault =
4433
124
        SplitBlockPredecessors(SI->getDefaultDest(), SI->getParent(), "");
4434
124
    SI->setDefaultDest(&*NewDefault);
4435
124
    SplitBlock(&*NewDefault, &NewDefault->front());
4436
124
    auto *OldTI = NewDefault->getTerminator();
4437
124
    new UnreachableInst(SI->getContext(), OldTI);
4438
124
    EraseTerminatorAndDCECond(OldTI);
4439
124
    return true;
4440
124
  }
4441
272k
4442
272k
  if (DeadCases.empty())
4443
272k
    return false;
4444
99
4445
99
  SwitchInstProfUpdateWrapper SIW(*SI);
4446
268
  for (ConstantInt *DeadCase : DeadCases) {
4447
268
    SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
4448
268
    assert(CaseI != SI->case_default() &&
4449
268
           "Case was not found. Probably mistake in DeadCases forming.");
4450
268
    // Prune unused values from PHI nodes.
4451
268
    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
4452
268
    SIW.removeCase(CaseI);
4453
268
  }
4454
99
4455
99
  return true;
4456
99
}
4457
4458
/// If BB would be eligible for simplification by
4459
/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
4460
/// by an unconditional branch), look at the phi node for BB in the successor
4461
/// block and see if the incoming value is equal to CaseValue. If so, return
4462
/// the phi node, and set PhiIndex to BB's index in the phi node.
4463
static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
4464
195k
                                              BasicBlock *BB, int *PhiIndex) {
4465
195k
  if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
4466
154k
    return nullptr; // BB must be empty to be a candidate for simplification.
4467
40.9k
  if (!BB->getSinglePredecessor())
4468
9.51k
    return nullptr; // BB must be dominated by the switch.
4469
31.4k
4470
31.4k
  BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
4471
31.4k
  if (!Branch || 
!Branch->isUnconditional()17.0k
)
4472
22.0k
    return nullptr; // Terminator must be unconditional branch.
4473
9.43k
4474
9.43k
  BasicBlock *Succ = Branch->getSuccessor(0);
4475
9.43k
4476
11.5k
  for (PHINode &PHI : Succ->phis()) {
4477
11.5k
    int Idx = PHI.getBasicBlockIndex(BB);
4478
11.5k
    assert(Idx >= 0 && "PHI has no entry for predecessor?");
4479
11.5k
4480
11.5k
    Value *InValue = PHI.getIncomingValue(Idx);
4481
11.5k
    if (InValue != CaseValue)
4482
11.2k
      continue;
4483
312
4484
312
    *PhiIndex = Idx;
4485
312
    return &PHI;
4486
312
  }
4487
9.43k
4488
9.43k
  
return nullptr9.11k
;
4489
9.43k
}
4490
4491
/// Try to forward the condition of a switch instruction to a phi node
4492
/// dominated by the switch, if that would mean that some of the destination
4493
/// blocks of the switch can be folded away. Return true if a change is made.
4494
61.4k
static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
4495
61.4k
  using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
4496
61.4k
4497
61.4k
  ForwardingNodesMap ForwardingNodes;
4498
61.4k
  BasicBlock *SwitchBlock = SI->getParent();
4499
61.4k
  bool Changed = false;
4500
195k
  for (auto &Case : SI->cases()) {
4501
195k
    ConstantInt *CaseValue = Case.getCaseValue();
4502
195k
    BasicBlock *CaseDest = Case.getCaseSuccessor();
4503
195k
4504
195k
    // Replace phi operands in successor blocks that are using the constant case
4505
195k
    // value rather than the switch condition variable:
4506
195k
    //   switchbb:
4507
195k
    //   switch i32 %x, label %default [
4508
195k
    //     i32 17, label %succ
4509
195k
    //   ...
4510
195k
    //   succ:
4511
195k
    //     %r = phi i32 ... [ 17, %switchbb ] ...
4512
195k
    // -->
4513
195k
    //     %r = phi i32 ... [ %x, %switchbb ] ...
4514
195k
4515
195k
    for (PHINode &Phi : CaseDest->phis()) {
4516
24.5k
      // This only works if there is exactly 1 incoming edge from the switch to
4517
24.5k
      // a phi. If there is >1, that means multiple cases of the switch map to 1
4518
24.5k
      // value in the phi, and that phi value is not the switch condition. Thus,
4519
24.5k
      // this transform would not make sense (the phi would be invalid because
4520
24.5k
      // a phi can't have different incoming values from the same block).
4521
24.5k
      int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
4522
24.5k
      if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
4523
24.5k
          
count(Phi.blocks(), SwitchBlock) == 1172
) {
4524
146
        Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
4525
146
        Changed = true;
4526
146
      }
4527
24.5k
    }
4528
195k
4529
195k
    // Collect phi nodes that are indirectly using this switch's case constants.
4530
195k
    int PhiIdx;
4531
195k
    if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
4532
312
      ForwardingNodes[Phi].push_back(PhiIdx);
4533
195k
  }
4534
61.4k
4535
61.4k
  for (auto &ForwardingNode : ForwardingNodes) {
4536
213
    PHINode *Phi = ForwardingNode.first;
4537
213
    SmallVectorImpl<int> &Indexes = ForwardingNode.second;
4538
213
    if (Indexes.size() < 2)
4539
152
      continue;
4540
61
4541
61
    for (int Index : Indexes)
4542
160
      Phi->setIncomingValue(Index, SI->getCondition());
4543
61
    Changed = true;
4544
61
  }
4545
61.4k
4546
61.4k
  return Changed;
4547
61.4k
}
4548
4549
/// Return true if the backend will be able to handle
4550
/// initializing an array of constants like C.
4551
81.1k
static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) {
4552
81.1k
  if (C->isThreadDependent())
4553
4
    return false;
4554
81.1k
  if (C->isDLLImportDependent())
4555
4
    return false;
4556
81.1k
4557
81.1k
  if (!isa<ConstantFP>(C) && 
!isa<ConstantInt>(C)81.0k
&&
4558
81.1k
      
!isa<ConstantPointerNull>(C)27.4k
&&
!isa<GlobalValue>(C)25.1k
&&
4559
81.1k
      
!isa<UndefValue>(C)12.3k
&&
!isa<ConstantExpr>(C)12.3k
)
4560
365
    return false;
4561
80.7k
4562
80.7k
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
4563
11.9k
    if (!CE->isGEPWithNoNotionalOverIndexing())
4564
43
      return false;
4565
11.9k
    if (!ValidLookupTableConstant(CE->getOperand(0), TTI))
4566
2
      return false;
4567
80.7k
  }
4568
80.7k
4569
80.7k
  if (!TTI.shouldBuildLookupTablesForConstant(C))
4570
60
    return false;
4571
80.6k
4572
80.6k
  return true;
4573
80.6k
}
4574
4575
/// If V is a Constant, return it. Otherwise, try to look up
4576
/// its constant value in ConstantPool, returning 0 if it's not there.
4577
static Constant *
4578
LookupConstant(Value *V,
4579
528k
               const SmallDenseMap<Value *, Constant *> &ConstantPool) {
4580
528k
  if (Constant *C = dyn_cast<Constant>(V))
4581
232k
    return C;
4582
296k
  return ConstantPool.lookup(V);
4583
296k
}
4584
4585
/// Try to fold instruction I into a constant. This works for
4586
/// simple instructions such as binary operations where both operands are
4587
/// constant or can be replaced by constants from the ConstantPool. Returns the
4588
/// resulting constant on success, 0 otherwise.
4589
static Constant *
4590
ConstantFold(Instruction *I, const DataLayout &DL,
4591
291k
             const SmallDenseMap<Value *, Constant *> &ConstantPool) {
4592
291k
  if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
4593
166
    Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
4594
166
    if (!A)
4595
115
      return nullptr;
4596
51
    if (A->isAllOnesValue())
4597
22
      return LookupConstant(Select->getTrueValue(), ConstantPool);
4598
29
    if (A->isNullValue())
4599
29
      return LookupConstant(Select->getFalseValue(), ConstantPool);
4600
0
    return nullptr;
4601
0
  }
4602
290k
4603
290k
  SmallVector<Constant *, 4> COps;
4604
475k
  for (unsigned N = 0, E = I->getNumOperands(); N != E; 
++N184k
) {
4605
446k
    if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
4606
184k
      COps.push_back(A);
4607
262k
    else
4608
262k
      return nullptr;
4609
446k
  }
4610
290k
4611
290k
  
if (CmpInst *28.6k
Cmp28.6k
= dyn_cast<CmpInst>(I)) {
4612
247
    return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
4613
247
                                           COps[1], DL);
4614
247
  }
4615
28.4k
4616
28.4k
  return ConstantFoldInstOperands(I, COps, DL);
4617
28.4k
}
4618
4619
/// Try to determine the resulting constant values in phi nodes
4620
/// at the common destination basic block, *CommonDest, for one of the case
4621
/// destionations CaseDest corresponding to value CaseVal (0 for the default
4622
/// case), of a switch instruction SI.
4623
static bool
4624
GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
4625
               BasicBlock **CommonDest,
4626
               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
4627
349k
               const DataLayout &DL, const TargetTransformInfo &TTI) {
4628
349k
  // The block from which we enter the common destination.
4629
349k
  BasicBlock *Pred = SI->getParent();
4630
349k
4631
349k
  // If CaseDest is empty except for some side-effect free instructions through
4632
349k
  // which we can constant-propagate the CaseVal, continue to its successor.
4633
349k
  SmallDenseMap<Value *, Constant *> ConstantPool;
4634
349k
  ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
4635
349k
  for (Instruction &I :CaseDest->instructionsWithoutDebug()) {
4636
349k
    if (I.isTerminator()) {
4637
58.7k
      // If the terminator is a simple branch, continue to the next block.
4638
58.7k
      if (I.getNumSuccessors() != 1 || 
I.isExceptionalTerminator()21.6k
)
4639
37.1k
        return false;
4640
21.6k
      Pred = CaseDest;
4641
21.6k
      CaseDest = I.getSuccessor(0);
4642
291k
    } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
4643
740
      // Instruction is side-effect free and constant.
4644
740
4645
740
      // If the instruction has uses outside this block or a phi node slot for
4646
740
      // the block, it is not safe to bypass the instruction since it would then
4647
740
      // no longer dominate all its uses.
4648
796
      for (auto &Use : I.uses()) {
4649
796
        User *User = Use.getUser();
4650
796
        if (Instruction *I = dyn_cast<Instruction>(User))
4651
796
          if (I->getParent() == CaseDest)
4652
642
            continue;
4653
154
        if (PHINode *Phi = dyn_cast<PHINode>(User))
4654
96
          if (Phi->getIncomingBlock(Use) == CaseDest)
4655
62
            continue;
4656
92
        return false;
4657
92
      }
4658
740
4659
740
      ConstantPool.insert(std::make_pair(&I, C));
4660
290k
    } else {
4661
290k
      break;
4662
290k
    }
4663
349k
  }
4664
349k
4665
349k
  // If we did not have a CommonDest before, use the current one.
4666
349k
  
if (311k
!*CommonDest311k
)
4667
257k
    *CommonDest = CaseDest;
4668
311k
  // If the destination isn't the common one, abort.
4669
311k
  if (CaseDest != *CommonDest)
4670
32.3k
    return false;
4671
279k
4672
279k
  // Get the values for this case from phi nodes in the destination block.
4673
279k
  for (PHINode &PHI : (*CommonDest)->phis()) {
4674
82.0k
    int Idx = PHI.getBasicBlockIndex(Pred);
4675
82.0k
    if (Idx == -1)
4676
0
      continue;
4677
82.0k
4678
82.0k
    Constant *ConstVal =
4679
82.0k
        LookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
4680
82.0k
    if (!ConstVal)
4681
12.8k
      return false;
4682
69.2k
4683
69.2k
    // Be conservative about which kinds of constants we support.
4684
69.2k
    if (!ValidLookupTableConstant(ConstVal, TTI))
4685
476
      return false;
4686
68.7k
4687
68.7k
    Res.push_back(std::make_pair(&PHI, ConstVal));
4688
68.7k
  }
4689
279k
4690
279k
  
return Res.size() > 0266k
;
4691
279k
}
4692
4693
// Helper function used to add CaseVal to the list of cases that generate
4694
// Result. Returns the updated number of cases that generate this result.
4695
static uintptr_t MapCaseToResult(ConstantInt *CaseVal,
4696
                                 SwitchCaseResultVectorTy &UniqueResults,
4697
49.9k
                                 Constant *Result) {
4698
49.9k
  for (auto &I : UniqueResults) {
4699
18.8k
    if (I.first == Result) {
4700
3.74k
      I.second.push_back(CaseVal);
4701
3.74k
      return I.second.size();
4702
3.74k
    }
4703
18.8k
  }
4704
49.9k
  UniqueResults.push_back(
4705
46.2k
      std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
4706
46.2k
  return 1;
4707
49.9k
}
4708
4709
// Helper function that initializes a map containing
4710
// results for the PHI node of the common destination block for a switch
4711
// instruction. Returns false if multiple PHI nodes have been found or if
4712
// there is not a common destination block for the switch.
4713
static bool
4714
InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest,
4715
                      SwitchCaseResultVectorTy &UniqueResults,
4716
                      Constant *&DefaultResult, const DataLayout &DL,
4717
                      const TargetTransformInfo &TTI,
4718
272k
                      uintptr_t MaxUniqueResults, uintptr_t MaxCasesPerResult) {
4719
312k
  for (auto &I : SI->cases()) {
4720
312k
    ConstantInt *CaseVal = I.getCaseValue();
4721
312k
4722
312k
    // Resulting value at phi nodes for this case value.
4723
312k
    SwitchCaseResultsTy Results;
4724
312k
    if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
4725
312k
                        DL, TTI))
4726
261k
      return false;
4727
50.9k
4728
50.9k
    // Only one value per case is permitted.
4729
50.9k
    if (Results.size() > 1)
4730
951
      return false;
4731
49.9k
4732
49.9k
    // Add the case->result mapping to UniqueResults.
4733
49.9k
    const uintptr_t NumCasesForResult =
4734
49.9k
        MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
4735
49.9k
4736
49.9k
    // Early out if there are too many cases for this result.
4737
49.9k
    if (NumCasesForResult > MaxCasesPerResult)
4738
3.74k
      return false;
4739
46.2k
4740
46.2k
    // Early out if there are too many unique results.
4741
46.2k
    if (UniqueResults.size() > MaxUniqueResults)
4742
4.54k
      return false;
4743
41.6k
4744
41.6k
    // Check the PHI consistency.
4745
41.6k
    if (!PHI)
4746
35.7k
      PHI = Results[0].first;
4747
5.94k
    else if (PHI != Results[0].first)
4748
0
      return false;
4749
41.6k
  }
4750
272k
  // Find the default result value.
4751
272k
  SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
4752
1.30k
  BasicBlock *DefaultDest = SI->getDefaultDest();
4753
1.30k
  GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
4754
1.30k
                 DL, TTI);
4755
1.30k
  // If the default value is not found abort unless the default destination
4756
1.30k
  // is unreachable.
4757
1.30k
  DefaultResult =
4758
1.30k
      DefaultResults.size() == 1 ? 
DefaultResults.begin()->second212
:
nullptr1.09k
;
4759
1.30k
  if ((!DefaultResult &&
4760
1.30k
       
!isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())1.09k
))
4761
1.09k
    return false;
4762
212
4763
212
  return true;
4764
212
}
4765
4766
// Helper function that checks if it is possible to transform a switch with only
4767
// two cases (or two cases + default) that produces a result into a select.
4768
// Example:
4769
// switch (a) {
4770
//   case 10:                %0 = icmp eq i32 %a, 10
4771
//     return 10;            %1 = select i1 %0, i32 10, i32 4
4772
//   case 20:        ---->   %2 = icmp eq i32 %a, 20
4773
//     return 2;             %3 = select i1 %2, i32 2, i32 %1
4774
//   default:
4775
//     return 4;
4776
// }
4777
static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
4778
                                   Constant *DefaultResult, Value *Condition,
4779
212
                                   IRBuilder<> &Builder) {
4780
212
  assert(ResultVector.size() == 2 &&
4781
212
         "We should have exactly two unique results at this point");
4782
212
  // If we are selecting between only two cases transform into a simple
4783
212
  // select or a two-way select if default is possible.
4784
212
  if (ResultVector[0].second.size() == 1 &&
4785
212
      ResultVector[1].second.size() == 1) {
4786
212
    ConstantInt *const FirstCase = ResultVector[0].second[0];
4787
212
    ConstantInt *const SecondCase = ResultVector[1].second[0];
4788
212
4789
212
    bool DefaultCanTrigger = DefaultResult;
4790
212
    Value *SelectValue = ResultVector[1].first;
4791
212
    if (DefaultCanTrigger) {
4792
212
      Value *const ValueCompare =
4793
212
          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
4794
212
      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
4795
212
                                         DefaultResult, "switch.select");
4796
212
    }
4797
212
    Value *const ValueCompare =
4798
212
        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
4799
212
    return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
4800
212
                                SelectValue, "switch.select");
4801
212
  }
4802
0
4803
0
  return nullptr;
4804
0
}
4805
4806
// Helper function to cleanup a switch instruction that has been converted into
4807
// a select, fixing up PHI nodes and basic blocks.
4808
static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
4809
                                              Value *SelectValue,
4810
212
                                              IRBuilder<> &Builder) {
4811
212
  BasicBlock *SelectBB = SI->getParent();
4812
341
  while (PHI->getBasicBlockIndex(SelectBB) >= 0)
4813
129
    PHI->removeIncomingValue(SelectBB);
4814
212
  PHI->addIncoming(SelectValue, SelectBB);
4815
212
4816
212
  Builder.CreateBr(PHI->getParent());
4817
212
4818
212
  // Remove the switch.
4819
848
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; 
++i636
) {
4820
636
    BasicBlock *Succ = SI->getSuccessor(i);
4821
636
4822
636
    if (Succ == PHI->getParent())
4823
129
      continue;
4824
507
    Succ->removePredecessor(SelectBB);
4825
507
  }
4826
212
  SI->eraseFromParent();
4827
212
}
4828
4829
/// If the switch is only used to initialize one or more
4830
/// phi nodes in a common successor block with only two different
4831
/// constant values, replace the switch with select.
4832
static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
4833
                           const DataLayout &DL,
4834
272k
                           const TargetTransformInfo &TTI) {
4835
272k
  Value *const Cond = SI->getCondition();
4836
272k
  PHINode *PHI = nullptr;
4837
272k
  BasicBlock *CommonDest = nullptr;
4838
272k
  Constant *DefaultResult;
4839
272k
  SwitchCaseResultVectorTy UniqueResults;
4840
272k
  // Collect all the cases that will deliver the same value from the switch.
4841
272k
  if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
4842
272k
                             DL, TTI, 2, 1))
4843
272k
    return false;
4844
212
  // Selects choose between maximum two values.
4845
212
  if (UniqueResults.size() != 2)
4846
0
    return false;
4847
212
  assert(PHI != nullptr && "PHI for value select not found");
4848
212
4849
212
  Builder.SetInsertPoint(SI);
4850
212
  Value *SelectValue =
4851
212
      ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder);
4852
212
  if (SelectValue) {
4853
212
    RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder);
4854
212
    return true;
4855
212
  }
4856
0
  // The switch couldn't be converted into a select.
4857
0
  return false;
4858
0
}
4859
4860
namespace {
4861
4862
/// This class represents a lookup table that can be used to replace a switch.
4863
class SwitchLookupTable {
4864
public:
4865
  /// Create a lookup table to use as a switch replacement with the contents
4866
  /// of Values, using DefaultValue to fill any holes in the table.
4867
  SwitchLookupTable(
4868
      Module &M, uint64_t TableSize, ConstantInt *Offset,
4869
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
4870
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
4871
4872
  /// Build instructions with Builder to retrieve the value at
4873
  /// the position given by Index in the lookup table.
4874
  Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
4875
4876
  /// Return true if a table with TableSize elements of
4877
  /// type ElementType would fit in a target-legal register.
4878
  static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
4879
                                 Type *ElementType);
4880
4881
private:
4882
  // Depending on the contents of the table, it can be represented in
4883
  // different ways.
4884
  enum {
4885
    // For tables where each element contains the same value, we just have to
4886
    // store that single value and return it for each lookup.
4887
    SingleValueKind,
4888
4889
    // For tables where there is a linear relationship between table index
4890
    // and values. We calculate the result with a simple multiplication
4891
    // and addition instead of a table lookup.
4892
    LinearMapKind,
4893
4894
    // For small tables with integer elements, we can pack them into a bitmap
4895
    // that fits into a target-legal register. Values are retrieved by
4896
    // shift and mask operations.
4897
    BitMapKind,
4898
4899
    // The table is stored as an array of values. Values are retrieved by load
4900
    // instructions from the table.
4901
    ArrayKind
4902
  } Kind;
4903
4904
  // For SingleValueKind, this is the single value.
4905
  Constant *SingleValue = nullptr;
4906
4907
  // For BitMapKind, this is the bitmap.
4908
  ConstantInt *BitMap = nullptr;
4909
  IntegerType *BitMapElementTy = nullptr;
4910
4911
  // For LinearMapKind, these are the constants used to derive the value.
4912
  ConstantInt *LinearOffset = nullptr;
4913
  ConstantInt *LinearMultiplier = nullptr;
4914
4915
  // For ArrayKind, this is the array.
4916
  GlobalVariable *Array = nullptr;
4917
};
4918
4919
} // end anonymous namespace
4920
4921
SwitchLookupTable::SwitchLookupTable(
4922
    Module &M, uint64_t TableSize, ConstantInt *Offset,
4923
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
4924
565
    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
4925
565
  assert(Values.size() && "Can't build lookup table without values!");
4926
565
  assert(TableSize >= Values.size() && "Can't fit values in table!");
4927
565
4928
565
  // If all values in the table are equal, this is that value.
4929
565
  SingleValue = Values.begin()->second;
4930
565
4931
565
  Type *ValueType = Values.begin()->second->getType();
4932
565
4933
565
  // Build up the table contents.
4934
565
  SmallVector<Constant *, 64> TableContents(TableSize);
4935
4.37k
  for (size_t I = 0, E = Values.size(); I != E; 
++I3.81k
) {
4936
3.81k
    ConstantInt *CaseVal = Values[I].first;
4937
3.81k
    Constant *CaseRes = Values[I].second;
4938
3.81k
    assert(CaseRes->getType() == ValueType);
4939
3.81k
4940
3.81k
    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
4941
3.81k
    TableContents[Idx] = CaseRes;
4942
3.81k
4943
3.81k
    if (CaseRes != SingleValue)
4944
2.85k
      SingleValue = nullptr;
4945
3.81k
  }
4946
565
4947
565
  // Fill in any holes in the table with the default result.
4948
565
  if (Values.size() < TableSize) {
4949
166
    assert(DefaultValue &&
4950
166
           "Need a default value to fill the lookup table holes.");
4951
166
    assert(DefaultValue->getType() == ValueType);
4952
2.14k
    for (uint64_t I = 0; I < TableSize; 
++I1.97k
) {
4953
1.97k
      if (!TableContents[I])
4954
1.12k
        TableContents[I] = DefaultValue;
4955
1.97k
    }
4956
166
4957
166
    if (DefaultValue != SingleValue)
4958
153
      SingleValue = nullptr;
4959
166
  }
4960
565
4961
565
  // If each element in the table contains the same value, we only need to store
4962
565
  // that single value.
4963
565
  if (SingleValue) {
4964
36
    Kind = SingleValueKind;
4965
36
    return;
4966
36
  }
4967
529
4968
529
  // Check if we can derive the value with a linear transformation from the
4969
529
  // table index.
4970
529
  if (isa<IntegerType>(ValueType)) {
4971
258
    bool LinearMappingPossible = true;
4972
258
    APInt PrevVal;
4973
258
    APInt DistToPrev;
4974
258
    assert(TableSize >= 2 && "Should be a SingleValue table.");
4975
258
    // Check if there is the same distance between two consecutive values.
4976
944
    for (uint64_t I = 0; I < TableSize; 
++I686
) {
4977
876
      ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
4978
876
      if (!ConstVal) {
4979
2
        // This is an undef. We could deal with it, but undefs in lookup tables
4980
2
        // are very seldom. It's probably not worth the additional complexity.
4981
2
        LinearMappingPossible = false;
4982
2
        break;
4983
2
      }
4984
874
      const APInt &Val = ConstVal->getValue();
4985
874
      if (I != 0) {
4986
616
        APInt Dist = Val - PrevVal;
4987
616
        if (I == 1) {
4988
258
          DistToPrev = Dist;
4989
358
        } else if (Dist != DistToPrev) {
4990
188
          LinearMappingPossible = false;
4991
188
          break;
4992
188
        }
4993
686
      }
4994
686
      PrevVal = Val;
4995
686
    }
4996
258
    if (LinearMappingPossible) {
4997
68
      LinearOffset = cast<ConstantInt>(TableContents[0]);
4998
68
      LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
4999
68
      Kind = LinearMapKind;
5000
68
      ++NumLinearMaps;
5001
68
      return;
5002
68
    }
5003
461
  }
5004
461
5005
461
  // If the type is integer and the table fits in a register, build a bitmap.
5006
461
  if (WouldFitInRegister(DL, TableSize, ValueType)) {
5007
34
    IntegerType *IT = cast<IntegerType>(ValueType);
5008
34
    APInt TableInt(TableSize * IT->getBitWidth(), 0);
5009
990
    for (uint64_t I = TableSize; I > 0; 
--I956
) {
5010
956
      TableInt <<= IT->getBitWidth();
5011
956
      // Insert values into the bitmap. Undef values are set to zero.
5012
956
      if (!isa<UndefValue>(TableContents[I - 1])) {
5013
946
        ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
5014
946
        TableInt |= Val->getValue().zext(TableInt.getBitWidth());
5015
946
      }
5016
956
    }
5017
34
    BitMap = ConstantInt::get(M.getContext(), TableInt);
5018
34
    BitMapElementTy = IT;
5019
34
    Kind = BitMapKind;
5020
34
    ++NumBitMaps;
5021
34
    return;
5022
34
  }
5023
427
5024
427
  // Store the table in an array.
5025
427
  ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
5026
427
  Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
5027
427
5028
427
  Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
5029
427
                             GlobalVariable::PrivateLinkage, Initializer,
5030
427
                             "switch.table." + FuncName);
5031
427
  Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
5032
427
  // Set the alignment to that of an array items. We will be only loading one
5033
427
  // value out of it.
5034
427
  Array->setAlignment(DL.getPrefTypeAlignment(ValueType));
5035
427
  Kind = ArrayKind;
5036
427
}
5037
5038
565
Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
5039
565
  switch (Kind) {
5040
565
  case SingleValueKind:
5041
36
    return SingleValue;
5042
565
  case LinearMapKind: {
5043
68
    // Derive the result value from the input value.
5044
68
    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
5045
68
                                          false, "switch.idx.cast");
5046
68
    if (!LinearMultiplier->isOne())
5047
4
      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult");
5048
68
    if (!LinearOffset->isZero())
5049
16
      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset");