Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop -------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass transforms loops that contain branches on loop-invariant conditions
10
// to multiple loops.  For example, it turns the left into the right code:
11
//
12
//  for (...)                  if (lic)
13
//    A                          for (...)
14
//    if (lic)                     A; B; C
15
//      B                      else
16
//    C                          for (...)
17
//                                 A; C
18
//
19
// This can increase the size of the code exponentially (doubling it every time
20
// a loop is unswitched) so we only unswitch if the resultant code will be
21
// smaller than a threshold.
22
//
23
// This pass expects LICM to be run before it to hoist invariant conditions out
24
// of the loop, to make the unswitching opportunity obvious.
25
//
26
//===----------------------------------------------------------------------===//
27
28
#include "llvm/ADT/DenseMap.h"
29
#include "llvm/ADT/SmallPtrSet.h"
30
#include "llvm/ADT/SmallVector.h"
31
#include "llvm/ADT/Statistic.h"
32
#include "llvm/Analysis/AssumptionCache.h"
33
#include "llvm/Analysis/CodeMetrics.h"
34
#include "llvm/Analysis/InstructionSimplify.h"
35
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
36
#include "llvm/Analysis/LoopInfo.h"
37
#include "llvm/Analysis/LoopIterator.h"
38
#include "llvm/Analysis/LoopPass.h"
39
#include "llvm/Analysis/MemorySSA.h"
40
#include "llvm/Analysis/MemorySSAUpdater.h"
41
#include "llvm/Analysis/ScalarEvolution.h"
42
#include "llvm/Analysis/TargetTransformInfo.h"
43
#include "llvm/IR/Attributes.h"
44
#include "llvm/IR/BasicBlock.h"
45
#include "llvm/IR/CallSite.h"
46
#include "llvm/IR/Constant.h"
47
#include "llvm/IR/Constants.h"
48
#include "llvm/IR/DerivedTypes.h"
49
#include "llvm/IR/Dominators.h"
50
#include "llvm/IR/Function.h"
51
#include "llvm/IR/IRBuilder.h"
52
#include "llvm/IR/InstrTypes.h"
53
#include "llvm/IR/Instruction.h"
54
#include "llvm/IR/Instructions.h"
55
#include "llvm/IR/IntrinsicInst.h"
56
#include "llvm/IR/Intrinsics.h"
57
#include "llvm/IR/Module.h"
58
#include "llvm/IR/Type.h"
59
#include "llvm/IR/User.h"
60
#include "llvm/IR/Value.h"
61
#include "llvm/IR/ValueHandle.h"
62
#include "llvm/Pass.h"
63
#include "llvm/Support/Casting.h"
64
#include "llvm/Support/CommandLine.h"
65
#include "llvm/Support/Debug.h"
66
#include "llvm/Support/raw_ostream.h"
67
#include "llvm/Transforms/Scalar.h"
68
#include "llvm/Transforms/Scalar/LoopPassManager.h"
69
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
70
#include "llvm/Transforms/Utils/Cloning.h"
71
#include "llvm/Transforms/Utils/Local.h"
72
#include "llvm/Transforms/Utils/LoopUtils.h"
73
#include "llvm/Transforms/Utils/ValueMapper.h"
74
#include <algorithm>
75
#include <cassert>
76
#include <map>
77
#include <set>
78
#include <tuple>
79
#include <utility>
80
#include <vector>
81
82
using namespace llvm;
83
84
#define DEBUG_TYPE "loop-unswitch"
85
86
STATISTIC(NumBranches, "Number of branches unswitched");
87
STATISTIC(NumSwitches, "Number of switches unswitched");
88
STATISTIC(NumGuards,   "Number of guards unswitched");
89
STATISTIC(NumSelects , "Number of selects unswitched");
90
STATISTIC(NumTrivial , "Number of unswitches that are trivial");
91
STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
92
STATISTIC(TotalInsts,  "Total number of instructions analyzed");
93
94
// The specific value of 100 here was chosen based only on intuition and a
95
// few specific examples.
96
static cl::opt<unsigned>
97
Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
98
          cl::init(100), cl::Hidden);
99
100
namespace {
101
102
  class LUAnalysisCache {
103
    using UnswitchedValsMap =
104
        DenseMap<const SwitchInst *, SmallPtrSet<const Value *, 8>>;
105
    using UnswitchedValsIt = UnswitchedValsMap::iterator;
106
107
    struct LoopProperties {
108
      unsigned CanBeUnswitchedCount;
109
      unsigned WasUnswitchedCount;
110
      unsigned SizeEstimation;
111
      UnswitchedValsMap UnswitchedVals;
112
    };
113
114
    // Here we use std::map instead of DenseMap, since we need to keep valid
115
    // LoopProperties pointer for current loop for better performance.
116
    using LoopPropsMap = std::map<const Loop *, LoopProperties>;
117
    using LoopPropsMapIt = LoopPropsMap::iterator;
118
119
    LoopPropsMap LoopsProperties;
120
    UnswitchedValsMap *CurLoopInstructions = nullptr;
121
    LoopProperties *CurrentLoopProperties = nullptr;
122
123
    // A loop unswitching with an estimated cost above this threshold
124
    // is not performed. MaxSize is turned into unswitching quota for
125
    // the current loop, and reduced correspondingly, though note that
126
    // the quota is returned by releaseMemory() when the loop has been
127
    // processed, so that MaxSize will return to its previous
128
    // value. So in most cases MaxSize will equal the Threshold flag
129
    // when a new loop is processed. An exception to that is that
130
    // MaxSize will have a smaller value while processing nested loops
131
    // that were introduced due to loop unswitching of an outer loop.
132
    //
133
    // FIXME: The way that MaxSize works is subtle and depends on the
134
    // pass manager processing loops and calling releaseMemory() in a
135
    // specific order. It would be good to find a more straightforward
136
    // way of doing what MaxSize does.
137
    unsigned MaxSize;
138
139
  public:
140
13.0k
    LUAnalysisCache() : MaxSize(Threshold) {}
141
142
    // Analyze loop. Check its size, calculate is it possible to unswitch
143
    // it. Returns true if we can unswitch this loop.
144
    bool countLoop(const Loop *L, const TargetTransformInfo &TTI,
145
                   AssumptionCache *AC);
146
147
    // Clean all data related to given loop.
148
    void forgetLoop(const Loop *L);
149
150
    // Mark case value as unswitched.
151
    // Since SI instruction can be partly unswitched, in order to avoid
152
    // extra unswitching in cloned loops keep track all unswitched values.
153
    void setUnswitched(const SwitchInst *SI, const Value *V);
154
155
    // Check was this case value unswitched before or not.
156
    bool isUnswitched(const SwitchInst *SI, const Value *V);
157
158
    // Returns true if another unswitching could be done within the cost
159
    // threshold.
160
    bool CostAllowsUnswitching();
161
162
    // Clone all loop-unswitch related loop properties.
163
    // Redistribute unswitching quotas.
164
    // Note, that new loop data is stored inside the VMap.
165
    void cloneData(const Loop *NewLoop, const Loop *OldLoop,
166
                   const ValueToValueMapTy &VMap);
167
  };
168
169
  class LoopUnswitch : public LoopPass {
170
    LoopInfo *LI;  // Loop information
171
    LPPassManager *LPM;
172
    AssumptionCache *AC;
173
174
    // Used to check if second loop needs processing after
175
    // RewriteLoopBodyWithConditionConstant rewrites first loop.
176
    std::vector<Loop*> LoopProcessWorklist;
177
178
    LUAnalysisCache BranchesInfo;
179
180
    bool OptimizeForSize;
181
    bool redoLoop = false;
182
183
    Loop *currentLoop = nullptr;
184
    DominatorTree *DT = nullptr;
185
    MemorySSA *MSSA = nullptr;
186
    std::unique_ptr<MemorySSAUpdater> MSSAU;
187
    BasicBlock *loopHeader = nullptr;
188
    BasicBlock *loopPreheader = nullptr;
189
190
    bool SanitizeMemory;
191
    SimpleLoopSafetyInfo SafetyInfo;
192
193
    // LoopBlocks contains all of the basic blocks of the loop, including the
194
    // preheader of the loop, the body of the loop, and the exit blocks of the
195
    // loop, in that order.
196
    std::vector<BasicBlock*> LoopBlocks;
197
    // NewBlocks contained cloned copy of basic blocks from LoopBlocks.
198
    std::vector<BasicBlock*> NewBlocks;
199
200
    bool hasBranchDivergence;
201
202
  public:
203
    static char ID; // Pass ID, replacement for typeid
204
205
    explicit LoopUnswitch(bool Os = false, bool hasBranchDivergence = false)
206
        : LoopPass(ID), OptimizeForSize(Os),
207
13.0k
          hasBranchDivergence(hasBranchDivergence) {
208
13.0k
        initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
209
13.0k
    }
210
211
    bool runOnLoop(Loop *L, LPPassManager &LPM) override;
212
    bool processCurrentLoop();
213
    bool isUnreachableDueToPreviousUnswitching(BasicBlock *);
214
215
    /// This transformation requires natural loop information & requires that
216
    /// loop preheaders be inserted into the CFG.
217
    ///
218
13.0k
    void getAnalysisUsage(AnalysisUsage &AU) const override {
219
13.0k
      AU.addRequired<AssumptionCacheTracker>();
220
13.0k
      AU.addRequired<TargetTransformInfoWrapperPass>();
221
13.0k
      if (EnableMSSALoopDependency) {
222
28
        AU.addRequired<MemorySSAWrapperPass>();
223
28
        AU.addPreserved<MemorySSAWrapperPass>();
224
28
      }
225
13.0k
      if (hasBranchDivergence)
226
97
        AU.addRequired<LegacyDivergenceAnalysis>();
227
13.0k
      getLoopAnalysisUsage(AU);
228
13.0k
    }
229
230
  private:
231
223k
    void releaseMemory() override {
232
223k
      BranchesInfo.forgetLoop(currentLoop);
233
223k
    }
234
235
227k
    void initLoopData() {
236
227k
      loopHeader = currentLoop->getHeader();
237
227k
      loopPreheader = currentLoop->getLoopPreheader();
238
227k
    }
239
240
    /// Split all of the edges from inside the loop to their exit blocks.
241
    /// Update the appropriate Phi nodes as we do so.
242
    void SplitExitEdges(Loop *L,
243
                        const SmallVectorImpl<BasicBlock *> &ExitBlocks);
244
245
    bool TryTrivialLoopUnswitch(bool &Changed);
246
247
    bool UnswitchIfProfitable(Value *LoopCond, Constant *Val,
248
                              Instruction *TI = nullptr);
249
    void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
250
                                  BasicBlock *ExitBlock, Instruction *TI);
251
    void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L,
252
                                     Instruction *TI);
253
254
    void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
255
                                              Constant *Val, bool isEqual);
256
257
    void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
258
                                        BasicBlock *TrueDest,
259
                                        BasicBlock *FalseDest,
260
                                        BranchInst *OldBranch, Instruction *TI);
261
262
    void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
263
264
    /// Given that the Invariant is not equal to Val. Simplify instructions
265
    /// in the loop.
266
    Value *SimplifyInstructionWithNotEqual(Instruction *Inst, Value *Invariant,
267
                                           Constant *Val);
268
  };
269
270
} // end anonymous namespace
271
272
// Analyze loop. Check its size, calculate is it possible to unswitch
273
// it. Returns true if we can unswitch this loop.
274
bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI,
275
227k
                                AssumptionCache *AC) {
276
227k
  LoopPropsMapIt PropsIt;
277
227k
  bool Inserted;
278
227k
  std::tie(PropsIt, Inserted) =
279
227k
      LoopsProperties.insert(std::make_pair(L, LoopProperties()));
280
227k
281
227k
  LoopProperties &Props = PropsIt->second;
282
227k
283
227k
  if (Inserted) {
284
220k
    // New loop.
285
220k
286
220k
    // Limit the number of instructions to avoid causing significant code
287
220k
    // expansion, and the number of basic blocks, to avoid loops with
288
220k
    // large numbers of branches which cause loop unswitching to go crazy.
289
220k
    // This is a very ad-hoc heuristic.
290
220k
291
220k
    SmallPtrSet<const Value *, 32> EphValues;
292
220k
    CodeMetrics::collectEphemeralValues(L, AC, EphValues);
293
220k
294
220k
    // FIXME: This is overly conservative because it does not take into
295
220k
    // consideration code simplification opportunities and code that can
296
220k
    // be shared by the resultant unswitched loops.
297
220k
    CodeMetrics Metrics;
298
1.19M
    for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
299
973k
         ++I)
300
973k
      Metrics.analyzeBasicBlock(*I, TTI, EphValues);
301
220k
302
220k
    Props.SizeEstimation = Metrics.NumInsts;
303
220k
    Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
304
220k
    Props.WasUnswitchedCount = 0;
305
220k
    MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount;
306
220k
307
220k
    if (Metrics.notDuplicatable) {
308
0
      LLVM_DEBUG(dbgs() << "NOT unswitching loop %" << L->getHeader()->getName()
309
0
                        << ", contents cannot be "
310
0
                        << "duplicated!\n");
311
0
      return false;
312
0
    }
313
227k
  }
314
227k
315
227k
  // Be careful. This links are good only before new loop addition.
316
227k
  CurrentLoopProperties = &Props;
317
227k
  CurLoopInstructions = &Props.UnswitchedVals;
318
227k
319
227k
  return true;
320
227k
}
321
322
// Clean all data related to given loop.
323
223k
void LUAnalysisCache::forgetLoop(const Loop *L) {
324
223k
  LoopPropsMapIt LIt = LoopsProperties.find(L);
325
223k
326
223k
  if (LIt != LoopsProperties.end()) {
327
223k
    LoopProperties &Props = LIt->second;
328
223k
    MaxSize += (Props.CanBeUnswitchedCount + Props.WasUnswitchedCount) *
329
223k
               Props.SizeEstimation;
330
223k
    LoopsProperties.erase(LIt);
331
223k
  }
332
223k
333
223k
  CurrentLoopProperties = nullptr;
334
223k
  CurLoopInstructions = nullptr;
335
223k
}
336
337
// Mark case value as unswitched.
338
// Since SI instruction can be partly unswitched, in order to avoid
339
// extra unswitching in cloned loops keep track all unswitched values.
340
182
void LUAnalysisCache::setUnswitched(const SwitchInst *SI, const Value *V) {
341
182
  (*CurLoopInstructions)[SI].insert(V);
342
182
}
343
344
// Check was this case value unswitched before or not.
345
5.44k
bool LUAnalysisCache::isUnswitched(const SwitchInst *SI, const Value *V) {
346
5.44k
  return (*CurLoopInstructions)[SI].count(V);
347
5.44k
}
348
349
20.1k
bool LUAnalysisCache::CostAllowsUnswitching() {
350
20.1k
  return CurrentLoopProperties->CanBeUnswitchedCount > 0;
351
20.1k
}
352
353
// Clone all loop-unswitch related loop properties.
354
// Redistribute unswitching quotas.
355
// Note, that new loop data is stored inside the VMap.
356
void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop,
357
3.76k
                                const ValueToValueMapTy &VMap) {
358
3.76k
  LoopProperties &NewLoopProps = LoopsProperties[NewLoop];
359
3.76k
  LoopProperties &OldLoopProps = *CurrentLoopProperties;
360
3.76k
  UnswitchedValsMap &Insts = OldLoopProps.UnswitchedVals;
361
3.76k
362
3.76k
  // Reallocate "can-be-unswitched quota"
363
3.76k
364
3.76k
  --OldLoopProps.CanBeUnswitchedCount;
365
3.76k
  ++OldLoopProps.WasUnswitchedCount;
366
3.76k
  NewLoopProps.WasUnswitchedCount = 0;
367
3.76k
  unsigned Quota = OldLoopProps.CanBeUnswitchedCount;
368
3.76k
  NewLoopProps.CanBeUnswitchedCount = Quota / 2;
369
3.76k
  OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2;
370
3.76k
371
3.76k
  NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation;
372
3.76k
373
3.76k
  // Clone unswitched values info:
374
3.76k
  // for new loop switches we clone info about values that was
375
3.76k
  // already unswitched and has redundant successors.
376
3.87k
  for (UnswitchedValsIt I = Insts.begin(); I != Insts.end(); 
++I102
) {
377
102
    const SwitchInst *OldInst = I->first;
378
102
    Value *NewI = VMap.lookup(OldInst);
379
102
    const SwitchInst *NewInst = cast_or_null<SwitchInst>(NewI);
380
102
    assert(NewInst && "All instructions that are in SrcBB must be in VMap.");
381
102
382
102
    NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst];
383
102
  }
384
3.76k
}
385
386
char LoopUnswitch::ID = 0;
387
388
48.6k
INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
389
48.6k
                      false, false)
390
48.6k
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
391
48.6k
INITIALIZE_PASS_DEPENDENCY(LoopPass)
392
48.6k
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
393
48.6k
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
394
48.6k
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
395
48.6k
INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
396
                      false, false)
397
398
12.9k
Pass *llvm::createLoopUnswitchPass(bool Os, bool hasBranchDivergence) {
399
12.9k
  return new LoopUnswitch(Os, hasBranchDivergence);
400
12.9k
}
401
402
/// Operator chain lattice.
403
enum OperatorChain {
404
  OC_OpChainNone,    ///< There is no operator.
405
  OC_OpChainOr,      ///< There are only ORs.
406
  OC_OpChainAnd,     ///< There are only ANDs.
407
  OC_OpChainMixed    ///< There are ANDs and ORs.
408
};
409
410
/// Cond is a condition that occurs in L. If it is invariant in the loop, or has
411
/// an invariant piece, return the invariant. Otherwise, return null.
412
//
413
/// NOTE: FindLIVLoopCondition will not return a partial LIV by walking up a
414
/// mixed operator chain, as we can not reliably find a value which will simplify
415
/// the operator chain. If the chain is AND-only or OR-only, we can use 0 or ~0
416
/// to simplify the chain.
417
///
418
/// NOTE: In case a partial LIV and a mixed operator chain, we may be able to
419
/// simplify the condition itself to a loop variant condition, but at the
420
/// cost of creating an entirely new loop.
421
static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
422
                                   OperatorChain &ParentChain,
423
840k
                                   DenseMap<Value *, Value *> &Cache) {
424
840k
  auto CacheIt = Cache.find(Cond);
425
840k
  if (CacheIt != Cache.end())
426
120
    return CacheIt->second;
427
840k
428
840k
  // We started analyze new instruction, increment scanned instructions counter.
429
840k
  ++TotalInsts;
430
840k
431
840k
  // We can never unswitch on vector conditions.
432
840k
  if (Cond->getType()->isVectorTy())
433
122
    return nullptr;
434
839k
435
839k
  // Constants should be folded, not unswitched on!
436
839k
  if (isa<Constant>(Cond)) 
return nullptr13.0k
;
437
826k
438
826k
  // TODO: Handle: br (VARIANT|INVARIANT).
439
826k
440
826k
  // Hoist simple values out.
441
826k
  if (L->makeLoopInvariant(Cond, Changed)) {
442
23.5k
    Cache[Cond] = Cond;
443
23.5k
    return Cond;
444
23.5k
  }
445
803k
446
803k
  // Walk up the operator chain to find partial invariant conditions.
447
803k
  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
448
27.9k
    if (BO->getOpcode() == Instruction::And ||
449
27.9k
        
BO->getOpcode() == Instruction::Or11.5k
) {
450
26.8k
      // Given the previous operator, compute the current operator chain status.
451
26.8k
      OperatorChain NewChain;
452
26.8k
      switch (ParentChain) {
453
26.8k
      case OC_OpChainNone:
454
25.4k
        NewChain = BO->getOpcode() == Instruction::And ? 
OC_OpChainAnd15.5k
:
455
25.4k
                                      
OC_OpChainOr9.82k
;
456
25.4k
        break;
457
26.8k
      case OC_OpChainOr:
458
802
        NewChain = BO->getOpcode() == Instruction::Or ? 
OC_OpChainOr592
:
459
802
                                      
OC_OpChainMixed210
;
460
802
        break;
461
26.8k
      case OC_OpChainAnd:
462
655
        NewChain = BO->getOpcode() == Instruction::And ? 
OC_OpChainAnd583
:
463
655
                                      
OC_OpChainMixed72
;
464
655
        break;
465
26.8k
      case OC_OpChainMixed:
466
0
        NewChain = OC_OpChainMixed;
467
0
        break;
468
26.8k
      }
469
26.8k
470
26.8k
      // If we reach a Mixed state, we do not want to keep walking up as we can not
471
26.8k
      // reliably find a value that will simplify the chain. With this check, we
472
26.8k
      // will return null on the first sight of mixed chain and the caller will
473
26.8k
      // either backtrack to find partial LIV in other operand or return null.
474
26.8k
      if (NewChain != OC_OpChainMixed) {
475
26.5k
        // Update the current operator chain type before we search up the chain.
476
26.5k
        ParentChain = NewChain;
477
26.5k
        // If either the left or right side is invariant, we can unswitch on this,
478
26.5k
        // which will cause the branch to go away in one loop and the condition to
479
26.5k
        // simplify in the other one.
480
26.5k
        if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed,
481
981
                                              ParentChain, Cache)) {
482
981
          Cache[Cond] = LHS;
483
981
          return LHS;
484
981
        }
485
25.6k
        // We did not manage to find a partial LIV in operand(0). Backtrack and try
486
25.6k
        // operand(1).
487
25.6k
        ParentChain = NewChain;
488
25.6k
        if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed,
489
57
                                              ParentChain, Cache)) {
490
57
          Cache[Cond] = RHS;
491
57
          return RHS;
492
57
        }
493
802k
      }
494
26.8k
    }
495
802k
496
802k
  Cache[Cond] = nullptr;
497
802k
  return nullptr;
498
802k
}
499
500
/// Cond is a condition that occurs in L. If it is invariant in the loop, or has
501
/// an invariant piece, return the invariant along with the operator chain type.
502
/// Otherwise, return null.
503
static std::pair<Value *, OperatorChain> FindLIVLoopCondition(Value *Cond,
504
                                                              Loop *L,
505
788k
                                                              bool &Changed) {
506
788k
  DenseMap<Value *, Value *> Cache;
507
788k
  OperatorChain OpChain = OC_OpChainNone;
508
788k
  Value *FCond = FindLIVLoopCondition(Cond, L, Changed, OpChain, Cache);
509
788k
510
788k
  // In case we do find a LIV, it can not be obtained by walking up a mixed
511
788k
  // operator chain.
512
788k
  assert((!FCond || OpChain != OC_OpChainMixed) &&
513
788k
        "Do not expect a partial LIV with mixed operator chain");
514
788k
  return {FCond, OpChain};
515
788k
}
516
517
223k
bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
518
223k
  if (skipLoop(L))
519
16
    return false;
520
223k
521
223k
  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
522
223k
      *L->getHeader()->getParent());
523
223k
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
524
223k
  LPM = &LPM_Ref;
525
223k
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
526
223k
  if (EnableMSSALoopDependency) {
527
100
    MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
528
100
    MSSAU = make_unique<MemorySSAUpdater>(MSSA);
529
100
    assert(DT && "Cannot update MemorySSA without a valid DomTree.");
530
100
  }
531
223k
  currentLoop = L;
532
223k
  Function *F = currentLoop->getHeader()->getParent();
533
223k
534
223k
  SanitizeMemory = F->hasFnAttribute(Attribute::SanitizeMemory);
535
223k
  if (SanitizeMemory)
536
8
    SafetyInfo.computeLoopSafetyInfo(L);
537
223k
538
223k
  if (MSSA && 
VerifyMemorySSA100
)
539
100
    MSSA->verifyMemorySSA();
540
223k
541
223k
  bool Changed = false;
542
227k
  do {
543
227k
    assert(currentLoop->isLCSSAForm(*DT));
544
227k
    if (MSSA && 
VerifyMemorySSA187
)
545
187
      MSSA->verifyMemorySSA();
546
227k
    redoLoop = false;
547
227k
    Changed |= processCurrentLoop();
548
227k
  } while(redoLoop);
549
223k
550
223k
  if (MSSA && 
VerifyMemorySSA100
)
551
100
    MSSA->verifyMemorySSA();
552
223k
553
223k
  return Changed;
554
223k
}
555
556
// Return true if the BasicBlock BB is unreachable from the loop header.
557
// Return false, otherwise.
558
996k
bool LoopUnswitch::isUnreachableDueToPreviousUnswitching(BasicBlock *BB) {
559
996k
  auto *Node = DT->getNode(BB)->getIDom();
560
996k
  BasicBlock *DomBB = Node->getBlock();
561
4.76M
  while (currentLoop->contains(DomBB)) {
562
3.81M
    BranchInst *BInst = dyn_cast<BranchInst>(DomBB->getTerminator());
563
3.81M
564
3.81M
    Node = DT->getNode(DomBB)->getIDom();
565
3.81M
    DomBB = Node->getBlock();
566
3.81M
567
3.81M
    if (!BInst || 
!BInst->isConditional()3.61M
)
568
732k
      continue;
569
3.08M
570
3.08M
    Value *Cond = BInst->getCondition();
571
3.08M
    if (!isa<ConstantInt>(Cond))
572
2.99M
      continue;
573
92.2k
574
92.2k
    BasicBlock *UnreachableSucc =
575
92.2k
        Cond == ConstantInt::getTrue(Cond->getContext())
576
92.2k
            ? 
BInst->getSuccessor(1)45.7k
577
92.2k
            : 
BInst->getSuccessor(0)46.5k
;
578
92.2k
579
92.2k
    if (DT->dominates(UnreachableSucc, BB))
580
43.2k
      return true;
581
92.2k
  }
582
996k
  
return false952k
;
583
996k
}
584
585
/// FIXME: Remove this workaround when freeze related patches are done.
586
/// LoopUnswitch and Equality propagation in GVN have discrepancy about
587
/// whether branch on undef/poison has undefine behavior. Here it is to
588
/// rule out some common cases that we found such discrepancy already
589
/// causing problems. Detail could be found in PR31652. Note if the
590
/// func returns true, it is unsafe. But if it is false, it doesn't mean
591
/// it is necessarily safe.
592
16.6k
static bool EqualityPropUnSafe(Value &LoopCond) {
593
16.6k
  ICmpInst *CI = dyn_cast<ICmpInst>(&LoopCond);
594
16.6k
  if (!CI || 
!CI->isEquality()15.8k
)
595
6.33k
    return false;
596
10.2k
597
10.2k
  Value *LHS = CI->getOperand(0);
598
10.2k
  Value *RHS = CI->getOperand(1);
599
10.2k
  if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
600
0
    return true;
601
10.2k
602
10.2k
  auto hasUndefInPHI = [](PHINode &PN) {
603
7.19k
    for (Value *Opd : PN.incoming_values()) {
604
7.19k
      if (isa<UndefValue>(Opd))
605
0
        return true;
606
7.19k
    }
607
6.29k
    return false;
608
6.29k
  };
609
10.2k
  PHINode *LPHI = dyn_cast<PHINode>(LHS);
610
10.2k
  PHINode *RPHI = dyn_cast<PHINode>(RHS);
611
10.2k
  if ((LPHI && 
hasUndefInPHI(*LPHI)6.26k
) || (RPHI &&
hasUndefInPHI(*RPHI)33
))
612
0
    return true;
613
10.2k
614
10.2k
  auto hasUndefInSelect = [](SelectInst &SI) {
615
41
    if (isa<UndefValue>(SI.getTrueValue()) ||
616
41
        isa<UndefValue>(SI.getFalseValue()))
617
0
      return true;
618
41
    return false;
619
41
  };
620
10.2k
  SelectInst *LSI = dyn_cast<SelectInst>(LHS);
621
10.2k
  SelectInst *RSI = dyn_cast<SelectInst>(RHS);
622
10.2k
  if ((LSI && 
hasUndefInSelect(*LSI)41
) || (RSI &&
hasUndefInSelect(*RSI)0
))
623
0
    return true;
624
10.2k
  return false;
625
10.2k
}
626
627
/// Do actual work and unswitch loop if possible and profitable.
628
227k
bool LoopUnswitch::processCurrentLoop() {
629
227k
  bool Changed = false;
630
227k
631
227k
  initLoopData();
632
227k
633
227k
  // If LoopSimplify was unable to form a preheader, don't do any unswitching.
634
227k
  if (!loopPreheader)
635
0
    return false;
636
227k
637
227k
  // Loops with indirectbr cannot be cloned.
638
227k
  if (!currentLoop->isSafeToClone())
639
9
    return false;
640
227k
641
227k
  // Without dedicated exits, splitting the exit edge may fail.
642
227k
  if (!currentLoop->hasDedicatedExits())
643
0
    return false;
644
227k
645
227k
  LLVMContext &Context = loopHeader->getContext();
646
227k
647
227k
  // Analyze loop cost, and stop unswitching if loop content can not be duplicated.
648
227k
  if (!BranchesInfo.countLoop(
649
227k
          currentLoop, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
650
227k
                           *currentLoop->getHeader()->getParent()),
651
227k
          AC))
652
0
    return false;
653
227k
654
227k
  // Try trivial unswitch first before loop over other basic blocks in the loop.
655
227k
  if (TryTrivialLoopUnswitch(Changed)) {
656
122
    return true;
657
122
  }
658
227k
659
227k
  // Do not do non-trivial unswitch while optimizing for size.
660
227k
  // FIXME: Use Function::hasOptSize().
661
227k
  if (OptimizeForSize ||
662
227k
      
loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize)227k
)
663
246
    return false;
664
227k
665
227k
  // Run through the instructions in the loop, keeping track of three things:
666
227k
  //
667
227k
  //  - That we do not unswitch loops containing convergent operations, as we
668
227k
  //    might be making them control dependent on the unswitch value when they
669
227k
  //    were not before.
670
227k
  //    FIXME: This could be refined to only bail if the convergent operation is
671
227k
  //    not already control-dependent on the unswitch value.
672
227k
  //
673
227k
  //  - That basic blocks in the loop contain invokes whose predecessor edges we
674
227k
  //    cannot split.
675
227k
  //
676
227k
  //  - The set of guard intrinsics encountered (these are non terminator
677
227k
  //    instructions that are also profitable to be unswitched).
678
227k
679
227k
  SmallVector<IntrinsicInst *, 4> Guards;
680
227k
681
1.04M
  for (const auto BB : currentLoop->blocks()) {
682
6.37M
    for (auto &I : *BB) {
683
6.37M
      auto CS = CallSite(&I);
684
6.37M
      if (!CS) 
continue5.93M
;
685
441k
      if (CS.hasFnAttr(Attribute::Convergent))
686
2
        return false;
687
441k
      if (auto *II = dyn_cast<InvokeInst>(&I))
688
11.1k
        if (!II->getUnwindDest()->canSplitPredecessors())
689
2
          return false;
690
441k
      if (auto *II = dyn_cast<IntrinsicInst>(&I))
691
59.1k
        if (II->getIntrinsicID() == Intrinsic::experimental_guard)
692
32
          Guards.push_back(II);
693
441k
    }
694
1.04M
  }
695
227k
696
227k
  
for (IntrinsicInst *Guard : Guards)227k
{
697
28
    Value *LoopCond =
698
28
        FindLIVLoopCondition(Guard->getOperand(0), currentLoop, Changed).first;
699
28
    if (LoopCond &&
700
28
        
UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))10
) {
701
10
      // NB! Unswitching (if successful) could have erased some of the
702
10
      // instructions in Guards leaving dangling pointers there.  This is fine
703
10
      // because we're returning now, and won't look at Guards again.
704
10
      ++NumGuards;
705
10
      return true;
706
10
    }
707
28
  }
708
227k
709
227k
  // Loop over all of the basic blocks in the loop.  If we find an interior
710
227k
  // block that is branching on a loop-invariant condition, we can unswitch this
711
227k
  // loop.
712
227k
  for (Loop::block_iterator I = currentLoop->block_begin(),
713
1.24M
         E = currentLoop->block_end(); I != E; 
++I1.01M
) {
714
1.01M
    Instruction *TI = (*I)->getTerminator();
715
1.01M
716
1.01M
    // Unswitching on a potentially uninitialized predicate is not
717
1.01M
    // MSan-friendly. Limit this to the cases when the original predicate is
718
1.01M
    // guaranteed to execute, to avoid creating a use-of-uninitialized-value
719
1.01M
    // in the code that did not have one.
720
1.01M
    // This is a workaround for the discrepancy between LLVM IR and MSan
721
1.01M
    // semantics. See PR28054 for more details.
722
1.01M
    if (SanitizeMemory &&
723
1.01M
        
!SafetyInfo.isGuaranteedToExecute(*TI, DT, currentLoop)38
)
724
32
      continue;
725
1.01M
726
1.01M
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
727
996k
      // Some branches may be rendered unreachable because of previous
728
996k
      // unswitching.
729
996k
      // Unswitch only those branches that are reachable.
730
996k
      if (isUnreachableDueToPreviousUnswitching(*I))
731
43.2k
        continue;
732
952k
733
952k
      // If this isn't branching on an invariant condition, we can't unswitch
734
952k
      // it.
735
952k
      if (BI->isConditional()) {
736
645k
        // See if this, or some part of it, is loop invariant.  If so, we can
737
645k
        // unswitch on it if we desire.
738
645k
        Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
739
645k
                                               currentLoop, Changed).first;
740
645k
        if (LoopCond && 
!EqualityPropUnSafe(*LoopCond)16.5k
&&
741
645k
            
UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)16.5k
) {
742
3.50k
          ++NumBranches;
743
3.50k
          return true;
744
3.50k
        }
745
21.5k
      }
746
21.5k
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
747
10.5k
      Value *SC = SI->getCondition();
748
10.5k
      Value *LoopCond;
749
10.5k
      OperatorChain OpChain;
750
10.5k
      std::tie(LoopCond, OpChain) =
751
10.5k
        FindLIVLoopCondition(SC, currentLoop, Changed);
752
10.5k
753
10.5k
      unsigned NumCases = SI->getNumCases();
754
10.5k
      if (LoopCond && 
NumCases3.03k
) {
755
3.03k
        // Find a value to unswitch on:
756
3.03k
        // FIXME: this should chose the most expensive case!
757
3.03k
        // FIXME: scan for a case with a non-critical edge?
758
3.03k
        Constant *UnswitchVal = nullptr;
759
3.03k
        // Find a case value such that at least one case value is unswitched
760
3.03k
        // out.
761
3.03k
        if (OpChain == OC_OpChainAnd) {
762
4
          // If the chain only has ANDs and the switch has a case value of 0.
763
4
          // Dropping in a 0 to the chain will unswitch out the 0-casevalue.
764
4
          auto *AllZero = cast<ConstantInt>(Constant::getNullValue(SC->getType()));
765
4
          if (BranchesInfo.isUnswitched(SI, AllZero))
766
2
            continue;
767
2
          // We are unswitching 0 out.
768
2
          UnswitchVal = AllZero;
769
3.03k
        } else if (OpChain == OC_OpChainOr) {
770
8
          // If the chain only has ORs and the switch has a case value of ~0.
771
8
          // Dropping in a ~0 to the chain will unswitch out the ~0-casevalue.
772
8
          auto *AllOne = cast<ConstantInt>(Constant::getAllOnesValue(SC->getType()));
773
8
          if (BranchesInfo.isUnswitched(SI, AllOne))
774
4
            continue;
775
4
          // We are unswitching ~0 out.
776
4
          UnswitchVal = AllOne;
777
3.02k
        } else {
778
3.02k
          assert(OpChain == OC_OpChainNone &&
779
3.02k
                 "Expect to unswitch on trivial chain");
780
3.02k
          // Do not process same value again and again.
781
3.02k
          // At this point we have some cases already unswitched and
782
3.02k
          // some not yet unswitched. Let's find the first not yet unswitched one.
783
3.36k
          for (auto Case : SI->cases()) {
784
3.36k
            Constant *UnswitchValCandidate = Case.getCaseValue();
785
3.36k
            if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) {
786
2.97k
              UnswitchVal = UnswitchValCandidate;
787
2.97k
              break;
788
2.97k
            }
789
3.36k
          }
790
3.02k
        }
791
3.03k
792
3.03k
        
if (3.02k
!UnswitchVal3.02k
)
793
46
          continue;
794
2.98k
795
2.98k
        if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
796
98
          ++NumSwitches;
797
98
          // In case of a full LIV, UnswitchVal is the value we unswitched out.
798
98
          // In case of a partial LIV, we only unswitch when its an AND-chain
799
98
          // or OR-chain. In both cases switch input value simplifies to
800
98
          // UnswitchVal.
801
98
          BranchesInfo.setUnswitched(SI, UnswitchVal);
802
98
          return true;
803
98
        }
804
970k
      }
805
10.5k
    }
806
970k
807
970k
    // Scan the instructions to check for unswitchable values.
808
970k
    for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
809
6.94M
         BBI != E; 
++BBI5.97M
)
810
5.97M
      if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
811
41.8k
        Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
812
41.8k
                                               currentLoop, Changed).first;
813
41.8k
        if (LoopCond && UnswitchIfProfitable(LoopCond,
814
536
                                             ConstantInt::getTrue(Context))) {
815
155
          ++NumSelects;
816
155
          return true;
817
155
        }
818
41.8k
      }
819
970k
  }
820
227k
  
return Changed223k
;
821
227k
}
822
823
/// Check to see if all paths from BB exit the loop with no side effects
824
/// (including infinite loops).
825
///
826
/// If true, we return true and set ExitBB to the block we
827
/// exit through.
828
///
829
static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
830
                                         BasicBlock *&ExitBB,
831
30.3k
                                         std::set<BasicBlock*> &Visited) {
832
30.3k
  if (!Visited.insert(BB).second) {
833
6.20k
    // Already visited. Without more analysis, this could indicate an infinite
834
6.20k
    // loop.
835
6.20k
    return false;
836
6.20k
  }
837
24.1k
  if (!L->contains(BB)) {
838
3.60k
    // Otherwise, this is a loop exit, this is fine so long as this is the
839
3.60k
    // first exit.
840
3.60k
    if (ExitBB) 
return false145
;
841
3.45k
    ExitBB = BB;
842
3.45k
    return true;
843
3.45k
  }
844
20.4k
845
20.4k
  // Otherwise, this is an unvisited intra-loop node.  Check all successors.
846
21.7k
  
for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); 20.4k
SI != E;
++SI1.28k
) {
847
21.7k
    // Check to see if the successor is a trivial loop exit.
848
21.7k
    if (!isTrivialLoopExitBlockHelper(L, *SI, ExitBB, Visited))
849
20.4k
      return false;
850
21.7k
  }
851
20.4k
852
20.4k
  // Okay, everything after this looks good, check to make sure that this block
853
20.4k
  // doesn't include any side effects.
854
20.4k
  
for (Instruction &I : *BB)0
855
0
    if (I.mayHaveSideEffects())
856
0
      return false;
857
0
858
0
  return true;
859
0
}
860
861
/// Return true if the specified block unconditionally leads to an exit from
862
/// the specified loop, and has no side-effects in the process. If so, return
863
/// the block that is exited to, otherwise return null.
864
8.52k
static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
865
8.52k
  std::set<BasicBlock*> Visited;
866
8.52k
  Visited.insert(L->getHeader());  // Branches to header make infinite loops.
867
8.52k
  BasicBlock *ExitBB = nullptr;
868
8.52k
  if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
869
2.16k
    return ExitBB;
870
6.35k
  return nullptr;
871
6.35k
}
872
873
/// We have found that we can unswitch currentLoop when LoopCond == Val to
874
/// simplify the loop.  If we decide that this is profitable,
875
/// unswitch the loop, reprocess the pieces, then return true.
876
bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
877
20.1k
                                        Instruction *TI) {
878
20.1k
  // Check to see if it would be profitable to unswitch current loop.
879
20.1k
  if (!BranchesInfo.CostAllowsUnswitching()) {
880
16.3k
    LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
881
16.3k
                      << currentLoop->getHeader()->getName()
882
16.3k
                      << " at non-trivial condition '" << *Val
883
16.3k
                      << "' == " << *LoopCond << "\n"
884
16.3k
                      << ". Cost too high.\n");
885
16.3k
    return false;
886
16.3k
  }
887
3.77k
  if (hasBranchDivergence &&
888
3.77k
      
getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)2
) {
889
1
    LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
890
1
                      << currentLoop->getHeader()->getName()
891
1
                      << " at non-trivial condition '" << *Val
892
1
                      << "' == " << *LoopCond << "\n"
893
1
                      << ". Condition is divergent.\n");
894
1
    return false;
895
1
  }
896
3.76k
897
3.76k
  UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI);
898
3.76k
  return true;
899
3.76k
}
900
901
/// Recursively clone the specified loop and all of its children,
902
/// mapping the blocks with the specified map.
903
static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
904
8.44k
                       LoopInfo *LI, LPPassManager *LPM) {
905
8.44k
  Loop &New = *LI->AllocateLoop();
906
8.44k
  if (PL)
907
5.84k
    PL->addChildLoop(&New);
908
2.60k
  else
909
2.60k
    LI->addTopLevelLoop(&New);
910
8.44k
  LPM->addLoop(New);
911
8.44k
912
8.44k
  // Add all of the blocks in L to the new loop.
913
8.44k
  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
914
56.6k
       I != E; 
++I48.1k
)
915
48.1k
    if (LI->getLoopFor(*I) == L)
916
36.3k
      New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
917
8.44k
918
8.44k
  // Add all of the subloops to the new loop.
919
8.44k
  for (Loop *I : *L)
920
4.67k
    CloneLoop(I, &New, VM, LI, LPM);
921
8.44k
922
8.44k
  return &New;
923
8.44k
}
924
925
/// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,
926
/// otherwise branch to FalseDest. Insert the code immediately before OldBranch
927
/// and remove (but not erase!) it from the function.
928
void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
929
                                                  BasicBlock *TrueDest,
930
                                                  BasicBlock *FalseDest,
931
                                                  BranchInst *OldBranch,
932
3.89k
                                                  Instruction *TI) {
933
3.89k
  assert(OldBranch->isUnconditional() && "Preheader is not split correctly");
934
3.89k
  assert(TrueDest != FalseDest && "Branch targets should be different");
935
3.89k
  // Insert a conditional branch on LIC to the two preheaders.  The original
936
3.89k
  // code is the true version and the new code is the false version.
937
3.89k
  Value *BranchVal = LIC;
938
3.89k
  bool Swapped = false;
939
3.89k
  if (!isa<ConstantInt>(Val) ||
940
3.89k
      Val->getType() != Type::getInt1Ty(LIC->getContext()))
941
182
    BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val);
942
3.70k
  else if (Val != ConstantInt::getTrue(Val->getContext())) {
943
18
    // We want to enter the new loop when the condition is true.
944
18
    std::swap(TrueDest, FalseDest);
945
18
    Swapped = true;
946
18
  }
947
3.89k
948
3.89k
  // Old branch will be removed, so save its parent and successor to update the
949
3.89k
  // DomTree.
950
3.89k
  auto *OldBranchSucc = OldBranch->getSuccessor(0);
951
3.89k
  auto *OldBranchParent = OldBranch->getParent();
952
3.89k
953
3.89k
  // Insert the new branch.
954
3.89k
  BranchInst *BI =
955
3.89k
      IRBuilder<>(OldBranch).CreateCondBr(BranchVal, TrueDest, FalseDest, TI);
956
3.89k
  if (Swapped)
957
18
    BI->swapProfMetadata();
958
3.89k
959
3.89k
  // Remove the old branch so there is only one branch at the end. This is
960
3.89k
  // needed to perform DomTree's internal DFS walk on the function's CFG.
961
3.89k
  OldBranch->removeFromParent();
962
3.89k
963
3.89k
  // Inform the DT about the new branch.
964
3.89k
  if (DT) {
965
3.89k
    // First, add both successors.
966
3.89k
    SmallVector<DominatorTree::UpdateType, 3> Updates;
967
3.89k
    if (TrueDest != OldBranchSucc)
968
3.87k
      Updates.push_back({DominatorTree::Insert, OldBranchParent, TrueDest});
969
3.89k
    if (FalseDest != OldBranchSucc)
970
18
      Updates.push_back({DominatorTree::Insert, OldBranchParent, FalseDest});
971
3.89k
    // If both of the new successors are different from the old one, inform the
972
3.89k
    // DT that the edge was deleted.
973
3.89k
    if (OldBranchSucc != TrueDest && 
OldBranchSucc != FalseDest3.87k
) {
974
0
      Updates.push_back({DominatorTree::Delete, OldBranchParent, OldBranchSucc});
975
0
    }
976
3.89k
    DT->applyUpdates(Updates);
977
3.89k
978
3.89k
    if (MSSAU)
979
87
      MSSAU->applyUpdates(Updates, *DT);
980
3.89k
  }
981
3.89k
982
3.89k
  // If either edge is critical, split it. This helps preserve LoopSimplify
983
3.89k
  // form for enclosing loops.
984
3.89k
  auto Options =
985
3.89k
      CriticalEdgeSplittingOptions(DT, LI, MSSAU.get()).setPreserveLCSSA();
986
3.89k
  SplitCriticalEdge(BI, 0, Options);
987
3.89k
  SplitCriticalEdge(BI, 1, Options);
988
3.89k
}
989
990
/// Given a loop that has a trivial unswitchable condition in it (a cond branch
991
/// from its header block to its latch block, where the path through the loop
992
/// that doesn't execute its body has no side-effects), unswitch it. This
993
/// doesn't involve any code duplication, just moving the conditional branch
994
/// outside of the loop and updating loop info.
995
void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
996
                                            BasicBlock *ExitBlock,
997
122
                                            Instruction *TI) {
998
122
  LLVM_DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
999
122
                    << loopHeader->getName() << " [" << L->getBlocks().size()
1000
122
                    << " blocks] in Function "
1001
122
                    << L->getHeader()->getParent()->getName()
1002
122
                    << " on cond: " << *Val << " == " << *Cond << "\n");
1003
122
  // We are going to make essential changes to CFG. This may invalidate cached
1004
122
  // information for L or one of its parent loops in SCEV.
1005
122
  if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>())
1006
122
    SEWP->getSE().forgetTopmostLoop(L);
1007
122
1008
122
  // First step, split the preheader, so that we know that there is a safe place
1009
122
  // to insert the conditional branch.  We will change loopPreheader to have a
1010
122
  // conditional branch on Cond.
1011
122
  BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, DT, LI, MSSAU.get());
1012
122
1013
122
  // Now that we have a place to insert the conditional branch, create a place
1014
122
  // to branch to: this is the exit block out of the loop that we should
1015
122
  // short-circuit to.
1016
122
1017
122
  // Split this block now, so that the loop maintains its exit block, and so
1018
122
  // that the jump from the preheader can execute the contents of the exit block
1019
122
  // without actually branching to it (the exit block should be dominated by the
1020
122
  // loop header, not the preheader).
1021
122
  assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
1022
122
  BasicBlock *NewExit =
1023
122
      SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI, MSSAU.get());
1024
122
1025
122
  // Okay, now we have a position to branch from and a position to branch to,
1026
122
  // insert the new conditional branch.
1027
122
  auto *OldBranch = dyn_cast<BranchInst>(loopPreheader->getTerminator());
1028
122
  assert(OldBranch && "Failed to split the preheader");
1029
122
  EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, OldBranch, TI);
1030
122
  LPM->deleteSimpleAnalysisValue(OldBranch, L);
1031
122
1032
122
  // EmitPreheaderBranchOnCondition removed the OldBranch from the function.
1033
122
  // Delete it, as it is no longer needed.
1034
122
  delete OldBranch;
1035
122
1036
122
  // We need to reprocess this loop, it could be unswitched again.
1037
122
  redoLoop = true;
1038
122
1039
122
  // Now that we know that the loop is never entered when this condition is a
1040
122
  // particular value, rewrite the loop with this info.  We know that this will
1041
122
  // at least eliminate the old branch.
1042
122
  RewriteLoopBodyWithConditionConstant(L, Cond, Val, false);
1043
122
1044
122
  ++NumTrivial;
1045
122
}
1046
1047
/// Check if the first non-constant condition starting from the loop header is
1048
/// a trivial unswitch condition: that is, a condition controls whether or not
1049
/// the loop does anything at all. If it is a trivial condition, unswitching
1050
/// produces no code duplications (equivalently, it produces a simpler loop and
1051
/// a new empty loop, which gets deleted). Therefore always unswitch trivial
1052
/// condition.
1053
227k
bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
1054
227k
  BasicBlock *CurrentBB = currentLoop->getHeader();
1055
227k
  Instruction *CurrentTerm = CurrentBB->getTerminator();
1056
227k
  LLVMContext &Context = CurrentBB->getContext();
1057
227k
1058
227k
  // If loop header has only one reachable successor (currently via an
1059
227k
  // unconditional branch or constant foldable conditional branch, but
1060
227k
  // should also consider adding constant foldable switch instruction in
1061
227k
  // future), we should keep looking for trivial condition candidates in
1062
227k
  // the successor as well. An alternative is to constant fold conditions
1063
227k
  // and merge successors into loop header (then we only need to check header's
1064
227k
  // terminator). The reason for not doing this in LoopUnswitch pass is that
1065
227k
  // it could potentially break LoopPassManager's invariants. Folding dead
1066
227k
  // branches could either eliminate the current loop or make other loops
1067
227k
  // unreachable. LCSSA form might also not be preserved after deleting
1068
227k
  // branches. The following code keeps traversing loop header's successors
1069
227k
  // until it finds the trivial condition candidate (condition that is not a
1070
227k
  // constant). Since unswitching generates branches with constant conditions,
1071
227k
  // this scenario could be very common in practice.
1072
227k
  SmallPtrSet<BasicBlock*, 8> Visited;
1073
227k
1074
254k
  while (true) {
1075
254k
    // If we exit loop or reach a previous visited block, then
1076
254k
    // we can not reach any trivial condition candidates (unfoldable
1077
254k
    // branch instructions or switch instructions) and no unswitch
1078
254k
    // can happen. Exit and return false.
1079
254k
    if (!currentLoop->contains(CurrentBB) || 
!Visited.insert(CurrentBB).second254k
)
1080
190
      return false;
1081
254k
1082
254k
    // Check if this loop will execute any side-effecting instructions (e.g.
1083
254k
    // stores, calls, volatile loads) in the part of the loop that the code
1084
254k
    // *would* execute. Check the header first.
1085
254k
    for (Instruction &I : *CurrentBB)
1086
1.48M
      if (I.mayHaveSideEffects())
1087
138k
        return false;
1088
254k
1089
254k
    
if (BranchInst *116k
BI116k
= dyn_cast<BranchInst>(CurrentTerm)) {
1090
113k
      if (BI->isUnconditional()) {
1091
20.5k
        CurrentBB = BI->getSuccessor(0);
1092
93.1k
      } else if (BI->getCondition() == ConstantInt::getTrue(Context)) {
1093
2.97k
        CurrentBB = BI->getSuccessor(0);
1094
90.2k
      } else if (BI->getCondition() == ConstantInt::getFalse(Context)) {
1095
3.13k
        CurrentBB = BI->getSuccessor(1);
1096
87.0k
      } else {
1097
87.0k
        // Found a trivial condition candidate: non-foldable conditional branch.
1098
87.0k
        break;
1099
87.0k
      }
1100
2.53k
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1101
2.53k
      // At this point, any constant-foldable instructions should have probably
1102
2.53k
      // been folded.
1103
2.53k
      ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
1104
2.53k
      if (!Cond)
1105
2.44k
        break;
1106
93
      // Find the target block we are definitely going to.
1107
93
      CurrentBB = SI->findCaseValue(Cond)->getCaseSuccessor();
1108
93
    } else {
1109
0
      // We do not understand these terminator instructions.
1110
0
      break;
1111
0
    }
1112
26.7k
1113
26.7k
    CurrentTerm = CurrentBB->getTerminator();
1114
26.7k
  }
1115
227k
1116
227k
  // CondVal is the condition that controls the trivial condition.
1117
227k
  // LoopExitBB is the BasicBlock that loop exits when meets trivial condition.
1118
227k
  Constant *CondVal = nullptr;
1119
89.5k
  BasicBlock *LoopExitBB = nullptr;
1120
89.5k
1121
89.5k
  if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
1122
87.0k
    // If this isn't branching on an invariant condition, we can't unswitch it.
1123
87.0k
    if (!BI->isConditional())
1124
0
      return false;
1125
87.0k
1126
87.0k
    Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
1127
87.0k
                                           currentLoop, Changed).first;
1128
87.0k
1129
87.0k
    // Unswitch only if the trivial condition itself is an LIV (not
1130
87.0k
    // partial LIV which could occur in and/or)
1131
87.0k
    if (!LoopCond || 
LoopCond != BI->getCondition()3.22k
)
1132
83.9k
      return false;
1133
3.14k
1134
3.14k
    // Check to see if a successor of the branch is guaranteed to
1135
3.14k
    // exit through a unique exit block without having any
1136
3.14k
    // side-effects.  If so, determine the value of Cond that causes
1137
3.14k
    // it to do this.
1138
3.14k
    if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
1139
3.14k
                                             BI->getSuccessor(0)))) {
1140
59
      CondVal = ConstantInt::getTrue(Context);
1141
3.08k
    } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
1142
3.08k
                                                    BI->getSuccessor(1)))) {
1143
42
      CondVal = ConstantInt::getFalse(Context);
1144
42
    }
1145
3.14k
1146
3.14k
    // If we didn't find a single unique LoopExit block, or if the loop exit
1147
3.14k
    // block contains phi nodes, this isn't trivial.
1148
3.14k
    if (!LoopExitBB || 
isa<PHINode>(LoopExitBB->begin())101
)
1149
3.10k
      return false;   // Can't handle this.
1150
38
1151
38
    if (EqualityPropUnSafe(*LoopCond))
1152
0
      return false;
1153
38
1154
38
    UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB,
1155
38
                             CurrentTerm);
1156
38
    ++NumBranches;
1157
38
    return true;
1158
2.44k
  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1159
2.44k
    // If this isn't switching on an invariant condition, we can't unswitch it.
1160
2.44k
    Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
1161
2.44k
                                           currentLoop, Changed).first;
1162
2.44k
1163
2.44k
    // Unswitch only if the trivial condition itself is an LIV (not
1164
2.44k
    // partial LIV which could occur in and/or)
1165
2.44k
    if (!LoopCond || 
LoopCond != SI->getCondition()202
)
1166
2.25k
      return false;
1167
190
1168
190
    // Check to see if a successor of the switch is guaranteed to go to the
1169
190
    // latch block or exit through a one exit block without having any
1170
190
    // side-effects.  If so, determine the value of Cond that causes it to do
1171
190
    // this.
1172
190
    // Note that we can't trivially unswitch on the default case or
1173
190
    // on already unswitched cases.
1174
2.29k
    
for (auto Case : SI->cases())190
{
1175
2.29k
      BasicBlock *LoopExitCandidate;
1176
2.29k
      if ((LoopExitCandidate =
1177
2.29k
               isTrivialLoopExitBlock(currentLoop, Case.getCaseSuccessor()))) {
1178
2.06k
        // Okay, we found a trivial case, remember the value that is trivial.
1179
2.06k
        ConstantInt *CaseVal = Case.getCaseValue();
1180
2.06k
1181
2.06k
        // Check that it was not unswitched before, since already unswitched
1182
2.06k
        // trivial vals are looks trivial too.
1183
2.06k
        if (BranchesInfo.isUnswitched(SI, CaseVal))
1184
1.97k
          continue;
1185
88
        LoopExitBB = LoopExitCandidate;
1186
88
        CondVal = CaseVal;
1187
88
        break;
1188
88
      }
1189
2.29k
    }
1190
190
1191
190
    // If we didn't find a single unique LoopExit block, or if the loop exit
1192
190
    // block contains phi nodes, this isn't trivial.
1193
190
    if (!LoopExitBB || 
isa<PHINode>(LoopExitBB->begin())88
)
1194
106
      return false;   // Can't handle this.
1195
84
1196
84
    UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB,
1197
84
                             nullptr);
1198
84
1199
84
    // We are only unswitching full LIV.
1200
84
    BranchesInfo.setUnswitched(SI, CondVal);
1201
84
    ++NumSwitches;
1202
84
    return true;
1203
84
  }
1204
0
  return false;
1205
0
}
1206
1207
/// Split all of the edges from inside the loop to their exit blocks.
1208
/// Update the appropriate Phi nodes as we do so.
1209
void LoopUnswitch::SplitExitEdges(Loop *L,
1210
3.76k
                               const SmallVectorImpl<BasicBlock *> &ExitBlocks){
1211
3.76k
1212
9.31k
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; 
++i5.54k
) {
1213
5.54k
    BasicBlock *ExitBlock = ExitBlocks[i];
1214
5.54k
    SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
1215
5.54k
                                       pred_end(ExitBlock));
1216
5.54k
1217
5.54k
    // Although SplitBlockPredecessors doesn't preserve loop-simplify in
1218
5.54k
    // general, if we call it on all predecessors of all exits then it does.
1219
5.54k
    SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI, MSSAU.get(),
1220
5.54k
                           /*PreserveLCSSA*/ true);
1221
5.54k
  }
1222
3.76k
}
1223
1224
/// We determined that the loop is profitable to unswitch when LIC equal Val.
1225
/// Split it into loop versions and test the condition outside of either loop.
1226
/// Return the loops created as Out1/Out2.
1227
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
1228
3.76k
                                               Loop *L, Instruction *TI) {
1229
3.76k
  Function *F = loopHeader->getParent();
1230
3.76k
  LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
1231
3.76k
                    << loopHeader->getName() << " [" << L->getBlocks().size()
1232
3.76k
                    << " blocks] in Function " << F->getName() << " when '"
1233
3.76k
                    << *Val << "' == " << *LIC << "\n");
1234
3.76k
1235
3.76k
  // We are going to make essential changes to CFG. This may invalidate cached
1236
3.76k
  // information for L or one of its parent loops in SCEV.
1237
3.76k
  if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>())
1238
3.76k
    SEWP->getSE().forgetTopmostLoop(L);
1239
3.76k
1240
3.76k
  LoopBlocks.clear();
1241
3.76k
  NewBlocks.clear();
1242
3.76k
1243
3.76k
  // First step, split the preheader and exit blocks, and add these blocks to
1244
3.76k
  // the LoopBlocks list.
1245
3.76k
  BasicBlock *NewPreheader =
1246
3.76k
      SplitEdge(loopPreheader, loopHeader, DT, LI, MSSAU.get());
1247
3.76k
  LoopBlocks.push_back(NewPreheader);
1248
3.76k
1249
3.76k
  // We want the loop to come after the preheader, but before the exit blocks.
1250
3.76k
  LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
1251
3.76k
1252
3.76k
  SmallVector<BasicBlock*, 8> ExitBlocks;
1253
3.76k
  L->getUniqueExitBlocks(ExitBlocks);
1254
3.76k
1255
3.76k
  // Split all of the edges from inside the loop to their exit blocks.  Update
1256
3.76k
  // the appropriate Phi nodes as we do so.
1257
3.76k
  SplitExitEdges(L, ExitBlocks);
1258
3.76k
1259
3.76k
  // The exit blocks may have been changed due to edge splitting, recompute.
1260
3.76k
  ExitBlocks.clear();
1261
3.76k
  L->getUniqueExitBlocks(ExitBlocks);
1262
3.76k
1263
3.76k
  // Add exit blocks to the loop blocks.
1264
3.76k
  LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());
1265
3.76k
1266
3.76k
  // Next step, clone all of the basic blocks that make up the loop (including
1267
3.76k
  // the loop preheader and exit blocks), keeping track of the mapping between
1268
3.76k
  // the instructions and blocks.
1269
3.76k
  NewBlocks.reserve(LoopBlocks.size());
1270
3.76k
  ValueToValueMapTy VMap;
1271
49.4k
  for (unsigned i = 0, e = LoopBlocks.size(); i != e; 
++i45.6k
) {
1272
45.6k
    BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
1273
45.6k
1274
45.6k
    NewBlocks.push_back(NewBB);
1275
45.6k
    VMap[LoopBlocks[i]] = NewBB;  // Keep the BB mapping.
1276
45.6k
    LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
1277
45.6k
  }
1278
3.76k
1279
3.76k
  // Splice the newly inserted blocks into the function right before the
1280
3.76k
  // original preheader.
1281
3.76k
  F->getBasicBlockList().splice(NewPreheader->getIterator(),
1282
3.76k
                                F->getBasicBlockList(),
1283
3.76k
                                NewBlocks[0]->getIterator(), F->end());
1284
3.76k
1285
3.76k
  // Now we create the new Loop object for the versioned loop.
1286
3.76k
  Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
1287
3.76k
1288
3.76k
  // Recalculate unswitching quota, inherit simplified switches info for NewBB,
1289
3.76k
  // Probably clone more loop-unswitch related loop properties.
1290
3.76k
  BranchesInfo.cloneData(NewLoop, L, VMap);
1291
3.76k
1292
3.76k
  Loop *ParentLoop = L->getParentLoop();
1293
3.76k
  if (ParentLoop) {
1294
1.16k
    // Make sure to add the cloned preheader and exit blocks to the parent loop
1295
1.16k
    // as well.
1296
1.16k
    ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI);
1297
1.16k
  }
1298
3.76k
1299
9.31k
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; 
++i5.54k
) {
1300
5.54k
    BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
1301
5.54k
    // The new exit block should be in the same loop as the old one.
1302
5.54k
    if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
1303
1.30k
      ExitBBLoop->addBasicBlockToLoop(NewExit, *LI);
1304
5.54k
1305
5.54k
    assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
1306
5.54k
           "Exit block should have been split to have one successor!");
1307
5.54k
    BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
1308
5.54k
1309
5.54k
    // If the successor of the exit block had PHI nodes, add an entry for
1310
5.54k
    // NewExit.
1311
5.54k
    for (PHINode &PN : ExitSucc->phis()) {
1312
3.39k
      Value *V = PN.getIncomingValueForBlock(ExitBlocks[i]);
1313
3.39k
      ValueToValueMapTy::iterator It = VMap.find(V);
1314
3.39k
      if (It != VMap.end()) V = It->second;
1315
3.39k
      PN.addIncoming(V, NewExit);
1316
3.39k
    }
1317
5.54k
1318
5.54k
    if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
1319
126
      PHINode *PN = PHINode::Create(LPad->getType(), 0, "",
1320
126
                                    &*ExitSucc->getFirstInsertionPt());
1321
126
1322
126
      for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc);
1323
378
           I != E; 
++I252
) {
1324
252
        BasicBlock *BB = *I;
1325
252
        LandingPadInst *LPI = BB->getLandingPadInst();
1326
252
        LPI->replaceAllUsesWith(PN);
1327
252
        PN->addIncoming(LPI, BB);
1328
252
      }
1329
126
    }
1330
5.54k
  }
1331
3.76k
1332
3.76k
  // Rewrite the code to refer to itself.
1333
49.4k
  for (unsigned i = 0, e = NewBlocks.size(); i != e; 
++i45.6k
) {
1334
200k
    for (Instruction &I : *NewBlocks[i]) {
1335
200k
      RemapInstruction(&I, VMap,
1336
200k
                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1337
200k
      if (auto *II = dyn_cast<IntrinsicInst>(&I))
1338
2.10k
        if (II->getIntrinsicID() == Intrinsic::assume)
1339
0
          AC->registerAssumption(II);
1340
200k
    }
1341
45.6k
  }
1342
3.76k
1343
3.76k
  // Rewrite the original preheader to select between versions of the loop.
1344
3.76k
  BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
1345
3.76k
  assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
1346
3.76k
         "Preheader splitting did not work correctly!");
1347
3.76k
1348
3.76k
  if (MSSAU) {
1349
38
    // Update MemorySSA after cloning, and before splitting to unreachables,
1350
38
    // since that invalidates the 1:1 mapping of clones in VMap.
1351
38
    LoopBlocksRPO LBRPO(L);
1352
38
    LBRPO.perform(LI);
1353
38
    MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, VMap);
1354
38
  }
1355
3.76k
1356
3.76k
  // Emit the new branch that selects between the two versions of this loop.
1357
3.76k
  EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR,
1358
3.76k
                                 TI);
1359
3.76k
  LPM->deleteSimpleAnalysisValue(OldBR, L);
1360
3.76k
  if (MSSAU) {
1361
38
    // Update MemoryPhis in Exit blocks.
1362
38
    MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT);
1363
38
    if (VerifyMemorySSA)
1364
38
      MSSA->verifyMemorySSA();
1365
38
  }
1366
3.76k
1367
3.76k
  // The OldBr was replaced by a new one and removed (but not erased) by
1368
3.76k
  // EmitPreheaderBranchOnCondition. It is no longer needed, so delete it.
1369
3.76k
  delete OldBR;
1370
3.76k
1371
3.76k
  LoopProcessWorklist.push_back(NewLoop);
1372
3.76k
  redoLoop = true;
1373
3.76k
1374
3.76k
  // Keep a WeakTrackingVH holding onto LIC.  If the first call to
1375
3.76k
  // RewriteLoopBody
1376
3.76k
  // deletes the instruction (for example by simplifying a PHI that feeds into
1377
3.76k
  // the condition that we're unswitching on), we don't rewrite the second
1378
3.76k
  // iteration.
1379
3.76k
  WeakTrackingVH LICHandle(LIC);
1380
3.76k
1381
3.76k
  // Now we rewrite the original code to know that the condition is true and the
1382
3.76k
  // new code to know that the condition is false.
1383
3.76k
  RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);
1384
3.76k
1385
3.76k
  // It's possible that simplifying one loop could cause the other to be
1386
3.76k
  // changed to another value or a constant.  If its a constant, don't simplify
1387
3.76k
  // it.
1388
3.76k
  if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
1389
3.76k
      LICHandle && !isa<Constant>(LICHandle))
1390
3.76k
    RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
1391
3.76k
1392
3.76k
  if (MSSA && 
VerifyMemorySSA38
)
1393
38
    MSSA->verifyMemorySSA();
1394
3.76k
}
1395
1396
/// Remove all instances of I from the worklist vector specified.
1397
static void RemoveFromWorklist(Instruction *I,
1398
1.29k
                               std::vector<Instruction*> &Worklist) {
1399
1.29k
1400
1.29k
  Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), I),
1401
1.29k
                 Worklist.end());
1402
1.29k
}
1403
1404
/// When we find that I really equals V, remove I from the
1405
/// program, replacing all uses with V and update the worklist.
1406
static void ReplaceUsesOfWith(Instruction *I, Value *V,
1407
                              std::vector<Instruction *> &Worklist, Loop *L,
1408
762
                              LPPassManager *LPM, MemorySSAUpdater *MSSAU) {
1409
762
  LLVM_DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n");
1410
762
1411
762
  // Add uses to the worklist, which may be dead now.
1412
2.57k
  for (unsigned i = 0, e = I->getNumOperands(); i != e; 
++i1.81k
)
1413
1.81k
    if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
1414
803
      Worklist.push_back(Use);
1415
762
1416
762
  // Add users to the worklist which may be simplified now.
1417
762
  for (User *U : I->users())
1418
852
    Worklist.push_back(cast<Instruction>(U));
1419
762
  LPM->deleteSimpleAnalysisValue(I, L);
1420
762
  RemoveFromWorklist(I, Worklist);
1421
762
  I->replaceAllUsesWith(V);
1422
762
  if (!I->mayHaveSideEffects()) {
1423
758
    if (MSSAU)
1424
18
      MSSAU->removeMemoryAccess(I);
1425
758
    I->eraseFromParent();
1426
758
  }
1427
762
  ++NumSimplify;
1428
762
}
1429
1430
/// We know either that the value LIC has the value specified by Val in the
1431
/// specified loop, or we know it does NOT have that value.
1432
/// Rewrite any uses of LIC or of properties correlated to it.
1433
void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
1434
                                                        Constant *Val,
1435
7.66k
                                                        bool IsEqual) {
1436
7.66k
  assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
1437
7.66k
1438
7.66k
  // FIXME: Support correlated properties, like:
1439
7.66k
  //  for (...)
1440
7.66k
  //    if (li1 < li2)
1441
7.66k
  //      ...
1442
7.66k
  //    if (li1 > li2)
1443
7.66k
  //      ...
1444
7.66k
1445
7.66k
  // FOLD boolean conditions (X|LIC), (X&LIC).  Fold conditional branches,
1446
7.66k
  // selects, switches.
1447
7.66k
  std::vector<Instruction*> Worklist;
1448
7.66k
  LLVMContext &Context = Val->getContext();
1449
7.66k
1450
7.66k
  // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
1451
7.66k
  // in the loop with the appropriate one directly.
1452
7.66k
  if (IsEqual || 
(3.89k
isa<ConstantInt>(Val)3.89k
&&
1453
7.47k
      
Val->getType()->isIntegerTy(1)3.89k
)) {
1454
7.47k
    Value *Replacement;
1455
7.47k
    if (IsEqual)
1456
3.76k
      Replacement = Val;
1457
3.70k
    else
1458
3.70k
      Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
1459
3.70k
                                     !cast<ConstantInt>(Val)->getZExtValue());
1460
7.47k
1461
21.9k
    for (User *U : LIC->users()) {
1462
21.9k
      Instruction *UI = dyn_cast<Instruction>(U);
1463
21.9k
      if (!UI || !L->contains(UI))
1464
14.2k
        continue;
1465
7.72k
      Worklist.push_back(UI);
1466
7.72k
    }
1467
7.47k
1468
7.47k
    for (Instruction *UI : Worklist)
1469
7.72k
      UI->replaceUsesOfWith(LIC, Replacement);
1470
7.47k
1471
7.47k
    SimplifyCode(Worklist, L);
1472
7.47k
    return;
1473
7.47k
  }
1474
182
1475
182
  // Otherwise, we don't know the precise value of LIC, but we do know that it
1476
182
  // is certainly NOT "Val".  As such, simplify any uses in the loop that we
1477
182
  // can.  This case occurs when we unswitch switch statements.
1478
2.52k
  
for (User *U : LIC->users())182
{
1479
2.52k
    Instruction *UI = dyn_cast<Instruction>(U);
1480
2.52k
    if (!UI || !L->contains(UI))
1481
2.33k
      continue;
1482
193
1483
193
    // At this point, we know LIC is definitely not Val. Try to use some simple
1484
193
    // logic to simplify the user w.r.t. to the context.
1485
193
    if (Value *Replacement = SimplifyInstructionWithNotEqual(UI, LIC, Val)) {
1486
2
      if (LI->replacementPreservesLCSSAForm(UI, Replacement)) {
1487
2
        // This in-loop instruction has been simplified w.r.t. its context,
1488
2
        // i.e. LIC != Val, make sure we propagate its replacement value to
1489
2
        // all its users.
1490
2
        //
1491
2
        // We can not yet delete UI, the LIC user, yet, because that would invalidate
1492
2
        // the LIC->users() iterator !. However, we can make this instruction
1493
2
        // dead by replacing all its users and push it onto the worklist so that
1494
2
        // it can be properly deleted and its operands simplified.
1495
2
        UI->replaceAllUsesWith(Replacement);
1496
2
      }
1497
2
    }
1498
193
1499
193
    // This is a LIC user, push it into the worklist so that SimplifyCode can
1500
193
    // attempt to simplify it.
1501
193
    Worklist.push_back(UI);
1502
193
1503
193
    // If we know that LIC is not Val, use this info to simplify code.
1504
193
    SwitchInst *SI = dyn_cast<SwitchInst>(UI);
1505
193
    if (!SI || 
!isa<ConstantInt>(Val)180
)
continue13
;
1506
180
1507
180
    // NOTE: if a case value for the switch is unswitched out, we record it
1508
180
    // after the unswitch finishes. We can not record it here as the switch
1509
180
    // is not a direct user of the partial LIV.
1510
180
    SwitchInst::CaseHandle DeadCase =
1511
180
        *SI->findCaseValue(cast<ConstantInt>(Val));
1512
180
    // Default case is live for multiple values.
1513
180
    if (DeadCase == *SI->case_default())
1514
2
      continue;
1515
178
1516
178
    // Found a dead case value.  Don't remove PHI nodes in the
1517
178
    // successor if they become single-entry, those PHI nodes may
1518
178
    // be in the Users list.
1519
178
1520
178
    BasicBlock *Switch = SI->getParent();
1521
178
    BasicBlock *SISucc = DeadCase.getCaseSuccessor();
1522
178
    BasicBlock *Latch = L->getLoopLatch();
1523
178
1524
178
    if (!SI->findCaseDest(SISucc)) 
continue96
; // Edge is critical.
1525
82
    // If the DeadCase successor dominates the loop latch, then the
1526
82
    // transformation isn't safe since it will delete the sole predecessor edge
1527
82
    // to the latch.
1528
82
    if (Latch && DT->dominates(SISucc, Latch))
1529
0
      continue;
1530
82
1531
82
    // FIXME: This is a hack.  We need to keep the successor around
1532
82
    // and hooked up so as to preserve the loop structure, because
1533
82
    // trying to update it is complicated.  So instead we preserve the
1534
82
    // loop structure and put the block on a dead code path.
1535
82
    SplitEdge(Switch, SISucc, DT, LI, MSSAU.get());
1536
82
    // Compute the successors instead of relying on the return value
1537
82
    // of SplitEdge, since it may have split the switch successor
1538
82
    // after PHI nodes.
1539
82
    BasicBlock *NewSISucc = DeadCase.getCaseSuccessor();
1540
82
    BasicBlock *OldSISucc = *succ_begin(NewSISucc);
1541
82
    // Create an "unreachable" destination.
1542
82
    BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
1543
82
                                           Switch->getParent(),
1544
82
                                           OldSISucc);
1545
82
    new UnreachableInst(Context, Abort);
1546
82
    // Force the new case destination to branch to the "unreachable"
1547
82
    // block while maintaining a (dead) CFG edge to the old block.
1548
82
    NewSISucc->getTerminator()->eraseFromParent();
1549
82
    BranchInst::Create(Abort, OldSISucc,
1550
82
                       ConstantInt::getTrue(Context), NewSISucc);
1551
82
    // Release the PHI operands for this edge.
1552
82
    for (PHINode &PN : NewSISucc->phis())
1553
4
      PN.setIncomingValueForBlock(Switch, UndefValue::get(PN.getType()));
1554
82
    // Tell the domtree about the new block. We don't fully update the
1555
82
    // domtree here -- instead we force it to do a full recomputation
1556
82
    // after the pass is complete -- but we do need to inform it of
1557
82
    // new blocks.
1558
82
    DT->addNewBlock(Abort, NewSISucc);
1559
82
  }
1560
182
1561
182
  SimplifyCode(Worklist, L);
1562
182
}
1563
1564
/// Now that we have simplified some instructions in the loop, walk over it and
1565
/// constant prop, dce, and fold control flow where possible. Note that this is
1566
/// effectively a very simple loop-structure-aware optimizer. During processing
1567
/// of this loop, L could very well be deleted, so it must not be used.
1568
///
1569
/// FIXME: When the loop optimizer is more mature, separate this out to a new
1570
/// pass.
1571
///
1572
7.66k
void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
1573
7.66k
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1574
17.8k
  while (!Worklist.empty()) {
1575
10.1k
    Instruction *I = Worklist.back();
1576
10.1k
    Worklist.pop_back();
1577
10.1k
1578
10.1k
    // Simple DCE.
1579
10.1k
    if (isInstructionTriviallyDead(I)) {
1580
529
      LLVM_DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n");
1581
529
1582
529
      // Add uses to the worklist, which may be dead now.
1583
1.59k
      for (unsigned i = 0, e = I->getNumOperands(); i != e; 
++i1.06k
)
1584
1.06k
        if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
1585
611
          Worklist.push_back(Use);
1586
529
      LPM->deleteSimpleAnalysisValue(I, L);
1587
529
      RemoveFromWorklist(I, Worklist);
1588
529
      if (MSSAU)
1589
12
        MSSAU->removeMemoryAccess(I);
1590
529
      I->eraseFromParent();
1591
529
      ++NumSimplify;
1592
529
      continue;
1593
529
    }
1594
9.63k
1595
9.63k
    // See if instruction simplification can hack this up.  This is common for
1596
9.63k
    // things like "select false, X, Y" after unswitching made the condition be
1597
9.63k
    // 'false'.  TODO: update the domtree properly so we can pass it here.
1598
9.63k
    if (Value *V = SimplifyInstruction(I, DL))
1599
803
      if (LI->replacementPreservesLCSSAForm(I, V)) {
1600
762
        ReplaceUsesOfWith(I, V, Worklist, L, LPM, MSSAU.get());
1601
762
        continue;
1602
762
      }
1603
8.87k
1604
8.87k
    // Special case hacks that appear commonly in unswitched code.
1605
8.87k
    if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
1606
7.24k
      if (BI->isUnconditional()) {
1607
0
        // If BI's parent is the only pred of the successor, fold the two blocks
1608
0
        // together.
1609
0
        BasicBlock *Pred = BI->getParent();
1610
0
        BasicBlock *Succ = BI->getSuccessor(0);
1611
0
        BasicBlock *SinglePred = Succ->getSinglePredecessor();
1612
0
        if (!SinglePred) continue;  // Nothing to do.
1613
0
        assert(SinglePred == Pred && "CFG broken");
1614
0
1615
0
        LLVM_DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- "
1616
0
                          << Succ->getName() << "\n");
1617
0
1618
0
        // Resolve any single entry PHI nodes in Succ.
1619
0
        while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
1620
0
          ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM,
1621
0
                            MSSAU.get());
1622
0
1623
0
        // If Succ has any successors with PHI nodes, update them to have
1624
0
        // entries coming from Pred instead of Succ.
1625
0
        Succ->replaceAllUsesWith(Pred);
1626
0
1627
0
        // Move all of the successor contents from Succ to Pred.
1628
0
        Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(),
1629
0
                                   Succ->begin(), Succ->end());
1630
0
        if (MSSAU)
1631
0
          MSSAU->moveAllAfterMergeBlocks(Succ, Pred, BI);
1632
0
        LPM->deleteSimpleAnalysisValue(BI, L);
1633
0
        RemoveFromWorklist(BI, Worklist);
1634
0
        BI->eraseFromParent();
1635
0
1636
0
        // Remove Succ from the loop tree.
1637
0
        LI->removeBlock(Succ);
1638
0
        LPM->deleteSimpleAnalysisValue(Succ, L);
1639
0
        Succ->eraseFromParent();
1640
0
        ++NumSimplify;
1641
0
        continue;
1642
0
      }
1643
7.24k
1644
7.24k
      continue;
1645
7.24k
    }
1646
8.87k
  }
1647
7.66k
}
1648
1649
/// Simple simplifications we can do given the information that Cond is
1650
/// definitely not equal to Val.
1651
Value *LoopUnswitch::SimplifyInstructionWithNotEqual(Instruction *Inst,
1652
                                                     Value *Invariant,
1653
193
                                                     Constant *Val) {
1654
193
  // icmp eq cond, val -> false
1655
193
  ICmpInst *CI = dyn_cast<ICmpInst>(Inst);
1656
193
  if (CI && 
CI->isEquality()5
) {
1657
2
    Value *Op0 = CI->getOperand(0);
1658
2
    Value *Op1 = CI->getOperand(1);
1659
2
    if ((Op0 == Invariant && Op1 == Val) || 
(0
Op0 == Val0
&&
Op1 == Invariant0
)) {
1660
2
      LLVMContext &Ctx = Inst->getContext();
1661
2
      if (CI->getPredicate() == CmpInst::ICMP_EQ)
1662
2
        return ConstantInt::getFalse(Ctx);
1663
0
      else
1664
0
        return ConstantInt::getTrue(Ctx);
1665
191
     }
1666
2
  }
1667
191
1668
191
  // FIXME: there may be other opportunities, e.g. comparison with floating
1669
191
  // point, or Invariant - Val != 0, etc.
1670
191
  return nullptr;
1671
191
}