Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the Correlated Value Propagation pass.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
14
#include "llvm/ADT/DepthFirstIterator.h"
15
#include "llvm/ADT/Optional.h"
16
#include "llvm/ADT/SmallVector.h"
17
#include "llvm/ADT/Statistic.h"
18
#include "llvm/Analysis/DomTreeUpdater.h"
19
#include "llvm/Analysis/GlobalsModRef.h"
20
#include "llvm/Analysis/InstructionSimplify.h"
21
#include "llvm/Analysis/LazyValueInfo.h"
22
#include "llvm/IR/Attributes.h"
23
#include "llvm/IR/BasicBlock.h"
24
#include "llvm/IR/CFG.h"
25
#include "llvm/IR/CallSite.h"
26
#include "llvm/IR/Constant.h"
27
#include "llvm/IR/ConstantRange.h"
28
#include "llvm/IR/Constants.h"
29
#include "llvm/IR/DerivedTypes.h"
30
#include "llvm/IR/Function.h"
31
#include "llvm/IR/IRBuilder.h"
32
#include "llvm/IR/InstrTypes.h"
33
#include "llvm/IR/Instruction.h"
34
#include "llvm/IR/Instructions.h"
35
#include "llvm/IR/IntrinsicInst.h"
36
#include "llvm/IR/Operator.h"
37
#include "llvm/IR/PassManager.h"
38
#include "llvm/IR/Type.h"
39
#include "llvm/IR/Value.h"
40
#include "llvm/Pass.h"
41
#include "llvm/Support/Casting.h"
42
#include "llvm/Support/CommandLine.h"
43
#include "llvm/Support/Debug.h"
44
#include "llvm/Support/raw_ostream.h"
45
#include "llvm/Transforms/Scalar.h"
46
#include "llvm/Transforms/Utils/Local.h"
47
#include <cassert>
48
#include <utility>
49
50
using namespace llvm;
51
52
#define DEBUG_TYPE "correlated-value-propagation"
53
54
STATISTIC(NumPhis,      "Number of phis propagated");
55
STATISTIC(NumPhiCommon, "Number of phis deleted via common incoming value");
56
STATISTIC(NumSelects,   "Number of selects propagated");
57
STATISTIC(NumMemAccess, "Number of memory access targets propagated");
58
STATISTIC(NumCmps,      "Number of comparisons propagated");
59
STATISTIC(NumReturns,   "Number of return values propagated");
60
STATISTIC(NumDeadCases, "Number of switch cases removed");
61
STATISTIC(NumSDivs,     "Number of sdiv converted to udiv");
62
STATISTIC(NumUDivs,     "Number of udivs whose width was decreased");
63
STATISTIC(NumAShrs,     "Number of ashr converted to lshr");
64
STATISTIC(NumSRems,     "Number of srem converted to urem");
65
STATISTIC(NumOverflows, "Number of overflow checks removed");
66
STATISTIC(NumSaturating,
67
    "Number of saturating arithmetics converted to normal arithmetics");
68
69
static cl::opt<bool> DontAddNoWrapFlags("cvp-dont-add-nowrap-flags", cl::init(false));
70
71
namespace {
72
73
  class CorrelatedValuePropagation : public FunctionPass {
74
  public:
75
    static char ID;
76
77
25.9k
    CorrelatedValuePropagation(): FunctionPass(ID) {
78
25.9k
     initializeCorrelatedValuePropagationPass(*PassRegistry::getPassRegistry());
79
25.9k
    }
80
81
    bool runOnFunction(Function &F) override;
82
83
25.9k
    void getAnalysisUsage(AnalysisUsage &AU) const override {
84
25.9k
      AU.addRequired<DominatorTreeWrapperPass>();
85
25.9k
      AU.addRequired<LazyValueInfoWrapperPass>();
86
25.9k
      AU.addPreserved<GlobalsAAWrapperPass>();
87
25.9k
      AU.addPreserved<DominatorTreeWrapperPass>();
88
25.9k
    }
89
  };
90
91
} // end anonymous namespace
92
93
char CorrelatedValuePropagation::ID = 0;
94
95
48.6k
INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation",
96
48.6k
                "Value Propagation", false, false)
97
48.6k
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
98
48.6k
INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
99
48.6k
INITIALIZE_PASS_END(CorrelatedValuePropagation, "correlated-propagation",
100
                "Value Propagation", false, false)
101
102
// Public interface to the Value Propagation pass
103
25.9k
Pass *llvm::createCorrelatedValuePropagationPass() {
104
25.9k
  return new CorrelatedValuePropagation();
105
25.9k
}
106
107
205k
static bool processSelect(SelectInst *S, LazyValueInfo *LVI) {
108
205k
  if (S->getType()->isVectorTy()) 
return false517
;
109
205k
  if (isa<Constant>(S->getOperand(0))) 
return false163
;
110
205k
111
205k
  Constant *C = LVI->getConstant(S->getCondition(), S->getParent(), S);
112
205k
  if (!C) 
return false205k
;
113
27
114
27
  ConstantInt *CI = dyn_cast<ConstantInt>(C);
115
27
  if (!CI) 
return false0
;
116
27
117
27
  Value *ReplaceWith = S->getTrueValue();
118
27
  Value *Other = S->getFalseValue();
119
27
  if (!CI->isOne()) 
std::swap(ReplaceWith, Other)13
;
120
27
  if (ReplaceWith == S) 
ReplaceWith = UndefValue::get(S->getType())0
;
121
27
122
27
  S->replaceAllUsesWith(ReplaceWith);
123
27
  S->eraseFromParent();
124
27
125
27
  ++NumSelects;
126
27
127
27
  return true;
128
27
}
129
130
/// Try to simplify a phi with constant incoming values that match the edge
131
/// values of a non-constant value on all other edges:
132
/// bb0:
133
///   %isnull = icmp eq i8* %x, null
134
///   br i1 %isnull, label %bb2, label %bb1
135
/// bb1:
136
///   br label %bb2
137
/// bb2:
138
///   %r = phi i8* [ %x, %bb1 ], [ null, %bb0 ]
139
/// -->
140
///   %r = %x
141
static bool simplifyCommonValuePhi(PHINode *P, LazyValueInfo *LVI,
142
1.21M
                                   DominatorTree *DT) {
143
1.21M
  // Collect incoming constants and initialize possible common value.
144
1.21M
  SmallVector<std::pair<Constant *, unsigned>, 4> IncomingConstants;
145
1.21M
  Value *CommonValue = nullptr;
146
3.16M
  for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; 
++i1.95M
) {
147
2.69M
    Value *Incoming = P->getIncomingValue(i);
148
2.69M
    if (auto *IncomingConstant = dyn_cast<Constant>(Incoming)) {
149
742k
      IncomingConstants.push_back(std::make_pair(IncomingConstant, i));
150
1.94M
    } else if (!CommonValue) {
151
1.17M
      // The potential common value is initialized to the first non-constant.
152
1.17M
      CommonValue = Incoming;
153
1.17M
    } else 
if (778k
Incoming != CommonValue778k
) {
154
739k
      // There can be only one non-constant common value.
155
739k
      return false;
156
739k
    }
157
2.69M
  }
158
1.21M
159
1.21M
  
if (478k
!CommonValue478k
||
IncomingConstants.empty()430k
)
160
47.2k
    return false;
161
430k
162
430k
  // The common value must be valid in all incoming blocks.
163
430k
  BasicBlock *ToBB = P->getParent();
164
430k
  if (auto *CommonInst = dyn_cast<Instruction>(CommonValue))
165
430k
    if (!DT->dominates(CommonInst, ToBB))
166
411k
      return false;
167
19.6k
168
19.6k
  // We have a phi with exactly 1 variable incoming value and 1 or more constant
169
19.6k
  // incoming values. See if all constant incoming values can be mapped back to
170
19.6k
  // the same incoming variable value.
171
19.8k
  
for (auto &IncomingConstant : IncomingConstants)19.6k
{
172
19.8k
    Constant *C = IncomingConstant.first;
173
19.8k
    BasicBlock *IncomingBB = P->getIncomingBlock(IncomingConstant.second);
174
19.8k
    if (C != LVI->getConstantOnEdge(CommonValue, IncomingBB, ToBB, P))
175
19.1k
      return false;
176
19.8k
  }
177
19.6k
178
19.6k
  // All constant incoming values map to the same variable along the incoming
179
19.6k
  // edges of the phi. The phi is unnecessary.
180
19.6k
  P->replaceAllUsesWith(CommonValue);
181
487
  P->eraseFromParent();
182
487
  ++NumPhiCommon;
183
487
  return true;
184
19.6k
}
185
186
static bool processPHI(PHINode *P, LazyValueInfo *LVI, DominatorTree *DT,
187
1.24M
                       const SimplifyQuery &SQ) {
188
1.24M
  bool Changed = false;
189
1.24M
190
1.24M
  BasicBlock *BB = P->getParent();
191
4.46M
  for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; 
++i3.21M
) {
192
3.21M
    Value *Incoming = P->getIncomingValue(i);
193
3.21M
    if (isa<Constant>(Incoming)) 
continue798k
;
194
2.41M
195
2.41M
    Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB, P);
196
2.41M
197
2.41M
    // Look if the incoming value is a select with a scalar condition for which
198
2.41M
    // LVI can tells us the value. In that case replace the incoming value with
199
2.41M
    // the appropriate value of the select. This often allows us to remove the
200
2.41M
    // select later.
201
2.41M
    if (!V) {
202
2.40M
      SelectInst *SI = dyn_cast<SelectInst>(Incoming);
203
2.40M
      if (!SI) 
continue2.34M
;
204
55.9k
205
55.9k
      Value *Condition = SI->getCondition();
206
55.9k
      if (!Condition->getType()->isVectorTy()) {
207
55.9k
        if (Constant *C = LVI->getConstantOnEdge(
208
2.50k
                Condition, P->getIncomingBlock(i), BB, P)) {
209
2.50k
          if (C->isOneValue()) {
210
1.00k
            V = SI->getTrueValue();
211
1.49k
          } else if (C->isZeroValue()) {
212
1.49k
            V = SI->getFalseValue();
213
1.49k
          }
214
2.50k
          // Once LVI learns to handle vector types, we could also add support
215
2.50k
          // for vector type constants that are not all zeroes or all ones.
216
2.50k
        }
217
55.9k
      }
218
55.9k
219
55.9k
      // Look if the select has a constant but LVI tells us that the incoming
220
55.9k
      // value can never be that constant. In that case replace the incoming
221
55.9k
      // value with the other value of the select. This often allows us to
222
55.9k
      // remove the select later.
223
55.9k
      if (!V) {
224
53.4k
        Constant *C = dyn_cast<Constant>(SI->getFalseValue());
225
53.4k
        if (!C) 
continue33.2k
;
226
20.2k
227
20.2k
        if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C,
228
20.2k
              P->getIncomingBlock(i), BB, P) !=
229
20.2k
            LazyValueInfo::False)
230
20.1k
          continue;
231
51
        V = SI->getTrueValue();
232
51
      }
233
55.9k
234
55.9k
      
LLVM_DEBUG2.55k
(dbgs() << "CVP: Threading PHI over " << *SI << '\n');
235
2.55k
    }
236
2.41M
237
2.41M
    P->setIncomingValue(i, V);
238
14.6k
    Changed = true;
239
14.6k
  }
240
1.24M
241
1.24M
  if (Value *V = SimplifyInstruction(P, SQ)) {
242
23.0k
    P->replaceAllUsesWith(V);
243
23.0k
    P->eraseFromParent();
244
23.0k
    Changed = true;
245
23.0k
  }
246
1.24M
247
1.24M
  if (!Changed)
248
1.21M
    Changed = simplifyCommonValuePhi(P, LVI, DT);
249
1.24M
250
1.24M
  if (Changed)
251
30.7k
    ++NumPhis;
252
1.24M
253
1.24M
  return Changed;
254
1.24M
}
255
256
5.86M
static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) {
257
5.86M
  Value *Pointer = nullptr;
258
5.86M
  if (LoadInst *L = dyn_cast<LoadInst>(I))
259
3.42M
    Pointer = L->getPointerOperand();
260
2.44M
  else
261
2.44M
    Pointer = cast<StoreInst>(I)->getPointerOperand();
262
5.86M
263
5.86M
  if (isa<Constant>(Pointer)) 
return false770k
;
264
5.09M
265
5.09M
  Constant *C = LVI->getConstant(Pointer, I->getParent(), I);
266
5.09M
  if (!C) 
return false5.09M
;
267
1
268
1
  ++NumMemAccess;
269
1
  I->replaceUsesOfWith(Pointer, C);
270
1
  return true;
271
1
}
272
273
/// See if LazyValueInfo's ability to exploit edge conditions or range
274
/// information is sufficient to prove this comparison. Even for local
275
/// conditions, this can sometimes prove conditions instcombine can't by
276
/// exploiting range information.
277
2.71M
static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
278
2.71M
  Value *Op0 = Cmp->getOperand(0);
279
2.71M
  auto *C = dyn_cast<Constant>(Cmp->getOperand(1));
280
2.71M
  if (!C)
281
711k
    return false;
282
2.00M
283
2.00M
  // As a policy choice, we choose not to waste compile time on anything where
284
2.00M
  // the comparison is testing local values.  While LVI can sometimes reason
285
2.00M
  // about such cases, it's not its primary purpose.  We do make sure to do
286
2.00M
  // the block local query for uses from terminator instructions, but that's
287
2.00M
  // handled in the code for each terminator.
288
2.00M
  auto *I = dyn_cast<Instruction>(Op0);
289
2.00M
  if (I && 
I->getParent() == Cmp->getParent()1.70M
)
290
1.43M
    return false;
291
568k
292
568k
  LazyValueInfo::Tristate Result =
293
568k
      LVI->getPredicateAt(Cmp->getPredicate(), Op0, C, Cmp);
294
568k
  if (Result == LazyValueInfo::Unknown)
295
567k
    return false;
296
1.34k
297
1.34k
  ++NumCmps;
298
1.34k
  Constant *TorF = ConstantInt::get(Type::getInt1Ty(Cmp->getContext()), Result);
299
1.34k
  Cmp->replaceAllUsesWith(TorF);
300
1.34k
  Cmp->eraseFromParent();
301
1.34k
  return true;
302
1.34k
}
303
304
/// Simplify a switch instruction by removing cases which can never fire. If the
305
/// uselessness of a case could be determined locally then constant propagation
306
/// would already have figured it out. Instead, walk the predecessors and
307
/// statically evaluate cases based on information available on that edge. Cases
308
/// that cannot fire no matter what the incoming edge can safely be removed. If
309
/// a case fires on every incoming edge then the entire switch can be removed
310
/// and replaced with a branch to the case destination.
311
static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
312
39.8k
                          DominatorTree *DT) {
313
39.8k
  DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
314
39.8k
  Value *Cond = I->getCondition();
315
39.8k
  BasicBlock *BB = I->getParent();
316
39.8k
317
39.8k
  // If the condition was defined in same block as the switch then LazyValueInfo
318
39.8k
  // currently won't say anything useful about it, though in theory it could.
319
39.8k
  if (isa<Instruction>(Cond) && 
cast<Instruction>(Cond)->getParent() == BB34.8k
)
320
26.2k
    return false;
321
13.5k
322
13.5k
  // If the switch is unreachable then trying to improve it is a waste of time.
323
13.5k
  pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
324
13.5k
  if (PB == PE) 
return false3.34k
;
325
10.2k
326
10.2k
  // Analyse each switch case in turn.
327
10.2k
  bool Changed = false;
328
10.2k
  DenseMap<BasicBlock*, int> SuccessorsCount;
329
10.2k
  for (auto *Succ : successors(BB))
330
42.6k
    SuccessorsCount[Succ]++;
331
10.2k
332
10.2k
  { // Scope for SwitchInstProfUpdateWrapper. It must not live during
333
10.2k
    // ConstantFoldTerminator() as the underlying SwitchInst can be changed.
334
10.2k
    SwitchInstProfUpdateWrapper SI(*I);
335
10.2k
336
42.5k
    for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
337
32.3k
      ConstantInt *Case = CI->getCaseValue();
338
32.3k
339
32.3k
      // Check to see if the switch condition is equal to/not equal to the case
340
32.3k
      // value on every incoming edge, equal/not equal being the same each time.
341
32.3k
      LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
342
33.8k
      for (pred_iterator PI = PB; PI != PE; 
++PI1.45k
) {
343
32.7k
        // Is the switch condition equal to the case value?
344
32.7k
        LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
345
32.7k
                                                                Cond, Case, *PI,
346
32.7k
                                                                BB, SI);
347
32.7k
        // Give up on this case if nothing is known.
348
32.7k
        if (Value == LazyValueInfo::Unknown) {
349
31.2k
          State = LazyValueInfo::Unknown;
350
31.2k
          break;
351
31.2k
        }
352
1.45k
353
1.45k
        // If this was the first edge to be visited, record that all other edges
354
1.45k
        // need to give the same result.
355
1.45k
        if (PI == PB) {
356
1.22k
          State = Value;
357
1.22k
          continue;
358
1.22k
        }
359
226
360
226
        // If this case is known to fire for some edges and known not to fire for
361
226
        // others then there is nothing we can do - give up.
362
226
        if (Value != State) {
363
3
          State = LazyValueInfo::Unknown;
364
3
          break;
365
3
        }
366
226
      }
367
32.3k
368
32.3k
      if (State == LazyValueInfo::False) {
369
1.12k
        // This case never fires - remove it.
370
1.12k
        BasicBlock *Succ = CI->getCaseSuccessor();
371
1.12k
        Succ->removePredecessor(BB);
372
1.12k
        CI = SI.removeCase(CI);
373
1.12k
        CE = SI->case_end();
374
1.12k
375
1.12k
        // The condition can be modified by removePredecessor's PHI simplification
376
1.12k
        // logic.
377
1.12k
        Cond = SI->getCondition();
378
1.12k
379
1.12k
        ++NumDeadCases;
380
1.12k
        Changed = true;
381
1.12k
        if (--SuccessorsCount[Succ] == 0)
382
251
          DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}});
383
1.12k
        continue;
384
1.12k
      }
385
31.2k
      if (State == LazyValueInfo::True) {
386
2
        // This case always fires.  Arrange for the switch to be turned into an
387
2
        // unconditional branch by replacing the switch condition with the case
388
2
        // value.
389
2
        SI->setCondition(Case);
390
2
        NumDeadCases += SI->getNumCases();
391
2
        Changed = true;
392
2
        break;
393
2
      }
394
31.2k
395
31.2k
      // Increment the case iterator since we didn't delete it.
396
31.2k
      ++CI;
397
31.2k
    }
398
10.2k
  }
399
10.2k
400
10.2k
  if (Changed)
401
602
    // If the switch has been simplified to the point where it can be replaced
402
602
    // by a branch then do so now.
403
602
    ConstantFoldTerminator(BB, /*DeleteDeadConditions = */ false,
404
602
                           /*TLI = */ nullptr, &DTU);
405
10.2k
  return Changed;
406
10.2k
}
407
408
// See if we can prove that the given binary op intrinsic will not overflow.
409
2.86k
static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) {
410
2.86k
  ConstantRange LRange = LVI->getConstantRange(
411
2.86k
      BO->getLHS(), BO->getParent(), BO);
412
2.86k
  ConstantRange RRange = LVI->getConstantRange(
413
2.86k
      BO->getRHS(), BO->getParent(), BO);
414
2.86k
  ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
415
2.86k
      BO->getBinaryOp(), RRange, BO->getNoWrapKind());
416
2.86k
  return NWRegion.contains(LRange);
417
2.86k
}
418
419
40
static void processOverflowIntrinsic(WithOverflowInst *WO) {
420
40
  IRBuilder<> B(WO);
421
40
  Value *NewOp = B.CreateBinOp(
422
40
      WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), WO->getName());
423
40
  // Constant-folding could have happened.
424
40
  if (auto *Inst = dyn_cast<Instruction>(NewOp)) {
425
38
    if (WO->isSigned())
426
20
      Inst->setHasNoSignedWrap();
427
18
    else
428
18
      Inst->setHasNoUnsignedWrap();
429
38
  }
430
40
431
40
  Value *NewI = B.CreateInsertValue(UndefValue::get(WO->getType()), NewOp, 0);
432
40
  NewI = B.CreateInsertValue(NewI, ConstantInt::getFalse(WO->getContext()), 1);
433
40
  WO->replaceAllUsesWith(NewI);
434
40
  WO->eraseFromParent();
435
40
  ++NumOverflows;
436
40
}
437
438
8
static void processSaturatingInst(SaturatingInst *SI) {
439
8
  BinaryOperator *BinOp = BinaryOperator::Create(
440
8
      SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI);
441
8
  BinOp->setDebugLoc(SI->getDebugLoc());
442
8
  if (SI->isSigned())
443
4
    BinOp->setHasNoSignedWrap();
444
4
  else
445
4
    BinOp->setHasNoUnsignedWrap();
446
8
447
8
  SI->replaceAllUsesWith(BinOp);
448
8
  SI->eraseFromParent();
449
8
  ++NumSaturating;
450
8
}
451
452
/// Infer nonnull attributes for the arguments at the specified callsite.
453
3.79M
static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
454
3.79M
  SmallVector<unsigned, 4> ArgNos;
455
3.79M
  unsigned ArgNo = 0;
456
3.79M
457
3.79M
  if (auto *WO = dyn_cast<WithOverflowInst>(CS.getInstruction())) {
458
2.65k
    if (WO->getLHS()->getType()->isIntegerTy() && 
willNotOverflow(WO, LVI)2.65k
) {
459
40
      processOverflowIntrinsic(WO);
460
40
      return true;
461
40
    }
462
3.79M
  }
463
3.79M
464
3.79M
  if (auto *SI = dyn_cast<SaturatingInst>(CS.getInstruction())) {
465
228
    if (SI->getType()->isIntegerTy() && 
willNotOverflow(SI, LVI)210
) {
466
8
      processSaturatingInst(SI);
467
8
      return true;
468
8
    }
469
3.79M
  }
470
3.79M
471
3.79M
  // Deopt bundle operands are intended to capture state with minimal
472
3.79M
  // perturbance of the code otherwise.  If we can find a constant value for
473
3.79M
  // any such operand and remove a use of the original value, that's
474
3.79M
  // desireable since it may allow further optimization of that value (e.g. via
475
3.79M
  // single use rules in instcombine).  Since deopt uses tend to,
476
3.79M
  // idiomatically, appear along rare conditional paths, it's reasonable likely
477
3.79M
  // we may have a conditional fact with which LVI can fold.   
478
3.79M
  if (auto DeoptBundle = CS.getOperandBundle(LLVMContext::OB_deopt)) {
479
17
    bool Progress = false;
480
17
    for (const Use &ConstU : DeoptBundle->Inputs) {
481
7
      Use &U = const_cast<Use&>(ConstU);
482
7
      Value *V = U.get();
483
7
      if (V->getType()->isVectorTy()) 
continue0
;
484
7
      if (isa<Constant>(V)) 
continue0
;
485
7
486
7
      Constant *C = LVI->getConstant(V, CS.getParent(), CS.getInstruction());
487
7
      if (!C) 
continue1
;
488
6
      U.set(C);
489
6
      Progress = true;
490
6
    }
491
17
    if (Progress)
492
6
      return true;
493
3.79M
  }
494
3.79M
495
8.40M
  
for (Value *V : CS.args())3.79M
{
496
8.40M
    PointerType *Type = dyn_cast<PointerType>(V->getType());
497
8.40M
    // Try to mark pointer typed parameters as non-null.  We skip the
498
8.40M
    // relatively expensive analysis for constants which are obviously either
499
8.40M
    // null or non-null to start with.
500
8.40M
    if (Type && 
!CS.paramHasAttr(ArgNo, Attribute::NonNull)5.75M
&&
501
8.40M
        
!isa<Constant>(V)3.81M
&&
502
8.40M
        LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
503
2.36M
                            ConstantPointerNull::get(Type),
504
2.36M
                            CS.getInstruction()) == LazyValueInfo::False)
505
72.1k
      ArgNos.push_back(ArgNo);
506
8.40M
    ArgNo++;
507
8.40M
  }
508
3.79M
509
3.79M
  assert(ArgNo == CS.arg_size() && "sanity check");
510
3.79M
511
3.79M
  if (ArgNos.empty())
512
3.72M
    return false;
513
68.9k
514
68.9k
  AttributeList AS = CS.getAttributes();
515
68.9k
  LLVMContext &Ctx = CS.getInstruction()->getContext();
516
68.9k
  AS = AS.addParamAttribute(Ctx, ArgNos,
517
68.9k
                            Attribute::get(Ctx, Attribute::NonNull));
518
68.9k
  CS.setAttributes(AS);
519
68.9k
520
68.9k
  return true;
521
68.9k
}
522
523
35.8k
static bool hasPositiveOperands(BinaryOperator *SDI, LazyValueInfo *LVI) {
524
35.8k
  Constant *Zero = ConstantInt::get(SDI->getType(), 0);
525
39.1k
  for (Value *O : SDI->operands()) {
526
39.1k
    auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SGE, O, Zero, SDI);
527
39.1k
    if (Result != LazyValueInfo::True)
528
35.5k
      return false;
529
39.1k
  }
530
35.8k
  
return true309
;
531
35.8k
}
532
533
/// Try to shrink a udiv/urem's width down to the smallest power of two that's
534
/// sufficient to contain its operands.
535
28.6k
static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
536
28.6k
  assert(Instr->getOpcode() == Instruction::UDiv ||
537
28.6k
         Instr->getOpcode() == Instruction::URem);
538
28.6k
  if (Instr->getType()->isVectorTy())
539
2
    return false;
540
28.6k
541
28.6k
  // Find the smallest power of two bitwidth that's sufficient to hold Instr's
542
28.6k
  // operands.
543
28.6k
  auto OrigWidth = Instr->getType()->getIntegerBitWidth();
544
28.6k
  ConstantRange OperandRange(OrigWidth, /*isFullSet=*/false);
545
57.2k
  for (Value *Operand : Instr->operands()) {
546
57.2k
    OperandRange = OperandRange.unionWith(
547
57.2k
        LVI->getConstantRange(Operand, Instr->getParent()));
548
57.2k
  }
549
28.6k
  // Don't shrink below 8 bits wide.
550
28.6k
  unsigned NewWidth = std::max<unsigned>(
551
28.6k
      PowerOf2Ceil(OperandRange.getUnsignedMax().getActiveBits()), 8);
552
28.6k
  // NewWidth might be greater than OrigWidth if OrigWidth is not a power of
553
28.6k
  // two.
554
28.6k
  if (NewWidth >= OrigWidth)
555
28.3k
    return false;
556
295
557
295
  ++NumUDivs;
558
295
  IRBuilder<> B{Instr};
559
295
  auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
560
295
  auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
561
295
                                     Instr->getName() + ".lhs.trunc");
562
295
  auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy,
563
295
                                     Instr->getName() + ".rhs.trunc");
564
295
  auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName());
565
295
  auto *Zext = B.CreateZExt(BO, Instr->getType(), Instr->getName() + ".zext");
566
295
  if (auto *BinOp = dyn_cast<BinaryOperator>(BO))
567
284
    if (BinOp->getOpcode() == Instruction::UDiv)
568
86
      BinOp->setIsExact(Instr->isExact());
569
295
570
295
  Instr->replaceAllUsesWith(Zext);
571
295
  Instr->eraseFromParent();
572
295
  return true;
573
295
}
574
575
6.00k
static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) {
576
6.00k
  if (SDI->getType()->isVectorTy() || !hasPositiveOperands(SDI, LVI))
577
5.95k
    return false;
578
53
579
53
  ++NumSRems;
580
53
  auto *BO = BinaryOperator::CreateURem(SDI->getOperand(0), SDI->getOperand(1),
581
53
                                        SDI->getName(), SDI);
582
53
  BO->setDebugLoc(SDI->getDebugLoc());
583
53
  SDI->replaceAllUsesWith(BO);
584
53
  SDI->eraseFromParent();
585
53
586
53
  // Try to process our new urem.
587
53
  processUDivOrURem(BO, LVI);
588
53
589
53
  return true;
590
53
}
591
592
/// See if LazyValueInfo's ability to exploit edge conditions or range
593
/// information is sufficient to prove the both operands of this SDiv are
594
/// positive.  If this is the case, replace the SDiv with a UDiv. Even for local
595
/// conditions, this can sometimes prove conditions instcombine can't by
596
/// exploiting range information.
597
29.8k
static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) {
598
29.8k
  if (SDI->getType()->isVectorTy() || 
!hasPositiveOperands(SDI, LVI)29.8k
)
599
29.6k
    return false;
600
256
601
256
  ++NumSDivs;
602
256
  auto *BO = BinaryOperator::CreateUDiv(SDI->getOperand(0), SDI->getOperand(1),
603
256
                                        SDI->getName(), SDI);
604
256
  BO->setDebugLoc(SDI->getDebugLoc());
605
256
  BO->setIsExact(SDI->isExact());
606
256
  SDI->replaceAllUsesWith(BO);
607
256
  SDI->eraseFromParent();
608
256
609
256
  // Try to simplify our new udiv.
610
256
  processUDivOrURem(BO, LVI);
611
256
612
256
  return true;
613
256
}
614
615
50.3k
static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
616
50.3k
  if (SDI->getType()->isVectorTy())
617
222
    return false;
618
50.0k
619
50.0k
  Constant *Zero = ConstantInt::get(SDI->getType(), 0);
620
50.0k
  if (LVI->getPredicateAt(ICmpInst::ICMP_SGE, SDI->getOperand(0), Zero, SDI) !=
621
50.0k
      LazyValueInfo::True)
622
49.8k
    return false;
623
250
624
250
  ++NumAShrs;
625
250
  auto *BO = BinaryOperator::CreateLShr(SDI->getOperand(0), SDI->getOperand(1),
626
250
                                        SDI->getName(), SDI);
627
250
  BO->setDebugLoc(SDI->getDebugLoc());
628
250
  BO->setIsExact(SDI->isExact());
629
250
  SDI->replaceAllUsesWith(BO);
630
250
  SDI->eraseFromParent();
631
250
632
250
  return true;
633
250
}
634
635
1.13M
static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
636
1.13M
  using OBO = OverflowingBinaryOperator;
637
1.13M
638
1.13M
  if (DontAddNoWrapFlags)
639
0
    return false;
640
1.13M
641
1.13M
  if (BinOp->getType()->isVectorTy())
642
2.14k
    return false;
643
1.13M
644
1.13M
  bool NSW = BinOp->hasNoSignedWrap();
645
1.13M
  bool NUW = BinOp->hasNoUnsignedWrap();
646
1.13M
  if (NSW && 
NUW621k
)
647
208k
    return false;
648
921k
649
921k
  BasicBlock *BB = BinOp->getParent();
650
921k
651
921k
  Value *LHS = BinOp->getOperand(0);
652
921k
  Value *RHS = BinOp->getOperand(1);
653
921k
654
921k
  ConstantRange LRange = LVI->getConstantRange(LHS, BB, BinOp);
655
921k
  ConstantRange RRange = LVI->getConstantRange(RHS, BB, BinOp);
656
921k
657
921k
  bool Changed = false;
658
921k
  if (!NUW) {
659
888k
    ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion(
660
888k
        BinOp->getOpcode(), RRange, OBO::NoUnsignedWrap);
661
888k
    bool NewNUW = NUWRange.contains(LRange);
662
888k
    BinOp->setHasNoUnsignedWrap(NewNUW);
663
888k
    Changed |= NewNUW;
664
888k
  }
665
921k
  if (!NSW) {
666
509k
    ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(
667
509k
        BinOp->getOpcode(), RRange, OBO::NoSignedWrap);
668
509k
    bool NewNSW = NSWRange.contains(LRange);
669
509k
    BinOp->setHasNoSignedWrap(NewNSW);
670
509k
    Changed |= NewNSW;
671
509k
  }
672
921k
673
921k
  return Changed;
674
921k
}
675
676
526k
static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
677
526k
  if (Constant *C = LVI->getConstant(V, At->getParent(), At))
678
13
    return C;
679
526k
680
526k
  // TODO: The following really should be sunk inside LVI's core algorithm, or
681
526k
  // at least the outer shims around such.
682
526k
  auto *C = dyn_cast<CmpInst>(V);
683
526k
  if (!C) 
return nullptr508k
;
684
17.9k
685
17.9k
  Value *Op0 = C->getOperand(0);
686
17.9k
  Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
687
17.9k
  if (!Op1) 
return nullptr6.89k
;
688
11.0k
689
11.0k
  LazyValueInfo::Tristate Result =
690
11.0k
    LVI->getPredicateAt(C->getPredicate(), Op0, Op1, At);
691
11.0k
  if (Result == LazyValueInfo::Unknown)
692
10.9k
    return nullptr;
693
26
694
26
  return (Result == LazyValueInfo::True) ?
695
15
    ConstantInt::getTrue(C->getContext()) :
696
26
    
ConstantInt::getFalse(C->getContext())11
;
697
26
}
698
699
static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
700
933k
                    const SimplifyQuery &SQ) {
701
933k
  bool FnChanged = false;
702
933k
  // Visiting in a pre-order depth-first traversal causes us to simplify early
703
933k
  // blocks before querying later blocks (which require us to analyze early
704
933k
  // blocks).  Eagerly simplifying shallow blocks means there is strictly less
705
933k
  // work to do for deep blocks.  This also means we don't visit unreachable
706
933k
  // blocks.
707
5.40M
  for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
708
5.40M
    bool BBChanged = false;
709
34.6M
    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
710
29.2M
      Instruction *II = &*BI++;
711
29.2M
      switch (II->getOpcode()) {
712
29.2M
      case Instruction::Select:
713
205k
        BBChanged |= processSelect(cast<SelectInst>(II), LVI);
714
205k
        break;
715
29.2M
      case Instruction::PHI:
716
1.24M
        BBChanged |= processPHI(cast<PHINode>(II), LVI, DT, SQ);
717
1.24M
        break;
718
29.2M
      case Instruction::ICmp:
719
2.71M
      case Instruction::FCmp:
720
2.71M
        BBChanged |= processCmp(cast<CmpInst>(II), LVI);
721
2.71M
        break;
722
5.86M
      case Instruction::Load:
723
5.86M
      case Instruction::Store:
724
5.86M
        BBChanged |= processMemAccess(II, LVI);
725
5.86M
        break;
726
5.86M
      case Instruction::Call:
727
3.79M
      case Instruction::Invoke:
728
3.79M
        BBChanged |= processCallSite(CallSite(II), LVI);
729
3.79M
        break;
730
3.79M
      case Instruction::SRem:
731
6.00k
        BBChanged |= processSRem(cast<BinaryOperator>(II), LVI);
732
6.00k
        break;
733
3.79M
      case Instruction::SDiv:
734
29.8k
        BBChanged |= processSDiv(cast<BinaryOperator>(II), LVI);
735
29.8k
        break;
736
3.79M
      case Instruction::UDiv:
737
28.3k
      case Instruction::URem:
738
28.3k
        BBChanged |= processUDivOrURem(cast<BinaryOperator>(II), LVI);
739
28.3k
        break;
740
50.3k
      case Instruction::AShr:
741
50.3k
        BBChanged |= processAShr(cast<BinaryOperator>(II), LVI);
742
50.3k
        break;
743
1.13M
      case Instruction::Add:
744
1.13M
      case Instruction::Sub:
745
1.13M
        BBChanged |= processBinOp(cast<BinaryOperator>(II), LVI);
746
1.13M
        break;
747
29.2M
      }
748
29.2M
    }
749
5.40M
750
5.40M
    Instruction *Term = BB->getTerminator();
751
5.40M
    switch (Term->getOpcode()) {
752
5.40M
    case Instruction::Switch:
753
39.8k
      BBChanged |= processSwitch(cast<SwitchInst>(Term), LVI, DT);
754
39.8k
      break;
755
5.40M
    case Instruction::Ret: {
756
934k
      auto *RI = cast<ReturnInst>(Term);
757
934k
      // Try to determine the return value if we can.  This is mainly here to
758
934k
      // simplify the writing of unit tests, but also helps to enable IPO by
759
934k
      // constant folding the return values of callees.
760
934k
      auto *RetVal = RI->getReturnValue();
761
934k
      if (!RetVal) 
break309k
; // handle "ret void"
762
624k
      if (isa<Constant>(RetVal)) 
break98.2k
; // nothing to do
763
526k
      if (auto *C = getConstantAt(RetVal, RI, LVI)) {
764
40
        ++NumReturns;
765
40
        RI->replaceUsesOfWith(RetVal, C);
766
40
        BBChanged = true;
767
40
      }
768
526k
    }
769
5.40M
    }
770
5.40M
771
5.40M
    FnChanged |= BBChanged;
772
5.40M
  }
773
933k
774
933k
  return FnChanged;
775
933k
}
776
777
931k
bool CorrelatedValuePropagation::runOnFunction(Function &F) {
778
931k
  if (skipFunction(F))
779
88
    return false;
780
931k
781
931k
  LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
782
931k
  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
783
931k
784
931k
  return runImpl(F, LVI, DT, getBestSimplifyQuery(*this, F));
785
931k
}
786
787
PreservedAnalyses
788
2.32k
CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
789
2.32k
  LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F);
790
2.32k
  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
791
2.32k
792
2.32k
  bool Changed = runImpl(F, LVI, DT, getBestSimplifyQuery(AM, F));
793
2.32k
794
2.32k
  if (!Changed)
795
2.29k
    return PreservedAnalyses::all();
796
31
  PreservedAnalyses PA;
797
31
  PA.preserve<GlobalsAA>();
798
31
  PA.preserve<DominatorTreeAnalysis>();
799
31
  return PA;
800
31
}