Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Analysis/ValueTracking.cpp

//===- ValueTracking.cpp - Walk computations to compute properties --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains routines that help analyze properties that chains of
// computations have.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;

const unsigned MaxDepth = 6;

// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
                                              cl::Hidden, cl::init(20));

/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
  if (unsigned BitWidth = Ty->getScalarSizeInBits())
    return BitWidth;

  return DL.getIndexTypeSizeInBits(Ty);
}
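
// Illustrative examples (not part of the original source), assuming a typical
// 64-bit DataLayout: getBitWidth(i32) == 32, getBitWidth(<4 x i32>) == 32
// (the element width), and getBitWidth(i8*) falls through to the index type
// size, typically 64.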

namespace {

// Simplifying using an assume can only be done in a particular control-flow
// context (the context instruction provides that context). If an assume and
// the context instruction are not in the same block then the DT helps in
// figuring out if we can use it.
struct Query {
  const DataLayout &DL;
  AssumptionCache *AC;
  const Instruction *CxtI;
  const DominatorTree *DT;

  // Unlike the other analyses, this may be a nullptr because not all clients
  // provide it currently.
  OptimizationRemarkEmitter *ORE;

  /// Set of assumptions that should be excluded from further queries.
  /// This is because of the potential for mutual recursion to cause
  /// computeKnownBits to repeatedly visit the same assume intrinsic. The
  /// classic case of this is assume(x = y), which will attempt to determine
  /// bits in x from bits in y, which will attempt to determine bits in y from
  /// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call
  /// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo
  /// (all of which can call computeKnownBits), and so on.
  std::array<const Value *, MaxDepth> Excluded;

  /// If true, it is safe to use metadata during simplification.
  InstrInfoQuery IIQ;

  unsigned NumExcluded = 0;

  Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
        const DominatorTree *DT, bool UseInstrInfo,
        OptimizationRemarkEmitter *ORE = nullptr)
      : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {}

  Query(const Query &Q, const Value *NewExcl)
      : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), IIQ(Q.IIQ),
        NumExcluded(Q.NumExcluded) {
    Excluded = Q.Excluded;
    Excluded[NumExcluded++] = NewExcl;
    assert(NumExcluded <= Excluded.size());
  }

  bool isExcluded(const Value *Value) const {
    if (NumExcluded == 0)
      return false;
    auto End = Excluded.begin() + NumExcluded;
    return std::find(Excluded.begin(), End, Value) != End;
  }
};

} // end anonymous namespace
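
// Illustrative only (not part of the original file): how the exclusion
// mechanism above is exercised. Once an assume intrinsic I has been consulted,
// recursive queries are issued with a copy of the Query that excludes it, so
// computeKnownBitsFromAssume later skips I via Q.isExcluded(I) instead of
// revisiting it:
//
//   KnownBits RHSKnown(BitWidth);
//   computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I)); // I now excluded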

// Given the provided Value and, potentially, a context instruction, return
// the preferred context instruction (if any).
static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
  // If we've been provided with a context instruction, then use that (provided
  // it has been inserted).
  if (CxtI && CxtI->getParent())
    return CxtI;

  // If the value is really an already-inserted instruction, then use that.
  CxtI = dyn_cast<Instruction>(V);
  if (CxtI && CxtI->getParent())
    return CxtI;

  return nullptr;
}

static void computeKnownBits(const Value *V, KnownBits &Known,
                             unsigned Depth, const Query &Q);

void llvm::computeKnownBits(const Value *V, KnownBits &Known,
                            const DataLayout &DL, unsigned Depth,
                            AssumptionCache *AC, const Instruction *CxtI,
                            const DominatorTree *DT,
                            OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
  ::computeKnownBits(V, Known, Depth,
                     Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
}

static KnownBits computeKnownBits(const Value *V, unsigned Depth,
                                  const Query &Q);

KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
                                 unsigned Depth, AssumptionCache *AC,
                                 const Instruction *CxtI,
                                 const DominatorTree *DT,
                                 OptimizationRemarkEmitter *ORE,
                                 bool UseInstrInfo) {
  return ::computeKnownBits(
      V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
}
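
// A minimal usage sketch (not part of the original file): a client holding
// only a value and the module's DataLayout can rely on the defaulted analysis
// parameters of the public wrapper above. V and DL are assumed to be in scope:
//
//   KnownBits Known = computeKnownBits(V, DL);
//   if (Known.isNonNegative())
//     ;  // the sign bit of V is known to be zero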

bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
                               const DataLayout &DL, AssumptionCache *AC,
                               const Instruction *CxtI, const DominatorTree *DT,
                               bool UseInstrInfo) {
  assert(LHS->getType() == RHS->getType() &&
         "LHS and RHS should have the same type");
  assert(LHS->getType()->isIntOrIntVectorTy() &&
         "LHS and RHS should be integers");
  // Look for an inverted mask: (X & ~M) op (Y & M).
  Value *M;
  if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
      match(RHS, m_c_And(m_Specific(M), m_Value())))
    return true;
  if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
      match(LHS, m_c_And(m_Specific(M), m_Value())))
    return true;
  IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
  KnownBits LHSKnown(IT->getBitWidth());
  KnownBits RHSKnown(IT->getBitWidth());
  computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
  computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
  return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue();
}
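
// Illustrative IR (not in the original file) for the inverted-mask special
// case above: %lhs and %rhs can share no set bits because %m and ~%m partition
// the bit positions, so haveNoCommonBitsSet returns true without computing
// known bits:
//
//   %notm = xor i8 %m, -1
//   %lhs  = and i8 %x, %notm      ; (X & ~M)
//   %rhs  = and i8 %y, %m         ; (Y & M)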

bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI) {
  for (const User *U : CxtI->users()) {
    if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
      if (IC->isEquality())
        if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
          if (C->isNullValue())
            continue;
    return false;
  }
  return true;
}
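
// Illustrative only (not in the original file): for the pattern below, the
// call satisfies isOnlyUsedInZeroEqualityComparison, since its single user is
// an equality comparison against the null value:
//
//   %res = call i32 @memcmp(i8* %p, i8* %q, i64 %n)
//   %eq  = icmp eq i32 %res, 0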

static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
                                   const Query &Q);

bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
                                  bool OrZero, unsigned Depth,
                                  AssumptionCache *AC, const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownToBeAPowerOfTwo(
      V, OrZero, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}

static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q);

bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
                          AssumptionCache *AC, const Instruction *CxtI,
                          const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownNonZero(V, Depth,
                          Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}

bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL,
                              unsigned Depth, AssumptionCache *AC,
                              const Instruction *CxtI, const DominatorTree *DT,
                              bool UseInstrInfo) {
  KnownBits Known =
      computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
  return Known.isNonNegative();
}

bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
                           AssumptionCache *AC, const Instruction *CxtI,
                           const DominatorTree *DT, bool UseInstrInfo) {
  if (auto *CI = dyn_cast<ConstantInt>(V))
    return CI->getValue().isStrictlyPositive();

  // TODO: We're doing two recursive queries here.  We should factor this such
  // that only a single query is needed.
  return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, UseInstrInfo) &&
         isKnownNonZero(V, DL, Depth, AC, CxtI, DT, UseInstrInfo);
}

bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
                           AssumptionCache *AC, const Instruction *CxtI,
                           const DominatorTree *DT, bool UseInstrInfo) {
  KnownBits Known =
      computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
  return Known.isNegative();
}

static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q);

bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
                           const DataLayout &DL, AssumptionCache *AC,
                           const Instruction *CxtI, const DominatorTree *DT,
                           bool UseInstrInfo) {
  return ::isKnownNonEqual(V1, V2,
                           Query(DL, AC, safeCxtI(V1, safeCxtI(V2, CxtI)), DT,
                                 UseInstrInfo, /*ORE=*/nullptr));
}

static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
                              const Query &Q);

bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
                             const DataLayout &DL, unsigned Depth,
                             AssumptionCache *AC, const Instruction *CxtI,
                             const DominatorTree *DT, bool UseInstrInfo) {
  return ::MaskedValueIsZero(
      V, Mask, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}

static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
                                   const Query &Q);

unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
                                  unsigned Depth, AssumptionCache *AC,
                                  const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::ComputeNumSignBits(
      V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}

static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
                                   bool NSW,
                                   KnownBits &KnownOut, KnownBits &Known2,
                                   unsigned Depth, const Query &Q) {
  unsigned BitWidth = KnownOut.getBitWidth();

  // If an initial sequence of bits in the result is not needed, the
  // corresponding bits in the operands are not needed.
  KnownBits LHSKnown(BitWidth);
  computeKnownBits(Op0, LHSKnown, Depth + 1, Q);
  computeKnownBits(Op1, Known2, Depth + 1, Q);

  KnownOut = KnownBits::computeForAddSub(Add, NSW, LHSKnown, Known2);
}
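
// A small worked example (not in the original file) of what
// KnownBits::computeForAddSub propagates above: if both operands have their
// low two bits known zero (..00 + ..00), every carry into bit 2 is zero, so
// the sum's low two bits are known zero as well.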

static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
                                KnownBits &Known, KnownBits &Known2,
                                unsigned Depth, const Query &Q) {
  unsigned BitWidth = Known.getBitWidth();
  computeKnownBits(Op1, Known, Depth + 1, Q);
  computeKnownBits(Op0, Known2, Depth + 1, Q);

  bool isKnownNegative = false;
  bool isKnownNonNegative = false;
  // If the multiplication is known not to overflow, compute the sign bit.
  if (NSW) {
    if (Op0 == Op1) {
      // The product of a number with itself is non-negative.
      isKnownNonNegative = true;
    } else {
      bool isKnownNonNegativeOp1 = Known.isNonNegative();
      bool isKnownNonNegativeOp0 = Known2.isNonNegative();
      bool isKnownNegativeOp1 = Known.isNegative();
      bool isKnownNegativeOp0 = Known2.isNegative();
      // The product of two numbers with the same sign is non-negative.
      isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
        (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
      // The product of a negative number and a non-negative number is either
      // negative or zero.
      if (!isKnownNonNegative)
        isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
                           isKnownNonZero(Op0, Depth, Q)) ||
                          (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
                           isKnownNonZero(Op1, Depth, Q));
    }
  }

  assert(!Known.hasConflict() && !Known2.hasConflict());
  // Compute a conservative estimate for high known-0 bits.
  unsigned LeadZ = std::max(Known.countMinLeadingZeros() +
                            Known2.countMinLeadingZeros(),
                            BitWidth) - BitWidth;
  LeadZ = std::min(LeadZ, BitWidth);

  // The result of the bottom bits of an integer multiply can be
  // inferred by looking at the bottom bits of both operands and
  // multiplying them together.
  // We can infer at least the minimum number of known trailing bits
  // of both operands. Depending on number of trailing zeros, we can
  // infer more bits, because (a*b) <=> ((a/m) * (b/n)) * (m*n) assuming
  // a and b are divisible by m and n respectively.
  // We then calculate how many of those bits are inferrable and set
  // the output. For example, the i8 mul:
  //  a = XXXX1100 (12)
  //  b = XXXX1110 (14)
  // We know the bottom 3 bits are zero since the first can be divided by
  // 4 and the second by 2, thus having ((12/4) * (14/2)) * (2*4).
  // Applying the multiplication to the trimmed arguments gets:
  //    XX11 (3)
  //    X111 (7)
  // -------
  //    XX11
  //   XX11
  //  XX11
  // XX11
  // -------
  // XXXXX01
  // Which allows us to infer the 2 LSBs. Since we're multiplying the result
  // by 8, the bottom 3 bits will be 0, so we can infer a total of 5 bits.
  // The proof for this can be described as:
  // Pre: (C1 >= 0) && (C1 < (1 << C5)) && (C2 >= 0) && (C2 < (1 << C6)) &&
  //      (C7 == (1 << (umin(countTrailingZeros(C1), C5) +
  //                    umin(countTrailingZeros(C2), C6) +
  //                    umin(C5 - umin(countTrailingZeros(C1), C5),
  //                         C6 - umin(countTrailingZeros(C2), C6)))) - 1)
  // %aa = shl i8 %a, C5
  // %bb = shl i8 %b, C6
  // %aaa = or i8 %aa, C1
  // %bbb = or i8 %bb, C2
  // %mul = mul i8 %aaa, %bbb
  // %mask = and i8 %mul, C7
  //   =>
  // %mask = i8 ((C1*C2)&C7)
  // Where C5, C6 describe the known bits of %a, %b
  // C1, C2 describe the known bottom bits of %a, %b.
  // C7 describes the mask of the known bits of the result.
  APInt Bottom0 = Known.One;
  APInt Bottom1 = Known2.One;

  // How many times we'd be able to divide each argument by 2 (shr by 1).
  // This gives us the number of trailing zeros on the multiplication result.
  unsigned TrailBitsKnown0 = (Known.Zero | Known.One).countTrailingOnes();
  unsigned TrailBitsKnown1 = (Known2.Zero | Known2.One).countTrailingOnes();
  unsigned TrailZero0 = Known.countMinTrailingZeros();
  unsigned TrailZero1 = Known2.countMinTrailingZeros();
  unsigned TrailZ = TrailZero0 + TrailZero1;

  // Figure out the fewest known-bits operand.
  unsigned SmallestOperand = std::min(TrailBitsKnown0 - TrailZero0,
                                      TrailBitsKnown1 - TrailZero1);
  unsigned ResultBitsKnown = std::min(SmallestOperand + TrailZ, BitWidth);

  APInt BottomKnown = Bottom0.getLoBits(TrailBitsKnown0) *
                      Bottom1.getLoBits(TrailBitsKnown1);

  Known.resetAll();
  Known.Zero.setHighBits(LeadZ);
  Known.Zero |= (~BottomKnown).getLoBits(ResultBitsKnown);
  Known.One |= BottomKnown.getLoBits(ResultBitsKnown);

  // Only make use of no-wrap flags if we failed to compute the sign bit
  // directly.  This matters if the multiplication always overflows, in
  // which case we prefer to follow the result of the direct computation,
  // though as the program is invoking undefined behaviour we can choose
  // whatever we like here.
  if (isKnownNonNegative && !Known.isNegative())
    Known.makeNonNegative();
  else if (isKnownNegative && !Known.isNonNegative())
    Known.makeNegative();
}
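
// A numeric illustration (not in the original file) of the conservative
// leading-zero estimate above, for i8: if Known gives a < 32 (>= 3 leading
// zeros) and b < 4 (>= 6 leading zeros), then LeadZ = max(3 + 6, 8) - 8 = 1,
// i.e. the product is known to fit in 7 bits (a*b <= 31*3 = 93 < 128).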

void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
                                             KnownBits &Known) {
  unsigned BitWidth = Known.getBitWidth();
  unsigned NumRanges = Ranges.getNumOperands() / 2;
  assert(NumRanges >= 1);

  Known.Zero.setAllBits();
  Known.One.setAllBits();

  for (unsigned i = 0; i < NumRanges; ++i) {
    ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
    ConstantInt *Upper =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
    ConstantRange Range(Lower->getValue(), Upper->getValue());

    // The first CommonPrefixBits of all values in Range are equal.
    unsigned CommonPrefixBits =
        (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros();

    APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
    Known.One &= Range.getUnsignedMax() & Mask;
    Known.Zero &= ~Range.getUnsignedMax() & Mask;
  }
}
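
// A worked example (not in the original file): for a single i8 range [32, 48),
// UnsignedMax = 47 (0b00101111) and UnsignedMin = 32 (0b00100000), so
// max ^ min = 0b00001111 has 4 leading zeros and the top 4 bits are common to
// every value in the range: bits 7, 6 and 4 become known zero and bit 5
// becomes known one.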

static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
  SmallVector<const Value *, 16> WorkSet(1, I);
  SmallPtrSet<const Value *, 32> Visited;
  SmallPtrSet<const Value *, 16> EphValues;

  // The instruction defining an assumption's condition itself is always
  // considered ephemeral to that assumption (even if it has other
  // non-ephemeral users). See r246696's test case for an example.
  if (is_contained(I->operands(), E))
    return true;

  while (!WorkSet.empty()) {
    const Value *V = WorkSet.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    // If all uses of this value are ephemeral, then so is this value.
    if (llvm::all_of(V->users(), [&](const User *U) {
                                   return EphValues.count(U);
                                 })) {
      if (V == E)
        return true;

      if (V == I || isSafeToSpeculativelyExecute(V)) {
        EphValues.insert(V);
        if (const User *U = dyn_cast<User>(V))
          for (User::const_op_iterator J = U->op_begin(), JE = U->op_end();
               J != JE; ++J)
            WorkSet.push_back(*J);
      }
    }
  }

  return false;
}
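
// Illustrative only (not in the original file): in the IR below, %cmp (and
// anything feeding only the assume) is ephemeral to the assume, since its
// sole purpose is to define the assumed condition:
//
//   %cmp = icmp ult i32 %x, 16
//   call void @llvm.assume(i1 %cmp)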

// Is this an intrinsic that cannot be speculated but also cannot trap?
bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
  if (const CallInst *CI = dyn_cast<CallInst>(I))
    if (Function *F = CI->getCalledFunction())
      switch (F->getIntrinsicID()) {
      default: break;
      // FIXME: This list is repeated from NoTTI::getIntrinsicCost.
      case Intrinsic::assume:
      case Intrinsic::sideeffect:
      case Intrinsic::dbg_declare:
      case Intrinsic::dbg_value:
      case Intrinsic::dbg_label:
      case Intrinsic::invariant_start:
      case Intrinsic::invariant_end:
      case Intrinsic::lifetime_start:
      case Intrinsic::lifetime_end:
      case Intrinsic::objectsize:
      case Intrinsic::ptr_annotation:
      case Intrinsic::var_annotation:
        return true;
      }

  return false;
}

bool llvm::isValidAssumeForContext(const Instruction *Inv,
                                   const Instruction *CxtI,
                                   const DominatorTree *DT) {
  // There are two restrictions on the use of an assume:
  //  1. The assume must dominate the context (or the control flow must
  //     reach the assume whenever it reaches the context).
  //  2. The context must not be in the assume's set of ephemeral values
  //     (otherwise we will use the assume to prove that the condition
  //     feeding the assume is trivially true, thus causing the removal of
  //     the assume).

  if (DT) {
    if (DT->dominates(Inv, CxtI))
      return true;
  } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
    // We don't have a DT, but this trivially dominates.
    return true;
  }

  // With or without a DT, the only remaining case we will check is if the
  // instructions are in the same BB.  Give up if that is not the case.
  if (Inv->getParent() != CxtI->getParent())
    return false;

  // If we have a dom tree, then we now know that the assume doesn't dominate
  // the other instruction.  If we don't have a dom tree then we can check if
  // the assume is first in the BB.
  if (!DT) {
    // Search forward from the assume until we reach the context (or the end
    // of the block); the common case is that the assume will come first.
    for (auto I = std::next(BasicBlock::const_iterator(Inv)),
         IE = Inv->getParent()->end(); I != IE; ++I)
      if (&*I == CxtI)
        return true;
  }

  // The context comes first, but they're both in the same block. Make sure
  // there is nothing in between that might interrupt the control flow.
  for (BasicBlock::const_iterator I =
         std::next(BasicBlock::const_iterator(CxtI)), IE(Inv);
       I != IE; ++I)
    if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
      return false;

  return !isEphemeralValueOf(Inv, CxtI);
}
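
// Illustrative only (not in the original file): in a single block
//
//   %cmp = icmp ne i8* %p, null
//   call void @llvm.assume(i1 %cmp)
//   %v = load i8, i8* %p
//
// the assume is a valid context for the load: they share a block and nothing
// between them can fault or transfer control. Across blocks, validity instead
// requires the dominator tree to show that the assume dominates the context.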

static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
                                       unsigned Depth, const Query &Q) {
  // Use of assumptions is context-sensitive. If we don't have a context, we
  // cannot use them!
  if (!Q.AC || !Q.CxtI)
    return;

  unsigned BitWidth = Known.getBitWidth();

  // Note that the patterns below need to be kept in sync with the code
  // in AssumptionCache::updateAffectedValues.

  for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
    if (!AssumeVH)
      continue;
    CallInst *I = cast<CallInst>(AssumeVH);
    assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
           "Got assumption for the wrong function!");
    if (Q.isExcluded(I))
      continue;

    // Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried, resulting in a
    // runtime of ~O(#assumes * #values).

    assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
           "must be an assume intrinsic");

    Value *Arg = I->getArgOperand(0);

    if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      Known.setAllOnes();
      return;
    }
    if (match(Arg, m_Not(m_Specific(V))) &&
        isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      Known.setAllZero();
      return;
    }

    // The remaining tests are all recursive, so bail out if we hit the limit.
    if (Depth == MaxDepth)
      continue;

    ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
    if (!Cmp)
      continue;

    Value *A, *B;
    auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));

    CmpInst::Predicate Pred;
    uint64_t C;
    switch (Cmp->getPredicate()) {
    default:
      break;
    case ICmpInst::ICMP_EQ:
      // assume(v = a)
      if (match(Cmp, m_c_ICmp(Pred, m_V, m_Value(A))) &&
          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
        Known.Zero |= RHSKnown.Zero;
        Known.One  |= RHSKnown.One;
      // assume(v & b = a)
      } else if (match(Cmp,
                       m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
        KnownBits MaskKnown(BitWidth);
        computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));

        // For those bits in the mask that are known to be one, we can propagate
        // known bits from the RHS to V.
        Known.Zero |= RHSKnown.Zero & MaskKnown.One;
        Known.One  |= RHSKnown.One  & MaskKnown.One;
      // assume(~(v & b) = a)
      } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
                                     m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
        KnownBits MaskKnown(BitWidth);
        computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));

        // For those bits in the mask that are known to be one, we can propagate
        // inverted known bits from the RHS to V.
        Known.Zero |= RHSKnown.One  & MaskKnown.One;
        Known.One  |= RHSKnown.Zero & MaskKnown.One;
      // assume(v | b = a)
      } else if (match(Cmp,
                       m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
        KnownBits BKnown(BitWidth);
        computeKnownBits(B, BKnown, Depth+1, Query(Q, I));

        // For those bits in B that are known to be zero, we can propagate known
        // bits from the RHS to V.
        Known.Zero |= RHSKnown.Zero & BKnown.Zero;
        Known.One  |= RHSKnown.One  & BKnown.Zero;
      // assume(~(v | b) = a)
      } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
                                     m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
        KnownBits BKnown(BitWidth);
        computeKnownBits(B, BKnown, Depth+1, Query(Q, I));

        // For those bits in B that are known to be zero, we can propagate
        // inverted known bits from the RHS to V.
        Known.Zero |= RHSKnown.One  & BKnown.Zero;
        Known.One  |= RHSKnown.Zero & BKnown.Zero;
      // assume(v ^ b = a)
      } else if (match(Cmp,
                       m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
        KnownBits BKnown(BitWidth);
        computeKnownBits(B, BKnown, Depth+1, Query(Q, I));

        // For those bits in B that are known to be zero, we can propagate known
        // bits from the RHS to V. For those bits in B that are known to be one,
        // we can propagate inverted known bits from the RHS to V.
        Known.Zero |= RHSKnown.Zero & BKnown.Zero;
        Known.One  |= RHSKnown.One  & BKnown.Zero;
        Known.Zero |= RHSKnown.One  & BKnown.One;
        Known.One  |= RHSKnown.Zero & BKnown.One;
      // assume(~(v ^ b) = a)
      } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
                                     m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
        KnownBits BKnown(BitWidth);
        computeKnownBits(B, BKnown, Depth+1, Query(Q, I));

        // For those bits in B that are known to be zero, we can propagate
        // inverted known bits from the RHS to V. For those bits in B that are
        // known to be one, we can propagate known bits from the RHS to V.
        Known.Zero |= RHSKnown.One  & BKnown.Zero;
        Known.One  |= RHSKnown.Zero & BKnown.Zero;
        Known.Zero |= RHSKnown.Zero & BKnown.One;
        Known.One  |= RHSKnown.One  & BKnown.One;
      // assume(v << c = a)
      } else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
                                     m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
        // For those bits in RHS that are known, we can propagate them to known
        // bits in V shifted to the right by C.
        RHSKnown.Zero.lshrInPlace(C);
        Known.Zero |= RHSKnown.Zero;
        RHSKnown.One.lshrInPlace(C);
        Known.One  |= RHSKnown.One;
      // assume(~(v << c) = a)
      } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
                                     m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
        // For those bits in RHS that are known, we can propagate them inverted
        // to known bits in V shifted to the right by C.
        RHSKnown.One.lshrInPlace(C);
        Known.Zero |= RHSKnown.One;
        RHSKnown.Zero.lshrInPlace(C);
        Known.One  |= RHSKnown.Zero;
      // assume(v >> c = a)
      } else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
                                     m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
        // For those bits in RHS that are known, we can propagate them to known
        // bits in V shifted to the right by C.
        Known.Zero |= RHSKnown.Zero << C;
        Known.One  |= RHSKnown.One  << C;
      // assume(~(v >> c) = a)
      } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
                                     m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
        // For those bits in RHS that are known, we can propagate them inverted
        // to known bits in V shifted to the right by C.
        Known.Zero |= RHSKnown.One  << C;
        Known.One  |= RHSKnown.Zero << C;
      }
      break;
    case ICmpInst::ICMP_SGE:
      // assume(v >=_s c) where c is non-negative
      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I));

        if (RHSKnown.isNonNegative()) {
          // We know that the sign bit is zero.
          Known.makeNonNegative();
        }
      }
      break;
    case ICmpInst::ICMP_SGT:
      // assume(v >_s c) where c is at least -1.
      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I));

        if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) {
          // We know that the sign bit is zero.
          Known.makeNonNegative();
        }
      }
      break;
    case ICmpInst::ICMP_SLE:
      // assume(v <=_s c) where c is negative
      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I));

        if (RHSKnown.isNegative()) {
          // We know that the sign bit is one.
          Known.makeNegative();
        }
      }
      break;
    case ICmpInst::ICMP_SLT:
      // assume(v <_s c) where c is non-positive
      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));

        if (RHSKnown.isZero() || RHSKnown.isNegative()) {
          // We know that the sign bit is one.
          Known.makeNegative();
        }
      }
      break;
    case ICmpInst::ICMP_ULE:
      // assume(v <=_u c)
      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));

        // Whatever high bits in c are zero are known to be zero.
        Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
      }
      break;
    case ICmpInst::ICMP_ULT:
      // assume(v <_u c)
      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown(BitWidth);
        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));

        // If the RHS is known zero, then this assumption must be wrong (nothing
        // is unsigned less than zero). Signal a conflict and get out of here.
        if (RHSKnown.isZero()) {
          Known.Zero.setAllBits();
          Known.One.setAllBits();
          break;
        }

        // Whatever high bits in c are zero are known to be zero (if c is a power
        // of 2, then one more).
        if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
          Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1);
        else
          Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
      }
      break;
    }
  }

  // If assumptions conflict with each other or previous known bits, then we
  // have a logical fallacy. It's possible that the assumption is not reachable,
  // so this isn't a real bug. On the other hand, the program may have undefined
  // behavior, or we might have a bug in the compiler. We can't assert/crash, so
  // clear out the known bits, try to warn the user, and hope for the best.
  if (Known.Zero.intersects(Known.One)) {
    Known.resetAll();

    if (Q.ORE)
      Q.ORE->emit([&]() {
        auto *CxtI = const_cast<Instruction *>(Q.CxtI);
        return OptimizationRemarkAnalysis("value-tracking", "BadAssumption",
                                          CxtI)
               << "Detected conflicting code assumptions. Program may "
                  "have undefined behavior, or compiler may have "
                  "internal error.";
      });
  }
}
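
// A concrete illustration (not in the original file) of the ICMP_ULT case
// above: given
//
//   %cmp = icmp ult i32 %x, 16
//   call void @llvm.assume(i1 %cmp)
//
// the RHS constant 16 has 27 leading zeros, and since 16 is a power of two the
// bound is strict, so the top 28 bits of %x become known zero (%x <= 15).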

/// Compute known bits from a shift operator, including those with a
/// non-constant shift amount. Known is the output of this function. Known2 is a
/// pre-allocated temporary with the same bit width as Known. KZF and KOF are
/// operator-specific functions that, given the known-zero or known-one bits
/// respectively, and a shift amount, compute the implied known-zero or
/// known-one bits of the shift operator's result respectively for that shift
/// amount. The results from calling KZF and KOF are conservatively combined for
/// all permitted shift amounts.
static void computeKnownBitsFromShiftOperator(
    const Operator *I, KnownBits &Known, KnownBits &Known2,
    unsigned Depth, const Query &Q,
    function_ref<APInt(const APInt &, unsigned)> KZF,
    function_ref<APInt(const APInt &, unsigned)> KOF) {
  unsigned BitWidth = Known.getBitWidth();

  if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1);

    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    Known.Zero = KZF(Known.Zero, ShiftAmt);
    Known.One  = KOF(Known.One, ShiftAmt);
    // If the known bits conflict, this must be an overflowing left shift, so
    // the shift result is poison. We can return anything we want. Choose 0 for
    // the best folding opportunity.
    if (Known.hasConflict())
      Known.setAllZero();

    return;
  }

  computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);

  // If the shift amount could be greater than or equal to the bit-width of the
  // LHS, the value could be poison, but bail out because the check below is
  // expensive. TODO: Should we just carry on?
  if ((~Known.Zero).uge(BitWidth)) {
    Known.resetAll();
    return;
  }

  // Note: We cannot use Known.Zero.getLimitedValue() here, because if
  // BitWidth > 64 and any upper bits are known, we'll end up returning the
  // limit value (which implies all bits are known).
  uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue();
  uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue();

  // It would be more-clearly correct to use the two temporaries for this
  // calculation. Reusing the APInts here to prevent unnecessary allocations.
  Known.resetAll();

  // If we know the shifter operand is nonzero, we can sometimes infer more
  // known bits. However this is expensive to compute, so be lazy about it and
  // only compute it when absolutely necessary.
  Optional<bool> ShifterOperandIsNonZero;

  // Early exit if we can't constrain any well-defined shift amount.
  if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) &&
      !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) {
    ShifterOperandIsNonZero = isKnownNonZero(I->getOperand(1), Depth + 1, Q);
    if (!*ShifterOperandIsNonZero)
      return;
  }

  computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);

  Known.Zero.setAllBits();
  Known.One.setAllBits();
  for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
    // Combine the shifted known input bits only for those shift amounts
    // compatible with its known constraints.
    if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt)
      continue;
    if ((ShiftAmt | ShiftAmtKO) != ShiftAmt)
      continue;
    // If we know the shifter is nonzero, we may be able to infer more known
    // bits. This check is sunk down as far as possible to avoid the expensive
    // call to isKnownNonZero if the cheaper checks above fail.
    if (ShiftAmt == 0) {
      if (!ShifterOperandIsNonZero.hasValue())
        ShifterOperandIsNonZero =
            isKnownNonZero(I->getOperand(1), Depth + 1, Q);
      if (*ShifterOperandIsNonZero)
        continue;
    }

    Known.Zero &= KZF(Known2.Zero, ShiftAmt);
    Known.One  &= KOF(Known2.One, ShiftAmt);
  }

  // If the known bits conflict, the result is poison. Return a 0 and hope the
  // caller can further optimize that.
  if (Known.hasConflict())
    Known.setAllZero();
}
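
// A small worked example (not in the original file) of the KZF/KOF contract:
// for 'lshr i8 %x, 2' with Known.Zero(%x) = 0b11110000, the LShr KZF defined
// below computes 0b11110000 >> 2 = 0b00111100 and then sets the two vacated
// high bits, yielding Known.Zero = 0b11111100 for the result.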
972
static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
973
493M
                                         unsigned Depth, const Query &Q) {
974
493M
  unsigned BitWidth = Known.getBitWidth();
975
493M
976
493M
  KnownBits Known2(Known);
977
493M
  switch (I->getOpcode()) {
978
493M
  
default: break31.4M
;
979
493M
  case Instruction::Load:
980
116M
    if (MDNode *MD =
981
3.10M
            Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
982
3.10M
      computeKnownBitsFromRangeMetadata(*MD, Known);
983
116M
    break;
984
493M
  case Instruction::And: {
985
13.3M
    // If either the LHS or the RHS are Zero, the result is zero.
986
13.3M
    computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
987
13.3M
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
988
13.3M
989
13.3M
    // Output known-1 bits are only known if set in both the LHS & RHS.
990
13.3M
    Known.One &= Known2.One;
991
13.3M
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
992
13.3M
    Known.Zero |= Known2.Zero;
993
13.3M
994
13.3M
    // and(x, add (x, -1)) is a common idiom that always clears the low bit;
995
13.3M
    // here we handle the more general case of adding any odd number by
996
13.3M
    // matching the form add(x, add(x, y)) where y is odd.
997
13.3M
    // TODO: This could be generalized to clearing any bit set in y where the
998
13.3M
    // following bit is known to be unset in y.
999
13.3M
    Value *X = nullptr, *Y = nullptr;
1000
13.3M
    if (!Known.Zero[0] && 
!Known.One[0]9.92M
&&
1001
13.3M
        
match(I, m_c_BinOp(m_Value(X), m_Add(m_Deferred(X), m_Value(Y))))9.91M
) {
1002
3.97k
      Known2.resetAll();
1003
3.97k
      computeKnownBits(Y, Known2, Depth + 1, Q);
1004
3.97k
      if (Known2.countMinTrailingOnes() > 0)
1005
3.65k
        Known.Zero.setBit(0);
1006
3.97k
    }
1007
13.3M
    break;
1008
493M
  }
1009
493M
  case Instruction::Or:
1010
7.82M
    computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
1011
7.82M
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1012
7.82M
1013
7.82M
    // Output known-0 bits are only known if clear in both the LHS & RHS.
1014
7.82M
    Known.Zero &= Known2.Zero;
1015
7.82M
    // Output known-1 are known to be set if set in either the LHS | RHS.
1016
7.82M
    Known.One |= Known2.One;
1017
7.82M
    break;
1018
493M
  case Instruction::Xor: {
1019
5.07M
    computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
1020
5.07M
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1021
5.07M
1022
5.07M
    // Output known-0 bits are known if clear or set in both the LHS & RHS.
1023
5.07M
    APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
1024
5.07M
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
1025
5.07M
    Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
1026
5.07M
    Known.Zero = std::move(KnownZeroOut);
1027
5.07M
    break;
1028
493M
  }
1029
493M
  case Instruction::Mul: {
1030
10.3M
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1031
10.3M
    computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, Known,
1032
10.3M
                        Known2, Depth, Q);
1033
10.3M
    break;
1034
493M
  }
1035
493M
  case Instruction::UDiv: {
1036
2.13M
    // For the purposes of computing leading zeros we can conservatively
1037
2.13M
    // treat a udiv as a logical right shift by the power of 2 known to
1038
2.13M
    // be less than the denominator.
1039
2.13M
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1040
2.13M
    unsigned LeadZ = Known2.countMinLeadingZeros();
1041
2.13M
1042
2.13M
    Known2.resetAll();
1043
2.13M
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1044
2.13M
    unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
1045
2.13M
    if (RHSMaxLeadingZeros != BitWidth)
1046
1.14M
      LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
1047
2.13M
1048
2.13M
    Known.Zero.setHighBits(LeadZ);
1049
2.13M
    break;
1050
493M
  }
1051
493M
  case Instruction::Select: {
1052
13.9M
    const Value *LHS, *RHS;
1053
13.9M
    SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
1054
13.9M
    if (SelectPatternResult::isMinOrMax(SPF)) {
1055
6.03M
      computeKnownBits(RHS, Known, Depth + 1, Q);
1056
6.03M
      computeKnownBits(LHS, Known2, Depth + 1, Q);
1057
7.92M
    } else {
1058
7.92M
      computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
1059
7.92M
      computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1060
7.92M
    }
1061
13.9M
1062
13.9M
    unsigned MaxHighOnes = 0;
1063
13.9M
    unsigned MaxHighZeros = 0;
1064
13.9M
    if (SPF == SPF_SMAX) {
1065
3.76M
      // If both sides are negative, the result is negative.
1066
3.76M
      if (Known.isNegative() && 
Known2.isNegative()43.6k
)
1067
3
        // We can derive a lower bound on the result by taking the max of the
1068
3
        // leading one bits.
1069
3
        MaxHighOnes =
1070
3
            std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
1071
3.76M
      // If either side is non-negative, the result is non-negative.
1072
3.76M
      else if (Known.isNonNegative() || 
Known2.isNonNegative()221k
)
1073
3.54M
        MaxHighZeros = 1;
1074
10.2M
    } else if (SPF == SPF_SMIN) {
1075
389k
      // If both sides are non-negative, the result is non-negative.
1076
389k
      if (Known.isNonNegative() && 
Known2.isNonNegative()130k
)
1077
30.6k
        // We can derive an upper bound on the result by taking the max of the
1078
30.6k
        // leading zero bits.
1079
30.6k
        MaxHighZeros = std::max(Known.countMinLeadingZeros(),
1080
30.6k
                                Known2.countMinLeadingZeros());
1081
359k
      // If either side is negative, the result is negative.
1082
359k
      else if (Known.isNegative() || 
Known2.isNegative()358k
)
1083
62
        MaxHighOnes = 1;
1084
9.81M
    } else if (SPF == SPF_UMAX) {
1085
1.21M
      // We can derive a lower bound on the result by taking the max of the
1086
1.21M
      // leading one bits.
1087
1.21M
      MaxHighOnes =
1088
1.21M
          std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
1089
8.59M
    } else if (SPF == SPF_UMIN) {
1090
669k
      // We can derive an upper bound on the result by taking the max of the
1091
669k
      // leading zero bits.
1092
669k
      MaxHighZeros =
1093
669k
          std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
1094
7.92M
    } else if (SPF == SPF_ABS) {
1095
670k
      // RHS from matchSelectPattern returns the negation part of abs pattern.
1096
670k
      // If the negate has an NSW flag we can assume the sign bit of the result
1097
670k
      // will be 0 because that makes abs(INT_MIN) undefined.
1098
670k
      if (Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
1099
669k
        MaxHighZeros = 1;
1100
670k
    }
1101
13.9M
1102
13.9M
    // Only known if known in both the LHS and RHS.
1103
13.9M
    Known.One &= Known2.One;
1104
13.9M
    Known.Zero &= Known2.Zero;
1105
13.9M
    if (MaxHighOnes > 0)
1106
321
      Known.One.setHighBits(MaxHighOnes);
1107
13.9M
    if (MaxHighZeros > 0)
1108
4.63M
      Known.Zero.setHighBits(MaxHighZeros);
1109
13.9M
    break;
1110
493M
  }
1111
493M
  case Instruction::FPTrunc:
1112
1.14M
  case Instruction::FPExt:
1113
1.14M
  case Instruction::FPToUI:
1114
1.14M
  case Instruction::FPToSI:
1115
1.14M
  case Instruction::SIToFP:
1116
1.14M
  case Instruction::UIToFP:
1117
1.14M
    break; // Can't work with floating point.
1118
10.0M
  case Instruction::PtrToInt:
1119
10.0M
  case Instruction::IntToPtr:
1120
10.0M
    // Fall through and handle them the same as zext/trunc.
1121
10.0M
    LLVM_FALLTHROUGH;
1122
33.0M
  case Instruction::ZExt:
1123
33.0M
  case Instruction::Trunc: {
1124
33.0M
    Type *SrcTy = I->getOperand(0)->getType();
1125
33.0M
1126
33.0M
    unsigned SrcBitWidth;
1127
33.0M
    // Note that we handle pointer operands here because of inttoptr/ptrtoint
1128
33.0M
    // which fall through here.
1129
33.0M
    Type *ScalarTy = SrcTy->getScalarType();
1130
33.0M
    SrcBitWidth = ScalarTy->isPointerTy() ?
1131
9.31M
      Q.DL.getIndexTypeSizeInBits(ScalarTy) :
1132
33.0M
      
Q.DL.getTypeSizeInBits(ScalarTy)23.7M
;
1133
33.0M
1134
33.0M
    assert(SrcBitWidth && "SrcBitWidth can't be zero");
1135
33.0M
    Known = Known.zextOrTrunc(SrcBitWidth, false);
1136
33.0M
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1137
33.0M
    Known = Known.zextOrTrunc(BitWidth, true /* ExtendedBitsAreKnownZero */);
1138
33.0M
    break;
1139
33.0M
  }
1140
33.0M
  case Instruction::BitCast: {
1141
8.56M
    Type *SrcTy = I->getOperand(0)->getType();
1142
8.56M
    if (SrcTy->isIntOrPtrTy() &&
1143
8.56M
        // TODO: For now, not handling conversions like:
1144
8.56M
        // (bitcast i64 %x to <2 x i32>)
1145
8.56M
        
!I->getType()->isVectorTy()7.73M
) {
1146
7.73M
      computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1147
7.73M
      break;
1148
7.73M
    }
1149
837k
    break;
1150
837k
  }
1151
11.6M
  case Instruction::SExt: {
1152
11.6M
    // Compute the bits in the result that are not present in the input.
1153
11.6M
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
1154
11.6M
1155
11.6M
    Known = Known.trunc(SrcBitWidth);
1156
11.6M
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1157
11.6M
    // If the sign bit of the input is known set or clear, then we know the
1158
11.6M
    // top bits of the result.
1159
11.6M
    Known = Known.sext(BitWidth);
1160
11.6M
    break;
1161
837k
  }
1162
14.2M
  case Instruction::Shl: {
1163
14.2M
    // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
1164
14.2M
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1165
14.2M
    auto KZF = [NSW](const APInt &KnownZero, unsigned ShiftAmt) {
1166
13.5M
      APInt KZResult = KnownZero << ShiftAmt;
1167
13.5M
      KZResult.setLowBits(ShiftAmt); // Low bits known 0.
1168
13.5M
      // If this shift has "nsw" keyword, then the result is either a poison
1169
13.5M
      // value or has the same sign bit as the first operand.
1170
13.5M
      if (NSW && KnownZero.isSignBitSet())
1171
1.50M
        KZResult.setSignBit();
1172
13.5M
      return KZResult;
1173
13.5M
    };
1174
14.2M
1175
14.2M
    auto KOF = [NSW](const APInt &KnownOne, unsigned ShiftAmt) {
1176
13.5M
      APInt KOResult = KnownOne << ShiftAmt;
1177
13.5M
      if (NSW && KnownOne.isSignBitSet())
1178
49.0k
        KOResult.setSignBit();
1179
13.5M
      return KOResult;
1180
13.5M
    };
1181
14.2M
1182
14.2M
    computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
1183
14.2M
    break;
1184
837k
  }
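// ---- Editor's note: illustrative sketch, not part of ValueTracking.cpp ----
// The Shl case's header comment claims (shl X, C1) & C2 == 0 iff
// (X & (C2 >>u C1)) == 0: shifting X left by C1 and masking with C2 tests
// exactly the bits that X & (C2 lshr C1) tests, and the low C1 result bits
// are always zero. A self-contained, exhaustive i8 check:
#include <cassert>
#include <cstdint>

static void checkShlMaskIdentity() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C1 = 0; C1 < 8; ++C1)
      for (unsigned C2 = 0; C2 < 256; ++C2)
        assert((((uint8_t)(X << C1) & C2) == 0) == ((X & (C2 >> C1)) == 0));
}
// ---------------------------------------------------------------------------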
1185
9.84M
  case Instruction::LShr: {
1186
9.84M
    // (lshr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
1187
9.84M
    auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
1188
9.74M
      APInt KZResult = KnownZero.lshr(ShiftAmt);
1189
9.74M
      // High bits known zero.
1190
9.74M
      KZResult.setHighBits(ShiftAmt);
1191
9.74M
      return KZResult;
1192
9.74M
    };
1193
9.84M
1194
9.84M
    auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
1195
9.74M
      return KnownOne.lshr(ShiftAmt);
1196
9.74M
    };
1197
9.84M
1198
9.84M
    computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
1199
9.84M
    break;
1200
837k
  }
1201
3.04M
  case Instruction::AShr: {
1202
3.04M
    // (ashr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
1203
3.04M
    auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
1204
2.85M
      return KnownZero.ashr(ShiftAmt);
1205
2.85M
    };
1206
3.04M
1207
3.04M
    auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
1208
2.85M
      return KnownOne.ashr(ShiftAmt);
1209
2.85M
    };
1210
3.04M
1211
3.04M
    computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
1212
3.04M
    break;
1213
837k
  }
1214
11.4M
  case Instruction::Sub: {
1215
11.4M
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1216
11.4M
    computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
1217
11.4M
                           Known, Known2, Depth, Q);
1218
11.4M
    break;
1219
837k
  }
1220
49.7M
  case Instruction::Add: {
1221
49.7M
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1222
49.7M
    computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
1223
49.7M
                           Known, Known2, Depth, Q);
1224
49.7M
    break;
1225
837k
  }
1226
837k
  case Instruction::SRem:
1227
379k
    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
1228
208k
      APInt RA = Rem->getValue().abs();
1229
208k
      if (RA.isPowerOf2()) {
1230
161k
        APInt LowBits = RA - 1;
1231
161k
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1232
161k
1233
161k
        // The low bits of the first operand are unchanged by the srem.
1234
161k
        Known.Zero = Known2.Zero & LowBits;
1235
161k
        Known.One = Known2.One & LowBits;
1236
161k
1237
161k
        // If the first operand is non-negative or has all low bits zero, then
1238
161k
        // the upper bits are all zero.
1239
161k
        if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero))
1240
534
          Known.Zero |= ~LowBits;
1241
161k
1242
161k
        // If the first operand is negative and not all low bits are zero, then
1243
161k
        // the upper bits are all one.
1244
161k
        if (Known2.isNegative() && LowBits.intersects(Known2.One))
1245
7
          Known.One |= ~LowBits;
1246
161k
1247
161k
        assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
1248
161k
        break;
1249
161k
      }
1250
217k
    }
1251
217k
1252
217k
    // The sign bit is the LHS's sign bit, except when the result of the
1253
217k
    // remainder is zero.
1254
217k
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1255
217k
    // If it's known zero, our sign bit is also zero.
1256
217k
    if (Known2.isNonNegative())
1257
2.40k
      Known.makeNonNegative();
1258
217k
1259
217k
    break;
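// ---- Editor's note: illustrative sketch, not part of ValueTracking.cpp ----
// The SRem case above relies on srem X, 2^k leaving the low k bits of X
// unchanged: X == (X / D) * D + rem with D == 2^k, so rem and X agree modulo
// 2^k. C++'s % matches LLVM's srem (both truncate toward zero), so this can
// be checked exhaustively for i8:
#include <cassert>

static void checkSRemLowBits() {
  for (int X = -128; X < 128; ++X)
    for (unsigned K = 0; K < 8; ++K) {
      int D = 1 << K;           // RA, a power-of-two |divisor|
      int LowBits = D - 1;      // mask of bits the srem preserves
      assert(((X % D) & LowBits) == (X & LowBits));
    }
}
// ---------------------------------------------------------------------------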
1260
438k
  case Instruction::URem: {
1261
438k
    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
1262
276k
      const APInt &RA = Rem->getValue();
1263
276k
      if (RA.isPowerOf2()) {
1264
21.5k
        APInt LowBits = (RA - 1);
1265
21.5k
        computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1266
21.5k
        Known.Zero |= ~LowBits;
1267
21.5k
        Known.One &= LowBits;
1268
21.5k
        break;
1269
21.5k
      }
1270
417k
    }
1271
417k
1272
417k
    // Since the result is less than or equal to either operand, any leading
1273
417k
    // zero bits in either operand must also exist in the result.
1274
417k
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1275
417k
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1276
417k
1277
417k
    unsigned Leaders =
1278
417k
        std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
1279
417k
    Known.resetAll();
1280
417k
    Known.Zero.setHighBits(Leaders);
1281
417k
    break;
1282
417k
  }
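// ---- Editor's note: illustrative sketch, not part of ValueTracking.cpp ----
// The URem fast path above is the classic unsigned identity
// x % 2^k == x & (2^k - 1), which is why every bit above the low k becomes
// known zero. Exhaustive i8 check:
#include <cassert>

static void checkURemPow2() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned K = 0; K < 8; ++K)
      assert(X % (1u << K) == (X & ((1u << K) - 1)));
}
// ---------------------------------------------------------------------------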
1283
417k
1284
7.16M
  case Instruction::Alloca: {
1285
7.16M
    const AllocaInst *AI = cast<AllocaInst>(I);
1286
7.16M
    unsigned Align = AI->getAlignment();
1287
7.16M
    if (Align == 0)
1288
1.25k
      Align = Q.DL.getABITypeAlignment(AI->getAllocatedType());
1289
7.16M
1290
7.16M
    if (Align > 0)
1291
7.16M
      Known.Zero.setLowBits(countTrailingZeros(Align));
1292
7.16M
    break;
1293
417k
  }
1294
32.9M
  case Instruction::GetElementPtr: {
1295
32.9M
    // Analyze all of the subscripts of this getelementptr instruction
1296
32.9M
    // to determine if we can prove known low zero bits.
1297
32.9M
    KnownBits LocalKnown(BitWidth);
1298
32.9M
    computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q);
1299
32.9M
    unsigned TrailZ = LocalKnown.countMinTrailingZeros();
1300
32.9M
1301
32.9M
    gep_type_iterator GTI = gep_type_begin(I);
1302
107M
    for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
1303
74.9M
      Value *Index = I->getOperand(i);
1304
74.9M
      if (StructType *STy = GTI.getStructTypeOrNull()) {
1305
33.2M
        // Handle struct member offset arithmetic.
1306
33.2M
1307
33.2M
        // Handle case when index is vector zeroinitializer
1308
33.2M
        Constant *CIndex = cast<Constant>(Index);
1309
33.2M
        if (CIndex->isZeroValue())
1310
14.5M
          continue;
1311
18.6M
1312
18.6M
        if (CIndex->getType()->isVectorTy())
1313
3
          Index = CIndex->getSplatValue();
1314
18.6M
1315
18.6M
        unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
1316
18.6M
        const StructLayout *SL = Q.DL.getStructLayout(STy);
1317
18.6M
        uint64_t Offset = SL->getElementOffset(Idx);
1318
18.6M
        TrailZ = std::min<unsigned>(TrailZ,
1319
18.6M
                                    countTrailingZeros(Offset));
1320
41.7M
      } else {
1321
41.7M
        // Handle array index arithmetic.
1322
41.7M
        Type *IndexedTy = GTI.getIndexedType();
1323
41.7M
        if (!IndexedTy->isSized()) {
1324
0
          TrailZ = 0;
1325
0
          break;
1326
0
        }
1327
41.7M
        unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
1328
41.7M
        uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy);
1329
41.7M
        LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0);
1330
41.7M
        computeKnownBits(Index, LocalKnown, Depth + 1, Q);
1331
41.7M
        TrailZ = std::min(TrailZ,
1332
41.7M
                          unsigned(countTrailingZeros(TypeSize) +
1333
41.7M
                                   LocalKnown.countMinTrailingZeros()));
1334
41.7M
      }
1335
74.9M
    }
1336
32.9M
1337
32.9M
    Known.Zero.setLowBits(TrailZ);
1338
32.9M
    break;
1339
417k
  }
1340
76.5M
  case Instruction::PHI: {
1341
76.5M
    const PHINode *P = cast<PHINode>(I);
1342
76.5M
    // Handle the case of a simple two-predecessor recurrence PHI.
1343
76.5M
    // There's a lot more that could theoretically be done here, but
1344
76.5M
    // this is sufficient to catch some interesting cases.
1345
76.5M
    if (P->getNumIncomingValues() == 2) {
1346
149M
      for (unsigned i = 0; i != 2; ++i) {
1347
117M
        Value *L = P->getIncomingValue(i);
1348
117M
        Value *R = P->getIncomingValue(!i);
1349
117M
        Operator *LU = dyn_cast<Operator>(L);
1350
117M
        if (!LU)
1351
28.3M
          continue;
1352
89.2M
        unsigned Opcode = LU->getOpcode();
1353
89.2M
        // Check for operations that have the property that if
1354
89.2M
        // both their operands have low zero bits, the result
1355
89.2M
        // will have low zero bits.
1356
89.2M
        if (Opcode == Instruction::Add ||
1357
89.2M
            Opcode == Instruction::Sub ||
1358
89.2M
            Opcode == Instruction::And ||
1359
89.2M
            Opcode == Instruction::Or ||
1360
89.2M
            Opcode == Instruction::Mul) {
1361
35.0M
          Value *LL = LU->getOperand(0);
1362
35.0M
          Value *LR = LU->getOperand(1);
1363
35.0M
          // Find a recurrence.
1364
35.0M
          if (LL == I)
1365
26.5M
            L = LR;
1366
8.48M
          else if (LR == I)
1367
518k
            L = LL;
1368
7.96M
          else
1369
7.96M
            break;
1370
27.0M
          // Ok, we have a PHI of the form L op= R. Check for low
1371
27.0M
          // zero bits.
1372
27.0M
          computeKnownBits(R, Known2, Depth + 1, Q);
1373
27.0M
1374
27.0M
          // We need to take the minimum number of known bits
1375
27.0M
          KnownBits Known3(Known);
1376
27.0M
          computeKnownBits(L, Known3, Depth + 1, Q);
1377
27.0M
1378
27.0M
          Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
1379
27.0M
                                         Known3.countMinTrailingZeros()));
1380
27.0M
1381
27.0M
          auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU);
1382
27.0M
          if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) {
1383
17.6M
            // If initial value of recurrence is nonnegative, and we are adding
1384
17.6M
            // a nonnegative number with nsw, the result can only be nonnegative
1385
17.6M
            // or poison value regardless of the number of times we execute the
1386
17.6M
            // add in phi recurrence. If initial value is negative and we are
1387
17.6M
            // adding a negative number with nsw, the result can only be
1388
17.6M
            // negative or poison value. Similar arguments apply to sub and mul.
1389
17.6M
            //
1390
17.6M
            // (add non-negative, non-negative) --> non-negative
1391
17.6M
            // (add negative, negative) --> negative
1392
17.6M
            if (Opcode == Instruction::Add) {
1393
17.5M
              if (Known2.isNonNegative() && Known3.isNonNegative())
1394
14.5M
                Known.makeNonNegative();
1395
2.95M
              else if (Known2.isNegative() && Known3.isNegative())
1396
1.09k
                Known.makeNegative();
1397
17.5M
            }
1398
131k
1399
131k
            // (sub nsw non-negative, negative) --> non-negative
1400
131k
            // (sub nsw negative, non-negative) --> negative
1401
131k
            else if (Opcode == Instruction::Sub && LL == I) {
1402
111k
              if (Known2.isNonNegative() && Known3.isNegative())
1403
1
                Known.makeNonNegative();
1404
111k
              else if (Known2.isNegative() && Known3.isNonNegative())
1405
0
                Known.makeNegative();
1406
111k
            }
1407
19.7k
1408
19.7k
            // (mul nsw non-negative, non-negative) --> non-negative
1409
19.7k
            else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
1410
19.7k
                     Known3.isNonNegative())
1411
1.24k
              Known.makeNonNegative();
1412
17.6M
          }
1413
27.0M
1414
27.0M
          break;
1415
27.0M
        }
1416
89.2M
      }
1417
67.2M
    }
1418
76.5M
1419
76.5M
    // Unreachable blocks may have zero-operand PHI nodes.
1420
76.5M
    if (P->getNumIncomingValues() == 0)
1421
741
      break;
1422
76.4M
1423
76.4M
    // Otherwise take the unions of the known bit sets of the operands,
1424
76.4M
    // taking conservative care to avoid excessive recursion.
1425
76.4M
    if (Depth < MaxDepth - 1 && !Known.Zero && !Known.One) {
1426
48.4M
      // Skip if every incoming value refers back to ourselves.
1427
48.4M
      if (dyn_cast_or_null<UndefValue>(P->hasConstantValue()))
1428
142
        break;
1429
48.4M
1430
48.4M
      Known.Zero.setAllBits();
1431
48.4M
      Known.One.setAllBits();
1432
77.2M
      for (Value *IncValue : P->incoming_values()) {
1433
77.2M
        // Skip direct self references.
1434
77.2M
        if (IncValue == P) continue;
1435
77.2M
1436
77.2M
        Known2 = KnownBits(BitWidth);
1437
77.2M
        // Recurse, but cap the recursion to one level, because we don't
1438
77.2M
        // want to waste time spinning around in loops.
1439
77.2M
        computeKnownBits(IncValue, Known2, MaxDepth - 1, Q);
1440
77.2M
        Known.Zero &= Known2.Zero;
1441
77.2M
        Known.One &= Known2.One;
1442
77.2M
        // If all bits have been ruled out, there's no need to check
1443
77.2M
        // more operands.
1444
77.2M
        if (!Known.Zero && !Known.One)
1445
44.0M
          break;
1446
77.2M
      }
1447
48.4M
    }
1448
76.4M
    break;
1449
76.4M
  }
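// ---- Editor's note: illustrative sketch, not part of ValueTracking.cpp ----
// The two-incoming-value recurrence handling above keeps
// min(tz(start), tz(step)) trailing zeros: every iterate of i += step stays a
// multiple of that power of two, even across wraparound. Brute-force i8 check
// (tz8 as defined in the GEP sketch above):
#include <algorithm>
#include <cassert>
#include <cstdint>

static void checkRecurrenceTrailingZeros() {
  for (unsigned Start = 0; Start < 256; ++Start)
    for (unsigned Step = 0; Step < 256; ++Step) {
      unsigned Bound = std::min(tz8(Start), tz8(Step));
      uint8_t I = Start;
      for (int Iter = 0; Iter < 256; ++Iter) { // one full wrap of i8
        assert(tz8(I) >= Bound);
        I += Step;
      }
    }
}
// ---------------------------------------------------------------------------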
1450
76.4M
  case Instruction::Call:
1451
30.9M
  case Instruction::Invoke:
1452
30.9M
    // If range metadata is attached to this call, set known bits from that,
1453
30.9M
    // and then intersect with known bits based on other properties of the
1454
30.9M
    // function.
1455
30.9M
    if (MDNode *MD =
1456
1.35M
            Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
1457
1.35M
      computeKnownBitsFromRangeMetadata(*MD, Known);
1458
30.9M
    if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) {
1459
1.76k
      computeKnownBits(RV, Known2, Depth + 1, Q);
1460
1.76k
      Known.Zero |= Known2.Zero;
1461
1.76k
      Known.One |= Known2.One;
1462
1.76k
    }
1463
30.9M
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1464
2.08M
      switch (II->getIntrinsicID()) {
1465
2.08M
      default: break;
1466
2.08M
      case Intrinsic::bitreverse:
1467
2.36k
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1468
2.36k
        Known.Zero |= Known2.Zero.reverseBits();
1469
2.36k
        Known.One |= Known2.One.reverseBits();
1470
2.36k
        break;
1471
2.08M
      case Intrinsic::bswap:
1472
30.4k
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1473
30.4k
        Known.Zero |= Known2.Zero.byteSwap();
1474
30.4k
        Known.One |= Known2.One.byteSwap();
1475
30.4k
        break;
1476
2.08M
      case Intrinsic::ctlz: {
1477
834k
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1478
834k
        // If we have a known 1, its position is our upper bound.
1479
834k
        unsigned PossibleLZ = Known2.One.countLeadingZeros();
1480
834k
        // If this call is undefined for 0, the result will be less than 2^n.
1481
834k
        if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1482
613k
          PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
1483
834k
        unsigned LowBits = Log2_32(PossibleLZ)+1;
1484
834k
        Known.Zero.setBitsFrom(LowBits);
1485
834k
        break;
1486
2.08M
      }
1487
2.08M
      case Intrinsic::cttz: {
1488
427k
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1489
427k
        // If we have a known 1, its position is our upper bound.
1490
427k
        unsigned PossibleTZ = Known2.One.countTrailingZeros();
1491
427k
        // If this call is undefined for 0, the result will be less than 2^n.
1492
427k
        if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1493
318k
          PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
1494
427k
        unsigned LowBits = Log2_32(PossibleTZ)+1;
1495
427k
        Known.Zero.setBitsFrom(LowBits);
1496
427k
        break;
1497
2.08M
      }
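// ---- Editor's note: illustrative sketch, not part of ValueTracking.cpp ----
// For cttz, a known one at bit P caps the result at P, so the result fits in
// floor(log2(P)) + 1 bits and Known.Zero.setBitsFrom(LowBits) is sound. The
// same argument covers ctlz with leading bits. Check for i8, with log2u
// mirroring Log2_32 for nonzero arguments and tz8 from the GEP sketch above:
#include <cassert>
#include <cstdint>

static unsigned log2u(unsigned V) { // floor(log2(V)); log2u(0) == 0 here
  unsigned L = 0;
  while (V >>= 1)
    ++L;
  return L;
}

static void checkCttzBound() {
  for (unsigned X = 1; X < 256; ++X) {
    unsigned P = tz8(X);              // bit P of X is (known to be) one
    unsigned LowBits = log2u(P) + 1;  // mirrors Log2_32(PossibleTZ) + 1
    assert(tz8(X) < (1u << LowBits)); // result fits below bit LowBits
  }
}
// ---------------------------------------------------------------------------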
1498
2.08M
      case Intrinsic::ctpop: {
1499
85.9k
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1500
85.9k
        // We can bound the space the count needs.  Also, bits known to be zero
1501
85.9k
        // can't contribute to the population.
1502
85.9k
        unsigned BitsPossiblySet = Known2.countMaxPopulation();
1503
85.9k
        unsigned LowBits = Log2_32(BitsPossiblySet)+1;
1504
85.9k
        Known.Zero.setBitsFrom(LowBits);
1505
85.9k
        // TODO: we could bound KnownOne using the lower bound on the number
1506
85.9k
        // of bits which might be set provided by popcnt KnownOne2.
1507
85.9k
        break;
1508
2.08M
      }
1509
2.08M
      case Intrinsic::fshr:
1510
3.39k
      case Intrinsic::fshl: {
1511
3.39k
        const APInt *SA;
1512
3.39k
        if (!match(I->getOperand(2), m_APInt(SA)))
1513
975
          break;
1514
2.41k
1515
2.41k
        // Normalize to funnel shift left.
1516
2.41k
        uint64_t ShiftAmt = SA->urem(BitWidth);
1517
2.41k
        if (II->getIntrinsicID() == Intrinsic::fshr)
1518
53
          ShiftAmt = BitWidth - ShiftAmt;
1519
2.41k
1520
2.41k
        KnownBits Known3(Known);
1521
2.41k
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1522
2.41k
        computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q);
1523
2.41k
1524
2.41k
        Known.Zero =
1525
2.41k
            Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
1526
2.41k
        Known.One =
1527
2.41k
            Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
1528
2.41k
        break;
1529
2.41k
      }
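// ---- Editor's note: illustrative sketch, not part of ValueTracking.cpp ----
// The fshr arm above normalizes to a funnel shift left by
// BitWidth - ShiftAmt: for a nonzero amount S (mod width),
// fshr(Hi, Lo, S) == fshl(Hi, Lo, Width - S). Exhaustive i8 check:
#include <cassert>
#include <cstdint>

static uint8_t fshl8(uint8_t Hi, uint8_t Lo, unsigned S) {
  S %= 8;
  return S ? (uint8_t)((Hi << S) | (Lo >> (8 - S))) : Hi;
}

static uint8_t fshr8(uint8_t Hi, uint8_t Lo, unsigned S) {
  S %= 8;
  return S ? (uint8_t)((Lo >> S) | (Hi << (8 - S))) : Lo;
}

static void checkFunnelNormalization() {
  for (unsigned Hi = 0; Hi < 256; ++Hi)
    for (unsigned Lo = 0; Lo < 256; ++Lo)
      for (unsigned S = 1; S < 8; ++S)
        assert(fshr8(Hi, Lo, S) == fshl8(Hi, Lo, 8 - S));
}
// ---------------------------------------------------------------------------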
1530
18.7k
      case Intrinsic::uadd_sat:
1531
18.7k
      case Intrinsic::usub_sat: {
1532
18.7k
        bool IsAdd = II->getIntrinsicID() == Intrinsic::uadd_sat;
1533
18.7k
        computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1534
18.7k
        computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1535
18.7k
1536
18.7k
        // Add: Leading ones of either operand are preserved.
1537
18.7k
        // Sub: Leading zeros of LHS and leading ones of RHS are preserved
1538
18.7k
        // as leading zeros in the result.
1539
18.7k
        unsigned LeadingKnown;
1540
18.7k
        if (IsAdd)
1541
15.9k
          LeadingKnown = std::max(Known.countMinLeadingOnes(),
1542
15.9k
                                  Known2.countMinLeadingOnes());
1543
2.81k
        else
1544
2.81k
          LeadingKnown = std::max(Known.countMinLeadingZeros(),
1545
2.81k
                                  Known2.countMinLeadingOnes());
1546
18.7k
1547
18.7k
        Known = KnownBits::computeForAddSub(
1548
18.7k
            IsAdd, /* NSW */ false, Known, Known2);
1549
18.7k
1550
18.7k
        // We select between the operation result and all-ones/zero
1551
18.7k
        // respectively, so we can preserve known ones/zeros.
1552
18.7k
        if (IsAdd) {
1553
15.9k
          Known.One.setHighBits(LeadingKnown);
1554
15.9k
          Known.Zero.clearAllBits();
1555
15.9k
        } else {
1556
2.81k
          Known.Zero.setHighBits(LeadingKnown);
1557
2.81k
          Known.One.clearAllBits();
1558
2.81k
        }
1559
18.7k
        break;
1560
18.7k
      }
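// ---- Editor's note: illustrative sketch, not part of ValueTracking.cpp ----
// uadd.sat(A, B) >= max(A, B): the unsaturated sum is at least either
// operand, and the saturated value 0xFF is too — so the leading ones of
// either operand survive, as the case above asserts. Exhaustive i8 check
// (leadingOnes8 as defined in the select sketch above):
#include <algorithm>
#include <cassert>
#include <cstdint>

static uint8_t uaddSat8(uint8_t A, uint8_t B) {
  unsigned Sum = (unsigned)A + B;
  return Sum > 0xFF ? (uint8_t)0xFF : (uint8_t)Sum;
}

static void checkUAddSatLeadingOnes() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      unsigned Bound = std::max(leadingOnes8(A), leadingOnes8(B));
      assert(leadingOnes8(uaddSat8(A, B)) >= Bound);
    }
}
// ---------------------------------------------------------------------------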
1561
18.7k
      case Intrinsic::x86_sse42_crc32_64_64:
1562
9
        Known.Zero.setBitsFrom(32);
1563
9
        break;
1564
30.9M
      }
1565
30.9M
    }
1566
30.9M
    break;
1567
30.9M
  case Instruction::ExtractElement:
1568
268k
    // Look through extract element. At the moment we keep this simple and skip
1569
268k
    // tracking the specific element. But at least we might find information
1570
268k
    // valid for all elements of the vector (for example if the vector is sign
1571
268k
    // extended, shifted, etc).
1572
268k
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1573
268k
    break;
1574
30.9M
  case Instruction::ExtractValue:
1575
991k
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
1576
145k
      const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
1577
145k
      if (EVI->getNumIndices() != 1) break;
1578
145k
      if (EVI->getIndices()[0] == 0) {
1579
59.9k
        switch (II->getIntrinsicID()) {
1580
59.9k
        default: break;
1581
59.9k
        case Intrinsic::uadd_with_overflow:
1582
10.3k
        case Intrinsic::sadd_with_overflow:
1583
10.3k
          computeKnownBitsAddSub(true, II->getArgOperand(0),
1584
10.3k
                                 II->getArgOperand(1), false, Known, Known2,
1585
10.3k
                                 Depth, Q);
1586
10.3k
          break;
1587
10.3k
        case Intrinsic::usub_with_overflow:
1588
912
        case Intrinsic::ssub_with_overflow:
1589
912
          computeKnownBitsAddSub(false, II->getArgOperand(0),
1590
912
                                 II->getArgOperand(1), false, Known, Known2,
1591
912
                                 Depth, Q);
1592
912
          break;
1593
36.0k
        case Intrinsic::umul_with_overflow:
1594
36.0k
        case Intrinsic::smul_with_overflow:
1595
36.0k
          computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
1596
36.0k
                              Known, Known2, Depth, Q);
1597
36.0k
          break;
1598
59.9k
        }
1599
59.9k
      }
1600
145k
    }
1601
493M
  }
1602
493M
}
1603
1604
/// Determine which bits of V are known to be either zero or one and return
1605
/// them.
1606
180M
KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) {
1607
180M
  KnownBits Known(getBitWidth(V->getType(), Q.DL));
1608
180M
  computeKnownBits(V, Known, Depth, Q);
1609
180M
  return Known;
1610
180M
}
1611
1612
/// Determine which bits of V are known to be either zero or one and return
1613
/// them in the Known bit set.
1614
///
1615
/// NOTE: we cannot consider 'undef' to be "IsZero" here.  The problem is that
1616
/// we cannot optimize based on the assumption that it is zero without changing
1617
/// it to be an explicit zero.  If we don't change it to zero, other code could
1618
/// optimize based on the contradictory assumption that it is non-zero.
1619
/// Because instcombine aggressively folds operations with undef args anyway,
1620
/// this won't lose us code quality.
1621
///
1622
/// This function is defined on values with integer type, values with pointer
1623
/// type, and vectors of integers.  In the case
1624
/// where V is a vector, known zero, and known one values are the
1625
/// same width as the vector element, and the bit is set only if it is true
1626
/// for all of the elements in the vector.
1627
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
1628
812M
                      const Query &Q) {
1629
812M
  assert(V && "No Value?");
1630
812M
  assert(Depth <= MaxDepth && "Limit Search Depth");
1631
812M
  unsigned BitWidth = Known.getBitWidth();
1632
812M
1633
812M
  assert((V->getType()->isIntOrIntVectorTy(BitWidth) ||
1634
812M
          V->getType()->isPtrOrPtrVectorTy()) &&
1635
812M
         "Not integer or pointer type!");
1636
812M
1637
812M
  Type *ScalarTy = V->getType()->getScalarType();
1638
812M
  unsigned ExpectedWidth = ScalarTy->isPointerTy() ?
1639
678M
    Q.DL.getIndexTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy);
1640
812M
  assert(ExpectedWidth == BitWidth && "V and Known should have same BitWidth");
1641
812M
  (void)BitWidth;
1642
812M
  (void)ExpectedWidth;
1643
812M
1644
812M
  const APInt *C;
1645
812M
  if (match(V, m_APInt(C))) {
1646
209M
    // We know all of the bits for a scalar constant or a splat vector constant!
1647
209M
    Known.One = *C;
1648
209M
    Known.Zero = ~Known.One;
1649
209M
    return;
1650
209M
  }
1651
602M
  // Null and aggregate-zero are all-zeros.
1652
602M
  if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
1653
2.87M
    Known.setAllZero();
1654
2.87M
    return;
1655
2.87M
  }
1656
599M
  // Handle a constant vector by taking the intersection of the known bits of
1657
599M
  // each element.
1658
599M
  if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
1659
1.97M
    // We know that CDS must be a vector of integers. Take the intersection of
1660
1.97M
    // each element.
1661
1.97M
    Known.Zero.setAllBits(); Known.One.setAllBits();
1662
7.69M
    for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
1663
5.71M
      APInt Elt = CDS->getElementAsAPInt(i);
1664
5.71M
      Known.Zero &= ~Elt;
1665
5.71M
      Known.One &= Elt;
1666
5.71M
    }
1667
1.97M
    return;
1668
1.97M
  }
1669
598M
1670
598M
  if (const auto *CV = dyn_cast<ConstantVector>(V)) {
1671
2.96k
    // We know that CV must be a vector of integers. Take the intersection of
1672
2.96k
    // each element.
1673
2.96k
    Known.Zero.setAllBits(); Known.One.setAllBits();
1674
7.00k
    for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1675
6.71k
      Constant *Element = CV->getAggregateElement(i);
1676
6.71k
      auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
1677
6.71k
      if (!ElementCI) {
1678
2.66k
        Known.resetAll();
1679
2.66k
        return;
1680
2.66k
      }
1681
4.04k
      const APInt &Elt = ElementCI->getValue();
1682
4.04k
      Known.Zero &= ~Elt;
1683
4.04k
      Known.One &= Elt;
1684
4.04k
    }
1685
2.96k
    return;
1686
598M
  }
1687
598M
1688
598M
  // Start out not knowing anything.
1689
598M
  Known.resetAll();
1690
598M
1691
598M
  // We can't imply anything about undefs.
1692
598M
  if (isa<UndefValue>(V))
1693
122k
    return;
1694
597M
1695
597M
  // There's no point in looking through other users of ConstantData for
1696
597M
  // assumptions.  Confirm that we've handled them all.
1697
597M
  assert(!isa<ConstantData>(V) && "Unhandled constant data!");
1698
597M
1699
597M
  // Limit search depth.
1700
597M
  // All recursive calls that increase depth must come after this.
1701
597M
  if (Depth == MaxDepth)
1702
56.3M
    return;
1703
541M
1704
541M
  // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
1705
541M
  // the bits of its aliasee.
1706
541M
  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1707
33
    if (!GA->isInterposable())
1708
20
      computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
1709
33
    return;
1710
33
  }
1711
541M
1712
541M
  if (const Operator *I = dyn_cast<Operator>(V))
1713
493M
    computeKnownBitsFromOperator(I, Known, Depth, Q);
1714
541M
1715
541M
  // Aligned pointers have trailing zeros - refine Known.Zero set
1716
541M
  if (V->getType()->isPointerTy()) {
1717
127M
    unsigned Align = V->getPointerAlignment(Q.DL);
1718
127M
    if (Align)
1719
12.9M
      Known.Zero.setLowBits(countTrailingZeros(Align));
1720
127M
  }
1721
541M
1722
541M
  // computeKnownBitsFromAssume strictly refines Known.
1723
541M
  // Therefore, we run them after computeKnownBitsFromOperator.
1724
541M
1725
541M
  // Check whether a nearby assume intrinsic can determine some known bits.
1726
541M
  computeKnownBitsFromAssume(V, Known, Depth, Q);
1727
541M
1728
541M
  assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
1729
541M
}
1730
1731
/// Return true if the given value is known to have exactly one
1732
/// bit set when defined. For vectors return true if every element is known to
1733
/// be a power of two when defined. Supports values with integer or pointer
1734
/// types and vectors of integers.
1735
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
1736
252k
                            const Query &Q) {
1737
252k
  assert(Depth <= MaxDepth && "Limit Search Depth");
1738
252k
1739
252k
  // Attempt to match against constants.
1740
252k
  if (OrZero && match(V, m_Power2OrZero()))
1741
7.84k
      return true;
1742
245k
  if (match(V, m_Power2()))
1743
222
      return true;
1744
244k
1745
244k
  // 1 << X is clearly a power of two if the one is not shifted off the end.  If
1746
244k
  // it is shifted off the end then the result is undefined.
1747
244k
  if (match(V, m_Shl(m_One(), m_Value())))
1748
57
    return true;
1749
244k
1750
244k
  // (signmask) >>l X is clearly a power of two if the one is not shifted off
1751
244k
  // the bottom.  If it is shifted off the bottom then the result is undefined.
1752
244k
  if (match(V, m_LShr(m_SignMask(), m_Value())))
1753
2
    return true;
1754
244k
1755
244k
  // The remaining tests are all recursive, so bail out if we hit the limit.
1756
244k
  if (Depth++ == MaxDepth)
1757
547
    return false;
1758
244k
1759
244k
  Value *X = nullptr, *Y = nullptr;
1760
244k
  // A shift left or a logical shift right of a power of two is a power of two
1761
244k
  // or zero.
1762
244k
  if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
1763
69.1k
                 match(V, m_LShr(m_Value(X), m_Value()))))
1764
96
    return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q);
1765
244k
1766
244k
  if (const ZExtInst *ZI = dyn_cast<ZExtInst>(V))
1767
18.6k
    return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q);
1768
225k
1769
225k
  if (const SelectInst *SI = dyn_cast<SelectInst>(V))
1770
1.27k
    return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) &&
1771
1.27k
           isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q);
1772
224k
1773
224k
  if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
1774
54
    // A power of two and'd with anything is a power of two or zero.
1775
54
    if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q) ||
1776
54
        isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q))
1777
1
      return true;
1778
53
    // X & (-X) is always a power of two or zero.
1779
53
    if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
1780
1
      return true;
1781
52
    return false;
1782
52
  }
1783
224k
1784
224k
  // Adding a power-of-two or zero to the same power-of-two or zero yields
1785
224k
  // either the original power-of-two, a larger power-of-two or zero.
1786
224k
  if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
1787
7.85k
    const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
1788
7.85k
    if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
1789
7.85k
        Q.IIQ.hasNoSignedWrap(VOBO)) {
1790
3.70k
      if (match(X, m_And(m_Specific(Y), m_Value())) ||
1791
3.70k
          match(X, m_And(m_Value(), m_Specific(Y))))
1792
3
        if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q))
1793
3
          return true;
1794
3.70k
      if (match(Y, m_And(m_Specific(X), m_Value())) ||
1795
3.70k
          match(Y, m_And(m_Value(), m_Specific(X))))
1796
2
        if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q))
1797
2
          return true;
1798
3.70k
1799
3.70k
      unsigned BitWidth = V->getType()->getScalarSizeInBits();
1800
3.70k
      KnownBits LHSBits(BitWidth);
1801
3.70k
      computeKnownBits(X, LHSBits, Depth, Q);
1802
3.70k
1803
3.70k
      KnownBits RHSBits(BitWidth);
1804
3.70k
      computeKnownBits(Y, RHSBits, Depth, Q);
1805
3.70k
      // If i8 V is a power of two or zero:
1806
3.70k
      //  ZeroBits: 1 1 1 0 1 1 1 1
1807
3.70k
      // ~ZeroBits: 0 0 0 1 0 0 0 0
1808
3.70k
      if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
1809
32
        // If OrZero isn't set, we cannot give back a zero result.
1810
32
        // Make sure either the LHS or RHS has a bit set.
1811
32
        if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
1812
0
          return true;
1813
224k
    }
1814
7.85k
  }
1815
224k
1816
224k
  // An exact divide or right shift can only shift off zero bits, so the result
1817
224k
  // is a power of two only if the first operand is a power of two and not
1818
224k
  // copying a sign bit (sdiv int_min, 2).
1819
224k
  if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
1820
224k
      
match(V, m_Exact(m_UDiv(m_Value(), m_Value())))224k
) {
1821
18
    return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero,
1822
18
                                  Depth, Q);
1823
18
  }
1824
224k
1825
224k
  return false;
1826
224k
}
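// ---- Editor's note: illustrative sketch, not part of ValueTracking.cpp ----
// The m_Neg match above uses the classic fact that X & (-X) isolates the
// lowest set bit of X, so the result is always a power of two or zero.
// Exhaustive i8 check:
#include <cassert>
#include <cstdint>

static void checkAndNegPow2() {
  for (unsigned X = 0; X < 256; ++X) {
    uint8_t Isolated = (uint8_t)(X & (0u - X)); // X & -X, computed in i8
    assert((Isolated & (Isolated - 1u)) == 0);  // power of two or zero
  }
}
// ---------------------------------------------------------------------------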
1827
1828
/// Test whether a GEP's result is known to be non-null.
1829
///
1830
/// Uses properties inherent in a GEP to try to determine whether it is known
1831
/// to be non-null.
1832
///
1833
/// Currently this routine does not support vector GEPs.
1834
static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
1835
5.62M
                              const Query &Q) {
1836
5.62M
  const Function *F = nullptr;
1837
5.62M
  if (const Instruction *I = dyn_cast<Instruction>(GEP))
1838
5.62M
    F = I->getFunction();
1839
5.62M
1840
5.62M
  if (!GEP->isInBounds() ||
1841
5.62M
      NullPointerIsDefined(F, GEP->getPointerAddressSpace()))
1842
256k
    return false;
1843
5.36M
1844
5.36M
  // FIXME: Support vector-GEPs.
1845
5.36M
  assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
1846
5.36M
1847
5.36M
  // If the base pointer is non-null, we cannot walk to a null address with an
1848
5.36M
  // inbounds GEP in address space zero.
1849
5.36M
  if (isKnownNonZero(GEP->getPointerOperand(), Depth, Q))
1850
1.30M
    return true;
1851
4.06M
1852
4.06M
  // Walk the GEP operands and see if any operand introduces a non-zero offset.
1853
4.06M
  // If so, then the GEP cannot produce a null pointer, as doing so would
1854
4.06M
  // inherently violate the inbounds contract within address space zero.
1855
4.06M
  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
1856
8.41M
       GTI != GTE; ++GTI) {
1857
7.17M
    // Struct types are easy -- they must always be indexed by a constant.
1858
7.17M
    if (StructType *STy = GTI.getStructTypeOrNull()) {
1859
2.99M
      ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
1860
2.99M
      unsigned ElementIdx = OpC->getZExtValue();
1861
2.99M
      const StructLayout *SL = Q.DL.getStructLayout(STy);
1862
2.99M
      uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
1863
2.99M
      if (ElementOffset > 0)
1864
1.66M
        return true;
1865
1.32M
      continue;
1866
1.32M
    }
1867
4.18M
1868
4.18M
    // If we have a zero-sized type, the index doesn't matter. Keep looping.
1869
4.18M
    if (Q.DL.getTypeAllocSize(GTI.getIndexedType()) == 0)
1870
7
      continue;
1871
4.18M
1872
4.18M
    // Fast path the constant operand case both for efficiency and so we don't
1873
4.18M
    // increment Depth when just zipping down an all-constant GEP.
1874
4.18M
    if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
1875
3.52M
      if (!OpC->isZero())
1876
1.13M
        return true;
1877
2.39M
      continue;
1878
2.39M
    }
1879
653k
1880
653k
    // We post-increment Depth here because while isKnownNonZero increments it
1881
653k
    // as well, when we pop back up that increment won't persist. We don't want
1882
653k
    // to recurse 10k times just because we have 10k GEP operands. We don't
1883
653k
    // bail completely out because we want to handle constant GEPs regardless
1884
653k
    // of depth.
1885
653k
    if (Depth++ >= MaxDepth)
1886
945
      continue;
1887
652k
1888
652k
    if (isKnownNonZero(GTI.getOperand(), Depth, Q))
1889
22.4k
      return true;
1890
652k
  }
1891
4.06M
1892
4.06M
  return false;
1893
4.06M
}
1894
1895
static bool isKnownNonNullFromDominatingCondition(const Value *V,
1896
                                                  const Instruction *CtxI,
1897
37.4M
                                                  const DominatorTree *DT) {
1898
37.4M
  assert(V->getType()->isPointerTy() && "V must be pointer type");
1899
37.4M
  assert(!isa<ConstantData>(V) && "Did not expect ConstantPointerNull");
1900
37.4M
1901
37.4M
  if (!CtxI || !DT)
1902
18.9M
    return false;
1903
18.4M
1904
18.4M
  unsigned NumUsesExplored = 0;
1905
65.6M
  for (auto *U : V->users()) {
1906
65.6M
    // Avoid massive lists
1907
65.6M
    if (NumUsesExplored >= DomConditionsMaxUses)
1908
347k
      break;
1909
65.2M
    NumUsesExplored++;
1910
65.2M
1911
65.2M
    // If the value is used as an argument to a call or invoke, then argument
1912
65.2M
    // attributes may provide an answer about null-ness.
1913
65.2M
    if (auto CS = ImmutableCallSite(U))
1914
43.6M
      if (auto *CalledFunc = CS.getCalledFunction())
1915
42.4M
        for (const Argument &Arg : CalledFunc->args())
1916
127M
          if (CS.getArgOperand(Arg.getArgNo()) == V &&
1917
127M
              Arg.hasNonNullAttr() && DT->dominates(CS.getInstruction(), CtxI))
1918
5.35k
            return true;
1919
65.2M
1920
65.2M
    // Consider only compare instructions uniquely controlling a branch
1921
65.2M
    CmpInst::Predicate Pred;
1922
65.2M
    if (!match(const_cast<User *>(U),
1923
65.2M
               m_c_ICmp(Pred, m_Specific(V), m_Zero())) ||
1924
65.2M
        (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE))
1925
60.9M
      continue;
1926
4.26M
1927
4.26M
    SmallVector<const User *, 4> WorkList;
1928
4.26M
    SmallPtrSet<const User *, 4> Visited;
1929
4.33M
    for (auto *CmpU : U->users()) {
1930
4.33M
      assert(WorkList.empty() && "Should be!");
1931
4.33M
      if (Visited.insert(CmpU).second)
1932
4.33M
        WorkList.push_back(CmpU);
1933
4.33M
1934
8.85M
      while (!WorkList.empty()) {
1935
4.60M
        auto *Curr = WorkList.pop_back_val();
1936
4.60M
1937
4.60M
        // If a user is an AND, add all its users to the work list. We only
1938
4.60M
        // propagate "pred != null" condition through AND because it is only
1939
4.60M
        // correct to assume that all conditions of AND are met in true branch.
1940
4.60M
        // TODO: Support similar logic of OR and EQ predicate?
1941
4.60M
        if (Pred == ICmpInst::ICMP_NE)
1942
729k
          if (auto *BO = dyn_cast<BinaryOperator>(Curr))
1943
274k
            if (BO->getOpcode() == Instruction::And) {
1944
267k
              for (auto *BOU : BO->users())
1945
267k
                if (Visited.insert(BOU).second)
1946
267k
                  WorkList.push_back(BOU);
1947
267k
              continue;
1948
267k
            }
1949
4.33M
1950
4.33M
        if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
1951
4.01M
          assert(BI->isConditional() && "uses a comparison!");
1952
4.01M
1953
4.01M
          BasicBlock *NonNullSuccessor =
1954
4.01M
              BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0);
1955
4.01M
          BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
1956
4.01M
          if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
1957
77.4k
            return true;
1958
324k
        } else if (Pred == ICmpInst::ICMP_NE && isGuard(Curr) &&
1959
324k
                   DT->dominates(cast<Instruction>(Curr), CtxI)) {
1960
8
          return true;
1961
8
        }
1962
4.33M
      }
1963
4.33M
    }
1964
4.26M
  }
1965
18.4M
1966
18.4M
  return false;
1967
18.4M
}
1968
1969
/// Does the 'Range' metadata (which must be a valid MD_range operand list)
1970
/// ensure that the value it's attached to is never Value?  'RangeType' is
1971
/// the type of the value described by the range.
1972
893k
static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
1973
893k
  const unsigned NumRanges = Ranges->getNumOperands() / 2;
1974
893k
  assert(NumRanges >= 1);
1975
894k
  for (unsigned i = 0; i < NumRanges; ++i) {
1976
893k
    ConstantInt *Lower =
1977
893k
        mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
1978
893k
    ConstantInt *Upper =
1979
893k
        mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
1980
893k
    ConstantRange Range(Lower->getValue(), Upper->getValue());
1981
893k
    if (Range.contains(Value))
1982
893k
      return false;
1983
893k
  }
1984
893k
  
return true500
;
1985
893k
}
1986
1987
/// Return true if the given value is known to be non-zero when defined. For
1988
/// vectors, return true if every element is known to be non-zero when
1989
/// defined. For pointers, if the context instruction and dominator tree are
1990
/// specified, perform context-sensitive analysis and return true if the
1991
/// pointer couldn't possibly be null at the specified instruction.
1992
/// Supports values with integer or pointer type and vectors of integers.
1993
56.8M
bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
1994
56.8M
  if (auto *C = dyn_cast<Constant>(V)) {
1995
8.10M
    if (C->isNullValue())
1996
1.44M
      return false;
1997
6.66M
    if (isa<ConstantInt>(C))
1998
161k
      // Must be non-zero due to null test above.
1999
161k
      return true;
2000
6.50M
2001
6.50M
    if (auto *CE = dyn_cast<ConstantExpr>(C)) {
2002
6.35M
      // See the comment for IntToPtr/PtrToInt instructions below.
2003
6.35M
      if (CE->getOpcode() == Instruction::IntToPtr ||
2004
6.35M
          CE->getOpcode() == Instruction::PtrToInt)
2005
1.34k
        if (Q.DL.getTypeSizeInBits(CE->getOperand(0)->getType()) <=
2006
1.34k
            Q.DL.getTypeSizeInBits(CE->getType()))
2007
1.34k
          return isKnownNonZero(CE->getOperand(0), Depth, Q);
2008
6.50M
    }
2009
6.50M
2010
6.50M
    // For constant vectors, check that all elements are undefined or known
2011
6.50M
    // non-zero to determine that the whole vector is known non-zero.
2012
6.50M
    if (auto *VecTy = dyn_cast<VectorType>(C->getType())) {
2013
3.45k
      for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
2014
2.74k
        Constant *Elt = C->getAggregateElement(i);
2015
2.74k
        if (!Elt || Elt->isNullValue())
2016
100
          return false;
2017
2.64k
        if (!isa<UndefValue>(Elt) && !isa<ConstantInt>(Elt))
2018
0
          return false;
2019
2.64k
      }
2020
808
      return true;
2021
6.50M
    }
2022
6.50M
2023
6.50M
    // A global variable in address space 0 is non null unless extern weak
2024
6.50M
    // or an absolute symbol reference. Other address spaces may have null as a
2025
6.50M
    // valid address for a global, so we can't assume anything.
2026
6.50M
    if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
2027
114k
      if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
2028
114k
          GV->getType()->getAddressSpace() == 0)
2029
114k
        return true;
2030
6.38M
    } else
2031
6.38M
      return false;
2032
48.7M
  }
2033
48.7M
2034
48.7M
  if (auto *I = dyn_cast<Instruction>(V)) {
2035
37.0M
    if (MDNode *Ranges = Q.IIQ.getMetadata(I, LLVMContext::MD_range)) {
2036
893k
      // If the possible ranges don't contain zero, then the value is
2037
893k
      // definitely non-zero.
2038
893k
      if (auto *Ty = dyn_cast<IntegerType>(V->getType())) {
2039
893k
        const APInt ZeroValue(Ty->getBitWidth(), 0);
2040
893k
        if (rangeMetadataExcludesValue(Ranges, ZeroValue))
2041
500
          return true;
2042
48.7M
      }
2043
893k
    }
2044
37.0M
  }
2045
48.7M
2046
48.7M
  // Some of the tests below are recursive, so bail out if we hit the limit.
2047
48.7M
  if (Depth++ >= MaxDepth)
2048
16.2k
    return false;
2049
48.7M
2050
48.7M
  // Check for pointer simplifications.
2051
48.7M
  if (V->getType()->isPointerTy()) {
2052
39.0M
    // Alloca never returns null, malloc might.
2053
39.0M
    if (isa<AllocaInst>(V) && Q.DL.getAllocaAddrSpace() == 0)
2054
1.11M
      return true;
2055
37.9M
2056
37.9M
    // A byval, inalloca, or nonnull argument is never null.
2057
37.9M
    if (const Argument *A = dyn_cast<Argument>(V))
2058
11.1M
      if (A->hasByValOrInAllocaAttr() || A->hasNonNullAttr())
2059
362k
        return true;
2060
37.5M
2061
37.5M
    // A Load tagged with nonnull metadata is never null.
2062
37.5M
    if (const LoadInst *LI = dyn_cast<LoadInst>(V))
2063
8.07M
      if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull))
2064
33
        return true;
2065
37.5M
2066
37.5M
    if (const auto *Call = dyn_cast<CallBase>(V)) {
2067
7.75M
      if (Call->isReturnNonNull())
2068
140k
        return true;
2069
7.61M
      if (const auto *RP = getArgumentAliasingToReturnedPointer(Call))
2070
1.32k
        return isKnownNonZero(RP, Depth, Q);
2071
47.0M
    }
2072
37.5M
  }
2073
47.0M
2074
47.0M
2075
47.0M
  // Check for recursive pointer simplifications.
2076
47.0M
  if (V->getType()->isPointerTy()) {
2077
37.4M
    if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
2078
82.8k
      return true;
2079
37.3M
2080
37.3M
    // Look through bitcast operations, GEPs, and int2ptr instructions as they
2081
37.3M
    // do not alter the value, or at least not the nullness property of the
2082
37.3M
    // value, e.g., int2ptr is allowed to zero/sign extend the value.
2083
37.3M
    //
2084
37.3M
    // Note that we have to take special care to avoid looking through
2085
37.3M
    // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
2086
37.3M
    // as casts that can alter the value, e.g., AddrSpaceCasts.
2087
37.3M
    if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
2088
5.62M
      if (isGEPKnownNonNull(GEP, Depth, Q))
2089
4.12M
        return true;
2090
33.1M
2091
33.1M
    if (auto *BCO = dyn_cast<BitCastOperator>(V))
2092
1.73M
      return isKnownNonZero(BCO->getOperand(0), Depth, Q);
2093
31.4M
2094
31.4M
    if (auto *I2P = dyn_cast<IntToPtrInst>(V))
2095
346k
      if (Q.DL.getTypeSizeInBits(I2P->getSrcTy()) <=
2096
346k
          Q.DL.getTypeSizeInBits(I2P->getDestTy()))
2097
346k
        return isKnownNonZero(I2P->getOperand(0), Depth, Q);
2098
40.7M
  }
2099
40.7M
2100
40.7M
  // Similar to int2ptr above, we can look through ptr2int here if the cast
2101
40.7M
  // is a no-op or an extend and not a truncate.
2102
40.7M
  if (auto *P2I = dyn_cast<PtrToIntInst>(V))
2103
19.6k
    if (Q.DL.getTypeSizeInBits(P2I->getSrcTy()) <=
2104
19.6k
        Q.DL.getTypeSizeInBits(P2I->getDestTy()))
2105
19.6k
      return isKnownNonZero(P2I->getOperand(0), Depth, Q);
2106
40.7M
2107
40.7M
  unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL);
2108
40.7M
2109
40.7M
  // X | Y != 0 if X != 0 or Y != 0.
2110
40.7M
  Value *X = nullptr, *Y = nullptr;
2111
40.7M
  if (match(V, m_Or(m_Value(X), m_Value(Y))))
2112
37.0k
    return isKnownNonZero(X, Depth, Q) || 
isKnownNonZero(Y, Depth, Q)36.3k
;
2113
40.7M
2114
40.7M
  // ext X != 0 if X != 0.
2115
40.7M
  if (isa<SExtInst>(V) || isa<ZExtInst>(V))
2116
389k
    return isKnownNonZero(cast<Instruction>(V)->getOperand(0), Depth, Q);
2117
40.3M
2118
40.3M
  // shl X, Y != 0 if X is odd.  Note that the value of the shift is undefined
2119
40.3M
  // if the lowest bit is shifted off the end.
2120
40.3M
  if (match(V, m_Shl(m_Value(X), m_Value(Y)))) {
2121
66.9k
    // shl nuw can't remove any non-zero bits.
2122
66.9k
    const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
2123
66.9k
    if (Q.IIQ.hasNoUnsignedWrap(BO))
2124
36.9k
      return isKnownNonZero(X, Depth, Q);
2125
29.9k
2126
29.9k
    KnownBits Known(BitWidth);
2127
29.9k
    computeKnownBits(X, Known, Depth, Q);
2128
29.9k
    if (Known.One[0])
2129
1.07k
      return true;
2130
40.2M
  }
2131
40.2M
  // shr X, Y != 0 if X is negative.  Note that the value of the shift is not
2132
40.2M
  // defined if the sign bit is shifted off the end.
2133
40.2M
  else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
2134
105k
    // shr exact can only shift out zero bits.
2135
105k
    const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
2136
105k
    if (BO->isExact())
2137
13.0k
      return isKnownNonZero(X, Depth, Q);
2138
92.5k
2139
92.5k
    KnownBits Known = computeKnownBits(X, Depth, Q);
2140
92.5k
    if (Known.isNegative())
2141
5
      return true;
2142
92.5k
2143
92.5k
    // If the shifter operand is a constant, and all of the bits shifted
2144
92.5k
    // out are known to be zero, and X is known non-zero then at least one
2145
92.5k
    // non-zero bit must remain.
2146
92.5k
    if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) {
2147
72.5k
      auto ShiftVal = Shift->getLimitedValue(BitWidth - 1);
2148
72.5k
      // Is there a known one in the portion not shifted out?
2149
72.5k
      if (Known.countMaxLeadingZeros() < BitWidth - ShiftVal)
2150
1
        return true;
2151
72.5k
      // Are all the bits to be shifted out known zero?
2152
72.5k
      if (Known.countMinTrailingZeros() >= ShiftVal)
2153
99
        return isKnownNonZero(X, Depth, Q);
2154
40.1M
    }
2155
40.1M
  }
2156
40.1M
  // div exact can only produce a zero if the dividend is zero.
2157
40.1M
  else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
2158
9.81k
    return isKnownNonZero(X, Depth, Q);
2159
9.81k
  }
2160
40.1M
  // X + Y.
2161
40.1M
  else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
2162
433k
    KnownBits XKnown = computeKnownBits(X, Depth, Q);
2163
433k
    KnownBits YKnown = computeKnownBits(Y, Depth, Q);
2164
433k
2165
433k
    // If X and Y are both non-negative (as signed values) then their sum is not
2166
433k
    // zero unless both X and Y are zero.
2167
433k
    if (XKnown.isNonNegative() && YKnown.isNonNegative())
2168
106k
      if (isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q))
2169
96.6k
        return true;
2170
337k
2171
337k
    // If X and Y are both negative (as signed values) then their sum is not
2172
337k
    // zero unless both X and Y equal INT_MIN.
2173
337k
    if (XKnown.isNegative() && YKnown.isNegative()) {
2174
1
      APInt Mask = APInt::getSignedMaxValue(BitWidth);
2175
1
      // The sign bit of X is set.  If some other bit is set then X is not equal
2176
1
      // to INT_MIN.
2177
1
      if (XKnown.One.intersects(Mask))
2178
0
        return true;
2179
1
      // The sign bit of Y is set.  If some other bit is set then Y is not equal
2180
1
      // to INT_MIN.
2181
1
      if (YKnown.One.intersects(Mask))
2182
1
        return true;
2183
337k
    }
2184
337k
2185
337k
    // The sum of a non-negative number and a power of two is not zero.
2186
337k
    if (XKnown.isNonNegative() &&
2187
337k
        isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
2188
5
      return true;
2189
337k
    if (YKnown.isNonNegative() &&
2190
337k
        isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
2191
1
      return true;
2192
39.7M
  }
2193
39.7M
  // X * Y.
2194
39.7M
  else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) {
2195
46.2k
    const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
2196
46.2k
    // If X and Y are non-zero then so is X * Y as long as the multiplication
2197
46.2k
    // does not overflow.
2198
46.2k
    if ((Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO)) &&
2199
46.2k
        isKnownNonZero(X, Depth, Q) && isKnownNonZero(Y, Depth, Q))
2200
55
      return true;
2201
39.6M
  }
2202
39.6M
  // (C ? X : Y) != 0 if X != 0 and Y != 0.
2203
39.6M
  else if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
2204
273k
    if (isKnownNonZero(SI->getTrueValue(), Depth, Q) &&
2205
273k
        isKnownNonZero(SI->getFalseValue(), Depth, Q))
2206
4.27k
      return true;
2207
39.4M
  }
2208
39.4M
  // PHI
2209
39.4M
  else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
2210
4.20M
    // Try to detect a recurrence that monotonically increases from a
2211
4.20M
    // starting value, as these are common as induction variables.
2212
4.20M
    if (PN->getNumIncomingValues() == 2) {
2213
3.72M
      Value *Start = PN->getIncomingValue(0);
2214
3.72M
      Value *Induction = PN->getIncomingValue(1);
2215
3.72M
      if (isa<ConstantInt>(Induction) && !isa<ConstantInt>(Start))
2216
170k
        std::swap(Start, Induction);
2217
3.72M
      if (ConstantInt *C = dyn_cast<ConstantInt>(Start)) {
2218
771k
        if (!C->isZero() && !C->isNegative()) {
2219
249k
          ConstantInt *X;
2220
249k
          if (Q.IIQ.UseInstrInfo &&
2221
249k
              (match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) ||
2222
249k
               match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) &&
2223
249k
              !X->isNegative())
2224
11.6k
            return true;
2225
4.19M
        }
2226
771k
      }
2227
3.72M
    }
2228
4.19M
    // Check if all incoming values are non-zero constant.
2229
4.43M
    
bool AllNonZeroConstants = llvm::all_of(PN->operands(), [](Value *V) 4.19M
{
2230
4.43M
      return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZero();
2231
4.43M
    });
2232
4.19M
    if (AllNonZeroConstants)
2233
2.14k
      return true;
2234
40.1M
  }
2235
40.1M
2236
40.1M
  KnownBits Known(BitWidth);
2237
40.1M
  computeKnownBits(V, Known, Depth, Q);
2238
40.1M
  return Known.One != 0;
2239
40.1M
}
2240
2241
/// Return true if V2 == V1 + X, where X is known non-zero.
2242
32.4M
static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) {
2243
32.4M
  const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
2244
32.4M
  if (!BO || BO->getOpcode() != Instruction::Add)
2245
31.0M
    return false;
2246
1.39M
  Value *Op = nullptr;
2247
1.39M
  if (V2 == BO->getOperand(0))
2248
647
    Op = BO->getOperand(1);
2249
1.39M
  else if (V2 == BO->getOperand(1))
2250
3.22k
    Op = BO->getOperand(0);
2251
1.39M
  else
2252
1.39M
    return false;
2253
3.87k
  return isKnownNonZero(Op, 0, Q);
2254
3.87k
}
2255
2256
/// Return true if it is known that V1 != V2.
2257
16.2M
static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) {
2258
16.2M
  if (V1 == V2)
2259
327
    return false;
2260
16.2M
  if (V1->getType() != V2->getType())
2261
0
    // We can't look through casts yet.
2262
0
    return false;
2263
16.2M
  if (isAddOfNonZero(V1, V2, Q) || isAddOfNonZero(V2, V1, Q))
2264
1.31k
    return true;
2265
16.2M
2266
16.2M
  if (V1->getType()->isIntOrIntVectorTy()) {
2267
10.9M
    // Are any known bits in V1 contradictory to known bits in V2? If V1
2268
10.9M
    // has a known zero where V2 has a known one, they must not be equal.
2269
10.9M
    KnownBits Known1 = computeKnownBits(V1, 0, Q);
2270
10.9M
    KnownBits Known2 = computeKnownBits(V2, 0, Q);
2271
10.9M
2272
10.9M
    if (Known1.Zero.intersects(Known2.One) ||
2273
10.9M
        Known2.Zero.intersects(Known1.One))
2274
7.71k
      return true;
2275
16.2M
  }
2276
16.2M
  return false;
2277
16.2M
}
2278
2279
/// Return true if 'V & Mask' is known to be zero.  We use this predicate to
2280
/// simplify operations downstream. Mask is known to be zero for bits that V
2281
/// cannot have.
2282
///
2283
/// This function is defined on values with integer type, values with pointer
2284
/// type, and vectors of integers.  In the case
2285
/// where V is a vector, the mask, known zero, and known one values are the
2286
/// same width as the vector element, and the bit is set only if it is true
2287
/// for all of the elements in the vector.
2288
bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
2289
2.61M
                       const Query &Q) {
2290
2.61M
  KnownBits Known(Mask.getBitWidth());
2291
2.61M
  computeKnownBits(V, Known, Depth, Q);
2292
2.61M
  return Mask.isSubsetOf(Known.Zero);
2293
2.61M
}
2294
2295
// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
2296
// Returns the input and lower/upper bounds.
2297
static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
2298
515k
                                const APInt *&CLow, const APInt *&CHigh) {
2299
515k
  assert(isa<Operator>(Select) &&
2300
515k
         cast<Operator>(Select)->getOpcode() == Instruction::Select &&
2301
515k
         "Input should be a Select!");
2302
515k
2303
515k
  const Value *LHS, *RHS, *LHS2, *RHS2;
2304
515k
  SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
2305
515k
  if (SPF != SPF_SMAX && SPF != SPF_SMIN)
2306
278k
    return false;
2307
237k
2308
237k
  if (!match(RHS, m_APInt(CLow)))
2309
8.78k
    return false;
2310
228k
2311
228k
  SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
2312
228k
  if (getInverseMinMaxFlavor(SPF) != SPF2)
2313
227k
    return false;
2314
979
2315
979
  if (!match(RHS2, m_APInt(CHigh)))
2316
6
    return false;
2317
973
2318
973
  if (SPF == SPF_SMIN)
2319
971
    std::swap(CLow, CHigh);
2320
973
2321
973
  In = LHS2;
2322
973
  return CLow->sle(*CHigh);
2323
973
}
2324
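A hedged example of the pattern this matcher accepts:

  %c1 = icmp slt i32 %in, 127
  %t  = select i1 %c1, i32 %in, i32 127    ; smin(%in, 127)
  %c2 = icmp sgt i32 %t, -128
  %r  = select i1 %c2, i32 %t, i32 -128    ; smax(smin(%in, 127), -128)

For %r it yields In = %in, CLow = -128, CHigh = 127 (CLow sle CHigh holds),
and the Select case of ComputeNumSignBitsImpl below then reports
min(25, 25) = 25 sign bits, i.e. the clamped value always fits in 8 bits.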
2325
/// For vector constants, loop over the elements and find the constant with the
2326
/// minimum number of sign bits. Return 0 if the value is not a vector constant
2327
/// or if any element was not analyzed; otherwise, return the count for the
2328
/// element with the minimum number of sign bits.
2329
static unsigned computeNumSignBitsVectorConstant(const Value *V,
2330
15.4M
                                                 unsigned TyBits) {
2331
15.4M
  const auto *CV = dyn_cast<Constant>(V);
2332
15.4M
  if (!CV || !CV->getType()->isVectorTy())
2333
15.2M
    return 0;
2334
190k
2335
190k
  unsigned MinSignBits = TyBits;
2336
190k
  unsigned NumElts = CV->getType()->getVectorNumElements();
2337
768k
  for (unsigned i = 0; i != NumElts; ++i) {
2338
577k
    // If we find a non-ConstantInt, bail out.
2339
577k
    auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i));
2340
577k
    if (!Elt)
2341
387
      return 0;
2342
577k
2343
577k
    MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits());
2344
577k
  }
2345
190k
2346
190k
  return MinSignBits;
2347
190k
}
2348
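For instance (a hedged example, not from this run): the constant
<2 x i16> <i16 -1, i16 4> yields min(16, 13) = 13, since 4 carries 13 copies
of its sign bit in i16; a vector with any non-ConstantInt element, such as
undef, returns 0.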
2349
static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
2350
                                       const Query &Q);
2351
2352
static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
2353
22.0M
                                   const Query &Q) {
2354
22.0M
  unsigned Result = ComputeNumSignBitsImpl(V, Depth, Q);
2355
22.0M
  assert(Result > 0 && "At least one sign bit needs to be present!");
2356
22.0M
  return Result;
2357
22.0M
}
2358
2359
/// Return the number of times the sign bit of the register is replicated into
2360
/// the other bits. We know that at least 1 bit is always equal to the sign bit
2361
/// (itself), but other cases can give us information. For example, immediately
2362
/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
2363
/// other, so we return 3. For vectors, return the number of sign bits for the
2364
/// vector element with the minimum number of known sign bits.
2365
static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
2366
22.0M
                                       const Query &Q) {
2367
22.0M
  assert(Depth <= MaxDepth && "Limit Search Depth");
2368
22.0M
2369
22.0M
  // We return the minimum number of sign bits that are guaranteed to be present
2370
22.0M
  // in V, so for undef we have to conservatively return 1.  We don't have the
2371
22.0M
  // same behavior for poison though -- that's a FIXME today.
2372
22.0M
2373
22.0M
  Type *ScalarTy = V->getType()->getScalarType();
2374
22.0M
  unsigned TyBits = ScalarTy->isPointerTy() ?
2375
1.49M
    Q.DL.getIndexTypeSizeInBits(ScalarTy) :
2376
22.0M
    Q.DL.getTypeSizeInBits(ScalarTy);
2377
22.0M
2378
22.0M
  unsigned Tmp, Tmp2;
2379
22.0M
  unsigned FirstAnswer = 1;
2380
22.0M
2381
22.0M
  // Note that ConstantInt is handled by the general computeKnownBits case
2382
22.0M
  // below.
2383
22.0M
2384
22.0M
  if (Depth == MaxDepth)
2385
1.28M
    return 1;  // Limit search depth.
2386
20.7M
2387
20.7M
  const Operator *U = dyn_cast<Operator>(V);
2388
20.7M
  switch (Operator::getOpcode(V)) {
2389
20.7M
  default: break;
2390
20.7M
  case Instruction::SExt:
2391
129k
    Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
2392
129k
    return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp;
2393
20.7M
2394
20.7M
  case Instruction::SDiv: {
2395
50.4k
    const APInt *Denominator;
2396
50.4k
    // sdiv X, C -> adds log(C) sign bits.
2397
50.4k
    if (match(U->getOperand(1), m_APInt(Denominator))) {
2398
43.9k
2399
43.9k
      // Ignore non-positive denominator.
2400
43.9k
      if (!Denominator->isStrictlyPositive())
2401
671
        break;
2402
43.2k
2403
43.2k
      // Calculate the incoming numerator bits.
2404
43.2k
      unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2405
43.2k
2406
43.2k
      // Add floor(log(C)) bits to the numerator bits.
2407
43.2k
      return std::min(TyBits, NumBits + Denominator->logBase2());
2408
43.2k
    }
2409
6.55k
    break;
2410
6.55k
  }
2411
6.55k
2412
6.55k
  case Instruction::SRem: {
2413
6.04k
    const APInt *Denominator;
2414
6.04k
    // srem X, C -> we know that the result is within [-C+1,C) when C is a
2415
6.04k
    // positive constant.  This let us put a lower bound on the number of sign
2416
6.04k
    // bits.
2417
6.04k
    if (match(U->getOperand(1), m_APInt(Denominator))) {
2418
1.76k
2419
1.76k
      // Ignore non-positive denominator.
2420
1.76k
      if (!Denominator->isStrictlyPositive())
2421
7
        break;
2422
1.76k
2423
1.76k
      // Calculate the incoming numerator bits. SRem by a positive constant
2424
1.76k
      // can't lower the number of sign bits.
2425
1.76k
      unsigned NumrBits =
2426
1.76k
          ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2427
1.76k
2428
1.76k
      // Calculate the leading sign bit constraints by examining the
2429
1.76k
      // denominator.  Given that the denominator is positive, there are two
2430
1.76k
      // cases:
2431
1.76k
      //
2432
1.76k
      //  1. the numerator is positive.  The result range is [0,C) and [0,C) u<
2433
1.76k
      //     (1 << ceilLogBase2(C)).
2434
1.76k
      //
2435
1.76k
      //  2. the numerator is negative.  Then the result range is (-C,0] and
2436
1.76k
      //     integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
2437
1.76k
      //
2438
1.76k
      // Thus a lower bound on the number of sign bits is `TyBits -
2439
1.76k
      // ceilLogBase2(C)`.
2440
1.76k
2441
1.76k
      unsigned ResBits = TyBits - Denominator->ceilLogBase2();
2442
1.76k
      return std::max(NumrBits, ResBits);
2443
1.76k
    }
2444
4.28k
    break;
2445
4.28k
  }
2446
4.28k
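A quick numeric check of the srem bound above (hedged): for srem i32 %x, 8,
ceilLogBase2(8) = 3 gives ResBits = 32 - 3 = 29, and indeed every value in
(-8, 8) has at least 29 identical leading bits in i32.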
2447
115k
  case Instruction::AShr: {
2448
115k
    Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2449
115k
    // ashr X, C   -> adds C sign bits.  Vectors too.
2450
115k
    const APInt *ShAmt;
2451
115k
    if (match(U->getOperand(1), m_APInt(ShAmt))) {
2452
108k
      if (ShAmt->uge(TyBits))
2453
11
        break;  // Bad shift.
2454
108k
      unsigned ShAmtLimited = ShAmt->getZExtValue();
2455
108k
      Tmp += ShAmtLimited;
2456
108k
      if (Tmp > TyBits) Tmp = TyBits;
2457
108k
    }
2458
115k
    return Tmp;
2459
115k
  }
2460
1.04M
  case Instruction::Shl: {
2461
1.04M
    const APInt *ShAmt;
2462
1.04M
    if (match(U->getOperand(1), m_APInt(ShAmt))) {
2463
992k
      // shl destroys sign bits.
2464
992k
      Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2465
992k
      if (ShAmt->uge(TyBits) ||      // Bad shift.
2466
992k
          ShAmt->uge(Tmp)) break;  // Shifted all sign bits out.
2467
46.9k
      Tmp2 = ShAmt->getZExtValue();
2468
46.9k
      return Tmp - Tmp2;
2469
46.9k
    }
2470
50.9k
    break;
2471
50.9k
  }
2472
532k
  case Instruction::And:
2473
532k
  case Instruction::Or:
2474
532k
  case Instruction::Xor:    // NOT is handled here.
2475
532k
    // Logical binary ops preserve the number of sign bits at the worst.
2476
532k
    Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2477
532k
    if (Tmp != 1) {
2478
152k
      Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
2479
152k
      FirstAnswer = std::min(Tmp, Tmp2);
2480
152k
      // We computed what we know about the sign bits as our first
2481
152k
      // answer. Now proceed to the generic code that uses
2482
152k
      // computeKnownBits, and pick whichever answer is better.
2483
152k
    }
2484
532k
    break;
2485
532k
2486
532k
  case Instruction::Select: {
2487
515k
    // If we have a clamp pattern, we know that the number of sign bits will be
2488
515k
    // the minimum of the clamp min/max range.
2489
515k
    const Value *X;
2490
515k
    const APInt *CLow, *CHigh;
2491
515k
    if (isSignedMinMaxClamp(U, X, CLow, CHigh))
2492
973
      return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
2493
514k
2494
514k
    Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
2495
514k
    if (Tmp == 1) break;
2496
92.5k
    Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q);
2497
92.5k
    return std::min(Tmp, Tmp2);
2498
92.5k
  }
2499
92.5k
2500
3.58M
  case Instruction::Add:
2501
3.58M
    // Add can have at most one carry bit.  Thus we know that the output
2502
3.58M
    // is, at worst, one more bit than the inputs.
2503
3.58M
    Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2504
3.58M
    if (Tmp == 1) break;
2505
155k
2506
155k
    // Special case decrementing a value (ADD X, -1):
2507
155k
    if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
2508
84.1k
      if (CRHS->isAllOnesValue()) {
2509
3.17k
        KnownBits Known(TyBits);
2510
3.17k
        computeKnownBits(U->getOperand(0), Known, Depth + 1, Q);
2511
3.17k
2512
3.17k
        // If the input is known to be 0 or 1, the output is 0/-1, which is all
2513
3.17k
        // sign bits set.
2514
3.17k
        if ((Known.Zero | 1).isAllOnesValue())
2515
8
          return TyBits;
2516
3.16k
2517
3.16k
        // If we are subtracting one from a positive number, there is no carry
2518
3.16k
        // out of the result.
2519
3.16k
        if (Known.isNonNegative())
2520
1.77k
          return Tmp;
2521
153k
      }
2522
153k
2523
153k
    Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
2524
153k
    if (Tmp2 == 1) break;
2525
112k
    return std::min(Tmp, Tmp2)-1;
2526
112k
2527
664k
  case Instruction::Sub:
2528
664k
    Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
2529
664k
    if (Tmp2 == 1) break;
2530
77.9k
2531
77.9k
    // Handle NEG.
2532
77.9k
    if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
2533
17.2k
      if (CLHS->isNullValue()) {
2534
3.08k
        KnownBits Known(TyBits);
2535
3.08k
        computeKnownBits(U->getOperand(1), Known, Depth + 1, Q);
2536
3.08k
        // If the input is known to be 0 or 1, the output is 0/-1, which is all
2537
3.08k
        // sign bits set.
2538
3.08k
        if ((Known.Zero | 1).isAllOnesValue())
2539
231
          return TyBits;
2540
2.85k
2541
2.85k
        // If the input is known to be positive (the sign bit is known clear),
2542
2.85k
        // the output of the NEG has the same number of sign bits as the input.
2543
2.85k
        if (Known.isNonNegative())
2544
2.03k
          return Tmp2;
2545
75.7k
2546
75.7k
        // Otherwise, we treat this like a SUB.
2547
75.7k
      }
2548
75.7k
2549
75.7k
    // Sub can have at most one carry bit.  Thus we know that the output
2550
75.7k
    // is, at worst, one more bit than the inputs.
2551
75.7k
    Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2552
75.7k
    if (Tmp == 1) break;
2553
40.8k
    return std::min(Tmp, Tmp2)-1;
2554
40.8k
2555
444k
  case Instruction::Mul: {
2556
444k
    // The output of the Mul can be at most twice the valid bits in the inputs.
2557
444k
    unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2558
444k
    if (SignBitsOp0 == 1) break;
2559
74.8k
    unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
2560
74.8k
    if (SignBitsOp1 == 1) break;
2561
68.3k
    unsigned OutValidBits =
2562
68.3k
        (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
2563
68.3k
    return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
2564
68.3k
  }
2565
68.3k
2566
4.70M
  case Instruction::PHI: {
2567
4.70M
    const PHINode *PN = cast<PHINode>(U);
2568
4.70M
    unsigned NumIncomingValues = PN->getNumIncomingValues();
2569
4.70M
    // Don't analyze large in-degree PHIs.
2570
4.70M
    if (NumIncomingValues > 4) break;
2571
4.66M
    // Unreachable blocks may have zero-operand PHI nodes.
2572
4.66M
    if (NumIncomingValues == 0) break;
2573
4.66M
2574
4.66M
    // Take the minimum of all incoming values.  This can't infinitely loop
2575
4.66M
    // because of our depth threshold.
2576
4.66M
    Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q);
2577
6.35M
    for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) {
2578
4.66M
      if (Tmp == 1) return Tmp;
2579
1.68M
      Tmp = std::min(
2580
1.68M
          Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q));
2581
1.68M
    }
2582
4.66M
    return Tmp;
2583
4.66M
  }
2584
4.66M
2585
4.66M
  case Instruction::Trunc:
2586
206k
    // FIXME: it's tricky to do anything useful for this, but it is an important
2587
206k
    // case for targets like X86.
2588
206k
    break;
2589
4.66M
2590
4.66M
  case Instruction::ExtractElement:
2591
4.32k
    // Look through extract element. At the moment we keep this simple and skip
2592
4.32k
    // tracking the specific element. But at least we might find information
2593
4.32k
    // valid for all elements of the vector (for example if vector is sign
2594
4.32k
    // extended, shifted, etc).
2595
4.32k
    return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2596
4.66M
2597
4.66M
  case Instruction::ShuffleVector: {
2598
13.0k
    // TODO: This is copied almost directly from the SelectionDAG version of
2599
13.0k
    //       ComputeNumSignBits. It would be better if we could share common
2600
13.0k
    //       code. If not, make sure that changes are translated to the DAG.
2601
13.0k
2602
13.0k
    // Collect the minimum number of sign bits that are shared by every vector
2603
13.0k
    // element referenced by the shuffle.
2604
13.0k
    auto *Shuf = cast<ShuffleVectorInst>(U);
2605
13.0k
    int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements();
2606
13.0k
    int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements();
2607
13.0k
    APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
2608
128k
    for (int i = 0; i != NumMaskElts; ++i) {
2609
117k
      int M = Shuf->getMaskValue(i);
2610
117k
      assert(M < NumElts * 2 && "Invalid shuffle mask constant");
2611
117k
      // For undef elements, we don't know anything about the common state of
2612
117k
      // the shuffle result.
2613
117k
      if (M == -1)
2614
2.29k
        return 1;
2615
115k
      if (M < NumElts)
2616
83.0k
        DemandedLHS.setBit(M % NumElts);
2617
32.0k
      else
2618
32.0k
        DemandedRHS.setBit(M % NumElts);
2619
115k
    }
2620
13.0k
    Tmp = std::numeric_limits<unsigned>::max();
2621
10.7k
    if (!!DemandedLHS)
2622
10.7k
      Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q);
2623
10.7k
    if (!!DemandedRHS) {
2624
5.12k
      Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q);
2625
5.12k
      Tmp = std::min(Tmp, Tmp2);
2626
5.12k
    }
2627
10.7k
    // If we don't know anything, early out and try computeKnownBits fall-back.
2628
10.7k
    if (Tmp == 1)
2629
10.7k
      break;
2630
8
    assert(Tmp <= V->getType()->getScalarSizeInBits() &&
2631
8
           "Failed to determine minimum sign bits");
2632
8
    return Tmp;
2633
8
  }
2634
15.4M
  }
2635
15.4M
2636
15.4M
  // Finally, if we can prove that the top bits of the result are 0's or 1's,
2637
15.4M
  // use this information.
2638
15.4M
2639
15.4M
  // If we can examine all elements of a vector constant successfully, we're
2640
15.4M
  // done (we can't do any better than that). If not, keep trying.
2641
15.4M
  if (unsigned VecSignBits = computeNumSignBitsVectorConstant(V, TyBits))
2642
190k
    return VecSignBits;
2643
15.2M
2644
15.2M
  KnownBits Known(TyBits);
2645
15.2M
  computeKnownBits(V, Known, Depth, Q);
2646
15.2M
2647
15.2M
  // If we know that the sign bit is either zero or one, determine the number of
2648
15.2M
  // identical bits in the top of the input value.
2649
15.2M
  return std::max(FirstAnswer, Known.countMinSignBits());
2650
15.2M
}
2651
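A hedged end-to-end example of the recursion:

  %s = sext i8 %x to i32   ; 24 extension bits + at least 1 in %x = 25
  %r = ashr i32 %s, 2      ; 25 + 2 = 27

ComputeNumSignBits(%r) returns 27, combining the SExt and AShr cases above.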
2652
/// This function computes the integer multiple of Base that equals V.
2653
/// If successful, it returns true and returns the multiple in
2654
/// Multiple. If unsuccessful, it returns false. It looks
2655
/// through SExt instructions only if LookThroughSExt is true.
2656
bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
2657
100
                           bool LookThroughSExt, unsigned Depth) {
2658
100
  const unsigned MaxDepth = 6;
2659
100
2660
100
  assert(V && "No Value?");
2661
100
  assert(Depth <= MaxDepth && "Limit Search Depth");
2662
100
  assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");
2663
100
2664
100
  Type *T = V->getType();
2665
100
2666
100
  ConstantInt *CI = dyn_cast<ConstantInt>(V);
2667
100
2668
100
  if (Base == 0)
2669
0
    return false;
2670
100
2671
100
  if (Base == 1) {
2672
0
    Multiple = V;
2673
0
    return true;
2674
0
  }
2675
100
2676
100
  ConstantExpr *CO = dyn_cast<ConstantExpr>(V);
2677
100
  Constant *BaseVal = ConstantInt::get(T, Base);
2678
100
  if (CO && CO == BaseVal) {
2679
0
    // Multiple is 1.
2680
0
    Multiple = ConstantInt::get(T, 1);
2681
0
    return true;
2682
0
  }
2683
100
2684
100
  if (CI && CI->getZExtValue() % Base == 0) {
2685
18
    Multiple = ConstantInt::get(T, CI->getZExtValue() / Base);
2686
18
    return true;
2687
18
  }
2688
82
2689
82
  if (Depth == MaxDepth) return false; // Limit search depth.
2690
82
2691
82
  Operator *I = dyn_cast<Operator>(V);
2692
82
  if (!I) return false;
2693
74
2694
74
  switch (I->getOpcode()) {
2695
74
  default: break;
2696
74
  case Instruction::SExt:
2697
25
    if (!LookThroughSExt) return false;
2698
25
    // otherwise fall through to ZExt
2699
25
    LLVM_FALLTHROUGH;
2700
25
  case Instruction::ZExt:
2701
25
    return ComputeMultiple(I->getOperand(0), Base, Multiple,
2702
25
                           LookThroughSExt, Depth+1);
2703
25
  case Instruction::Shl:
2704
13
  case Instruction::Mul: {
2705
13
    Value *Op0 = I->getOperand(0);
2706
13
    Value *Op1 = I->getOperand(1);
2707
13
2708
13
    if (I->getOpcode() == Instruction::Shl) {
2709
6
      ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
2710
6
      if (!Op1CI) return false;
2711
6
      // Turn Op0 << Op1 into Op0 * 2^Op1
2712
6
      APInt Op1Int = Op1CI->getValue();
2713
6
      uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
2714
6
      APInt API(Op1Int.getBitWidth(), 0);
2715
6
      API.setBit(BitToSet);
2716
6
      Op1 = ConstantInt::get(V->getContext(), API);
2717
6
    }
2718
13
2719
13
    Value *Mul0 = nullptr;
2720
13
    if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
2721
3
      if (Constant *Op1C = dyn_cast<Constant>(Op1))
2722
1
        if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
2723
1
          if (Op1C->getType()->getPrimitiveSizeInBits() <
2724
1
              MulC->getType()->getPrimitiveSizeInBits())
2725
0
            Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
2726
1
          if (Op1C->getType()->getPrimitiveSizeInBits() >
2727
1
              MulC->getType()->getPrimitiveSizeInBits())
2728
1
            MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
2729
1
2730
1
          // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
2731
1
          Multiple = ConstantExpr::getMul(MulC, Op1C);
2732
1
          return true;
2733
1
        }
2734
2
2735
2
      if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
2736
2
        if (Mul0CI->getValue() == 1) {
2737
2
          // V == Base * Op1, so return Op1
2738
2
          Multiple = Op1;
2739
2
          return true;
2740
2
        }
2741
10
    }
2742
10
2743
10
    Value *Mul1 = nullptr;
2744
10
    if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
2745
8
      if (Constant *Op0C = dyn_cast<Constant>(Op0))
2746
0
        if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
2747
0
          if (Op0C->getType()->getPrimitiveSizeInBits() <
2748
0
              MulC->getType()->getPrimitiveSizeInBits())
2749
0
            Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
2750
0
          if (Op0C->getType()->getPrimitiveSizeInBits() >
2751
0
              MulC->getType()->getPrimitiveSizeInBits())
2752
0
            MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
2753
0
2754
0
          // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
2755
0
          Multiple = ConstantExpr::getMul(MulC, Op0C);
2756
0
          return true;
2757
0
        }
2758
8
2759
8
      if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
2760
8
        if (Mul1CI->getValue() == 1) {
2761
8
          // V == Base * Op0, so return Op0
2762
8
          Multiple = Op0;
2763
8
          return true;
2764
8
        }
2765
38
    }
2766
10
  }
2767
38
  }
2768
38
2769
38
  // We could not determine if V is a multiple of Base.
2770
38
  return false;
2771
38
}
2772
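A hedged usage sketch: for V = shl i32 %x, 2 and Base = 4, the Shl arm
rewrites the shift as %x * 4; the recursive call on the constant 4 returns a
multiple of 1, and the Mul1CI == 1 branch then succeeds with Multiple = %x,
i.e. V == 4 * %x.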
2773
Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS,
2774
76.9k
                                            const TargetLibraryInfo *TLI) {
2775
76.9k
  const Function *F = ICS.getCalledFunction();
2776
76.9k
  if (!F)
2777
1.03k
    return Intrinsic::not_intrinsic;
2778
75.9k
2779
75.9k
  if (F->isIntrinsic())
2780
33.3k
    return F->getIntrinsicID();
2781
42.5k
2782
42.5k
  if (!TLI)
2783
123
    return Intrinsic::not_intrinsic;
2784
42.4k
2785
42.4k
  LibFunc Func;
2786
42.4k
  // We're going to make assumptions on the semantics of the functions, check
2787
42.4k
  // that the target knows that it's available in this environment and it does
2788
42.4k
  // not have local linkage.
2789
42.4k
  if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(*F, Func))
2790
34.3k
    return Intrinsic::not_intrinsic;
2791
8.11k
2792
8.11k
  if (!ICS.onlyReadsMemory())
2793
3.96k
    return Intrinsic::not_intrinsic;
2794
4.14k
2795
4.14k
  // Otherwise check if we have a call to a function that can be turned into a
2796
4.14k
  // vector intrinsic.
2797
4.14k
  switch (Func) {
2798
4.14k
  default:
2799
3.80k
    break;
2800
4.14k
  case LibFunc_sin:
2801
39
  case LibFunc_sinf:
2802
39
  case LibFunc_sinl:
2803
39
    return Intrinsic::sin;
2804
39
  case LibFunc_cos:
2805
39
  case LibFunc_cosf:
2806
39
  case LibFunc_cosl:
2807
39
    return Intrinsic::cos;
2808
39
  case LibFunc_exp:
2809
34
  case LibFunc_expf:
2810
34
  case LibFunc_expl:
2811
34
    return Intrinsic::exp;
2812
34
  case LibFunc_exp2:
2813
5
  case LibFunc_exp2f:
2814
5
  case LibFunc_exp2l:
2815
5
    return Intrinsic::exp2;
2816
34
  case LibFunc_log:
2817
34
  case LibFunc_logf:
2818
34
  case LibFunc_logl:
2819
34
    return Intrinsic::log;
2820
34
  case LibFunc_log10:
2821
16
  case LibFunc_log10f:
2822
16
  case LibFunc_log10l:
2823
16
    return Intrinsic::log10;
2824
16
  case LibFunc_log2:
2825
0
  case LibFunc_log2f:
2826
0
  case LibFunc_log2l:
2827
0
    return Intrinsic::log2;
2828
25
  case LibFunc_fabs:
2829
25
  case LibFunc_fabsf:
2830
25
  case LibFunc_fabsl:
2831
25
    return Intrinsic::fabs;
2832
25
  case LibFunc_fmin:
2833
0
  case LibFunc_fminf:
2834
0
  case LibFunc_fminl:
2835
0
    return Intrinsic::minnum;
2836
0
  case LibFunc_fmax:
2837
0
  case LibFunc_fmaxf:
2838
0
  case LibFunc_fmaxl:
2839
0
    return Intrinsic::maxnum;
2840
0
  case LibFunc_copysign:
2841
0
  case LibFunc_copysignf:
2842
0
  case LibFunc_copysignl:
2843
0
    return Intrinsic::copysign;
2844
47
  case LibFunc_floor:
2845
47
  case LibFunc_floorf:
2846
47
  case LibFunc_floorl:
2847
47
    return Intrinsic::floor;
2848
47
  case LibFunc_ceil:
2849
16
  case LibFunc_ceilf:
2850
16
  case LibFunc_ceill:
2851
16
    return Intrinsic::ceil;
2852
16
  case LibFunc_trunc:
2853
0
  case LibFunc_truncf:
2854
0
  case LibFunc_truncl:
2855
0
    return Intrinsic::trunc;
2856
0
  case LibFunc_rint:
2857
0
  case LibFunc_rintf:
2858
0
  case LibFunc_rintl:
2859
0
    return Intrinsic::rint;
2860
0
  case LibFunc_nearbyint:
2861
0
  case LibFunc_nearbyintf:
2862
0
  case LibFunc_nearbyintl:
2863
0
    return Intrinsic::nearbyint;
2864
0
  case LibFunc_round:
2865
0
  case LibFunc_roundf:
2866
0
  case LibFunc_roundl:
2867
0
    return Intrinsic::round;
2868
24
  case LibFunc_pow:
2869
24
  case LibFunc_powf:
2870
24
  case LibFunc_powl:
2871
24
    return Intrinsic::pow;
2872
61
  case LibFunc_sqrt:
2873
61
  case LibFunc_sqrtf:
2874
61
  case LibFunc_sqrtl:
2875
61
    return Intrinsic::sqrt;
2876
3.80k
  }
2877
3.80k
2878
3.80k
  return Intrinsic::not_intrinsic;
2879
3.80k
}
2880
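For example (illustrative, not from this run): a call to an external sinf
that TLI recognizes and that satisfies onlyReadsMemory() maps to
Intrinsic::sin, while a call to a static function also named sinf is rejected
by the hasLocalLinkage() guard and yields not_intrinsic.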
2881
/// Return true if we can prove that the specified FP value is never equal to
2882
/// -0.0.
2883
///
2884
/// NOTE: this function will need to be revisited when we support non-default
2885
/// rounding modes!
2886
bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
2887
201k
                                unsigned Depth) {
2888
201k
  if (auto *CFP = dyn_cast<ConstantFP>(V))
2889
57.1k
    return !CFP->getValueAPF().isNegZero();
2890
143k
2891
143k
  // Limit search depth.
2892
143k
  if (Depth == MaxDepth)
2893
0
    return false;
2894
143k
2895
143k
  auto *Op = dyn_cast<Operator>(V);
2896
143k
  if (!Op)
2897
4.19k
    return false;
2898
139k
2899
139k
  // Check if the nsz fast-math flag is set.
2900
139k
  if (auto *FPO = dyn_cast<FPMathOperator>(Op))
2901
132k
    if (FPO->hasNoSignedZeros())
2902
4
      return true;
2903
139k
2904
139k
  // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
2905
139k
  if (match(Op, m_FAdd(m_Value(), m_PosZeroFP())))
2906
432
    return true;
2907
139k
2908
139k
  // sitofp and uitofp turn into +0.0 for zero.
2909
139k
  if (isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op))
2910
2.06k
    return true;
2911
137k
2912
137k
  if (auto *Call = dyn_cast<CallInst>(Op)) {
2913
2.71k
    Intrinsic::ID IID = getIntrinsicForCallSite(Call, TLI);
2914
2.71k
    switch (IID) {
2915
2.71k
    default:
2916
1.88k
      break;
2917
2.71k
    // sqrt(-0.0) = -0.0, no other negative results are possible.
2918
2.71k
    case Intrinsic::sqrt:
2919
409
    case Intrinsic::canonicalize:
2920
409
      return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
2921
409
    // fabs(x) != -0.0
2922
423
    case Intrinsic::fabs:
2923
423
      return true;
2924
136k
    }
2925
136k
  }
2926
136k
2927
136k
  return false;
2928
136k
}
2929
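Two hedged one-line examples this predicate accepts:

  %p = uitofp i32 %n to float   ; integer conversions never produce -0.0
  %q = fadd float %x, 0.0       ; x + (+0.0) is +0.0 even when x == -0.0

A bare fsub float -0.0, %x without the nsz flag falls through to the final
return false, since negating +0.0 yields -0.0.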
2930
/// If \p SignBitOnly is true, test for a known 0 sign bit rather than a
2931
/// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign
2932
/// bit despite comparing equal.
2933
static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
2934
                                            const TargetLibraryInfo *TLI,
2935
                                            bool SignBitOnly,
2936
118k
                                            unsigned Depth) {
2937
118k
  // TODO: This function does not do the right thing when SignBitOnly is true
2938
118k
  // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform
2939
118k
  // which flips the sign bits of NaNs.  See
2940
118k
  // https://llvm.org/bugs/show_bug.cgi?id=31702.
2941
118k
2942
118k
  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
2943
3.23k
    return !CFP->getValueAPF().isNegative() ||
2944
3.23k
           (!SignBitOnly && CFP->getValueAPF().isZero());
2945
3.23k
  }
2946
115k
2947
115k
  // Handle vector of constants.
2948
115k
  if (auto *CV = dyn_cast<Constant>(V)) {
2949
20
    if (CV->getType()->isVectorTy()) {
2950
17
      unsigned NumElts = CV->getType()->getVectorNumElements();
2951
40
      for (unsigned i = 0; i != NumElts; ++i) {
2952
29
        auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
2953
29
        if (!CFP)
2954
0
          return false;
2955
29
        if (CFP->getValueAPF().isNegative() &&
2956
29
            (SignBitOnly || !CFP->getValueAPF().isZero()))
2957
6
          return false;
2958
29
      }
2959
17
2960
17
      // All non-negative ConstantFPs.
2961
17
      return true;
2962
115k
    }
2963
20
  }
2964
115k
2965
115k
  if (Depth == MaxDepth)
2966
223
    return false; // Limit search depth.
2967
115k
2968
115k
  const Operator *I = dyn_cast<Operator>(V);
2969
115k
  if (!I)
2970
20.7k
    return false;
2971
94.4k
2972
94.4k
  switch (I->getOpcode()) {
2973
94.4k
  default:
2974
55.9k
    break;
2975
94.4k
  // Unsigned integers are always nonnegative.
2976
94.4k
  case Instruction::UIToFP:
2977
76
    return true;
2978
94.4k
  case Instruction::FMul:
2979
12.3k
    // x*x is always non-negative or a NaN.
2980
12.3k
    if (I->getOperand(0) == I->getOperand(1) &&
2981
12.3k
        (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()))
2982
1.32k
      return true;
2983
10.9k
2984
10.9k
    LLVM_FALLTHROUGH;
2985
27.3k
  case Instruction::FAdd:
2986
27.3k
  case Instruction::FDiv:
2987
27.3k
  case Instruction::FRem:
2988
27.3k
    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
2989
27.3k
                                           Depth + 1) &&
2990
27.3k
           cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
2991
3.17k
                                           Depth + 1);
2992
27.3k
  case Instruction::Select:
2993
6.37k
    return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
2994
6.37k
                                           Depth + 1) &&
2995
6.37k
           cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
2996
1.21k
                                           Depth + 1);
2997
27.3k
  case Instruction::FPExt:
2998
990
  case Instruction::FPTrunc:
2999
990
    // Widening/narrowing never change sign.
3000
990
    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3001
990
                                           Depth + 1);
3002
990
  case Instruction::ExtractElement:
3003
121
    // Look through extract element. At the moment we keep this simple and skip
3004
121
    // tracking the specific element. But at least we might find information
3005
121
    // valid for all elements of the vector.
3006
121
    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3007
121
                                           Depth + 1);
3008
2.23k
  case Instruction::Call:
3009
2.23k
    const auto *CI = cast<CallInst>(I);
3010
2.23k
    Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI);
3011
2.23k
    switch (IID) {
3012
2.23k
    default:
3013
1.98k
      break;
3014
2.23k
    case Intrinsic::maxnum:
3015
1
      return (isKnownNeverNaN(I->getOperand(0), TLI) &&
3016
1
              cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI,
3017
0
                                              SignBitOnly, Depth + 1)) ||
3018
1
            (isKnownNeverNaN(I->getOperand(1), TLI) &&
3019
1
              cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI,
3020
0
                                              SignBitOnly, Depth + 1));
3021
2.23k
3022
2.23k
    case Intrinsic::maximum:
3023
1
      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3024
1
                                             Depth + 1) ||
3025
1
             cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
3026
0
                                             Depth + 1);
3027
2.23k
    case Intrinsic::minnum:
3028
1
    case Intrinsic::minimum:
3029
1
      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3030
1
                                             Depth + 1) &&
3031
1
             cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
3032
1
                                             Depth + 1);
3033
65
    case Intrinsic::exp:
3034
65
    case Intrinsic::exp2:
3035
65
    case Intrinsic::fabs:
3036
65
      return true;
3037
65
3038
168
    case Intrinsic::sqrt:
3039
168
      // sqrt(x) is always >= -0 or NaN.  Moreover, sqrt(x) == -0 iff x == -0.
3040
168
      if (!SignBitOnly)
3041
21
        return true;
3042
147
      return CI->hasNoNaNs() && (CI->hasNoSignedZeros() ||
3043
3
                                 CannotBeNegativeZero(CI->getOperand(0), TLI));
3044
147
3045
147
    case Intrinsic::powi:
3046
1
      if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) {
3047
1
        // powi(x,n) is non-negative if n is even.
3048
1
        if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0)
3049
0
          return true;
3050
1
      }
3051
1
      // TODO: This is not correct.  Given that exp is an integer, here are the
3052
1
      // ways that pow can return a negative value:
3053
1
      //
3054
1
      //   pow(x, exp)    --> negative if exp is odd and x is negative.
3055
1
      //   pow(-0, exp)   --> -inf if exp is negative odd.
3056
1
      //   pow(-0, exp)   --> -0 if exp is positive odd.
3057
1
      //   pow(-inf, exp) --> -0 if exp is negative odd.
3058
1
      //   pow(-inf, exp) --> -inf if exp is positive odd.
3059
1
      //
3060
1
      // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN,
3061
1
      // but we must return false if x == -0.  Unfortunately we do not currently
3062
1
      // have a way of expressing this constraint.  See details in
3063
1
      // https://llvm.org/bugs/show_bug.cgi?id=31702.
3064
1
      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3065
1
                                             Depth + 1);
3066
1
3067
11
    case Intrinsic::fma:
3068
11
    case Intrinsic::fmuladd:
3069
11
      // x*x+y is non-negative if y is non-negative.
3070
11
      return I->getOperand(0) == I->getOperand(1) &&
3071
11
             (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) &&
3072
11
             cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
3073
3
                                             Depth + 1);
3074
1.98k
    }
3075
1.98k
    break;
3076
57.9k
  }
3077
57.9k
  return false;
3078
57.9k
}
3079
3080
bool llvm::CannotBeOrderedLessThanZero(const Value *V,
3081
25.9k
                                       const TargetLibraryInfo *TLI) {
3082
25.9k
  return cannotBeOrderedLessThanZeroImpl(V, TLI, false, 0);
3083
25.9k
}
3084
3085
53.4k
bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) {
3086
53.4k
  return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
3087
53.4k
}
3088
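The SignBitOnly flag is what separates the two wrappers (hedged example): for
the constant -0.0, CannotBeOrderedLessThanZero returns true, because -0.0 is
not ordered less than zero, while SignBitMustBeZero returns false, because
its sign bit is set.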
3089
bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
3090
54.6k
                           unsigned Depth) {
3091
54.6k
  assert(V->getType()->isFPOrFPVectorTy() && "Querying for NaN on non-FP type");
3092
54.6k
3093
54.6k
  // If we're told that NaNs won't happen, assume they won't.
3094
54.6k
  if (auto *FPMathOp = dyn_cast<FPMathOperator>(V))
3095
22.0k
    if (FPMathOp->hasNoNaNs())
3096
68
      return true;
3097
54.5k
3098
54.5k
  // Handle scalar constants.
3099
54.5k
  if (auto *CFP = dyn_cast<ConstantFP>(V))
3100
2.56k
    return !CFP->isNaN();
3101
52.0k
3102
52.0k
  if (Depth == MaxDepth)
3103
0
    return false;
3104
52.0k
3105
52.0k
  if (auto *Inst = dyn_cast<Instruction>(V)) {
3106
33.7k
    switch (Inst->getOpcode()) {
3107
33.7k
    case Instruction::FAdd:
3108
7.93k
    case Instruction::FMul:
3109
7.93k
    case Instruction::FSub:
3110
7.93k
    case Instruction::FDiv:
3111
7.93k
    case Instruction::FRem: {
3112
7.93k
      // TODO: Need isKnownNeverInfinity
3113
7.93k
      return false;
3114
7.93k
    }
3115
7.93k
    case Instruction::Select: {
3116
293
      return isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
3117
293
             isKnownNeverNaN(Inst->getOperand(2), TLI, Depth + 1);
3118
7.93k
    }
3119
7.93k
    case Instruction::SIToFP:
3120
876
    case Instruction::UIToFP:
3121
876
      return true;
3122
876
    case Instruction::FPTrunc:
3123
359
    case Instruction::FPExt:
3124
359
      return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1);
3125
24.3k
    default:
3126
24.3k
      break;
3127
42.5k
    }
3128
42.5k
  }
3129
42.5k
3130
42.5k
  if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
3131
5.47k
    switch (II->getIntrinsicID()) {
3132
5.47k
    case Intrinsic::canonicalize:
3133
4.18k
    case Intrinsic::fabs:
3134
4.18k
    case Intrinsic::copysign:
3135
4.18k
    case Intrinsic::exp:
3136
4.18k
    case Intrinsic::exp2:
3137
4.18k
    case Intrinsic::floor:
3138
4.18k
    case Intrinsic::ceil:
3139
4.18k
    case Intrinsic::trunc:
3140
4.18k
    case Intrinsic::rint:
3141
4.18k
    case Intrinsic::nearbyint:
3142
4.18k
    case Intrinsic::round:
3143
4.18k
      return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1);
3144
4.18k
    case Intrinsic::sqrt:
3145
20
      return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) &&
3146
20
             CannotBeOrderedLessThanZero(II->getArgOperand(0), TLI);
3147
4.18k
    case Intrinsic::minnum:
3148
1.25k
    case Intrinsic::maxnum:
3149
1.25k
      // If either operand is not NaN, the result is not NaN.
3150
1.25k
      return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) ||
3151
1.25k
             isKnownNeverNaN(II->getArgOperand(1), TLI, Depth + 1);
3152
1.25k
    default:
3153
18
      return false;
3154
37.0k
    }
3155
37.0k
  }
3156
37.0k
3157
37.0k
  // Bail out for constant expressions, but try to handle vector constants.
3158
37.0k
  if (!V->getType()->isVectorTy() || !isa<Constant>(V))
3159
37.0k
    return false;
3160
53
3161
53
  // For vectors, verify that each element is not NaN.
3162
53
  unsigned NumElts = V->getType()->getVectorNumElements();
3163
182
  for (unsigned i = 0; i != NumElts; ++i) {
3164
130
    Constant *Elt = cast<Constant>(V)->getAggregateElement(i);
3165
130
    if (!Elt)
3166
0
      return false;
3167
130
    if (isa<UndefValue>(Elt))
3168
15
      continue;
3169
115
    auto *CElt = dyn_cast<ConstantFP>(Elt);
3170
115
    if (!CElt || CElt->isNaN())
3171
1
      return false;
3172
115
  }
3173
53
  // All elements were confirmed not-NaN or undefined.
3174
53
  return true;
3175
53
}
3176
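Hedged examples of the paths above: sitofp and uitofp results are never NaN;
for minnum/maxnum a single NaN-free operand suffices (the || above) because
minnum(x, NaN) returns x; a plain fadd currently returns false since
inf + (-inf) can create a NaN, per the isKnownNeverInfinity TODO.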
3177
1.53M
Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
3178
1.53M
3179
1.53M
  // All byte-wide stores are splatable, even of arbitrary variables.
3180
1.53M
  if (V->getType()->isIntegerTy(8))
3181
311k
    return V;
3182
1.21M
3183
1.21M
  LLVMContext &Ctx = V->getContext();
3184
1.21M
3185
1.21M
  // Undef don't care.
3186
1.21M
  auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
3187
1.21M
  if (isa<UndefValue>(V))
3188
46
    return UndefInt8;
3189
1.21M
3190
1.21M
  const uint64_t Size = DL.getTypeStoreSize(V->getType());
3191
1.21M
  if (!Size)
3192
4
    return UndefInt8;
3193
1.21M
3194
1.21M
  Constant *C = dyn_cast<Constant>(V);
3195
1.21M
  if (!C) {
3196
579k
    // Conceptually, we could handle things like:
3197
579k
    //   %a = zext i8 %X to i16
3198
579k
    //   %b = shl i16 %a, 8
3199
579k
    //   %c = or i16 %a, %b
3200
579k
    // but until there is an example that actually needs this, it doesn't seem
3201
579k
    // worth worrying about.
3202
579k
    return nullptr;
3203
579k
  }
3204
640k
3205
640k
  // Handle 'null' ConstantArrayZero etc.
3206
640k
  if (C->isNullValue())
3207
400k
    return Constant::getNullValue(Type::getInt8Ty(Ctx));
3208
239k
3209
239k
  // Constant floating-point values can be handled as integer values if the
3210
239k
  // corresponding integer value is "byteable".  An important case is 0.0.
3211
239k
  if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
3212
18.0k
    Type *Ty = nullptr;
3213
18.0k
    if (CFP->getType()->isHalfTy())
3214
3
      Ty = Type::getInt16Ty(Ctx);
3215
18.0k
    else if (CFP->getType()->isFloatTy())
3216
9.05k
      Ty = Type::getInt32Ty(Ctx);
3217
8.96k
    else if (CFP->getType()->isDoubleTy())
3218
8.95k
      Ty = Type::getInt64Ty(Ctx);
3219
18.0k
    // Don't handle long double formats, which have strange constraints.
3220
18.0k
    return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL)
3221
18.0k
              : nullptr;
3222
18.0k
  }
3223
221k
3224
221k
  // We can handle constant integers that are multiple of 8 bits.
3225
221k
  if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
3226
197k
    if (CI->getBitWidth() % 8 == 0) {
3227
184k
      assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
3228
184k
      if (!CI->getValue().isSplat(8))
3229
170k
        return nullptr;
3230
13.7k
      return ConstantInt::get(Ctx, CI->getValue().trunc(8));
3231
13.7k
    }
3232
197k
  }
3233
37.6k
3234
37.6k
  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
3235
19.5k
    if (CE->getOpcode() == Instruction::IntToPtr) {
3236
103
      auto PS = DL.getPointerSizeInBits(
3237
103
          cast<PointerType>(CE->getType())->getAddressSpace());
3238
103
      return isBytewiseValue(
3239
103
          ConstantExpr::getIntegerCast(CE->getOperand(0),
3240
103
                                       Type::getIntNTy(Ctx, PS), false),
3241
103
          DL);
3242
103
    }
3243
37.5k
  }
3244
37.5k
3245
77.4k
  auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
3246
77.4k
    if (LHS == RHS)
3247
73.0k
      return LHS;
3248
4.48k
    if (!LHS || !RHS)
3249
308
      return nullptr;
3250
4.18k
    if (LHS == UndefInt8)
3251
2.22k
      return RHS;
3252
1.95k
    if (RHS == UndefInt8)
3253
23
      return LHS;
3254
1.93k
    return nullptr;
3255
1.93k
  };
3256
37.5k
3257
37.5k
  if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
3258
2.18k
    Value *Val = UndefInt8;
3259
76.5k
    for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
3260
76.4k
      if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL))))
3261
2.07k
        return nullptr;
3262
2.18k
    return Val;
3263
35.3k
  }
3264
35.3k
3265
35.3k
  if (isa<ConstantAggregate>(C)) {
3266
278
    Value *Val = UndefInt8;
3267
1.13k
    for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
3268
1.02k
      if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I), DL))))
3269
169
        return nullptr;
3270
278
    return Val;
3271
35.0k
  }
3272
35.0k
3273
35.0k
  // Don't try to handle the handful of other constants.
3274
35.0k
  return nullptr;
3275
35.0k
}
3276
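Worked examples (hedged, not from this run): i32 0x01010101 splits into four
identical bytes and returns i8 1; i32 0x01020304 fails isSplat(8) and returns
nullptr; float 0.0 is caught by the isNullValue() check and returns i8 0.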
3277
// This is the recursive version of BuildSubAggregate. It takes a few different
3278
// arguments. Idxs is the index within the nested struct From that we are
3279
// looking at now (which is of type IndexedType). IdxSkip is the number of
3280
// indices from Idxs that should be left out when inserting into the resulting
3281
// struct. To is the result struct built so far, new insertvalue instructions
3282
// build on that.
3283
static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
3284
                                SmallVectorImpl<unsigned> &Idxs,
3285
                                unsigned IdxSkip,
3286
0
                                Instruction *InsertBefore) {
3287
0
  StructType *STy = dyn_cast<StructType>(IndexedType);
3288
0
  if (STy) {
3289
0
    // Save the original To argument so we can modify it
3290
0
    Value *OrigTo = To;
3291
0
    // General case, the type indexed by Idxs is a struct
3292
0
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
3293
0
      // Process each struct element recursively
3294
0
      Idxs.push_back(i);
3295
0
      Value *PrevTo = To;
3296
0
      To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
3297
0
                             InsertBefore);
3298
0
      Idxs.pop_back();
3299
0
      if (!To) {
3300
0
        // Couldn't find any inserted value for this index? Cleanup
3301
0
        while (PrevTo != OrigTo) {
3302
0
          InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
3303
0
          PrevTo = Del->getAggregateOperand();
3304
0
          Del->eraseFromParent();
3305
0
        }
3306
0
        // Stop processing elements
3307
0
        break;
3308
0
      }
3309
0
    }
3310
0
    // If we successfully found a value for each of our subaggregates
3311
0
    if (To)
3312
0
      return To;
3313
0
  }
3314
0
  // Base case, the type indexed by SourceIdxs is not a struct, or not all of
3315
0
  // the struct's elements had a value that was inserted directly. In the latter
3316
0
  // case, perhaps we can't determine each of the subelements individually, but
3317
0
  // we might be able to find the complete struct somewhere.
3318
0
3319
0
  // Find the value that is at that particular spot
3320
0
  Value *V = FindInsertedValue(From, Idxs);
3321
0
3322
0
  if (!V)
3323
0
    return nullptr;
3324
0
3325
0
  // Insert the value in the new (sub) aggregate
3326
0
  return InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
3327
0
                                 "tmp", InsertBefore);
3328
0
}
3329
3330
// This helper takes a nested struct and extracts a part of it (which is again a
3331
// struct) into a new value. For example, given the struct:
3332
// { a, { b, { c, d }, e } }
3333
// and the indices "1, 1" this returns
3334
// { c, d }.
3335
//
3336
// It does this by inserting an insertvalue for each element in the resulting
3337
// struct, as opposed to just inserting a single struct. This will only work if
3338
// each of the elements of the substruct are known (ie, inserted into From by an
3339
// insertvalue instruction somewhere).
3340
//
3341
// All inserted insertvalue instructions are inserted before InsertBefore
3342
static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
3343
0
                                Instruction *InsertBefore) {
3344
0
  assert(InsertBefore && "Must have someplace to insert!");
3345
0
  Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
3346
0
                                                             idx_range);
3347
0
  Value *To = UndefValue::get(IndexedType);
3348
0
  SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
3349
0
  unsigned IdxSkip = Idxs.size();
3350
0
3351
0
  return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
3352
0
}
3353
3354
/// Given an aggregate and a sequence of indices, see if the scalar value
3355
/// indexed is already around as a register, for example if it was inserted
3356
/// directly into the aggregate.
3357
///
3358
/// If InsertBefore is not null, this function will duplicate (modified)
3359
/// insertvalues when a part of a nested struct is extracted.
3360
Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
3361
27
                               Instruction *InsertBefore) {
3362
27
  // Nothing to index? Just return V then (this is useful at the end of our
3363
27
  // recursion).
3364
27
  if (idx_range.empty())
3365
10
    return V;
3366
17
  // We have indices, so V should have an indexable type.
3367
17
  assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
3368
17
         "Not looking at a struct or array?");
3369
17
  assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
3370
17
         "Invalid indices for type?");
3371
17
3372
17
  if (Constant *C = dyn_cast<Constant>(V)) {
3373
0
    C = C->getAggregateElement(idx_range[0]);
3374
0
    if (!C) return nullptr;
3375
0
    return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
3376
0
  }
3377
17
3378
17
  if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
3379
15
    // Loop the indices for the insertvalue instruction in parallel with the
3380
15
    // requested indices
3381
15
    const unsigned *req_idx = idx_range.begin();
3382
15
    for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
3383
25
         i != e; 
++i, ++req_idx10
) {
3384
15
      if (req_idx == idx_range.end()) {
3385
0
        // We can't handle this without inserting insertvalues
3386
0
        if (!InsertBefore)
3387
0
          return nullptr;
3388
0
3389
0
        // The requested index identifies a part of a nested aggregate. Handle
3390
0
        // this specially. For example,
3391
0
        // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
3392
0
        // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
3393
0
        // %C = extractvalue {i32, { i32, i32 } } %B, 1
3394
0
        // This can be changed into
3395
0
        // %A = insertvalue {i32, i32 } undef, i32 10, 0
3396
0
        // %C = insertvalue {i32, i32 } %A, i32 11, 1
3397
0
        // which allows the unused 0,0 element from the nested struct to be
3398
0
        // removed.
3399
0
        return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx),
3400
0
                                 InsertBefore);
3401
0
      }
3402
15
3403
15
      // This insertvalue inserts something other than what we are looking for.
3404
15
      // See if the (aggregate) value inserted into has the value we are
3405
15
      // looking for, then.
3406
15
      if (*req_idx != *i)
3407
5
        return FindInsertedValue(I->getAggregateOperand(), idx_range,
3408
5
                                 InsertBefore);
3409
15
    }
3410
15
    // If we end up here, the indices of the insertvalue match with those
3411
15
    // requested (though possibly only partially). Now we recursively look at
3412
15
    // the inserted value, passing any remaining indices.
3413
15
    return FindInsertedValue(I->getInsertedValueOperand(),
3414
10
                             makeArrayRef(req_idx, idx_range.end()),
3415
10
                             InsertBefore);
3416
2
  }
3417
2
3418
2
  if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
3419
0
    // If we're extracting a value from an aggregate that was extracted from
3420
0
    // something else, we can extract from that something else directly instead.
3421
0
    // However, we will need to chain I's indices with the requested indices.
3422
0
3423
0
    // Calculate the number of indices required
3424
0
    unsigned size = I->getNumIndices() + idx_range.size();
3425
0
    // Allocate some space to put the new indices in
3426
0
    SmallVector<unsigned, 5> Idxs;
3427
0
    Idxs.reserve(size);
3428
0
    // Add indices from the extract value instruction
3429
0
    Idxs.append(I->idx_begin(), I->idx_end());
3430
0
3431
0
    // Add requested indices
3432
0
    Idxs.append(idx_range.begin(), idx_range.end());
3433
0
3434
0
    assert(Idxs.size() == size
3435
0
           && "Number of indices added not correct?");
3436
0
3437
0
    return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
3438
0
  }
3439
2
  // Otherwise, we don't know (such as, extracting from a function return value
3440
2
  // or load instruction)
3441
2
  return nullptr;
3442
2
}
3443
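A hedged illustration:

  %A = insertvalue {i32, i32} undef, i32 42, 1

FindInsertedValue(%A, {1}) returns the i32 42 directly, while
FindInsertedValue(%A, {0}) recurses into the aggregate operand and comes back
with i32 undef via the Constant path.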
3444
bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
3445
5.54M
                                       unsigned CharSize) {
3446
5.54M
  // Make sure the GEP has exactly three arguments.
3447
5.54M
  if (GEP->getNumOperands() != 3)
3448
539k
    return false;
3449
5.00M
3450
5.00M
  // Make sure the index-ee is a pointer to an array of \p CharSize
3451
5.00M
  // integers.
3452
5.00M
  ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
3453
5.00M
  if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
3454
4.95M
    return false;
3455
47.6k
3456
47.6k
  // Check to make sure that the first operand of the GEP is an integer and
3457
47.6k
  // has value 0 so that we are sure we're indexing into the initializer.
3458
47.6k
  const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
3459
47.6k
  if (!FirstIdx || !FirstIdx->isZero())
3460
38
    return false;
3461
47.6k
3462
47.6k
  return true;
3463
47.6k
}
3464
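For example (hedged), with CharSize = 8 this accepts

  getelementptr inbounds [6 x i8], [6 x i8]* @str, i64 0, i64 %idx

but rejects a GEP with a different operand count or one whose first index is
nonzero.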
3465
bool llvm::getConstantDataArrayInfo(const Value *V,
3466
                                    ConstantDataArraySlice &Slice,
3467
7.89M
                                    unsigned ElementSize, uint64_t Offset) {
3468
7.89M
  assert(V);
3469
7.89M
3470
7.89M
  // Look through bitcast instructions and geps.
3471
7.89M
  V = V->stripPointerCasts();
3472
7.89M
3473
7.89M
  // If the value is a GEP instruction or constant expression, treat it as an
3474
7.89M
  // offset.
3475
7.89M
  if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
3476
5.53M
    // The GEP operator should be based on a pointer to string constant, and is
3477
5.53M
    // indexing into the string constant.
3478
5.53M
    if (!isGEPBasedOnPointerToString(GEP, ElementSize))
3479
5.49M
      return false;
3480
43.2k
3481
43.2k
    // If the second index isn't a ConstantInt, then this is a variable index
3482
43.2k
    // into the array.  If this occurs, we can't say anything meaningful about
3483
43.2k
    // the string.
3484
43.2k
    uint64_t StartIdx = 0;
3485
43.2k
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
3486
42.8k
      StartIdx = CI->getZExtValue();
3487
410
    else
3488
410
      return false;
3489
42.8k
    return getConstantDataArrayInfo(GEP->getOperand(0), Slice, ElementSize,
3490
42.8k
                                    StartIdx + Offset);
3491
42.8k
  }
3492
2.36M
3493
2.36M
  // The GEP instruction, constant or instruction, must reference a global
3494
2.36M
  // variable that is a constant and is initialized. The referenced constant
3495
2.36M
  // initializer is the array that we'll use for optimization.
3496
2.36M
  const GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
3497
2.36M
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
3498
1.80M
    return false;
3499
559k
3500
559k
  const ConstantDataArray *Array;
3501
559k
  ArrayType *ArrayTy;
3502
559k
  if (GV->getInitializer()->isNullValue()) {
3503
981
    Type *GVTy = GV->getValueType();
3504
981
    if ( (ArrayTy = dyn_cast<ArrayType>(GVTy)) ) {
3505
975
      // A zeroinitializer for the array; there is no ConstantDataArray.
3506
975
      Array = nullptr;
3507
975
    } else {
3508
6
      const DataLayout &DL = GV->getParent()->getDataLayout();
3509
6
      uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy);
3510
6
      uint64_t Length = SizeInBytes / (ElementSize / 8);
3511
6
      if (Length <= Offset)
3512
1
        return false;
3513
5
3514
5
      Slice.Array = nullptr;
3515
5
      Slice.Offset = 0;
3516
5
      Slice.Length = Length - Offset;
3517
5
      return true;
3518
5
    }
3519
558k
  } else {
3520
558k
    // This must be a ConstantDataArray.
3521
558k
    Array = dyn_cast<ConstantDataArray>(GV->getInitializer());
3522
558k
    if (!Array)
3523
231
      return false;
3524
558k
    ArrayTy = Array->getType();
3525
558k
  }
3526
559k
  if (!ArrayTy->getElementType()->isIntegerTy(ElementSize))
3527
72
    return false;
3528
559k
3529
559k
  uint64_t NumElts = ArrayTy->getArrayNumElements();
3530
559k
  if (Offset > NumElts)
3531
2
    return false;
3532
559k
3533
559k
  Slice.Array = Array;
3534
559k
  Slice.Offset = Offset;
3535
559k
  Slice.Length = NumElts - Offset;
3536
559k
  return true;
3537
559k
}
3538
3539
/// This function computes the length of a null-terminated C string pointed to
3540
/// by V. If successful, it returns true and returns the string in Str.
3541
/// If unsuccessful, it returns false.
3542
bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
3543
7.63M
                                 uint64_t Offset, bool TrimAtNul) {
3544
7.63M
  ConstantDataArraySlice Slice;
3545
7.63M
  if (!getConstantDataArrayInfo(V, Slice, 8, Offset))
3546
7.15M
    return false;
3547
474k
3548
474k
  if (Slice.Array == nullptr) {
3549
100
    if (TrimAtNul) {
3550
98
      Str = StringRef();
3551
98
      return true;
3552
98
    }
3553
2
    if (Slice.Length == 1) {
3554
2
      Str = StringRef("", 1);
3555
2
      return true;
3556
2
    }
3557
0
    // We cannot instantiate a StringRef as we do not have an appropriate string
3558
0
    // of 0s at hand.
3559
0
    return false;
3560
0
  }
3561
474k
3562
474k
  // Start out with the entire array in the StringRef.
3563
474k
  Str = Slice.Array->getAsString();
3564
474k
  // Skip over 'offset' bytes.
3565
474k
  Str = Str.substr(Slice.Offset);
3566
474k
3567
474k
  if (TrimAtNul) {
3568
473k
    // Trim off the \0 and anything after it.  If the array is not nul
3569
473k
    // terminated, we just return the rest of the string.  The client may know
3570
473k
    // some other way that the string is length-bound.
3571
473k
    Str = Str.substr(0, Str.find('\0'));
3572
473k
  }
3573
474k
  return true;
3574
474k
}
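A hedged usage sketch of the function above; the Value *V and the global it is assumed to point into are illustrative assumptions, not taken from this report:

  StringRef Str;
  if (getConstantStringInfo(V, Str, /*Offset=*/0, /*TrimAtNul=*/true)) {
    // For V pointing at @.str = c"hello\00", Str is "hello": the slice is
    // trimmed at the first '\0'. With TrimAtNul == false the nul and any
    // bytes after it, up to the end of the array, would be kept.
  }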
3575
3576
// These next two are very similar to the above, but also look through PHI
3577
// nodes.
3578
// TODO: See if we can integrate these two together.
3579
3580
/// If we can compute the length of the string pointed to by
3581
/// the specified pointer, return 'len+1'.  If we can't, return 0.
3582
static uint64_t GetStringLengthH(const Value *V,
3583
                                 SmallPtrSetImpl<const PHINode*> &PHIs,
3584
229k
                                 unsigned CharSize) {
3585
229k
  // Look through noop bitcast instructions.
3586
229k
  V = V->stripPointerCasts();
3587
229k
3588
229k
  // If this is a PHI node, there are two cases: either we have already seen it
3589
229k
  // or we haven't.
3590
229k
  if (const PHINode *PN = dyn_cast<PHINode>(V)) {
3591
7.84k
    if (!PHIs.insert(PN).second)
3592
1.75k
      return ~0ULL;  // already in the set.
3593
6.08k
3594
6.08k
    // If it was new, see if all the input strings are the same length.
3595
6.08k
    uint64_t LenSoFar = ~0ULL;
3596
8.12k
    for (Value *IncValue : PN->incoming_values()) {
3597
8.12k
      uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize);
3598
8.12k
      if (Len == 0) return 0; // Unknown length -> unknown.
3599
2.25k
3600
2.25k
      if (Len == ~0ULL) continue;
3601
491
3602
491
      if (Len != LenSoFar && LenSoFar != ~0ULL)
3603
196
        return 0;    // Disagree -> unknown.
3604
295
      LenSoFar = Len;
3605
295
    }
3606
6.08k
3607
6.08k
    // Success, all agree.
3608
6.08k
    
    return LenSoFar;
3609
221k
  }
3610
221k
3611
221k
  // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
3612
221k
  if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
3613
1.65k
    uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize);
3614
1.65k
    if (Len1 == 0) return 0;
3615
695
    uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize);
3616
695
    if (Len2 == 0) return 0;
3617
294
    if (Len1 == ~0ULL) return Len2;
3618
294
    if (Len2 == ~0ULL) return Len1;
3619
294
    if (Len1 != Len2) return 0;
3620
9
    return Len1;
3621
9
  }
3622
219k
3623
219k
  // Otherwise, see if we can read the string.
3624
219k
  ConstantDataArraySlice Slice;
3625
219k
  if (!getConstantDataArrayInfo(V, Slice, CharSize))
3626
135k
    return 0;
3627
84.4k
3628
84.4k
  if (Slice.Array == nullptr)
3629
864
    return 1;
3630
83.5k
3631
83.5k
  // Search for nul characters
3632
83.5k
  unsigned NullIndex = 0;
3633
589k
  for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
3634
589k
    if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
3635
83.5k
      break;
3636
589k
  }
3637
83.5k
3638
83.5k
  return NullIndex + 1;
3639
83.5k
}
3640
3641
/// If we can compute the length of the string pointed to by
3642
/// the specified pointer, return 'len+1'.  If we can't, return 0.
3643
218k
uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
3644
218k
  if (!V->getType()->isPointerTy())
3645
0
    return 0;
3646
218k
3647
218k
  SmallPtrSet<const PHINode*, 32> PHIs;
3648
218k
  uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
3649
218k
  // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
3650
218k
  // an empty string as a length.
3651
218k
  return Len == ~0ULL ? 1 : Len;
3652
218k
}
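A standalone check (plain C++, assumed string "abc") of the convention documented above: the reported value is the index of the first nul plus one, i.e. strlen(s) + 1, and 0 means the length is unknown.

  #include <cassert>
  #include <cstring>

  int main() {
    const char S[] = "abc";
    unsigned NullIndex = 0;
    while (S[NullIndex] != '\0') // the same scan GetStringLengthH performs
      ++NullIndex;
    assert(NullIndex + 1 == std::strlen(S) + 1); // == 4: the nul is counted
  }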
3653
3654
26.8M
const Value *llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call) {
3655
26.8M
  assert(Call &&
3656
26.8M
         "getArgumentAliasingToReturnedPointer only works on nonnull calls");
3657
26.8M
  if (const Value *RV = Call->getReturnedArgOperand())
3658
8.53k
    return RV;
3659
26.8M
  // This can be used only as an aliasing property.
3660
26.8M
  if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call))
3661
433
    return Call->getArgOperand(0);
3662
26.8M
  return nullptr;
3663
26.8M
}
3664
3665
bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
3666
37.5M
    const CallBase *Call) {
3667
37.5M
  return Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
3668
37.5M
         
         Call->getIntrinsicID() == Intrinsic::strip_invariant_group ||
3669
37.5M
         
         Call->getIntrinsicID() == Intrinsic::aarch64_irg ||
3670
37.5M
         
         Call->getIntrinsicID() == Intrinsic::aarch64_tagp;
3671
37.5M
}
3672
3673
/// \p PN defines a loop-variant pointer to an object.  Check if the
3674
/// previous iteration of the loop was referring to the same object as \p PN.
3675
static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
3676
42.5k
                                         const LoopInfo *LI) {
3677
42.5k
  // Find the loop-defined value.
3678
42.5k
  Loop *L = LI->getLoopFor(PN->getParent());
3679
42.5k
  if (PN->getNumIncomingValues() != 2)
3680
0
    return true;
3681
42.5k
3682
42.5k
  // Find the value from previous iteration.
3683
42.5k
  auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
3684
42.5k
  if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
3685
33.6k
    PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
3686
42.5k
  if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
3687
17
    return true;
3688
42.5k
3689
42.5k
  // If a new pointer is loaded in the loop, the pointer references a different
3690
42.5k
  // object in every iteration.  E.g.:
3691
42.5k
  //    for (i)
3692
42.5k
  //       int *p = a[i];
3693
42.5k
  //       ...
3694
42.5k
  if (auto *Load = dyn_cast<LoadInst>(PrevValue))
3695
424
    if (!L->isLoopInvariant(Load->getPointerOperand()))
3696
286
      return false;
3697
42.2k
  return true;
3698
42.2k
}
3699
3700
Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
3701
305M
                                 unsigned MaxLookup) {
3702
305M
  if (!V->getType()->isPointerTy())
3703
47.7k
    return V;
3704
517M
  
  for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
3705
516M
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
3706
177M
      V = GEP->getPointerOperand();
3707
338M
    } else if (Operator::getOpcode(V) == Instruction::BitCast ||
3708
338M
               
               Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
3709
33.3M
      V = cast<Operator>(V)->getOperand(0);
3710
305M
    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
3711
83
      if (GA->isInterposable())
3712
0
        return V;
3713
83
      V = GA->getAliasee();
3714
305M
    } else if (isa<AllocaInst>(V)) {
3715
36.3M
      // An alloca can't be further simplified.
3716
36.3M
      return V;
3717
268M
    } else {
3718
268M
      if (auto *Call = dyn_cast<CallBase>(V)) {
3719
16.6M
        // CaptureTracking can know about special capturing properties of some
3720
16.6M
        // intrinsics like launder.invariant.group, that can't be expressed with
3721
16.6M
        // the attributes, but have properties like returning aliasing pointer.
3722
16.6M
        // Because some analyses may assume that a nocaptured pointer is not
3723
16.6M
        // returned from some special intrinsic (because the function would have to
3724
16.6M
        // be marked with the returned attribute), it is crucial to use this function
3725
16.6M
        // because it should be in sync with CaptureTracking. Not using it may
3726
16.6M
        // cause weird miscompilations where two aliasing pointers are assumed
3727
16.6M
        // not to alias.
3728
16.6M
        if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) {
3729
6.49k
          V = RP;
3730
6.49k
          continue;
3731
6.49k
        }
3732
268M
      }
3733
268M
3734
268M
      // See if InstructionSimplify knows any relevant tricks.
3735
268M
      if (Instruction *I = dyn_cast<Instruction>(V))
3736
147M
        // TODO: Acquire a DominatorTree and AssumptionCache and use them.
3737
147M
        if (Value *Simplified = SimplifyInstruction(I, {DL, I})) {
3738
357k
          V = Simplified;
3739
357k
          continue;
3740
357k
        }
3741
268M
3742
268M
      return V;
3743
268M
    }
3744
211M
    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
3745
211M
  }
3746
305M
  
  return V;
3747
305M
}
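A hypothetical IR sketch (the %names are assumptions, not from this report) of what the loop above strips:

  //   %a   = alloca [16 x i8]
  //   %gep = getelementptr [16 x i8], [16 x i8]* %a, i64 0, i64 4
  //   %bc  = bitcast i8* %gep to i32*
  // GetUnderlyingObject(%bc, DL, /*MaxLookup=*/6) walks through the bitcast
  // and the GEP and returns %a, stopping there because an alloca cannot be
  // simplified any further.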
3748
3749
void llvm::GetUnderlyingObjects(const Value *V,
3750
                                SmallVectorImpl<const Value *> &Objects,
3751
                                const DataLayout &DL, LoopInfo *LI,
3752
14.0M
                                unsigned MaxLookup) {
3753
14.0M
  SmallPtrSet<const Value *, 4> Visited;
3754
14.0M
  SmallVector<const Value *, 4> Worklist;
3755
14.0M
  Worklist.push_back(V);
3756
20.7M
  do {
3757
20.7M
    const Value *P = Worklist.pop_back_val();
3758
20.7M
    P = GetUnderlyingObject(P, DL, MaxLookup);
3759
20.7M
3760
20.7M
    if (!Visited.insert(P).second)
3761
2.73M
      continue;
3762
18.0M
3763
18.0M
    if (auto *SI = dyn_cast<SelectInst>(P)) {
3764
75.6k
      Worklist.push_back(SI->getTrueValue());
3765
75.6k
      Worklist.push_back(SI->getFalseValue());
3766
75.6k
      continue;
3767
75.6k
    }
3768
17.9M
3769
17.9M
    if (auto *PN = dyn_cast<PHINode>(P)) {
3770
2.67M
      // If this PHI changes the underlying object in every iteration of the
3771
2.67M
      // loop, don't look through it.  Consider:
3772
2.67M
      //   int **A;
3773
2.67M
      //   for (i) {
3774
2.67M
      //     Prev = Curr;     // Prev = PHI (Prev_0, Curr)
3775
2.67M
      //     Curr = A[i];
3776
2.67M
      //     *Prev, *Curr;
3777
2.67M
      //
3778
2.67M
      // Prev is tracking Curr one iteration behind so they refer to different
3779
2.67M
      // underlying objects.
3780
2.67M
      if (!LI || !LI->isLoopHeader(PN->getParent()) ||
3781
2.67M
          
          isSameUnderlyingObjectInLoop(PN, LI))
3782
2.67M
        for (Value *IncValue : PN->incoming_values())
3783
6.58M
          Worklist.push_back(IncValue);
3784
2.67M
      continue;
3785
2.67M
    }
3786
15.3M
3787
15.3M
    Objects.push_back(P);
3788
20.7M
  } while (!Worklist.empty());
3789
14.0M
}
3790
3791
/// This is the function that does the work of looking through basic
3792
/// ptrtoint+arithmetic+inttoptr sequences.
3793
74.3k
static const Value *getUnderlyingObjectFromInt(const Value *V) {
3794
76.5k
  do {
3795
76.5k
    if (const Operator *U = dyn_cast<Operator>(V)) {
3796
73.1k
      // If we find a ptrtoint, we can transfer control back to the
3797
73.1k
      // regular getUnderlyingObjectFromInt.
3798
73.1k
      if (U->getOpcode() == Instruction::PtrToInt)
3799
345
        return U->getOperand(0);
3800
72.7k
      // If we find an add of a constant, a multiplied value, or a phi, it's
3801
72.7k
      // likely that the other operand will lead us to the base
3802
72.7k
      // object. We don't have to worry about the case where the
3803
72.7k
      // object address is somehow being computed by the multiply,
3804
72.7k
      // because our callers only care when the result is an
3805
72.7k
      // identifiable object.
3806
72.7k
      if (U->getOpcode() != Instruction::Add ||
3807
72.7k
          
          (!isa<ConstantInt>(U->getOperand(1)) &&
3808
9.67k
           
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
3809
9.67k
           
           !isa<PHINode>(U->getOperand(1))))
3810
70.6k
        return V;
3811
2.15k
      V = U->getOperand(0);
3812
3.37k
    } else {
3813
3.37k
      return V;
3814
3.37k
    }
3815
2.15k
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
3816
2.15k
  } while (true);
3817
74.3k
}
3818
3819
/// This is a wrapper around GetUnderlyingObjects and adds support for basic
3820
/// ptrtoint+arithmetic+inttoptr sequences.
3821
/// It returns false if an unidentified object is found by GetUnderlyingObjects.
3822
bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
3823
                          SmallVectorImpl<Value *> &Objects,
3824
2.88M
                          const DataLayout &DL) {
3825
2.88M
  SmallPtrSet<const Value *, 16> Visited;
3826
2.88M
  SmallVector<const Value *, 4> Working(1, V);
3827
2.88M
  do {
3828
2.88M
    V = Working.pop_back_val();
3829
2.88M
3830
2.88M
    SmallVector<const Value *, 4> Objs;
3831
2.88M
    GetUnderlyingObjects(V, Objs, DL);
3832
2.88M
3833
2.89M
    for (const Value *V : Objs) {
3834
2.89M
      if (!Visited.insert(V).second)
3835
0
        continue;
3836
2.89M
      if (Operator::getOpcode(V) == Instruction::IntToPtr) {
3837
74.3k
        const Value *O =
3838
74.3k
          getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
3839
74.3k
        if (O->getType()->isPointerTy()) {
3840
345
          Working.push_back(O);
3841
345
          continue;
3842
345
        }
3843
2.89M
      }
3844
2.89M
      // If GetUnderlyingObjects fails to find an identifiable object,
3845
2.89M
      // getUnderlyingObjectsForCodeGen also fails for safety.
3846
2.89M
      if (!isIdentifiedObject(V)) {
3847
1.68M
        Objects.clear();
3848
1.68M
        return false;
3849
1.68M
      }
3850
1.21M
      Objects.push_back(const_cast<Value *>(V));
3851
1.21M
    }
3852
2.88M
  } while (!Working.empty());
3853
2.88M
  
  return true;
3854
2.88M
}
3855
3856
/// Return true if the only users of this pointer are lifetime markers.
3857
11.5k
bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
3858
19.6k
  for (const User *U : V->users()) {
3859
19.6k
    const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
3860
19.6k
    if (!II) return false;
3861
12.1k
3862
12.1k
    if (!II->isLifetimeStartOrEnd())
3863
3.76k
      return false;
3864
12.1k
  }
3865
11.5k
  
  return true;
3866
11.5k
}
3867
3868
bool llvm::isSafeToSpeculativelyExecute(const Value *V,
3869
                                        const Instruction *CtxI,
3870
27.3M
                                        const DominatorTree *DT) {
3871
27.3M
  const Operator *Inst = dyn_cast<Operator>(V);
3872
27.3M
  if (!Inst)
3873
604
    return false;
3874
27.3M
3875
83.3M
  
  for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
3876
56.0M
    if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
3877
21.9M
      if (C->canTrap())
3878
7
        return false;
3879
27.3M
3880
27.3M
  switch (Inst->getOpcode()) {
3881
27.3M
  default:
3882
16.5M
    return true;
3883
27.3M
  case Instruction::UDiv:
3884
70.3k
  case Instruction::URem: {
3885
70.3k
    // x / y is undefined if y == 0.
3886
70.3k
    const APInt *V;
3887
70.3k
    if (match(Inst->getOperand(1), m_APInt(V)))
3888
12.3k
      return *V != 0;
3889
58.0k
    return false;
3890
58.0k
  }
3891
58.0k
  case Instruction::SDiv:
3892
34.3k
  case Instruction::SRem: {
3893
34.3k
    // x / y is undefined if y == 0 or x == INT_MIN and y == -1
3894
34.3k
    const APInt *Numerator, *Denominator;
3895
34.3k
    if (!match(Inst->getOperand(1), m_APInt(Denominator)))
3896
16.4k
      return false;
3897
17.8k
    // We cannot hoist this division if the denominator is 0.
3898
17.8k
    if (*Denominator == 0)
3899
8
      return false;
3900
17.8k
    // It's safe to hoist if the denominator is not 0 or -1.
3901
17.8k
    if (*Denominator != -1)
3902
17.8k
      return true;
3903
1
    // At this point we know that the denominator is -1.  It is safe to hoist as
3904
1
    // long we know that the numerator is not INT_MIN.
3905
1
    if (match(Inst->getOperand(0), m_APInt(Numerator)))
3906
0
      return !Numerator->isMinSignedValue();
3907
1
    // The numerator *might* be MinSignedValue.
3908
1
    return false;
3909
1
  }
3910
3.40M
  case Instruction::Load: {
3911
3.40M
    const LoadInst *LI = cast<LoadInst>(Inst);
3912
3.40M
    if (!LI->isUnordered() ||
3913
3.40M
        // Speculative load may create a race that did not exist in the source.
3914
3.40M
        
        LI->getFunction()->hasFnAttribute(Attribute::SanitizeThread) ||
3915
3.40M
        // Speculative load may load data from dirty regions.
3916
3.40M
        
        LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3917
3.40M
        
        LI->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
3918
24.9k
      return false;
3919
3.37M
    const DataLayout &DL = LI->getModule()->getDataLayout();
3920
3.37M
    return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
3921
3.37M
                                              LI->getType(), LI->getAlignment(),
3922
3.37M
                                              DL, CtxI, DT);
3923
3.37M
  }
3924
3.37M
  case Instruction::Call: {
3925
2.03M
    auto *CI = cast<const CallInst>(Inst);
3926
2.03M
    const Function *Callee = CI->getCalledFunction();
3927
2.03M
3928
2.03M
    // The called function could have undefined behavior or side-effects, even
3929
2.03M
    // if marked readnone nounwind.
3930
2.03M
    return Callee && Callee->isSpeculatable();
3931
3.37M
  }
3932
5.19M
  case Instruction::VAArg:
3933
5.19M
  case Instruction::Alloca:
3934
5.19M
  case Instruction::Invoke:
3935
5.19M
  case Instruction::CallBr:
3936
5.19M
  case Instruction::PHI:
3937
5.19M
  case Instruction::Store:
3938
5.19M
  case Instruction::Ret:
3939
5.19M
  case Instruction::Br:
3940
5.19M
  case Instruction::IndirectBr:
3941
5.19M
  case Instruction::Switch:
3942
5.19M
  case Instruction::Unreachable:
3943
5.19M
  case Instruction::Fence:
3944
5.19M
  case Instruction::AtomicRMW:
3945
5.19M
  case Instruction::AtomicCmpXchg:
3946
5.19M
  case Instruction::LandingPad:
3947
5.19M
  case Instruction::Resume:
3948
5.19M
  case Instruction::CatchSwitch:
3949
5.19M
  case Instruction::CatchPad:
3950
5.19M
  case Instruction::CatchRet:
3951
5.19M
  case Instruction::CleanupPad:
3952
5.19M
  case Instruction::CleanupRet:
3953
5.19M
    return false; // Misc instructions which have effects
3954
27.3M
  }
3955
27.3M
}
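A standalone check (plain C++, assumed i32 operands) of the SDiv/SRem case above: besides a zero denominator, the only trapping combination is INT_MIN divided by -1, whose true quotient does not fit the type.

  #include <cassert>
  #include <climits>

  int main() {
    long long Num = INT_MIN, Den = -1;
    long long Quot = Num / Den;   // computed in 64 bits to avoid the trap
    assert(Quot == 2147483648LL); // one more than INT_MAX: an actual i32
                                  // sdiv would overflow, hence the checks
  }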
3956
3957
14.1M
bool llvm::mayBeMemoryDependent(const Instruction &I) {
3958
14.1M
  return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
3959
14.1M
}
3960
3961
/// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
3962
9.28M
static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
3963
9.28M
  switch (OR) {
3964
9.28M
    case ConstantRange::OverflowResult::MayOverflow:
3965
9.21M
      return OverflowResult::MayOverflow;
3966
9.28M
    case ConstantRange::OverflowResult::AlwaysOverflowsLow:
3967
1.98k
      return OverflowResult::AlwaysOverflowsLow;
3968
9.28M
    case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
3969
23.5k
      return OverflowResult::AlwaysOverflowsHigh;
3970
9.28M
    case ConstantRange::OverflowResult::NeverOverflows:
3971
50.7k
      return OverflowResult::NeverOverflows;
3972
0
  }
3973
0
  llvm_unreachable("Unknown OverflowResult");
3974
0
}
3975
3976
/// Combine constant ranges from computeConstantRange() and computeKnownBits().
3977
static ConstantRange computeConstantRangeIncludingKnownBits(
3978
    const Value *V, bool ForSigned, const DataLayout &DL, unsigned Depth,
3979
    AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
3980
16.7M
    OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true) {
3981
16.7M
  KnownBits Known = computeKnownBits(
3982
16.7M
      V, DL, Depth, AC, CxtI, DT, ORE, UseInstrInfo);
3983
16.7M
  ConstantRange CR1 = ConstantRange::fromKnownBits(Known, ForSigned);
3984
16.7M
  ConstantRange CR2 = computeConstantRange(V, UseInstrInfo);
3985
16.7M
  ConstantRange::PreferredRangeType RangeType =
3986
16.7M
      ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
3987
16.7M
  return CR1.intersectWith(CR2, RangeType);
3988
16.7M
}
3989
3990
OverflowResult llvm::computeOverflowForUnsignedMul(
3991
    const Value *LHS, const Value *RHS, const DataLayout &DL,
3992
    AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
3993
897k
    bool UseInstrInfo) {
3994
897k
  KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
3995
897k
                                        nullptr, UseInstrInfo);
3996
897k
  KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
3997
897k
                                        nullptr, UseInstrInfo);
3998
897k
  ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
3999
897k
  ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
4000
897k
  return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
4001
897k
}
4002
4003
OverflowResult
4004
llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
4005
                                  const DataLayout &DL, AssumptionCache *AC,
4006
                                  const Instruction *CxtI,
4007
410k
                                  const DominatorTree *DT, bool UseInstrInfo) {
4008
410k
  // Multiplying n * m significant bits yields a result of n + m significant
4009
410k
  // bits. If the total number of significant bits does not exceed the
4010
410k
  // result bit width (minus 1), there is no overflow.
4011
410k
  // This means if we have enough leading sign bits in the operands
4012
410k
  // we can guarantee that the result does not overflow.
4013
410k
  // Ref: "Hacker's Delight" by Henry Warren
4014
410k
  unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
4015
410k
4016
410k
  // Note that underestimating the number of sign bits gives a more
4017
410k
  // conservative answer.
4018
410k
  unsigned SignBits = ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) +
4019
410k
                      ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT);
4020
410k
4021
410k
  // First handle the easy case: if we have enough sign bits there's
4022
410k
  // definitely no overflow.
4023
410k
  if (SignBits > BitWidth + 1)
4024
2.52k
    return OverflowResult::NeverOverflows;
4025
408k
4026
408k
  // There are two ambiguous cases where there can be no overflow:
4027
408k
  //   SignBits == BitWidth + 1    and
4028
408k
  //   SignBits == BitWidth
4029
408k
  // The second case is difficult to check, therefore we only handle the
4030
408k
  // first case.
4031
408k
  if (SignBits == BitWidth + 1) {
4032
1.45k
    // It overflows only when both arguments are negative and the true
4033
1.45k
    // product is exactly the minimum negative number.
4034
1.45k
    // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
4035
1.45k
    // For simplicity we just check if at least one side is not negative.
4036
1.45k
    KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
4037
1.45k
                                          nullptr, UseInstrInfo);
4038
1.45k
    KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
4039
1.45k
                                          nullptr, UseInstrInfo);
4040
1.45k
    if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
4041
178
      return OverflowResult::NeverOverflows;
4042
407k
  }
4043
407k
  return OverflowResult::MayOverflow;
4044
407k
}
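A standalone check (plain C++) of the i16 example quoted in the comment above: with 8 + 9 = 17 sign bits, exactly BitWidth + 1, two negative operands can still hit the single overflowing product, INT16_MIN.

  #include <cassert>
  #include <cstdint>

  int main() {
    int32_t LHS = (int16_t)0xff00; // -256: 8 sign bits as an i16
    int32_t RHS = (int16_t)0xff80; // -128: 9 sign bits as an i16
    assert(LHS * RHS == 0x8000);   // 32768 > INT16_MAX, so i16 overflows
  }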
4045
4046
OverflowResult llvm::computeOverflowForUnsignedAdd(
4047
    const Value *LHS, const Value *RHS, const DataLayout &DL,
4048
    AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
4049
3.90M
    bool UseInstrInfo) {
4050
3.90M
  ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
4051
3.90M
      LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT,
4052
3.90M
      nullptr, UseInstrInfo);
4053
3.90M
  ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
4054
3.90M
      RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT,
4055
3.90M
      nullptr, UseInstrInfo);
4056
3.90M
  return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange));
4057
3.90M
}
4058
4059
static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
4060
                                                  const Value *RHS,
4061
                                                  const AddOperator *Add,
4062
                                                  const DataLayout &DL,
4063
                                                  AssumptionCache *AC,
4064
                                                  const Instruction *CxtI,
4065
2.26M
                                                  const DominatorTree *DT) {
4066
2.26M
  if (Add && Add->hasNoSignedWrap()) {
4067
26
    return OverflowResult::NeverOverflows;
4068
26
  }
4069
2.26M
4070
2.26M
  // If LHS and RHS each have at least two sign bits, the addition will look
4071
2.26M
  // like
4072
2.26M
  //
4073
2.26M
  // XX..... +
4074
2.26M
  // YY.....
4075
2.26M
  //
4076
2.26M
  // If the carry into the most significant position is 0, X and Y can't both
4077
2.26M
  // be 1 and therefore the carry out of the addition is also 0.
4078
2.26M
  //
4079
2.26M
  // If the carry into the most significant position is 1, X and Y can't both
4080
2.26M
  // be 0 and therefore the carry out of the addition is also 1.
4081
2.26M
  //
4082
2.26M
  // Since the carry into the most significant position is always equal to
4083
2.26M
  // the carry out of the addition, there is no signed overflow.
4084
2.26M
  if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 &&
4085
2.26M
      
      ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
4086
14.9k
    return OverflowResult::NeverOverflows;
4087
2.25M
4088
2.25M
  ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
4089
2.25M
      LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
4090
2.25M
  ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
4091
2.25M
      RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
4092
2.25M
  OverflowResult OR =
4093
2.25M
      mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange));
4094
2.25M
  if (OR != OverflowResult::MayOverflow)
4095
719
    return OR;
4096
2.25M
4097
2.25M
  // The remaining code needs Add to be available, so return early if it is not.
4098
2.25M
  if (!Add)
4099
2.25M
    return OverflowResult::MayOverflow;
4100
11
4101
11
  // If the sign of Add is the same as at least one of the operands, this add
4102
11
  // CANNOT overflow. If this can be determined from the known bits of the
4103
11
  // operands the above signedAddMayOverflow() check will have already done so.
4104
11
  // The only other way to improve on the known bits is from an assumption, so
4105
11
  // call computeKnownBitsFromAssume() directly.
4106
11
  bool LHSOrRHSKnownNonNegative =
4107
11
      (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative());
4108
11
  bool LHSOrRHSKnownNegative =
4109
11
      (LHSRange.isAllNegative() || RHSRange.isAllNegative());
4110
11
  if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
4111
6
    KnownBits AddKnown(LHSRange.getBitWidth());
4112
6
    computeKnownBitsFromAssume(
4113
6
        Add, AddKnown, /*Depth=*/0, Query(DL, AC, CxtI, DT, true));
4114
6
    if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
4115
6
        
        (AddKnown.isNegative() && LHSOrRHSKnownNegative))
4116
2
      return OverflowResult::NeverOverflows;
4117
9
  }
4118
9
4119
9
  return OverflowResult::MayOverflow;
4120
9
}
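A standalone check (plain C++, assumed i8 width) of the two-sign-bit argument above: an i8 value with at least two sign bits lies in [-64, 63], and the sum of two such values lies in [-128, 126], which always fits.

  #include <cassert>

  int main() {
    for (int A = -64; A <= 63; ++A)   // every i8 value with >= 2 sign bits
      for (int B = -64; B <= 63; ++B)
        assert(A + B >= -128 && A + B <= 127); // never leaves i8's range
  }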
4121
4122
OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
4123
                                                   const Value *RHS,
4124
                                                   const DataLayout &DL,
4125
                                                   AssumptionCache *AC,
4126
                                                   const Instruction *CxtI,
4127
1.39M
                                                   const DominatorTree *DT) {
4128
1.39M
  ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
4129
1.39M
      LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT);
4130
1.39M
  ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
4131
1.39M
      RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT);
4132
1.39M
  return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange));
4133
1.39M
}
4134
4135
OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
4136
                                                 const Value *RHS,
4137
                                                 const DataLayout &DL,
4138
                                                 AssumptionCache *AC,
4139
                                                 const Instruction *CxtI,
4140
848k
                                                 const DominatorTree *DT) {
4141
848k
  // If LHS and RHS each have at least two sign bits, the subtraction
4142
848k
  // cannot overflow.
4143
848k
  if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 &&
4144
848k
      
      ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
4145
11.8k
    return OverflowResult::NeverOverflows;
4146
836k
4147
836k
  ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
4148
836k
      LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
4149
836k
  ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
4150
836k
      RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
4151
836k
  return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange));
4152
836k
}
4153
4154
bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
4155
155
                                     const DominatorTree &DT) {
4156
155
  SmallVector<const BranchInst *, 2> GuardingBranches;
4157
155
  SmallVector<const ExtractValueInst *, 2> Results;
4158
155
4159
310
  for (const User *U : WO->users()) {
4160
310
    if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) {
4161
310
      assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");
4162
310
4163
310
      if (EVI->getIndices()[0] == 0)
4164
155
        Results.push_back(EVI);
4165
155
      else {
4166
155
        assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type");
4167
155
4168
155
        for (const auto *U : EVI->users())
4169
156
          if (const auto *B = dyn_cast<BranchInst>(U)) {
4170
132
            assert(B->isConditional() && "How else is it using an i1?");
4171
132
            GuardingBranches.push_back(B);
4172
132
          }
4173
155
      }
4174
310
    } else {
4175
0
      // We are using the aggregate directly in a way we don't want to analyze
4176
0
      // here (storing it to a global, say).
4177
0
      return false;
4178
0
    }
4179
310
  }
4180
155
4181
155
  auto AllUsesGuardedByBranch = [&](const BranchInst *BI) {
4182
130
    BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1));
4183
130
    if (!NoWrapEdge.isSingleEdge())
4184
0
      return false;
4185
130
4186
130
    // Check if all users of the add are provably no-wrap.
4187
130
    for (const auto *Result : Results) {
4188
130
      // If the extractvalue itself is not executed on overflow, then we don't
4189
130
      // need to check each use separately, since domination is transitive.
4190
130
      if (DT.dominates(NoWrapEdge, Result->getParent()))
4191
81
        continue;
4192
49
4193
49
      for (auto &RU : Result->uses())
4194
49
        if (!DT.dominates(NoWrapEdge, RU))
4195
18
          return false;
4196
49
    }
4197
130
4198
130
    
    return true;
4199
130
  };
4200
155
4201
155
  return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch);
4202
155
}
4203
4204
4205
OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
4206
                                                 const DataLayout &DL,
4207
                                                 AssumptionCache *AC,
4208
                                                 const Instruction *CxtI,
4209
43
                                                 const DominatorTree *DT) {
4210
43
  return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1),
4211
43
                                       Add, DL, AC, CxtI, DT);
4212
43
}
4213
4214
OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS,
4215
                                                 const Value *RHS,
4216
                                                 const DataLayout &DL,
4217
                                                 AssumptionCache *AC,
4218
                                                 const Instruction *CxtI,
4219
2.26M
                                                 const DominatorTree *DT) {
4220
2.26M
  return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT);
4221
2.26M
}
4222
4223
31.8M
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
4224
31.8M
  // A memory operation returns normally if it isn't volatile. A volatile
4225
31.8M
  // operation is allowed to trap.
4226
31.8M
  //
4227
31.8M
  // An atomic operation isn't guaranteed to return in a reasonable amount of
4228
31.8M
  // time because it's possible for another thread to interfere with it for an
4229
31.8M
  // arbitrary length of time, but programs aren't allowed to rely on that.
4230
31.8M
  if (const LoadInst *LI = dyn_cast<LoadInst>(I))
4231
3.49M
    return !LI->isVolatile();
4232
28.3M
  if (const StoreInst *SI = dyn_cast<StoreInst>(I))
4233
2.85M
    return !SI->isVolatile();
4234
25.4M
  if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
4235
20.3k
    return !CXI->isVolatile();
4236
25.4M
  if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
4237
18.2k
    return !RMWI->isVolatile();
4238
25.4M
  if (const MemIntrinsic *MII = dyn_cast<MemIntrinsic>(I))
4239
69.4k
    return !MII->isVolatile();
4240
25.3M
4241
25.3M
  // If there is no successor, then execution can't transfer to it.
4242
25.3M
  if (const auto *CRI = dyn_cast<CleanupReturnInst>(I))
4243
0
    return !CRI->unwindsToCaller();
4244
25.3M
  if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I))
4245
0
    return !CatchSwitch->unwindsToCaller();
4246
25.3M
  if (isa<ResumeInst>(I))
4247
109
    return false;
4248
25.3M
  if (isa<ReturnInst>(I))
4249
7.10k
    return false;
4250
25.3M
  if (isa<UnreachableInst>(I))
4251
20
    return false;
4252
25.3M
4253
25.3M
  // Calls can throw, or contain an infinite loop, or kill the process.
4254
25.3M
  if (auto CS = ImmutableCallSite(I)) {
4255
639k
    // Call sites that throw have implicit non-local control flow.
4256
639k
    if (!CS.doesNotThrow())
4257
30.3k
      return false;
4258
609k
4259
609k
    // A function which doesn't throw and has the "willreturn" attribute will
4260
609k
    // always return.
4261
609k
    if (CS.hasFnAttr(Attribute::WillReturn))
4262
1
      return true;
4263
609k
4264
609k
    // Non-throwing call sites can loop infinitely, call exit/pthread_exit
4265
609k
    // etc. and thus not return.  However, LLVM already assumes that
4266
609k
    //
4267
609k
    //  - Thread exiting actions are modeled as writes to memory invisible to
4268
609k
    //    the program.
4269
609k
    //
4270
609k
    //  - Loops that don't have side effects (side effects are volatile/atomic
4271
609k
    //    stores and IO) always terminate (see http://llvm.org/PR965).
4272
609k
    //    Furthermore IO itself is also modeled as writes to memory invisible to
4273
609k
    //    the program.
4274
609k
    //
4275
609k
    // We rely on those assumptions here, and use the memory effects of the call
4276
609k
    // target as a proxy for checking that it always returns.
4277
609k
4278
609k
    // FIXME: This isn't aggressive enough; a call which only writes to a global
4279
609k
    // is guaranteed to return.
4280
609k
    return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() ||
4281
609k
           
           match(I, m_Intrinsic<Intrinsic::assume>()) ||
4282
609k
           
           match(I, m_Intrinsic<Intrinsic::sideeffect>()) ||
4283
609k
           
           match(I, m_Intrinsic<Intrinsic::experimental_widenable_condition>());
4284
609k
  }
4285
24.7M
4286
24.7M
  // Other instructions return normally.
4287
24.7M
  return true;
4288
24.7M
}
4289
4290
210k
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
4291
210k
  // TODO: This is slightly conservative for invoke instruction since exiting
4292
210k
  // via an exception *is* normal control for them.
4293
2.07M
  for (auto I = BB->begin(), E = BB->end(); I != E; ++I)
4294
1.90M
    if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
4295
42.8k
      return false;
4296
210k
  
  return true;
4297
210k
}
4298
4299
bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
4300
117k
                                                  const Loop *L) {
4301
117k
  // The loop header is guaranteed to be executed for every iteration.
4302
117k
  //
4303
117k
  // FIXME: Relax this constraint to cover all basic blocks that are
4304
117k
  // guaranteed to be executed at every iteration.
4305
117k
  if (I->getParent() != L->getHeader()) return false;
4306
116k
4307
2.42M
  
  for (const Instruction &LI : *L->getHeader()) {
4308
2.42M
    if (&LI == I) return true;
4309
2.31M
    if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false;
4310
2.31M
  }
4311
116k
  
  llvm_unreachable("Instruction not contained in its own parent basic block.");
4312
116k
}
4313
4314
7.21M
bool llvm::propagatesFullPoison(const Instruction *I) {
4315
7.21M
  // TODO: This should include all instructions apart from phis, selects and
4316
7.21M
  // call-like instructions.
4317
7.21M
  switch (I->getOpcode()) {
4318
7.21M
  case Instruction::Add:
4319
769k
  case Instruction::Sub:
4320
769k
  case Instruction::Xor:
4321
769k
  case Instruction::Trunc:
4322
769k
  case Instruction::BitCast:
4323
769k
  case Instruction::AddrSpaceCast:
4324
769k
  case Instruction::Mul:
4325
769k
  case Instruction::Shl:
4326
769k
  case Instruction::GetElementPtr:
4327
769k
    // These operations all propagate poison unconditionally. Note that poison
4328
769k
    // is not any particular value, so xor or subtraction of poison with
4329
769k
    // itself still yields poison, not zero.
4330
769k
    return true;
4331
769k
4332
769k
  case Instruction::AShr:
4333
19.8k
  case Instruction::SExt:
4334
19.8k
    // For these operations, one bit of the input is replicated across
4335
19.8k
    // multiple output bits. A replicated poison bit is still poison.
4336
19.8k
    return true;
4337
19.8k
4338
1.91M
  case Instruction::ICmp:
4339
1.91M
    // Comparing poison with any value yields poison.  This is why, for
4340
1.91M
    // instance, x s< (x +nsw 1) can be folded to true.
4341
1.91M
    return true;
4342
19.8k
4343
4.51M
  default:
4344
4.51M
    return false;
4345
7.21M
  }
4346
7.21M
}
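A hypothetical IR sketch (the %names are assumptions) of the ICmp case above and the fold the comment mentions:

  //   %y = add nsw i32 %x, 1
  //   %c = icmp slt i32 %x, %y   ; can be folded to true
  // The fold is justified precisely because comparing poison yields poison:
  // when %x == INT_MAX, %y is poison, so %c is poison rather than false.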
4347
4348
7.73M
const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
4349
7.73M
  switch (I->getOpcode()) {
4350
7.73M
    case Instruction::Store:
4351
840k
      return cast<StoreInst>(I)->getPointerOperand();
4352
7.73M
4353
7.73M
    case Instruction::Load:
4354
564k
      return cast<LoadInst>(I)->getPointerOperand();
4355
7.73M
4356
7.73M
    case Instruction::AtomicCmpXchg:
4357
4
      return cast<AtomicCmpXchgInst>(I)->getPointerOperand();
4358
7.73M
4359
7.73M
    case Instruction::AtomicRMW:
4360
28
      return cast<AtomicRMWInst>(I)->getPointerOperand();
4361
7.73M
4362
7.73M
    case Instruction::UDiv:
4363
7.00k
    case Instruction::SDiv:
4364
7.00k
    case Instruction::URem:
4365
7.00k
    case Instruction::SRem:
4366
7.00k
      return I->getOperand(1);
4367
7.00k
4368
6.32M
    default:
4369
6.32M
      // Note: It's really tempting to think that a conditional branch or
4370
6.32M
      // switch should be listed here, but that's incorrect.  It's not
4371
6.32M
      // branching off of poison which is UB, it is executing a side effecting
4372
6.32M
      // instruction which follows the branch.
4373
6.32M
      return nullptr;
4374
7.73M
  }
4375
7.73M
}
4376
4377
bool llvm::mustTriggerUB(const Instruction *I,
4378
7.73M
                         const SmallSet<const Value *, 16>& KnownPoison) {
4379
7.73M
  auto *NotPoison = getGuaranteedNonFullPoisonOp(I);
4380
7.73M
  return (NotPoison && KnownPoison.count(NotPoison));
4381
7.73M
}
4382
4383
4384
1.42M
bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) {
4385
1.42M
  // We currently only look for uses of poison values within the same basic
4386
1.42M
  // block, as that makes it easier to guarantee that the uses will be
4387
1.42M
  // executed given that PoisonI is executed.
4388
1.42M
  //
4389
1.42M
  // FIXME: Expand this to consider uses beyond the same basic block. To do
4390
1.42M
  // this, look out for the distinction between post-dominance and strong
4391
1.42M
  // post-dominance.
4392
1.42M
  const BasicBlock *BB = PoisonI->getParent();
4393
1.42M
4394
1.42M
  // Set of instructions that we have proved will yield poison if PoisonI
4395
1.42M
  // does.
4396
1.42M
  SmallSet<const Value *, 16> YieldsPoison;
4397
1.42M
  SmallSet<const BasicBlock *, 4> Visited;
4398
1.42M
  YieldsPoison.insert(PoisonI);
4399
1.42M
  Visited.insert(PoisonI->getParent());
4400
1.42M
4401
1.42M
  BasicBlock::const_iterator Begin = PoisonI->getIterator(), End = BB->end();
4402
1.42M
4403
1.42M
  unsigned Iter = 0;
4404
1.44M
  while (Iter++ < MaxDepth) {
4405
9.16M
    for (auto &I : make_range(Begin, End)) {
4406
9.16M
      if (&I != PoisonI) {
4407
7.73M
        if (mustTriggerUB(&I, YieldsPoison))
4408
150k
          return true;
4409
7.58M
        if (!isGuaranteedToTransferExecutionToSuccessor(&I))
4410
21.0k
          return false;
4411
8.99M
      }
4412
8.99M
4413
8.99M
      // Mark poison that propagates from I through uses of I.
4414
8.99M
      if (YieldsPoison.count(&I)) {
4415
4.50M
        for (const User *User : I.users()) {
4416
4.50M
          const Instruction *UserI = cast<Instruction>(User);
4417
4.50M
          if (propagatesFullPoison(UserI))
4418
1.82M
            YieldsPoison.insert(User);
4419
4.50M
        }
4420
2.94M
      }
4421
8.99M
    }
4422
1.44M
4423
1.44M
    
    if (auto *NextBB = BB->getSingleSuccessor()) {
4424
22.3k
      if (Visited.insert(NextBB).second) {
4425
22.2k
        BB = NextBB;
4426
22.2k
        Begin = BB->getFirstNonPHI()->getIterator();
4427
22.2k
        End = BB->end();
4428
22.2k
        continue;
4429
22.2k
      }
4430
1.25M
    }
4431
1.25M
4432
1.25M
    break;
4433
1.25M
  }
4434
1.42M
  
  return false;
4435
1.42M
}
4436
4437
286k
static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
4438
286k
  if (FMF.noNaNs())
4439
3.75k
    return true;
4440
283k
4441
283k
  if (auto *C = dyn_cast<ConstantFP>(V))
4442
46.1k
    return !C->isNaN();
4443
237k
4444
237k
  if (auto *C = dyn_cast<ConstantDataVector>(V)) {
4445
102
    if (!C->getElementType()->isFloatingPointTy())
4446
0
      return false;
4447
373
    
    for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
4448
272
      if (C->getElementAsAPFloat(I).isNaN())
4449
1
        return false;
4450
272
    }
4451
102
    
    return true;
4452
236k
  }
4453
236k
4454
236k
  return false;
4455
236k
}
4456
4457
6.82k
static bool isKnownNonZero(const Value *V) {
4458
6.82k
  if (auto *C = dyn_cast<ConstantFP>(V))
4459
2.17k
    return !C->isZero();
4460
4.64k
4461
4.64k
  if (auto *C = dyn_cast<ConstantDataVector>(V)) {
4462
69
    if (!C->getElementType()->isFloatingPointTy())
4463
0
      return false;
4464
274
    
    for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
4465
207
      if (C->getElementAsAPFloat(I).isZero())
4466
2
        return false;
4467
207
    }
4468
69
    
    return true;
4469
4.58k
  }
4470
4.58k
4471
4.58k
  return false;
4472
4.58k
}
4473
4474
/// Match clamp pattern for float types without caring about NaNs or signed zeros.
4475
/// Given non-min/max outer cmp/select from the clamp pattern this
4476
/// function recognizes if it can be substituted by a "canonical" min/max
4477
/// pattern.
4478
static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred,
4479
                                               Value *CmpLHS, Value *CmpRHS,
4480
                                               Value *TrueVal, Value *FalseVal,
4481
945
                                               Value *&LHS, Value *&RHS) {
4482
945
  // Try to match
4483
945
  //   X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2))
4484
945
  //   X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2))
4485
945
  // and return description of the outer Max/Min.
4486
945
4487
945
  // First, check if select has inverse order:
4488
945
  if (CmpRHS == FalseVal) {
4489
11
    std::swap(TrueVal, FalseVal);
4490
11
    Pred = CmpInst::getInversePredicate(Pred);
4491
11
  }
4492
945
4493
945
  // Assume success now. If there's no match, callers should not use these anyway.
4494
945
  LHS = TrueVal;
4495
945
  RHS = FalseVal;
4496
945
4497
945
  const APFloat *FC1;
4498
945
  if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite())
4499
916
    return {SPF_UNKNOWN, SPNB_NA, false};
4500
29
4501
29
  const APFloat *FC2;
4502
29
  switch (Pred) {
4503
29
  case CmpInst::FCMP_OLT:
4504
17
  case CmpInst::FCMP_OLE:
4505
17
  case CmpInst::FCMP_ULT:
4506
17
  case CmpInst::FCMP_ULE:
4507
17
    if (match(FalseVal,
4508
17
              m_CombineOr(m_OrdFMin(m_Specific(CmpLHS), m_APFloat(FC2)),
4509
17
                          m_UnordFMin(m_Specific(CmpLHS), m_APFloat(FC2)))) &&
4510
17
        
        FC1->compare(*FC2) == APFloat::cmpResult::cmpLessThan)
4511
10
      return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false};
4512
7
    break;
4513
12
  case CmpInst::FCMP_OGT:
4514
12
  case CmpInst::FCMP_OGE:
4515
12
  case CmpInst::FCMP_UGT:
4516
12
  case CmpInst::FCMP_UGE:
4517
12
    if (match(FalseVal,
4518
12
              m_CombineOr(m_OrdFMax(m_Specific(CmpLHS), m_APFloat(FC2)),
4519
12
                          m_UnordFMax(m_Specific(CmpLHS), m_APFloat(FC2)))) &&
4520
12
        
        FC1->compare(*FC2) == APFloat::cmpResult::cmpGreaterThan)
4521
8
      return {SPF_FMINNUM, SPNB_RETURNS_ANY, false};
4522
4
    break;
4523
4
  default:
4524
0
    break;
4525
11
  }
4526
11
4527
11
  return {SPF_UNKNOWN, SPNB_NA, false};
4528
11
}
4529
4530
/// Recognize variations of:
4531
///   CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
4532
static SelectPatternResult matchClamp(CmpInst::Predicate Pred,
4533
                                      Value *CmpLHS, Value *CmpRHS,
4534
5.04M
                                      Value *TrueVal, Value *FalseVal) {
4535
5.04M
  // Swap the select operands and predicate to match the patterns below.
4536
5.04M
  if (CmpRHS != TrueVal) {
4537
5.00M
    Pred = ICmpInst::getSwappedPredicate(Pred);
4538
5.00M
    std::swap(TrueVal, FalseVal);
4539
5.00M
  }
4540
5.04M
  const APInt *C1;
4541
5.04M
  if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) {
4542
29.3k
    const APInt *C2;
4543
29.3k
    // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
4544
29.3k
    if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) &&
4545
29.3k
        
        C1->slt(*C2) && Pred == CmpInst::ICMP_SLT)
4546
65
      return {SPF_SMAX, SPNB_NA, false};
4547
29.2k
4548
29.2k
    // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
4549
29.2k
    if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) &&
4550
29.2k
        
        C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT)
4551
175
      return {SPF_SMIN, SPNB_NA, false};
4552
29.0k
4553
29.0k
    // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
4554
29.0k
    if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) &&
4555
29.0k
        
        C1->ult(*C2) && Pred == CmpInst::ICMP_ULT)
4556
7
      return {SPF_UMAX, SPNB_NA, false};
4557
29.0k
4558
29.0k
    // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
4559
29.0k
    if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) &&
4560
29.0k
        
        C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT)
4561
7
      return {SPF_UMIN, SPNB_NA, false};
4562
5.04M
  }
4563
5.04M
  return {SPF_UNKNOWN, SPNB_NA, false};
4564
5.04M
}
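A standalone check (plain C++, assumed constants C1 = -5 and C2 = 10 with C1 <s C2) of the first pattern above, which is the familiar clamp(x, lo, hi) shape:

  #include <algorithm>
  #include <cassert>

  int main() {
    int C1 = -5, C2 = 10;
    for (int X = -20; X <= 20; ++X) {
      int Sel = (X < C1) ? C1 : std::min(X, C2);    // the select form
      assert(Sel == std::max(std::min(X, C2), C1)); // canonical SMAX(SMIN(...))
    }
  }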
4565
4566
/// Recognize variations of:
4567
///   a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c))
4568
static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
4569
                                               Value *CmpLHS, Value *CmpRHS,
4570
                                               Value *TVal, Value *FVal,
4571
5.04M
                                               unsigned Depth) {
4572
5.04M
  // TODO: Allow FP min/max with nnan/nsz.
4573
5.04M
  assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison");
4574
5.04M
4575
5.04M
  Value *A, *B;
4576
5.04M
  SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1);
4577
5.04M
  if (!SelectPatternResult::isMinOrMax(L.Flavor))
4578
5.02M
    return {SPF_UNKNOWN, SPNB_NA, false};
4579
17.2k
4580
17.2k
  Value *C, *D;
4581
17.2k
  SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1);
4582
17.2k
  if (L.Flavor != R.Flavor)
4583
17.1k
    return {SPF_UNKNOWN, SPNB_NA, false};
4584
148
4585
148
  // We have something like: x Pred y ? min(a, b) : min(c, d).
4586
148
  // Try to match the compare to the min/max operations of the select operands.
4587
148
  // First, make sure we have the right compare predicate.
4588
148
  switch (L.Flavor) {
4589
148
  case SPF_SMIN:
4590
32
    if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
4591
16
      Pred = ICmpInst::getSwappedPredicate(Pred);
4592
16
      std::swap(CmpLHS, CmpRHS);
4593
16
    }
4594
32
    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
4595
32
      break;
4596
0
    return {SPF_UNKNOWN, SPNB_NA, false};
4597
34
  case SPF_SMAX:
4598
34
    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
4599
16
      Pred = ICmpInst::getSwappedPredicate(Pred);
4600
16
      std::swap(CmpLHS, CmpRHS);
4601
16
    }
4602
34
    if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
4603
34
      break;
4604
0
    return {SPF_UNKNOWN, SPNB_NA, false};
4605
36
  case SPF_UMIN:
4606
36
    if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
4607
16
      Pred = ICmpInst::getSwappedPredicate(Pred);
4608
16
      std::swap(CmpLHS, CmpRHS);
4609
16
    }
4610
36
    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
4611
36
      break;
4612
0
    return {SPF_UNKNOWN, SPNB_NA, false};
4613
38
  case SPF_UMAX:
4614
38
    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
4615
22
      Pred = ICmpInst::getSwappedPredicate(Pred);
4616
22
      std::swap(CmpLHS, CmpRHS);
4617
22
    }
4618
38
    if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
4619
38
      break;
4620
0
    return {SPF_UNKNOWN, SPNB_NA, false};
4621
8
  default:
4622
8
    return {SPF_UNKNOWN, SPNB_NA, false};
4623
140
  }
4624
140
4625
140
  // If there is a common operand in the already matched min/max and the other
4626
140
  // min/max operands match the compare operands (either directly or inverted),
4627
140
  // then this is min/max of the same flavor.
4628
140
4629
140
  // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
4630
140
  // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
4631
140
  if (D == B) {
4632
36
    if ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
4633
20
                                         match(A, m_Not(m_Specific(CmpRHS)))))
4634
36
      return {L.Flavor, SPNB_NA, false};
4635
104
  }
4636
104
  // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
4637
104
  // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
4638
104
  if (C == B) {
4639
32
    if ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
4640
16
                                         match(A, m_Not(m_Specific(CmpRHS)))))
4641
32
      return {L.Flavor, SPNB_NA, false};
4642
72
  }
4643
72
  // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
4644
72
  // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
4645
72
  if (D == A) {
4646
32
    if ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
4647
16
                                         match(B, m_Not(m_Specific(CmpRHS)))))
4648
32
      return {L.Flavor, SPNB_NA, false};
4649
40
  }
4650
40
  // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
4651
40
  // ~d pred ~b ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
4652
40
  if (C == A) {
4653
38
    if ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
4654
16
                                         match(B, m_Not(m_Specific(CmpRHS)))))
4655
38
      return {L.Flavor, SPNB_NA, false};
4656
2
  }
4657
2
4658
2
  return {SPF_UNKNOWN, SPNB_NA, false};
4659
2
}
4660
4661
/// Match non-obvious integer minimum and maximum sequences.
4662
static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
4663
                                       Value *CmpLHS, Value *CmpRHS,
4664
                                       Value *TrueVal, Value *FalseVal,
4665
                                       Value *&LHS, Value *&RHS,
4666
5.04M
                                       unsigned Depth) {
4667
5.04M
  // Assume success. If there's no match, callers should not use these anyway.
4668
5.04M
  LHS = TrueVal;
4669
5.04M
  RHS = FalseVal;
4670
5.04M
4671
5.04M
  SelectPatternResult SPR = matchClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal);
4672
5.04M
  if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
4673
254
    return SPR;
4674
5.04M
4675
5.04M
  SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth);
4676
5.04M
  if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
4677
138
    return SPR;
4678
5.04M
4679
5.04M
  if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
4680
2.35M
    return {SPF_UNKNOWN, SPNB_NA, false};
4681
2.69M
4682
2.69M
  // Z = X -nsw Y
4683
2.69M
  // (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0)
4684
2.69M
  // (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0)
4685
2.69M
  if (match(TrueVal, m_Zero()) &&
4686
2.69M
      
      match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
4687
2.54k
    return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
4688
2.68M
4689
2.68M
  // Z = X -nsw Y
4690
2.68M
  // (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0)
4691
2.68M
  // (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0)
4692
2.68M
  if (match(FalseVal, m_Zero()) &&
4693
2.68M
      
      match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
4694
38.2k
    return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
4695
2.65M
4696
2.65M
  const APInt *C1;
4697
2.65M
  if (!match(CmpRHS, m_APInt(C1)))
4698
294k
    return {SPF_UNKNOWN, SPNB_NA, false};
4699
2.35M
4700
2.35M
  // An unsigned min/max can be written with a signed compare.
4701
2.35M
  const APInt *C2;
4702
2.35M
  if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) ||
4703
2.35M
      
      (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) {
4704
11.9k
    // Is the sign bit set?
4705
11.9k
    // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
4706
11.9k
    // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
4707
11.9k
    if (Pred == CmpInst::ICMP_SLT && C1->isNullValue() &&
4708
11.9k
        
        C2->isMaxSignedValue())
4709
11
      return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
4710
11.9k
4711
11.9k
    // Is the sign bit clear?
4712
11.9k
    // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
4713
11.9k
    // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
4714
11.9k
    if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() &&
4715
11.9k
        
        C2->isMinSignedValue())
4716
11
      return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
4717
2.35M
  }
4718
2.35M
4719
2.35M
  // Look through 'not' ops to find disguised signed min/max.
4720
2.35M
  // (X >s C) ? ~X : ~C ==> (~X <s ~C) ? ~X : ~C ==> SMIN(~X, ~C)
4721
2.35M
  // (X <s C) ? ~X : ~C ==> (~X >s ~C) ? ~X : ~C ==> SMAX(~X, ~C)
4722
2.35M
  if (match(TrueVal, m_Not(m_Specific(CmpLHS))) &&
4723
2.35M
      
      match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2)
4724
29
    return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
4725
2.35M
4726
2.35M
  // (X >s C) ? ~C : ~X ==> (~X <s ~C) ? ~C : ~X ==> SMAX(~C, ~X)
4727
2.35M
  // (X <s C) ? ~C : ~X ==> (~X >s ~C) ? ~C : ~X ==> SMIN(~C, ~X)
4728
2.35M
  if (match(FalseVal, m_Not(m_Specific(CmpLHS))) &&
4729
2.35M
      match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2)
4730
14
    return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
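
Bitwise negation reverses the signed order (X >s C iff ~X <s ~C, since ~V == -V - 1), which is what makes these forms min/max in disguise. A standalone check (plain C++ sketch):

// ~ reverses signed order, so (X >s C) ? ~X : ~C picks the smaller of
// ~X and ~C, i.e. SMIN(~X, ~C); the mirrored forms give SMAX.
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (int X = INT8_MIN; X <= INT8_MAX; ++X) {
    for (int C = INT8_MIN; C <= INT8_MAX; ++C) {
      int8_t NotX = (int8_t)~X, NotC = (int8_t)~C;
      assert(((X > C) ? NotX : NotC) == std::min(NotX, NotC)); // SMIN
      assert(((X < C) ? NotX : NotC) == std::max(NotX, NotC)); // SMAX
    }
  }
  return 0;
}
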
4731
2.35M
4732
2.35M
  return {SPF_UNKNOWN, SPNB_NA, false};
4733
2.35M
}
4734
4735
23.1M
bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW) {
4736
23.1M
  assert(X && Y && "Invalid operand");
4737
23.1M
4738
23.1M
  // X = sub (0, Y) || X = sub nsw (0, Y)
4739
23.1M
  if ((!NeedNSW && match(X, m_Sub(m_ZeroInt(), m_Specific(Y)))) ||
4740
23.1M
      (NeedNSW && match(X, m_NSWSub(m_ZeroInt(), m_Specific(Y)))))
4741
1.05M
    return true;
4742
22.1M
4743
22.1M
  // Y = sub (0, X) || Y = sub nsw (0, X)
4744
22.1M
  if ((!NeedNSW && match(Y, m_Sub(m_ZeroInt(), m_Specific(X)))) ||
4745
22.1M
      (NeedNSW && match(Y, m_NSWSub(m_ZeroInt(), m_Specific(X)))))
4746
6.68k
    return true;
4747
22.1M
4748
22.1M
  // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A)
4749
22.1M
  Value *A, *B;
4750
22.1M
  return (!NeedNSW && (match(X, m_Sub(m_Value(A), m_Value(B))) &&
4751
21.8M
                       match(Y, m_Sub(m_Specific(B), m_Specific(A))))) ||
4752
22.1M
         (NeedNSW && (match(X, m_NSWSub(m_Value(A), m_Value(B))) &&
4753
305k
                      match(Y, m_NSWSub(m_Specific(B), m_Specific(A)))));
4754
22.1M
}
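
The last clause deliberately accepts the plain (non-nsw) sub pair: two's-complement negation is exact modulo 2^n, so A - B and B - A negate each other even when both subtractions wrap. A standalone check in 8-bit wrapping arithmetic (plain C++ sketch):

// X = A - B and Y = B - A satisfy X == -Y modulo 256, wrap or no wrap;
// unsigned arithmetic supplies the wrap-around semantics.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A) {
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t X = (uint8_t)(A - B);
      uint8_t Y = (uint8_t)(B - A);
      assert((uint8_t)-X == Y);
    }
  }
  return 0;
}
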
4755
4756
static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
4757
                                              FastMathFlags FMF,
4758
                                              Value *CmpLHS, Value *CmpRHS,
4759
                                              Value *TrueVal, Value *FalseVal,
4760
                                              Value *&LHS, Value *&RHS,
4761
14.5M
                                              unsigned Depth) {
4762
14.5M
  if (CmpInst::isFPPredicate(Pred)) {
4763
145k
    // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one
4764
145k
    // 0.0 operand, set the compare's 0.0 operands to that same value for the
4765
145k
    // purpose of identifying min/max. Disregard vector constants with undefined
4766
145k
    // elements because those can not be back-propagated for analysis.
4767
145k
    Value *OutputZeroVal = nullptr;
4768
145k
    if (match(TrueVal, m_AnyZeroFP()) && !match(FalseVal, m_AnyZeroFP()) &&
4769
145k
        !cast<Constant>(TrueVal)->containsUndefElement())
4770
2.50k
      OutputZeroVal = TrueVal;
4771
142k
    else if (match(FalseVal, m_AnyZeroFP()) && !match(TrueVal, m_AnyZeroFP()) &&
4772
142k
             !cast<Constant>(FalseVal)->containsUndefElement())
4773
1.43k
      OutputZeroVal = FalseVal;
4774
145k
4775
145k
    if (OutputZeroVal) {
4776
3.93k
      if (match(CmpLHS, m_AnyZeroFP()))
4777
39
        CmpLHS = OutputZeroVal;
4778
3.93k
      if (match(CmpRHS, m_AnyZeroFP()))
4779
3.10k
        CmpRHS = OutputZeroVal;
4780
3.93k
    }
4781
145k
  }
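
The substitution is sound because IEEE-754 comparisons ignore the sign of zero, so replacing a zero in the compare with the zero the select actually produces cannot change the compare's result. A standalone illustration (plain C++ sketch):

// -0.0 and +0.0 compare equal even though they are distinct values,
// which is what allows back-propagating the select's zero operand.
#include <cassert>
#include <cmath>

int main() {
  assert(0.0 == -0.0);        // the comparison ignores the sign of zero
  assert(!std::signbit(0.0)); // ...yet the two zeros are distinguishable
  assert(std::signbit(-0.0));
  return 0;
}
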
4782
14.5M
4783
14.5M
  LHS = CmpLHS;
4784
14.5M
  RHS = CmpRHS;
4785
14.5M
4786
14.5M
  // Signed zero may return inconsistent results between implementations.
4787
14.5M
  //  (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
4788
14.5M
  //  minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
4789
14.5M
  // Therefore, we behave conservatively and only proceed if at least one of the
4790
14.5M
  // operands is known to not be zero or if we don't care about signed zero.
4791
14.5M
  switch (Pred) {
4792
14.5M
  default: break;
4793
14.5M
  // FIXME: Include OGT/OLT/UGT/ULT.
4794
14.5M
  case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
4795
4.58k
  case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
4796
4.58k
    if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
4797
4.58k
        !isKnownNonZero(CmpRHS))
4798
1.99k
      return {SPF_UNKNOWN, SPNB_NA, false};
4799
14.5M
  }
4800
14.5M
4801
14.5M
  SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
4802
14.5M
  bool Ordered = false;
4803
14.5M
4804
14.5M
  // When given one NaN and one non-NaN input:
4805
14.5M
  //   - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
4806
14.5M
  //   - A simple C99 (a < b ? a : b) construction will return 'b' (as the
4807
14.5M
  //     ordered comparison fails), which could be NaN or non-NaN.
4808
14.5M
  // so here we discover exactly what NaN behavior is required/accepted.
4809
14.5M
  if (CmpInst::isFPPredicate(Pred)) {
4810
143k
    bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
4811
143k
    bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);
4812
143k
4813
143k
    if (LHSSafe && RHSSafe) {
4814
1.91k
      // Both operands are known non-NaN.
4815
1.91k
      NaNBehavior = SPNB_RETURNS_ANY;
4816
141k
    } else if (CmpInst::isOrdered(Pred)) {
4817
125k
      // An ordered comparison will return false when given a NaN, so it
4818
125k
      // returns the RHS.
4819
125k
      Ordered = true;
4820
125k
      if (LHSSafe)
4821
90
        // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
4822
90
        NaNBehavior = SPNB_RETURNS_NAN;
4823
125k
      else if (RHSSafe)
4824
35.4k
        NaNBehavior = SPNB_RETURNS_OTHER;
4825
90.2k
      else
4826
90.2k
        // Completely unsafe.
4827
90.2k
        return {SPF_UNKNOWN, SPNB_NA, false};
4828
15.7k
    } else {
4829
15.7k
      Ordered = false;
4830
15.7k
      // An unordered comparison will return true when given a NaN, so it
4831
15.7k
      // returns the LHS.
4832
15.7k
      if (LHSSafe)
4833
61
        // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
4834
61
        NaNBehavior = SPNB_RETURNS_OTHER;
4835
15.6k
      else if (RHSSafe)
4836
10.6k
        NaNBehavior = SPNB_RETURNS_NAN;
4837
5.06k
      else
4838
5.06k
        // Completely unsafe.
4839
5.06k
        return {SPF_UNKNOWN, SPNB_NA, false};
4840
14.4M
    }
4841
143k
  }
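
The classification drawn above is observable in plain C: fmin()/fmax() return the non-NaN operand, while the compare-plus-select idiom returns whichever arm the (false) ordered comparison selects. A standalone illustration (plain C++ sketch):

// With one NaN input, fmin() returns the non-NaN operand, but the C
// idiom (a < b ? a : b) returns 'b' because an ordered compare of a
// NaN is false -- exactly the SPNB_* distinctions made above.
#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const double NaN = std::numeric_limits<double>::quiet_NaN();
  const double B = 1.0;
  assert(std::fmin(NaN, B) == 1.0);  // minnum: the non-NaN input wins
  double Sel1 = (NaN < B) ? NaN : B; // ordered compare is false...
  assert(Sel1 == 1.0);               // ...so the select returns the RHS
  double Sel2 = (B < NaN) ? B : NaN; // false again...
  assert(std::isnan(Sel2));          // ...and this time the RHS is NaN
  return 0;
}
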
4842
14.4M
4843
14.4M
  if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
4844
1.02M
    std::swap(CmpLHS, CmpRHS);
4845
1.02M
    Pred = CmpInst::getSwappedPredicate(Pred);
4846
1.02M
    if (NaNBehavior == SPNB_RETURNS_NAN)
4847
66
      NaNBehavior = SPNB_RETURNS_OTHER;
4848
1.02M
    else if (NaNBehavior == SPNB_RETURNS_OTHER)
4849
6.79k
      NaNBehavior = SPNB_RETURNS_NAN;
4850
1.02M
    Ordered = !Ordered;
4851
1.02M
  }
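
Canonicalizing here lets the code below match only the orientation where the select arms line up with the compare operands: swapping the compare operands while flipping the predicate leaves the selected value unchanged. A standalone check (plain C++ sketch):

// (a < b) ? b : a selects the larger value, i.e. max(a, b), which is the
// swapped-predicate form of (b > a) ? b : a.
#include <algorithm>
#include <cassert>

int main() {
  for (int A = -4; A <= 4; ++A)
    for (int B = -4; B <= 4; ++B)
      assert(((A < B) ? B : A) == std::max(A, B));
  return 0;
}
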
4852
14.4M
4853
14.4M
  // ([if]cmp X, Y) ? X : Y
4854
14.4M
  if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
4855
8.30M
    switch (Pred) {
4856
8.30M
    default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
4857
8.30M
    case ICmpInst::ICMP_UGT:
4858
1.94M
    case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false};
4859
4.92M
    case ICmpInst::ICMP_SGT:
4860
4.92M
    case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false};
4861
4.92M
    case ICmpInst::ICMP_ULT:
4862
879k
    case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false};
4863
879k
    case ICmpInst::ICMP_SLT:
4864
547k
    case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false};
4865
547k
    case FCmpInst::FCMP_UGT:
4866
4.22k
    case FCmpInst::FCMP_UGE:
4867
4.22k
    case FCmpInst::FCMP_OGT:
4868
4.22k
    case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered};
4869
6.24k
    case FCmpInst::FCMP_ULT:
4870
6.24k
    case FCmpInst::FCMP_ULE:
4871
6.24k
    case FCmpInst::FCMP_OLT:
4872
6.24k
    case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};
4873
6.11M
    }
4874
6.11M
  }
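
Out-of-tree clients reach this classification through the public matchSelectPattern entry point declared in llvm/Analysis/ValueTracking.h. A minimal usage sketch (the helper name is hypothetical; only the documented overload is assumed):

// Sketch: report whether a select is a signed-max pattern. For
//   %s = select (icmp sgt %a, %b), %a, %b
// this is expected to yield SPF_SMAX with LHS = %a and RHS = %b.
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"

static bool isSignedMaxSelect(llvm::SelectInst *Sel) {
  llvm::Value *LHS = nullptr, *RHS = nullptr;
  llvm::SelectPatternResult SPR = llvm::matchSelectPattern(Sel, LHS, RHS);
  return SPR.Flavor == llvm::SPF_SMAX;
}
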
4875
6.11M
4876
6.11M
  if (isKnownNegation(TrueVal, FalseVal)) {
4877
1.07M
    // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can
4878
1.07M
    // match against either LHS or sext(LHS).
4879
1.07M
    auto MaybeSExtCmpLHS =
4880
1.07M
        m_CombineOr(m_Specific(CmpLHS), m_SExt(m_Specific(CmpLHS)));
4881
1.07M
    auto ZeroOrAllOnes = m_CombineOr(m_ZeroInt(), m_AllOnes());
4882
1.07M
    auto ZeroOrOne = m_CombineOr(m_ZeroInt(), m_One());
4883
1.07M
    if (match(TrueVal, MaybeSExtCmpLHS)) {
4884
6.56k
      // Set the return values. If the compare uses the negated value (-X >s 0),
4885
6.56k
      // swap the return values because the negated value is always 'RHS'.
4886
6.56k
      LHS = TrueVal;
4887
6.56k
      RHS = FalseVal;
4888
6.56k
      if (match(CmpLHS, m_Neg(m_Specific(FalseVal))))
4889
24
        std::swap(LHS, RHS);
4890
6.56k
4891
6.56k
      // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X)
4892
6.56k
      // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X)
4893
6.56k
      if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
4894
3.73k
        return {SPF_ABS, SPNB_NA, false};
4895
2.82k
4896
2.82k
      // (X >=s 0) ? X : -X or (X >=s 1) ? X : -X --> ABS(X)
4897
2.82k
      if (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, ZeroOrOne))
4898
653
        return {SPF_ABS, SPNB_NA, false};
4899
2.17k
4900
2.17k
      // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X)
4901
2.17k
      // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X)
4902
2.17k
      if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
4903
2.15k
        return {SPF_NABS, SPNB_NA, false};
4904
1.06M
    }
4905
1.06M
    else if (match(FalseVal, MaybeSExtCmpLHS)) {
4906
1.02M
      // Set the return values. If the compare uses the negated value (-X >s 0),
4907
1.02M
      // swap the return values because the negated value is always 'RHS'.
4908
1.02M
      LHS = FalseVal;
4909
1.02M
      RHS = TrueVal;
4910
1.02M
      if (match(CmpLHS, m_Neg(m_Specific(TrueVal))))
4911
75
        std::swap(LHS, RHS);
4912
1.02M
4913
1.02M
      // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X)
4914
1.02M
      // (-X >s 0) ? X : -X or (-X >s -1) ? X : -X --> NABS(X)
4915
1.02M
      if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
4916
216
        return {SPF_NABS, SPNB_NA, false};
4917
1.02M
4918
1.02M
      // (X <s 0) ? -X : X or (X <s 1) ? -X : X --> ABS(X)
4919
1.02M
      // (-X <s 0) ? X : -X or (-X <s 1) ? X : -X --> ABS(X)
4920
1.02M
      if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
4921
1.02M
        return {SPF_ABS, SPNB_NA, false};
4922
5.08M
    }
4923
1.07M
  }
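
The accepted compare constants come in pairs (0 or -1 for sgt, 0 or 1 for slt/sge) because shifting the threshold by one cannot change which of X and -X is chosen. A standalone check (plain C++ sketch; the loop stays well inside int range, mirroring the no-overflow requirement on the negation):

// (X <s 0) ? -X : X, (X >s -1) ? X : -X, and the threshold-shifted
// variants all compute |X|; the mirrored forms compute -|X| (NABS).
#include <cassert>
#include <cstdlib>

int main() {
  for (int X = -1000; X <= 1000; ++X) {
    assert(((X < 0) ? -X : X) == std::abs(X));  // ABS
    assert(((X > -1) ? X : -X) == std::abs(X)); // ABS, threshold shifted
    assert(((X < 0) ? X : -X) == -std::abs(X)); // NABS
  }
  return 0;
}
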
4924
5.08M
4925
5.08M
  if (CmpInst::isIntPredicate(Pred))
4926
5.04M
    return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS, Depth);
4927
37.6k
4928
37.6k
  // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar
4929
37.6k
  // may return either -0.0 or 0.0, so fcmp/select pair has stricter
4930
37.6k
  // semantics than minNum. Be conservative in such case.
4931
37.6k
  if (NaNBehavior != SPNB_RETURNS_ANY ||
4932
37.6k
      (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
4933
37.6k
       !isKnownNonZero(CmpRHS)))
4934
36.7k
    return {SPF_UNKNOWN, SPNB_NA, false};
4935
945
4936
945
  return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
4937
945
}
4938
4939
/// Helps to match a select pattern in case of a type mismatch.
4940
///
4941
/// The function processes the case when type of true and false values of a
4942
/// select instruction differs from type of the cmp instruction operands because
4943
/// of a cast instruction. The function checks if it is legal to move the cast
4944
/// operation after "select". If yes, it returns the new second value of
4945
/// "select" (with the assumption that cast is moved):
4946
/// 1. As operand of cast instruction when both values of "select" are same cast
4947
/// instructions.
4948
/// 2. As restored constant (by applying reverse cast operation) when the first
4949
/// value of the "select" is a cast operation and the second value is a
4950
/// constant.
4951
/// NOTE: We return only the new second value because the first value could be
4952
/// accessed as operand of cast instruction.
4953
static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
4954
271k
                              Instruction::CastOps *CastOp) {
4955
271k
  auto *Cast1 = dyn_cast<CastInst>(V1);
4956
271k
  if (!Cast1)
4957
192k
    return nullptr;
4958
78.3k
4959
78.3k
  *CastOp = Cast1->getOpcode();
4960
78.3k
  Type *SrcTy = Cast1->getSrcTy();
4961
78.3k
  if (auto *Cast2 = dyn_cast<CastInst>(V2)) {
4962
363
    // If V1 and V2 are both the same cast from the same type, look through V1.
4963
363
    if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
4964
191
      return Cast2->getOperand(0);
4965
172
    return nullptr;
4966
172
  }
4967
78.0k
4968
78.0k
  auto *C = dyn_cast<Constant>(V2);
4969
78.0k
  if (!C)
4970
30.9k
    return nullptr;
4971
47.0k
4972
47.0k
  Constant *CastedTo = nullptr;
4973
47.0k
  switch (*CastOp) {
4974
47.0k
  case Instruction::ZExt:
4975
1.90k
    if (CmpI->isUnsigned())
4976
1.74k
      CastedTo = ConstantExpr::getTrunc(C, SrcTy);
4977
1.90k
    break;
4978
47.0k
  case Instruction::SExt:
4979
94
    if (CmpI->isSigned())
4980
9
      CastedTo = ConstantExpr::getTrunc(C, SrcTy, true);
4981
94
    break;
4982
47.0k
  case Instruction::Trunc:
4983
44.7k
    Constant *CmpConst;
4984
44.7k
    if (match(CmpI->getOperand(1), m_Constant(CmpConst)) &&
4985
44.7k
        CmpConst->getType() == SrcTy) {
4986
44.6k
      // Here we have the following case:
4987
44.6k
      //
4988
44.6k
      //   %cond = cmp iN %x, CmpConst
4989
44.6k
      //   %tr = trunc iN %x to iK
4990
44.6k
      //   %narrowsel = select i1 %cond, iK %t, iK C
4991
44.6k
      //
4992
44.6k
      // We can always move trunc after select operation:
4993
44.6k
      //
4994
44.6k
      //   %cond = cmp iN %x, CmpConst
4995
44.6k
      //   %widesel = select i1 %cond, iN %x, iN CmpConst
4996
44.6k
      //   %tr = trunc iN %widesel to iK
4997
44.6k
      //
4998
44.6k
      // Note that C could be extended in any way because we don't care about