Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Analysis/DemandedBits.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- DemandedBits.cpp - Determine demanded bits -------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass implements a demanded bits analysis. A demanded bit is one that
10
// contributes to a result; bits that are not demanded can be either zero or
11
// one without affecting control or data flow. For example in this sequence:
12
//
13
//   %1 = add i32 %x, %y
14
//   %2 = trunc i32 %1 to i16
15
//
16
// Only the lowest 16 bits of %1 are demanded; the rest are removed by the
17
// trunc.
18
//
19
//===----------------------------------------------------------------------===//
20
21
#include "llvm/Analysis/DemandedBits.h"
22
#include "llvm/ADT/APInt.h"
23
#include "llvm/ADT/SetVector.h"
24
#include "llvm/ADT/StringExtras.h"
25
#include "llvm/Analysis/AssumptionCache.h"
26
#include "llvm/Analysis/ValueTracking.h"
27
#include "llvm/IR/BasicBlock.h"
28
#include "llvm/IR/Constants.h"
29
#include "llvm/IR/DataLayout.h"
30
#include "llvm/IR/DerivedTypes.h"
31
#include "llvm/IR/Dominators.h"
32
#include "llvm/IR/InstIterator.h"
33
#include "llvm/IR/InstrTypes.h"
34
#include "llvm/IR/Instruction.h"
35
#include "llvm/IR/IntrinsicInst.h"
36
#include "llvm/IR/Intrinsics.h"
37
#include "llvm/IR/Module.h"
38
#include "llvm/IR/Operator.h"
39
#include "llvm/IR/PassManager.h"
40
#include "llvm/IR/PatternMatch.h"
41
#include "llvm/IR/Type.h"
42
#include "llvm/IR/Use.h"
43
#include "llvm/Pass.h"
44
#include "llvm/Support/Casting.h"
45
#include "llvm/Support/Debug.h"
46
#include "llvm/Support/KnownBits.h"
47
#include "llvm/Support/raw_ostream.h"
48
#include <algorithm>
49
#include <cstdint>
50
51
using namespace llvm;
52
using namespace llvm::PatternMatch;
53
54
#define DEBUG_TYPE "demanded-bits"
55
56
char DemandedBitsWrapperPass::ID = 0;
57
58
48.9k
INITIALIZE_PASS_BEGIN(DemandedBitsWrapperPass, "demanded-bits",
59
48.9k
                      "Demanded bits analysis", false, false)
60
48.9k
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
61
48.9k
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
62
48.9k
INITIALIZE_PASS_END(DemandedBitsWrapperPass, "demanded-bits",
63
                    "Demanded bits analysis", false, false)
64
65
40.4k
DemandedBitsWrapperPass::DemandedBitsWrapperPass() : FunctionPass(ID) {
66
40.4k
  initializeDemandedBitsWrapperPassPass(*PassRegistry::getPassRegistry());
67
40.4k
}
68
69
40.4k
void DemandedBitsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
70
40.4k
  AU.setPreservesCFG();
71
40.4k
  AU.addRequired<AssumptionCacheTracker>();
72
40.4k
  AU.addRequired<DominatorTreeWrapperPass>();
73
40.4k
  AU.setPreservesAll();
74
40.4k
}
75
76
20
void DemandedBitsWrapperPass::print(raw_ostream &OS, const Module *M) const {
77
20
  DB->print(OS);
78
20
}
79
80
25.0M
static bool isAlwaysLive(Instruction *I) {
81
25.0M
  return I->isTerminator() || 
isa<DbgInfoIntrinsic>(I)17.7M
||
I->isEHPad()17.7M
||
82
25.0M
         
I->mayHaveSideEffects()17.7M
;
83
25.0M
}
84
85
void DemandedBits::determineLiveOperandBits(
86
    const Instruction *UserI, const Value *Val, unsigned OperandNo,
87
    const APInt &AOut, APInt &AB, KnownBits &Known, KnownBits &Known2,
88
5.53M
    bool &KnownBitsComputed) {
89
5.53M
  unsigned BitWidth = AB.getBitWidth();
90
5.53M
91
5.53M
  // We're called once per operand, but for some instructions, we need to
92
5.53M
  // compute known bits of both operands in order to determine the live bits of
93
5.53M
  // either (when both operands are instructions themselves). We don't,
94
5.53M
  // however, want to do this twice, so we cache the result in APInts that live
95
5.53M
  // in the caller. For the two-relevant-operands case, both operand values are
96
5.53M
  // provided here.
97
5.53M
  auto ComputeKnownBits =
98
5.53M
      [&](unsigned BitWidth, const Value *V1, const Value *V2) {
99
435k
        if (KnownBitsComputed)
100
149k
          return;
101
286k
        KnownBitsComputed = true;
102
286k
103
286k
        const DataLayout &DL = UserI->getModule()->getDataLayout();
104
286k
        Known = KnownBits(BitWidth);
105
286k
        computeKnownBits(V1, Known, DL, 0, &AC, UserI, &DT);
106
286k
107
286k
        if (V2) {
108
280k
          Known2 = KnownBits(BitWidth);
109
280k
          computeKnownBits(V2, Known2, DL, 0, &AC, UserI, &DT);
110
280k
        }
111
286k
      };
112
5.53M
113
5.53M
  switch (UserI->getOpcode()) {
114
5.53M
  
default: break1.89M
;
115
5.53M
  case Instruction::Call:
116
362k
  case Instruction::Invoke:
117
362k
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
118
24.7k
      switch (II->getIntrinsicID()) {
119
24.7k
      
default: break17.9k
;
120
24.7k
      case Intrinsic::bswap:
121
705
        // The alive bits of the input are the swapped alive bits of
122
705
        // the output.
123
705
        AB = AOut.byteSwap();
124
705
        break;
125
24.7k
      case Intrinsic::bitreverse:
126
52
        // The alive bits of the input are the reversed alive bits of
127
52
        // the output.
128
52
        AB = AOut.reverseBits();
129
52
        break;
130
24.7k
      case Intrinsic::ctlz:
131
4.74k
        if (OperandNo == 0) {
132
4.74k
          // We need some output bits, so we need all bits of the
133
4.74k
          // input to the left of, and including, the leftmost bit
134
4.74k
          // known to be one.
135
4.74k
          ComputeKnownBits(BitWidth, Val, nullptr);
136
4.74k
          AB = APInt::getHighBitsSet(BitWidth,
137
4.74k
                 std::min(BitWidth, Known.countMaxLeadingZeros()+1));
138
4.74k
        }
139
4.74k
        break;
140
24.7k
      case Intrinsic::cttz:
141
1.07k
        if (OperandNo == 0) {
142
1.07k
          // We need some output bits, so we need all bits of the
143
1.07k
          // input to the right of, and including, the rightmost bit
144
1.07k
          // known to be one.
145
1.07k
          ComputeKnownBits(BitWidth, Val, nullptr);
146
1.07k
          AB = APInt::getLowBitsSet(BitWidth,
147
1.07k
                 std::min(BitWidth, Known.countMaxTrailingZeros()+1));
148
1.07k
        }
149
1.07k
        break;
150
24.7k
      case Intrinsic::fshl:
151
186
      case Intrinsic::fshr: {
152
186
        const APInt *SA;
153
186
        if (OperandNo == 2) {
154
18
          // Shift amount is modulo the bitwidth. For powers of two we have
155
18
          // SA % BW == SA & (BW - 1).
156
18
          if (isPowerOf2_32(BitWidth))
157
16
            AB = BitWidth - 1;
158
168
        } else if (match(II->getOperand(2), m_APInt(SA))) {
159
132
          // Normalize to funnel shift left. APInt shifts of BitWidth are well-
160
132
          // defined, so no need to special-case zero shifts here.
161
132
          uint64_t ShiftAmt = SA->urem(BitWidth);
162
132
          if (II->getIntrinsicID() == Intrinsic::fshr)
163
24
            ShiftAmt = BitWidth - ShiftAmt;
164
132
165
132
          if (OperandNo == 0)
166
66
            AB = AOut.lshr(ShiftAmt);
167
66
          else if (OperandNo == 1)
168
66
            AB = AOut.shl(BitWidth - ShiftAmt);
169
132
        }
170
186
        break;
171
362k
      }
172
24.7k
      }
173
362k
    break;
174
1.00M
  case Instruction::Add:
175
1.00M
  case Instruction::Sub:
176
1.00M
  case Instruction::Mul:
177
1.00M
    // Find the highest live output bit. We don't need any more input
178
1.00M
    // bits than that (adds, and thus subtracts, ripple only to the
179
1.00M
    // left).
180
1.00M
    AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
181
1.00M
    break;
182
1.00M
  case Instruction::Shl:
183
128k
    if (OperandNo == 0) {
184
106k
      const APInt *ShiftAmtC;
185
106k
      if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
186
95.4k
        uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
187
95.4k
        AB = AOut.lshr(ShiftAmt);
188
95.4k
189
95.4k
        // If the shift is nuw/nsw, then the high bits are not dead
190
95.4k
        // (because we've promised that they *must* be zero).
191
95.4k
        const ShlOperator *S = cast<ShlOperator>(UserI);
192
95.4k
        if (S->hasNoSignedWrap())
193
30.3k
          AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
194
65.0k
        else if (S->hasNoUnsignedWrap())
195
11.5k
          AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
196
95.4k
      }
197
106k
    }
198
128k
    break;
199
1.00M
  case Instruction::LShr:
200
100k
    if (OperandNo == 0) {
201
92.3k
      const APInt *ShiftAmtC;
202
92.3k
      if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
203
84.3k
        uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
204
84.3k
        AB = AOut.shl(ShiftAmt);
205
84.3k
206
84.3k
        // If the shift is exact, then the low bits are not dead
207
84.3k
        // (they must be zero).
208
84.3k
        if (cast<LShrOperator>(UserI)->isExact())
209
1.66k
          AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
210
84.3k
      }
211
92.3k
    }
212
100k
    break;
213
1.00M
  case Instruction::AShr:
214
31.6k
    if (OperandNo == 0) {
215
29.4k
      const APInt *ShiftAmtC;
216
29.4k
      if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
217
27.2k
        uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
218
27.2k
        AB = AOut.shl(ShiftAmt);
219
27.2k
        // Because the high input bit is replicated into the
220
27.2k
        // high-order bits of the result, if we need any of those
221
27.2k
        // bits, then we must keep the highest input bit.
222
27.2k
        if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
223
27.2k
            .getBoolValue())
224
25.5k
          AB.setSignBit();
225
27.2k
226
27.2k
        // If the shift is exact, then the low bits are not dead
227
27.2k
        // (they must be zero).
228
27.2k
        if (cast<AShrOperator>(UserI)->isExact())
229
17.8k
          AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
230
27.2k
      }
231
29.4k
    }
232
31.6k
    break;
233
1.00M
  case Instruction::And:
234
259k
    AB = AOut;
235
259k
236
259k
    // For bits that are known zero, the corresponding bits in the
237
259k
    // other operand are dead (unless they're both zero, in which
238
259k
    // case they can't both be dead, so just mark the LHS bits as
239
259k
    // dead).
240
259k
    ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
241
259k
    if (OperandNo == 0)
242
185k
      AB &= ~Known2.Zero;
243
73.6k
    else
244
73.6k
      AB &= ~(Known.Zero & ~Known2.Zero);
245
259k
    break;
246
1.00M
  case Instruction::Or:
247
170k
    AB = AOut;
248
170k
249
170k
    // For bits that are known one, the corresponding bits in the
250
170k
    // other operand are dead (unless they're both one, in which
251
170k
    // case they can't both be dead, so just mark the LHS bits as
252
170k
    // dead).
253
170k
    ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
254
170k
    if (OperandNo == 0)
255
94.1k
      AB &= ~Known2.One;
256
76.5k
    else
257
76.5k
      AB &= ~(Known.One & ~Known2.One);
258
170k
    break;
259
1.00M
  case Instruction::Xor:
260
871k
  case Instruction::PHI:
261
871k
    AB = AOut;
262
871k
    break;
263
871k
  case Instruction::Trunc:
264
116k
    AB = AOut.zext(BitWidth);
265
116k
    break;
266
871k
  case Instruction::ZExt:
267
167k
    AB = AOut.trunc(BitWidth);
268
167k
    break;
269
871k
  case Instruction::SExt:
270
191k
    AB = AOut.trunc(BitWidth);
271
191k
    // Because the high input bit is replicated into the
272
191k
    // high-order bits of the result, if we need any of those
273
191k
    // bits, then we must keep the highest input bit.
274
191k
    if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
275
191k
                                      AOut.getBitWidth() - BitWidth))
276
191k
        .getBoolValue())
277
189k
      AB.setSignBit();
278
191k
    break;
279
871k
  case Instruction::Select:
280
234k
    if (OperandNo != 0)
281
120k
      AB = AOut;
282
234k
    break;
283
871k
  case Instruction::ExtractElement:
284
3.79k
    if (OperandNo == 0)
285
3.64k
      AB = AOut;
286
3.79k
    break;
287
871k
  case Instruction::InsertElement:
288
8.20k
  case Instruction::ShuffleVector:
289
8.20k
    if (OperandNo == 0 || 
OperandNo == 13.87k
)
290
8.08k
      AB = AOut;
291
8.20k
    break;
292
5.53M
  }
293
5.53M
}
294
295
1.02M
bool DemandedBitsWrapperPass::runOnFunction(Function &F) {
296
1.02M
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
297
1.02M
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
298
1.02M
  DB.emplace(F, AC, DT);
299
1.02M
  return false;
300
1.02M
}
301
302
1.02M
void DemandedBitsWrapperPass::releaseMemory() {
303
1.02M
  DB.reset();
304
1.02M
}
305
306
22.4M
void DemandedBits::performAnalysis() {
307
22.4M
  if (Analyzed)
308
21.9M
    // Analysis already completed for this function.
309
21.9M
    return;
310
468k
  Analyzed = true;
311
468k
312
468k
  Visited.clear();
313
468k
  AliveBits.clear();
314
468k
  DeadUses.clear();
315
468k
316
468k
  SmallSetVector<Instruction*, 16> Worklist;
317
468k
318
468k
  // Collect the set of "root" instructions that are known live.
319
15.4M
  for (Instruction &I : instructions(F)) {
320
15.4M
    if (!isAlwaysLive(&I))
321
9.30M
      continue;
322
6.14M
323
6.14M
    LLVM_DEBUG(dbgs() << "DemandedBits: Root: " << I << "\n");
324
6.14M
    // For integer-valued instructions, set up an initial empty set of alive
325
6.14M
    // bits and add the instruction to the work list. For other instructions
326
6.14M
    // add their operands to the work list (for integer values operands, mark
327
6.14M
    // all bits as live).
328
6.14M
    Type *T = I.getType();
329
6.14M
    if (T->isIntOrIntVectorTy()) {
330
448k
      if (AliveBits.try_emplace(&I, T->getScalarSizeInBits(), 0).second)
331
448k
        Worklist.insert(&I);
332
448k
333
448k
      continue;
334
448k
    }
335
5.70M
336
5.70M
    // Non-integer-typed instructions...
337
12.8M
    
for (Use &OI : I.operands())5.70M
{
338
12.8M
      if (Instruction *J = dyn_cast<Instruction>(OI)) {
339
5.05M
        Type *T = J->getType();
340
5.05M
        if (T->isIntOrIntVectorTy())
341
2.19M
          AliveBits[J] = APInt::getAllOnesValue(T->getScalarSizeInBits());
342
2.85M
        else
343
2.85M
          Visited.insert(J);
344
5.05M
        Worklist.insert(J);
345
5.05M
      }
346
12.8M
    }
347
5.70M
    // To save memory, we don't add I to the Visited set here. Instead, we
348
5.70M
    // check isAlwaysLive on every instruction when searching for dead
349
5.70M
    // instructions later (we need to check isAlwaysLive for the
350
5.70M
    // integer-typed instructions anyway).
351
5.70M
  }
352
468k
353
468k
  // Propagate liveness backwards to operands.
354
10.9M
  while (!Worklist.empty()) {
355
10.5M
    Instruction *UserI = Worklist.pop_back_val();
356
10.5M
357
10.5M
    LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
358
10.5M
    APInt AOut;
359
10.5M
    bool InputIsKnownDead = false;
360
10.5M
    if (UserI->getType()->isIntOrIntVectorTy()) {
361
5.60M
      AOut = AliveBits[UserI];
362
5.60M
      LLVM_DEBUG(dbgs() << " Alive Out: 0x"
363
5.60M
                        << Twine::utohexstr(AOut.getLimitedValue()));
364
5.60M
365
5.60M
      // If all bits of the output are dead, then all bits of the input
366
5.60M
      // are also dead.
367
5.60M
      InputIsKnownDead = !AOut && 
!isAlwaysLive(UserI)220k
;
368
5.60M
    }
369
10.5M
    LLVM_DEBUG(dbgs() << "\n");
370
10.5M
371
10.5M
    KnownBits Known, Known2;
372
10.5M
    bool KnownBitsComputed = false;
373
10.5M
    // Compute the set of alive bits for each operand. These are anded into the
374
10.5M
    // existing set, if any, and if that changes the set of alive bits, the
375
10.5M
    // operand is added to the work-list.
376
23.1M
    for (Use &OI : UserI->operands()) {
377
23.1M
      // We also want to detect dead uses of arguments, but will only store
378
23.1M
      // demanded bits for instructions.
379
23.1M
      Instruction *I = dyn_cast<Instruction>(OI);
380
23.1M
      if (!I && 
!isa<Argument>(OI)12.1M
)
381
10.2M
        continue;
382
12.8M
383
12.8M
      Type *T = OI->getType();
384
12.8M
      if (T->isIntOrIntVectorTy()) {
385
5.54M
        unsigned BitWidth = T->getScalarSizeInBits();
386
5.54M
        APInt AB = APInt::getAllOnesValue(BitWidth);
387
5.54M
        if (InputIsKnownDead) {
388
4.46k
          AB = APInt(BitWidth, 0);
389
5.53M
        } else {
390
5.53M
          // Bits of each operand that are used to compute alive bits of the
391
5.53M
          // output are alive, all others are dead.
392
5.53M
          determineLiveOperandBits(UserI, OI, OI.getOperandNo(), AOut, AB,
393
5.53M
                                   Known, Known2, KnownBitsComputed);
394
5.53M
395
5.53M
          // Keep track of uses which have no demanded bits.
396
5.53M
          if (AB.isNullValue())
397
1.27k
            DeadUses.insert(&OI);
398
5.53M
          else
399
5.53M
            DeadUses.erase(&OI);
400
5.53M
        }
401
5.54M
402
5.54M
        if (I) {
403
5.16M
          // If we've added to the set of alive bits (or the operand has not
404
5.16M
          // been previously visited), then re-queue the operand to be visited
405
5.16M
          // again.
406
5.16M
          auto Res = AliveBits.try_emplace(I);
407
5.16M
          if (Res.second || 
(AB |= Res.first->second) != Res.first->second2.29M
) {
408
3.31M
            Res.first->second = std::move(AB);
409
3.31M
            Worklist.insert(I);
410
3.31M
          }
411
5.16M
        }
412
7.33M
      } else if (I && 
Visited.insert(I).second5.84M
) {
413
2.82M
        Worklist.insert(I);
414
2.82M
      }
415
12.8M
    }
416
10.5M
  }
417
468k
}
418
419
5.00M
APInt DemandedBits::getDemandedBits(Instruction *I) {
420
5.00M
  performAnalysis();
421
5.00M
422
5.00M
  auto Found = AliveBits.find(I);
423
5.00M
  if (Found != AliveBits.end())
424
5.00M
    return Found->second;
425
292
426
292
  const DataLayout &DL = I->getModule()->getDataLayout();
427
292
  return APInt::getAllOnesValue(
428
292
      DL.getTypeSizeInBits(I->getType()->getScalarType()));
429
292
}
430
431
12.7M
bool DemandedBits::isInstructionDead(Instruction *I) {
432
12.7M
  performAnalysis();
433
12.7M
434
12.7M
  return !Visited.count(I) && 
AliveBits.find(I) == AliveBits.end()7.91M
&&
435
12.7M
    
!isAlwaysLive(I)2.92M
;
436
12.7M
}
437
438
6.39M
bool DemandedBits::isUseDead(Use *U) {
439
6.39M
  // We only track integer uses, everything else is assumed live.
440
6.39M
  if (!(*U)->getType()->isIntOrIntVectorTy())
441
0
    return false;
442
6.39M
443
6.39M
  // Uses by always-live instructions are never dead.
444
6.39M
  Instruction *UserI = cast<Instruction>(U->getUser());
445
6.39M
  if (isAlwaysLive(UserI))
446
1.67M
    return false;
447
4.72M
448
4.72M
  performAnalysis();
449
4.72M
  if (DeadUses.count(U))
450
63
    return true;
451
4.72M
452
4.72M
  // If no output bits are demanded, no input bits are demanded and the use
453
4.72M
  // is dead. These uses might not be explicitly present in the DeadUses map.
454
4.72M
  if (UserI->getType()->isIntOrIntVectorTy()) {
455
4.24M
    auto Found = AliveBits.find(UserI);
456
4.24M
    if (Found != AliveBits.end() && Found->second.isNullValue())
457
0
      return true;
458
4.72M
  }
459
4.72M
460
4.72M
  return false;
461
4.72M
}
462
463
46
void DemandedBits::print(raw_ostream &OS) {
464
46
  performAnalysis();
465
159
  for (auto &KV : AliveBits) {
466
159
    OS << "DemandedBits: 0x" << Twine::utohexstr(KV.second.getLimitedValue())
467
159
       << " for " << *KV.first << '\n';
468
159
  }
469
46
}
470
471
0
FunctionPass *llvm::createDemandedBitsWrapperPass() {
472
0
  return new DemandedBitsWrapperPass();
473
0
}
474
475
AnalysisKey DemandedBitsAnalysis::Key;
476
477
DemandedBits DemandedBitsAnalysis::run(Function &F,
478
2.06k
                                             FunctionAnalysisManager &AM) {
479
2.06k
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
480
2.06k
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
481
2.06k
  return DemandedBits(F, AC, DT);
482
2.06k
}
483
484
PreservedAnalyses DemandedBitsPrinterPass::run(Function &F,
485
26
                                               FunctionAnalysisManager &AM) {
486
26
  AM.getResult<DemandedBitsAnalysis>(F).print(OS);
487
26
  return PreservedAnalyses::all();
488
26
}