Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file contains an optimization for div and rem on architectures that
10
// execute short instructions significantly faster than longer instructions.
11
// For example, on Intel Atom 32-bit divides are slow enough that during
12
// runtime it is profitable to check the value of the operands, and if they are
13
// positive and less than 256 use an unsigned 8-bit divide.
14
//
15
//===----------------------------------------------------------------------===//
16
17
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
18
#include "llvm/ADT/DenseMap.h"
19
#include "llvm/ADT/None.h"
20
#include "llvm/ADT/Optional.h"
21
#include "llvm/ADT/STLExtras.h"
22
#include "llvm/ADT/SmallPtrSet.h"
23
#include "llvm/Transforms/Utils/Local.h"
24
#include "llvm/Analysis/ValueTracking.h"
25
#include "llvm/IR/BasicBlock.h"
26
#include "llvm/IR/Constants.h"
27
#include "llvm/IR/DerivedTypes.h"
28
#include "llvm/IR/Function.h"
29
#include "llvm/IR/IRBuilder.h"
30
#include "llvm/IR/Instruction.h"
31
#include "llvm/IR/Instructions.h"
32
#include "llvm/IR/Module.h"
33
#include "llvm/IR/Type.h"
34
#include "llvm/IR/Value.h"
35
#include "llvm/Support/Casting.h"
36
#include "llvm/Support/KnownBits.h"
37
#include <cassert>
38
#include <cstdint>
39
40
using namespace llvm;
41
42
#define DEBUG_TYPE "bypass-slow-division"
43
44
namespace {
45
46
  struct QuotRemPair {
47
    Value *Quotient;
48
    Value *Remainder;
49
50
    QuotRemPair(Value *InQuotient, Value *InRemainder)
51
264
        : Quotient(InQuotient), Remainder(InRemainder) {}
52
  };
53
54
  /// A quotient and remainder, plus a BB from which they logically "originate".
55
  /// If you use Quotient or Remainder in a Phi node, you should use BB as its
56
  /// corresponding predecessor.
57
  struct QuotRemWithBB {
58
    BasicBlock *BB = nullptr;
59
    Value *Quotient = nullptr;
60
    Value *Remainder = nullptr;
61
  };
62
63
using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
64
using BypassWidthsTy = DenseMap<unsigned, unsigned>;
65
using VisitedSetTy = SmallPtrSet<Instruction *, 4>;
66
67
enum ValueRange {
68
  /// Operand definitely fits into BypassType. No runtime checks are needed.
69
  VALRNG_KNOWN_SHORT,
70
  /// A runtime check is required, as value range is unknown.
71
  VALRNG_UNKNOWN,
72
  /// Operand is unlikely to fit into BypassType. The bypassing should be
73
  /// disabled.
74
  VALRNG_LIKELY_LONG
75
};
76
77
class FastDivInsertionTask {
78
  bool IsValidTask = false;
79
  Instruction *SlowDivOrRem = nullptr;
80
  IntegerType *BypassType = nullptr;
81
  BasicBlock *MainBB = nullptr;
82
83
  bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
84
  ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
85
  QuotRemWithBB createSlowBB(BasicBlock *Successor);
86
  QuotRemWithBB createFastBB(BasicBlock *Successor);
87
  QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
88
                                   BasicBlock *PhiBB);
89
  Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
90
  Optional<QuotRemPair> insertFastDivAndRem();
91
92
836
  bool isSignedOp() {
93
836
    return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
94
836
           
SlowDivOrRem->getOpcode() == Instruction::SRem599
;
95
836
  }
96
97
272
  bool isDivisionOp() {
98
272
    return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
99
272
           
SlowDivOrRem->getOpcode() == Instruction::UDiv245
;
100
272
  }
101
102
1.30k
  Type *getSlowType() { return SlowDivOrRem->getType(); }
103
104
public:
105
  FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
106
107
  Value *getReplacement(DivCacheTy &Cache);
108
};
109
110
} // end anonymous namespace
111
112
FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
113
449k
                                           const BypassWidthsTy &BypassWidths) {
114
449k
  switch (I->getOpcode()) {
115
449k
  case Instruction::UDiv:
116
982
  case Instruction::SDiv:
117
982
  case Instruction::URem:
118
982
  case Instruction::SRem:
119
982
    SlowDivOrRem = I;
120
982
    break;
121
448k
  default:
122
448k
    // I is not a div/rem operation.
123
448k
    return;
124
982
  }
125
982
126
982
  // Skip division on vector types. Only optimize integer instructions.
127
982
  IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
128
982
  if (!SlowType)
129
2
    return;
130
980
131
980
  // Skip if this bitwidth is not bypassed.
132
980
  auto BI = BypassWidths.find(SlowType->getBitWidth());
133
980
  if (BI == BypassWidths.end())
134
405
    return;
135
575
136
575
  // Get type for div/rem instruction with bypass bitwidth.
137
575
  IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
138
575
  BypassType = BT;
139
575
140
575
  // The original basic block.
141
575
  MainBB = I->getParent();
142
575
143
575
  // The instruction is indeed a slow div or rem operation.
144
575
  IsValidTask = true;
145
575
}
146
147
/// Reuses previously-computed dividend or remainder from the current BB if
148
/// operands and operation are identical. Otherwise calls insertFastDivAndRem to
149
/// perform the optimization and caches the resulting dividend and remainder.
150
/// If no replacement can be generated, nullptr is returned.
151
449k
Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
152
449k
  // First, make sure that the task is valid.
153
449k
  if (!IsValidTask)
154
449k
    return nullptr;
155
575
156
575
  // Then, look for a value in Cache.
157
575
  Value *Dividend = SlowDivOrRem->getOperand(0);
158
575
  Value *Divisor = SlowDivOrRem->getOperand(1);
159
575
  DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
160
575
  auto CacheI = Cache.find(Key);
161
575
162
575
  if (CacheI == Cache.end()) {
163
567
    // If previous instance does not exist, try to insert fast div.
164
567
    Optional<QuotRemPair> OptResult = insertFastDivAndRem();
165
567
    // Bail out if insertFastDivAndRem has failed.
166
567
    if (!OptResult)
167
303
      return nullptr;
168
264
    CacheI = Cache.insert({Key, *OptResult}).first;
169
264
  }
170
575
171
575
  QuotRemPair &Value = CacheI->second;
172
272
  return isDivisionOp() ? 
Value.Quotient108
:
Value.Remainder164
;
173
575
}
174
175
/// Check if a value looks like a hash.
176
///
177
/// The routine is expected to detect values computed using the most common hash
178
/// algorithms. Typically, hash computations end with one of the following
179
/// instructions:
180
///
181
/// 1) MUL with a constant wider than BypassType
182
/// 2) XOR instruction
183
///
184
/// And even if we are wrong and the value is not a hash, it is still quite
185
/// unlikely that such values will fit into BypassType.
186
///
187
/// To detect string hash algorithms like FNV we have to look through PHI-nodes.
188
/// It is implemented as a depth-first search for values that look neither long
189
/// nor hash-like.
190
822
bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
191
822
  Instruction *I = dyn_cast<Instruction>(V);
192
822
  if (!I)
193
151
    return false;
194
671
195
671
  switch (I->getOpcode()) {
196
671
  case Instruction::Xor:
197
8
    return true;
198
671
  case Instruction::Mul: {
199
14
    // After Constant Hoisting pass, long constants may be represented as
200
14
    // bitcast instructions. As a result, some constants may look like an
201
14
    // instruction at first, and an additional check is necessary to find out if
202
14
    // an operand is actually a constant.
203
14
    Value *Op1 = I->getOperand(1);
204
14
    ConstantInt *C = dyn_cast<ConstantInt>(Op1);
205
14
    if (!C && 
isa<BitCastInst>(Op1)7
)
206
0
      C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
207
14
    return C && 
C->getValue().getMinSignedBits() > BypassType->getBitWidth()7
;
208
671
  }
209
671
  case Instruction::PHI:
210
130
    // Stop IR traversal in case of a crazy input code. This limits recursion
211
130
    // depth.
212
130
    if (Visited.size() >= 16)
213
0
      return false;
214
130
    // Do not visit nodes that have been visited already. We return true because
215
130
    // it means that we couldn't find any value that doesn't look hash-like.
216
130
    if (Visited.find(I) != Visited.end())
217
4
      return true;
218
126
    Visited.insert(I);
219
131
    return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
220
131
      // Ignore undef values as they probably don't affect the division
221
131
      // operands.
222
131
      return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
223
131
             
isa<UndefValue>(V)126
;
224
131
    });
225
519
  default:
226
519
    return false;
227
671
  }
228
671
}
229
230
/// Check if an integer value fits into our bypass type.
231
ValueRange FastDivInsertionTask::getValueRange(Value *V,
232
1.23k
                                               VisitedSetTy &Visited) {
233
1.23k
  unsigned ShortLen = BypassType->getBitWidth();
234
1.23k
  unsigned LongLen = V->getType()->getIntegerBitWidth();
235
1.23k
236
1.23k
  assert(LongLen > ShortLen && "Value type must be wider than BypassType");
237
1.23k
  unsigned HiBits = LongLen - ShortLen;
238
1.23k
239
1.23k
  const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
240
1.23k
  KnownBits Known(LongLen);
241
1.23k
242
1.23k
  computeKnownBits(V, Known, DL);
243
1.23k
244
1.23k
  if (Known.countMinLeadingZeros() >= HiBits)
245
370
    return VALRNG_KNOWN_SHORT;
246
860
247
860
  if (Known.countMaxLeadingZeros() < HiBits)
248
38
    return VALRNG_LIKELY_LONG;
249
822
250
822
  // Long integer divisions are often used in hashtable implementations. It's
251
822
  // not worth bypassing such divisions because hash values are extremely
252
822
  // unlikely to have enough leading zeros. The call below tries to detect
253
822
  // values that are unlikely to fit BypassType (including hashes).
254
822
  if (isHashLikeValue(V, Visited))
255
14
    return VALRNG_LIKELY_LONG;
256
808
257
808
  return VALRNG_UNKNOWN;
258
808
}
259
260
/// Add new basic block for slow div and rem operations and put it before
261
/// SuccessorBB.
262
213
QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
263
213
  QuotRemWithBB DivRemPair;
264
213
  DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
265
213
                                     MainBB->getParent(), SuccessorBB);
266
213
  IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
267
213
268
213
  Value *Dividend = SlowDivOrRem->getOperand(0);
269
213
  Value *Divisor = SlowDivOrRem->getOperand(1);
270
213
271
213
  if (isSignedOp()) {
272
36
    DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
273
36
    DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
274
177
  } else {
275
177
    DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
276
177
    DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
277
177
  }
278
213
279
213
  Builder.CreateBr(SuccessorBB);
280
213
  return DivRemPair;
281
213
}
282
283
/// Add new basic block for fast div and rem operations and put it before
284
/// SuccessorBB.
285
258
QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
286
258
  QuotRemWithBB DivRemPair;
287
258
  DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
288
258
                                     MainBB->getParent(), SuccessorBB);
289
258
  IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
290
258
291
258
  Value *Dividend = SlowDivOrRem->getOperand(0);
292
258
  Value *Divisor = SlowDivOrRem->getOperand(1);
293
258
  Value *ShortDivisorV =
294
258
      Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
295
258
  Value *ShortDividendV =
296
258
      Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
297
258
298
258
  // udiv/urem because this optimization only handles positive numbers.
299
258
  Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
300
258
  Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
301
258
  DivRemPair.Quotient =
302
258
      Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
303
258
  DivRemPair.Remainder =
304
258
      Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
305
258
  Builder.CreateBr(SuccessorBB);
306
258
307
258
  return DivRemPair;
308
258
}
309
310
/// Creates Phi nodes for result of Div and Rem.
311
QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
312
                                                       QuotRemWithBB &RHS,
313
258
                                                       BasicBlock *PhiBB) {
314
258
  IRBuilder<> Builder(PhiBB, PhiBB->begin());
315
258
  PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
316
258
  QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
317
258
  QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
318
258
  PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
319
258
  RemPhi->addIncoming(LHS.Remainder, LHS.BB);
320
258
  RemPhi->addIncoming(RHS.Remainder, RHS.BB);
321
258
  return QuotRemPair(QuoPhi, RemPhi);
322
258
}
323
324
/// Creates a runtime check to test whether both the divisor and dividend fit
325
/// into BypassType. The check is inserted at the end of MainBB. True return
326
/// value means that the operands fit. Either of the operands may be NULL if it
327
/// doesn't need a runtime check.
328
213
Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
329
213
  assert((Op1 || Op2) && "Nothing to check");
330
213
  IRBuilder<> Builder(MainBB, MainBB->end());
331
213
332
213
  Value *OrV;
333
213
  if (Op1 && 
Op2210
)
334
203
    OrV = Builder.CreateOr(Op1, Op2);
335
10
  else
336
10
    OrV = Op1 ? 
Op17
:
Op23
;
337
213
338
213
  // BitMask is inverted to check if the operands are
339
213
  // larger than the bypass type
340
213
  uint64_t BitMask = ~BypassType->getBitMask();
341
213
  Value *AndV = Builder.CreateAnd(OrV, BitMask);
342
213
343
213
  // Compare operand values
344
213
  Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
345
213
  return Builder.CreateICmpEQ(AndV, ZeroV);
346
213
}
347
348
/// Substitutes the div/rem instruction with code that checks the value of the
349
/// operands and uses a shorter-faster div/rem instruction when possible.
350
567
Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
351
567
  Value *Dividend = SlowDivOrRem->getOperand(0);
352
567
  Value *Divisor = SlowDivOrRem->getOperand(1);
353
567
354
567
  VisitedSetTy SetL;
355
567
  ValueRange DividendRange = getValueRange(Dividend, SetL);
356
567
  if (DividendRange == VALRNG_LIKELY_LONG)
357
35
    return None;
358
532
359
532
  VisitedSetTy SetR;
360
532
  ValueRange DivisorRange = getValueRange(Divisor, SetR);
361
532
  if (DivisorRange == VALRNG_LIKELY_LONG)
362
12
    return None;
363
520
364
520
  bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
365
520
  bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
366
520
367
520
  if (DividendShort && 
DivisorShort54
) {
368
6
    // If both operands are known to be short then just replace the long
369
6
    // division with a short one in-place.  Since we're not introducing control
370
6
    // flow in this case, narrowing the division is always a win, even if the
371
6
    // divisor is a constant (and will later get replaced by a multiplication).
372
6
373
6
    IRBuilder<> Builder(SlowDivOrRem);
374
6
    Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
375
6
    Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
376
6
    Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
377
6
    Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
378
6
    Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
379
6
    Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
380
6
    return QuotRemPair(ExtDiv, ExtRem);
381
6
  }
382
514
383
514
  if (isa<ConstantInt>(Divisor)) {
384
256
    // If the divisor is not a constant, DAGCombiner will convert it to a
385
256
    // multiplication by a magic constant.  It isn't clear if it is worth
386
256
    // introducing control flow to get a narrower multiply.
387
256
    return None;
388
256
  }
389
258
390
258
  // After Constant Hoisting pass, long constants may be represented as
391
258
  // bitcast instructions. As a result, some constants may look like an
392
258
  // instruction at first, and an additional check is necessary to find out if
393
258
  // an operand is actually a constant.
394
258
  if (auto *BCI = dyn_cast<BitCastInst>(Divisor))
395
0
    if (BCI->getParent() == SlowDivOrRem->getParent() &&
396
0
        isa<ConstantInt>(BCI->getOperand(0)))
397
0
      return None;
398
258
399
258
  if (DividendShort && 
!isSignedOp()48
) {
400
45
    // If the division is unsigned and Dividend is known to be short, then
401
45
    // either
402
45
    // 1) Divisor is less or equal to Dividend, and the result can be computed
403
45
    //    with a short division.
404
45
    // 2) Divisor is greater than Dividend. In this case, no division is needed
405
45
    //    at all: The quotient is 0 and the remainder is equal to Dividend.
406
45
    //
407
45
    // So instead of checking at runtime whether Divisor fits into BypassType,
408
45
    // we emit a runtime check to differentiate between these two cases. This
409
45
    // lets us entirely avoid a long div.
410
45
411
45
    // Split the basic block before the div/rem.
412
45
    BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
413
45
    // Remove the unconditional branch from MainBB to SuccessorBB.
414
45
    MainBB->getInstList().back().eraseFromParent();
415
45
    QuotRemWithBB Long;
416
45
    Long.BB = MainBB;
417
45
    Long.Quotient = ConstantInt::get(getSlowType(), 0);
418
45
    Long.Remainder = Dividend;
419
45
    QuotRemWithBB Fast = createFastBB(SuccessorBB);
420
45
    QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
421
45
    IRBuilder<> Builder(MainBB, MainBB->end());
422
45
    Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
423
45
    Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
424
45
    return Result;
425
213
  } else {
426
213
    // General case. Create both slow and fast div/rem pairs and choose one of
427
213
    // them at runtime.
428
213
429
213
    // Split the basic block before the div/rem.
430
213
    BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
431
213
    // Remove the unconditional branch from MainBB to SuccessorBB.
432
213
    MainBB->getInstList().back().eraseFromParent();
433
213
    QuotRemWithBB Fast = createFastBB(SuccessorBB);
434
213
    QuotRemWithBB Slow = createSlowBB(SuccessorBB);
435
213
    QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
436
213
    Value *CmpV = insertOperandRuntimeCheck(DividendShort ? 
nullptr3
:
Dividend210
,
437
213
                                            DivisorShort ? 
nullptr7
:
Divisor206
);
438
213
    IRBuilder<> Builder(MainBB, MainBB->end());
439
213
    Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
440
213
    return Result;
441
213
  }
442
258
}
443
444
/// This optimization identifies DIV/REM instructions in a BB that can be
445
/// profitably bypassed and carried out with a shorter, faster divide.
446
bool llvm::bypassSlowDivision(BasicBlock *BB,
447
78.1k
                              const BypassWidthsTy &BypassWidths) {
448
78.1k
  DivCacheTy PerBBDivCache;
449
78.1k
450
78.1k
  bool MadeChange = false;
451
78.1k
  Instruction* Next = &*BB->begin();
452
527k
  while (Next != nullptr) {
453
449k
    // We may add instructions immediately after I, but we want to skip over
454
449k
    // them.
455
449k
    Instruction* I = Next;
456
449k
    Next = Next->getNextNode();
457
449k
458
449k
    FastDivInsertionTask Task(I, BypassWidths);
459
449k
    if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
460
272
      I->replaceAllUsesWith(Replacement);
461
272
      I->eraseFromParent();
462
272
      MadeChange = true;
463
272
    }
464
449k
  }
465
78.1k
466
78.1k
  // Above we eagerly create divs and rems, as pairs, so that we can efficiently
467
78.1k
  // create divrem machine instructions.  Now erase any unused divs / rems so we
468
78.1k
  // don't leave extra instructions sitting around.
469
78.1k
  for (auto &KV : PerBBDivCache)
470
264
    for (Value *V : {KV.second.Quotient, KV.second.Remainder})
471
528
      RecursivelyDeleteTriviallyDeadInstructions(V);
472
78.1k
473
78.1k
  return MadeChange;
474
78.1k
}