Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file contains an optimization for div and rem on architectures that
11
// execute short instructions significantly faster than longer instructions.
12
// For example, on Intel Atom 32-bit divides are slow enough that during
13
// runtime it is profitable to check the value of the operands, and if they are
14
// positive and less than 256 use an unsigned 8-bit divide.
15
//
16
//===----------------------------------------------------------------------===//
17
18
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
19
#include "llvm/ADT/DenseMap.h"
20
#include "llvm/ADT/SmallPtrSet.h"
21
#include "llvm/Analysis/ValueTracking.h"
22
#include "llvm/IR/Function.h"
23
#include "llvm/IR/IRBuilder.h"
24
#include "llvm/IR/Instructions.h"
25
#include "llvm/Support/KnownBits.h"
26
#include "llvm/Transforms/Utils/Local.h"
27
28
using namespace llvm;
29
30
#define DEBUG_TYPE "bypass-slow-division"
31
32
namespace {
33
  struct QuotRemPair {
34
    Value *Quotient;
35
    Value *Remainder;
36
37
    QuotRemPair(Value *InQuotient, Value *InRemainder)
38
59
        : Quotient(InQuotient), Remainder(InRemainder) {}
39
  };
40
41
  /// A quotient and remainder, plus a BB from which they logically "originate".
42
  /// If you use Quotient or Remainder in a Phi node, you should use BB as its
43
  /// corresponding predecessor.
44
  struct QuotRemWithBB {
45
    BasicBlock *BB = nullptr;
46
    Value *Quotient = nullptr;
47
    Value *Remainder = nullptr;
48
  };
49
}
50
51
namespace llvm {
52
  typedef DenseMap<DivRemMapKey, QuotRemPair> DivCacheTy;
53
  typedef DenseMap<unsigned, unsigned> BypassWidthsTy;
54
  typedef SmallPtrSet<Instruction *, 4> VisitedSetTy;
55
}
56
57
namespace {
58
enum ValueRange {
59
  /// Operand definitely fits into BypassType. No runtime checks are needed.
60
  VALRNG_KNOWN_SHORT,
61
  /// A runtime check is required, as value range is unknown.
62
  VALRNG_UNKNOWN,
63
  /// Operand is unlikely to fit into BypassType. The bypassing should be
64
  /// disabled.
65
  VALRNG_LIKELY_LONG
66
};
67
68
class FastDivInsertionTask {
69
  bool IsValidTask = false;
70
  Instruction *SlowDivOrRem = nullptr;
71
  IntegerType *BypassType = nullptr;
72
  BasicBlock *MainBB = nullptr;
73
74
  bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
75
  ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
76
  QuotRemWithBB createSlowBB(BasicBlock *Successor);
77
  QuotRemWithBB createFastBB(BasicBlock *Successor);
78
  QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
79
                                   BasicBlock *PhiBB);
80
  Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
81
  Optional<QuotRemPair> insertFastDivAndRem();
82
83
157
  bool isSignedOp() {
84
157
    return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
85
100
           SlowDivOrRem->getOpcode() == Instruction::SRem;
86
157
  }
87
64
  bool isDivisionOp() {
88
64
    return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
89
43
           SlowDivOrRem->getOpcode() == Instruction::UDiv;
90
64
  }
91
286
  Type *getSlowType() { return SlowDivOrRem->getType(); }
92
93
public:
94
  FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
95
  Value *getReplacement(DivCacheTy &Cache);
96
};
97
} // anonymous namespace
98
99
FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
100
103k
                                           const BypassWidthsTy &BypassWidths) {
101
103k
  switch (I->getOpcode()) {
102
174
  case Instruction::UDiv:
103
174
  case Instruction::SDiv:
104
174
  case Instruction::URem:
105
174
  case Instruction::SRem:
106
174
    SlowDivOrRem = I;
107
174
    break;
108
103k
  default:
109
103k
    // I is not a div/rem operation.
110
103k
    return;
111
174
  }
112
174
113
174
  // Skip division on vector types. Only optimize integer instructions.
114
174
  IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
115
174
  if (!SlowType)
116
2
    return;
117
172
118
172
  // Skip if this bitwidth is not bypassed.
119
172
  auto BI = BypassWidths.find(SlowType->getBitWidth());
120
172
  if (BI == BypassWidths.end())
121
75
    return;
122
97
123
97
  // Get type for div/rem instruction with bypass bitwidth.
124
97
  IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
125
97
  BypassType = BT;
126
97
127
97
  // The original basic block.
128
97
  MainBB = I->getParent();
129
97
130
97
  // The instruction is indeed a slow div or rem operation.
131
97
  IsValidTask = true;
132
97
}
133
134
/// Reuses previously-computed dividend or remainder from the current BB if
135
/// operands and operation are identical. Otherwise calls insertFastDivAndRem to
136
/// perform the optimization and caches the resulting dividend and remainder.
137
/// If no replacement can be generated, nullptr is returned.
138
103k
Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
139
103k
  // First, make sure that the task is valid.
140
103k
  if (!IsValidTask)
141
103k
    return nullptr;
142
97
143
97
  // Then, look for a value in Cache.
144
97
  Value *Dividend = SlowDivOrRem->getOperand(0);
145
97
  Value *Divisor = SlowDivOrRem->getOperand(1);
146
97
  DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
147
97
  auto CacheI = Cache.find(Key);
148
97
149
97
  if (
CacheI == Cache.end()97
) {
150
92
    // If previous instance does not exist, try to insert fast div.
151
92
    Optional<QuotRemPair> OptResult = insertFastDivAndRem();
152
92
    // Bail out if insertFastDivAndRem has failed.
153
92
    if (!OptResult)
154
33
      return nullptr;
155
59
    CacheI = Cache.insert({Key, *OptResult}).first;
156
59
  }
157
97
158
64
  QuotRemPair &Value = CacheI->second;
159
64
  return isDivisionOp() ? 
Value.Quotient42
:
Value.Remainder22
;
160
103k
}
161
162
/// \brief Check if a value looks like a hash.
163
///
164
/// The routine is expected to detect values computed using the most common hash
165
/// algorithms. Typically, hash computations end with one of the following
166
/// instructions:
167
///
168
/// 1) MUL with a constant wider than BypassType
169
/// 2) XOR instruction
170
///
171
/// And even if we are wrong and the value is not a hash, it is still quite
172
/// unlikely that such values will fit into BypassType.
173
///
174
/// To detect string hash algorithms like FNV we have to look through PHI-nodes.
175
/// It is implemented as a depth-first search for values that look neither long
176
/// nor hash-like.
177
123
bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
178
123
  Instruction *I = dyn_cast<Instruction>(V);
179
123
  if (!I)
180
84
    return false;
181
39
182
39
  switch (I->getOpcode()) {
183
2
  case Instruction::Xor:
184
2
    return true;
185
2
  case Instruction::Mul: {
186
2
    // After Constant Hoisting pass, long constants may be represented as
187
2
    // bitcast instructions. As a result, some constants may look like an
188
2
    // instruction at first, and an additional check is necessary to find out if
189
2
    // an operand is actually a constant.
190
2
    Value *Op1 = I->getOperand(1);
191
2
    ConstantInt *C = dyn_cast<ConstantInt>(Op1);
192
2
    if (
!C && 2
isa<BitCastInst>(Op1)0
)
193
0
      C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
194
2
    return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
195
39
  }
196
20
  case Instruction::PHI: {
197
20
    // Stop IR traversal in case of a crazy input code. This limits recursion
198
20
    // depth.
199
20
    if (Visited.size() >= 16)
200
0
      return false;
201
20
    // Do not visit nodes that have been visited already. We return true because
202
20
    // it means that we couldn't find any value that doesn't look hash-like.
203
20
    
if (20
Visited.find(I) != Visited.end()20
)
204
0
      return true;
205
20
    Visited.insert(I);
206
21
    return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
207
21
      // Ignore undef values as they probably don't affect the division
208
21
      // operands.
209
21
      return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
210
20
             isa<UndefValue>(V);
211
21
    });
212
20
  }
213
15
  default:
214
15
    return false;
215
0
  }
216
0
}
217
218
/// Check if an integer value fits into our bypass type.
219
ValueRange FastDivInsertionTask::getValueRange(Value *V,
220
188
                                               VisitedSetTy &Visited) {
221
188
  unsigned ShortLen = BypassType->getBitWidth();
222
188
  unsigned LongLen = V->getType()->getIntegerBitWidth();
223
188
224
188
  assert(LongLen > ShortLen && "Value type must be wider than BypassType");
225
188
  unsigned HiBits = LongLen - ShortLen;
226
188
227
188
  const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
228
188
  KnownBits Known(LongLen);
229
188
230
188
  computeKnownBits(V, Known, DL);
231
188
232
188
  if (Known.countMinLeadingZeros() >= HiBits)
233
49
    return VALRNG_KNOWN_SHORT;
234
139
235
139
  
if (139
Known.countMaxLeadingZeros() < HiBits139
)
236
16
    return VALRNG_LIKELY_LONG;
237
123
238
123
  // Long integer divisions are often used in hashtable implementations. It's
239
123
  // not worth bypassing such divisions because hash values are extremely
240
123
  // unlikely to have enough leading zeros. The call below tries to detect
241
123
  // values that are unlikely to fit BypassType (including hashes).
242
123
  
if (123
isHashLikeValue(V, Visited)123
)
243
4
    return VALRNG_LIKELY_LONG;
244
119
245
119
  return VALRNG_UNKNOWN;
246
119
}
247
248
/// Add new basic block for slow div and rem operations and put it before
249
/// SuccessorBB.
250
45
QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
251
45
  QuotRemWithBB DivRemPair;
252
45
  DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
253
45
                                     MainBB->getParent(), SuccessorBB);
254
45
  IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
255
45
256
45
  Value *Dividend = SlowDivOrRem->getOperand(0);
257
45
  Value *Divisor = SlowDivOrRem->getOperand(1);
258
45
259
45
  if (
isSignedOp()45
) {
260
27
    DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
261
27
    DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
262
45
  } else {
263
18
    DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
264
18
    DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
265
18
  }
266
45
267
45
  Builder.CreateBr(SuccessorBB);
268
45
  return DivRemPair;
269
45
}
270
271
/// Add new basic block for fast div and rem operations and put it before
272
/// SuccessorBB.
273
56
QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
274
56
  QuotRemWithBB DivRemPair;
275
56
  DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
276
56
                                     MainBB->getParent(), SuccessorBB);
277
56
  IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
278
56
279
56
  Value *Dividend = SlowDivOrRem->getOperand(0);
280
56
  Value *Divisor = SlowDivOrRem->getOperand(1);
281
56
  Value *ShortDivisorV =
282
56
      Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
283
56
  Value *ShortDividendV =
284
56
      Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
285
56
286
56
  // udiv/urem because this optimization only handles positive numbers.
287
56
  Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
288
56
  Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
289
56
  DivRemPair.Quotient =
290
56
      Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
291
56
  DivRemPair.Remainder =
292
56
      Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
293
56
  Builder.CreateBr(SuccessorBB);
294
56
295
56
  return DivRemPair;
296
56
}
297
298
/// Creates Phi nodes for result of Div and Rem.
299
QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
300
                                                       QuotRemWithBB &RHS,
301
56
                                                       BasicBlock *PhiBB) {
302
56
  IRBuilder<> Builder(PhiBB, PhiBB->begin());
303
56
  PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
304
56
  QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
305
56
  QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
306
56
  PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
307
56
  RemPhi->addIncoming(LHS.Remainder, LHS.BB);
308
56
  RemPhi->addIncoming(RHS.Remainder, RHS.BB);
309
56
  return QuotRemPair(QuoPhi, RemPhi);
310
56
}
311
312
/// Creates a runtime check to test whether both the divisor and dividend fit
313
/// into BypassType. The check is inserted at the end of MainBB. True return
314
/// value means that the operands fit. Either of the operands may be NULL if it
315
/// doesn't need a runtime check.
316
45
Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
317
45
  assert((Op1 || Op2) && "Nothing to check");
318
45
  IRBuilder<> Builder(MainBB, MainBB->end());
319
45
320
45
  Value *OrV;
321
45
  if (
Op1 && 45
Op241
)
322
38
    OrV = Builder.CreateOr(Op1, Op2);
323
45
  else
324
7
    
OrV = Op1 ? 7
Op13
:
Op24
;
325
45
326
45
  // BitMask is inverted to check if the operands are
327
45
  // larger than the bypass type
328
45
  uint64_t BitMask = ~BypassType->getBitMask();
329
45
  Value *AndV = Builder.CreateAnd(OrV, BitMask);
330
45
331
45
  // Compare operand values
332
45
  Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
333
45
  return Builder.CreateICmpEQ(AndV, ZeroV);
334
45
}
335
336
/// Substitutes the div/rem instruction with code that checks the value of the
337
/// operands and uses a shorter-faster div/rem instruction when possible.
338
92
Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
339
92
  Value *Dividend = SlowDivOrRem->getOperand(0);
340
92
  Value *Divisor = SlowDivOrRem->getOperand(1);
341
92
342
92
  VisitedSetTy SetL;
343
92
  ValueRange DividendRange = getValueRange(Dividend, SetL);
344
92
  if (DividendRange == VALRNG_LIKELY_LONG)
345
17
    return None;
346
75
347
75
  VisitedSetTy SetR;
348
75
  ValueRange DivisorRange = getValueRange(Divisor, SetR);
349
75
  if (DivisorRange == VALRNG_LIKELY_LONG)
350
2
    return None;
351
73
352
73
  bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
353
73
  bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
354
73
355
73
  if (
DividendShort && 73
DivisorShort18
) {
356
3
    // If both operands are known to be short then just replace the long
357
3
    // division with a short one in-place.  Since we're not introducing control
358
3
    // flow in this case, narrowing the division is always a win, even if the
359
3
    // divisor is a constant (and will later get replaced by a multiplication).
360
3
361
3
    IRBuilder<> Builder(SlowDivOrRem);
362
3
    Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
363
3
    Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
364
3
    Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
365
3
    Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
366
3
    Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
367
3
    Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
368
3
    return QuotRemPair(ExtDiv, ExtRem);
369
3
  }
370
70
371
70
  
if (70
isa<ConstantInt>(Divisor)70
) {
372
14
    // If the divisor is not a constant, DAGCombiner will convert it to a
373
14
    // multiplication by a magic constant.  It isn't clear if it is worth
374
14
    // introducing control flow to get a narrower multiply.
375
14
    return None;
376
14
  }
377
56
378
56
  
if (56
DividendShort && 56
!isSignedOp()15
) {
379
11
    // If the division is unsigned and Dividend is known to be short, then
380
11
    // either
381
11
    // 1) Divisor is less or equal to Dividend, and the result can be computed
382
11
    //    with a short division.
383
11
    // 2) Divisor is greater than Dividend. In this case, no division is needed
384
11
    //    at all: The quotient is 0 and the remainder is equal to Dividend.
385
11
    //
386
11
    // So instead of checking at runtime whether Divisor fits into BypassType,
387
11
    // we emit a runtime check to differentiate between these two cases. This
388
11
    // lets us entirely avoid a long div.
389
11
390
11
    // Split the basic block before the div/rem.
391
11
    BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
392
11
    // Remove the unconditional branch from MainBB to SuccessorBB.
393
11
    MainBB->getInstList().back().eraseFromParent();
394
11
    QuotRemWithBB Long;
395
11
    Long.BB = MainBB;
396
11
    Long.Quotient = ConstantInt::get(getSlowType(), 0);
397
11
    Long.Remainder = Dividend;
398
11
    QuotRemWithBB Fast = createFastBB(SuccessorBB);
399
11
    QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
400
11
    IRBuilder<> Builder(MainBB, MainBB->end());
401
11
    Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
402
11
    Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
403
11
    return Result;
404
0
  } else {
405
45
    // General case. Create both slow and fast div/rem pairs and choose one of
406
45
    // them at runtime.
407
45
408
45
    // Split the basic block before the div/rem.
409
45
    BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
410
45
    // Remove the unconditional branch from MainBB to SuccessorBB.
411
45
    MainBB->getInstList().back().eraseFromParent();
412
45
    QuotRemWithBB Fast = createFastBB(SuccessorBB);
413
45
    QuotRemWithBB Slow = createSlowBB(SuccessorBB);
414
45
    QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
415
45
    Value *CmpV = insertOperandRuntimeCheck(DividendShort ? 
nullptr4
:
Dividend41
,
416
45
                                            DivisorShort ? 
nullptr3
:
Divisor42
);
417
45
    IRBuilder<> Builder(MainBB, MainBB->end());
418
45
    Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
419
45
    return Result;
420
45
  }
421
0
}
422
423
/// This optimization identifies DIV/REM instructions in a BB that can be
424
/// profitably bypassed and carried out with a shorter, faster divide.
425
bool llvm::bypassSlowDivision(BasicBlock *BB,
426
21.5k
                              const BypassWidthsTy &BypassWidths) {
427
21.5k
  DivCacheTy PerBBDivCache;
428
21.5k
429
21.5k
  bool MadeChange = false;
430
21.5k
  Instruction* Next = &*BB->begin();
431
124k
  while (
Next != nullptr124k
) {
432
103k
    // We may add instructions immediately after I, but we want to skip over
433
103k
    // them.
434
103k
    Instruction* I = Next;
435
103k
    Next = Next->getNextNode();
436
103k
437
103k
    FastDivInsertionTask Task(I, BypassWidths);
438
103k
    if (Value *
Replacement103k
= Task.getReplacement(PerBBDivCache)) {
439
64
      I->replaceAllUsesWith(Replacement);
440
64
      I->eraseFromParent();
441
64
      MadeChange = true;
442
64
    }
443
103k
  }
444
21.5k
445
21.5k
  // Above we eagerly create divs and rems, as pairs, so that we can efficiently
446
21.5k
  // create divrem machine instructions.  Now erase any unused divs / rems so we
447
21.5k
  // don't leave extra instructions sitting around.
448
21.5k
  for (auto &KV : PerBBDivCache)
449
59
    for (Value *V : {KV.second.Quotient, KV.second.Remainder})
450
118
      RecursivelyDeleteTriviallyDeadInstructions(V);
451
21.5k
452
21.5k
  return MadeChange;
453
21.5k
}