/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file contains an optimization for div and rem on architectures that |
11 | | // execute short instructions significantly faster than longer instructions. |
12 | | // For example, on Intel Atom 32-bit divides are slow enough that during |
13 | | // runtime it is profitable to check the value of the operands, and if they are |
14 | | // positive and less than 256 use an unsigned 8-bit divide. |
15 | | // |
16 | | //===----------------------------------------------------------------------===// |
17 | | |
18 | | #include "llvm/Transforms/Utils/BypassSlowDivision.h" |
19 | | #include "llvm/ADT/DenseMap.h" |
20 | | #include "llvm/ADT/SmallPtrSet.h" |
21 | | #include "llvm/Analysis/ValueTracking.h" |
22 | | #include "llvm/IR/Function.h" |
23 | | #include "llvm/IR/IRBuilder.h" |
24 | | #include "llvm/IR/Instructions.h" |
25 | | #include "llvm/Support/KnownBits.h" |
26 | | #include "llvm/Transforms/Utils/Local.h" |
27 | | |
28 | | using namespace llvm; |
29 | | |
30 | | #define DEBUG_TYPE "bypass-slow-division" |
31 | | |
32 | | namespace { |
33 | | struct QuotRemPair { |
34 | | Value *Quotient; |
35 | | Value *Remainder; |
36 | | |
37 | | QuotRemPair(Value *InQuotient, Value *InRemainder) |
38 | 59 | : Quotient(InQuotient), Remainder(InRemainder) {} |
39 | | }; |
40 | | |
41 | | /// A quotient and remainder, plus a BB from which they logically "originate". |
42 | | /// If you use Quotient or Remainder in a Phi node, you should use BB as its |
43 | | /// corresponding predecessor. |
44 | | struct QuotRemWithBB { |
45 | | BasicBlock *BB = nullptr; |
46 | | Value *Quotient = nullptr; |
47 | | Value *Remainder = nullptr; |
48 | | }; |
49 | | } |
50 | | |
51 | | namespace llvm { |
52 | | typedef DenseMap<DivRemMapKey, QuotRemPair> DivCacheTy; |
53 | | typedef DenseMap<unsigned, unsigned> BypassWidthsTy; |
54 | | typedef SmallPtrSet<Instruction *, 4> VisitedSetTy; |
55 | | } |
56 | | |
57 | | namespace { |
58 | | enum ValueRange { |
59 | | /// Operand definitely fits into BypassType. No runtime checks are needed. |
60 | | VALRNG_KNOWN_SHORT, |
61 | | /// A runtime check is required, as value range is unknown. |
62 | | VALRNG_UNKNOWN, |
63 | | /// Operand is unlikely to fit into BypassType. The bypassing should be |
64 | | /// disabled. |
65 | | VALRNG_LIKELY_LONG |
66 | | }; |
67 | | |
68 | | class FastDivInsertionTask { |
69 | | bool IsValidTask = false; |
70 | | Instruction *SlowDivOrRem = nullptr; |
71 | | IntegerType *BypassType = nullptr; |
72 | | BasicBlock *MainBB = nullptr; |
73 | | |
74 | | bool isHashLikeValue(Value *V, VisitedSetTy &Visited); |
75 | | ValueRange getValueRange(Value *Op, VisitedSetTy &Visited); |
76 | | QuotRemWithBB createSlowBB(BasicBlock *Successor); |
77 | | QuotRemWithBB createFastBB(BasicBlock *Successor); |
78 | | QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS, |
79 | | BasicBlock *PhiBB); |
80 | | Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2); |
81 | | Optional<QuotRemPair> insertFastDivAndRem(); |
82 | | |
83 | 157 | bool isSignedOp() { |
84 | 157 | return SlowDivOrRem->getOpcode() == Instruction::SDiv || |
85 | 100 | SlowDivOrRem->getOpcode() == Instruction::SRem; |
86 | 157 | } |
87 | 64 | bool isDivisionOp() { |
88 | 64 | return SlowDivOrRem->getOpcode() == Instruction::SDiv || |
89 | 43 | SlowDivOrRem->getOpcode() == Instruction::UDiv; |
90 | 64 | } |
91 | 286 | Type *getSlowType() { return SlowDivOrRem->getType(); } |
92 | | |
93 | | public: |
94 | | FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths); |
95 | | Value *getReplacement(DivCacheTy &Cache); |
96 | | }; |
97 | | } // anonymous namespace |
98 | | |
99 | | FastDivInsertionTask::FastDivInsertionTask(Instruction *I, |
100 | 103k | const BypassWidthsTy &BypassWidths) { |
101 | 103k | switch (I->getOpcode()) { |
102 | 174 | case Instruction::UDiv: |
103 | 174 | case Instruction::SDiv: |
104 | 174 | case Instruction::URem: |
105 | 174 | case Instruction::SRem: |
106 | 174 | SlowDivOrRem = I; |
107 | 174 | break; |
108 | 103k | default: |
109 | 103k | // I is not a div/rem operation. |
110 | 103k | return; |
111 | 174 | } |
112 | 174 | |
113 | 174 | // Skip division on vector types. Only optimize integer instructions. |
114 | 174 | IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType()); |
115 | 174 | if (!SlowType) |
116 | 2 | return; |
117 | 172 | |
118 | 172 | // Skip if this bitwidth is not bypassed. |
119 | 172 | auto BI = BypassWidths.find(SlowType->getBitWidth()); |
120 | 172 | if (BI == BypassWidths.end()) |
121 | 75 | return; |
122 | 97 | |
123 | 97 | // Get type for div/rem instruction with bypass bitwidth. |
124 | 97 | IntegerType *BT = IntegerType::get(I->getContext(), BI->second); |
125 | 97 | BypassType = BT; |
126 | 97 | |
127 | 97 | // The original basic block. |
128 | 97 | MainBB = I->getParent(); |
129 | 97 | |
130 | 97 | // The instruction is indeed a slow div or rem operation. |
131 | 97 | IsValidTask = true; |
132 | 97 | } |
133 | | |
134 | | /// Reuses previously-computed dividend or remainder from the current BB if |
135 | | /// operands and operation are identical. Otherwise calls insertFastDivAndRem to |
136 | | /// perform the optimization and caches the resulting dividend and remainder. |
137 | | /// If no replacement can be generated, nullptr is returned. |
138 | 103k | Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) { |
139 | 103k | // First, make sure that the task is valid. |
140 | 103k | if (!IsValidTask) |
141 | 103k | return nullptr; |
142 | 97 | |
143 | 97 | // Then, look for a value in Cache. |
144 | 97 | Value *Dividend = SlowDivOrRem->getOperand(0); |
145 | 97 | Value *Divisor = SlowDivOrRem->getOperand(1); |
146 | 97 | DivRemMapKey Key(isSignedOp(), Dividend, Divisor); |
147 | 97 | auto CacheI = Cache.find(Key); |
148 | 97 | |
149 | 97 | if (CacheI == Cache.end()97 ) { |
150 | 92 | // If previous instance does not exist, try to insert fast div. |
151 | 92 | Optional<QuotRemPair> OptResult = insertFastDivAndRem(); |
152 | 92 | // Bail out if insertFastDivAndRem has failed. |
153 | 92 | if (!OptResult) |
154 | 33 | return nullptr; |
155 | 59 | CacheI = Cache.insert({Key, *OptResult}).first; |
156 | 59 | } |
157 | 97 | |
158 | 64 | QuotRemPair &Value = CacheI->second; |
159 | 64 | return isDivisionOp() ? Value.Quotient42 : Value.Remainder22 ; |
160 | 103k | } |
161 | | |
162 | | /// \brief Check if a value looks like a hash. |
163 | | /// |
164 | | /// The routine is expected to detect values computed using the most common hash |
165 | | /// algorithms. Typically, hash computations end with one of the following |
166 | | /// instructions: |
167 | | /// |
168 | | /// 1) MUL with a constant wider than BypassType |
169 | | /// 2) XOR instruction |
170 | | /// |
171 | | /// And even if we are wrong and the value is not a hash, it is still quite |
172 | | /// unlikely that such values will fit into BypassType. |
173 | | /// |
174 | | /// To detect string hash algorithms like FNV we have to look through PHI-nodes. |
175 | | /// It is implemented as a depth-first search for values that look neither long |
176 | | /// nor hash-like. |
177 | 123 | bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) { |
178 | 123 | Instruction *I = dyn_cast<Instruction>(V); |
179 | 123 | if (!I) |
180 | 84 | return false; |
181 | 39 | |
182 | 39 | switch (I->getOpcode()) { |
183 | 2 | case Instruction::Xor: |
184 | 2 | return true; |
185 | 2 | case Instruction::Mul: { |
186 | 2 | // After Constant Hoisting pass, long constants may be represented as |
187 | 2 | // bitcast instructions. As a result, some constants may look like an |
188 | 2 | // instruction at first, and an additional check is necessary to find out if |
189 | 2 | // an operand is actually a constant. |
190 | 2 | Value *Op1 = I->getOperand(1); |
191 | 2 | ConstantInt *C = dyn_cast<ConstantInt>(Op1); |
192 | 2 | if (!C && 2 isa<BitCastInst>(Op1)0 ) |
193 | 0 | C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0)); |
194 | 2 | return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth(); |
195 | 39 | } |
196 | 20 | case Instruction::PHI: { |
197 | 20 | // Stop IR traversal in case of a crazy input code. This limits recursion |
198 | 20 | // depth. |
199 | 20 | if (Visited.size() >= 16) |
200 | 0 | return false; |
201 | 20 | // Do not visit nodes that have been visited already. We return true because |
202 | 20 | // it means that we couldn't find any value that doesn't look hash-like. |
203 | 20 | if (20 Visited.find(I) != Visited.end()20 ) |
204 | 0 | return true; |
205 | 20 | Visited.insert(I); |
206 | 21 | return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) { |
207 | 21 | // Ignore undef values as they probably don't affect the division |
208 | 21 | // operands. |
209 | 21 | return getValueRange(V, Visited) == VALRNG_LIKELY_LONG || |
210 | 20 | isa<UndefValue>(V); |
211 | 21 | }); |
212 | 20 | } |
213 | 15 | default: |
214 | 15 | return false; |
215 | 0 | } |
216 | 0 | } |
217 | | |
218 | | /// Check if an integer value fits into our bypass type. |
219 | | ValueRange FastDivInsertionTask::getValueRange(Value *V, |
220 | 188 | VisitedSetTy &Visited) { |
221 | 188 | unsigned ShortLen = BypassType->getBitWidth(); |
222 | 188 | unsigned LongLen = V->getType()->getIntegerBitWidth(); |
223 | 188 | |
224 | 188 | assert(LongLen > ShortLen && "Value type must be wider than BypassType"); |
225 | 188 | unsigned HiBits = LongLen - ShortLen; |
226 | 188 | |
227 | 188 | const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout(); |
228 | 188 | KnownBits Known(LongLen); |
229 | 188 | |
230 | 188 | computeKnownBits(V, Known, DL); |
231 | 188 | |
232 | 188 | if (Known.countMinLeadingZeros() >= HiBits) |
233 | 49 | return VALRNG_KNOWN_SHORT; |
234 | 139 | |
235 | 139 | if (139 Known.countMaxLeadingZeros() < HiBits139 ) |
236 | 16 | return VALRNG_LIKELY_LONG; |
237 | 123 | |
238 | 123 | // Long integer divisions are often used in hashtable implementations. It's |
239 | 123 | // not worth bypassing such divisions because hash values are extremely |
240 | 123 | // unlikely to have enough leading zeros. The call below tries to detect |
241 | 123 | // values that are unlikely to fit BypassType (including hashes). |
242 | 123 | if (123 isHashLikeValue(V, Visited)123 ) |
243 | 4 | return VALRNG_LIKELY_LONG; |
244 | 119 | |
245 | 119 | return VALRNG_UNKNOWN; |
246 | 119 | } |
247 | | |
248 | | /// Add new basic block for slow div and rem operations and put it before |
249 | | /// SuccessorBB. |
250 | 45 | QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) { |
251 | 45 | QuotRemWithBB DivRemPair; |
252 | 45 | DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", |
253 | 45 | MainBB->getParent(), SuccessorBB); |
254 | 45 | IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); |
255 | 45 | |
256 | 45 | Value *Dividend = SlowDivOrRem->getOperand(0); |
257 | 45 | Value *Divisor = SlowDivOrRem->getOperand(1); |
258 | 45 | |
259 | 45 | if (isSignedOp()45 ) { |
260 | 27 | DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor); |
261 | 27 | DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor); |
262 | 45 | } else { |
263 | 18 | DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor); |
264 | 18 | DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor); |
265 | 18 | } |
266 | 45 | |
267 | 45 | Builder.CreateBr(SuccessorBB); |
268 | 45 | return DivRemPair; |
269 | 45 | } |
270 | | |
271 | | /// Add new basic block for fast div and rem operations and put it before |
272 | | /// SuccessorBB. |
273 | 56 | QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) { |
274 | 56 | QuotRemWithBB DivRemPair; |
275 | 56 | DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", |
276 | 56 | MainBB->getParent(), SuccessorBB); |
277 | 56 | IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); |
278 | 56 | |
279 | 56 | Value *Dividend = SlowDivOrRem->getOperand(0); |
280 | 56 | Value *Divisor = SlowDivOrRem->getOperand(1); |
281 | 56 | Value *ShortDivisorV = |
282 | 56 | Builder.CreateCast(Instruction::Trunc, Divisor, BypassType); |
283 | 56 | Value *ShortDividendV = |
284 | 56 | Builder.CreateCast(Instruction::Trunc, Dividend, BypassType); |
285 | 56 | |
286 | 56 | // udiv/urem because this optimization only handles positive numbers. |
287 | 56 | Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV); |
288 | 56 | Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV); |
289 | 56 | DivRemPair.Quotient = |
290 | 56 | Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType()); |
291 | 56 | DivRemPair.Remainder = |
292 | 56 | Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType()); |
293 | 56 | Builder.CreateBr(SuccessorBB); |
294 | 56 | |
295 | 56 | return DivRemPair; |
296 | 56 | } |
297 | | |
298 | | /// Creates Phi nodes for result of Div and Rem. |
299 | | QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS, |
300 | | QuotRemWithBB &RHS, |
301 | 56 | BasicBlock *PhiBB) { |
302 | 56 | IRBuilder<> Builder(PhiBB, PhiBB->begin()); |
303 | 56 | PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2); |
304 | 56 | QuoPhi->addIncoming(LHS.Quotient, LHS.BB); |
305 | 56 | QuoPhi->addIncoming(RHS.Quotient, RHS.BB); |
306 | 56 | PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2); |
307 | 56 | RemPhi->addIncoming(LHS.Remainder, LHS.BB); |
308 | 56 | RemPhi->addIncoming(RHS.Remainder, RHS.BB); |
309 | 56 | return QuotRemPair(QuoPhi, RemPhi); |
310 | 56 | } |
311 | | |
312 | | /// Creates a runtime check to test whether both the divisor and dividend fit |
313 | | /// into BypassType. The check is inserted at the end of MainBB. True return |
314 | | /// value means that the operands fit. Either of the operands may be NULL if it |
315 | | /// doesn't need a runtime check. |
316 | 45 | Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) { |
317 | 45 | assert((Op1 || Op2) && "Nothing to check"); |
318 | 45 | IRBuilder<> Builder(MainBB, MainBB->end()); |
319 | 45 | |
320 | 45 | Value *OrV; |
321 | 45 | if (Op1 && 45 Op241 ) |
322 | 38 | OrV = Builder.CreateOr(Op1, Op2); |
323 | 45 | else |
324 | 7 | OrV = Op1 ? 7 Op13 : Op24 ; |
325 | 45 | |
326 | 45 | // BitMask is inverted to check if the operands are |
327 | 45 | // larger than the bypass type |
328 | 45 | uint64_t BitMask = ~BypassType->getBitMask(); |
329 | 45 | Value *AndV = Builder.CreateAnd(OrV, BitMask); |
330 | 45 | |
331 | 45 | // Compare operand values |
332 | 45 | Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0); |
333 | 45 | return Builder.CreateICmpEQ(AndV, ZeroV); |
334 | 45 | } |
335 | | |
336 | | /// Substitutes the div/rem instruction with code that checks the value of the |
337 | | /// operands and uses a shorter-faster div/rem instruction when possible. |
338 | 92 | Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() { |
339 | 92 | Value *Dividend = SlowDivOrRem->getOperand(0); |
340 | 92 | Value *Divisor = SlowDivOrRem->getOperand(1); |
341 | 92 | |
342 | 92 | VisitedSetTy SetL; |
343 | 92 | ValueRange DividendRange = getValueRange(Dividend, SetL); |
344 | 92 | if (DividendRange == VALRNG_LIKELY_LONG) |
345 | 17 | return None; |
346 | 75 | |
347 | 75 | VisitedSetTy SetR; |
348 | 75 | ValueRange DivisorRange = getValueRange(Divisor, SetR); |
349 | 75 | if (DivisorRange == VALRNG_LIKELY_LONG) |
350 | 2 | return None; |
351 | 73 | |
352 | 73 | bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT); |
353 | 73 | bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT); |
354 | 73 | |
355 | 73 | if (DividendShort && 73 DivisorShort18 ) { |
356 | 3 | // If both operands are known to be short then just replace the long |
357 | 3 | // division with a short one in-place. Since we're not introducing control |
358 | 3 | // flow in this case, narrowing the division is always a win, even if the |
359 | 3 | // divisor is a constant (and will later get replaced by a multiplication). |
360 | 3 | |
361 | 3 | IRBuilder<> Builder(SlowDivOrRem); |
362 | 3 | Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType); |
363 | 3 | Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType); |
364 | 3 | Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor); |
365 | 3 | Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor); |
366 | 3 | Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType()); |
367 | 3 | Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType()); |
368 | 3 | return QuotRemPair(ExtDiv, ExtRem); |
369 | 3 | } |
370 | 70 | |
371 | 70 | if (70 isa<ConstantInt>(Divisor)70 ) { |
372 | 14 | // If the divisor is not a constant, DAGCombiner will convert it to a |
373 | 14 | // multiplication by a magic constant. It isn't clear if it is worth |
374 | 14 | // introducing control flow to get a narrower multiply. |
375 | 14 | return None; |
376 | 14 | } |
377 | 56 | |
378 | 56 | if (56 DividendShort && 56 !isSignedOp()15 ) { |
379 | 11 | // If the division is unsigned and Dividend is known to be short, then |
380 | 11 | // either |
381 | 11 | // 1) Divisor is less or equal to Dividend, and the result can be computed |
382 | 11 | // with a short division. |
383 | 11 | // 2) Divisor is greater than Dividend. In this case, no division is needed |
384 | 11 | // at all: The quotient is 0 and the remainder is equal to Dividend. |
385 | 11 | // |
386 | 11 | // So instead of checking at runtime whether Divisor fits into BypassType, |
387 | 11 | // we emit a runtime check to differentiate between these two cases. This |
388 | 11 | // lets us entirely avoid a long div. |
389 | 11 | |
390 | 11 | // Split the basic block before the div/rem. |
391 | 11 | BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); |
392 | 11 | // Remove the unconditional branch from MainBB to SuccessorBB. |
393 | 11 | MainBB->getInstList().back().eraseFromParent(); |
394 | 11 | QuotRemWithBB Long; |
395 | 11 | Long.BB = MainBB; |
396 | 11 | Long.Quotient = ConstantInt::get(getSlowType(), 0); |
397 | 11 | Long.Remainder = Dividend; |
398 | 11 | QuotRemWithBB Fast = createFastBB(SuccessorBB); |
399 | 11 | QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB); |
400 | 11 | IRBuilder<> Builder(MainBB, MainBB->end()); |
401 | 11 | Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor); |
402 | 11 | Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB); |
403 | 11 | return Result; |
404 | 0 | } else { |
405 | 45 | // General case. Create both slow and fast div/rem pairs and choose one of |
406 | 45 | // them at runtime. |
407 | 45 | |
408 | 45 | // Split the basic block before the div/rem. |
409 | 45 | BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); |
410 | 45 | // Remove the unconditional branch from MainBB to SuccessorBB. |
411 | 45 | MainBB->getInstList().back().eraseFromParent(); |
412 | 45 | QuotRemWithBB Fast = createFastBB(SuccessorBB); |
413 | 45 | QuotRemWithBB Slow = createSlowBB(SuccessorBB); |
414 | 45 | QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB); |
415 | 45 | Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr4 : Dividend41 , |
416 | 45 | DivisorShort ? nullptr3 : Divisor42 ); |
417 | 45 | IRBuilder<> Builder(MainBB, MainBB->end()); |
418 | 45 | Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB); |
419 | 45 | return Result; |
420 | 45 | } |
421 | 0 | } |
422 | | |
423 | | /// This optimization identifies DIV/REM instructions in a BB that can be |
424 | | /// profitably bypassed and carried out with a shorter, faster divide. |
425 | | bool llvm::bypassSlowDivision(BasicBlock *BB, |
426 | 21.5k | const BypassWidthsTy &BypassWidths) { |
427 | 21.5k | DivCacheTy PerBBDivCache; |
428 | 21.5k | |
429 | 21.5k | bool MadeChange = false; |
430 | 21.5k | Instruction* Next = &*BB->begin(); |
431 | 124k | while (Next != nullptr124k ) { |
432 | 103k | // We may add instructions immediately after I, but we want to skip over |
433 | 103k | // them. |
434 | 103k | Instruction* I = Next; |
435 | 103k | Next = Next->getNextNode(); |
436 | 103k | |
437 | 103k | FastDivInsertionTask Task(I, BypassWidths); |
438 | 103k | if (Value *Replacement103k = Task.getReplacement(PerBBDivCache)) { |
439 | 64 | I->replaceAllUsesWith(Replacement); |
440 | 64 | I->eraseFromParent(); |
441 | 64 | MadeChange = true; |
442 | 64 | } |
443 | 103k | } |
444 | 21.5k | |
445 | 21.5k | // Above we eagerly create divs and rems, as pairs, so that we can efficiently |
446 | 21.5k | // create divrem machine instructions. Now erase any unused divs / rems so we |
447 | 21.5k | // don't leave extra instructions sitting around. |
448 | 21.5k | for (auto &KV : PerBBDivCache) |
449 | 59 | for (Value *V : {KV.second.Quotient, KV.second.Remainder}) |
450 | 118 | RecursivelyDeleteTriviallyDeadInstructions(V); |
451 | 21.5k | |
452 | 21.5k | return MadeChange; |
453 | 21.5k | } |