/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- InstCombineShifts.cpp ----------------------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file implements the visitShl, visitLShr, and visitAShr functions. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "InstCombineInternal.h" |
14 | | #include "llvm/Analysis/ConstantFolding.h" |
15 | | #include "llvm/Analysis/InstructionSimplify.h" |
16 | | #include "llvm/IR/IntrinsicInst.h" |
17 | | #include "llvm/IR/PatternMatch.h" |
18 | | using namespace llvm; |
19 | | using namespace PatternMatch; |
20 | | |
21 | | #define DEBUG_TYPE "instcombine" |
22 | | |
23 | | // Given pattern: |
24 | | // (x shiftopcode Q) shiftopcode K |
25 | | // we should rewrite it as |
26 | | // x shiftopcode (Q+K) iff (Q+K) u< bitwidth(x) |
27 | | // This is valid for any shift, but they must be identical. |
28 | | static Instruction * |
29 | | reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0, |
30 | 2.11M | const SimplifyQuery &SQ) { |
31 | 2.11M | // Look for: (x shiftopcode ShAmt0) shiftopcode ShAmt1 |
32 | 2.11M | Value *X, *ShAmt1, *ShAmt0; |
33 | 2.11M | Instruction *Sh1; |
34 | 2.11M | if (!match(Sh0, m_Shift(m_CombineAnd(m_Shift(m_Value(X), m_Value(ShAmt1)), |
35 | 2.11M | m_Instruction(Sh1)), |
36 | 2.11M | m_Value(ShAmt0)))) |
37 | 1.94M | return nullptr; |
38 | 165k | |
39 | 165k | // The shift opcodes must be identical. |
40 | 165k | Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode(); |
41 | 165k | if (ShiftOpcode != Sh1->getOpcode()) |
42 | 158k | return nullptr; |
43 | 7.09k | // Can we fold (ShAmt0+ShAmt1) ? |
44 | 7.09k | Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, ShAmt0, ShAmt1, |
45 | 7.09k | SQ.getWithInstruction(Sh0)); |
46 | 7.09k | if (!NewShAmt) |
47 | 6.57k | return nullptr; // Did not simplify. |
48 | 513 | // Is the new shift amount smaller than the bit width? |
49 | 513 | // FIXME: could also rely on ConstantRange. |
50 | 513 | unsigned BitWidth = X->getType()->getScalarSizeInBits(); |
51 | 513 | if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT, |
52 | 513 | APInt(BitWidth, BitWidth)))) |
53 | 28 | return nullptr; |
54 | 485 | // All good, we can do this fold. |
55 | 485 | BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt); |
56 | 485 | // If both of the original shifts had the same flag set, preserve the flag. |
57 | 485 | if (ShiftOpcode == Instruction::BinaryOps::Shl) { |
58 | 90 | NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() && |
59 | 90 | Sh1->hasNoUnsignedWrap()2 ); |
60 | 90 | NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() && |
61 | 90 | Sh1->hasNoSignedWrap()3 ); |
62 | 395 | } else { |
63 | 395 | NewShift->setIsExact(Sh0->isExact() && Sh1->isExact()3 ); |
64 | 395 | } |
65 | 485 | return NewShift; |
66 | 485 | } |
67 | | |
68 | | // If we have some pattern that leaves only some low bits set, and then performs |
69 | | // left-shift of those bits, if none of the bits that are left after the final |
70 | | // shift are modified by the mask, we can omit the mask. |
71 | | // |
72 | | // There are many variants to this pattern: |
73 | | // a) (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt |
74 | | // b) (x & (~(-1 << MaskShAmt))) << ShiftShAmt |
75 | | // c) (x & (-1 >> MaskShAmt)) << ShiftShAmt |
76 | | // d) (x & ((-1 << MaskShAmt) >> MaskShAmt)) << ShiftShAmt |
77 | | // e) ((x << MaskShAmt) l>> MaskShAmt) << ShiftShAmt |
78 | | // f) ((x << MaskShAmt) a>> MaskShAmt) << ShiftShAmt |
79 | | // All these patterns can be simplified to just: |
80 | | // x << ShiftShAmt |
81 | | // iff: |
82 | | // a,b) (MaskShAmt+ShiftShAmt) u>= bitwidth(x) |
83 | | // c,d,e,f) (ShiftShAmt-MaskShAmt) s>= 0 (i.e. ShiftShAmt u>= MaskShAmt) |
84 | | static Instruction * |
85 | | dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, |
86 | 1.01M | const SimplifyQuery &SQ) { |
87 | 1.01M | assert(OuterShift->getOpcode() == Instruction::BinaryOps::Shl && |
88 | 1.01M | "The input must be 'shl'!"); |
89 | 1.01M | |
90 | 1.01M | Value *Masked = OuterShift->getOperand(0); |
91 | 1.01M | Value *ShiftShAmt = OuterShift->getOperand(1); |
92 | 1.01M | |
93 | 1.01M | Value *MaskShAmt; |
94 | 1.01M | |
95 | 1.01M | // ((1 << MaskShAmt) - 1) |
96 | 1.01M | auto MaskA = m_Add(m_Shl(m_One(), m_Value(MaskShAmt)), m_AllOnes()); |
97 | 1.01M | // (~(-1 << maskNbits)) |
98 | 1.01M | auto MaskB = m_Xor(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_AllOnes()); |
99 | 1.01M | // (-1 >> MaskShAmt) |
100 | 1.01M | auto MaskC = m_Shr(m_AllOnes(), m_Value(MaskShAmt)); |
101 | 1.01M | // ((-1 << MaskShAmt) >> MaskShAmt) |
102 | 1.01M | auto MaskD = |
103 | 1.01M | m_Shr(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_Deferred(MaskShAmt)); |
104 | 1.01M | |
105 | 1.01M | Value *X; |
106 | 1.01M | if (match(Masked, m_c_And(m_CombineOr(MaskA, MaskB), m_Value(X)))) { |
107 | 1.75k | // Can we simplify (MaskShAmt+ShiftShAmt) ? |
108 | 1.75k | Value *SumOfShAmts = |
109 | 1.75k | SimplifyAddInst(MaskShAmt, ShiftShAmt, /*IsNSW=*/false, /*IsNUW=*/false, |
110 | 1.75k | SQ.getWithInstruction(OuterShift)); |
111 | 1.75k | if (!SumOfShAmts) |
112 | 507 | return nullptr; // Did not simplify. |
113 | 1.24k | // Is the total shift amount *not* smaller than the bit width? |
114 | 1.24k | // FIXME: could also rely on ConstantRange. |
115 | 1.24k | unsigned BitWidth = X->getType()->getScalarSizeInBits(); |
116 | 1.24k | if (!match(SumOfShAmts, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_UGE, |
117 | 1.24k | APInt(BitWidth, BitWidth)))) |
118 | 1.22k | return nullptr; |
119 | 1.01M | // All good, we can do this fold. |
120 | 1.01M | } else if (match(Masked, m_c_And(m_CombineOr(MaskC, MaskD), m_Value(X))) || |
121 | 1.01M | match(Masked, m_Shr(m_Shl(m_Value(X), m_Value(MaskShAmt)), |
122 | 1.01M | m_Deferred(MaskShAmt)))) { |
123 | 259 | // Can we simplify (ShiftShAmt-MaskShAmt) ? |
124 | 259 | Value *ShAmtsDiff = |
125 | 259 | SimplifySubInst(ShiftShAmt, MaskShAmt, /*IsNSW=*/false, /*IsNUW=*/false, |
126 | 259 | SQ.getWithInstruction(OuterShift)); |
127 | 259 | if (!ShAmtsDiff) |
128 | 155 | return nullptr; // Did not simplify. |
129 | 104 | // Is the difference non-negative? (is ShiftShAmt u>= MaskShAmt ?) |
130 | 104 | // FIXME: could also rely on ConstantRange. |
131 | 104 | if (!match(ShAmtsDiff, m_NonNegative())) |
132 | 68 | return nullptr; |
133 | 1.01M | // All good, we can do this fold. |
134 | 1.01M | } else |
135 | 1.01M | return nullptr; // Don't know anything about this pattern. |
136 | 58 | |
137 | 58 | // No 'NUW'/'NSW'! |
138 | 58 | // We no longer know that we won't shift-out non-0 bits. |
139 | 58 | return BinaryOperator::Create(OuterShift->getOpcode(), X, ShiftShAmt); |
140 | 58 | } |
141 | | |
142 | 2.12M | Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { |
143 | 2.12M | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); |
144 | 2.12M | assert(Op0->getType() == Op1->getType()); |
145 | 2.12M | |
146 | 2.12M | // See if we can fold away this shift. |
147 | 2.12M | if (SimplifyDemandedInstructionBits(I)) |
148 | 4.44k | return &I; |
149 | 2.12M | |
150 | 2.12M | // Try to fold constant and into select arguments. |
151 | 2.12M | if (isa<Constant>(Op0)) |
152 | 100k | if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) |
153 | 351 | if (Instruction *R = FoldOpIntoSelect(I, SI)) |
154 | 3 | return R; |
155 | 2.12M | |
156 | 2.12M | if (Constant *CUI = dyn_cast<Constant>(Op1)) |
157 | 1.85M | if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) |
158 | 8.24k | return Res; |
159 | 2.11M | |
160 | 2.11M | if (Instruction *NewShift = |
161 | 485 | reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ)) |
162 | 485 | return NewShift; |
163 | 2.11M | |
164 | 2.11M | // (C1 shift (A add C2)) -> (C1 shift C2) shift A) |
165 | 2.11M | // iff A and C2 are both positive. |
166 | 2.11M | Value *A; |
167 | 2.11M | Constant *C; |
168 | 2.11M | if (match(Op0, m_Constant()) && match(Op1, m_Add(m_Value(A), m_Constant(C)))100k ) |
169 | 9.85k | if (isKnownNonNegative(A, DL, 0, &AC, &I, &DT) && |
170 | 9.85k | isKnownNonNegative(C, DL, 0, &AC, &I, &DT)743 ) |
171 | 177 | return BinaryOperator::Create( |
172 | 177 | I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), Op0, C), A); |
173 | 2.11M | |
174 | 2.11M | // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2. |
175 | 2.11M | // Because shifts by negative values (which could occur if A were negative) |
176 | 2.11M | // are undefined. |
177 | 2.11M | const APInt *B; |
178 | 2.11M | if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))206k ) { |
179 | 48 | // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't |
180 | 48 | // demand the sign bit (and many others) here?? |
181 | 48 | Value *Rem = Builder.CreateAnd(A, ConstantInt::get(I.getType(), *B - 1), |
182 | 48 | Op1->getName()); |
183 | 48 | I.setOperand(1, Rem); |
184 | 48 | return &I; |
185 | 48 | } |
186 | 2.11M | |
187 | 2.11M | return nullptr; |
188 | 2.11M | } |
189 | | |
190 | | /// Return true if we can simplify two logical (either left or right) shifts |
191 | | /// that have constant shift amounts: OuterShift (InnerShift X, C1), C2. |
192 | | static bool canEvaluateShiftedShift(unsigned OuterShAmt, bool IsOuterShl, |
193 | | Instruction *InnerShift, InstCombiner &IC, |
194 | 18.3k | Instruction *CxtI) { |
195 | 18.3k | assert(InnerShift->isLogicalShift() && "Unexpected instruction type"); |
196 | 18.3k | |
197 | 18.3k | // We need constant scalar or constant splat shifts. |
198 | 18.3k | const APInt *InnerShiftConst; |
199 | 18.3k | if (!match(InnerShift->getOperand(1), m_APInt(InnerShiftConst))) |
200 | 5.95k | return false; |
201 | 12.3k | |
202 | 12.3k | // Two logical shifts in the same direction: |
203 | 12.3k | // shl (shl X, C1), C2 --> shl X, C1 + C2 |
204 | 12.3k | // lshr (lshr X, C1), C2 --> lshr X, C1 + C2 |
205 | 12.3k | bool IsInnerShl = InnerShift->getOpcode() == Instruction::Shl; |
206 | 12.3k | if (IsInnerShl == IsOuterShl) |
207 | 3.94k | return true; |
208 | 8.40k | |
209 | 8.40k | // Equal shift amounts in opposite directions become bitwise 'and': |
210 | 8.40k | // lshr (shl X, C), C --> and X, C' |
211 | 8.40k | // shl (lshr X, C), C --> and X, C' |
212 | 8.40k | if (*InnerShiftConst == OuterShAmt) |
213 | 1.02k | return true; |
214 | 7.38k | |
215 | 7.38k | // If the 2nd shift is bigger than the 1st, we can fold: |
216 | 7.38k | // lshr (shl X, C1), C2 --> and (shl X, C1 - C2), C3 |
217 | 7.38k | // shl (lshr X, C1), C2 --> and (lshr X, C1 - C2), C3 |
218 | 7.38k | // but it isn't profitable unless we know the and'd out bits are already zero. |
219 | 7.38k | // Also, check that the inner shift is valid (less than the type width) or |
220 | 7.38k | // we'll crash trying to produce the bit mask for the 'and'. |
221 | 7.38k | unsigned TypeWidth = InnerShift->getType()->getScalarSizeInBits(); |
222 | 7.38k | if (InnerShiftConst->ugt(OuterShAmt) && InnerShiftConst->ult(TypeWidth)4.53k ) { |
223 | 4.53k | unsigned InnerShAmt = InnerShiftConst->getZExtValue(); |
224 | 4.53k | unsigned MaskShift = |
225 | 4.53k | IsInnerShl ? TypeWidth - InnerShAmt2.09k : InnerShAmt - OuterShAmt2.43k ; |
226 | 4.53k | APInt Mask = APInt::getLowBitsSet(TypeWidth, OuterShAmt) << MaskShift; |
227 | 4.53k | if (IC.MaskedValueIsZero(InnerShift->getOperand(0), Mask, 0, CxtI)) |
228 | 1.18k | return true; |
229 | 6.19k | } |
230 | 6.19k | |
231 | 6.19k | return false; |
232 | 6.19k | } |
233 | | |
234 | | /// See if we can compute the specified value, but shifted logically to the left |
235 | | /// or right by some number of bits. This should return true if the expression |
236 | | /// can be computed for the same cost as the current expression tree. This is |
237 | | /// used to eliminate extraneous shifting from things like: |
238 | | /// %C = shl i128 %A, 64 |
239 | | /// %D = shl i128 %B, 96 |
240 | | /// %E = or i128 %C, %D |
241 | | /// %F = lshr i128 %E, 64 |
242 | | /// where the client will ask if E can be computed shifted right by 64-bits. If |
243 | | /// this succeeds, getShiftedValue() will be called to produce the value. |
244 | | static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift, |
245 | 1.66M | InstCombiner &IC, Instruction *CxtI) { |
246 | 1.66M | // We can always evaluate constants shifted. |
247 | 1.66M | if (isa<Constant>(V)) |
248 | 4.70k | return true; |
249 | 1.65M | |
250 | 1.65M | Instruction *I = dyn_cast<Instruction>(V); |
251 | 1.65M | if (!I) return false64.7k ; |
252 | 1.59M | |
253 | 1.59M | // If this is the opposite shift, we can directly reuse the input of the shift |
254 | 1.59M | // if the needed bits are already zero in the input. This allows us to reuse |
255 | 1.59M | // the value which means that we don't care if the shift has multiple uses. |
256 | 1.59M | // TODO: Handle opposite shift by exact value. |
257 | 1.59M | ConstantInt *CI = nullptr; |
258 | 1.59M | if ((IsLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))889k ) || |
259 | 1.59M | (1.58M !IsLeftShift1.58M && match(I, m_Shl(m_Value(), m_ConstantInt(CI)))702k )) { |
260 | 13.4k | if (CI->getValue() == NumBits) { |
261 | 1.85k | // TODO: Check that the input bits are already zero with MaskedValueIsZero |
262 | | #if 0 |
263 | | // If this is a truncate of a logical shr, we can truncate it to a smaller |
264 | | // lshr iff we know that the bits we would otherwise be shifting in are |
265 | | // already zeros. |
266 | | uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); |
267 | | uint32_t BitWidth = Ty->getScalarSizeInBits(); |
268 | | if (MaskedValueIsZero(I->getOperand(0), |
269 | | APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) && |
270 | | CI->getLimitedValue(BitWidth) < BitWidth) { |
271 | | return CanEvaluateTruncated(I->getOperand(0), Ty); |
272 | | } |
273 | | #endif |
274 | | |
275 | 1.85k | } |
276 | 13.4k | } |
277 | 1.59M | |
278 | 1.59M | // We can't mutate something that has multiple uses: doing so would |
279 | 1.59M | // require duplicating the instruction in general, which isn't profitable. |
280 | 1.59M | if (!I->hasOneUse()) return false1.02M ; |
281 | 565k | |
282 | 565k | switch (I->getOpcode()) { |
283 | 565k | default: return false465k ; |
284 | 565k | case Instruction::And: |
285 | 18.2k | case Instruction::Or: |
286 | 18.2k | case Instruction::Xor: |
287 | 18.2k | // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted. |
288 | 18.2k | return canEvaluateShifted(I->getOperand(0), NumBits, IsLeftShift, IC, I) && |
289 | 18.2k | canEvaluateShifted(I->getOperand(1), NumBits, IsLeftShift, IC, I)5.90k ; |
290 | 18.2k | |
291 | 18.3k | case Instruction::Shl: |
292 | 18.3k | case Instruction::LShr: |
293 | 18.3k | return canEvaluateShiftedShift(NumBits, IsLeftShift, I, IC, CxtI); |
294 | 18.3k | |
295 | 18.3k | case Instruction::Select: { |
296 | 2.56k | SelectInst *SI = cast<SelectInst>(I); |
297 | 2.56k | Value *TrueVal = SI->getTrueValue(); |
298 | 2.56k | Value *FalseVal = SI->getFalseValue(); |
299 | 2.56k | return canEvaluateShifted(TrueVal, NumBits, IsLeftShift, IC, SI) && |
300 | 2.56k | canEvaluateShifted(FalseVal, NumBits, IsLeftShift, IC, SI)803 ; |
301 | 18.3k | } |
302 | 61.2k | case Instruction::PHI: { |
303 | 61.2k | // We can change a phi if we can change all operands. Note that we never |
304 | 61.2k | // get into trouble with cyclic PHIs here because we only consider |
305 | 61.2k | // instructions with a single use. |
306 | 61.2k | PHINode *PN = cast<PHINode>(I); |
307 | 61.2k | for (Value *IncValue : PN->incoming_values()) |
308 | 64.3k | if (!canEvaluateShifted(IncValue, NumBits, IsLeftShift, IC, PN)) |
309 | 61.1k | return false; |
310 | 61.2k | return true81 ; |
311 | 61.2k | } |
312 | 565k | } |
313 | 565k | } |
314 | | |
315 | | /// Fold OuterShift (InnerShift X, C1), C2. |
316 | | /// See canEvaluateShiftedShift() for the constraints on these instructions. |
317 | | static Value *foldShiftedShift(BinaryOperator *InnerShift, unsigned OuterShAmt, |
318 | | bool IsOuterShl, |
319 | 1.07k | InstCombiner::BuilderTy &Builder) { |
320 | 1.07k | bool IsInnerShl = InnerShift->getOpcode() == Instruction::Shl; |
321 | 1.07k | Type *ShType = InnerShift->getType(); |
322 | 1.07k | unsigned TypeWidth = ShType->getScalarSizeInBits(); |
323 | 1.07k | |
324 | 1.07k | // We only accept shifts-by-a-constant in canEvaluateShifted(). |
325 | 1.07k | const APInt *C1; |
326 | 1.07k | match(InnerShift->getOperand(1), m_APInt(C1)); |
327 | 1.07k | unsigned InnerShAmt = C1->getZExtValue(); |
328 | 1.07k | |
329 | 1.07k | // Change the shift amount and clear the appropriate IR flags. |
330 | 1.07k | auto NewInnerShift = [&](unsigned ShAmt) { |
331 | 72 | InnerShift->setOperand(1, ConstantInt::get(ShType, ShAmt)); |
332 | 72 | if (IsInnerShl) { |
333 | 58 | InnerShift->setHasNoUnsignedWrap(false); |
334 | 58 | InnerShift->setHasNoSignedWrap(false); |
335 | 58 | } else { |
336 | 14 | InnerShift->setIsExact(false); |
337 | 14 | } |
338 | 72 | return InnerShift; |
339 | 72 | }; |
340 | 1.07k | |
341 | 1.07k | // Two logical shifts in the same direction: |
342 | 1.07k | // shl (shl X, C1), C2 --> shl X, C1 + C2 |
343 | 1.07k | // lshr (lshr X, C1), C2 --> lshr X, C1 + C2 |
344 | 1.07k | if (IsInnerShl == IsOuterShl) { |
345 | 60 | // If this is an oversized composite shift, then unsigned shifts get 0. |
346 | 60 | if (InnerShAmt + OuterShAmt >= TypeWidth) |
347 | 0 | return Constant::getNullValue(ShType); |
348 | 60 | |
349 | 60 | return NewInnerShift(InnerShAmt + OuterShAmt); |
350 | 60 | } |
351 | 1.01k | |
352 | 1.01k | // Equal shift amounts in opposite directions become bitwise 'and': |
353 | 1.01k | // lshr (shl X, C), C --> and X, C' |
354 | 1.01k | // shl (lshr X, C), C --> and X, C' |
355 | 1.01k | if (InnerShAmt == OuterShAmt) { |
356 | 1.00k | APInt Mask = IsInnerShl |
357 | 1.00k | ? APInt::getLowBitsSet(TypeWidth, TypeWidth - OuterShAmt)27 |
358 | 1.00k | : APInt::getHighBitsSet(TypeWidth, TypeWidth - OuterShAmt)978 ; |
359 | 1.00k | Value *And = Builder.CreateAnd(InnerShift->getOperand(0), |
360 | 1.00k | ConstantInt::get(ShType, Mask)); |
361 | 1.00k | if (auto *AndI = dyn_cast<Instruction>(And)) { |
362 | 1.00k | AndI->moveBefore(InnerShift); |
363 | 1.00k | AndI->takeName(InnerShift); |
364 | 1.00k | } |
365 | 1.00k | return And; |
366 | 1.00k | } |
367 | 12 | |
368 | 12 | assert(InnerShAmt > OuterShAmt && |
369 | 12 | "Unexpected opposite direction logical shift pair"); |
370 | 12 | |
371 | 12 | // In general, we would need an 'and' for this transform, but |
372 | 12 | // canEvaluateShiftedShift() guarantees that the masked-off bits are not used. |
373 | 12 | // lshr (shl X, C1), C2 --> shl X, C1 - C2 |
374 | 12 | // shl (lshr X, C1), C2 --> lshr X, C1 - C2 |
375 | 12 | return NewInnerShift(InnerShAmt - OuterShAmt); |
376 | 12 | } |
377 | | |
378 | | /// When canEvaluateShifted() returns true for an expression, this function |
379 | | /// inserts the new computation that produces the shifted value. |
380 | | static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, |
381 | 2.41k | InstCombiner &IC, const DataLayout &DL) { |
382 | 2.41k | // We can always evaluate constants shifted. |
383 | 2.41k | if (Constant *C = dyn_cast<Constant>(V)) { |
384 | 739 | if (isLeftShift) |
385 | 720 | V = IC.Builder.CreateShl(C, NumBits); |
386 | 19 | else |
387 | 19 | V = IC.Builder.CreateLShr(C, NumBits); |
388 | 739 | // If we got a constantexpr back, try to simplify it with TD info. |
389 | 739 | if (auto *C = dyn_cast<Constant>(V)) |
390 | 739 | if (auto *FoldedC = |
391 | 0 | ConstantFoldConstant(C, DL, &IC.getTargetLibraryInfo())) |
392 | 0 | V = FoldedC; |
393 | 739 | return V; |
394 | 739 | } |
395 | 1.67k | |
396 | 1.67k | Instruction *I = cast<Instruction>(V); |
397 | 1.67k | IC.Worklist.Add(I); |
398 | 1.67k | |
399 | 1.67k | switch (I->getOpcode()) { |
400 | 1.67k | default: 0 llvm_unreachable0 ("Inconsistency with CanEvaluateShifted"); |
401 | 1.67k | case Instruction::And: |
402 | 532 | case Instruction::Or: |
403 | 532 | case Instruction::Xor: |
404 | 532 | // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted. |
405 | 532 | I->setOperand( |
406 | 532 | 0, getShiftedValue(I->getOperand(0), NumBits, isLeftShift, IC, DL)); |
407 | 532 | I->setOperand( |
408 | 532 | 1, getShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL)); |
409 | 532 | return I; |
410 | 532 | |
411 | 1.07k | case Instruction::Shl: |
412 | 1.07k | case Instruction::LShr: |
413 | 1.07k | return foldShiftedShift(cast<BinaryOperator>(I), NumBits, isLeftShift, |
414 | 1.07k | IC.Builder); |
415 | 1.07k | |
416 | 1.07k | case Instruction::Select: |
417 | 30 | I->setOperand( |
418 | 30 | 1, getShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL)); |
419 | 30 | I->setOperand( |
420 | 30 | 2, getShiftedValue(I->getOperand(2), NumBits, isLeftShift, IC, DL)); |
421 | 30 | return I; |
422 | 1.07k | case Instruction::PHI: { |
423 | 38 | // We can change a phi if we can change all operands. Note that we never |
424 | 38 | // get into trouble with cyclic PHIs here because we only consider |
425 | 38 | // instructions with a single use. |
426 | 38 | PHINode *PN = cast<PHINode>(I); |
427 | 231 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i193 ) |
428 | 193 | PN->setIncomingValue(i, getShiftedValue(PN->getIncomingValue(i), NumBits, |
429 | 193 | isLeftShift, IC, DL)); |
430 | 38 | return PN; |
431 | 1.07k | } |
432 | 1.67k | } |
433 | 1.67k | } |
434 | | |
435 | | // If this is a bitwise operator or add with a constant RHS we might be able |
436 | | // to pull it through a shift. |
437 | | static bool canShiftBinOpWithConstantRHS(BinaryOperator &Shift, |
438 | 165k | BinaryOperator *BO) { |
439 | 165k | switch (BO->getOpcode()) { |
440 | 165k | default: |
441 | 123k | return false; // Do not perform transform! |
442 | 165k | case Instruction::Add: |
443 | 37.1k | return Shift.getOpcode() == Instruction::Shl; |
444 | 165k | case Instruction::Or: |
445 | 4.88k | case Instruction::Xor: |
446 | 4.88k | case Instruction::And: |
447 | 4.88k | return true; |
448 | 165k | } |
449 | 165k | } |
450 | | |
451 | | Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, |
452 | 1.85M | BinaryOperator &I) { |
453 | 1.85M | bool isLeftShift = I.getOpcode() == Instruction::Shl; |
454 | 1.85M | |
455 | 1.85M | const APInt *Op1C; |
456 | 1.85M | if (!match(Op1, m_APInt(Op1C))) |
457 | 544 | return nullptr; |
458 | 1.85M | |
459 | 1.85M | // See if we can propagate this shift into the input, this covers the trivial |
460 | 1.85M | // cast of lshr(shl(x,c1),c2) as well as other more complex cases. |
461 | 1.85M | if (I.getOpcode() != Instruction::AShr && |
462 | 1.85M | canEvaluateShifted(Op0, Op1C->getZExtValue(), isLeftShift, *this, &I)1.57M ) { |
463 | 1.09k | LLVM_DEBUG( |
464 | 1.09k | dbgs() << "ICE: GetShiftedValue propagating shift through expression" |
465 | 1.09k | " to eliminate shift:\n IN: " |
466 | 1.09k | << *Op0 << "\n SH: " << I << "\n"); |
467 | 1.09k | |
468 | 1.09k | return replaceInstUsesWith( |
469 | 1.09k | I, getShiftedValue(Op0, Op1C->getZExtValue(), isLeftShift, *this, DL)); |
470 | 1.09k | } |
471 | 1.85M | |
472 | 1.85M | // See if we can simplify any instructions used by the instruction whose sole |
473 | 1.85M | // purpose is to compute bits we don't care about. |
474 | 1.85M | unsigned TypeBits = Op0->getType()->getScalarSizeInBits(); |
475 | 1.85M | |
476 | 1.85M | assert(!Op1C->uge(TypeBits) && |
477 | 1.85M | "Shift over the type width should have been removed already"); |
478 | 1.85M | |
479 | 1.85M | if (Instruction *FoldedShift = foldBinOpIntoSelectOrPhi(I)) |
480 | 421 | return FoldedShift; |
481 | 1.85M | |
482 | 1.85M | // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) |
483 | 1.85M | if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) { |
484 | 38.3k | Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0)); |
485 | 38.3k | // If 'shift2' is an ashr, we would have to get the sign bit into a funny |
486 | 38.3k | // place. Don't try to do this transformation in this case. Also, we |
487 | 38.3k | // require that the input operand is a shift-by-constant so that we have |
488 | 38.3k | // confidence that the shifts will get folded together. We could do this |
489 | 38.3k | // xform in more cases, but it is unlikely to be profitable. |
490 | 38.3k | if (TrOp && I.isLogicalShift()36.0k && TrOp->isShift()35.4k && |
491 | 38.3k | isa<ConstantInt>(TrOp->getOperand(1))438 ) { |
492 | 432 | // Okay, we'll do this xform. Make the shift of shift. |
493 | 432 | Constant *ShAmt = |
494 | 432 | ConstantExpr::getZExt(cast<Constant>(Op1), TrOp->getType()); |
495 | 432 | // (shift2 (shift1 & 0x00FF), c2) |
496 | 432 | Value *NSh = Builder.CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName()); |
497 | 432 | |
498 | 432 | // For logical shifts, the truncation has the effect of making the high |
499 | 432 | // part of the register be zeros. Emulate this by inserting an AND to |
500 | 432 | // clear the top bits as needed. This 'and' will usually be zapped by |
501 | 432 | // other xforms later if dead. |
502 | 432 | unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); |
503 | 432 | unsigned DstSize = TI->getType()->getScalarSizeInBits(); |
504 | 432 | APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); |
505 | 432 | |
506 | 432 | // The mask we constructed says what the trunc would do if occurring |
507 | 432 | // between the shifts. We want to know the effect *after* the second |
508 | 432 | // shift. We know that it is a logical shift by a constant, so adjust the |
509 | 432 | // mask as appropriate. |
510 | 432 | if (I.getOpcode() == Instruction::Shl) |
511 | 236 | MaskV <<= Op1C->getZExtValue(); |
512 | 196 | else { |
513 | 196 | assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); |
514 | 196 | MaskV.lshrInPlace(Op1C->getZExtValue()); |
515 | 196 | } |
516 | 432 | |
517 | 432 | // shift1 & 0x00FF |
518 | 432 | Value *And = Builder.CreateAnd(NSh, |
519 | 432 | ConstantInt::get(I.getContext(), MaskV), |
520 | 432 | TI->getName()); |
521 | 432 | |
522 | 432 | // Return the value truncated to the interesting size. |
523 | 432 | return new TruncInst(And, I.getType()); |
524 | 432 | } |
525 | 1.84M | } |
526 | 1.84M | |
527 | 1.84M | if (Op0->hasOneUse()) { |
528 | 694k | if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) { |
529 | 293k | // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) |
530 | 293k | Value *V1, *V2; |
531 | 293k | ConstantInt *CC; |
532 | 293k | switch (Op0BO->getOpcode()) { |
533 | 293k | default: break141k ; |
534 | 293k | case Instruction::Add: |
535 | 78.2k | case Instruction::And: |
536 | 78.2k | case Instruction::Or: |
537 | 78.2k | case Instruction::Xor: { |
538 | 78.2k | // These operators commute. |
539 | 78.2k | // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C) |
540 | 78.2k | if (isLeftShift && Op0BO->getOperand(1)->hasOneUse()15.0k && |
541 | 78.2k | match(Op0BO->getOperand(1), m_Shr(m_Value(V1), |
542 | 8.86k | m_Specific(Op1)))) { |
543 | 13 | Value *YS = // (Y << C) |
544 | 13 | Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); |
545 | 13 | // (X + (Y << C)) |
546 | 13 | Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), YS, V1, |
547 | 13 | Op0BO->getOperand(1)->getName()); |
548 | 13 | unsigned Op1Val = Op1C->getLimitedValue(TypeBits); |
549 | 13 | |
550 | 13 | APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val); |
551 | 13 | Constant *Mask = ConstantInt::get(I.getContext(), Bits); |
552 | 13 | if (VectorType *VT = dyn_cast<VectorType>(X->getType())) |
553 | 1 | Mask = ConstantVector::getSplat(VT->getNumElements(), Mask); |
554 | 13 | return BinaryOperator::CreateAnd(X, Mask); |
555 | 13 | } |
556 | 78.2k | |
557 | 78.2k | // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C)) |
558 | 78.2k | Value *Op0BOOp1 = Op0BO->getOperand(1); |
559 | 78.2k | if (isLeftShift && Op0BOOp1->hasOneUse()15.0k && |
560 | 78.2k | match(Op0BOOp1, |
561 | 8.85k | m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))), |
562 | 8.85k | m_ConstantInt(CC)))) { |
563 | 1 | Value *YS = // (Y << C) |
564 | 1 | Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); |
565 | 1 | // X & (CC << C) |
566 | 1 | Value *XM = Builder.CreateAnd(V1, ConstantExpr::getShl(CC, Op1), |
567 | 1 | V1->getName()+".mask"); |
568 | 1 | return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); |
569 | 1 | } |
570 | 78.2k | LLVM_FALLTHROUGH; |
571 | 78.2k | } |
572 | 78.2k | |
573 | 151k | case Instruction::Sub: { |
574 | 151k | // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) |
575 | 151k | if (isLeftShift && Op0BO->getOperand(0)->hasOneUse()22.6k && |
576 | 151k | match(Op0BO->getOperand(0), m_Shr(m_Value(V1), |
577 | 13.1k | m_Specific(Op1)))) { |
578 | 94 | Value *YS = // (Y << C) |
579 | 94 | Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); |
580 | 94 | // (X + (Y << C)) |
581 | 94 | Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), V1, YS, |
582 | 94 | Op0BO->getOperand(0)->getName()); |
583 | 94 | unsigned Op1Val = Op1C->getLimitedValue(TypeBits); |
584 | 94 | |
585 | 94 | APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val); |
586 | 94 | Constant *Mask = ConstantInt::get(I.getContext(), Bits); |
587 | 94 | if (VectorType *VT = dyn_cast<VectorType>(X->getType())) |
588 | 2 | Mask = ConstantVector::getSplat(VT->getNumElements(), Mask); |
589 | 94 | return BinaryOperator::CreateAnd(X, Mask); |
590 | 94 | } |
591 | 151k | |
592 | 151k | // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) |
593 | 151k | if (isLeftShift && Op0BO->getOperand(0)->hasOneUse()22.5k && |
594 | 151k | match(Op0BO->getOperand(0), |
595 | 13.0k | m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))), |
596 | 13.0k | m_ConstantInt(CC))) && V2 == Op123 ) { |
597 | 2 | Value *YS = // (Y << C) |
598 | 2 | Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); |
599 | 2 | // X & (CC << C) |
600 | 2 | Value *XM = Builder.CreateAnd(V1, ConstantExpr::getShl(CC, Op1), |
601 | 2 | V1->getName()+".mask"); |
602 | 2 | |
603 | 2 | return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); |
604 | 2 | } |
605 | 151k | |
606 | 151k | break; |
607 | 151k | } |
608 | 292k | } |
609 | 292k | |
610 | 292k | |
611 | 292k | // If the operand is a bitwise operator with a constant RHS, and the |
612 | 292k | // shift is the only use, we can pull it out of the shift. |
613 | 292k | const APInt *Op0C; |
614 | 292k | if (match(Op0BO->getOperand(1), m_APInt(Op0C))) { |
615 | 165k | if (canShiftBinOpWithConstantRHS(I, Op0BO)) { |
616 | 5.99k | Constant *NewRHS = ConstantExpr::get(I.getOpcode(), |
617 | 5.99k | cast<Constant>(Op0BO->getOperand(1)), Op1); |
618 | 5.99k | |
619 | 5.99k | Value *NewShift = |
620 | 5.99k | Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); |
621 | 5.99k | NewShift->takeName(Op0BO); |
622 | 5.99k | |
623 | 5.99k | return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, |
624 | 5.99k | NewRHS); |
625 | 5.99k | } |
626 | 286k | } |
627 | 286k | |
628 | 286k | // If the operand is a subtract with a constant LHS, and the shift |
629 | 286k | // is the only use, we can pull it out of the shift. |
630 | 286k | // This folds (shl (sub C1, X), C2) -> (sub (C1 << C2), (shl X, C2)) |
631 | 286k | if (isLeftShift && Op0BO->getOpcode() == Instruction::Sub30.2k && |
632 | 286k | match(Op0BO->getOperand(0), m_APInt(Op0C))7.57k ) { |
633 | 142 | Constant *NewRHS = ConstantExpr::get(I.getOpcode(), |
634 | 142 | cast<Constant>(Op0BO->getOperand(0)), Op1); |
635 | 142 | |
636 | 142 | Value *NewShift = Builder.CreateShl(Op0BO->getOperand(1), Op1); |
637 | 142 | NewShift->takeName(Op0BO); |
638 | 142 | |
639 | 142 | return BinaryOperator::CreateSub(NewRHS, NewShift); |
640 | 142 | } |
641 | 688k | } |
642 | 688k | |
643 | 688k | // If we have a select that conditionally executes some binary operator, |
644 | 688k | // see if we can pull it the select and operator through the shift. |
645 | 688k | // |
646 | 688k | // For example, turning: |
647 | 688k | // shl (select C, (add X, C1), X), C2 |
648 | 688k | // Into: |
649 | 688k | // Y = shl X, C2 |
650 | 688k | // select C, (add Y, C1 << C2), Y |
651 | 688k | Value *Cond; |
652 | 688k | BinaryOperator *TBO; |
653 | 688k | Value *FalseVal; |
654 | 688k | if (match(Op0, m_Select(m_Value(Cond), m_OneUse(m_BinOp(TBO)), |
655 | 688k | m_Value(FalseVal)))) { |
656 | 432 | const APInt *C; |
657 | 432 | if (!isa<Constant>(FalseVal) && TBO->getOperand(0) == FalseVal && |
658 | 432 | match(TBO->getOperand(1), m_APInt(C))44 && |
659 | 432 | canShiftBinOpWithConstantRHS(I, TBO)44 ) { |
660 | 32 | Constant *NewRHS = ConstantExpr::get(I.getOpcode(), |
661 | 32 | cast<Constant>(TBO->getOperand(1)), Op1); |
662 | 32 | |
663 | 32 | Value *NewShift = |
664 | 32 | Builder.CreateBinOp(I.getOpcode(), FalseVal, Op1); |
665 | 32 | Value *NewOp = Builder.CreateBinOp(TBO->getOpcode(), NewShift, |
666 | 32 | NewRHS); |
667 | 32 | return SelectInst::Create(Cond, NewOp, NewShift); |
668 | 32 | } |
669 | 688k | } |
670 | 688k | |
671 | 688k | BinaryOperator *FBO; |
672 | 688k | Value *TrueVal; |
673 | 688k | if (match(Op0, m_Select(m_Value(Cond), m_Value(TrueVal), |
674 | 688k | m_OneUse(m_BinOp(FBO))))) { |
675 | 162 | const APInt *C; |
676 | 162 | if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal && |
677 | 162 | match(FBO->getOperand(1), m_APInt(C))97 && |
678 | 162 | canShiftBinOpWithConstantRHS(I, FBO)97 ) { |
679 | 11 | Constant *NewRHS = ConstantExpr::get(I.getOpcode(), |
680 | 11 | cast<Constant>(FBO->getOperand(1)), Op1); |
681 | 11 | |
682 | 11 | Value *NewShift = |
683 | 11 | Builder.CreateBinOp(I.getOpcode(), TrueVal, Op1); |
684 | 11 | Value *NewOp = Builder.CreateBinOp(FBO->getOpcode(), NewShift, |
685 | 11 | NewRHS); |
686 | 11 | return SelectInst::Create(Cond, NewShift, NewOp); |
687 | 11 | } |
688 | 1.84M | } |
689 | 688k | } |
690 | 1.84M | |
691 | 1.84M | return nullptr; |
692 | 1.84M | } |
693 | | |
694 | 1.02M | Instruction *InstCombiner::visitShl(BinaryOperator &I) { |
695 | 1.02M | if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1), |
696 | 1.48k | I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), |
697 | 1.48k | SQ.getWithInstruction(&I))) |
698 | 1.48k | return replaceInstUsesWith(I, V); |
699 | 1.02M | |
700 | 1.02M | if (Instruction *X = foldVectorBinop(I)) |
701 | 8 | return X; |
702 | 1.02M | |
703 | 1.02M | if (Instruction *V = commonShiftTransforms(I)) |
704 | 7.54k | return V; |
705 | 1.01M | |
706 | 1.01M | if (Instruction *V = dropRedundantMaskingOfLeftShiftInput(&I, SQ)) |
707 | 58 | return V; |
708 | 1.01M | |
709 | 1.01M | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); |
710 | 1.01M | Type *Ty = I.getType(); |
711 | 1.01M | unsigned BitWidth = Ty->getScalarSizeInBits(); |
712 | 1.01M | |
713 | 1.01M | const APInt *ShAmtAPInt; |
714 | 1.01M | if (match(Op1, m_APInt(ShAmtAPInt))) { |
715 | 840k | unsigned ShAmt = ShAmtAPInt->getZExtValue(); |
716 | 840k | unsigned BitWidth = Ty->getScalarSizeInBits(); |
717 | 840k | |
718 | 840k | // shl (zext X), ShAmt --> zext (shl X, ShAmt) |
719 | 840k | // This is only valid if X would have zeros shifted out. |
720 | 840k | Value *X; |
721 | 840k | if (match(Op0, m_ZExt(m_Value(X)))) { |
722 | 163k | unsigned SrcWidth = X->getType()->getScalarSizeInBits(); |
723 | 163k | if (ShAmt < SrcWidth && |
724 | 163k | MaskedValueIsZero(X, APInt::getHighBitsSet(SrcWidth, ShAmt), 0, &I)64.4k ) |
725 | 589 | return new ZExtInst(Builder.CreateShl(X, ShAmt), Ty); |
726 | 839k | } |
727 | 839k | |
728 | 839k | // (X >> C) << C --> X & (-1 << C) |
729 | 839k | if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1)))) { |
730 | 560 | APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)); |
731 | 560 | return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask)); |
732 | 560 | } |
733 | 838k | |
734 | 838k | // FIXME: we do not yet transform non-exact shr's. The backend (DAGCombine) |
735 | 838k | // needs a few fixes for the rotate pattern recognition first. |
736 | 838k | const APInt *ShOp1; |
737 | 838k | if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(ShOp1))))) { |
738 | 675 | unsigned ShrAmt = ShOp1->getZExtValue(); |
739 | 675 | if (ShrAmt < ShAmt) { |
740 | 120 | // If C1 < C2: (X >>?,exact C1) << C2 --> X << (C2 - C1) |
741 | 120 | Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShrAmt); |
742 | 120 | auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff); |
743 | 120 | NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); |
744 | 120 | NewShl->setHasNoSignedWrap(I.hasNoSignedWrap()); |
745 | 120 | return NewShl; |
746 | 120 | } |
747 | 555 | if (ShrAmt > ShAmt) { |
748 | 555 | // If C1 > C2: (X >>?exact C1) << C2 --> X >>?exact (C1 - C2) |
749 | 555 | Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmt); |
750 | 555 | auto *NewShr = BinaryOperator::Create( |
751 | 555 | cast<BinaryOperator>(Op0)->getOpcode(), X, ShiftDiff); |
752 | 555 | NewShr->setIsExact(true); |
753 | 555 | return NewShr; |
754 | 555 | } |
755 | 838k | } |
756 | 838k | |
757 | 838k | if (match(Op0, m_Shl(m_Value(X), m_APInt(ShOp1)))) { |
758 | 0 | unsigned AmtSum = ShAmt + ShOp1->getZExtValue(); |
759 | 0 | // Oversized shifts are simplified to zero in InstSimplify. |
760 | 0 | if (AmtSum < BitWidth) |
761 | 0 | // (X << C1) << C2 --> X << (C1 + C2) |
762 | 0 | return BinaryOperator::CreateShl(X, ConstantInt::get(Ty, AmtSum)); |
763 | 838k | } |
764 | 838k | |
765 | 838k | // If the shifted-out value is known-zero, then this is a NUW shift. |
766 | 838k | if (!I.hasNoUnsignedWrap() && |
767 | 838k | MaskedValueIsZero(Op0, APInt::getHighBitsSet(BitWidth, ShAmt), 0, &I)628k ) { |
768 | 7.61k | I.setHasNoUnsignedWrap(); |
769 | 7.61k | return &I; |
770 | 7.61k | } |
771 | 830k | |
772 | 830k | // If the shifted-out value is all signbits, then this is a NSW shift. |
773 | 830k | if (!I.hasNoSignedWrap() && ComputeNumSignBits(Op0, 0, &I) > ShAmt510k ) { |
774 | 10.6k | I.setHasNoSignedWrap(); |
775 | 10.6k | return &I; |
776 | 10.6k | } |
777 | 998k | } |
778 | 998k | |
779 | 998k | // Transform (x >> y) << y to x & (-1 << y) |
780 | 998k | // Valid for any type of right-shift. |
781 | 998k | Value *X; |
782 | 998k | if (match(Op0, m_OneUse(m_Shr(m_Value(X), m_Specific(Op1))))) { |
783 | 64 | Constant *AllOnes = ConstantInt::getAllOnesValue(Ty); |
784 | 64 | Value *Mask = Builder.CreateShl(AllOnes, Op1); |
785 | 64 | return BinaryOperator::CreateAnd(Mask, X); |
786 | 64 | } |
787 | 998k | |
788 | 998k | Constant *C1; |
789 | 998k | if (match(Op1, m_Constant(C1))) { |
790 | 820k | Constant *C2; |
791 | 820k | Value *X; |
792 | 820k | // (C2 << X) << C1 --> (C2 << C1) << X |
793 | 820k | if (match(Op0, m_OneUse(m_Shl(m_Constant(C2), m_Value(X))))) |
794 | 35 | return BinaryOperator::CreateShl(ConstantExpr::getShl(C2, C1), X); |
795 | 820k | |
796 | 820k | // (X * C2) << C1 --> X * (C2 << C1) |
797 | 820k | if (match(Op0, m_Mul(m_Value(X), m_Constant(C2)))) |
798 | 103 | return BinaryOperator::CreateMul(X, ConstantExpr::getShl(C2, C1)); |
799 | 998k | } |
800 | 998k | |
801 | 998k | // (1 << (C - x)) -> ((1 << C) >> x) if C is bitwidth - 1 |
802 | 998k | if (match(Op0, m_One()) && |
803 | 998k | match(Op1, m_Sub(m_SpecificInt(BitWidth - 1), m_Value(X)))60.9k ) |
804 | 32 | return BinaryOperator::CreateLShr( |
805 | 32 | ConstantInt::get(Ty, APInt::getSignMask(BitWidth)), X); |
806 | 998k | |
807 | 998k | return nullptr; |
808 | 998k | } |
809 | | |
/// Visit an 'lshr' (logical shift right) instruction.
///
/// Applies an ordered list of canonicalizations; the first matching transform
/// wins. Returns a replacement instruction, &I when only the 'exact' flag was
/// refined, or nullptr when nothing changed.
Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
  // Let InstSimplify fold trivial cases (shift by 0, of 0, oversized) first.
  if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  // Transforms shared by all three shift opcodes.
  if (Instruction *R = commonShiftTransforms(I))
    return R;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Type *Ty = I.getType();
  const APInt *ShAmtAPInt;
  if (match(Op1, m_APInt(ShAmtAPInt))) {
    unsigned ShAmt = ShAmtAPInt->getZExtValue();
    unsigned BitWidth = Ty->getScalarSizeInBits();
    // Shifting a bit-count intrinsic right by log2(BitWidth) isolates its
    // only possibly-set high bit, which is equivalent to a compare + zext.
    auto *II = dyn_cast<IntrinsicInst>(Op0);
    if (II && isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmt &&
        (II->getIntrinsicID() == Intrinsic::ctlz ||
         II->getIntrinsicID() == Intrinsic::cttz ||
         II->getIntrinsicID() == Intrinsic::ctpop)) {
      // ctlz.i32(x)>>5  --> zext(x == 0)
      // cttz.i32(x)>>5  --> zext(x == 0)
      // ctpop.i32(x)>>5 --> zext(x == -1)
      bool IsPop = II->getIntrinsicID() == Intrinsic::ctpop;
      Constant *RHS = ConstantInt::getSigned(Ty, IsPop ? -1 : 0);
      Value *Cmp = Builder.CreateICmpEQ(II->getArgOperand(0), RHS);
      return new ZExtInst(Cmp, Ty);
    }

    Value *X;
    const APInt *ShOp1;
    // Fold (X << C1) >>u C2 for all three orderings of C1 vs C2. The nuw
    // variants can drop the masking AND because no set bits were shifted out.
    if (match(Op0, m_Shl(m_Value(X), m_APInt(ShOp1))) &&
        ShOp1->ult(BitWidth)) {
      if (ShOp1->ult(ShAmt)) {
        unsigned ShlAmt = ShOp1->getZExtValue();
        Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShlAmt);
        if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
          // (X <<nuw C1) >>u C2 --> X >>u (C2 - C1)
          auto *NewLShr = BinaryOperator::CreateLShr(X, ShiftDiff);
          NewLShr->setIsExact(I.isExact());
          return NewLShr;
        }
        // (X << C1) >>u C2  --> (X >>u (C2 - C1)) & (-1 >> C2)
        Value *NewLShr = Builder.CreateLShr(X, ShiftDiff, "", I.isExact());
        APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
        return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask));
      }
      if (ShOp1->ugt(ShAmt)) {
        unsigned ShlAmt = ShOp1->getZExtValue();
        Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmt - ShAmt);
        if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
          // (X <<nuw C1) >>u C2 --> X <<nuw (C1 - C2)
          auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
          NewShl->setHasNoUnsignedWrap(true);
          return NewShl;
        }
        // (X << C1) >>u C2  --> X << (C1 - C2) & (-1 >> C2)
        Value *NewShl = Builder.CreateShl(X, ShiftDiff);
        APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
        return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
      }
      assert(*ShOp1 == ShAmt);
      // (X << C) >>u C --> X & (-1 >>u C)
      APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
      return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
    }

    // Narrow the shift through a zext when the narrower type is desirable
    // (shouldChangeType consults the data layout's legal integer widths).
    if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) &&
        (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) {
      assert(ShAmt < X->getType()->getScalarSizeInBits() &&
             "Big shift not simplified to zero?");
      // lshr (zext iM X to iN), C --> zext (lshr X, C) to iN
      Value *NewLShr = Builder.CreateLShr(X, ShAmt);
      return new ZExtInst(NewLShr, Ty);
    }

    if (match(Op0, m_SExt(m_Value(X))) &&
        (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) {
      // Are we moving the sign bit to the low bit and widening with high
      // zeros?
      unsigned SrcTyBitWidth = X->getType()->getScalarSizeInBits();
      if (ShAmt == BitWidth - 1) {
        // lshr (sext i1 X to iN), N-1 --> zext X to iN
        if (SrcTyBitWidth == 1)
          return new ZExtInst(X, Ty);

        // lshr (sext iM X to iN), N-1 --> zext (lshr X, M-1) to iN
        if (Op0->hasOneUse()) {
          Value *NewLShr = Builder.CreateLShr(X, SrcTyBitWidth - 1);
          return new ZExtInst(NewLShr, Ty);
        }
      }

      // lshr (sext iM X to iN), N-M --> zext (ashr X, min(N-M, M-1)) to iN
      if (ShAmt == BitWidth - SrcTyBitWidth && Op0->hasOneUse()) {
        // The new shift amount can't be more than the narrow source type.
        unsigned NewShAmt = std::min(ShAmt, SrcTyBitWidth - 1);
        Value *AShr = Builder.CreateAShr(X, NewShAmt);
        return new ZExtInst(AShr, Ty);
      }
    }

    if (match(Op0, m_LShr(m_Value(X), m_APInt(ShOp1)))) {
      unsigned AmtSum = ShAmt + ShOp1->getZExtValue();
      // Oversized shifts are simplified to zero in InstSimplify.
      if (AmtSum < BitWidth)
        // (X >>u C1) >>u C2 --> X >>u (C1 + C2)
        return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
    }

    // If the shifted-out value is known-zero, then this is an exact shift.
    if (!I.isExact() &&
        MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) {
      I.setIsExact();
      return &I;
    }
  }

  // Transform (x << y) >> y to x & (-1 >> y)
  // Variable shift amount; one-use keeps instruction count from growing.
  Value *X;
  if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))))) {
    Constant *AllOnes = ConstantInt::getAllOnesValue(Ty);
    Value *Mask = Builder.CreateLShr(AllOnes, Op1);
    return BinaryOperator::CreateAnd(Mask, X);
  }

  return nullptr;
}
938 | | |
/// Visit an 'ashr' (arithmetic shift right) instruction.
///
/// Tries a sequence of canonicalizations in priority order and finally
/// attempts to weaken ashr to lshr when the sign bit is known zero. Returns a
/// replacement instruction, &I when only the 'exact' flag was added, or
/// nullptr when nothing changed.
Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
  // Let InstSimplify handle the trivial folds first.
  if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  // Transforms shared by all three shift opcodes.
  if (Instruction *R = commonShiftTransforms(I))
    return R;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Type *Ty = I.getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  const APInt *ShAmtAPInt;
  // Unlike shl/lshr, guard on ult(BitWidth) here so getZExtValue is safe and
  // all arithmetic on ShAmt below stays in range.
  if (match(Op1, m_APInt(ShAmtAPInt)) && ShAmtAPInt->ult(BitWidth)) {
    unsigned ShAmt = ShAmtAPInt->getZExtValue();

    // If the shift amount equals the difference in width of the destination
    // and source scalar types:
    // ashr (shl (zext X), C), C --> sext X
    Value *X;
    if (match(Op0, m_Shl(m_ZExt(m_Value(X)), m_Specific(Op1))) &&
        ShAmt == BitWidth - X->getType()->getScalarSizeInBits())
      return new SExtInst(X, Ty);

    // We can't handle (X << C1) >>s C2. It shifts arbitrary bits in. However,
    // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
    const APInt *ShOp1;
    if (match(Op0, m_NSWShl(m_Value(X), m_APInt(ShOp1))) &&
        ShOp1->ult(BitWidth)) {
      unsigned ShlAmt = ShOp1->getZExtValue();
      if (ShlAmt < ShAmt) {
        // (X <<nsw C1) >>s C2 --> X >>s (C2 - C1)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShlAmt);
        auto *NewAShr = BinaryOperator::CreateAShr(X, ShiftDiff);
        NewAShr->setIsExact(I.isExact());
        return NewAShr;
      }
      if (ShlAmt > ShAmt) {
        // (X <<nsw C1) >>s C2 --> X <<nsw (C1 - C2)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmt - ShAmt);
        auto *NewShl = BinaryOperator::Create(Instruction::Shl, X, ShiftDiff);
        NewShl->setHasNoSignedWrap(true);
        return NewShl;
      }
      // ShlAmt == ShAmt was already folded by commonShiftTransforms /
      // earlier patterns, so no third case is needed here.
    }

    if (match(Op0, m_AShr(m_Value(X), m_APInt(ShOp1))) &&
        ShOp1->ult(BitWidth)) {
      unsigned AmtSum = ShAmt + ShOp1->getZExtValue();
      // Oversized arithmetic shifts replicate the sign bit.
      AmtSum = std::min(AmtSum, BitWidth - 1);
      // (X >>s C1) >>s C2 --> X >>s (C1 + C2)
      return BinaryOperator::CreateAShr(X, ConstantInt::get(Ty, AmtSum));
    }

    // Narrow the shift through a sext when profitable (shouldChangeType
    // consults the data layout's legal integer widths).
    if (match(Op0, m_OneUse(m_SExt(m_Value(X)))) &&
        (Ty->isVectorTy() || shouldChangeType(Ty, X->getType()))) {
      // ashr (sext X), C --> sext (ashr X, C')
      Type *SrcTy = X->getType();
      // Clamp: shifting the narrow value by more than its width-1 is
      // equivalent because ashr saturates at replicating the sign bit.
      ShAmt = std::min(ShAmt, SrcTy->getScalarSizeInBits() - 1);
      Value *NewSh = Builder.CreateAShr(X, ConstantInt::get(SrcTy, ShAmt));
      return new SExtInst(NewSh, Ty);
    }

    // If the shifted-out value is known-zero, then this is an exact shift.
    if (!I.isExact() &&
        MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) {
      I.setIsExact();
      return &I;
    }
  }

  // See if we can turn a signed shr into an unsigned shr.
  if (MaskedValueIsZero(Op0, APInt::getSignMask(BitWidth), 0, &I))
    return BinaryOperator::CreateLShr(Op0, Op1);

  return nullptr;
}