/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Utils/LoopUtils.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- LoopUtils.cpp - Loop Utility functions -------------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file defines common loop utility functions. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "llvm/Transforms/Utils/LoopUtils.h" |
15 | | #include "llvm/ADT/ScopeExit.h" |
16 | | #include "llvm/Analysis/AliasAnalysis.h" |
17 | | #include "llvm/Analysis/BasicAliasAnalysis.h" |
18 | | #include "llvm/Analysis/GlobalsModRef.h" |
19 | | #include "llvm/Analysis/LoopInfo.h" |
20 | | #include "llvm/Analysis/LoopPass.h" |
21 | | #include "llvm/Analysis/ScalarEvolution.h" |
22 | | #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" |
23 | | #include "llvm/Analysis/ScalarEvolutionExpander.h" |
24 | | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
25 | | #include "llvm/Analysis/TargetTransformInfo.h" |
26 | | #include "llvm/IR/Dominators.h" |
27 | | #include "llvm/IR/Instructions.h" |
28 | | #include "llvm/IR/Module.h" |
29 | | #include "llvm/IR/PatternMatch.h" |
30 | | #include "llvm/IR/ValueHandle.h" |
31 | | #include "llvm/Pass.h" |
32 | | #include "llvm/Support/Debug.h" |
33 | | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
34 | | |
35 | | using namespace llvm; |
36 | | using namespace llvm::PatternMatch; |
37 | | |
38 | | #define DEBUG_TYPE "loop-utils" |
39 | | |
40 | | bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, |
41 | 121 | SmallPtrSetImpl<Instruction *> &Set) { |
42 | 315 | for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E315 ; ++Use194 ) |
43 | 223 | if (223 !Set.count(dyn_cast<Instruction>(*Use))223 ) |
44 | 29 | return false; |
45 | 92 | return true; |
46 | 121 | } |
47 | | |
48 | 1.52M | bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { |
49 | 1.52M | switch (Kind) { |
50 | 501k | default: |
51 | 501k | break; |
52 | 1.02M | case RK_IntegerAdd: |
53 | 1.02M | case RK_IntegerMult: |
54 | 1.02M | case RK_IntegerOr: |
55 | 1.02M | case RK_IntegerAnd: |
56 | 1.02M | case RK_IntegerXor: |
57 | 1.02M | case RK_IntegerMinMax: |
58 | 1.02M | return true; |
59 | 501k | } |
60 | 501k | return false; |
61 | 501k | } |
62 | | |
63 | 77.0k | bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { |
64 | 77.0k | return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); |
65 | 77.0k | } |
66 | | |
67 | 967k | bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { |
68 | 967k | switch (Kind) { |
69 | 637k | default: |
70 | 637k | break; |
71 | 329k | case RK_IntegerAdd: |
72 | 329k | case RK_IntegerMult: |
73 | 329k | case RK_FloatAdd: |
74 | 329k | case RK_FloatMult: |
75 | 329k | return true; |
76 | 637k | } |
77 | 637k | return false; |
78 | 637k | } |
79 | | |
80 | | Instruction * |
81 | | RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT, |
82 | | SmallPtrSetImpl<Instruction *> &Visited, |
83 | 329k | SmallPtrSetImpl<Instruction *> &CI) { |
84 | 329k | if (!Phi->hasOneUse()) |
85 | 239k | return Phi; |
86 | 89.9k | |
87 | 89.9k | const APInt *M = nullptr; |
88 | 89.9k | Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser()); |
89 | 89.9k | |
90 | 89.9k | // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT |
91 | 89.9k | // with a new integer type of the corresponding bit width. |
92 | 89.9k | if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))89.9k ) { |
93 | 147 | int32_t Bits = (*M + 1).exactLogBase2(); |
94 | 147 | if (Bits > 0147 ) { |
95 | 117 | RT = IntegerType::get(Phi->getContext(), Bits); |
96 | 117 | Visited.insert(Phi); |
97 | 117 | CI.insert(J); |
98 | 117 | return J; |
99 | 117 | } |
100 | 89.8k | } |
101 | 89.8k | return Phi; |
102 | 89.8k | } |
103 | | |
104 | | bool RecurrenceDescriptor::getSourceExtensionKind( |
105 | | Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned, |
106 | | SmallPtrSetImpl<Instruction *> &Visited, |
107 | 5 | SmallPtrSetImpl<Instruction *> &CI) { |
108 | 5 | |
109 | 5 | SmallVector<Instruction *, 8> Worklist; |
110 | 5 | bool FoundOneOperand = false; |
111 | 5 | unsigned DstSize = RT->getPrimitiveSizeInBits(); |
112 | 5 | Worklist.push_back(Exit); |
113 | 5 | |
114 | 5 | // Traverse the instructions in the reduction expression, beginning with the |
115 | 5 | // exit value. |
116 | 11 | while (!Worklist.empty()11 ) { |
117 | 8 | Instruction *I = Worklist.pop_back_val(); |
118 | 14 | for (Use &U : I->operands()) { |
119 | 14 | |
120 | 14 | // Terminate the traversal if the operand is not an instruction, or we |
121 | 14 | // reach the starting value. |
122 | 14 | Instruction *J = dyn_cast<Instruction>(U.get()); |
123 | 14 | if (!J || 14 J == Start14 ) |
124 | 3 | continue; |
125 | 11 | |
126 | 11 | // Otherwise, investigate the operation if it is also in the expression. |
127 | 11 | if (11 Visited.count(J)11 ) { |
128 | 3 | Worklist.push_back(J); |
129 | 3 | continue; |
130 | 3 | } |
131 | 8 | |
132 | 8 | // If the operand is not in Visited, it is not a reduction operation, but |
133 | 8 | // it does feed into one. Make sure it is either a single-use sign- or |
134 | 8 | // zero-extend instruction. |
135 | 8 | CastInst *Cast = dyn_cast<CastInst>(J); |
136 | 8 | bool IsSExtInst = isa<SExtInst>(J); |
137 | 8 | if (!Cast || 8 !Cast->hasOneUse()6 || !(isa<ZExtInst>(J) || 6 IsSExtInst0 )) |
138 | 2 | return false; |
139 | 6 | |
140 | 6 | // Ensure the source type of the extend is no larger than the reduction |
141 | 6 | // type. It is not necessary for the types to be identical. |
142 | 6 | unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); |
143 | 6 | if (SrcSize > DstSize) |
144 | 0 | return false; |
145 | 6 | |
146 | 6 | // Furthermore, ensure that all such extends are of the same kind. |
147 | 6 | if (6 FoundOneOperand6 ) { |
148 | 3 | if (IsSigned != IsSExtInst) |
149 | 0 | return false; |
150 | 3 | } else { |
151 | 3 | FoundOneOperand = true; |
152 | 3 | IsSigned = IsSExtInst; |
153 | 3 | } |
154 | 6 | |
155 | 6 | // Lastly, if the source type of the extend matches the reduction type, |
156 | 6 | // add the extend to CI so that we can avoid accounting for it in the |
157 | 6 | // cost model. |
158 | 6 | if (6 SrcSize == DstSize6 ) |
159 | 4 | CI.insert(Cast); |
160 | 14 | } |
161 | 8 | } |
162 | 3 | return true; |
163 | 5 | } |
164 | | |
165 | | bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, |
166 | | Loop *TheLoop, bool HasFunNoNaNAttr, |
167 | 1.52M | RecurrenceDescriptor &RedDes) { |
168 | 1.52M | if (Phi->getNumIncomingValues() != 2) |
169 | 0 | return false; |
170 | 1.52M | |
171 | 1.52M | // Reduction variables are only found in the loop header block. |
172 | 1.52M | if (1.52M Phi->getParent() != TheLoop->getHeader()1.52M ) |
173 | 0 | return false; |
174 | 1.52M | |
175 | 1.52M | // Obtain the reduction start value from the value that comes from the loop |
176 | 1.52M | // preheader. |
177 | 1.52M | Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); |
178 | 1.52M | |
179 | 1.52M | // ExitInstruction is the single value which is used outside the loop. |
180 | 1.52M | // We only allow for a single reduction value to be used outside the loop. |
181 | 1.52M | // This includes users of the reduction, variables (which form a cycle |
182 | 1.52M | // which ends in the phi node). |
183 | 1.52M | Instruction *ExitInstruction = nullptr; |
184 | 1.52M | // Indicates that we found a reduction operation in our scan. |
185 | 1.52M | bool FoundReduxOp = false; |
186 | 1.52M | |
187 | 1.52M | // We start with the PHI node and scan for all of the users of this |
188 | 1.52M | // instruction. All users must be instructions that can be used as reduction |
189 | 1.52M | // variables (such as ADD). We must have a single out-of-block user. The cycle |
190 | 1.52M | // must include the original PHI. |
191 | 1.52M | bool FoundStartPHI = false; |
192 | 1.52M | |
193 | 1.52M | // To recognize min/max patterns formed by a icmp select sequence, we store |
194 | 1.52M | // the number of instruction we saw from the recognized min/max pattern, |
195 | 1.52M | // to make sure we only see exactly the two instructions. |
196 | 1.52M | unsigned NumCmpSelectPatternInst = 0; |
197 | 1.52M | InstDesc ReduxDesc(false, nullptr); |
198 | 1.52M | |
199 | 1.52M | // Data used for determining if the recurrence has been type-promoted. |
200 | 1.52M | Type *RecurrenceType = Phi->getType(); |
201 | 1.52M | SmallPtrSet<Instruction *, 4> CastInsts; |
202 | 1.52M | Instruction *Start = Phi; |
203 | 1.52M | bool IsSigned = false; |
204 | 1.52M | |
205 | 1.52M | SmallPtrSet<Instruction *, 8> VisitedInsts; |
206 | 1.52M | SmallVector<Instruction *, 8> Worklist; |
207 | 1.52M | |
208 | 1.52M | // Return early if the recurrence kind does not match the type of Phi. If the |
209 | 1.52M | // recurrence kind is arithmetic, we attempt to look through AND operations |
210 | 1.52M | // resulting from the type promotion performed by InstCombine. Vector |
211 | 1.52M | // operations are not limited to the legal integer widths, so we may be able |
212 | 1.52M | // to evaluate the reduction in the narrower width. |
213 | 1.52M | if (RecurrenceType->isFloatingPointTy()1.52M ) { |
214 | 77.0k | if (!isFloatingPointRecurrenceKind(Kind)) |
215 | 53.0k | return false; |
216 | 1.44M | } else { |
217 | 1.44M | if (!isIntegerRecurrenceKind(Kind)) |
218 | 477k | return false; |
219 | 967k | if (967k isArithmeticRecurrenceKind(Kind)967k ) |
220 | 329k | Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); |
221 | 1.44M | } |
222 | 1.52M | |
223 | 991k | Worklist.push_back(Start); |
224 | 991k | VisitedInsts.insert(Start); |
225 | 991k | |
226 | 991k | // A value in the reduction can be used: |
227 | 991k | // - By the reduction: |
228 | 991k | // - Reduction operation: |
229 | 991k | // - One use of reduction value (safe). |
230 | 991k | // - Multiple use of reduction value (not safe). |
231 | 991k | // - PHI: |
232 | 991k | // - All uses of the PHI must be the reduction (safe). |
233 | 991k | // - Otherwise, not safe. |
234 | 991k | // - By instructions outside of the loop (safe). |
235 | 991k | // * One value may have several outside users, but all outside |
236 | 991k | // uses must be of the same value. |
237 | 991k | // - By an instruction that is not part of the reduction (not safe). |
238 | 991k | // This is either: |
239 | 991k | // * An instruction type other than PHI or the reduction operation. |
240 | 991k | // * A PHI in the header other than the initial PHI. |
241 | 1.98M | while (!Worklist.empty()1.98M ) { |
242 | 1.97M | Instruction *Cur = Worklist.back(); |
243 | 1.97M | Worklist.pop_back(); |
244 | 1.97M | |
245 | 1.97M | // No Users. |
246 | 1.97M | // If the instruction has no users then this is a broken chain and can't be |
247 | 1.97M | // a reduction variable. |
248 | 1.97M | if (Cur->use_empty()) |
249 | 9.18k | return false; |
250 | 1.96M | |
251 | 1.96M | bool IsAPhi = isa<PHINode>(Cur); |
252 | 1.96M | |
253 | 1.96M | // A header PHI use other than the original PHI. |
254 | 1.96M | if (Cur != Phi && 1.96M IsAPhi972k && Cur->getParent() == Phi->getParent()130 ) |
255 | 9 | return false; |
256 | 1.96M | |
257 | 1.96M | // Reductions of instructions such as Div, and Sub is only possible if the |
258 | 1.96M | // LHS is the reduction variable. |
259 | 1.96M | if (1.96M !Cur->isCommutative() && 1.96M !IsAPhi1.65M && !isa<SelectInst>(Cur)666k && |
260 | 1.96M | !isa<ICmpInst>(Cur)645k && !isa<FCmpInst>(Cur)622k && |
261 | 619k | !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0)))) |
262 | 323k | return false; |
263 | 1.64M | |
264 | 1.64M | // Any reduction instruction must be of one of the allowed kinds. We ignore |
265 | 1.64M | // the starting value (the Phi or an AND instruction if the Phi has been |
266 | 1.64M | // type-promoted). |
267 | 1.64M | if (1.64M Cur != Start1.64M ) { |
268 | 648k | ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); |
269 | 648k | if (!ReduxDesc.isRecurrence()) |
270 | 582k | return false; |
271 | 1.05M | } |
272 | 1.05M | |
273 | 1.05M | // A reduction operation must only have one use of the reduction value. |
274 | 1.05M | if (1.05M !IsAPhi && 1.05M Kind != RK_IntegerMinMax66.7k && Kind != RK_FloatMinMax64.9k && |
275 | 64.8k | hasMultipleUsesOf(Cur, VisitedInsts)) |
276 | 36 | return false; |
277 | 1.05M | |
278 | 1.05M | // All inputs to a PHI node must be a reduction value. |
279 | 1.05M | if (1.05M IsAPhi && 1.05M Cur != Phi991k && !areAllUsesIn(Cur, VisitedInsts)121 ) |
280 | 29 | return false; |
281 | 1.05M | |
282 | 1.05M | if (1.05M Kind == RK_IntegerMinMax && |
283 | 161k | (isa<ICmpInst>(Cur) || 161k isa<SelectInst>(Cur)160k )) |
284 | 1.86k | ++NumCmpSelectPatternInst; |
285 | 1.05M | if (Kind == RK_FloatMinMax && 1.05M (isa<FCmpInst>(Cur) || 6.33k isa<SelectInst>(Cur)6.31k )) |
286 | 34 | ++NumCmpSelectPatternInst; |
287 | 1.05M | |
288 | 1.05M | // Check whether we found a reduction operator. |
289 | 66.7k | FoundReduxOp |= !IsAPhi && Cur != Start; |
290 | 1.05M | |
291 | 1.05M | // Process users of current instruction. Push non-PHI nodes after PHI nodes |
292 | 1.05M | // onto the stack. This way we are going to have seen all inputs to PHI |
293 | 1.05M | // nodes once we get to them. |
294 | 1.05M | SmallVector<Instruction *, 8> NonPHIs; |
295 | 1.05M | SmallVector<Instruction *, 8> PHIs; |
296 | 2.42M | for (User *U : Cur->users()) { |
297 | 2.42M | Instruction *UI = cast<Instruction>(U); |
298 | 2.42M | |
299 | 2.42M | // Check if we found the exit user. |
300 | 2.42M | BasicBlock *Parent = UI->getParent(); |
301 | 2.42M | if (!TheLoop->contains(Parent)2.42M ) { |
302 | 77.4k | // If we already know this instruction is used externally, move on to |
303 | 77.4k | // the next user. |
304 | 77.4k | if (ExitInstruction == Cur) |
305 | 4 | continue; |
306 | 77.4k | |
307 | 77.4k | // Exit if you find multiple values used outside or if the header phi |
308 | 77.4k | // node is being used. In this case the user uses the value of the |
309 | 77.4k | // previous iteration, in which case we would loose "VF-1" iterations of |
310 | 77.4k | // the reduction operation if we vectorize. |
311 | 77.4k | if (77.4k ExitInstruction != nullptr || 77.4k Cur == Phi77.4k ) |
312 | 59.9k | return false; |
313 | 17.4k | |
314 | 17.4k | // The instruction used by an outside user must be the last instruction |
315 | 17.4k | // before we feed back to the reduction phi. Otherwise, we loose VF-1 |
316 | 17.4k | // operations on the value. |
317 | 17.4k | if (17.4k !is_contained(Phi->operands(), Cur)17.4k ) |
318 | 94 | return false; |
319 | 17.3k | |
320 | 17.3k | ExitInstruction = Cur; |
321 | 17.3k | continue; |
322 | 17.3k | } |
323 | 2.34M | |
324 | 2.34M | // Process instructions only once (termination). Each reduction cycle |
325 | 2.34M | // value must only be used once, except by phi nodes and min/max |
326 | 2.34M | // reductions which are represented as a cmp followed by a select. |
327 | 2.34M | InstDesc IgnoredVal(false, nullptr); |
328 | 2.34M | if (VisitedInsts.insert(UI).second2.34M ) { |
329 | 2.28M | if (isa<PHINode>(UI)) |
330 | 1.01k | PHIs.push_back(UI); |
331 | 2.28M | else |
332 | 2.28M | NonPHIs.push_back(UI); |
333 | 2.34M | } else if (57.2k !isa<PHINode>(UI) && |
334 | 3.81k | ((!isa<FCmpInst>(UI) && 3.81k !isa<ICmpInst>(UI)3.81k && |
335 | 3.81k | !isa<SelectInst>(UI)) || |
336 | 985 | !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())) |
337 | 2.88k | return false; |
338 | 2.34M | |
339 | 2.34M | // Remember that we completed the cycle. |
340 | 2.34M | if (2.34M UI == Phi2.34M ) |
341 | 52.5k | FoundStartPHI = true; |
342 | 2.42M | } |
343 | 995k | Worklist.append(PHIs.begin(), PHIs.end()); |
344 | 995k | Worklist.append(NonPHIs.begin(), NonPHIs.end()); |
345 | 995k | } |
346 | 991k | |
347 | 991k | // This means we have seen one but not the other instruction of the |
348 | 991k | // pattern or more than just a select and cmp. |
349 | 13.3k | if (13.3k (Kind == RK_IntegerMinMax || 13.3k Kind == RK_FloatMinMax13.2k ) && |
350 | 179 | NumCmpSelectPatternInst != 2) |
351 | 18 | return false; |
352 | 13.3k | |
353 | 13.3k | if (13.3k !FoundStartPHI || 13.3k !FoundReduxOp13.3k || !ExitInstruction13.3k ) |
354 | 2 | return false; |
355 | 13.3k | |
356 | 13.3k | // If we think Phi may have been type-promoted, we also need to ensure that |
357 | 13.3k | // all source operands of the reduction are either SExtInsts or ZEstInsts. If |
358 | 13.3k | // so, we will be able to evaluate the reduction in the narrower bit width. |
359 | 13.3k | if (13.3k Start != Phi13.3k ) |
360 | 5 | if (5 !getSourceExtensionKind(Start, ExitInstruction, RecurrenceType, |
361 | 5 | IsSigned, VisitedInsts, CastInsts)) |
362 | 2 | return false; |
363 | 13.3k | |
364 | 13.3k | // We found a reduction var if we have reached the original phi node and we |
365 | 13.3k | // only have a single instruction with out-of-loop users. |
366 | 13.3k | |
367 | 13.3k | // The ExitInstruction(Instruction which is allowed to have out-of-loop users) |
368 | 13.3k | // is saved as part of the RecurrenceDescriptor. |
369 | 13.3k | |
370 | 13.3k | // Save the description of this reduction variable. |
371 | 13.3k | RecurrenceDescriptor RD( |
372 | 13.3k | RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), |
373 | 13.3k | ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); |
374 | 13.3k | RedDes = RD; |
375 | 13.3k | |
376 | 13.3k | return true; |
377 | 13.3k | } |
378 | | |
379 | | /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction |
380 | | /// pattern corresponding to a min(X, Y) or max(X, Y). |
381 | | RecurrenceDescriptor::InstDesc |
382 | 6.26k | RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) { |
383 | 6.26k | |
384 | 6.26k | assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) && |
385 | 6.26k | "Expect a select instruction"); |
386 | 6.26k | Instruction *Cmp = nullptr; |
387 | 6.26k | SelectInst *Select = nullptr; |
388 | 6.26k | |
389 | 6.26k | // We must handle the select(cmp()) as a single instruction. Advance to the |
390 | 6.26k | // select. |
391 | 6.26k | if ((Cmp = dyn_cast<ICmpInst>(I)) || 6.26k (Cmp = dyn_cast<FCmpInst>(I))5.08k ) { |
392 | 1.20k | if (!Cmp->hasOneUse() || 1.20k !(Select = dyn_cast<SelectInst>(*I->user_begin()))1.16k ) |
393 | 238 | return InstDesc(false, I); |
394 | 965 | return InstDesc(Select, Prev.getMinMaxKind()); |
395 | 965 | } |
396 | 5.06k | |
397 | 5.06k | // Only handle single use cases for now. |
398 | 5.06k | if (5.06k !(Select = dyn_cast<SelectInst>(I))5.06k ) |
399 | 0 | return InstDesc(false, I); |
400 | 5.06k | if (5.06k !(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) && |
401 | 142 | !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0)))) |
402 | 18 | return InstDesc(false, I); |
403 | 5.04k | if (5.04k !Cmp->hasOneUse()5.04k ) |
404 | 152 | return InstDesc(false, I); |
405 | 4.89k | |
406 | 4.89k | Value *CmpLeft; |
407 | 4.89k | Value *CmpRight; |
408 | 4.89k | |
409 | 4.89k | // Look for a min/max pattern. |
410 | 4.89k | if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) |
411 | 1.39k | return InstDesc(Select, MRK_UIntMin); |
412 | 3.50k | else if (3.50k m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)3.50k ) |
413 | 58 | return InstDesc(Select, MRK_UIntMax); |
414 | 3.44k | else if (3.44k m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)3.44k ) |
415 | 242 | return InstDesc(Select, MRK_SIntMax); |
416 | 3.20k | else if (3.20k m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)3.20k ) |
417 | 136 | return InstDesc(Select, MRK_SIntMin); |
418 | 3.06k | else if (3.06k m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)3.06k ) |
419 | 10 | return InstDesc(Select, MRK_FloatMin); |
420 | 3.05k | else if (3.05k m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)3.05k ) |
421 | 8 | return InstDesc(Select, MRK_FloatMax); |
422 | 3.04k | else if (3.04k m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)3.04k ) |
423 | 8 | return InstDesc(Select, MRK_FloatMin); |
424 | 3.03k | else if (3.03k m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)3.03k ) |
425 | 8 | return InstDesc(Select, MRK_FloatMax); |
426 | 3.03k | |
427 | 3.03k | return InstDesc(false, I); |
428 | 3.03k | } |
429 | | |
430 | | RecurrenceDescriptor::InstDesc |
431 | | RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, |
432 | 648k | InstDesc &Prev, bool HasFunNoNaNAttr) { |
433 | 648k | bool FP = I->getType()->isFloatingPointTy(); |
434 | 648k | Instruction *UAI = Prev.getUnsafeAlgebraInst(); |
435 | 648k | if (!UAI && 648k FP645k && !I->hasUnsafeAlgebra()24.2k ) |
436 | 24.1k | UAI = I; // Found an unsafe (unvectorizable) algebra instruction. |
437 | 648k | |
438 | 648k | switch (I->getOpcode()) { |
439 | 295k | default: |
440 | 295k | return InstDesc(false, I); |
441 | 121 | case Instruction::PHI: |
442 | 121 | return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst()); |
443 | 236k | case Instruction::Sub: |
444 | 236k | case Instruction::Add: |
445 | 236k | return InstDesc(Kind == RK_IntegerAdd, I); |
446 | 18.9k | case Instruction::Mul: |
447 | 18.9k | return InstDesc(Kind == RK_IntegerMult, I); |
448 | 17.7k | case Instruction::And: |
449 | 17.7k | return InstDesc(Kind == RK_IntegerAnd, I); |
450 | 17.9k | case Instruction::Or: |
451 | 17.9k | return InstDesc(Kind == RK_IntegerOr, I); |
452 | 610 | case Instruction::Xor: |
453 | 610 | return InstDesc(Kind == RK_IntegerXor, I); |
454 | 8.84k | case Instruction::FMul: |
455 | 8.84k | return InstDesc(Kind == RK_FloatMult, I, UAI); |
456 | 6.73k | case Instruction::FSub: |
457 | 6.73k | case Instruction::FAdd: |
458 | 6.73k | return InstDesc(Kind == RK_FloatAdd, I, UAI); |
459 | 46.4k | case Instruction::FCmp: |
460 | 46.4k | case Instruction::ICmp: |
461 | 46.4k | case Instruction::Select: |
462 | 46.4k | if (Kind != RK_IntegerMinMax && |
463 | 41.2k | (!HasFunNoNaNAttr || 41.2k Kind != RK_FloatMinMax68 )) |
464 | 41.2k | return InstDesc(false, I); |
465 | 5.28k | return isMinMaxSelectCmpPattern(I, Prev); |
466 | 648k | } |
467 | 648k | } |
468 | | |
469 | | bool RecurrenceDescriptor::hasMultipleUsesOf( |
470 | 64.8k | Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) { |
471 | 64.8k | unsigned NumUses = 0; |
472 | 194k | for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; |
473 | 129k | ++Use129k ) { |
474 | 129k | if (Insts.count(dyn_cast<Instruction>(*Use))) |
475 | 64.9k | ++NumUses; |
476 | 129k | if (NumUses > 1) |
477 | 36 | return true; |
478 | 129k | } |
479 | 64.8k | |
480 | 64.8k | return false; |
481 | 64.8k | } |
482 | | bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, |
483 | 178k | RecurrenceDescriptor &RedDes) { |
484 | 178k | |
485 | 178k | BasicBlock *Header = TheLoop->getHeader(); |
486 | 178k | Function &F = *Header->getParent(); |
487 | 178k | bool HasFunNoNaNAttr = |
488 | 178k | F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; |
489 | 178k | |
490 | 178k | if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)178k ) { |
491 | 10.4k | DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); |
492 | 10.4k | return true; |
493 | 10.4k | } |
494 | 168k | if (168k AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)168k ) { |
495 | 47 | DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); |
496 | 47 | return true; |
497 | 47 | } |
498 | 168k | if (168k AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)168k ) { |
499 | 149 | DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); |
500 | 149 | return true; |
501 | 149 | } |
502 | 168k | if (168k AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)168k ) { |
503 | 37 | DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); |
504 | 37 | return true; |
505 | 37 | } |
506 | 168k | if (168k AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)168k ) { |
507 | 12 | DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); |
508 | 12 | return true; |
509 | 12 | } |
510 | 168k | if (168k AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, |
511 | 168k | RedDes)) { |
512 | 144 | DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); |
513 | 144 | return true; |
514 | 144 | } |
515 | 168k | if (168k AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)168k ) { |
516 | 61 | DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); |
517 | 61 | return true; |
518 | 61 | } |
519 | 168k | if (168k AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)168k ) { |
520 | 2.48k | DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); |
521 | 2.48k | return true; |
522 | 2.48k | } |
523 | 165k | if (165k AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)165k ) { |
524 | 17 | DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n"); |
525 | 17 | return true; |
526 | 17 | } |
527 | 165k | // Not a reduction of known type. |
528 | 165k | return false; |
529 | 165k | } |
530 | | |
531 | | bool RecurrenceDescriptor::isFirstOrderRecurrence( |
532 | | PHINode *Phi, Loop *TheLoop, |
533 | 57.4k | DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) { |
534 | 57.4k | |
535 | 57.4k | // Ensure the phi node is in the loop header and has two incoming values. |
536 | 57.4k | if (Phi->getParent() != TheLoop->getHeader() || |
537 | 57.4k | Phi->getNumIncomingValues() != 2) |
538 | 0 | return false; |
539 | 57.4k | |
540 | 57.4k | // Ensure the loop has a preheader and a single latch block. The loop |
541 | 57.4k | // vectorizer will need the latch to set up the next iteration of the loop. |
542 | 57.4k | auto *Preheader = TheLoop->getLoopPreheader(); |
543 | 57.4k | auto *Latch = TheLoop->getLoopLatch(); |
544 | 57.4k | if (!Preheader || 57.4k !Latch57.4k ) |
545 | 0 | return false; |
546 | 57.4k | |
547 | 57.4k | // Ensure the phi node's incoming blocks are the loop preheader and latch. |
548 | 57.4k | if (57.4k Phi->getBasicBlockIndex(Preheader) < 0 || |
549 | 57.4k | Phi->getBasicBlockIndex(Latch) < 0) |
550 | 0 | return false; |
551 | 57.4k | |
552 | 57.4k | // Get the previous value. The previous value comes from the latch edge while |
553 | 57.4k | // the initial value comes form the preheader edge. |
554 | 57.4k | auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch)); |
555 | 57.4k | if (!Previous || 57.4k !TheLoop->contains(Previous)57.4k || isa<PHINode>(Previous)57.4k || |
556 | 57.3k | SinkAfter.count(Previous)) // Cannot rely on dominance due to motion. |
557 | 174 | return false; |
558 | 57.3k | |
559 | 57.3k | // Ensure every user of the phi node is dominated by the previous value. |
560 | 57.3k | // The dominance requirement ensures the loop vectorizer will not need to |
561 | 57.3k | // vectorize the initial value prior to the first iteration of the loop. |
562 | 57.3k | // TODO: Consider extending this sinking to handle other kinds of instructions |
563 | 57.3k | // and expressions, beyond sinking a single cast past Previous. |
564 | 57.3k | if (57.3k Phi->hasOneUse()57.3k ) { |
565 | 35.6k | auto *I = Phi->user_back(); |
566 | 35.6k | if (I->isCast() && 35.6k (I->getParent() == Phi->getParent())724 && I->hasOneUse()718 && |
567 | 35.6k | DT->dominates(Previous, I->user_back())596 ) { |
568 | 42 | if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking. |
569 | 24 | SinkAfter[I] = Previous; |
570 | 42 | return true; |
571 | 42 | } |
572 | 57.2k | } |
573 | 57.2k | |
574 | 57.2k | for (User *U : Phi->users()) |
575 | 76.3k | if (auto *76.3k I76.3k = dyn_cast<Instruction>(U)) { |
576 | 76.3k | if (!DT->dominates(Previous, I)) |
577 | 57.0k | return false; |
578 | 185 | } |
579 | 185 | |
580 | 185 | return true; |
581 | 185 | } |
582 | | |
583 | | /// This function returns the identity element (or neutral element) for |
584 | | /// the operation K. |
585 | | Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K, |
586 | 1.40k | Type *Tp) { |
587 | 1.40k | switch (K) { |
588 | 1.31k | case RK_IntegerXor: |
589 | 1.31k | case RK_IntegerAdd: |
590 | 1.31k | case RK_IntegerOr: |
591 | 1.31k | // Adding, Xoring, Oring zero to a number does not change it. |
592 | 1.31k | return ConstantInt::get(Tp, 0); |
593 | 47 | case RK_IntegerMult: |
594 | 47 | // Multiplying a number by 1 does not change it. |
595 | 47 | return ConstantInt::get(Tp, 1); |
596 | 27 | case RK_IntegerAnd: |
597 | 27 | // AND-ing a number with an all-1 value does not change it. |
598 | 27 | return ConstantInt::get(Tp, -1, true); |
599 | 1 | case RK_FloatMult: |
600 | 1 | // Multiplying a number by 1 does not change it. |
601 | 1 | return ConstantFP::get(Tp, 1.0L); |
602 | 18 | case RK_FloatAdd: |
603 | 18 | // Adding zero to a number does not change it. |
604 | 18 | return ConstantFP::get(Tp, 0.0L); |
605 | 0 | default: |
606 | 0 | llvm_unreachable("Unknown recurrence kind"); |
607 | 0 | } |
608 | 0 | } |
609 | | |
610 | | /// This function translates the recurrence kind to an LLVM binary operator. |
611 | 1.51k | unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) { |
612 | 1.51k | switch (Kind) { |
613 | 1.22k | case RK_IntegerAdd: |
614 | 1.22k | return Instruction::Add; |
615 | 47 | case RK_IntegerMult: |
616 | 47 | return Instruction::Mul; |
617 | 85 | case RK_IntegerOr: |
618 | 85 | return Instruction::Or; |
619 | 27 | case RK_IntegerAnd: |
620 | 27 | return Instruction::And; |
621 | 10 | case RK_IntegerXor: |
622 | 10 | return Instruction::Xor; |
623 | 1 | case RK_FloatMult: |
624 | 1 | return Instruction::FMul; |
625 | 18 | case RK_FloatAdd: |
626 | 18 | return Instruction::FAdd; |
627 | 84 | case RK_IntegerMinMax: |
628 | 84 | return Instruction::ICmp; |
629 | 17 | case RK_FloatMinMax: |
630 | 17 | return Instruction::FCmp; |
631 | 0 | default: |
632 | 0 | llvm_unreachable("Unknown recurrence operation"); |
633 | 0 | } |
634 | 0 | } |
635 | | |
636 | | Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, |
637 | | MinMaxRecurrenceKind RK, |
638 | 230 | Value *Left, Value *Right) { |
639 | 230 | CmpInst::Predicate P = CmpInst::ICMP_NE; |
640 | 230 | switch (RK) { |
641 | 0 | default: |
642 | 0 | llvm_unreachable("Unknown min/max recurrence kind"); |
643 | 8 | case MRK_UIntMin: |
644 | 8 | P = CmpInst::ICMP_ULT; |
645 | 8 | break; |
646 | 24 | case MRK_UIntMax: |
647 | 24 | P = CmpInst::ICMP_UGT; |
648 | 24 | break; |
649 | 43 | case MRK_SIntMin: |
650 | 43 | P = CmpInst::ICMP_SLT; |
651 | 43 | break; |
652 | 96 | case MRK_SIntMax: |
653 | 96 | P = CmpInst::ICMP_SGT; |
654 | 96 | break; |
655 | 10 | case MRK_FloatMin: |
656 | 10 | P = CmpInst::FCMP_OLT; |
657 | 10 | break; |
658 | 49 | case MRK_FloatMax: |
659 | 49 | P = CmpInst::FCMP_OGT; |
660 | 49 | break; |
661 | 230 | } |
662 | 230 | |
663 | 230 | // We only match FP sequences with unsafe algebra, so we can unconditionally |
664 | 230 | // set it on any generated instructions. |
665 | 230 | IRBuilder<>::FastMathFlagGuard FMFG(Builder); |
666 | 230 | FastMathFlags FMF; |
667 | 230 | FMF.setUnsafeAlgebra(); |
668 | 230 | Builder.setFastMathFlags(FMF); |
669 | 230 | |
670 | 230 | Value *Cmp; |
671 | 230 | if (RK == MRK_FloatMin || 230 RK == MRK_FloatMax220 ) |
672 | 59 | Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); |
673 | 230 | else |
674 | 171 | Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); |
675 | 230 | |
676 | 230 | Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); |
677 | 230 | return Select; |
678 | 230 | } |
679 | | |
680 | | InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, |
681 | | const SCEV *Step, BinaryOperator *BOp) |
682 | 108k | : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { |
683 | 108k | assert(IK != IK_NoInduction && "Not an induction"); |
684 | 108k | |
685 | 108k | // Start value type should match the induction kind and the value |
686 | 108k | // itself should not be null. |
687 | 108k | assert(StartValue && "StartValue is null"); |
688 | 108k | assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && |
689 | 108k | "StartValue is not a pointer for pointer induction"); |
690 | 108k | assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && |
691 | 108k | "StartValue is not an integer for integer induction"); |
692 | 108k | |
693 | 108k | // Check the Step Value. It should be non-zero integer value. |
694 | 108k | assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) && |
695 | 108k | "Step value is zero"); |
696 | 108k | |
697 | 108k | assert((IK != IK_PtrInduction || getConstIntStepValue()) && |
698 | 108k | "Step value should be constant for pointer induction"); |
699 | 108k | assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) && |
700 | 108k | "StepValue is not an integer"); |
701 | 108k | |
702 | 108k | assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) && |
703 | 108k | "StepValue is not FP for FpInduction"); |
704 | 108k | assert((IK != IK_FpInduction || (InductionBinOp && |
705 | 108k | (InductionBinOp->getOpcode() == Instruction::FAdd || |
706 | 108k | InductionBinOp->getOpcode() == Instruction::FSub))) && |
707 | 108k | "Binary opcode should be specified for FP induction"); |
708 | 108k | } |
709 | | |
710 | 0 | int InductionDescriptor::getConsecutiveDirection() const { |
711 | 0 | ConstantInt *ConstStep = getConstIntStepValue(); |
712 | 0 | if (ConstStep && 0 (ConstStep->isOne() || 0 ConstStep->isMinusOne()0 )) |
713 | 0 | return ConstStep->getSExtValue(); |
714 | 0 | return 0; |
715 | 0 | } |
716 | | |
717 | 221k | ConstantInt *InductionDescriptor::getConstIntStepValue() const { |
718 | 221k | if (isa<SCEVConstant>(Step)) |
719 | 219k | return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue()); |
720 | 2.05k | return nullptr; |
721 | 2.05k | } |
722 | | |
723 | | Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index, |
724 | | ScalarEvolution *SE, |
725 | 22.8k | const DataLayout& DL) const { |
726 | 22.8k | |
727 | 22.8k | SCEVExpander Exp(*SE, DL, "induction"); |
728 | 22.8k | assert(Index->getType() == Step->getType() && |
729 | 22.8k | "Index type does not match StepValue type"); |
730 | 22.8k | switch (IK) { |
731 | 11.3k | case IK_IntInduction: { |
732 | 11.3k | assert(Index->getType() == StartValue->getType() && |
733 | 11.3k | "Index type does not match StartValue type"); |
734 | 11.3k | |
735 | 11.3k | // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution |
736 | 11.3k | // and calculate (Start + Index * Step) for all cases, without |
737 | 11.3k | // special handling for "isOne" and "isMinusOne". |
738 | 11.3k | // But in the real life the result code getting worse. We mix SCEV |
739 | 11.3k | // expressions and ADD/SUB operations and receive redundant |
740 | 11.3k | // intermediate values being calculated in different ways and |
741 | 11.3k | // Instcombine is unable to reduce them all. |
742 | 11.3k | |
743 | 11.3k | if (getConstIntStepValue() && |
744 | 10.6k | getConstIntStepValue()->isMinusOne()) |
745 | 2.35k | return B.CreateSub(StartValue, Index); |
746 | 8.96k | if (8.96k getConstIntStepValue() && |
747 | 8.30k | getConstIntStepValue()->isOne()) |
748 | 4.23k | return B.CreateAdd(StartValue, Index); |
749 | 4.72k | const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue), |
750 | 4.72k | SE->getMulExpr(Step, SE->getSCEV(Index))); |
751 | 4.72k | return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint()); |
752 | 4.72k | } |
753 | 11.4k | case IK_PtrInduction: { |
754 | 11.4k | assert(isa<SCEVConstant>(Step) && |
755 | 11.4k | "Expected constant step for pointer induction"); |
756 | 11.4k | const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step); |
757 | 11.4k | Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint()); |
758 | 11.4k | return B.CreateGEP(nullptr, StartValue, Index); |
759 | 4.72k | } |
760 | 36 | case IK_FpInduction: { |
761 | 36 | assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); |
762 | 36 | assert(InductionBinOp && |
763 | 36 | (InductionBinOp->getOpcode() == Instruction::FAdd || |
764 | 36 | InductionBinOp->getOpcode() == Instruction::FSub) && |
765 | 36 | "Original bin op should be defined for FP induction"); |
766 | 36 | |
767 | 36 | Value *StepValue = cast<SCEVUnknown>(Step)->getValue(); |
768 | 36 | |
769 | 36 | // Floating point operations had to be 'fast' to enable the induction. |
770 | 36 | FastMathFlags Flags; |
771 | 36 | Flags.setUnsafeAlgebra(); |
772 | 36 | |
773 | 36 | Value *MulExp = B.CreateFMul(StepValue, Index); |
774 | 36 | if (isa<Instruction>(MulExp)) |
775 | 36 | // We have to check, the MulExp may be a constant. |
776 | 36 | cast<Instruction>(MulExp)->setFastMathFlags(Flags); |
777 | 36 | |
778 | 36 | Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue, |
779 | 36 | MulExp, "induction"); |
780 | 36 | if (isa<Instruction>(BOp)) |
781 | 36 | cast<Instruction>(BOp)->setFastMathFlags(Flags); |
782 | 36 | |
783 | 36 | return BOp; |
784 | 4.72k | } |
785 | 0 | case IK_NoInduction: |
786 | 0 | return nullptr; |
787 | 0 | } |
788 | 0 | llvm_unreachable0 ("invalid enum"); |
789 | 0 | } |
790 | | |
791 | | bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, |
792 | | ScalarEvolution *SE, |
793 | 12.4k | InductionDescriptor &D) { |
794 | 12.4k | |
795 | 12.4k | // Here we only handle FP induction variables. |
796 | 12.4k | assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type"); |
797 | 12.4k | |
798 | 12.4k | if (TheLoop->getHeader() != Phi->getParent()) |
799 | 0 | return false; |
800 | 12.4k | |
801 | 12.4k | // The loop may have multiple entrances or multiple exits; we can analyze |
802 | 12.4k | // this phi if it has a unique entry value and a unique backedge value. |
803 | 12.4k | if (12.4k Phi->getNumIncomingValues() != 212.4k ) |
804 | 0 | return false; |
805 | 12.4k | Value *BEValue = nullptr, *StartValue = nullptr; |
806 | 12.4k | if (TheLoop->contains(Phi->getIncomingBlock(0))12.4k ) { |
807 | 11.2k | BEValue = Phi->getIncomingValue(0); |
808 | 11.2k | StartValue = Phi->getIncomingValue(1); |
809 | 12.4k | } else { |
810 | 1.23k | assert(TheLoop->contains(Phi->getIncomingBlock(1)) && |
811 | 1.23k | "Unexpected Phi node in the loop"); |
812 | 1.23k | BEValue = Phi->getIncomingValue(1); |
813 | 1.23k | StartValue = Phi->getIncomingValue(0); |
814 | 1.23k | } |
815 | 12.4k | |
816 | 12.4k | BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue); |
817 | 12.4k | if (!BOp) |
818 | 462 | return false; |
819 | 11.9k | |
820 | 11.9k | Value *Addend = nullptr; |
821 | 11.9k | if (BOp->getOpcode() == Instruction::FAdd11.9k ) { |
822 | 622 | if (BOp->getOperand(0) == Phi) |
823 | 424 | Addend = BOp->getOperand(1); |
824 | 198 | else if (198 BOp->getOperand(1) == Phi198 ) |
825 | 74 | Addend = BOp->getOperand(0); |
826 | 11.9k | } else if (11.3k BOp->getOpcode() == Instruction::FSub11.3k ) |
827 | 88 | if (88 BOp->getOperand(0) == Phi88 ) |
828 | 40 | Addend = BOp->getOperand(1); |
829 | 11.9k | |
830 | 11.9k | if (!Addend) |
831 | 11.4k | return false; |
832 | 538 | |
833 | 538 | // The addend should be loop invariant |
834 | 538 | if (auto *538 I538 = dyn_cast<Instruction>(Addend)) |
835 | 472 | if (472 TheLoop->contains(I)472 ) |
836 | 450 | return false; |
837 | 88 | |
838 | 88 | // FP Step has unknown SCEV |
839 | 88 | const SCEV *Step = SE->getUnknown(Addend); |
840 | 88 | D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp); |
841 | 88 | return true; |
842 | 88 | } |
843 | | |
844 | | bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, |
845 | | PredicatedScalarEvolution &PSE, |
846 | | InductionDescriptor &D, |
847 | 222k | bool Assume) { |
848 | 222k | Type *PhiTy = Phi->getType(); |
849 | 222k | |
850 | 222k | // Handle integer and pointer inductions variables. |
851 | 222k | // Now we handle also FP induction but not trying to make a |
852 | 222k | // recurrent expression from the PHI node in-place. |
853 | 222k | |
854 | 222k | if (!PhiTy->isIntegerTy() && 222k !PhiTy->isPointerTy()85.4k && |
855 | 222k | !PhiTy->isFloatTy()12.4k && !PhiTy->isDoubleTy()12.0k && !PhiTy->isHalfTy()16 ) |
856 | 16 | return false; |
857 | 222k | |
858 | 222k | if (222k PhiTy->isFloatingPointTy()222k ) |
859 | 12.4k | return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D); |
860 | 210k | |
861 | 210k | const SCEV *PhiScev = PSE.getSCEV(Phi); |
862 | 210k | const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); |
863 | 210k | |
864 | 210k | // We need this expression to be an AddRecExpr. |
865 | 210k | if (Assume && 210k !AR51.0k ) |
866 | 50.5k | AR = PSE.getAsAddRec(Phi); |
867 | 210k | |
868 | 210k | if (!AR210k ) { |
869 | 101k | DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); |
870 | 101k | return false; |
871 | 101k | } |
872 | 108k | |
873 | 108k | return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); |
874 | 108k | } |
875 | | |
876 | | bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, |
877 | | ScalarEvolution *SE, |
878 | | InductionDescriptor &D, |
879 | 109k | const SCEV *Expr) { |
880 | 109k | Type *PhiTy = Phi->getType(); |
881 | 109k | // We only handle integer and pointer inductions variables. |
882 | 109k | if (!PhiTy->isIntegerTy() && 109k !PhiTy->isPointerTy()17.4k ) |
883 | 0 | return false; |
884 | 109k | |
885 | 109k | // Check that the PHI is consecutive. |
886 | 109k | const SCEV *PhiScev = Expr ? 109k Expr108k : SE->getSCEV(Phi)56 ; |
887 | 109k | const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); |
888 | 109k | |
889 | 109k | if (!AR109k ) { |
890 | 7 | DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); |
891 | 7 | return false; |
892 | 7 | } |
893 | 109k | |
894 | 109k | if (109k AR->getLoop() != TheLoop109k ) { |
895 | 0 | // FIXME: We should treat this as a uniform. Unfortunately, we |
896 | 0 | // don't currently know how to handled uniform PHIs. |
897 | 0 | DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); |
898 | 0 | return false; |
899 | 0 | } |
900 | 109k | |
901 | 109k | Value *StartValue = |
902 | 109k | Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); |
903 | 109k | const SCEV *Step = AR->getStepRecurrence(*SE); |
904 | 109k | // Calculate the pointer stride and check if it is consecutive. |
905 | 109k | // The stride may be a constant or a loop invariant integer value. |
906 | 109k | const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step); |
907 | 109k | if (!ConstStep && 109k !SE->isLoopInvariant(Step, TheLoop)1.69k ) |
908 | 8 | return false; |
909 | 109k | |
910 | 109k | if (109k PhiTy->isIntegerTy()109k ) { |
911 | 91.5k | D = InductionDescriptor(StartValue, IK_IntInduction, Step); |
912 | 91.5k | return true; |
913 | 91.5k | } |
914 | 17.4k | |
915 | 109k | assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); |
916 | 17.4k | // Pointer induction should be a constant. |
917 | 17.4k | if (!ConstStep) |
918 | 960 | return false; |
919 | 16.5k | |
920 | 16.5k | ConstantInt *CV = ConstStep->getValue(); |
921 | 16.5k | Type *PointerElementType = PhiTy->getPointerElementType(); |
922 | 16.5k | // The pointer stride cannot be determined if the pointer element type is not |
923 | 16.5k | // sized. |
924 | 16.5k | if (!PointerElementType->isSized()) |
925 | 2 | return false; |
926 | 16.5k | |
927 | 16.5k | const DataLayout &DL = Phi->getModule()->getDataLayout(); |
928 | 16.5k | int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); |
929 | 16.5k | if (!Size) |
930 | 2 | return false; |
931 | 16.5k | |
932 | 16.5k | int64_t CVSize = CV->getSExtValue(); |
933 | 16.5k | if (CVSize % Size) |
934 | 4 | return false; |
935 | 16.5k | auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size, |
936 | 16.5k | true /* signed */); |
937 | 16.5k | D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); |
938 | 16.5k | return true; |
939 | 16.5k | } |
940 | | |
941 | | bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, |
942 | 3.32M | bool PreserveLCSSA) { |
943 | 3.32M | bool Changed = false; |
944 | 3.32M | |
945 | 3.32M | // We re-use a vector for the in-loop predecesosrs. |
946 | 3.32M | SmallVector<BasicBlock *, 4> InLoopPredecessors; |
947 | 3.32M | |
948 | 4.19M | auto RewriteExit = [&](BasicBlock *BB) { |
949 | 4.19M | assert(InLoopPredecessors.empty() && |
950 | 4.19M | "Must start with an empty predecessors list!"); |
951 | 4.19M | auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); }); |
952 | 4.19M | |
953 | 4.19M | // See if there are any non-loop predecessors of this exit block and |
954 | 4.19M | // keep track of the in-loop predecessors. |
955 | 4.19M | bool IsDedicatedExit = true; |
956 | 4.19M | for (auto *PredBB : predecessors(BB)) |
957 | 14.3M | if (14.3M L->contains(PredBB)14.3M ) { |
958 | 4.61M | if (isa<IndirectBrInst>(PredBB->getTerminator())) |
959 | 4.61M | // We cannot rewrite exiting edges from an indirectbr. |
960 | 365 | return false; |
961 | 4.61M | |
962 | 4.61M | InLoopPredecessors.push_back(PredBB); |
963 | 14.3M | } else { |
964 | 9.70M | IsDedicatedExit = false; |
965 | 9.70M | } |
966 | 4.19M | |
967 | 4.19M | assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!"); |
968 | 4.19M | |
969 | 4.19M | // Nothing to do if this is already a dedicated exit. |
970 | 4.19M | if (IsDedicatedExit) |
971 | 2.83M | return false; |
972 | 1.35M | |
973 | 1.35M | auto *NewExitBB = SplitBlockPredecessors( |
974 | 1.35M | BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA); |
975 | 1.35M | |
976 | 1.35M | if (!NewExitBB) |
977 | 1.35M | DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: " |
978 | 1.35M | << *L << "\n"); |
979 | 1.35M | else |
980 | 1.35M | DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " |
981 | 4.19M | << NewExitBB->getName() << "\n"); |
982 | 4.19M | return true; |
983 | 4.19M | }; |
984 | 3.32M | |
985 | 3.32M | // Walk the exit blocks directly rather than building up a data structure for |
986 | 3.32M | // them, but only visit each one once. |
987 | 3.32M | SmallPtrSet<BasicBlock *, 4> Visited; |
988 | 3.32M | for (auto *BB : L->blocks()) |
989 | 14.7M | for (auto *SuccBB : successors(BB)) 14.7M { |
990 | 25.0M | // We're looking for exit blocks so skip in-loop successors. |
991 | 25.0M | if (L->contains(SuccBB)) |
992 | 20.6M | continue; |
993 | 4.42M | |
994 | 4.42M | // Visit each exit block exactly once. |
995 | 4.42M | if (4.42M !Visited.insert(SuccBB).second4.42M ) |
996 | 231k | continue; |
997 | 4.19M | |
998 | 4.19M | Changed |= RewriteExit(SuccBB); |
999 | 4.19M | } |
1000 | 3.32M | |
1001 | 3.32M | return Changed; |
1002 | 3.32M | } |
1003 | | |
1004 | | /// \brief Returns the instructions that use values defined in the loop. |
1005 | 47 | SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { |
1006 | 47 | SmallVector<Instruction *, 8> UsedOutside; |
1007 | 47 | |
1008 | 47 | for (auto *Block : L->getBlocks()) |
1009 | 47 | // FIXME: I believe that this could use copy_if if the Inst reference could |
1010 | 47 | // be adapted into a pointer. |
1011 | 49 | for (auto &Inst : *Block) 49 { |
1012 | 731 | auto Users = Inst.users(); |
1013 | 731 | if (any_of(Users, [&](User *U) 731 { |
1014 | 856 | auto *Use = cast<Instruction>(U); |
1015 | 856 | return !L->contains(Use->getParent()); |
1016 | 856 | })) |
1017 | 6 | UsedOutside.push_back(&Inst); |
1018 | 49 | } |
1019 | 47 | |
1020 | 47 | return UsedOutside; |
1021 | 47 | } |
1022 | | |
1023 | 209k | void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { |
1024 | 209k | // By definition, all loop passes need the LoopInfo analysis and the |
1025 | 209k | // Dominator tree it depends on. Because they all participate in the loop |
1026 | 209k | // pass manager, they must also preserve these. |
1027 | 209k | AU.addRequired<DominatorTreeWrapperPass>(); |
1028 | 209k | AU.addPreserved<DominatorTreeWrapperPass>(); |
1029 | 209k | AU.addRequired<LoopInfoWrapperPass>(); |
1030 | 209k | AU.addPreserved<LoopInfoWrapperPass>(); |
1031 | 209k | |
1032 | 209k | // We must also preserve LoopSimplify and LCSSA. We locally access their IDs |
1033 | 209k | // here because users shouldn't directly get them from this header. |
1034 | 209k | extern char &LoopSimplifyID; |
1035 | 209k | extern char &LCSSAID; |
1036 | 209k | AU.addRequiredID(LoopSimplifyID); |
1037 | 209k | AU.addPreservedID(LoopSimplifyID); |
1038 | 209k | AU.addRequiredID(LCSSAID); |
1039 | 209k | AU.addPreservedID(LCSSAID); |
1040 | 209k | // This is used in the LPPassManager to perform LCSSA verification on passes |
1041 | 209k | // which preserve lcssa form |
1042 | 209k | AU.addRequired<LCSSAVerificationPass>(); |
1043 | 209k | AU.addPreserved<LCSSAVerificationPass>(); |
1044 | 209k | |
1045 | 209k | // Loop passes are designed to run inside of a loop pass manager which means |
1046 | 209k | // that any function analyses they require must be required by the first loop |
1047 | 209k | // pass in the manager (so that it is computed before the loop pass manager |
1048 | 209k | // runs) and preserved by all loop pasess in the manager. To make this |
1049 | 209k | // reasonably robust, the set needed for most loop passes is maintained here. |
1050 | 209k | // If your loop pass requires an analysis not listed here, you will need to |
1051 | 209k | // carefully audit the loop pass manager nesting structure that results. |
1052 | 209k | AU.addRequired<AAResultsWrapperPass>(); |
1053 | 209k | AU.addPreserved<AAResultsWrapperPass>(); |
1054 | 209k | AU.addPreserved<BasicAAWrapperPass>(); |
1055 | 209k | AU.addPreserved<GlobalsAAWrapperPass>(); |
1056 | 209k | AU.addPreserved<SCEVAAWrapperPass>(); |
1057 | 209k | AU.addRequired<ScalarEvolutionWrapperPass>(); |
1058 | 209k | AU.addPreserved<ScalarEvolutionWrapperPass>(); |
1059 | 209k | } |
1060 | | |
1061 | | /// Manually defined generic "LoopPass" dependency initialization. This is used |
1062 | | /// to initialize the exact set of passes from above in \c |
1063 | | /// getLoopAnalysisUsage. It can be used within a loop pass's initialization |
1064 | | /// with: |
1065 | | /// |
1066 | | /// INITIALIZE_PASS_DEPENDENCY(LoopPass) |
1067 | | /// |
1068 | | /// As-if "LoopPass" were a pass. |
1069 | 481k | void llvm::initializeLoopPassPass(PassRegistry &Registry) { |
1070 | 481k | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
1071 | 481k | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
1072 | 481k | INITIALIZE_PASS_DEPENDENCY(LoopSimplify) |
1073 | 481k | INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) |
1074 | 481k | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |
1075 | 481k | INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) |
1076 | 481k | INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) |
1077 | 481k | INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) |
1078 | 481k | INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) |
1079 | 481k | } |
1080 | | |
1081 | | /// \brief Find string metadata for loop |
1082 | | /// |
1083 | | /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an |
1084 | | /// operand or null otherwise. If the string metadata is not found return |
1085 | | /// Optional's not-a-value. |
1086 | | Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop, |
1087 | 252k | StringRef Name) { |
1088 | 252k | MDNode *LoopID = TheLoop->getLoopID(); |
1089 | 252k | // Return none if LoopID is false. |
1090 | 252k | if (!LoopID) |
1091 | 237k | return None; |
1092 | 14.9k | |
1093 | 14.9k | // First operand should refer to the loop id itself. |
1094 | 252k | assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); |
1095 | 14.9k | assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); |
1096 | 14.9k | |
1097 | 14.9k | // Iterate over LoopID operands and look for MDString Metadata |
1098 | 44.3k | for (unsigned i = 1, e = LoopID->getNumOperands(); i < e44.3k ; ++i29.4k ) { |
1099 | 29.4k | MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); |
1100 | 29.4k | if (!MD) |
1101 | 0 | continue; |
1102 | 29.4k | MDString *S = dyn_cast<MDString>(MD->getOperand(0)); |
1103 | 29.4k | if (!S) |
1104 | 6.68k | continue; |
1105 | 22.7k | // Return true if MDString holds expected MetaData. |
1106 | 22.7k | if (22.7k Name.equals(S->getString())22.7k ) |
1107 | 13 | switch (MD->getNumOperands()) { |
1108 | 1 | case 1: |
1109 | 1 | return nullptr; |
1110 | 12 | case 2: |
1111 | 12 | return &MD->getOperand(1); |
1112 | 0 | default: |
1113 | 0 | llvm_unreachable("loop metadata has 0 or 1 operand"); |
1114 | 13 | } |
1115 | 29.4k | } |
1116 | 14.9k | return None; |
1117 | 252k | } |
1118 | | |
1119 | | /// Does a BFS from a given node to all of its children inside a given loop. |
1120 | | /// The returned vector of nodes includes the starting point. |
1121 | | SmallVector<DomTreeNode *, 16> |
1122 | 2.05M | llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) { |
1123 | 2.05M | SmallVector<DomTreeNode *, 16> Worklist; |
1124 | 12.2M | auto AddRegionToWorklist = [&](DomTreeNode *DTN) { |
1125 | 12.2M | // Only include subregions in the top level loop. |
1126 | 12.2M | BasicBlock *BB = DTN->getBlock(); |
1127 | 12.2M | if (CurLoop->contains(BB)) |
1128 | 9.54M | Worklist.push_back(DTN); |
1129 | 12.2M | }; |
1130 | 2.05M | |
1131 | 2.05M | AddRegionToWorklist(N); |
1132 | 2.05M | |
1133 | 11.6M | for (size_t I = 0; I < Worklist.size()11.6M ; I++9.54M ) |
1134 | 9.54M | for (DomTreeNode *Child : Worklist[I]->getChildren()) |
1135 | 10.1M | AddRegionToWorklist(Child); |
1136 | 2.05M | |
1137 | 2.05M | return Worklist; |
1138 | 2.05M | } |
1139 | | |
1140 | | /// Returns true if the instruction in a loop is guaranteed to execute at least |
1141 | | /// once. |
1142 | | bool llvm::isGuaranteedToExecute(const Instruction &Inst, |
1143 | | const DominatorTree *DT, const Loop *CurLoop, |
1144 | 52.1k | const LoopSafetyInfo *SafetyInfo) { |
1145 | 52.1k | // We have to check to make sure that the instruction dominates all |
1146 | 52.1k | // of the exit blocks. If it doesn't, then there is a path out of the loop |
1147 | 52.1k | // which does not execute this instruction, so we can't hoist it. |
1148 | 52.1k | |
1149 | 52.1k | // If the instruction is in the header block for the loop (which is very |
1150 | 52.1k | // common), it is always guaranteed to dominate the exit blocks. Since this |
1151 | 52.1k | // is a common case, and can save some work, check it now. |
1152 | 52.1k | if (Inst.getParent() == CurLoop->getHeader()) |
1153 | 52.1k | // If there's a throw in the header block, we can't guarantee we'll reach |
1154 | 52.1k | // Inst. |
1155 | 31.1k | return !SafetyInfo->HeaderMayThrow; |
1156 | 21.0k | |
1157 | 21.0k | // Somewhere in this loop there is an instruction which may throw and make us |
1158 | 21.0k | // exit the loop. |
1159 | 21.0k | if (21.0k SafetyInfo->MayThrow21.0k ) |
1160 | 934 | return false; |
1161 | 20.1k | |
1162 | 20.1k | // Get the exit blocks for the current loop. |
1163 | 20.1k | SmallVector<BasicBlock *, 8> ExitBlocks; |
1164 | 20.1k | CurLoop->getExitBlocks(ExitBlocks); |
1165 | 20.1k | |
1166 | 20.1k | // Verify that the block dominates each of the exit blocks of the loop. |
1167 | 20.1k | for (BasicBlock *ExitBlock : ExitBlocks) |
1168 | 21.7k | if (21.7k !DT->dominates(Inst.getParent(), ExitBlock)21.7k ) |
1169 | 16.2k | return false; |
1170 | 3.88k | |
1171 | 3.88k | // As a degenerate case, if the loop is statically infinite then we haven't |
1172 | 3.88k | // proven anything since there are no exit blocks. |
1173 | 3.88k | if (3.88k ExitBlocks.empty()3.88k ) |
1174 | 5 | return false; |
1175 | 3.87k | |
1176 | 3.87k | // FIXME: In general, we have to prove that the loop isn't an infinite loop. |
1177 | 3.87k | // See http::llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is |
1178 | 3.87k | // just a special case of this.) |
1179 | 3.87k | return true; |
1180 | 3.87k | } |
1181 | | |
1182 | 20 | Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { |
1183 | 20 | // Only support loops with a unique exiting block, and a latch. |
1184 | 20 | if (!L->getExitingBlock()) |
1185 | 2 | return None; |
1186 | 18 | |
1187 | 18 | // Get the branch weights for the the loop's backedge. |
1188 | 18 | BranchInst *LatchBR = |
1189 | 18 | dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator()); |
1190 | 18 | if (!LatchBR || 18 LatchBR->getNumSuccessors() != 218 ) |
1191 | 0 | return None; |
1192 | 18 | |
1193 | 18 | assert((LatchBR->getSuccessor(0) == L->getHeader() || |
1194 | 18 | LatchBR->getSuccessor(1) == L->getHeader()) && |
1195 | 18 | "At least one edge out of the latch must go to the header"); |
1196 | 18 | |
1197 | 18 | // To estimate the number of times the loop body was executed, we want to |
1198 | 18 | // know the number of times the backedge was taken, vs. the number of times |
1199 | 18 | // we exited the loop. |
1200 | 18 | uint64_t TrueVal, FalseVal; |
1201 | 18 | if (!LatchBR->extractProfMetadata(TrueVal, FalseVal)) |
1202 | 6 | return None; |
1203 | 12 | |
1204 | 12 | if (12 !TrueVal || 12 !FalseVal12 ) |
1205 | 4 | return 0; |
1206 | 8 | |
1207 | 8 | // Divide the count of the backedge by the count of the edge exiting the loop, |
1208 | 8 | // rounding to nearest. |
1209 | 8 | if (8 LatchBR->getSuccessor(0) == L->getHeader()8 ) |
1210 | 4 | return (TrueVal + (FalseVal / 2)) / FalseVal; |
1211 | 8 | else |
1212 | 4 | return (FalseVal + (TrueVal / 2)) / TrueVal; |
1213 | 0 | } |
1214 | | |
1215 | | /// \brief Adds a 'fast' flag to floating point operations. |
1216 | 647 | static Value *addFastMathFlag(Value *V) { |
1217 | 647 | if (isa<FPMathOperator>(V)647 ) { |
1218 | 149 | FastMathFlags Flags; |
1219 | 149 | Flags.setUnsafeAlgebra(); |
1220 | 149 | cast<Instruction>(V)->setFastMathFlags(Flags); |
1221 | 149 | } |
1222 | 647 | return V; |
1223 | 647 | } |
1224 | | |
1225 | | // Helper to generate a log2 shuffle reduction. |
1226 | | Value * |
1227 | | llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, |
1228 | | RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, |
1229 | 439 | ArrayRef<Value *> RedOps) { |
1230 | 439 | unsigned VF = Src->getType()->getVectorNumElements(); |
1231 | 439 | // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles |
1232 | 439 | // and vector ops, reducing the set of values being computed by half each |
1233 | 439 | // round. |
1234 | 439 | assert(isPowerOf2_32(VF) && |
1235 | 439 | "Reduction emission only supported for pow2 vectors!"); |
1236 | 439 | Value *TmpVec = Src; |
1237 | 439 | SmallVector<Constant *, 32> ShuffleMask(VF, nullptr); |
1238 | 1.24k | for (unsigned i = VF; i != 11.24k ; i >>= 1810 ) { |
1239 | 810 | // Move the upper half of the vector to the lower half. |
1240 | 2.43k | for (unsigned j = 0; j != i / 22.43k ; ++j1.62k ) |
1241 | 1.62k | ShuffleMask[j] = Builder.getInt32(i / 2 + j); |
1242 | 810 | |
1243 | 810 | // Fill the rest of the mask with undef. |
1244 | 810 | std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), |
1245 | 810 | UndefValue::get(Builder.getInt32Ty())); |
1246 | 810 | |
1247 | 810 | Value *Shuf = Builder.CreateShuffleVector( |
1248 | 810 | TmpVec, UndefValue::get(TmpVec->getType()), |
1249 | 810 | ConstantVector::get(ShuffleMask), "rdx.shuf"); |
1250 | 810 | |
1251 | 810 | if (Op != Instruction::ICmp && 810 Op != Instruction::FCmp706 ) { |
1252 | 647 | // Floating point operations had to be 'fast' to enable the reduction. |
1253 | 647 | TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op, |
1254 | 647 | TmpVec, Shuf, "bin.rdx")); |
1255 | 810 | } else { |
1256 | 163 | assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && |
1257 | 163 | "Invalid min/max"); |
1258 | 163 | TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec, |
1259 | 163 | Shuf); |
1260 | 163 | } |
1261 | 810 | if (!RedOps.empty()) |
1262 | 258 | propagateIRFlags(TmpVec, RedOps); |
1263 | 810 | } |
1264 | 439 | // The result is in the first element of the vector. |
1265 | 439 | return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); |
1266 | 439 | } |
1267 | | |
1268 | | /// Create a simple vector reduction specified by an opcode and some |
1269 | | /// flags (if generating min/max reductions). |
1270 | | Value *llvm::createSimpleTargetReduction( |
1271 | | IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, |
1272 | | Value *Src, TargetTransformInfo::ReductionFlags Flags, |
1273 | 1.28k | ArrayRef<Value *> RedOps) { |
1274 | 1.28k | assert(isa<VectorType>(Src->getType()) && "Type must be a vector"); |
1275 | 1.28k | |
1276 | 1.28k | Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); |
1277 | 1.28k | std::function<Value*()> BuildFunc; |
1278 | 1.28k | using RD = RecurrenceDescriptor; |
1279 | 1.28k | RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; |
1280 | 1.28k | // TODO: Support creating ordered reductions. |
1281 | 1.28k | FastMathFlags FMFUnsafe; |
1282 | 1.28k | FMFUnsafe.setUnsafeAlgebra(); |
1283 | 1.28k | |
1284 | 1.28k | switch (Opcode) { |
1285 | 971 | case Instruction::Add: |
1286 | 839 | BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; |
1287 | 971 | break; |
1288 | 45 | case Instruction::Mul: |
1289 | 0 | BuildFunc = [&]() { return Builder.CreateMulReduce(Src); }; |
1290 | 45 | break; |
1291 | 25 | case Instruction::And: |
1292 | 0 | BuildFunc = [&]() { return Builder.CreateAndReduce(Src); }; |
1293 | 25 | break; |
1294 | 75 | case Instruction::Or: |
1295 | 0 | BuildFunc = [&]() { return Builder.CreateOrReduce(Src); }; |
1296 | 75 | break; |
1297 | 8 | case Instruction::Xor: |
1298 | 0 | BuildFunc = [&]() { return Builder.CreateXorReduce(Src); }; |
1299 | 8 | break; |
1300 | 60 | case Instruction::FAdd: |
1301 | 0 | BuildFunc = [&]() { |
1302 | 0 | auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); |
1303 | 0 | cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe); |
1304 | 0 | return Rdx; |
1305 | 0 | }; |
1306 | 60 | break; |
1307 | 1 | case Instruction::FMul: |
1308 | 0 | BuildFunc = [&]() { |
1309 | 0 | auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); |
1310 | 0 | cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe); |
1311 | 0 | return Rdx; |
1312 | 0 | }; |
1313 | 1 | break; |
1314 | 68 | case Instruction::ICmp: |
1315 | 68 | if (Flags.IsMaxOp68 ) { |
1316 | 42 | MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax39 : RD::MRK_UIntMax3 ; |
1317 | 10 | BuildFunc = [&]() { |
1318 | 10 | return Builder.CreateIntMaxReduce(Src, Flags.IsSigned); |
1319 | 10 | }; |
1320 | 68 | } else { |
1321 | 26 | MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin23 : RD::MRK_UIntMin3 ; |
1322 | 6 | BuildFunc = [&]() { |
1323 | 6 | return Builder.CreateIntMinReduce(Src, Flags.IsSigned); |
1324 | 6 | }; |
1325 | 26 | } |
1326 | 68 | break; |
1327 | 28 | case Instruction::FCmp: |
1328 | 28 | if (Flags.IsMaxOp28 ) { |
1329 | 19 | MinMaxKind = RD::MRK_FloatMax; |
1330 | 0 | BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); }; |
1331 | 28 | } else { |
1332 | 9 | MinMaxKind = RD::MRK_FloatMin; |
1333 | 0 | BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); }; |
1334 | 9 | } |
1335 | 28 | break; |
1336 | 0 | default: |
1337 | 0 | llvm_unreachable("Unhandled opcode"); |
1338 | 0 | break; |
1339 | 1.28k | } |
1340 | 1.28k | if (1.28k TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)1.28k ) |
1341 | 855 | return BuildFunc(); |
1342 | 426 | return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); |
1343 | 426 | } |
1344 | | |
1345 | | /// Create a vector reduction using a given recurrence descriptor. |
1346 | | Value *llvm::createTargetReduction(IRBuilder<> &Builder, |
1347 | | const TargetTransformInfo *TTI, |
1348 | | RecurrenceDescriptor &Desc, Value *Src, |
1349 | 1.16k | bool NoNaN) { |
1350 | 1.16k | // TODO: Support in-order reductions based on the recurrence descriptor. |
1351 | 1.16k | RecurrenceDescriptor::RecurrenceKind RecKind = Desc.getRecurrenceKind(); |
1352 | 1.16k | TargetTransformInfo::ReductionFlags Flags; |
1353 | 1.16k | Flags.NoNaN = NoNaN; |
1354 | 1.16k | auto getSimpleRdx = [&](unsigned Opc) { |
1355 | 1.16k | return createSimpleTargetReduction(Builder, TTI, Opc, Src, Flags); |
1356 | 1.16k | }; |
1357 | 1.16k | switch (RecKind) { |
1358 | 18 | case RecurrenceDescriptor::RK_FloatAdd: |
1359 | 18 | return getSimpleRdx(Instruction::FAdd); |
1360 | 1 | case RecurrenceDescriptor::RK_FloatMult: |
1361 | 1 | return getSimpleRdx(Instruction::FMul); |
1362 | 931 | case RecurrenceDescriptor::RK_IntegerAdd: |
1363 | 931 | return getSimpleRdx(Instruction::Add); |
1364 | 45 | case RecurrenceDescriptor::RK_IntegerMult: |
1365 | 45 | return getSimpleRdx(Instruction::Mul); |
1366 | 25 | case RecurrenceDescriptor::RK_IntegerAnd: |
1367 | 25 | return getSimpleRdx(Instruction::And); |
1368 | 75 | case RecurrenceDescriptor::RK_IntegerOr: |
1369 | 75 | return getSimpleRdx(Instruction::Or); |
1370 | 8 | case RecurrenceDescriptor::RK_IntegerXor: |
1371 | 8 | return getSimpleRdx(Instruction::Xor); |
1372 | 49 | case RecurrenceDescriptor::RK_IntegerMinMax: { |
1373 | 49 | switch (Desc.getMinMaxRecurrenceKind()) { |
1374 | 20 | case RecurrenceDescriptor::MRK_SIntMax: |
1375 | 20 | Flags.IsSigned = true; |
1376 | 20 | Flags.IsMaxOp = true; |
1377 | 20 | break; |
1378 | 3 | case RecurrenceDescriptor::MRK_UIntMax: |
1379 | 3 | Flags.IsMaxOp = true; |
1380 | 3 | break; |
1381 | 23 | case RecurrenceDescriptor::MRK_SIntMin: |
1382 | 23 | Flags.IsSigned = true; |
1383 | 23 | break; |
1384 | 3 | case RecurrenceDescriptor::MRK_UIntMin: |
1385 | 3 | break; |
1386 | 0 | default: |
1387 | 0 | llvm_unreachable("Unhandled MRK"); |
1388 | 49 | } |
1389 | 49 | return getSimpleRdx(Instruction::ICmp); |
1390 | 49 | } |
1391 | 17 | case RecurrenceDescriptor::RK_FloatMinMax: { |
1392 | 17 | Flags.IsMaxOp = |
1393 | 17 | Desc.getMinMaxRecurrenceKind() == RecurrenceDescriptor::MRK_FloatMax; |
1394 | 17 | return getSimpleRdx(Instruction::FCmp); |
1395 | 49 | } |
1396 | 0 | default: |
1397 | 0 | llvm_unreachable("Unhandled RecKind"); |
1398 | 0 | } |
1399 | 0 | } |
1400 | | |
1401 | 6.72k | void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { |
1402 | 6.72k | auto *VecOp = dyn_cast<Instruction>(I); |
1403 | 6.72k | if (!VecOp) |
1404 | 14 | return; |
1405 | 6.71k | auto *Intersection = (OpValue == nullptr) ? 6.71k dyn_cast<Instruction>(VL[0])443 |
1406 | 6.26k | : dyn_cast<Instruction>(OpValue); |
1407 | 6.71k | if (!Intersection) |
1408 | 0 | return; |
1409 | 6.71k | const unsigned Opcode = Intersection->getOpcode(); |
1410 | 6.71k | VecOp->copyIRFlags(Intersection); |
1411 | 31.7k | for (auto *V : VL) { |
1412 | 31.7k | auto *Instr = dyn_cast<Instruction>(V); |
1413 | 31.7k | if (!Instr) |
1414 | 0 | continue; |
1415 | 31.7k | if (31.7k OpValue == nullptr || 31.7k Opcode == Instr->getOpcode()25.7k ) |
1416 | 31.7k | VecOp->andIRFlags(V); |
1417 | 31.7k | } |
1418 | 6.72k | } |