/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/Support/SCEVAffinator.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--------- SCEVAffinator.cpp - Create Scops from LLVM IR -------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // Create a polyhedral description for a SCEV value. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "polly/Support/SCEVAffinator.h" |
14 | | #include "polly/Options.h" |
15 | | #include "polly/ScopInfo.h" |
16 | | #include "polly/Support/GICHelper.h" |
17 | | #include "polly/Support/SCEVValidator.h" |
18 | | #include "isl/aff.h" |
19 | | #include "isl/local_space.h" |
20 | | #include "isl/set.h" |
21 | | #include "isl/val.h" |
22 | | |
23 | | using namespace llvm; |
24 | | using namespace polly; |
25 | | |
26 | | static cl::opt<bool> IgnoreIntegerWrapping( |
27 | | "polly-ignore-integer-wrapping", |
28 | | cl::desc("Do not build run-time checks to proof absence of integer " |
29 | | "wrapping"), |
30 | | cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory)); |
31 | | |
32 | | // The maximal number of basic sets we allow during the construction of a |
33 | | // piecewise affine function. More complex ones will result in very high |
34 | | // compile time. |
35 | | static int const MaxDisjunctionsInPwAff = 100; |
36 | | |
37 | | // The maximal number of bits for which a general expression is modeled |
38 | | // precisely. |
39 | | static unsigned const MaxSmallBitWidth = 7; |
40 | | |
41 | | /// Add the number of basic sets in @p Domain to @p User |
42 | | static isl_stat addNumBasicSets(__isl_take isl_set *Domain, |
43 | 709 | __isl_take isl_aff *Aff, void *User) { |
44 | 709 | auto *NumBasicSets = static_cast<unsigned *>(User); |
45 | 709 | *NumBasicSets += isl_set_n_basic_set(Domain); |
46 | 709 | isl_set_free(Domain); |
47 | 709 | isl_aff_free(Aff); |
48 | 709 | return isl_stat_ok; |
49 | 709 | } |
50 | | |
51 | | /// Determine if @p PWAC is too complex to continue. |
52 | 374 | static bool isTooComplex(PWACtx PWAC) { |
53 | 374 | unsigned NumBasicSets = 0; |
54 | 374 | isl_pw_aff_foreach_piece(PWAC.first.get(), addNumBasicSets, &NumBasicSets); |
55 | 374 | if (NumBasicSets <= MaxDisjunctionsInPwAff) |
56 | 372 | return false; |
57 | 2 | return true; |
58 | 2 | } |
59 | | |
60 | | /// Return the flag describing the possible wrapping of @p Expr. |
61 | 23.2k | static SCEV::NoWrapFlags getNoWrapFlags(const SCEV *Expr) { |
62 | 23.2k | if (auto *NAry = dyn_cast<SCEVNAryExpr>(Expr)) |
63 | 9.84k | return NAry->getNoWrapFlags(); |
64 | 13.3k | return SCEV::NoWrapMask; |
65 | 13.3k | } |
66 | | |
67 | | static PWACtx combine(PWACtx PWAC0, PWACtx PWAC1, |
68 | | __isl_give isl_pw_aff *(Fn)(__isl_take isl_pw_aff *, |
69 | 14.2k | __isl_take isl_pw_aff *)) { |
70 | 14.2k | PWAC0.first = isl::manage(Fn(PWAC0.first.release(), PWAC1.first.release())); |
71 | 14.2k | PWAC0.second = PWAC0.second.unite(PWAC1.second); |
72 | 14.2k | return PWAC0; |
73 | 14.2k | } |
74 | | |
75 | | static __isl_give isl_pw_aff *getWidthExpValOnDomain(unsigned Width, |
76 | 2.98k | __isl_take isl_set *Dom) { |
77 | 2.98k | auto *Ctx = isl_set_get_ctx(Dom); |
78 | 2.98k | auto *WidthVal = isl_val_int_from_ui(Ctx, Width); |
79 | 2.98k | auto *ExpVal = isl_val_2exp(WidthVal); |
80 | 2.98k | return isl_pw_aff_val_on_domain(Dom, ExpVal); |
81 | 2.98k | } |
82 | | |
83 | | SCEVAffinator::SCEVAffinator(Scop *S, LoopInfo &LI) |
84 | | : S(S), Ctx(S->getIslCtx().get()), SE(*S->getSE()), LI(LI), |
85 | 1.20k | TD(S->getFunction().getParent()->getDataLayout()) {} |
86 | | |
87 | 12.6k | Loop *SCEVAffinator::getScope() { return BB ? LI.getLoopFor(BB)12.2k : nullptr410 ; } |
88 | | |
89 | 45 | void SCEVAffinator::interpretAsUnsigned(PWACtx &PWAC, unsigned Width) { |
90 | 45 | auto *NonNegDom = isl_pw_aff_nonneg_set(PWAC.first.copy()); |
91 | 45 | auto *NonNegPWA = |
92 | 45 | isl_pw_aff_intersect_domain(PWAC.first.copy(), isl_set_copy(NonNegDom)); |
93 | 45 | auto *ExpPWA = getWidthExpValOnDomain(Width, isl_set_complement(NonNegDom)); |
94 | 45 | PWAC.first = isl::manage(isl_pw_aff_union_add( |
95 | 45 | NonNegPWA, isl_pw_aff_add(PWAC.first.release(), ExpPWA))); |
96 | 45 | } |
97 | | |
98 | 164 | void SCEVAffinator::takeNonNegativeAssumption(PWACtx &PWAC) { |
99 | 164 | auto *NegPWA = isl_pw_aff_neg(PWAC.first.copy()); |
100 | 164 | auto *NegDom = isl_pw_aff_pos_set(NegPWA); |
101 | 164 | PWAC.second = |
102 | 164 | isl::manage(isl_set_union(PWAC.second.release(), isl_set_copy(NegDom))); |
103 | 164 | auto *Restriction = BB ? NegDom156 : isl_set_params(NegDom)8 ; |
104 | 164 | auto DL = BB ? BB->getTerminator()->getDebugLoc()156 : DebugLoc()8 ; |
105 | 164 | S->recordAssumption(UNSIGNED, isl::manage(Restriction), DL, AS_RESTRICTION, |
106 | 164 | BB); |
107 | 164 | } |
108 | | |
109 | 19.8k | PWACtx SCEVAffinator::getPWACtxFromPWA(isl::pw_aff PWA) { |
110 | 19.8k | return std::make_pair(PWA, isl::set::empty(isl::space(Ctx, 0, NumIterators))); |
111 | 19.8k | } |
112 | | |
113 | 10.2k | PWACtx SCEVAffinator::getPwAff(const SCEV *Expr, BasicBlock *BB) { |
114 | 10.2k | this->BB = BB; |
115 | 10.2k | |
116 | 10.2k | if (BB) { |
117 | 9.74k | auto *DC = S->getDomainConditions(BB).release(); |
118 | 9.74k | NumIterators = isl_set_n_dim(DC); |
119 | 9.74k | isl_set_free(DC); |
120 | 9.74k | } else |
121 | 541 | NumIterators = 0; |
122 | 10.2k | |
123 | 10.2k | return visit(Expr); |
124 | 10.2k | } |
125 | | |
126 | 23.2k | PWACtx SCEVAffinator::checkForWrapping(const SCEV *Expr, PWACtx PWAC) const { |
127 | 23.2k | // If the SCEV flags do contain NSW (no signed wrap) then PWA already |
128 | 23.2k | // represents Expr in modulo semantic (it is not allowed to overflow), thus we |
129 | 23.2k | // are done. Otherwise, we will compute: |
130 | 23.2k | // PWA = ((PWA + 2^(n-1)) mod (2 ^ n)) - 2^(n-1) |
131 | 23.2k | // whereas n is the number of bits of the Expr, hence: |
132 | 23.2k | // n = bitwidth(ExprType) |
133 | 23.2k | |
134 | 23.2k | if (IgnoreIntegerWrapping || (getNoWrapFlags(Expr) & SCEV::FlagNSW)) |
135 | 20.3k | return PWAC; |
136 | 2.84k | |
137 | 2.84k | isl::pw_aff PWAMod = addModuloSemantic(PWAC.first, Expr->getType()); |
138 | 2.84k | |
139 | 2.84k | isl::set NotEqualSet = PWAC.first.ne_set(PWAMod); |
140 | 2.84k | PWAC.second = PWAC.second.unite(NotEqualSet).coalesce(); |
141 | 2.84k | |
142 | 2.84k | const DebugLoc &Loc = BB ? BB->getTerminator()->getDebugLoc()2.83k : DebugLoc()6 ; |
143 | 2.84k | if (!BB) |
144 | 6 | NotEqualSet = NotEqualSet.params(); |
145 | 2.84k | NotEqualSet = NotEqualSet.coalesce(); |
146 | 2.84k | |
147 | 2.84k | if (!NotEqualSet.is_empty()) |
148 | 2.83k | S->recordAssumption(WRAPPING, NotEqualSet, Loc, AS_RESTRICTION, BB); |
149 | 2.84k | |
150 | 2.84k | return PWAC; |
151 | 2.84k | } |
152 | | |
153 | | isl::pw_aff SCEVAffinator::addModuloSemantic(isl::pw_aff PWA, |
154 | 2.92k | Type *ExprType) const { |
155 | 2.92k | unsigned Width = TD.getTypeSizeInBits(ExprType); |
156 | 2.92k | |
157 | 2.92k | auto ModVal = isl::val::int_from_ui(Ctx, Width); |
158 | 2.92k | ModVal = ModVal.pow2(); |
159 | 2.92k | |
160 | 2.92k | isl::set Domain = PWA.domain(); |
161 | 2.92k | isl::pw_aff AddPW = |
162 | 2.92k | isl::manage(getWidthExpValOnDomain(Width - 1, Domain.release())); |
163 | 2.92k | |
164 | 2.92k | return PWA.add(AddPW).mod(ModVal).sub(AddPW); |
165 | 2.92k | } |
166 | | |
167 | 1.69k | bool SCEVAffinator::hasNSWAddRecForLoop(Loop *L) const { |
168 | 10.6k | for (const auto &CachedPair : CachedExpressions) { |
169 | 10.6k | auto *AddRec = dyn_cast<SCEVAddRecExpr>(CachedPair.first.first); |
170 | 10.6k | if (!AddRec) |
171 | 6.49k | continue; |
172 | 4.16k | if (AddRec->getLoop() != L) |
173 | 2.27k | continue; |
174 | 1.88k | if (AddRec->getNoWrapFlags() & SCEV::FlagNSW) |
175 | 1.31k | return true; |
176 | 1.88k | } |
177 | 1.69k | |
178 | 1.69k | return false386 ; |
179 | 1.69k | } |
180 | | |
181 | 36.0k | bool SCEVAffinator::computeModuloForExpr(const SCEV *Expr) { |
182 | 36.0k | unsigned Width = TD.getTypeSizeInBits(Expr->getType()); |
183 | 36.0k | // We assume nsw expressions never overflow. |
184 | 36.0k | if (auto *NAry = dyn_cast<SCEVNAryExpr>(Expr)) |
185 | 15.0k | if (NAry->getNoWrapFlags() & SCEV::FlagNSW) |
186 | 10.6k | return false; |
187 | 25.4k | return Width <= MaxSmallBitWidth; |
188 | 25.4k | } |
189 | | |
190 | 17.3k | PWACtx SCEVAffinator::visit(const SCEV *Expr) { |
191 | 17.3k | |
192 | 17.3k | auto Key = std::make_pair(Expr, BB); |
193 | 17.3k | PWACtx PWAC = CachedExpressions[Key]; |
194 | 17.3k | if (PWAC.first) |
195 | 4.74k | return PWAC; |
196 | 12.6k | |
197 | 12.6k | auto ConstantAndLeftOverPair = extractConstantFactor(Expr, SE); |
198 | 12.6k | auto *Factor = ConstantAndLeftOverPair.first; |
199 | 12.6k | Expr = ConstantAndLeftOverPair.second; |
200 | 12.6k | |
201 | 12.6k | auto *Scope = getScope(); |
202 | 12.6k | S->addParams(getParamsInAffineExpr(&S->getRegion(), Scope, Expr, SE)); |
203 | 12.6k | |
204 | 12.6k | // In case the scev is a valid parameter, we do not further analyze this |
205 | 12.6k | // expression, but create a new parameter in the isl_pw_aff. This allows us |
206 | 12.6k | // to treat subexpressions that we cannot translate into an piecewise affine |
207 | 12.6k | // expression, as constant parameters of the piecewise affine expression. |
208 | 12.6k | if (isl_id *Id = S->getIdForParam(Expr).release()) { |
209 | 1.85k | isl_space *Space = isl_space_set_alloc(Ctx.get(), 1, NumIterators); |
210 | 1.85k | Space = isl_space_set_dim_id(Space, isl_dim_param, 0, Id); |
211 | 1.85k | |
212 | 1.85k | isl_set *Domain = isl_set_universe(isl_space_copy(Space)); |
213 | 1.85k | isl_aff *Affine = isl_aff_zero_on_domain(isl_local_space_from_space(Space)); |
214 | 1.85k | Affine = isl_aff_add_coefficient_si(Affine, isl_dim_param, 0, 1); |
215 | 1.85k | |
216 | 1.85k | PWAC = getPWACtxFromPWA(isl::manage(isl_pw_aff_alloc(Domain, Affine))); |
217 | 10.7k | } else { |
218 | 10.7k | PWAC = SCEVVisitor<SCEVAffinator, PWACtx>::visit(Expr); |
219 | 10.7k | if (computeModuloForExpr(Expr)) |
220 | 61 | PWAC.first = addModuloSemantic(PWAC.first, Expr->getType()); |
221 | 10.6k | else |
222 | 10.6k | PWAC = checkForWrapping(Expr, PWAC); |
223 | 10.7k | } |
224 | 12.6k | |
225 | 12.6k | if (!Factor->getType()->isIntegerTy(1)) { |
226 | 12.5k | PWAC = combine(PWAC, visitConstant(Factor), isl_pw_aff_mul); |
227 | 12.5k | if (computeModuloForExpr(Key.first)) |
228 | 19 | PWAC.first = addModuloSemantic(PWAC.first, Expr->getType()); |
229 | 12.5k | } |
230 | 12.6k | |
231 | 12.6k | // For compile time reasons we need to simplify the PWAC before we cache and |
232 | 12.6k | // return it. |
233 | 12.6k | PWAC.first = PWAC.first.coalesce(); |
234 | 12.6k | if (!computeModuloForExpr(Key.first)) |
235 | 12.5k | PWAC = checkForWrapping(Key.first, PWAC); |
236 | 12.6k | |
237 | 12.6k | CachedExpressions[Key] = PWAC; |
238 | 12.6k | return PWAC; |
239 | 12.6k | } |
240 | | |
241 | 17.9k | PWACtx SCEVAffinator::visitConstant(const SCEVConstant *Expr) { |
242 | 17.9k | ConstantInt *Value = Expr->getValue(); |
243 | 17.9k | isl_val *v; |
244 | 17.9k | |
245 | 17.9k | // LLVM does not define if an integer value is interpreted as a signed or |
246 | 17.9k | // unsigned value. Hence, without further information, it is unknown how |
247 | 17.9k | // this value needs to be converted to GMP. At the moment, we only support |
248 | 17.9k | // signed operations. So we just interpret it as signed. Later, there are |
249 | 17.9k | // two options: |
250 | 17.9k | // |
251 | 17.9k | // 1. We always interpret any value as signed and convert the values on |
252 | 17.9k | // demand. |
253 | 17.9k | // 2. We pass down the signedness of the calculation and use it to interpret |
254 | 17.9k | // this constant correctly. |
255 | 17.9k | v = isl_valFromAPInt(Ctx.get(), Value->getValue(), /* isSigned */ true); |
256 | 17.9k | |
257 | 17.9k | isl_space *Space = isl_space_set_alloc(Ctx.get(), 0, NumIterators); |
258 | 17.9k | isl_local_space *ls = isl_local_space_from_space(Space); |
259 | 17.9k | return getPWACtxFromPWA( |
260 | 17.9k | isl::manage(isl_pw_aff_from_aff(isl_aff_val_on_domain(ls, v)))); |
261 | 17.9k | } |
262 | | |
263 | 51 | PWACtx SCEVAffinator::visitTruncateExpr(const SCEVTruncateExpr *Expr) { |
264 | 51 | // Truncate operations are basically modulo operations, thus we can |
265 | 51 | // model them that way. However, for large types we assume the operand |
266 | 51 | // to fit in the new type size instead of introducing a modulo with a very |
267 | 51 | // large constant. |
268 | 51 | |
269 | 51 | auto *Op = Expr->getOperand(); |
270 | 51 | auto OpPWAC = visit(Op); |
271 | 51 | |
272 | 51 | unsigned Width = TD.getTypeSizeInBits(Expr->getType()); |
273 | 51 | |
274 | 51 | if (computeModuloForExpr(Expr)) |
275 | 37 | return OpPWAC; |
276 | 14 | |
277 | 14 | auto *Dom = OpPWAC.first.domain().release(); |
278 | 14 | auto *ExpPWA = getWidthExpValOnDomain(Width - 1, Dom); |
279 | 14 | auto *GreaterDom = |
280 | 14 | isl_pw_aff_ge_set(OpPWAC.first.copy(), isl_pw_aff_copy(ExpPWA)); |
281 | 14 | auto *SmallerDom = |
282 | 14 | isl_pw_aff_lt_set(OpPWAC.first.copy(), isl_pw_aff_neg(ExpPWA)); |
283 | 14 | auto *OutOfBoundsDom = isl_set_union(SmallerDom, GreaterDom); |
284 | 14 | OpPWAC.second = OpPWAC.second.unite(isl::manage_copy(OutOfBoundsDom)); |
285 | 14 | |
286 | 14 | if (!BB) { |
287 | 1 | assert(isl_set_dim(OutOfBoundsDom, isl_dim_set) == 0 && |
288 | 1 | "Expected a zero dimensional set for non-basic-block domains"); |
289 | 1 | OutOfBoundsDom = isl_set_params(OutOfBoundsDom); |
290 | 1 | } |
291 | 14 | |
292 | 14 | S->recordAssumption(UNSIGNED, isl::manage(OutOfBoundsDom), DebugLoc(), |
293 | 14 | AS_RESTRICTION, BB); |
294 | 14 | |
295 | 14 | return OpPWAC; |
296 | 14 | } |
297 | | |
298 | 113 | PWACtx SCEVAffinator::visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { |
299 | 113 | // A zero-extended value can be interpreted as a piecewise defined signed |
300 | 113 | // value. If the value was non-negative it stays the same, otherwise it |
301 | 113 | // is the sum of the original value and 2^n where n is the bit-width of |
302 | 113 | // the original (or operand) type. Examples: |
303 | 113 | // zext i8 127 to i32 -> { [127] } |
304 | 113 | // zext i8 -1 to i32 -> { [256 + (-1)] } = { [255] } |
305 | 113 | // zext i8 %v to i32 -> [v] -> { [v] | v >= 0; [256 + v] | v < 0 } |
306 | 113 | // |
307 | 113 | // However, LLVM/Scalar Evolution uses zero-extend (potentially lead by a |
308 | 113 | // truncate) to represent some forms of modulo computation. The left-hand side |
309 | 113 | // of the condition in the code below would result in the SCEV |
310 | 113 | // "zext i1 <false, +, true>for.body" which is just another description |
311 | 113 | // of the C expression "i & 1 != 0" or, equivalently, "i % 2 != 0". |
312 | 113 | // |
313 | 113 | // for (i = 0; i < N; i++) |
314 | 113 | // if (i & 1 != 0 /* == i % 2 */) |
315 | 113 | // /* do something */ |
316 | 113 | // |
317 | 113 | // If we do not make the modulo explicit but only use the mechanism described |
318 | 113 | // above we will get the very restrictive assumption "N < 3", because for all |
319 | 113 | // values of N >= 3 the SCEVAddRecExpr operand of the zero-extend would wrap. |
320 | 113 | // Alternatively, we can make the modulo in the operand explicit in the |
321 | 113 | // resulting piecewise function and thereby avoid the assumption on N. For the |
322 | 113 | // example this would result in the following piecewise affine function: |
323 | 113 | // { [i0] -> [(1)] : 2*floor((-1 + i0)/2) = -1 + i0; |
324 | 113 | // [i0] -> [(0)] : 2*floor((i0)/2) = i0 } |
325 | 113 | // To this end we can first determine if the (immediate) operand of the |
326 | 113 | // zero-extend can wrap and, in case it might, we will use explicit modulo |
327 | 113 | // semantic to compute the result instead of emitting non-wrapping |
328 | 113 | // assumptions. |
329 | 113 | // |
330 | 113 | // Note that operands with large bit-widths are less likely to be negative |
331 | 113 | // because it would result in a very large access offset or loop bound after |
332 | 113 | // the zero-extend. To this end one can optimistically assume the operand to |
333 | 113 | // be positive and avoid the piecewise definition if the bit-width is bigger |
334 | 113 | // than some threshold (here MaxZextSmallBitWidth). |
335 | 113 | // |
336 | 113 | // We choose to go with a hybrid solution of all modeling techniques described |
337 | 113 | // above. For small bit-widths (up to MaxZextSmallBitWidth) we will model the |
338 | 113 | // wrapping explicitly and use a piecewise defined function. However, if the |
339 | 113 | // bit-width is bigger than MaxZextSmallBitWidth we will employ overflow |
340 | 113 | // assumptions and assume the "former negative" piece will not exist. |
341 | 113 | |
342 | 113 | auto *Op = Expr->getOperand(); |
343 | 113 | auto OpPWAC = visit(Op); |
344 | 113 | |
345 | 113 | // If the width is to big we assume the negative part does not occur. |
346 | 113 | if (!computeModuloForExpr(Op)) { |
347 | 68 | takeNonNegativeAssumption(OpPWAC); |
348 | 68 | return OpPWAC; |
349 | 68 | } |
350 | 45 | |
351 | 45 | // If the width is small build the piece for the non-negative part and |
352 | 45 | // the one for the negative part and unify them. |
353 | 45 | unsigned Width = TD.getTypeSizeInBits(Op->getType()); |
354 | 45 | interpretAsUnsigned(OpPWAC, Width); |
355 | 45 | return OpPWAC; |
356 | 45 | } |
357 | | |
358 | 381 | PWACtx SCEVAffinator::visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { |
359 | 381 | // As all values are represented as signed, a sign extension is a noop. |
360 | 381 | return visit(Expr->getOperand()); |
361 | 381 | } |
362 | | |
363 | 262 | PWACtx SCEVAffinator::visitAddExpr(const SCEVAddExpr *Expr) { |
364 | 262 | PWACtx Sum = visit(Expr->getOperand(0)); |
365 | 262 | |
366 | 557 | for (int i = 1, e = Expr->getNumOperands(); i < e; ++i295 ) { |
367 | 296 | Sum = combine(Sum, visit(Expr->getOperand(i)), isl_pw_aff_add); |
368 | 296 | if (isTooComplex(Sum)) |
369 | 1 | return complexityBailout(); |
370 | 296 | } |
371 | 262 | |
372 | 262 | return Sum261 ; |
373 | 262 | } |
374 | | |
375 | 1 | PWACtx SCEVAffinator::visitMulExpr(const SCEVMulExpr *Expr) { |
376 | 1 | PWACtx Prod = visit(Expr->getOperand(0)); |
377 | 1 | |
378 | 2 | for (int i = 1, e = Expr->getNumOperands(); i < e; ++i1 ) { |
379 | 1 | Prod = combine(Prod, visit(Expr->getOperand(i)), isl_pw_aff_mul); |
380 | 1 | if (isTooComplex(Prod)) |
381 | 0 | return complexityBailout(); |
382 | 1 | } |
383 | 1 | |
384 | 1 | return Prod; |
385 | 1 | } |
386 | | |
387 | 4.32k | PWACtx SCEVAffinator::visitAddRecExpr(const SCEVAddRecExpr *Expr) { |
388 | 4.32k | assert(Expr->isAffine() && "Only affine AddRecurrences allowed"); |
389 | 4.32k | |
390 | 4.32k | auto Flags = Expr->getNoWrapFlags(); |
391 | 4.32k | |
392 | 4.32k | // Directly generate isl_pw_aff for Expr if 'start' is zero. |
393 | 4.32k | if (Expr->getStart()->isZero()) { |
394 | 3.12k | assert(S->contains(Expr->getLoop()) && |
395 | 3.12k | "Scop does not contain the loop referenced in this AddRec"); |
396 | 3.12k | |
397 | 3.12k | PWACtx Step = visit(Expr->getOperand(1)); |
398 | 3.12k | isl_space *Space = isl_space_set_alloc(Ctx.get(), 0, NumIterators); |
399 | 3.12k | isl_local_space *LocalSpace = isl_local_space_from_space(Space); |
400 | 3.12k | |
401 | 3.12k | unsigned loopDimension = S->getRelativeLoopDepth(Expr->getLoop()); |
402 | 3.12k | |
403 | 3.12k | isl_aff *LAff = isl_aff_set_coefficient_si( |
404 | 3.12k | isl_aff_zero_on_domain(LocalSpace), isl_dim_in, loopDimension, 1); |
405 | 3.12k | isl_pw_aff *LPwAff = isl_pw_aff_from_aff(LAff); |
406 | 3.12k | |
407 | 3.12k | Step.first = Step.first.mul(isl::manage(LPwAff)); |
408 | 3.12k | return Step; |
409 | 3.12k | } |
410 | 1.20k | |
411 | 1.20k | // Translate AddRecExpr from '{start, +, inc}' into 'start + {0, +, inc}' |
412 | 1.20k | // if 'start' is not zero. |
413 | 1.20k | // TODO: Using the original SCEV no-wrap flags is not always safe, however |
414 | 1.20k | // as our code generation is reordering the expression anyway it doesn't |
415 | 1.20k | // really matter. |
416 | 1.20k | const SCEV *ZeroStartExpr = |
417 | 1.20k | SE.getAddRecExpr(SE.getConstant(Expr->getStart()->getType(), 0), |
418 | 1.20k | Expr->getStepRecurrence(SE), Expr->getLoop(), Flags); |
419 | 1.20k | |
420 | 1.20k | PWACtx Result = visit(ZeroStartExpr); |
421 | 1.20k | PWACtx Start = visit(Expr->getStart()); |
422 | 1.20k | Result = combine(Result, Start, isl_pw_aff_add); |
423 | 1.20k | return Result; |
424 | 1.20k | } |
425 | | |
426 | 65 | PWACtx SCEVAffinator::visitSMaxExpr(const SCEVSMaxExpr *Expr) { |
427 | 65 | PWACtx Max = visit(Expr->getOperand(0)); |
428 | 65 | |
429 | 136 | for (int i = 1, e = Expr->getNumOperands(); i < e; ++i71 ) { |
430 | 72 | Max = combine(Max, visit(Expr->getOperand(i)), isl_pw_aff_max); |
431 | 72 | if (isTooComplex(Max)) |
432 | 1 | return complexityBailout(); |
433 | 72 | } |
434 | 65 | |
435 | 65 | return Max64 ; |
436 | 65 | } |
437 | | |
438 | 4 | PWACtx SCEVAffinator::visitSMinExpr(const SCEVSMinExpr *Expr) { |
439 | 4 | PWACtx Min = visit(Expr->getOperand(0)); |
440 | 4 | |
441 | 9 | for (int i = 1, e = Expr->getNumOperands(); i < e; ++i5 ) { |
442 | 5 | Min = combine(Min, visit(Expr->getOperand(i)), isl_pw_aff_min); |
443 | 5 | if (isTooComplex(Min)) |
444 | 0 | return complexityBailout(); |
445 | 5 | } |
446 | 4 | |
447 | 4 | return Min; |
448 | 4 | } |
449 | | |
450 | 0 | PWACtx SCEVAffinator::visitUMaxExpr(const SCEVUMaxExpr *Expr) { |
451 | 0 | llvm_unreachable("SCEVUMaxExpr not yet supported"); |
452 | 0 | } |
453 | | |
454 | 0 | PWACtx SCEVAffinator::visitUMinExpr(const SCEVUMinExpr *Expr) { |
455 | 0 | llvm_unreachable("SCEVUMinExpr not yet supported"); |
456 | 0 | } |
457 | | |
458 | 53 | PWACtx SCEVAffinator::visitUDivExpr(const SCEVUDivExpr *Expr) { |
459 | 53 | // The handling of unsigned division is basically the same as for signed |
460 | 53 | // division, except the interpretation of the operands. As the divisor |
461 | 53 | // has to be constant in both cases we can simply interpret it as an |
462 | 53 | // unsigned value without additional complexity in the representation. |
463 | 53 | // For the dividend we could choose from the different representation |
464 | 53 | // schemes introduced for zero-extend operations but for now we will |
465 | 53 | // simply use an assumption. |
466 | 53 | auto *Dividend = Expr->getLHS(); |
467 | 53 | auto *Divisor = Expr->getRHS(); |
468 | 53 | assert(isa<SCEVConstant>(Divisor) && |
469 | 53 | "UDiv is no parameter but has a non-constant RHS."); |
470 | 53 | |
471 | 53 | auto DividendPWAC = visit(Dividend); |
472 | 53 | auto DivisorPWAC = visit(Divisor); |
473 | 53 | |
474 | 53 | if (SE.isKnownNegative(Divisor)) { |
475 | 2 | // Interpret negative divisors unsigned. This is a special case of the |
476 | 2 | // piece-wise defined value described for zero-extends as we already know |
477 | 2 | // the actual value of the constant divisor. |
478 | 2 | unsigned Width = TD.getTypeSizeInBits(Expr->getType()); |
479 | 2 | auto *DivisorDom = DivisorPWAC.first.domain().release(); |
480 | 2 | auto *WidthExpPWA = getWidthExpValOnDomain(Width, DivisorDom); |
481 | 2 | DivisorPWAC.first = DivisorPWAC.first.add(isl::manage(WidthExpPWA)); |
482 | 2 | } |
483 | 53 | |
484 | 53 | // TODO: One can represent the dividend as piece-wise function to be more |
485 | 53 | // precise but therefor a heuristic is needed. |
486 | 53 | |
487 | 53 | // Assume a non-negative dividend. |
488 | 53 | takeNonNegativeAssumption(DividendPWAC); |
489 | 53 | |
490 | 53 | DividendPWAC = combine(DividendPWAC, DivisorPWAC, isl_pw_aff_div); |
491 | 53 | DividendPWAC.first = DividendPWAC.first.floor(); |
492 | 53 | |
493 | 53 | return DividendPWAC; |
494 | 53 | } |
495 | | |
496 | 43 | PWACtx SCEVAffinator::visitSDivInstruction(Instruction *SDiv) { |
497 | 43 | assert(SDiv->getOpcode() == Instruction::SDiv && "Assumed SDiv instruction!"); |
498 | 43 | |
499 | 43 | auto *Scope = getScope(); |
500 | 43 | auto *Divisor = SDiv->getOperand(1); |
501 | 43 | auto *DivisorSCEV = SE.getSCEVAtScope(Divisor, Scope); |
502 | 43 | auto DivisorPWAC = visit(DivisorSCEV); |
503 | 43 | assert(isa<SCEVConstant>(DivisorSCEV) && |
504 | 43 | "SDiv is no parameter but has a non-constant RHS."); |
505 | 43 | |
506 | 43 | auto *Dividend = SDiv->getOperand(0); |
507 | 43 | auto *DividendSCEV = SE.getSCEVAtScope(Dividend, Scope); |
508 | 43 | auto DividendPWAC = visit(DividendSCEV); |
509 | 43 | DividendPWAC = combine(DividendPWAC, DivisorPWAC, isl_pw_aff_tdiv_q); |
510 | 43 | return DividendPWAC; |
511 | 43 | } |
512 | | |
513 | 37 | PWACtx SCEVAffinator::visitSRemInstruction(Instruction *SRem) { |
514 | 37 | assert(SRem->getOpcode() == Instruction::SRem && "Assumed SRem instruction!"); |
515 | 37 | |
516 | 37 | auto *Scope = getScope(); |
517 | 37 | auto *Divisor = SRem->getOperand(1); |
518 | 37 | auto *DivisorSCEV = SE.getSCEVAtScope(Divisor, Scope); |
519 | 37 | auto DivisorPWAC = visit(DivisorSCEV); |
520 | 37 | assert(isa<ConstantInt>(Divisor) && |
521 | 37 | "SRem is no parameter but has a non-constant RHS."); |
522 | 37 | |
523 | 37 | auto *Dividend = SRem->getOperand(0); |
524 | 37 | auto *DividendSCEV = SE.getSCEVAtScope(Dividend, Scope); |
525 | 37 | auto DividendPWAC = visit(DividendSCEV); |
526 | 37 | DividendPWAC = combine(DividendPWAC, DivisorPWAC, isl_pw_aff_tdiv_r); |
527 | 37 | return DividendPWAC; |
528 | 37 | } |
529 | | |
530 | 92 | PWACtx SCEVAffinator::visitUnknown(const SCEVUnknown *Expr) { |
531 | 92 | if (Instruction *I = dyn_cast<Instruction>(Expr->getValue())) { |
532 | 92 | switch (I->getOpcode()) { |
533 | 92 | case Instruction::IntToPtr: |
534 | 4 | return visit(SE.getSCEVAtScope(I->getOperand(0), getScope())); |
535 | 92 | case Instruction::PtrToInt: |
536 | 8 | return visit(SE.getSCEVAtScope(I->getOperand(0), getScope())); |
537 | 92 | case Instruction::SDiv: |
538 | 43 | return visitSDivInstruction(I); |
539 | 92 | case Instruction::SRem: |
540 | 37 | return visitSRemInstruction(I); |
541 | 92 | default: |
542 | 0 | break; // Fall through. |
543 | 0 | } |
544 | 0 | } |
545 | 0 | |
546 | 0 | llvm_unreachable( |
547 | 0 | "Unknowns SCEV was neither parameter nor a valid instruction."); |
548 | 0 | } |
549 | | |
550 | 2 | PWACtx SCEVAffinator::complexityBailout() { |
551 | 2 | // We hit the complexity limit for affine expressions; invalidate the scop |
552 | 2 | // and return a constant zero. |
553 | 2 | const DebugLoc &Loc = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc()0 ; |
554 | 2 | S->invalidate(COMPLEXITY, Loc); |
555 | 2 | return visit(SE.getZero(Type::getInt32Ty(S->getFunction().getContext()))); |
556 | 2 | } |