/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- SeparateConstOffsetFromGEP.cpp - ------------------------*- C++ -*-===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // Loop unrolling may create many similar GEPs for array accesses. |
11 | | // e.g., a 2-level loop |
12 | | // |
13 | | // float a[32][32]; // global variable |
14 | | // |
15 | | // for (int i = 0; i < 2; ++i) { |
16 | | // for (int j = 0; j < 2; ++j) { |
17 | | // ... |
18 | | // ... = a[x + i][y + j]; |
19 | | // ... |
20 | | // } |
21 | | // } |
22 | | // |
23 | | // will probably be unrolled to: |
24 | | // |
25 | | // gep %a, 0, %x, %y; load |
26 | | // gep %a, 0, %x, %y + 1; load |
27 | | // gep %a, 0, %x + 1, %y; load |
28 | | // gep %a, 0, %x + 1, %y + 1; load |
29 | | // |
30 | | // LLVM's GVN does not use partial redundancy elimination yet, and is thus |
31 | | // unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs |
32 | | // significant slowdown in targets with limited addressing modes. For instance, |
33 | | // because the PTX target does not support the reg+reg addressing mode, the |
34 | | // NVPTX backend emits PTX code that literally computes the pointer address of |
35 | | // each GEP, wasting tons of registers. It emits the following PTX for the |
36 | | // first load and similar PTX for other loads. |
37 | | // |
38 | | // mov.u32 %r1, %x; |
39 | | // mov.u32 %r2, %y; |
40 | | // mul.wide.u32 %rl2, %r1, 128; |
41 | | // mov.u64 %rl3, a; |
42 | | // add.s64 %rl4, %rl3, %rl2; |
43 | | // mul.wide.u32 %rl5, %r2, 4; |
44 | | // add.s64 %rl6, %rl4, %rl5; |
45 | | // ld.global.f32 %f1, [%rl6]; |
46 | | // |
47 | | // To reduce the register pressure, the optimization implemented in this file |
48 | | // merges the common part of a group of GEPs, so we can compute each pointer |
49 | | // address by adding a simple offset to the common part, saving many registers. |
50 | | // |
51 | | // It works by splitting each GEP into a variadic base and a constant offset. |
52 | | // The variadic base can be computed once and reused by multiple GEPs, and the |
53 | | // constant offsets can be nicely folded into the reg+immediate addressing mode |
54 | | // (supported by most targets) without using any extra register. |
55 | | // |
56 | | // For instance, we transform the four GEPs and four loads in the above example |
57 | | // into: |
58 | | // |
59 | | // base = gep a, 0, x, y |
60 | | // load base |
61 | | //   load base + 1 * sizeof(float)                                             |
62 | | // load base + 32 * sizeof(float) |
63 | | // load base + 33 * sizeof(float) |
64 | | // |
65 | | // Given the transformed IR, a backend that supports the reg+immediate |
66 | | // addressing mode can easily fold the pointer arithmetics into the loads. For |
67 | | // example, the NVPTX backend can easily fold the pointer arithmetics into the |
68 | | // ld.global.f32 instructions, and the resultant PTX uses much fewer registers. |
69 | | // |
70 | | // mov.u32 %r1, %tid.x; |
71 | | // mov.u32 %r2, %tid.y; |
72 | | // mul.wide.u32 %rl2, %r1, 128; |
73 | | // mov.u64 %rl3, a; |
74 | | // add.s64 %rl4, %rl3, %rl2; |
75 | | // mul.wide.u32 %rl5, %r2, 4; |
76 | | // add.s64 %rl6, %rl4, %rl5; |
77 | | // ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX |
78 | | // ld.global.f32 %f2, [%rl6+4]; // much better |
79 | | // ld.global.f32 %f3, [%rl6+128]; // much better |
80 | | // ld.global.f32 %f4, [%rl6+132]; // much better |
81 | | // |
82 | | // Another improvement enabled by the LowerGEP flag is to lower a GEP with |
83 | | // multiple indices to either multiple GEPs with a single index or arithmetic |
84 | | // operations (depending on whether the target uses alias analysis in codegen). |
85 | | // Such transformation can have following benefits: |
86 | | // (1) It can always extract constants in the indices of structure type. |
87 | | // (2) After such Lowering, there are more optimization opportunities such as |
88 | | // CSE, LICM and CGP. |
89 | | // |
90 | | // E.g. The following GEPs have multiple indices: |
91 | | // BB1: |
92 | | // %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3 |
93 | | // load %p |
94 | | // ... |
95 | | // BB2: |
96 | | // %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 2 |
97 | | // load %p2 |
98 | | // ... |
99 | | // |
100 | | // We cannot do CSE to the common part related to index "i64 %i". Lowering   |
101 | | // GEPs can achieve such goals. |
102 | | // If the target does not use alias analysis in codegen, this pass will |
103 | | // lower a GEP with multiple indices into arithmetic operations: |
104 | | // BB1: |
105 | | // %1 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity |
106 | | // %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity |
107 | | // %3 = add i64 %1, %2 ; CSE opportunity |
108 | | // %4 = mul i64 %j1, length_of_struct |
109 | | // %5 = add i64 %3, %4 |
110 | | // %6 = add i64 %3, struct_field_3 ; Constant offset |
111 | | // %p = inttoptr i64 %6 to i32* |
112 | | // load %p |
113 | | // ... |
114 | | // BB2: |
115 | | // %7 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity |
116 | | // %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity |
117 | | // %9 = add i64 %7, %8 ; CSE opportunity |
118 | | // %10 = mul i64 %j2, length_of_struct |
119 | | // %11 = add i64 %9, %10 |
120 | | // %12 = add i64 %11, struct_field_2 ; Constant offset |
121 | | // %p = inttoptr i64 %12 to i32* |
122 | | // load %p2 |
123 | | // ... |
124 | | // |
125 | | // If the target uses alias analysis in codegen, this pass will lower a GEP |
126 | | // with multiple indices into multiple GEPs with a single index: |
127 | | // BB1: |
128 | | // %1 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity |
129 | | // %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity |
130 | | // %3 = getelementptr i8* %1, i64 %2 ; CSE opportunity |
131 | | // %4 = mul i64 %j1, length_of_struct |
132 | | // %5 = getelementptr i8* %3, i64 %4 |
133 | | // %6 = getelementptr i8* %5, struct_field_3 ; Constant offset |
134 | | // %p = bitcast i8* %6 to i32* |
135 | | // load %p |
136 | | // ... |
137 | | // BB2: |
138 | | // %7 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity |
139 | | // %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity |
140 | | // %9 = getelementptr i8* %7, i64 %8 ; CSE opportunity |
141 | | // %10 = mul i64 %j2, length_of_struct |
142 | | // %11 = getelementptr i8* %9, i64 %10 |
143 | | // %12 = getelementptr i8* %11, struct_field_2 ; Constant offset |
144 | | // %p2 = bitcast i8* %12 to i32* |
145 | | // load %p2 |
146 | | // ... |
147 | | // |
148 | | // Lowering GEPs can also benefit other passes such as LICM and CGP. |
149 | | // LICM (Loop Invariant Code Motion) can not hoist/sink a GEP of multiple |
150 | | // indices if one of the indices is variant. If we lower such GEP into invariant |
151 | | // parts and variant parts, LICM can hoist/sink those invariant parts. |
152 | | // CGP (CodeGen Prepare) tries to sink address calculations that match the |
153 | | // target's addressing modes. A GEP with multiple indices may not match and will |
154 | | // not be sunk. If we lower such GEP into smaller parts, CGP may sink some of |
155 | | // them. So we end up with a better addressing mode. |
156 | | // |
157 | | //===----------------------------------------------------------------------===// |
158 | | |
159 | | #include "llvm/Analysis/LoopInfo.h" |
160 | | #include "llvm/Analysis/MemoryBuiltins.h" |
161 | | #include "llvm/Analysis/ScalarEvolution.h" |
162 | | #include "llvm/Analysis/TargetLibraryInfo.h" |
163 | | #include "llvm/Analysis/TargetTransformInfo.h" |
164 | | #include "llvm/Analysis/ValueTracking.h" |
165 | | #include "llvm/IR/Constants.h" |
166 | | #include "llvm/IR/DataLayout.h" |
167 | | #include "llvm/IR/Dominators.h" |
168 | | #include "llvm/IR/IRBuilder.h" |
169 | | #include "llvm/IR/Instructions.h" |
170 | | #include "llvm/IR/LLVMContext.h" |
171 | | #include "llvm/IR/Module.h" |
172 | | #include "llvm/IR/Operator.h" |
173 | | #include "llvm/IR/PatternMatch.h" |
174 | | #include "llvm/Support/CommandLine.h" |
175 | | #include "llvm/Support/raw_ostream.h" |
176 | | #include "llvm/Target/TargetMachine.h" |
177 | | #include "llvm/Target/TargetSubtargetInfo.h" |
178 | | #include "llvm/Transforms/Scalar.h" |
179 | | #include "llvm/Transforms/Utils/Local.h" |
180 | | |
181 | | using namespace llvm; |
182 | | using namespace llvm::PatternMatch; |
183 | | |
184 | | static cl::opt<bool> DisableSeparateConstOffsetFromGEP( |
185 | | "disable-separate-const-offset-from-gep", cl::init(false), |
186 | | cl::desc("Do not separate the constant offset from a GEP instruction"), |
187 | | cl::Hidden); |
188 | | // Setting this flag may emit false positives when the input module already |
189 | | // contains dead instructions. Therefore, we set it only in unit tests that are |
190 | | // free of dead code. |
191 | | static cl::opt<bool> |
192 | | VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false), |
193 | | cl::desc("Verify this pass produces no dead code"), |
194 | | cl::Hidden); |
195 | | |
196 | | namespace { |
197 | | |
198 | | /// \brief A helper class for separating a constant offset from a GEP index. |
199 | | /// |
200 | | /// In real programs, a GEP index may be more complicated than a simple addition |
201 | | /// of something and a constant integer which can be trivially splitted. For |
202 | | /// example, to split ((a << 3) | 5) + b, we need to search deeper for the |
203 | | /// constant offset, so that we can separate the index to (a << 3) + b and 5. |
204 | | /// |
205 | | /// Therefore, this class looks into the expression that computes a given GEP |
206 | | /// index, and tries to find a constant integer that can be hoisted to the |
207 | | /// outermost level of the expression as an addition. Not every constant in an |
208 | | /// expression can jump out. e.g., we cannot transform (b * (a + 5)) to (b * a + |
209 | | /// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5), however in this case, |
210 | | /// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15). |
211 | | class ConstantOffsetExtractor { |
212 | | public: |
213 | | /// Extracts a constant offset from the given GEP index. It returns the |
214 | | /// new index representing the remainder (equal to the original index minus |
215 | | /// the constant offset), or nullptr if we cannot extract a constant offset. |
216 | | /// \p Idx The given GEP index |
217 | | /// \p GEP The given GEP |
218 | | /// \p UserChainTail Outputs the tail of UserChain so that we can |
219 | | /// garbage-collect unused instructions in UserChain. |
220 | | static Value *Extract(Value *Idx, GetElementPtrInst *GEP, |
221 | | User *&UserChainTail, const DominatorTree *DT); |
222 | | /// Looks for a constant offset from the given GEP index without extracting |
223 | | /// it. It returns the numeric value of the extracted constant offset (0 if |
224 | | /// failed). The meaning of the arguments are the same as Extract. |
225 | | static int64_t Find(Value *Idx, GetElementPtrInst *GEP, |
226 | | const DominatorTree *DT); |
227 | | |
228 | | private: |
229 | | ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT) |
230 | 9.83k | : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) { |
231 | 9.83k | } |
232 | | /// Searches the expression that computes V for a non-zero constant C s.t. |
233 | | /// V can be reassociated into the form V' + C. If the searching is |
234 | | /// successful, returns C and update UserChain as a def-use chain from C to V; |
235 | | /// otherwise, UserChain is empty. |
236 | | /// |
237 | | /// \p V The given expression |
238 | | /// \p SignExtended Whether V will be sign-extended in the computation of the |
239 | | /// GEP index |
240 | | /// \p ZeroExtended Whether V will be zero-extended in the computation of the |
241 | | /// GEP index |
242 | | /// \p NonNegative Whether V is guaranteed to be non-negative. For example, |
243 | | /// an index of an inbounds GEP is guaranteed to be |
244 | | /// non-negative. Levaraging this, we can better split |
245 | | /// inbounds GEPs. |
246 | | APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative); |
247 | | /// A helper function to look into both operands of a binary operator. |
248 | | APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended, |
249 | | bool ZeroExtended); |
250 | | /// After finding the constant offset C from the GEP index I, we build a new |
251 | | /// index I' s.t. I' + C = I. This function builds and returns the new |
252 | | /// index I' according to UserChain produced by function "find". |
253 | | /// |
254 | | /// The building conceptually takes two steps: |
255 | | /// 1) iteratively distribute s/zext towards the leaves of the expression tree |
256 | | /// that computes I |
257 | | /// 2) reassociate the expression tree to the form I' + C. |
258 | | /// |
259 | | /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute |
260 | | /// sext to a, b and 5 so that we have |
261 | | /// sext(a) + (sext(b) + 5). |
262 | | /// Then, we reassociate it to |
263 | | /// (sext(a) + sext(b)) + 5. |
264 | | /// Given this form, we know I' is sext(a) + sext(b). |
265 | | Value *rebuildWithoutConstOffset(); |
266 | | /// After the first step of rebuilding the GEP index without the constant |
267 | | /// offset, distribute s/zext to the operands of all operators in UserChain. |
268 | | /// e.g., zext(sext(a + (b + 5)) (assuming no overflow) => |
269 | | /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))). |
270 | | /// |
271 | | /// The function also updates UserChain to point to new subexpressions after |
272 | | /// distributing s/zext. e.g., the old UserChain of the above example is |
273 | | /// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)), |
274 | | /// and the new UserChain is |
275 | | /// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) -> |
276 | | /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5)) |
277 | | /// |
278 | | /// \p ChainIndex The index to UserChain. ChainIndex is initially |
279 | | /// UserChain.size() - 1, and is decremented during |
280 | | /// the recursion. |
281 | | Value *distributeExtsAndCloneChain(unsigned ChainIndex); |
282 | | /// Reassociates the GEP index to the form I' + C and returns I'. |
283 | | Value *removeConstOffset(unsigned ChainIndex); |
284 | | /// A helper function to apply ExtInsts, a list of s/zext, to value V. |
285 | | /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function |
286 | | /// returns "sext i32 (zext i16 V to i32) to i64". |
287 | | Value *applyExts(Value *V); |
288 | | |
289 | | /// A helper function that returns whether we can trace into the operands |
290 | | /// of binary operator BO for a constant offset. |
291 | | /// |
292 | | /// \p SignExtended Whether BO is surrounded by sext |
293 | | /// \p ZeroExtended Whether BO is surrounded by zext |
294 | | /// \p NonNegative Whether BO is known to be non-negative, e.g., an in-bound |
295 | | /// array index. |
296 | | bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO, |
297 | | bool NonNegative); |
298 | | |
299 | | /// The path from the constant offset to the old GEP index. e.g., if the GEP |
300 | | /// index is "a * b + (c + 5)". After running function find, UserChain[0] will |
301 | | /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and |
302 | | /// UserChain[2] will be the entire expression "a * b + (c + 5)". |
303 | | /// |
304 | | /// This path helps to rebuild the new GEP index. |
305 | | SmallVector<User *, 8> UserChain; |
306 | | /// A data structure used in rebuildWithoutConstOffset. Contains all |
307 | | /// sext/zext instructions along UserChain. |
308 | | SmallVector<CastInst *, 16> ExtInsts; |
309 | | Instruction *IP; /// Insertion position of cloned instructions. |
310 | | const DataLayout &DL; |
311 | | const DominatorTree *DT; |
312 | | }; |
313 | | |
314 | | /// \brief A pass that tries to split every GEP in the function into a variadic |
315 | | /// base and a constant offset. It is a FunctionPass because searching for the |
316 | | /// constant offset may inspect other basic blocks. |
317 | | class SeparateConstOffsetFromGEP : public FunctionPass { |
318 | | public: |
319 | | static char ID; |
320 | | SeparateConstOffsetFromGEP(const TargetMachine *TM = nullptr, |
321 | | bool LowerGEP = false) |
322 | 3.10k | : FunctionPass(ID), DL(nullptr), DT(nullptr), TM(TM), LowerGEP(LowerGEP) { |
323 | 3.10k | initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry()); |
324 | 3.10k | } |
325 | | |
326 | 3.09k | void getAnalysisUsage(AnalysisUsage &AU) const override { |
327 | 3.09k | AU.addRequired<DominatorTreeWrapperPass>(); |
328 | 3.09k | AU.addRequired<ScalarEvolutionWrapperPass>(); |
329 | 3.09k | AU.addRequired<TargetTransformInfoWrapperPass>(); |
330 | 3.09k | AU.addRequired<LoopInfoWrapperPass>(); |
331 | 3.09k | AU.setPreservesCFG(); |
332 | 3.09k | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
333 | 3.09k | } |
334 | | |
335 | 3.09k | bool doInitialization(Module &M) override { |
336 | 3.09k | DL = &M.getDataLayout(); |
337 | 3.09k | return false; |
338 | 3.09k | } |
339 | | bool runOnFunction(Function &F) override; |
340 | | |
341 | | private: |
342 | | /// Tries to split the given GEP into a variadic base and a constant offset, |
343 | | /// and returns true if the splitting succeeds. |
344 | | bool splitGEP(GetElementPtrInst *GEP); |
345 | | /// Lower a GEP with multiple indices into multiple GEPs with a single index. |
346 | | /// Function splitGEP already split the original GEP into a variadic part and |
347 | | /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the |
348 | | /// variadic part into a set of GEPs with a single index and applies |
349 | | /// AccumulativeByteOffset to it. |
350 | | /// \p Variadic The variadic part of the original GEP. |
351 | | /// \p AccumulativeByteOffset The constant offset. |
352 | | void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic, |
353 | | int64_t AccumulativeByteOffset); |
354 | | /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form. |
355 | | /// Function splitGEP already split the original GEP into a variadic part and |
356 | | /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the |
357 | | /// variadic part into a set of arithmetic operations and applies |
358 | | /// AccumulativeByteOffset to it. |
359 | | /// \p Variadic The variadic part of the original GEP. |
360 | | /// \p AccumulativeByteOffset The constant offset. |
361 | | void lowerToArithmetics(GetElementPtrInst *Variadic, |
362 | | int64_t AccumulativeByteOffset); |
363 | | /// Finds the constant offset within each index and accumulates them. If |
364 | | /// LowerGEP is true, it finds in indices of both sequential and structure |
365 | | /// types, otherwise it only finds in sequential indices. The output |
366 | | /// NeedsExtraction indicates whether we successfully find a non-zero constant |
367 | | /// offset. |
368 | | int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction); |
369 | | /// Canonicalize array indices to pointer-size integers. This helps to |
370 | | /// simplify the logic of splitting a GEP. For example, if a + b is a |
371 | | /// pointer-size integer, we have |
372 | | /// gep base, a + b = gep (gep base, a), b |
373 | | /// However, this equality may not hold if the size of a + b is smaller than |
374 | | /// the pointer size, because LLVM conceptually sign-extends GEP indices to |
375 | | /// pointer size before computing the address |
376 | | /// (http://llvm.org/docs/LangRef.html#id181). |
377 | | /// |
378 | | /// This canonicalization is very likely already done in clang and |
379 | | /// instcombine. Therefore, the program will probably remain the same. |
380 | | /// |
381 | | /// Returns true if the module changes. |
382 | | /// |
383 | | /// Verified in @i32_add in split-gep.ll |
384 | | bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP); |
385 | | /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow. |
386 | | /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting |
387 | | /// the constant offset. After extraction, it becomes desirable to reunion the |
388 | | /// distributed sexts. For example, |
389 | | /// |
390 | | /// &a[sext(i +nsw (j +nsw 5)] |
391 | | /// => distribute &a[sext(i) +nsw (sext(j) +nsw 5)] |
392 | | /// => constant extraction &a[sext(i) + sext(j)] + 5 |
393 | | /// => reunion &a[sext(i +nsw j)] + 5 |
394 | | bool reuniteExts(Function &F); |
395 | | /// A helper that reunites sexts in an instruction. |
396 | | bool reuniteExts(Instruction *I); |
397 | | /// Find the closest dominator of <Dominatee> that is equivalent to <Key>. |
398 | | Instruction *findClosestMatchingDominator(const SCEV *Key, |
399 | | Instruction *Dominatee); |
400 | | /// Verify F is free of dead code. |
401 | | void verifyNoDeadCode(Function &F); |
402 | | |
403 | | bool hasMoreThanOneUseInLoop(Value *v, Loop *L); |
404 | | // Swap the index operand of two GEP. |
405 | | void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second); |
406 | | // Check if it is safe to swap operand of two GEP. |
407 | | bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second, |
408 | | Loop *CurLoop); |
409 | | |
410 | | const DataLayout *DL; |
411 | | DominatorTree *DT; |
412 | | ScalarEvolution *SE; |
413 | | const TargetMachine *TM; |
414 | | |
415 | | LoopInfo *LI; |
416 | | TargetLibraryInfo *TLI; |
417 | | /// Whether to lower a GEP with multiple indices into arithmetic operations or |
418 | | /// multiple GEPs with a single index. |
419 | | bool LowerGEP; |
420 | | DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingExprs; |
421 | | }; |
422 | | } // anonymous namespace |
423 | | |
424 | | char SeparateConstOffsetFromGEP::ID = 0; |
425 | 24.6k | INITIALIZE_PASS_BEGIN24.6k (
|
426 | 24.6k | SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", |
427 | 24.6k | "Split GEPs to a variadic base and a constant offset for better CSE", false, |
428 | 24.6k | false) |
429 | 24.6k | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
430 | 24.6k | INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) |
431 | 24.6k | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
432 | 24.6k | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
433 | 24.6k | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
434 | 24.6k | INITIALIZE_PASS_END( |
435 | | SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", |
436 | | "Split GEPs to a variadic base and a constant offset for better CSE", false, |
437 | | false) |
438 | | |
439 | | FunctionPass * |
440 | | llvm::createSeparateConstOffsetFromGEPPass(const TargetMachine *TM, |
441 | 3.10k | bool LowerGEP) { |
442 | 3.10k | return new SeparateConstOffsetFromGEP(TM, LowerGEP); |
443 | 3.10k | } |
444 | | |
445 | | bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended, |
446 | | bool ZeroExtended, |
447 | | BinaryOperator *BO, |
448 | 2.94k | bool NonNegative) { |
449 | 2.94k | // We only consider ADD, SUB and OR, because a non-zero constant found in |
450 | 2.94k | // expressions composed of these operations can be easily hoisted as a |
451 | 2.94k | // constant offset by reassociation. |
452 | 2.94k | if (BO->getOpcode() != Instruction::Add && |
453 | 583 | BO->getOpcode() != Instruction::Sub && |
454 | 2.94k | BO->getOpcode() != Instruction::Or502 ) { |
455 | 287 | return false; |
456 | 287 | } |
457 | 2.65k | |
458 | 2.65k | Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1); |
459 | 2.65k | // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS |
460 | 2.65k | // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS). |
461 | 2.65k | if (BO->getOpcode() == Instruction::Or && |
462 | 215 | !haveNoCommonBitsSet(LHS, RHS, DL, nullptr, BO, DT)) |
463 | 5 | return false; |
464 | 2.65k | |
465 | 2.65k | // In addition, tracing into BO requires that its surrounding s/zext (if |
466 | 2.65k | // any) is distributable to both operands. |
467 | 2.65k | // |
468 | 2.65k | // Suppose BO = A op B. |
469 | 2.65k | // SignExtended | ZeroExtended | Distributable? |
470 | 2.65k | // --------------+--------------+---------------------------------- |
471 | 2.65k | // 0 | 0 | true because no s/zext exists |
472 | 2.65k | // 0 | 1 | zext(BO) == zext(A) op zext(B) |
473 | 2.65k | // 1 | 0 | sext(BO) == sext(A) op sext(B) |
474 | 2.65k | // 1 | 1 | zext(sext(BO)) == |
475 | 2.65k | // | | zext(sext(A)) op zext(sext(B)) |
476 | 2.65k | if (2.65k BO->getOpcode() == Instruction::Add && 2.65k !ZeroExtended2.36k && NonNegative2.32k ) { |
477 | 1.19k | // If a + b >= 0 and (a >= 0 or b >= 0), then |
478 | 1.19k | // sext(a + b) = sext(a) + sext(b) |
479 | 1.19k | // even if the addition is not marked nsw. |
480 | 1.19k | // |
481 | 1.19k | // Leveraging this invarient, we can trace into an sext'ed inbound GEP |
482 | 1.19k | // index if the constant offset is non-negative. |
483 | 1.19k | // |
484 | 1.19k | // Verified in @sext_add in split-gep.ll. |
485 | 1.19k | if (ConstantInt *ConstLHS1.19k = dyn_cast<ConstantInt>(LHS)) { |
486 | 0 | if (!ConstLHS->isNegative()) |
487 | 0 | return true; |
488 | 1.19k | } |
489 | 1.19k | if (ConstantInt *1.19k ConstRHS1.19k = dyn_cast<ConstantInt>(RHS)) { |
490 | 1.11k | if (!ConstRHS->isNegative()) |
491 | 912 | return true; |
492 | 1.74k | } |
493 | 1.19k | } |
494 | 1.74k | |
495 | 1.74k | // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B) |
496 | 1.74k | // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B) |
497 | 1.74k | if (1.74k BO->getOpcode() == Instruction::Add || |
498 | 1.74k | BO->getOpcode() == Instruction::Sub291 ) { |
499 | 1.53k | if (SignExtended && 1.53k !BO->hasNoSignedWrap()280 ) |
500 | 15 | return false; |
501 | 1.51k | if (1.51k ZeroExtended && 1.51k !BO->hasNoUnsignedWrap()35 ) |
502 | 6 | return false; |
503 | 1.72k | } |
504 | 1.72k | |
505 | 1.72k | return true; |
506 | 1.72k | } |
507 | | |
508 | | APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO, |
509 | | bool SignExtended, |
510 | 2.63k | bool ZeroExtended) { |
511 | 2.63k | // BO being non-negative does not shed light on whether its operands are |
512 | 2.63k | // non-negative. Clear the NonNegative flag here. |
513 | 2.63k | APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended, |
514 | 2.63k | /* NonNegative */ false); |
515 | 2.63k | // If we found a constant offset in the left operand, stop and return that. |
516 | 2.63k | // This shortcut might cause us to miss opportunities of combining the |
517 | 2.63k | // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9. |
518 | 2.63k | // However, such cases are probably already handled by -instcombine, |
519 | 2.63k | // given this pass runs after the standard optimizations. |
520 | 2.63k | if (ConstantOffset != 02.63k ) return ConstantOffset564 ; |
521 | 2.06k | ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended, |
522 | 2.06k | /* NonNegative */ false); |
523 | 2.06k | // If U is a sub operator, negate the constant offset found in the right |
524 | 2.06k | // operand. |
525 | 2.06k | if (BO->getOpcode() == Instruction::Sub) |
526 | 60 | ConstantOffset = -ConstantOffset; |
527 | 2.63k | return ConstantOffset; |
528 | 2.63k | } |
529 | | |
530 | | APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended, |
531 | 20.5k | bool ZeroExtended, bool NonNegative) { |
532 | 20.5k | // TODO(jingyue): We could trace into integer/pointer casts, such as |
533 | 20.5k | // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only |
534 | 20.5k | // integers because it gives good enough results for our benchmarks. |
535 | 20.5k | unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); |
536 | 20.5k | |
537 | 20.5k | // We cannot do much with Values that are not a User, such as an Argument. |
538 | 20.5k | User *U = dyn_cast<User>(V); |
539 | 20.5k | if (U == nullptr20.5k ) return APInt(BitWidth, 0)1.80k ; |
540 | 18.7k | |
541 | 18.7k | APInt ConstantOffset(BitWidth, 0); |
542 | 18.7k | if (ConstantInt *CI18.7k = dyn_cast<ConstantInt>(V)) { |
543 | 3.21k | // Hooray, we found it! |
544 | 3.21k | ConstantOffset = CI->getValue(); |
545 | 18.7k | } else if (BinaryOperator *15.5k BO15.5k = dyn_cast<BinaryOperator>(V)) { |
546 | 2.94k | // Trace into subexpressions for more hoisting opportunities. |
547 | 2.94k | if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative)) |
548 | 2.63k | ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended); |
549 | 15.5k | } else if (12.5k isa<SExtInst>(V)12.5k ) { |
550 | 5.91k | ConstantOffset = find(U->getOperand(0), /* SignExtended */ true, |
551 | 5.91k | ZeroExtended, NonNegative).sext(BitWidth); |
552 | 12.5k | } else if (6.68k isa<ZExtInst>(V)6.68k ) { |
553 | 119 | // As an optimization, we can clear the SignExtended flag because |
554 | 119 | // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll. |
555 | 119 | // |
556 | 119 | // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0. |
557 | 119 | ConstantOffset = |
558 | 119 | find(U->getOperand(0), /* SignExtended */ false, |
559 | 119 | /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth); |
560 | 119 | } |
561 | 18.7k | |
562 | 18.7k | // If we found a non-zero constant offset, add it to the path for |
563 | 18.7k | // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't |
564 | 18.7k | // help this optimization. |
565 | 18.7k | if (ConstantOffset != 0) |
566 | 5.05k | UserChain.push_back(U); |
567 | 20.5k | return ConstantOffset; |
568 | 20.5k | } |
569 | | |
570 | 2.05k | Value *ConstantOffsetExtractor::applyExts(Value *V) { |
571 | 2.05k | Value *Current = V; |
572 | 2.05k | // ExtInsts is built in the use-def order. Therefore, we apply them to V |
573 | 2.05k | // in the reversed order. |
574 | 2.69k | for (auto I = ExtInsts.rbegin(), E = ExtInsts.rend(); I != E2.69k ; ++I641 ) { |
575 | 641 | if (Constant *C641 = dyn_cast<Constant>(Current)) { |
576 | 332 | // If Current is a constant, apply s/zext using ConstantExpr::getCast. |
577 | 332 | // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt. |
578 | 332 | Current = ConstantExpr::getCast((*I)->getOpcode(), C, (*I)->getType()); |
579 | 641 | } else { |
580 | 309 | Instruction *Ext = (*I)->clone(); |
581 | 309 | Ext->setOperand(0, Current); |
582 | 309 | Ext->insertBefore(IP); |
583 | 309 | Current = Ext; |
584 | 309 | } |
585 | 641 | } |
586 | 2.05k | return Current; |
587 | 2.05k | } |
588 | | |
589 | 910 | Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() { |
590 | 910 | distributeExtsAndCloneChain(UserChain.size() - 1); |
591 | 910 | // Remove all nullptrs (used to be s/zext) from UserChain. |
592 | 910 | unsigned NewSize = 0; |
593 | 2.37k | for (User *I : UserChain) { |
594 | 2.37k | if (I != nullptr2.37k ) { |
595 | 2.05k | UserChain[NewSize] = I; |
596 | 2.05k | NewSize++; |
597 | 2.05k | } |
598 | 2.37k | } |
599 | 910 | UserChain.resize(NewSize); |
600 | 910 | return removeConstOffset(UserChain.size() - 1); |
601 | 910 | } |
602 | | |
603 | | Value * |
604 | 2.37k | ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) { |
605 | 2.37k | User *U = UserChain[ChainIndex]; |
606 | 2.37k | if (ChainIndex == 02.37k ) { |
607 | 910 | assert(isa<ConstantInt>(U)); |
608 | 910 | // If U is a ConstantInt, applyExts will return a ConstantInt as well. |
609 | 910 | return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U)); |
610 | 910 | } |
611 | 1.46k | |
612 | 1.46k | if (CastInst *1.46k Cast1.46k = dyn_cast<CastInst>(U)) { |
613 | 324 | assert((isa<SExtInst>(Cast) || isa<ZExtInst>(Cast)) && |
614 | 324 | "We only traced into two types of CastInst: sext and zext"); |
615 | 324 | ExtInsts.push_back(Cast); |
616 | 324 | UserChain[ChainIndex] = nullptr; |
617 | 324 | return distributeExtsAndCloneChain(ChainIndex - 1); |
618 | 324 | } |
619 | 1.14k | |
620 | 1.14k | // Function find only trace into BinaryOperator and CastInst. |
621 | 1.14k | BinaryOperator *BO = cast<BinaryOperator>(U); |
622 | 1.14k | // OpNo = which operand of BO is UserChain[ChainIndex - 1] |
623 | 1.14k | unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0282 : 1862 ); |
624 | 1.14k | Value *TheOther = applyExts(BO->getOperand(1 - OpNo)); |
625 | 1.14k | Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1); |
626 | 1.14k | |
627 | 1.14k | BinaryOperator *NewBO = nullptr; |
628 | 1.14k | if (OpNo == 01.14k ) { |
629 | 282 | NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther, |
630 | 282 | BO->getName(), IP); |
631 | 1.14k | } else { |
632 | 862 | NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain, |
633 | 862 | BO->getName(), IP); |
634 | 862 | } |
635 | 2.37k | return UserChain[ChainIndex] = NewBO; |
636 | 2.37k | } |
637 | | |
638 | 2.05k | Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) { |
639 | 2.05k | if (ChainIndex == 02.05k ) { |
640 | 910 | assert(isa<ConstantInt>(UserChain[ChainIndex])); |
641 | 910 | return ConstantInt::getNullValue(UserChain[ChainIndex]->getType()); |
642 | 910 | } |
643 | 1.14k | |
644 | 1.14k | BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]); |
645 | 1.14k | assert(BO->getNumUses() <= 1 && |
646 | 1.14k | "distributeExtsAndCloneChain clones each BinaryOperator in " |
647 | 1.14k | "UserChain, so no one should be used more than " |
648 | 1.14k | "once"); |
649 | 1.14k | |
650 | 1.14k | unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0282 : 1862 ); |
651 | 1.14k | assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]); |
652 | 1.14k | Value *NextInChain = removeConstOffset(ChainIndex - 1); |
653 | 1.14k | Value *TheOther = BO->getOperand(1 - OpNo); |
654 | 1.14k | |
655 | 1.14k | // If NextInChain is 0 and not the LHS of a sub, we can simplify the |
656 | 1.14k | // sub-expression to be just TheOther. |
657 | 1.14k | if (ConstantInt *CI1.14k = dyn_cast<ConstantInt>(NextInChain)) { |
658 | 867 | if (CI->isZero() && 867 !(BO->getOpcode() == Instruction::Sub && 867 OpNo == 09 )) |
659 | 860 | return TheOther; |
660 | 284 | } |
661 | 284 | |
662 | 284 | BinaryOperator::BinaryOps NewOp = BO->getOpcode(); |
663 | 284 | if (BO->getOpcode() == Instruction::Or284 ) { |
664 | 1 | // Rebuild "or" as "add", because "or" may be invalid for the new |
665 | 1 | // epxression. |
666 | 1 | // |
667 | 1 | // For instance, given |
668 | 1 | // a | (b + 5) where a and b + 5 have no common bits, |
669 | 1 | // we can extract 5 as the constant offset. |
670 | 1 | // |
671 | 1 | // However, reusing the "or" in the new index would give us |
672 | 1 | // (a | b) + 5 |
673 | 1 | // which does not equal a | (b + 5). |
674 | 1 | // |
675 | 1 | // Replacing the "or" with "add" is fine, because |
676 | 1 | // a | (b + 5) = a + (b + 5) = (a + b) + 5 |
677 | 1 | NewOp = Instruction::Add; |
678 | 1 | } |
679 | 284 | |
680 | 284 | BinaryOperator *NewBO; |
681 | 284 | if (OpNo == 0284 ) { |
682 | 274 | NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP); |
683 | 284 | } else { |
684 | 10 | NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP); |
685 | 10 | } |
686 | 2.05k | NewBO->takeName(BO); |
687 | 2.05k | return NewBO; |
688 | 2.05k | } |
689 | | |
690 | | Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP, |
691 | | User *&UserChainTail, |
692 | 1.33k | const DominatorTree *DT) { |
693 | 1.33k | ConstantOffsetExtractor Extractor(GEP, DT); |
694 | 1.33k | // Find a non-zero constant offset first. |
695 | 1.33k | APInt ConstantOffset = |
696 | 1.33k | Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false, |
697 | 1.33k | GEP->isInBounds()); |
698 | 1.33k | if (ConstantOffset == 01.33k ) { |
699 | 427 | UserChainTail = nullptr; |
700 | 427 | return nullptr; |
701 | 427 | } |
702 | 910 | // Separates the constant offset from the GEP index. |
703 | 910 | Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset(); |
704 | 910 | UserChainTail = Extractor.UserChain.back(); |
705 | 910 | return IdxWithoutConstOffset; |
706 | 910 | } |
707 | | |
708 | | int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP, |
709 | 8.49k | const DominatorTree *DT) { |
710 | 8.49k | // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative. |
711 | 8.49k | return ConstantOffsetExtractor(GEP, DT) |
712 | 8.49k | .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false, |
713 | 8.49k | GEP->isInBounds()) |
714 | 8.49k | .getSExtValue(); |
715 | 8.49k | } |
716 | | |
717 | | bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize( |
718 | 7.43k | GetElementPtrInst *GEP) { |
719 | 7.43k | bool Changed = false; |
720 | 7.43k | Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); |
721 | 7.43k | gep_type_iterator GTI = gep_type_begin(*GEP); |
722 | 7.43k | for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); |
723 | 16.0k | I != E16.0k ; ++I, ++GTI8.60k ) { |
724 | 8.60k | // Skip struct member indices which must be i32. |
725 | 8.60k | if (GTI.isSequential()8.60k ) { |
726 | 8.49k | if ((*I)->getType() != IntPtrTy8.49k ) { |
727 | 3.98k | *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP); |
728 | 3.98k | Changed = true; |
729 | 3.98k | } |
730 | 8.49k | } |
731 | 8.60k | } |
732 | 7.43k | return Changed; |
733 | 7.43k | } |
734 | | |
735 | | int64_t |
736 | | SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP, |
737 | 7.43k | bool &NeedsExtraction) { |
738 | 7.43k | NeedsExtraction = false; |
739 | 7.43k | int64_t AccumulativeByteOffset = 0; |
740 | 7.43k | gep_type_iterator GTI = gep_type_begin(*GEP); |
741 | 16.0k | for (unsigned I = 1, E = GEP->getNumOperands(); I != E16.0k ; ++I, ++GTI8.60k ) { |
742 | 8.60k | if (GTI.isSequential()8.60k ) { |
743 | 8.49k | // Tries to extract a constant offset from this GEP index. |
744 | 8.49k | int64_t ConstantOffset = |
745 | 8.49k | ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT); |
746 | 8.49k | if (ConstantOffset != 08.49k ) { |
747 | 1.04k | NeedsExtraction = true; |
748 | 1.04k | // A GEP may have multiple indices. We accumulate the extracted |
749 | 1.04k | // constant offset to a byte offset, and later offset the remainder of |
750 | 1.04k | // the original GEP with this byte offset. |
751 | 1.04k | AccumulativeByteOffset += |
752 | 1.04k | ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType()); |
753 | 1.04k | } |
754 | 8.60k | } else if (113 LowerGEP113 ) { |
755 | 86 | StructType *StTy = GTI.getStructType(); |
756 | 86 | uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue(); |
757 | 86 | // Skip field 0 as the offset is always 0. |
758 | 86 | if (Field != 086 ) { |
759 | 81 | NeedsExtraction = true; |
760 | 81 | AccumulativeByteOffset += |
761 | 81 | DL->getStructLayout(StTy)->getElementOffset(Field); |
762 | 81 | } |
763 | 113 | } |
764 | 8.60k | } |
765 | 7.43k | return AccumulativeByteOffset; |
766 | 7.43k | } |
767 | | |
768 | | void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( |
769 | 439 | GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) { |
770 | 439 | IRBuilder<> Builder(Variadic); |
771 | 439 | Type *IntPtrTy = DL->getIntPtrType(Variadic->getType()); |
772 | 439 | |
773 | 439 | Type *I8PtrTy = |
774 | 439 | Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace()); |
775 | 439 | Value *ResultPtr = Variadic->getOperand(0); |
776 | 439 | Loop *L = LI->getLoopFor(Variadic->getParent()); |
777 | 439 | // Check if the base is not loop invariant or used more than once. |
778 | 439 | bool isSwapCandidate = |
779 | 170 | L && L->isLoopInvariant(ResultPtr) && |
780 | 164 | !hasMoreThanOneUseInLoop(ResultPtr, L); |
781 | 439 | Value *FirstResult = nullptr; |
782 | 439 | |
783 | 439 | if (ResultPtr->getType() != I8PtrTy) |
784 | 438 | ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); |
785 | 439 | |
786 | 439 | gep_type_iterator GTI = gep_type_begin(*Variadic); |
787 | 439 | // Create an ugly GEP for each sequential index. We don't create GEPs for |
788 | 439 | // structure indices, as they are accumulated in the constant offset index. |
789 | 1.02k | for (unsigned I = 1, E = Variadic->getNumOperands(); I != E1.02k ; ++I, ++GTI587 ) { |
790 | 587 | if (GTI.isSequential()587 ) { |
791 | 546 | Value *Idx = Variadic->getOperand(I); |
792 | 546 | // Skip zero indices. |
793 | 546 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) |
794 | 119 | if (119 CI->isZero()119 ) |
795 | 115 | continue; |
796 | 431 | |
797 | 431 | APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(), |
798 | 431 | DL->getTypeAllocSize(GTI.getIndexedType())); |
799 | 431 | // Scale the index by element size. |
800 | 431 | if (ElementSize != 1431 ) { |
801 | 430 | if (ElementSize.isPowerOf2()430 ) { |
802 | 418 | Idx = Builder.CreateShl( |
803 | 418 | Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2())); |
804 | 430 | } else { |
805 | 12 | Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize)); |
806 | 12 | } |
807 | 430 | } |
808 | 431 | // Create an ugly GEP with a single index for each index. |
809 | 431 | ResultPtr = |
810 | 431 | Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep"); |
811 | 431 | if (FirstResult == nullptr) |
812 | 431 | FirstResult = ResultPtr; |
813 | 546 | } |
814 | 587 | } |
815 | 439 | |
816 | 439 | // Create a GEP with the constant offset index. |
817 | 439 | if (AccumulativeByteOffset != 0439 ) { |
818 | 439 | Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset); |
819 | 439 | ResultPtr = |
820 | 439 | Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep"); |
821 | 439 | } else |
822 | 0 | isSwapCandidate = false; |
823 | 439 | |
824 | 439 | // If we created a GEP with constant index, and the base is loop invariant, |
825 | 439 | // then we swap the first one with it, so LICM can move constant GEP out |
826 | 439 | // later. |
827 | 439 | GetElementPtrInst *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult); |
828 | 439 | GetElementPtrInst *SecondGEP = dyn_cast_or_null<GetElementPtrInst>(ResultPtr); |
829 | 439 | if (isSwapCandidate && 439 isLegalToSwapOperand(FirstGEP, SecondGEP, L)0 ) |
830 | 0 | swapGEPOperand(FirstGEP, SecondGEP); |
831 | 439 | |
832 | 439 | if (ResultPtr->getType() != Variadic->getType()) |
833 | 438 | ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType()); |
834 | 439 | |
835 | 439 | Variadic->replaceAllUsesWith(ResultPtr); |
836 | 439 | Variadic->eraseFromParent(); |
837 | 439 | } |
838 | | |
839 | | void |
840 | | SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic, |
841 | 100 | int64_t AccumulativeByteOffset) { |
842 | 100 | IRBuilder<> Builder(Variadic); |
843 | 100 | Type *IntPtrTy = DL->getIntPtrType(Variadic->getType()); |
844 | 100 | |
845 | 100 | Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy); |
846 | 100 | gep_type_iterator GTI = gep_type_begin(*Variadic); |
847 | 100 | // Create ADD/SHL/MUL arithmetic operations for each sequential indices. We |
848 | 100 | // don't create arithmetics for structure indices, as they are accumulated |
849 | 100 | // in the constant offset index. |
850 | 332 | for (unsigned I = 1, E = Variadic->getNumOperands(); I != E332 ; ++I, ++GTI232 ) { |
851 | 232 | if (GTI.isSequential()232 ) { |
852 | 191 | Value *Idx = Variadic->getOperand(I); |
853 | 191 | // Skip zero indices. |
854 | 191 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) |
855 | 80 | if (80 CI->isZero()80 ) |
856 | 76 | continue; |
857 | 115 | |
858 | 115 | APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(), |
859 | 115 | DL->getTypeAllocSize(GTI.getIndexedType())); |
860 | 115 | // Scale the index by element size. |
861 | 115 | if (ElementSize != 1115 ) { |
862 | 115 | if (ElementSize.isPowerOf2()115 ) { |
863 | 98 | Idx = Builder.CreateShl( |
864 | 98 | Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2())); |
865 | 115 | } else { |
866 | 17 | Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize)); |
867 | 17 | } |
868 | 115 | } |
869 | 191 | // Create an ADD for each index. |
870 | 191 | ResultPtr = Builder.CreateAdd(ResultPtr, Idx); |
871 | 191 | } |
872 | 232 | } |
873 | 100 | |
874 | 100 | // Create an ADD for the constant offset index. |
875 | 100 | if (AccumulativeByteOffset != 0100 ) { |
876 | 100 | ResultPtr = Builder.CreateAdd( |
877 | 100 | ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset)); |
878 | 100 | } |
879 | 100 | |
880 | 100 | ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType()); |
881 | 100 | Variadic->replaceAllUsesWith(ResultPtr); |
882 | 100 | Variadic->eraseFromParent(); |
883 | 100 | } |
884 | | |
885 | 14.1k | bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { |
886 | 14.1k | // Skip vector GEPs. |
887 | 14.1k | if (GEP->getType()->isVectorTy()) |
888 | 12 | return false; |
889 | 14.1k | |
890 | 14.1k | // The backend can already nicely handle the case where all indices are |
891 | 14.1k | // constant. |
892 | 14.1k | if (14.1k GEP->hasAllConstantIndices()14.1k ) |
893 | 6.68k | return false; |
894 | 7.43k | |
895 | 7.43k | bool Changed = canonicalizeArrayIndicesToPointerSize(GEP); |
896 | 7.43k | |
897 | 7.43k | bool NeedsExtraction; |
898 | 7.43k | int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction); |
899 | 7.43k | |
900 | 7.43k | if (!NeedsExtraction) |
901 | 6.38k | return Changed; |
902 | 1.04k | // If LowerGEP is disabled, before really splitting the GEP, check whether the |
903 | 1.04k | // backend supports the addressing mode we are about to produce. If no, this |
904 | 1.04k | // splitting probably won't be beneficial. |
905 | 1.04k | // If LowerGEP is enabled, even the extracted constant offset can not match |
906 | 1.04k | // the addressing mode, we can still do optimizations to other lowered parts |
907 | 1.04k | // of variable indices. Therefore, we don't check for addressing modes in that |
908 | 1.04k | // case. |
909 | 1.04k | if (1.04k !LowerGEP1.04k ) { |
910 | 509 | TargetTransformInfo &TTI = |
911 | 509 | getAnalysis<TargetTransformInfoWrapperPass>().getTTI( |
912 | 509 | *GEP->getParent()->getParent()); |
913 | 509 | unsigned AddrSpace = GEP->getPointerAddressSpace(); |
914 | 509 | if (!TTI.isLegalAddressingMode(GEP->getResultElementType(), |
915 | 509 | /*BaseGV=*/nullptr, AccumulativeByteOffset, |
916 | 509 | /*HasBaseReg=*/true, /*Scale=*/0, |
917 | 509 | AddrSpace)) { |
918 | 137 | return Changed; |
919 | 137 | } |
920 | 911 | } |
921 | 911 | |
922 | 911 | // Remove the constant offset in each sequential index. The resultant GEP |
923 | 911 | // computes the variadic base. |
924 | 911 | // Notice that we don't remove struct field indices here. If LowerGEP is |
925 | 911 | // disabled, a structure index is not accumulated and we still use the old |
926 | 911 | // one. If LowerGEP is enabled, a structure index is accumulated in the |
927 | 911 | // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later |
928 | 911 | // handle the constant offset and won't need a new structure index. |
929 | 911 | gep_type_iterator GTI = gep_type_begin(*GEP); |
930 | 2.33k | for (unsigned I = 1, E = GEP->getNumOperands(); I != E2.33k ; ++I, ++GTI1.42k ) { |
931 | 1.42k | if (GTI.isSequential()1.42k ) { |
932 | 1.33k | // Splits this GEP index into a variadic part and a constant offset, and |
933 | 1.33k | // uses the variadic part as the new index. |
934 | 1.33k | Value *OldIdx = GEP->getOperand(I); |
935 | 1.33k | User *UserChainTail; |
936 | 1.33k | Value *NewIdx = |
937 | 1.33k | ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT); |
938 | 1.33k | if (NewIdx != nullptr1.33k ) { |
939 | 910 | // Switches to the index with the constant offset removed. |
940 | 910 | GEP->setOperand(I, NewIdx); |
941 | 910 | // After switching to the new index, we can garbage-collect UserChain |
942 | 910 | // and the old index if they are not used. |
943 | 910 | RecursivelyDeleteTriviallyDeadInstructions(UserChainTail); |
944 | 910 | RecursivelyDeleteTriviallyDeadInstructions(OldIdx); |
945 | 910 | } |
946 | 1.33k | } |
947 | 1.42k | } |
948 | 911 | |
949 | 911 | // Clear the inbounds attribute because the new index may be off-bound. |
950 | 911 | // e.g., |
951 | 911 | // |
952 | 911 | // b = add i64 a, 5 |
953 | 911 | // addr = gep inbounds float, float* p, i64 b |
954 | 911 | // |
955 | 911 | // is transformed to: |
956 | 911 | // |
957 | 911 | // addr2 = gep float, float* p, i64 a ; inbounds removed |
958 | 911 | // addr = gep inbounds float, float* addr2, i64 5 |
959 | 911 | // |
960 | 911 | // If a is -4, although the old index b is in bounds, the new index a is |
961 | 911 | // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the |
962 | 911 | // inbounds keyword is not present, the offsets are added to the base |
963 | 911 | // address with silently-wrapping two's complement arithmetic". |
964 | 911 | // Therefore, the final code will be a semantically equivalent. |
965 | 911 | // |
966 | 911 | // TODO(jingyue): do some range analysis to keep as many inbounds as |
967 | 911 | // possible. GEPs with inbounds are more friendly to alias analysis. |
968 | 911 | bool GEPWasInBounds = GEP->isInBounds(); |
969 | 911 | GEP->setIsInBounds(false); |
970 | 911 | |
971 | 911 | // Lowers a GEP to either GEPs with a single index or arithmetic operations. |
972 | 911 | if (LowerGEP911 ) { |
973 | 539 | // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to |
974 | 539 | // arithmetic operations if the target uses alias analysis in codegen. |
975 | 539 | if (TM && 539 TM->getSubtargetImpl(*GEP->getParent()->getParent())->useAA()539 ) |
976 | 439 | lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset); |
977 | 539 | else |
978 | 100 | lowerToArithmetics(GEP, AccumulativeByteOffset); |
979 | 539 | return true; |
980 | 539 | } |
981 | 372 | |
982 | 372 | // No need to create another GEP if the accumulative byte offset is 0. |
983 | 372 | if (372 AccumulativeByteOffset == 0372 ) |
984 | 0 | return true; |
985 | 372 | |
986 | 372 | // Offsets the base with the accumulative byte offset. |
987 | 372 | // |
988 | 372 | // %gep ; the base |
989 | 372 | // ... %gep ... |
990 | 372 | // |
991 | 372 | // => add the offset |
992 | 372 | // |
993 | 372 | // %gep2 ; clone of %gep |
994 | 372 | // %new.gep = gep %gep2, <offset / sizeof(*%gep)> |
995 | 372 | // %gep ; will be removed |
996 | 372 | // ... %gep ... |
997 | 372 | // |
998 | 372 | // => replace all uses of %gep with %new.gep and remove %gep |
999 | 372 | // |
1000 | 372 | // %gep2 ; clone of %gep |
1001 | 372 | // %new.gep = gep %gep2, <offset / sizeof(*%gep)> |
1002 | 372 | // ... %new.gep ... |
1003 | 372 | // |
1004 | 372 | // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an |
1005 | 372 | // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep): |
1006 | 372 | // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the |
1007 | 372 | // type of %gep. |
1008 | 372 | // |
1009 | 372 | // %gep2 ; clone of %gep |
1010 | 372 | // %0 = bitcast %gep2 to i8* |
1011 | 372 | // %uglygep = gep %0, <offset> |
1012 | 372 | // %new.gep = bitcast %uglygep to <type of %gep> |
1013 | 372 | // ... %new.gep ... |
1014 | 372 | Instruction *NewGEP = GEP->clone(); |
1015 | 372 | NewGEP->insertBefore(GEP); |
1016 | 372 | |
1017 | 372 | // Per ANSI C standard, signed / unsigned = unsigned and signed % unsigned = |
1018 | 372 | // unsigned.. Therefore, we cast ElementTypeSizeOfGEP to signed because it is |
1019 | 372 | // used with unsigned integers later. |
1020 | 372 | int64_t ElementTypeSizeOfGEP = static_cast<int64_t>( |
1021 | 372 | DL->getTypeAllocSize(GEP->getResultElementType())); |
1022 | 372 | Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); |
1023 | 372 | if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0372 ) { |
1024 | 370 | // Very likely. As long as %gep is natually aligned, the byte offset we |
1025 | 370 | // extracted should be a multiple of sizeof(*%gep). |
1026 | 370 | int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP; |
1027 | 370 | NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP, |
1028 | 370 | ConstantInt::get(IntPtrTy, Index, true), |
1029 | 370 | GEP->getName(), GEP); |
1030 | 370 | // Inherit the inbounds attribute of the original GEP. |
1031 | 370 | cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds); |
1032 | 372 | } else { |
1033 | 2 | // Unlikely but possible. For example, |
1034 | 2 | // #pragma pack(1) |
1035 | 2 | // struct S { |
1036 | 2 | // int a[3]; |
1037 | 2 | // int64 b[8]; |
1038 | 2 | // }; |
1039 | 2 | // #pragma pack() |
1040 | 2 | // |
1041 | 2 | // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After |
1042 | 2 | // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is |
1043 | 2 | // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of |
1044 | 2 | // sizeof(int64). |
1045 | 2 | // |
1046 | 2 | // Emit an uglygep in this case. |
1047 | 2 | Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(), |
1048 | 2 | GEP->getPointerAddressSpace()); |
1049 | 2 | NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP); |
1050 | 2 | NewGEP = GetElementPtrInst::Create( |
1051 | 2 | Type::getInt8Ty(GEP->getContext()), NewGEP, |
1052 | 2 | ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep", |
1053 | 2 | GEP); |
1054 | 2 | // Inherit the inbounds attribute of the original GEP. |
1055 | 2 | cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds); |
1056 | 2 | if (GEP->getType() != I8PtrTy) |
1057 | 2 | NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP); |
1058 | 2 | } |
1059 | 14.1k | |
1060 | 14.1k | GEP->replaceAllUsesWith(NewGEP); |
1061 | 14.1k | GEP->eraseFromParent(); |
1062 | 14.1k | |
1063 | 14.1k | return true; |
1064 | 14.1k | } |
1065 | | |
1066 | 24.4k | bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) { |
1067 | 24.4k | if (skipFunction(F)) |
1068 | 2 | return false; |
1069 | 24.4k | |
1070 | 24.4k | if (24.4k DisableSeparateConstOffsetFromGEP24.4k ) |
1071 | 0 | return false; |
1072 | 24.4k | |
1073 | 24.4k | DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
1074 | 24.4k | SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); |
1075 | 24.4k | LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
1076 | 24.4k | TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); |
1077 | 24.4k | bool Changed = false; |
1078 | 29.5k | for (BasicBlock &B : F) { |
1079 | 177k | for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;) |
1080 | 147k | if (GetElementPtrInst *147k GEP147k = dyn_cast<GetElementPtrInst>(I++)) |
1081 | 14.1k | Changed |= splitGEP(GEP); |
1082 | 29.5k | // No need to split GEP ConstantExprs because all its indices are constant |
1083 | 29.5k | // already. |
1084 | 29.5k | } |
1085 | 24.4k | |
1086 | 24.4k | Changed |= reuniteExts(F); |
1087 | 24.4k | |
1088 | 24.4k | if (VerifyNoDeadCode) |
1089 | 23 | verifyNoDeadCode(F); |
1090 | 24.4k | |
1091 | 24.4k | return Changed; |
1092 | 24.4k | } |
1093 | | |
1094 | | Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator( |
1095 | 5 | const SCEV *Key, Instruction *Dominatee) { |
1096 | 5 | auto Pos = DominatingExprs.find(Key); |
1097 | 5 | if (Pos == DominatingExprs.end()) |
1098 | 3 | return nullptr; |
1099 | 2 | |
1100 | 2 | auto &Candidates = Pos->second; |
1101 | 2 | // Because we process the basic blocks in pre-order of the dominator tree, a |
1102 | 2 | // candidate that doesn't dominate the current instruction won't dominate any |
1103 | 2 | // future instruction either. Therefore, we pop it out of the stack. This |
1104 | 2 | // optimization makes the algorithm O(n). |
1105 | 2 | while (!Candidates.empty()2 ) { |
1106 | 2 | Instruction *Candidate = Candidates.back(); |
1107 | 2 | if (DT->dominates(Candidate, Dominatee)) |
1108 | 2 | return Candidate; |
1109 | 0 | Candidates.pop_back(); |
1110 | 0 | } |
1111 | 0 | return nullptr; |
1112 | 5 | } |
1113 | | |
1114 | 153k | bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) { |
1115 | 153k | if (!SE->isSCEVable(I->getType())) |
1116 | 95.3k | return false; |
1117 | 58.5k | |
1118 | 58.5k | // Dom: LHS+RHS |
1119 | 58.5k | // I: sext(LHS)+sext(RHS) |
1120 | 58.5k | // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom). |
1121 | 58.5k | // TODO: handle zext |
1122 | 58.5k | Value *LHS = nullptr, *RHS = nullptr; |
1123 | 58.5k | if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS)))) || |
1124 | 58.5k | match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))58.5k ) { |
1125 | 7 | if (LHS->getType() == RHS->getType()7 ) { |
1126 | 5 | const SCEV *Key = |
1127 | 5 | SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS)); |
1128 | 5 | if (auto *Dom5 = findClosestMatchingDominator(Key, I)) { |
1129 | 2 | Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I); |
1130 | 2 | NewSExt->takeName(I); |
1131 | 2 | I->replaceAllUsesWith(NewSExt); |
1132 | 2 | RecursivelyDeleteTriviallyDeadInstructions(I); |
1133 | 2 | return true; |
1134 | 2 | } |
1135 | 58.5k | } |
1136 | 7 | } |
1137 | 58.5k | |
1138 | 58.5k | // Add I to DominatingExprs if it's an add/sub that can't sign overflow. |
1139 | 58.5k | if (58.5k match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) || |
1140 | 58.5k | match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))57.9k ) { |
1141 | 751 | if (programUndefinedIfFullPoison(I)751 ) { |
1142 | 43 | const SCEV *Key = |
1143 | 43 | SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS)); |
1144 | 43 | DominatingExprs[Key].push_back(I); |
1145 | 43 | } |
1146 | 751 | } |
1147 | 153k | return false; |
1148 | 153k | } |
1149 | | |
1150 | 24.4k | bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) { |
1151 | 24.4k | bool Changed = false; |
1152 | 24.4k | DominatingExprs.clear(); |
1153 | 29.5k | for (const auto Node : depth_first(DT)) { |
1154 | 29.5k | BasicBlock *BB = Node->getBlock(); |
1155 | 183k | for (auto I = BB->begin(); I != BB->end()183k ; ) { |
1156 | 153k | Instruction *Cur = &*I++; |
1157 | 153k | Changed |= reuniteExts(Cur); |
1158 | 153k | } |
1159 | 29.5k | } |
1160 | 24.4k | return Changed; |
1161 | 24.4k | } |
1162 | | |
1163 | 23 | void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) { |
1164 | 23 | for (BasicBlock &B : F) { |
1165 | 271 | for (Instruction &I : B) { |
1166 | 271 | if (isInstructionTriviallyDead(&I)271 ) { |
1167 | 0 | std::string ErrMessage; |
1168 | 0 | raw_string_ostream RSO(ErrMessage); |
1169 | 0 | RSO << "Dead instruction detected!\n" << I << "\n"; |
1170 | 0 | llvm_unreachable(RSO.str().c_str()); |
1171 | 0 | } |
1172 | 23 | } |
1173 | 23 | } |
1174 | 23 | } |
1175 | | |
1176 | | bool SeparateConstOffsetFromGEP::isLegalToSwapOperand( |
1177 | 0 | GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) { |
1178 | 0 | if (!FirstGEP || 0 !FirstGEP->hasOneUse()0 ) |
1179 | 0 | return false; |
1180 | 0 |
|
1181 | 0 | if (0 !SecondGEP || 0 FirstGEP->getParent() != SecondGEP->getParent()0 ) |
1182 | 0 | return false; |
1183 | 0 |
|
1184 | 0 | if (0 FirstGEP == SecondGEP0 ) |
1185 | 0 | return false; |
1186 | 0 |
|
1187 | 0 | unsigned FirstNum = FirstGEP->getNumOperands(); |
1188 | 0 | unsigned SecondNum = SecondGEP->getNumOperands(); |
1189 | 0 | // Give up if the number of operands are not 2. |
1190 | 0 | if (FirstNum != SecondNum || 0 FirstNum != 20 ) |
1191 | 0 | return false; |
1192 | 0 |
|
1193 | 0 | Value *FirstBase = FirstGEP->getOperand(0); |
1194 | 0 | Value *SecondBase = SecondGEP->getOperand(0); |
1195 | 0 | Value *FirstOffset = FirstGEP->getOperand(1); |
1196 | 0 | // Give up if the index of the first GEP is loop invariant. |
1197 | 0 | if (CurLoop->isLoopInvariant(FirstOffset)) |
1198 | 0 | return false; |
1199 | 0 |
|
1200 | 0 | // Give up if base doesn't have same type. |
1201 | 0 | if (0 FirstBase->getType() != SecondBase->getType()0 ) |
1202 | 0 | return false; |
1203 | 0 |
|
1204 | 0 | Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset); |
1205 | 0 |
|
1206 | 0 | // Check if the second operand of first GEP has constant coefficient. |
1207 | 0 | // For an example, for the following code, we won't gain anything by |
1208 | 0 | // hoisting the second GEP out because the second GEP can be folded away. |
1209 | 0 | // %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256 |
1210 | 0 | // %67 = shl i64 %scevgep.sum.ur159, 2 |
1211 | 0 | // %uglygep160 = getelementptr i8* %65, i64 %67 |
1212 | 0 | // %uglygep161 = getelementptr i8* %uglygep160, i64 -1024 |
1213 | 0 |
|
1214 | 0 | // Skip constant shift instruction which may be generated by Splitting GEPs. |
1215 | 0 | if (FirstOffsetDef && 0 FirstOffsetDef->isShift()0 && |
1216 | 0 | isa<ConstantInt>(FirstOffsetDef->getOperand(1))) |
1217 | 0 | FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0)); |
1218 | 0 |
|
1219 | 0 | // Give up if FirstOffsetDef is an Add or Sub with constant. |
1220 | 0 | // Because it may not profitable at all due to constant folding. |
1221 | 0 | if (FirstOffsetDef) |
1222 | 0 | if (BinaryOperator *0 BO0 = dyn_cast<BinaryOperator>(FirstOffsetDef)) { |
1223 | 0 | unsigned opc = BO->getOpcode(); |
1224 | 0 | if ((opc == Instruction::Add || 0 opc == Instruction::Sub0 ) && |
1225 | 0 | (isa<ConstantInt>(BO->getOperand(0)) || |
1226 | 0 | isa<ConstantInt>(BO->getOperand(1)))) |
1227 | 0 | return false; |
1228 | 0 | } |
1229 | 0 | return true; |
1230 | 0 | } |
1231 | | |
1232 | 164 | bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) { |
1233 | 164 | int UsesInLoop = 0; |
1234 | 400 | for (User *U : V->users()) { |
1235 | 400 | if (Instruction *User = dyn_cast<Instruction>(U)) |
1236 | 331 | if (331 L->contains(User)331 ) |
1237 | 328 | if (328 ++UsesInLoop > 1328 ) |
1238 | 164 | return true; |
1239 | 0 | } |
1240 | 0 | return false; |
1241 | 0 | } |
1242 | | |
1243 | | void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First, |
1244 | 0 | GetElementPtrInst *Second) { |
1245 | 0 | Value *Offset1 = First->getOperand(1); |
1246 | 0 | Value *Offset2 = Second->getOperand(1); |
1247 | 0 | First->setOperand(1, Offset2); |
1248 | 0 | Second->setOperand(1, Offset1); |
1249 | 0 |
|
1250 | 0 | // We changed p+o+c to p+c+o, p+c may not be inbound anymore. |
1251 | 0 | const DataLayout &DAL = First->getModule()->getDataLayout(); |
1252 | 0 | APInt Offset(DAL.getPointerSizeInBits( |
1253 | 0 | cast<PointerType>(First->getType())->getAddressSpace()), |
1254 | 0 | 0); |
1255 | 0 | Value *NewBase = |
1256 | 0 | First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset); |
1257 | 0 | uint64_t ObjectSize; |
1258 | 0 | if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) || |
1259 | 0 | Offset.ugt(ObjectSize)0 ) { |
1260 | 0 | First->setIsInBounds(false); |
1261 | 0 | Second->setIsInBounds(false); |
1262 | 0 | } else |
1263 | 0 | First->setIsInBounds(true); |
1264 | 0 | } |