/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Analysis/BasicAliasAnalysis.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- BasicAliasAnalysis.cpp - Stateless Alias Analysis Impl -------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file defines the primary stateless implementation of the |
11 | | // Alias Analysis interface that implements identities (two different |
12 | | // globals cannot alias, etc), but does no stateful analysis. |
13 | | // |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #include "llvm/Analysis/BasicAliasAnalysis.h" |
17 | | #include "llvm/ADT/APInt.h" |
18 | | #include "llvm/ADT/SmallPtrSet.h" |
19 | | #include "llvm/ADT/SmallVector.h" |
20 | | #include "llvm/ADT/Statistic.h" |
21 | | #include "llvm/Analysis/AliasAnalysis.h" |
22 | | #include "llvm/Analysis/AssumptionCache.h" |
23 | | #include "llvm/Analysis/CFG.h" |
24 | | #include "llvm/Analysis/CaptureTracking.h" |
25 | | #include "llvm/Analysis/InstructionSimplify.h" |
26 | | #include "llvm/Analysis/LoopInfo.h" |
27 | | #include "llvm/Analysis/MemoryBuiltins.h" |
28 | | #include "llvm/Analysis/MemoryLocation.h" |
29 | | #include "llvm/Analysis/TargetLibraryInfo.h" |
30 | | #include "llvm/Analysis/ValueTracking.h" |
31 | | #include "llvm/IR/Argument.h" |
32 | | #include "llvm/IR/Attributes.h" |
33 | | #include "llvm/IR/CallSite.h" |
34 | | #include "llvm/IR/Constant.h" |
35 | | #include "llvm/IR/Constants.h" |
36 | | #include "llvm/IR/DataLayout.h" |
37 | | #include "llvm/IR/DerivedTypes.h" |
38 | | #include "llvm/IR/Dominators.h" |
39 | | #include "llvm/IR/Function.h" |
40 | | #include "llvm/IR/GetElementPtrTypeIterator.h" |
41 | | #include "llvm/IR/GlobalAlias.h" |
42 | | #include "llvm/IR/GlobalVariable.h" |
43 | | #include "llvm/IR/InstrTypes.h" |
44 | | #include "llvm/IR/Instruction.h" |
45 | | #include "llvm/IR/Instructions.h" |
46 | | #include "llvm/IR/IntrinsicInst.h" |
47 | | #include "llvm/IR/Intrinsics.h" |
48 | | #include "llvm/IR/Metadata.h" |
49 | | #include "llvm/IR/Operator.h" |
50 | | #include "llvm/IR/Type.h" |
51 | | #include "llvm/IR/User.h" |
52 | | #include "llvm/IR/Value.h" |
53 | | #include "llvm/Pass.h" |
54 | | #include "llvm/Support/Casting.h" |
55 | | #include "llvm/Support/CommandLine.h" |
56 | | #include "llvm/Support/Compiler.h" |
57 | | #include "llvm/Support/KnownBits.h" |
58 | | #include <cassert> |
59 | | #include <cstdint> |
60 | | #include <cstdlib> |
61 | | #include <utility> |
62 | | |
63 | | #define DEBUG_TYPE "basicaa" |
64 | | |
65 | | using namespace llvm; |
66 | | |
67 | | /// Enable analysis of recursive PHI nodes. |
68 | | static cl::opt<bool> EnableRecPhiAnalysis("basicaa-recphi", cl::Hidden, |
69 | | cl::init(false)); |
70 | | /// SearchLimitReached / SearchTimes shows how often the limit of |
71 | | /// to decompose GEPs is reached. It will affect the precision |
72 | | /// of basic alias analysis. |
73 | | STATISTIC(SearchLimitReached, "Number of times the limit to " |
74 | | "decompose GEPs is reached"); |
75 | | STATISTIC(SearchTimes, "Number of times a GEP is decomposed"); |
76 | | |
77 | | /// Cutoff after which to stop analysing a set of phi nodes potentially involved |
78 | | /// in a cycle. Because we are analysing 'through' phi nodes, we need to be |
79 | | /// careful with value equivalence. We use reachability to make sure a value |
80 | | /// cannot be involved in a cycle. |
81 | | const unsigned MaxNumPhiBBsValueReachabilityCheck = 20; |
82 | | |
83 | | // The max limit of the search depth in DecomposeGEPExpression() and |
84 | | // GetUnderlyingObject(), both functions need to use the same search |
85 | | // depth otherwise the algorithm in aliasGEP will assert. |
86 | | static const unsigned MaxLookupSearchDepth = 6; |
87 | | |
88 | | bool BasicAAResult::invalidate(Function &F, const PreservedAnalyses &PA, |
89 | 436 | FunctionAnalysisManager::Invalidator &Inv) { |
90 | 436 | // We don't care if this analysis itself is preserved, it has no state. But |
91 | 436 | // we need to check that the analyses it depends on have been. Note that we |
92 | 436 | // may be created without handles to some analyses and in that case don't |
93 | 436 | // depend on them. |
94 | 436 | if (Inv.invalidate<AssumptionAnalysis>(F, PA) || |
95 | 436 | (DT && 436 Inv.invalidate<DominatorTreeAnalysis>(F, PA)436 ) || |
96 | 235 | (LI && 235 Inv.invalidate<LoopAnalysis>(F, PA)66 )) |
97 | 207 | return true; |
98 | 229 | |
99 | 229 | // Otherwise this analysis result remains valid. |
100 | 229 | return false; |
101 | 229 | } |
102 | | |
103 | | //===----------------------------------------------------------------------===// |
104 | | // Useful predicates |
105 | | //===----------------------------------------------------------------------===// |
106 | | |
107 | | /// Returns true if the pointer is to a function-local object that never |
108 | | /// escapes from the function. |
109 | 86.2M | static bool isNonEscapingLocalObject(const Value *V) { |
110 | 86.2M | // If this is a local allocation, check to see if it escapes. |
111 | 86.2M | if (isa<AllocaInst>(V) || 86.2M isNoAliasCall(V)80.7M ) |
112 | 86.2M | // Set StoreCaptures to True so that we can assume in our callers that the |
113 | 86.2M | // pointer is not the result of a load instruction. Currently |
114 | 86.2M | // PointerMayBeCaptured doesn't have any special analysis for the |
115 | 86.2M | // StoreCaptures=false case; if it did, our callers could be refined to be |
116 | 86.2M | // more precise. |
117 | 6.52M | return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); |
118 | 79.7M | |
119 | 79.7M | // If this is an argument that corresponds to a byval or noalias argument, |
120 | 79.7M | // then it has not escaped before entering the function. Check if it escapes |
121 | 79.7M | // inside the function. |
122 | 79.7M | if (const Argument *79.7M A79.7M = dyn_cast<Argument>(V)) |
123 | 17.0M | if (17.0M A->hasByValAttr() || 17.0M A->hasNoAliasAttr()17.0M ) |
124 | 17.0M | // Note even if the argument is marked nocapture, we still need to check |
125 | 17.0M | // for copies made inside the function. The nocapture attribute only |
126 | 17.0M | // specifies that there are no copies made that outlive the function. |
127 | 74.2k | return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); |
128 | 79.6M | |
129 | 79.6M | return false; |
130 | 79.6M | } |
131 | | |
132 | | /// Returns true if the pointer is one which would have been considered an |
133 | | /// escape by isNonEscapingLocalObject. |
134 | 115M | static bool isEscapeSource(const Value *V) { |
135 | 115M | if (isa<CallInst>(V) || 115M isa<InvokeInst>(V)109M || isa<Argument>(V)109M ) |
136 | 24.6M | return true; |
137 | 91.1M | |
138 | 91.1M | // The load case works because isNonEscapingLocalObject considers all |
139 | 91.1M | // stores to be escapes (it passes true for the StoreCaptures argument |
140 | 91.1M | // to PointerMayBeCaptured). |
141 | 91.1M | if (91.1M isa<LoadInst>(V)91.1M ) |
142 | 54.4M | return true; |
143 | 36.7M | |
144 | 36.7M | return false; |
145 | 36.7M | } |
146 | | |
147 | | /// Returns the size of the object specified by V or UnknownSize if unknown. |
148 | | static uint64_t getObjectSize(const Value *V, const DataLayout &DL, |
149 | | const TargetLibraryInfo &TLI, |
150 | 46.4M | bool RoundToAlign = false) { |
151 | 46.4M | uint64_t Size; |
152 | 46.4M | ObjectSizeOpts Opts; |
153 | 46.4M | Opts.RoundToAlign = RoundToAlign; |
154 | 46.4M | if (getObjectSize(V, Size, DL, &TLI, Opts)) |
155 | 20.2M | return Size; |
156 | 26.1M | return MemoryLocation::UnknownSize; |
157 | 26.1M | } |
158 | | |
159 | | /// Returns true if we can prove that the object specified by V is smaller than |
160 | | /// Size. |
161 | | static bool isObjectSmallerThan(const Value *V, uint64_t Size, |
162 | | const DataLayout &DL, |
163 | 123M | const TargetLibraryInfo &TLI) { |
164 | 123M | // Note that the meanings of the "object" are slightly different in the |
165 | 123M | // following contexts: |
166 | 123M | // c1: llvm::getObjectSize() |
167 | 123M | // c2: llvm.objectsize() intrinsic |
168 | 123M | // c3: isObjectSmallerThan() |
169 | 123M | // c1 and c2 share the same meaning; however, the meaning of "object" in c3 |
170 | 123M | // refers to the "entire object". |
171 | 123M | // |
172 | 123M | // Consider this example: |
173 | 123M | // char *p = (char*)malloc(100) |
174 | 123M | // char *q = p+80; |
175 | 123M | // |
176 | 123M | // In the context of c1 and c2, the "object" pointed by q refers to the |
177 | 123M | // stretch of memory of q[0:19]. So, getObjectSize(q) should return 20. |
178 | 123M | // |
179 | 123M | // However, in the context of c3, the "object" refers to the chunk of memory |
180 | 123M | // being allocated. So, the "object" has 100 bytes, and q points to the middle |
181 | 123M | // the "object". In case q is passed to isObjectSmallerThan() as the 1st |
182 | 123M | // parameter, before the llvm::getObjectSize() is called to get the size of |
183 | 123M | // entire object, we should: |
184 | 123M | // - either rewind the pointer q to the base-address of the object in |
185 | 123M | // question (in this case rewind to p), or |
186 | 123M | // - just give up. It is up to caller to make sure the pointer is pointing |
187 | 123M | // to the base address the object. |
188 | 123M | // |
189 | 123M | // We go for 2nd option for simplicity. |
190 | 123M | if (!isIdentifiedObject(V)) |
191 | 79.5M | return false; |
192 | 44.4M | |
193 | 44.4M | // This function needs to use the aligned object size because we allow |
194 | 44.4M | // reads a bit past the end given sufficient alignment. |
195 | 44.4M | uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/ true); |
196 | 44.4M | |
197 | 19.8M | return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size; |
198 | 123M | } |
199 | | |
200 | | /// Returns true if we can prove that the object specified by V has size Size. |
201 | | static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, |
202 | 1.97M | const TargetLibraryInfo &TLI) { |
203 | 1.97M | uint64_t ObjectSize = getObjectSize(V, DL, TLI); |
204 | 354k | return ObjectSize != MemoryLocation::UnknownSize && ObjectSize == Size; |
205 | 1.97M | } |
206 | | |
207 | | //===----------------------------------------------------------------------===// |
208 | | // GetElementPtr Instruction Decomposition and Analysis |
209 | | //===----------------------------------------------------------------------===// |
210 | | |
211 | | /// Analyzes the specified value as a linear expression: "A*V + B", where A and |
212 | | /// B are constant integers. |
213 | | /// |
214 | | /// Returns the scale and offset values as APInts and return V as a Value*, and |
215 | | /// return whether we looked through any sign or zero extends. The incoming |
216 | | /// Value is known to have IntegerType, and it may already be sign or zero |
217 | | /// extended. |
218 | | /// |
219 | | /// Note that this looks through extends, so the high bits may not be |
220 | | /// represented in the result. |
221 | | /*static*/ const Value *BasicAAResult::GetLinearExpression( |
222 | | const Value *V, APInt &Scale, APInt &Offset, unsigned &ZExtBits, |
223 | | unsigned &SExtBits, const DataLayout &DL, unsigned Depth, |
224 | 50.4M | AssumptionCache *AC, DominatorTree *DT, bool &NSW, bool &NUW) { |
225 | 50.4M | assert(V->getType()->isIntegerTy() && "Not an integer value"); |
226 | 50.4M | |
227 | 50.4M | // Limit our recursion depth. |
228 | 50.4M | if (Depth == 650.4M ) { |
229 | 9.12k | Scale = 1; |
230 | 9.12k | Offset = 0; |
231 | 9.12k | return V; |
232 | 9.12k | } |
233 | 50.3M | |
234 | 50.3M | if (const ConstantInt *50.3M Const50.3M = dyn_cast<ConstantInt>(V)) { |
235 | 921 | // If it's a constant, just convert it to an offset and remove the variable. |
236 | 921 | // If we've been called recursively, the Offset bit width will be greater |
237 | 921 | // than the constant's (the Offset's always as wide as the outermost call), |
238 | 921 | // so we'll zext here and process any extension in the isa<SExtInst> & |
239 | 921 | // isa<ZExtInst> cases below. |
240 | 921 | Offset += Const->getValue().zextOrSelf(Offset.getBitWidth()); |
241 | 921 | assert(Scale == 0 && "Constant values don't have a scale"); |
242 | 921 | return V; |
243 | 921 | } |
244 | 50.3M | |
245 | 50.3M | if (const BinaryOperator *50.3M BOp50.3M = dyn_cast<BinaryOperator>(V)) { |
246 | 9.86M | if (ConstantInt *RHSC9.86M = dyn_cast<ConstantInt>(BOp->getOperand(1))) { |
247 | 6.11M | // If we've been called recursively, then Offset and Scale will be wider |
248 | 6.11M | // than the BOp operands. We'll always zext it here as we'll process sign |
249 | 6.11M | // extensions below (see the isa<SExtInst> / isa<ZExtInst> cases). |
250 | 6.11M | APInt RHS = RHSC->getValue().zextOrSelf(Offset.getBitWidth()); |
251 | 6.11M | |
252 | 6.11M | switch (BOp->getOpcode()) { |
253 | 1.11M | default: |
254 | 1.11M | // We don't understand this instruction, so we can't decompose it any |
255 | 1.11M | // further. |
256 | 1.11M | Scale = 1; |
257 | 1.11M | Offset = 0; |
258 | 1.11M | return V; |
259 | 815k | case Instruction::Or: |
260 | 815k | // X|C == X+C if all the bits in C are unset in X. Otherwise we can't |
261 | 815k | // analyze it. |
262 | 815k | if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), DL, 0, AC, |
263 | 815k | BOp, DT)) { |
264 | 1.06k | Scale = 1; |
265 | 1.06k | Offset = 0; |
266 | 1.06k | return V; |
267 | 1.06k | } |
268 | 814k | LLVM_FALLTHROUGH814k ; |
269 | 3.86M | case Instruction::Add: |
270 | 3.86M | V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, |
271 | 3.86M | SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); |
272 | 3.86M | Offset += RHS; |
273 | 3.86M | break; |
274 | 367 | case Instruction::Sub: |
275 | 367 | V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, |
276 | 367 | SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); |
277 | 367 | Offset -= RHS; |
278 | 367 | break; |
279 | 109k | case Instruction::Mul: |
280 | 109k | V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, |
281 | 109k | SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); |
282 | 109k | Offset *= RHS; |
283 | 109k | Scale *= RHS; |
284 | 109k | break; |
285 | 1.01M | case Instruction::Shl: |
286 | 1.01M | V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, |
287 | 1.01M | SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); |
288 | 1.01M | Offset <<= RHS.getLimitedValue(); |
289 | 1.01M | Scale <<= RHS.getLimitedValue(); |
290 | 1.01M | // the semantics of nsw and nuw for left shifts don't match those of |
291 | 1.01M | // multiplications, so we won't propagate them. |
292 | 1.01M | NSW = NUW = false; |
293 | 1.01M | return V; |
294 | 3.97M | } |
295 | 3.97M | |
296 | 3.97M | if (3.97M isa<OverflowingBinaryOperator>(BOp)3.97M ) { |
297 | 3.16M | NUW &= BOp->hasNoUnsignedWrap(); |
298 | 3.16M | NSW &= BOp->hasNoSignedWrap(); |
299 | 3.16M | } |
300 | 6.11M | return V; |
301 | 6.11M | } |
302 | 9.86M | } |
303 | 44.2M | |
304 | 44.2M | // Since GEP indices are sign extended anyway, we don't care about the high |
305 | 44.2M | // bits of a sign or zero extended value - just scales and offsets. The |
306 | 44.2M | // extensions have to be consistent though. |
307 | 44.2M | if (44.2M isa<SExtInst>(V) || 44.2M isa<ZExtInst>(V)37.1M ) { |
308 | 8.48M | Value *CastOp = cast<CastInst>(V)->getOperand(0); |
309 | 8.48M | unsigned NewWidth = V->getType()->getPrimitiveSizeInBits(); |
310 | 8.48M | unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits(); |
311 | 8.48M | unsigned OldZExtBits = ZExtBits, OldSExtBits = SExtBits; |
312 | 8.48M | const Value *Result = |
313 | 8.48M | GetLinearExpression(CastOp, Scale, Offset, ZExtBits, SExtBits, DL, |
314 | 8.48M | Depth + 1, AC, DT, NSW, NUW); |
315 | 8.48M | |
316 | 8.48M | // zext(zext(%x)) == zext(%x), and similarly for sext; we'll handle this |
317 | 8.48M | // by just incrementing the number of bits we've extended by. |
318 | 8.48M | unsigned ExtendedBy = NewWidth - SmallWidth; |
319 | 8.48M | |
320 | 8.48M | if (isa<SExtInst>(V) && 8.48M ZExtBits == 07.14M ) { |
321 | 7.14M | // sext(sext(%x, a), b) == sext(%x, a + b) |
322 | 7.14M | |
323 | 7.14M | if (NSW7.14M ) { |
324 | 6.68M | // We haven't sign-wrapped, so it's valid to decompose sext(%x + c) |
325 | 6.68M | // into sext(%x) + sext(c). We'll sext the Offset ourselves: |
326 | 6.68M | unsigned OldWidth = Offset.getBitWidth(); |
327 | 6.68M | Offset = Offset.trunc(SmallWidth).sext(NewWidth).zextOrSelf(OldWidth); |
328 | 7.14M | } else { |
329 | 459k | // We may have signed-wrapped, so don't decompose sext(%x + c) into |
330 | 459k | // sext(%x) + sext(c) |
331 | 459k | Scale = 1; |
332 | 459k | Offset = 0; |
333 | 459k | Result = CastOp; |
334 | 459k | ZExtBits = OldZExtBits; |
335 | 459k | SExtBits = OldSExtBits; |
336 | 459k | } |
337 | 7.14M | SExtBits += ExtendedBy; |
338 | 8.48M | } else { |
339 | 1.34M | // sext(zext(%x, a), b) = zext(zext(%x, a), b) = zext(%x, a + b) |
340 | 1.34M | |
341 | 1.34M | if (!NUW1.34M ) { |
342 | 99.6k | // We may have unsigned-wrapped, so don't decompose zext(%x + c) into |
343 | 99.6k | // zext(%x) + zext(c) |
344 | 99.6k | Scale = 1; |
345 | 99.6k | Offset = 0; |
346 | 99.6k | Result = CastOp; |
347 | 99.6k | ZExtBits = OldZExtBits; |
348 | 99.6k | SExtBits = OldSExtBits; |
349 | 99.6k | } |
350 | 1.34M | ZExtBits += ExtendedBy; |
351 | 1.34M | } |
352 | 8.48M | |
353 | 8.48M | return Result; |
354 | 8.48M | } |
355 | 35.7M | |
356 | 35.7M | Scale = 1; |
357 | 35.7M | Offset = 0; |
358 | 35.7M | return V; |
359 | 35.7M | } |
360 | | |
/// To ensure a pointer offset fits in an integer of size PointerSize
/// (in bits) when that size is smaller than 64. This is an issue in
/// particular for 32b programs with negative indices that rely on two's
/// complement wrap-arounds for precise alias information.
static int64_t adjustToPointerSize(int64_t Offset, unsigned PointerSize) {
  assert(PointerSize <= 64 && "Invalid PointerSize!");
  unsigned ShiftBits = 64 - PointerSize;
  // Shift left then arithmetic-shift right to sign-extend the low
  // PointerSize bits, discarding anything that doesn't fit the pointer.
  return (int64_t)((uint64_t)Offset << ShiftBits) >> ShiftBits;
}
370 | | |
371 | | /// If V is a symbolic pointer expression, decompose it into a base pointer |
372 | | /// with a constant offset and a number of scaled symbolic offsets. |
373 | | /// |
374 | | /// The scaled symbolic offsets (represented by pairs of a Value* and a scale |
375 | | /// in the VarIndices vector) are Value*'s that are known to be scaled by the |
376 | | /// specified amount, but which may have other unrepresented high bits. As |
377 | | /// such, the gep cannot necessarily be reconstructed from its decomposed form. |
378 | | /// |
379 | | /// When DataLayout is around, this function is capable of analyzing everything |
380 | | /// that GetUnderlyingObject can look through. To be able to do that |
381 | | /// GetUnderlyingObject and DecomposeGEPExpression must use the same search |
382 | | /// depth (MaxLookupSearchDepth). When DataLayout not is around, it just looks |
383 | | /// through pointer casts. |
384 | | bool BasicAAResult::DecomposeGEPExpression(const Value *V, |
385 | | DecomposedGEP &Decomposed, const DataLayout &DL, AssumptionCache *AC, |
386 | 106M | DominatorTree *DT) { |
387 | 106M | // Limit recursion depth to limit compile time in crazy cases. |
388 | 106M | unsigned MaxLookup = MaxLookupSearchDepth; |
389 | 106M | SearchTimes++; |
390 | 106M | |
391 | 106M | Decomposed.StructOffset = 0; |
392 | 106M | Decomposed.OtherOffset = 0; |
393 | 106M | Decomposed.VarIndices.clear(); |
394 | 237M | do { |
395 | 237M | // See if this is a bitcast or GEP. |
396 | 237M | const Operator *Op = dyn_cast<Operator>(V); |
397 | 237M | if (!Op237M ) { |
398 | 41.0M | // The only non-operator case we can handle are GlobalAliases. |
399 | 41.0M | if (const GlobalAlias *GA41.0M = dyn_cast<GlobalAlias>(V)) { |
400 | 0 | if (!GA->isInterposable()0 ) { |
401 | 0 | V = GA->getAliasee(); |
402 | 0 | continue; |
403 | 0 | } |
404 | 41.0M | } |
405 | 41.0M | Decomposed.Base = V; |
406 | 41.0M | return false; |
407 | 41.0M | } |
408 | 196M | |
409 | 196M | if (196M Op->getOpcode() == Instruction::BitCast || |
410 | 196M | Op->getOpcode() == Instruction::AddrSpaceCast179M ) { |
411 | 16.8M | V = Op->getOperand(0); |
412 | 16.8M | continue; |
413 | 16.8M | } |
414 | 179M | |
415 | 179M | const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op); |
416 | 179M | if (!GEPOp179M ) { |
417 | 65.7M | if (auto CS = ImmutableCallSite(V)) |
418 | 3.58M | if (const Value *3.58M RV3.58M = CS.getReturnedArgOperand()) { |
419 | 1.97k | V = RV; |
420 | 1.97k | continue; |
421 | 1.97k | } |
422 | 65.7M | |
423 | 65.7M | // If it's not a GEP, hand it off to SimplifyInstruction to see if it |
424 | 65.7M | // can come up with something. This matches what GetUnderlyingObject does. |
425 | 65.7M | if (const Instruction *65.7M I65.7M = dyn_cast<Instruction>(V)) |
426 | 65.7M | // TODO: Get a DominatorTree and AssumptionCache and use them here |
427 | 65.7M | // (these are both now available in this function, but this should be |
428 | 65.7M | // updated when GetUnderlyingObject is updated). TLI should be |
429 | 65.7M | // provided also. |
430 | 65.7M | if (const Value *65.7M Simplified65.7M = |
431 | 153k | SimplifyInstruction(const_cast<Instruction *>(I), DL)) { |
432 | 153k | V = Simplified; |
433 | 153k | continue; |
434 | 153k | } |
435 | 65.5M | |
436 | 65.5M | Decomposed.Base = V; |
437 | 65.5M | return false; |
438 | 65.5M | } |
439 | 113M | |
440 | 113M | // Don't attempt to analyze GEPs over unsized objects. |
441 | 113M | if (113M !GEPOp->getSourceElementType()->isSized()113M ) { |
442 | 0 | Decomposed.Base = V; |
443 | 0 | return false; |
444 | 0 | } |
445 | 113M | |
446 | 113M | unsigned AS = GEPOp->getPointerAddressSpace(); |
447 | 113M | // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices. |
448 | 113M | gep_type_iterator GTI = gep_type_begin(GEPOp); |
449 | 113M | unsigned PointerSize = DL.getPointerSizeInBits(AS); |
450 | 113M | // Assume all GEP operands are constants until proven otherwise. |
451 | 113M | bool GepHasConstantOffset = true; |
452 | 113M | for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end(); |
453 | 377M | I != E377M ; ++I, ++GTI264M ) { |
454 | 264M | const Value *Index = *I; |
455 | 264M | // Compute the (potentially symbolic) offset in bytes for this index. |
456 | 264M | if (StructType *STy264M = GTI.getStructTypeOrNull()) { |
457 | 77.2M | // For a struct, add the member offset. |
458 | 77.2M | unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); |
459 | 77.2M | if (FieldNo == 0) |
460 | 8.61M | continue; |
461 | 68.6M | |
462 | 68.6M | Decomposed.StructOffset += |
463 | 68.6M | DL.getStructLayout(STy)->getElementOffset(FieldNo); |
464 | 68.6M | continue; |
465 | 68.6M | } |
466 | 186M | |
467 | 186M | // For an array/pointer, add the element offset, explicitly scaled. |
468 | 186M | if (const ConstantInt *186M CIdx186M = dyn_cast<ConstantInt>(Index)) { |
469 | 150M | if (CIdx->isZero()) |
470 | 94.2M | continue; |
471 | 56.4M | Decomposed.OtherOffset += |
472 | 56.4M | DL.getTypeAllocSize(GTI.getIndexedType()) * CIdx->getSExtValue(); |
473 | 56.4M | continue; |
474 | 56.4M | } |
475 | 36.2M | |
476 | 36.2M | GepHasConstantOffset = false; |
477 | 36.2M | |
478 | 36.2M | uint64_t Scale = DL.getTypeAllocSize(GTI.getIndexedType()); |
479 | 36.2M | unsigned ZExtBits = 0, SExtBits = 0; |
480 | 36.2M | |
481 | 36.2M | // If the integer type is smaller than the pointer size, it is implicitly |
482 | 36.2M | // sign extended to pointer size. |
483 | 36.2M | unsigned Width = Index->getType()->getIntegerBitWidth(); |
484 | 36.2M | if (PointerSize > Width) |
485 | 499 | SExtBits += PointerSize - Width; |
486 | 36.2M | |
487 | 36.2M | // Use GetLinearExpression to decompose the index into a C1*V+C2 form. |
488 | 36.2M | APInt IndexScale(Width, 0), IndexOffset(Width, 0); |
489 | 36.2M | bool NSW = true, NUW = true; |
490 | 36.2M | Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits, |
491 | 36.2M | SExtBits, DL, 0, AC, DT, NSW, NUW); |
492 | 36.2M | |
493 | 36.2M | // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale. |
494 | 36.2M | // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale. |
495 | 36.2M | Decomposed.OtherOffset += IndexOffset.getSExtValue() * Scale; |
496 | 36.2M | Scale *= IndexScale.getSExtValue(); |
497 | 36.2M | |
498 | 36.2M | // If we already had an occurrence of this index variable, merge this |
499 | 36.2M | // scale into it. For example, we want to handle: |
500 | 36.2M | // A[x][x] -> x*16 + x*4 -> x*20 |
501 | 36.2M | // This also ensures that 'x' only appears in the index list once. |
502 | 38.5M | for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e38.5M ; ++i2.30M ) { |
503 | 2.44M | if (Decomposed.VarIndices[i].V == Index && |
504 | 136k | Decomposed.VarIndices[i].ZExtBits == ZExtBits && |
505 | 2.44M | Decomposed.VarIndices[i].SExtBits == SExtBits136k ) { |
506 | 136k | Scale += Decomposed.VarIndices[i].Scale; |
507 | 136k | Decomposed.VarIndices.erase(Decomposed.VarIndices.begin() + i); |
508 | 136k | break; |
509 | 136k | } |
510 | 2.44M | } |
511 | 36.2M | |
512 | 36.2M | // Make sure that we have a scale that makes sense for this target's |
513 | 36.2M | // pointer size. |
514 | 36.2M | Scale = adjustToPointerSize(Scale, PointerSize); |
515 | 36.2M | |
516 | 36.2M | if (Scale36.2M ) { |
517 | 36.2M | VariableGEPIndex Entry = {Index, ZExtBits, SExtBits, |
518 | 36.2M | static_cast<int64_t>(Scale)}; |
519 | 36.2M | Decomposed.VarIndices.push_back(Entry); |
520 | 36.2M | } |
521 | 264M | } |
522 | 113M | |
523 | 113M | // Take care of wrap-arounds |
524 | 113M | if (GepHasConstantOffset113M ) { |
525 | 78.3M | Decomposed.StructOffset = |
526 | 78.3M | adjustToPointerSize(Decomposed.StructOffset, PointerSize); |
527 | 78.3M | Decomposed.OtherOffset = |
528 | 78.3M | adjustToPointerSize(Decomposed.OtherOffset, PointerSize); |
529 | 78.3M | } |
530 | 237M | |
531 | 237M | // Analyze the base pointer next. |
532 | 237M | V = GEPOp->getOperand(0); |
533 | 130M | } while (--MaxLookup); |
534 | 106M | |
535 | 106M | // If the chain of expressions is too deep, just return early. |
536 | 271k | Decomposed.Base = V; |
537 | 271k | SearchLimitReached++; |
538 | 271k | return true; |
539 | 106M | } |
540 | | |
541 | | /// Returns whether the given pointer value points to memory that is local to |
542 | | /// the function, with global constants being considered local to all |
543 | | /// functions. |
544 | | bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc, |
545 | 26.8M | bool OrLocal) { |
546 | 26.8M | assert(Visited.empty() && "Visited must be cleared after use!"); |
547 | 26.8M | |
548 | 26.8M | unsigned MaxLookup = 8; |
549 | 26.8M | SmallVector<const Value *, 16> Worklist; |
550 | 26.8M | Worklist.push_back(Loc.Ptr); |
551 | 32.0M | do { |
552 | 32.0M | const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL); |
553 | 32.0M | if (!Visited.insert(V).second32.0M ) { |
554 | 349k | Visited.clear(); |
555 | 349k | return AAResultBase::pointsToConstantMemory(Loc, OrLocal); |
556 | 349k | } |
557 | 31.6M | |
558 | 31.6M | // An alloca instruction defines local memory. |
559 | 31.6M | if (31.6M OrLocal && 31.6M isa<AllocaInst>(V)235k ) |
560 | 35.0k | continue; |
561 | 31.6M | |
562 | 31.6M | // A global constant counts as local memory for our purposes. |
563 | 31.6M | if (const GlobalVariable *31.6M GV31.6M = dyn_cast<GlobalVariable>(V)) { |
564 | 5.00M | // Note: this doesn't require GV to be "ODR" because it isn't legal for a |
565 | 5.00M | // global to be marked constant in some modules and non-constant in |
566 | 5.00M | // others. GV may even be a declaration, not a definition. |
567 | 5.00M | if (!GV->isConstant()5.00M ) { |
568 | 4.83M | Visited.clear(); |
569 | 4.83M | return AAResultBase::pointsToConstantMemory(Loc, OrLocal); |
570 | 4.83M | } |
571 | 175k | continue; |
572 | 175k | } |
573 | 26.6M | |
574 | 26.6M | // If both select values point to local memory, then so does the select. |
575 | 26.6M | if (const SelectInst *26.6M SI26.6M = dyn_cast<SelectInst>(V)) { |
576 | 62.6k | Worklist.push_back(SI->getTrueValue()); |
577 | 62.6k | Worklist.push_back(SI->getFalseValue()); |
578 | 62.6k | continue; |
579 | 62.6k | } |
580 | 26.5M | |
581 | 26.5M | // If all values incoming to a phi node point to local memory, then so does |
582 | 26.5M | // the phi. |
583 | 26.5M | if (const PHINode *26.5M PN26.5M = dyn_cast<PHINode>(V)) { |
584 | 5.15M | // Don't bother inspecting phi nodes with many operands. |
585 | 5.15M | if (PN->getNumIncomingValues() > MaxLookup5.15M ) { |
586 | 44.8k | Visited.clear(); |
587 | 44.8k | return AAResultBase::pointsToConstantMemory(Loc, OrLocal); |
588 | 44.8k | } |
589 | 5.11M | for (Value *IncValue : PN->incoming_values()) |
590 | 10.6M | Worklist.push_back(IncValue); |
591 | 5.15M | continue; |
592 | 5.15M | } |
593 | 21.4M | |
594 | 21.4M | // Otherwise be conservative. |
595 | 21.4M | Visited.clear(); |
596 | 21.4M | return AAResultBase::pointsToConstantMemory(Loc, OrLocal); |
597 | 5.38M | } while (!Worklist.empty() && 5.38M --MaxLookup5.19M ); |
598 | 26.8M | |
599 | 187k | Visited.clear(); |
600 | 187k | return Worklist.empty(); |
601 | 26.8M | } |
602 | | |
603 | | /// Returns the behavior when calling the given call site. |
604 | 22.7M | FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) { |
605 | 22.7M | if (CS.doesNotAccessMemory()) |
606 | 22.7M | // Can't do better than this. |
607 | 152k | return FMRB_DoesNotAccessMemory; |
608 | 22.5M | |
609 | 22.5M | FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior; |
610 | 22.5M | |
611 | 22.5M | // If the callsite knows it only reads memory, don't return worse |
612 | 22.5M | // than that. |
613 | 22.5M | if (CS.onlyReadsMemory()) |
614 | 530k | Min = FMRB_OnlyReadsMemory; |
615 | 22.0M | else if (22.0M CS.doesNotReadMemory()22.0M ) |
616 | 75 | Min = FMRB_DoesNotReadMemory; |
617 | 22.5M | |
618 | 22.5M | if (CS.onlyAccessesArgMemory()) |
619 | 2.55M | Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees); |
620 | 22.5M | |
621 | 22.5M | // If CS has operand bundles then aliasing attributes from the function it |
622 | 22.5M | // calls do not directly apply to the CallSite. This can be made more |
623 | 22.5M | // precise in the future. |
624 | 22.5M | if (!CS.hasOperandBundles()) |
625 | 22.5M | if (const Function *22.5M F22.5M = CS.getCalledFunction()) |
626 | 21.4M | Min = |
627 | 21.4M | FunctionModRefBehavior(Min & getBestAAResults().getModRefBehavior(F)); |
628 | 22.7M | |
629 | 22.7M | return Min; |
630 | 22.7M | } |
631 | | |
632 | | /// Returns the behavior when calling the given function. For use when the call |
633 | | /// site is not known. |
634 | 22.1M | FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) { |
635 | 22.1M | // If the function declares it doesn't access memory, we can't do better. |
636 | 22.1M | if (F->doesNotAccessMemory()) |
637 | 9.78k | return FMRB_DoesNotAccessMemory; |
638 | 22.1M | |
639 | 22.1M | FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior; |
640 | 22.1M | |
641 | 22.1M | // If the function declares it only reads memory, go with that. |
642 | 22.1M | if (F->onlyReadsMemory()) |
643 | 544k | Min = FMRB_OnlyReadsMemory; |
644 | 21.6M | else if (21.6M F->doesNotReadMemory()21.6M ) |
645 | 75 | Min = FMRB_DoesNotReadMemory; |
646 | 22.1M | |
647 | 22.1M | if (F->onlyAccessesArgMemory()) |
648 | 2.57M | Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees); |
649 | 19.5M | else if (19.5M F->onlyAccessesInaccessibleMemory()19.5M ) |
650 | 155 | Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleMem); |
651 | 19.5M | else if (19.5M F->onlyAccessesInaccessibleMemOrArgMem()19.5M ) |
652 | 348 | Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleOrArgMem); |
653 | 22.1M | |
654 | 22.1M | return Min; |
655 | 22.1M | } |
656 | | |
657 | | /// Returns true if this is a writeonly (i.e Mod only) parameter. |
658 | | static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx, |
659 | 960k | const TargetLibraryInfo &TLI) { |
660 | 960k | if (CS.paramHasAttr(ArgIdx, Attribute::WriteOnly)) |
661 | 158k | return true; |
662 | 801k | |
663 | 801k | // We can bound the aliasing properties of memset_pattern16 just as we can |
664 | 801k | // for memcpy/memset. This is particularly important because the |
665 | 801k | // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16 |
666 | 801k | // whenever possible. |
667 | 801k | // FIXME Consider handling this in InferFunctionAttr.cpp together with other |
668 | 801k | // attributes. |
669 | 801k | LibFunc F; |
670 | 801k | if (CS.getCalledFunction() && 801k TLI.getLibFunc(*CS.getCalledFunction(), F)801k && |
671 | 801k | F == LibFunc_memset_pattern1632.7k && TLI.has(F)3.59k ) |
672 | 3.59k | if (3.59k ArgIdx == 03.59k ) |
673 | 1.94k | return true; |
674 | 799k | |
675 | 799k | // TODO: memset_pattern4, memset_pattern8 |
676 | 799k | // TODO: _chk variants |
677 | 799k | // TODO: strcmp, strcpy |
678 | 799k | |
679 | 799k | return false; |
680 | 799k | } |
681 | | |
682 | | ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS, |
683 | 960k | unsigned ArgIdx) { |
684 | 960k | // Checking for known builtin intrinsics and target library functions. |
685 | 960k | if (isWriteOnlyParam(CS, ArgIdx, TLI)) |
686 | 160k | return MRI_Mod; |
687 | 799k | |
688 | 799k | if (799k CS.paramHasAttr(ArgIdx, Attribute::ReadOnly)799k ) |
689 | 130k | return MRI_Ref; |
690 | 668k | |
691 | 668k | if (668k CS.paramHasAttr(ArgIdx, Attribute::ReadNone)668k ) |
692 | 1 | return MRI_NoModRef; |
693 | 668k | |
694 | 668k | return AAResultBase::getArgModRefInfo(CS, ArgIdx); |
695 | 668k | } |
696 | | |
697 | 36.6M | static bool isIntrinsicCall(ImmutableCallSite CS, Intrinsic::ID IID) { |
698 | 36.6M | const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()); |
699 | 4.82M | return II && II->getIntrinsicID() == IID; |
700 | 36.6M | } |
701 | | |
702 | | #ifndef NDEBUG |
703 | | static const Function *getParent(const Value *V) { |
704 | | if (const Instruction *inst = dyn_cast<Instruction>(V)) { |
705 | | if (!inst->getParent()) |
706 | | return nullptr; |
707 | | return inst->getParent()->getParent(); |
708 | | } |
709 | | |
710 | | if (const Argument *arg = dyn_cast<Argument>(V)) |
711 | | return arg->getParent(); |
712 | | |
713 | | return nullptr; |
714 | | } |
715 | | |
716 | | static bool notDifferentParent(const Value *O1, const Value *O2) { |
717 | | |
718 | | const Function *F1 = getParent(O1); |
719 | | const Function *F2 = getParent(O2); |
720 | | |
721 | | return !F1 || !F2 || F1 == F2; |
722 | | } |
723 | | #endif |
724 | | |
725 | | AliasResult BasicAAResult::alias(const MemoryLocation &LocA, |
726 | 111M | const MemoryLocation &LocB) { |
727 | 111M | assert(notDifferentParent(LocA.Ptr, LocB.Ptr) && |
728 | 111M | "BasicAliasAnalysis doesn't support interprocedural queries."); |
729 | 111M | |
730 | 111M | // If we have a directly cached entry for these locations, we have recursed |
731 | 111M | // through this once, so just return the cached results. Notably, when this |
732 | 111M | // happens, we don't clear the cache. |
733 | 111M | auto CacheIt = AliasCache.find(LocPair(LocA, LocB)); |
734 | 111M | if (CacheIt != AliasCache.end()) |
735 | 51.4M | return CacheIt->second; |
736 | 60.4M | |
737 | 60.4M | AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr, |
738 | 60.4M | LocB.Size, LocB.AATags); |
739 | 60.4M | // AliasCache rarely has more than 1 or 2 elements, always use |
740 | 60.4M | // shrink_and_clear so it quickly returns to the inline capacity of the |
741 | 60.4M | // SmallDenseMap if it ever grows larger. |
742 | 60.4M | // FIXME: This should really be shrink_to_inline_capacity_and_clear(). |
743 | 60.4M | AliasCache.shrink_and_clear(); |
744 | 60.4M | VisitedPhiBBs.clear(); |
745 | 60.4M | return Alias; |
746 | 60.4M | } |
747 | | |
/// Checks to see if the specified callsite can clobber the specified memory
/// object.
///
/// Since we only look at local properties of this function, we really can't
/// say much about this query. We do, however, use simple "address taken"
/// analysis on local objects.
ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
                                        const MemoryLocation &Loc) {
  assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
         "AliasAnalysis query involving multiple functions!");

  const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);

  // If this is a tail call and Loc.Ptr points to a stack location, we know that
  // the tail call cannot access or modify the local stack.
  // We cannot exclude byval arguments here; these belong to the caller of
  // the current function not to the current function, and a tail callee
  // may reference them.
  if (isa<AllocaInst>(Object))
    if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
      if (CI->isTailCall())
        return MRI_NoModRef;

  // If the pointer is to a locally allocated object that does not escape,
  // then the call can not mod/ref the pointer unless the call takes the pointer
  // as an argument, and itself doesn't capture it.
  if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
      isNonEscapingLocalObject(Object)) {

    // Optimistically assume that call doesn't touch Object and check this
    // assumption in the following loop.
    ModRefInfo Result = MRI_NoModRef;

    unsigned OperandNo = 0;
    for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end();
         CI != CE; ++CI, ++OperandNo) {
      // Only look at the no-capture or byval pointer arguments. If this
      // pointer were passed to arguments that were neither of these, then it
      // couldn't be no-capture.
      if (!(*CI)->getType()->isPointerTy() ||
          (!CS.doesNotCapture(OperandNo) &&
           OperandNo < CS.getNumArgOperands() && !CS.isByValArgument(OperandNo)))
        continue;

      // Call doesn't access memory through this operand, so we don't care
      // if it aliases with Object.
      if (CS.doesNotAccessMemory(OperandNo))
        continue;

      // If this is a no-capture pointer argument, see if we can tell that it
      // is impossible to alias the pointer we're checking.
      AliasResult AR =
          getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object));

      // Operand doesn't alias 'Object', continue looking for other aliases.
      if (AR == NoAlias)
        continue;
      // Operand aliases 'Object', but call doesn't modify it. Strengthen
      // initial assumption and keep looking in case if there are more aliases.
      if (CS.onlyReadsMemory(OperandNo)) {
        Result = static_cast<ModRefInfo>(Result | MRI_Ref);
        continue;
      }
      // Operand aliases 'Object' but call only writes into it.
      if (CS.doesNotReadMemory(OperandNo)) {
        Result = static_cast<ModRefInfo>(Result | MRI_Mod);
        continue;
      }
      // This operand aliases 'Object' and call reads and writes into it.
      // No stronger conclusion is possible, so stop scanning operands.
      Result = MRI_ModRef;
      break;
    }

    // Early return if we improved mod ref information
    if (Result != MRI_ModRef)
      return Result;
  }

  // If the CallSite is to malloc or calloc, we can assume that it doesn't
  // modify any IR visible value. This is only valid because we assume these
  // routines do not read values visible in the IR. TODO: Consider special
  // casing realloc and strdup routines which access only their arguments as
  // well. Or alternatively, replace all of this with inaccessiblememonly once
  // that's implemented fully.
  auto *Inst = CS.getInstruction();
  if (isMallocOrCallocLikeFn(Inst, &TLI)) {
    // Be conservative if the accessed pointer may alias the allocation -
    // fallback to the generic handling below.
    if (getBestAAResults().alias(MemoryLocation(Inst), Loc) == NoAlias)
      return MRI_NoModRef;
  }

  // The semantics of memcpy intrinsics forbid overlap between their respective
  // operands, i.e., source and destination of any given memcpy must no-alias.
  // If Loc must-aliases either one of these two locations, then it necessarily
  // no-aliases the other.
  if (auto *Inst = dyn_cast<MemCpyInst>(CS.getInstruction())) {
    AliasResult SrcAA, DestAA;

    if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst),
                                          Loc)) == MustAlias)
      // Loc is exactly the memcpy source thus disjoint from memcpy dest.
      return MRI_Ref;
    if ((DestAA = getBestAAResults().alias(MemoryLocation::getForDest(Inst),
                                           Loc)) == MustAlias)
      // The converse case.
      return MRI_Mod;

    // It's also possible for Loc to alias both src and dest, or neither.
    ModRefInfo rv = MRI_NoModRef;
    if (SrcAA != NoAlias)
      rv = static_cast<ModRefInfo>(rv | MRI_Ref);
    if (DestAA != NoAlias)
      rv = static_cast<ModRefInfo>(rv | MRI_Mod);
    return rv;
  }

  // While the assume intrinsic is marked as arbitrarily writing so that
  // proper control dependencies will be maintained, it never aliases any
  // particular memory location.
  if (isIntrinsicCall(CS, Intrinsic::assume))
    return MRI_NoModRef;

  // Like assumes, guard intrinsics are also marked as arbitrarily writing so
  // that proper control dependencies are maintained but they never mods any
  // particular memory location.
  //
  // *Unlike* assumes, guard intrinsics are modeled as reading memory since the
  // heap state at the point the guard is issued needs to be consistent in case
  // the guard invokes the "deopt" continuation.
  if (isIntrinsicCall(CS, Intrinsic::experimental_guard))
    return MRI_Ref;

  // Like assumes, invariant.start intrinsics were also marked as arbitrarily
  // writing so that proper control dependencies are maintained but they never
  // mod any particular memory location visible to the IR.
  // *Unlike* assumes (which are now modeled as NoModRef), invariant.start
  // intrinsic is now modeled as reading memory. This prevents hoisting the
  // invariant.start intrinsic over stores. Consider:
  // *ptr = 40;
  // *ptr = 50;
  // invariant_start(ptr)
  // int val = *ptr;
  // print(val);
  //
  // This cannot be transformed to:
  //
  // *ptr = 40;
  // invariant_start(ptr)
  // *ptr = 50;
  // int val = *ptr;
  // print(val);
  //
  // The transformation will cause the second store to be ignored (based on
  // rules of invariant.start) and print 40, while the first program always
  // prints 50.
  if (isIntrinsicCall(CS, Intrinsic::invariant_start))
    return MRI_Ref;

  // The AAResultBase base class has some smarts, lets use them.
  return AAResultBase::getModRefInfo(CS, Loc);
}
910 | | |
911 | | ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1, |
912 | 2.52M | ImmutableCallSite CS2) { |
913 | 2.52M | // While the assume intrinsic is marked as arbitrarily writing so that |
914 | 2.52M | // proper control dependencies will be maintained, it never aliases any |
915 | 2.52M | // particular memory location. |
916 | 2.52M | if (isIntrinsicCall(CS1, Intrinsic::assume) || |
917 | 2.52M | isIntrinsicCall(CS2, Intrinsic::assume)) |
918 | 2 | return MRI_NoModRef; |
919 | 2.52M | |
920 | 2.52M | // Like assumes, guard intrinsics are also marked as arbitrarily writing so |
921 | 2.52M | // that proper control dependencies are maintained but they never mod any |
922 | 2.52M | // particular memory location. |
923 | 2.52M | // |
924 | 2.52M | // *Unlike* assumes, guard intrinsics are modeled as reading memory since the |
925 | 2.52M | // heap state at the point the guard is issued needs to be consistent in case |
926 | 2.52M | // the guard invokes the "deopt" continuation. |
927 | 2.52M | |
928 | 2.52M | // NB! This function is *not* commutative, so we specical case two |
929 | 2.52M | // possibilities for guard intrinsics. |
930 | 2.52M | |
931 | 2.52M | if (2.52M isIntrinsicCall(CS1, Intrinsic::experimental_guard)2.52M ) |
932 | 2 | return getModRefBehavior(CS2) & MRI_Mod ? 2 MRI_Ref1 : MRI_NoModRef1 ; |
933 | 2.52M | |
934 | 2.52M | if (2.52M isIntrinsicCall(CS2, Intrinsic::experimental_guard)2.52M ) |
935 | 2 | return getModRefBehavior(CS1) & MRI_Mod ? 2 MRI_Mod1 : MRI_NoModRef1 ; |
936 | 2.52M | |
937 | 2.52M | // The AAResultBase base class has some smarts, lets use them. |
938 | 2.52M | return AAResultBase::getModRefInfo(CS1, CS2); |
939 | 2.52M | } |
940 | | |
/// Provide ad-hoc rules to disambiguate accesses through two GEP operators,
/// both having the exact same pointer operand.
static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
                                            uint64_t V1Size,
                                            const GEPOperator *GEP2,
                                            uint64_t V2Size,
                                            const DataLayout &DL) {
  assert(GEP1->getPointerOperand()->stripPointerCastsAndBarriers() ==
             GEP2->getPointerOperand()->stripPointerCastsAndBarriers() &&
         GEP1->getPointerOperandType() == GEP2->getPointerOperandType() &&
         "Expected GEPs with the same pointer operand");

  // Try to determine whether GEP1 and GEP2 index through arrays, into structs,
  // such that the struct field accesses provably cannot alias.
  // We also need at least two indices (the pointer, and the struct field).
  if (GEP1->getNumIndices() != GEP2->getNumIndices() ||
      GEP1->getNumIndices() < 2)
    return MayAlias;

  // If we don't know the size of the accesses through both GEPs, we can't
  // determine whether the struct fields accessed can't alias.
  if (V1Size == MemoryLocation::UnknownSize ||
      V2Size == MemoryLocation::UnknownSize)
    return MayAlias;

  // The last operand of each GEP is the candidate struct-field index.
  ConstantInt *C1 =
      dyn_cast<ConstantInt>(GEP1->getOperand(GEP1->getNumOperands() - 1));
  ConstantInt *C2 =
      dyn_cast<ConstantInt>(GEP2->getOperand(GEP2->getNumOperands() - 1));

  // If the last (struct) indices are constants and are equal, the other indices
  // might be also be dynamically equal, so the GEPs can alias.
  if (C1 && C2 && C1->getSExtValue() == C2->getSExtValue())
    return MayAlias;

  // Find the last-indexed type of the GEP, i.e., the type you'd get if
  // you stripped the last index.
  // On the way, look at each indexed type. If there's something other
  // than an array, different indices can lead to different final types.
  SmallVector<Value *, 8> IntermediateIndices;

  // Insert the first index; we don't need to check the type indexed
  // through it as it only drops the pointer indirection.
  assert(GEP1->getNumIndices() > 1 && "Not enough GEP indices to examine");
  IntermediateIndices.push_back(GEP1->getOperand(1));

  // Insert all the remaining indices but the last one.
  // Also, check that they all index through arrays.
  for (unsigned i = 1, e = GEP1->getNumIndices() - 1; i != e; ++i) {
    if (!isa<ArrayType>(GetElementPtrInst::getIndexedType(
            GEP1->getSourceElementType(), IntermediateIndices)))
      return MayAlias;
    IntermediateIndices.push_back(GEP1->getOperand(i + 1));
  }

  auto *Ty = GetElementPtrInst::getIndexedType(
      GEP1->getSourceElementType(), IntermediateIndices);
  StructType *LastIndexedStruct = dyn_cast<StructType>(Ty);

  if (isa<SequentialType>(Ty)) {
    // We know that:
    // - both GEPs begin indexing from the exact same pointer;
    // - the last indices in both GEPs are constants, indexing into a sequential
    //   type (array or pointer);
    // - both GEPs only index through arrays prior to that.
    //
    // Because array indices greater than the number of elements are valid in
    // GEPs, unless we know the intermediate indices are identical between
    // GEP1 and GEP2 we cannot guarantee that the last indexed arrays don't
    // partially overlap. We also need to check that the loaded size matches
    // the element size, otherwise we could still have overlap.
    const uint64_t ElementSize =
        DL.getTypeStoreSize(cast<SequentialType>(Ty)->getElementType());
    if (V1Size != ElementSize || V2Size != ElementSize)
      return MayAlias;

    // Intermediate indices must be pairwise identical for the final arrays
    // to be known to precisely overlap.
    for (unsigned i = 0, e = GEP1->getNumIndices() - 1; i != e; ++i)
      if (GEP1->getOperand(i + 1) != GEP2->getOperand(i + 1))
        return MayAlias;

    // Now we know that the array/pointer that GEP1 indexes into and that
    // GEP2 indexes into must either precisely overlap or be disjoint.
    // Because they cannot partially overlap and because fields in an array
    // cannot overlap, if we can prove the final indices are different between
    // GEP1 and GEP2, we can conclude GEP1 and GEP2 don't alias.

    // If the last indices are constants, we've already checked they don't
    // equal each other so we can exit early.
    if (C1 && C2)
      return NoAlias;
    {
      Value *GEP1LastIdx = GEP1->getOperand(GEP1->getNumOperands() - 1);
      Value *GEP2LastIdx = GEP2->getOperand(GEP2->getNumOperands() - 1);
      if (isa<PHINode>(GEP1LastIdx) || isa<PHINode>(GEP2LastIdx)) {
        // If one of the indices is a PHI node, be safe and only use
        // computeKnownBits so we don't make any assumptions about the
        // relationships between the two indices. This is important if we're
        // asking about values from different loop iterations. See PR32314.
        // TODO: We may be able to change the check so we only do this when
        // we definitely looked through a PHINode.
        if (GEP1LastIdx != GEP2LastIdx &&
            GEP1LastIdx->getType() == GEP2LastIdx->getType()) {
          // Disjoint known bits prove the indices differ.
          KnownBits Known1 = computeKnownBits(GEP1LastIdx, DL);
          KnownBits Known2 = computeKnownBits(GEP2LastIdx, DL);
          if (Known1.Zero.intersects(Known2.One) ||
              Known1.One.intersects(Known2.Zero))
            return NoAlias;
        }
      } else if (isKnownNonEqual(GEP1LastIdx, GEP2LastIdx, DL))
        return NoAlias;
    }
    return MayAlias;
  } else if (!LastIndexedStruct || !C1 || !C2) {
    // Not a struct, or the struct-field indices are not both constants:
    // nothing further can be proven.
    return MayAlias;
  }

  // We know that:
  // - both GEPs begin indexing from the exact same pointer;
  // - the last indices in both GEPs are constants, indexing into a struct;
  // - said indices are different, hence, the pointed-to fields are different;
  // - both GEPs only index through arrays prior to that.
  //
  // This lets us determine that the struct that GEP1 indexes into and the
  // struct that GEP2 indexes into must either precisely overlap or be
  // completely disjoint. Because they cannot partially overlap, indexing into
  // different non-overlapping fields of the struct will never alias.

  // Therefore, the only remaining thing needed to show that both GEPs can't
  // alias is that the fields are not overlapping.
  const StructLayout *SL = DL.getStructLayout(LastIndexedStruct);
  const uint64_t StructSize = SL->getSizeInBytes();
  const uint64_t V1Off = SL->getElementOffset(C1->getZExtValue());
  const uint64_t V2Off = SL->getElementOffset(C2->getZExtValue());

  // The second disjunct in the tail condition accounts for wrap-around when
  // the access extends past the end of the struct into the next array element.
  auto EltsDontOverlap = [StructSize](uint64_t V1Off, uint64_t V1Size,
                                      uint64_t V2Off, uint64_t V2Size) {
    return V1Off < V2Off && V1Off + V1Size <= V2Off &&
           ((V2Off + V2Size <= StructSize) ||
            (V2Off + V2Size - StructSize <= V1Off));
  };

  if (EltsDontOverlap(V1Off, V1Size, V2Off, V2Size) ||
      EltsDontOverlap(V2Off, V2Size, V1Off, V1Size))
    return NoAlias;

  return MayAlias;
}
1088 | | |
1089 | | // If a we have (a) a GEP and (b) a pointer based on an alloca, and the |
1090 | | // beginning of the object the GEP points would have a negative offset with |
1091 | | // repsect to the alloca, that means the GEP can not alias pointer (b). |
1092 | | // Note that the pointer based on the alloca may not be a GEP. For |
1093 | | // example, it may be the alloca itself. |
1094 | | // The same applies if (b) is based on a GlobalVariable. Note that just being |
1095 | | // based on isIdentifiedObject() is not enough - we need an identified object |
1096 | | // that does not permit access to negative offsets. For example, a negative |
1097 | | // offset from a noalias argument or call can be inbounds w.r.t the actual |
1098 | | // underlying object. |
1099 | | // |
1100 | | // For example, consider: |
1101 | | // |
1102 | | // struct { int f0, int f1, ...} foo; |
1103 | | // foo alloca; |
1104 | | // foo* random = bar(alloca); |
1105 | | // int *f0 = &alloca.f0 |
1106 | | // int *f1 = &random->f1; |
1107 | | // |
1108 | | // Which is lowered, approximately, to: |
1109 | | // |
1110 | | // %alloca = alloca %struct.foo |
1111 | | // %random = call %struct.foo* @random(%struct.foo* %alloca) |
1112 | | // %f0 = getelementptr inbounds %struct, %struct.foo* %alloca, i32 0, i32 0 |
1113 | | // %f1 = getelementptr inbounds %struct, %struct.foo* %random, i32 0, i32 1 |
1114 | | // |
1115 | | // Assume %f1 and %f0 alias. Then %f1 would point into the object allocated |
1116 | | // by %alloca. Since the %f1 GEP is inbounds, that means %random must also |
1117 | | // point into the same object. But since %f0 points to the beginning of %alloca, |
1118 | | // the highest %f1 can be is (%alloca + 3). This means %random can not be higher |
1119 | | // than (%alloca - 1), and so is not inbounds, a contradiction. |
1120 | | bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp, |
1121 | | const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject, |
1122 | 94.0M | uint64_t ObjectAccessSize) { |
1123 | 94.0M | // If the object access size is unknown, or the GEP isn't inbounds, bail. |
1124 | 94.0M | if (ObjectAccessSize == MemoryLocation::UnknownSize || 94.0M !GEPOp->isInBounds()91.1M ) |
1125 | 21.3M | return false; |
1126 | 72.6M | |
1127 | 72.6M | // We need the object to be an alloca or a globalvariable, and want to know |
1128 | 72.6M | // the offset of the pointer from the object precisely, so no variable |
1129 | 72.6M | // indices are allowed. |
1130 | 72.6M | if (72.6M !(isa<AllocaInst>(DecompObject.Base) || |
1131 | 57.6M | isa<GlobalVariable>(DecompObject.Base)) || |
1132 | 28.1M | !DecompObject.VarIndices.empty()) |
1133 | 53.4M | return false; |
1134 | 19.1M | |
1135 | 19.1M | int64_t ObjectBaseOffset = DecompObject.StructOffset + |
1136 | 19.1M | DecompObject.OtherOffset; |
1137 | 19.1M | |
1138 | 19.1M | // If the GEP has no variable indices, we know the precise offset |
1139 | 19.1M | // from the base, then use it. If the GEP has variable indices, we're in |
1140 | 19.1M | // a bit more trouble: we can't count on the constant offsets that come |
1141 | 19.1M | // from non-struct sources, since these can be "rewound" by a negative |
1142 | 19.1M | // variable offset. So use only offsets that came from structs. |
1143 | 19.1M | int64_t GEPBaseOffset = DecompGEP.StructOffset; |
1144 | 19.1M | if (DecompGEP.VarIndices.empty()) |
1145 | 17.7M | GEPBaseOffset += DecompGEP.OtherOffset; |
1146 | 94.0M | |
1147 | 94.0M | return (GEPBaseOffset >= ObjectBaseOffset + (int64_t)ObjectAccessSize); |
1148 | 94.0M | } |
1149 | | |
1150 | | /// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against |
1151 | | /// another pointer. |
1152 | | /// |
1153 | | /// We know that V1 is a GEP, but we don't know anything about V2. |
1154 | | /// UnderlyingV1 is GetUnderlyingObject(GEP1, DL), UnderlyingV2 is the same for |
1155 | | /// V2. |
1156 | | AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, |
1157 | | const AAMDNodes &V1AAInfo, const Value *V2, |
1158 | | uint64_t V2Size, const AAMDNodes &V2AAInfo, |
1159 | | const Value *UnderlyingV1, |
1160 | 53.4M | const Value *UnderlyingV2) { |
1161 | 53.4M | DecomposedGEP DecompGEP1, DecompGEP2; |
1162 | 53.4M | bool GEP1MaxLookupReached = |
1163 | 53.4M | DecomposeGEPExpression(GEP1, DecompGEP1, DL, &AC, DT); |
1164 | 53.4M | bool GEP2MaxLookupReached = |
1165 | 53.4M | DecomposeGEPExpression(V2, DecompGEP2, DL, &AC, DT); |
1166 | 53.4M | |
1167 | 53.4M | int64_t GEP1BaseOffset = DecompGEP1.StructOffset + DecompGEP1.OtherOffset; |
1168 | 53.4M | int64_t GEP2BaseOffset = DecompGEP2.StructOffset + DecompGEP2.OtherOffset; |
1169 | 53.4M | |
1170 | 53.4M | assert(DecompGEP1.Base == UnderlyingV1 && DecompGEP2.Base == UnderlyingV2 && |
1171 | 53.4M | "DecomposeGEPExpression returned a result different from " |
1172 | 53.4M | "GetUnderlyingObject"); |
1173 | 53.4M | |
1174 | 53.4M | // If the GEP's offset relative to its base is such that the base would |
1175 | 53.4M | // fall below the start of the object underlying V2, then the GEP and V2 |
1176 | 53.4M | // cannot alias. |
1177 | 53.4M | if (!GEP1MaxLookupReached && 53.4M !GEP2MaxLookupReached53.2M && |
1178 | 53.2M | isGEPBaseAtNegativeOffset(GEP1, DecompGEP1, DecompGEP2, V2Size)) |
1179 | 3.47M | return NoAlias; |
1180 | 49.9M | // If we have two gep instructions with must-alias or not-alias'ing base |
1181 | 49.9M | // pointers, figure out if the indexes to the GEP tell us anything about the |
1182 | 49.9M | // derived pointer. |
1183 | 49.9M | if (const GEPOperator *49.9M GEP249.9M = dyn_cast<GEPOperator>(V2)) { |
1184 | 40.9M | // Check for the GEP base being at a negative offset, this time in the other |
1185 | 40.9M | // direction. |
1186 | 40.9M | if (!GEP1MaxLookupReached && 40.9M !GEP2MaxLookupReached40.8M && |
1187 | 40.8M | isGEPBaseAtNegativeOffset(GEP2, DecompGEP2, DecompGEP1, V1Size)) |
1188 | 6.93M | return NoAlias; |
1189 | 34.0M | // Do the base pointers alias? |
1190 | 34.0M | AliasResult BaseAlias = |
1191 | 34.0M | aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize, AAMDNodes(), |
1192 | 34.0M | UnderlyingV2, MemoryLocation::UnknownSize, AAMDNodes()); |
1193 | 34.0M | |
1194 | 34.0M | // Check for geps of non-aliasing underlying pointers where the offsets are |
1195 | 34.0M | // identical. |
1196 | 34.0M | if ((BaseAlias == MayAlias) && 34.0M V1Size == V2Size12.5M ) { |
1197 | 6.57M | // Do the base pointers alias assuming type and size. |
1198 | 6.57M | AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, V1AAInfo, |
1199 | 6.57M | UnderlyingV2, V2Size, V2AAInfo); |
1200 | 6.57M | if (PreciseBaseAlias == NoAlias6.57M ) { |
1201 | 2.96M | // See if the computed offset from the common pointer tells us about the |
1202 | 2.96M | // relation of the resulting pointer. |
1203 | 2.96M | // If the max search depth is reached the result is undefined |
1204 | 2.96M | if (GEP2MaxLookupReached || 2.96M GEP1MaxLookupReached2.95M ) |
1205 | 21.2k | return MayAlias; |
1206 | 2.94M | |
1207 | 2.94M | // Same offsets. |
1208 | 2.94M | if (2.94M GEP1BaseOffset == GEP2BaseOffset && |
1209 | 396k | DecompGEP1.VarIndices == DecompGEP2.VarIndices) |
1210 | 166k | return NoAlias; |
1211 | 33.8M | } |
1212 | 6.57M | } |
1213 | 33.8M | |
1214 | 33.8M | // If we get a No or May, then return it immediately, no amount of analysis |
1215 | 33.8M | // will improve this situation. |
1216 | 33.8M | if (33.8M BaseAlias != MustAlias33.8M ) { |
1217 | 12.6M | assert(BaseAlias == NoAlias || BaseAlias == MayAlias); |
1218 | 12.6M | return BaseAlias; |
1219 | 12.6M | } |
1220 | 21.1M | |
1221 | 21.1M | // Otherwise, we have a MustAlias. Since the base pointers alias each other |
1222 | 21.1M | // exactly, see if the computed offset from the common pointer tells us |
1223 | 21.1M | // about the relation of the resulting pointer. |
1224 | 21.1M | // If we know the two GEPs are based off of the exact same pointer (and not |
1225 | 21.1M | // just the same underlying object), see if that tells us anything about |
1226 | 21.1M | // the resulting pointers. |
1227 | 21.1M | if (21.1M GEP1->getPointerOperand()->stripPointerCastsAndBarriers() == |
1228 | 21.1M | GEP2->getPointerOperand()->stripPointerCastsAndBarriers() && |
1229 | 21.1M | GEP1->getPointerOperandType() == GEP2->getPointerOperandType()20.9M ) { |
1230 | 20.8M | AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL); |
1231 | 20.8M | // If we couldn't find anything interesting, don't abandon just yet. |
1232 | 20.8M | if (R != MayAlias) |
1233 | 3.03M | return R; |
1234 | 18.1M | } |
1235 | 18.1M | |
1236 | 18.1M | // If the max search depth is reached, the result is undefined |
1237 | 18.1M | if (18.1M GEP2MaxLookupReached || 18.1M GEP1MaxLookupReached18.1M ) |
1238 | 10.5k | return MayAlias; |
1239 | 18.1M | |
1240 | 18.1M | // Subtract the GEP2 pointer from the GEP1 pointer to find out their |
1241 | 18.1M | // symbolic difference. |
1242 | 18.1M | GEP1BaseOffset -= GEP2BaseOffset; |
1243 | 18.1M | GetIndexDifference(DecompGEP1.VarIndices, DecompGEP2.VarIndices); |
1244 | 18.1M | |
1245 | 49.9M | } else { |
1246 | 8.98M | // Check to see if these two pointers are related by the getelementptr |
1247 | 8.98M | // instruction. If one pointer is a GEP with a non-zero index of the other |
1248 | 8.98M | // pointer, we know they cannot alias. |
1249 | 8.98M | |
1250 | 8.98M | // If both accesses are unknown size, we can't do anything useful here. |
1251 | 8.98M | if (V1Size == MemoryLocation::UnknownSize && |
1252 | 1.51M | V2Size == MemoryLocation::UnknownSize) |
1253 | 1.51M | return MayAlias; |
1254 | 7.47M | |
1255 | 7.47M | AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize, |
1256 | 7.47M | AAMDNodes(), V2, MemoryLocation::UnknownSize, |
1257 | 7.47M | V2AAInfo, nullptr, UnderlyingV2); |
1258 | 7.47M | if (R != MustAlias7.47M ) { |
1259 | 6.04M | // If V2 may alias GEP base pointer, conservatively returns MayAlias. |
1260 | 6.04M | // If V2 is known not to alias GEP base pointer, then the two values |
1261 | 6.04M | // cannot alias per GEP semantics: "Any memory access must be done through |
1262 | 6.04M | // a pointer value associated with an address range of the memory access, |
1263 | 6.04M | // otherwise the behavior is undefined.". |
1264 | 6.04M | assert(R == NoAlias || R == MayAlias); |
1265 | 6.04M | return R; |
1266 | 6.04M | } |
1267 | 1.42M | |
1268 | 1.42M | // If the max search depth is reached the result is undefined |
1269 | 1.42M | if (1.42M GEP1MaxLookupReached1.42M ) |
1270 | 854 | return MayAlias; |
1271 | 19.5M | } |
1272 | 19.5M | |
1273 | 19.5M | // In the two GEP Case, if there is no difference in the offsets of the |
1274 | 19.5M | // computed pointers, the resultant pointers are a must alias. This |
1275 | 19.5M | // happens when we have two lexically identical GEP's (for example). |
1276 | 19.5M | // |
1277 | 19.5M | // In the other case, if we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2 |
1278 | 19.5M | // must aliases the GEP, the end result is a must alias also. |
1279 | 19.5M | if (19.5M GEP1BaseOffset == 0 && 19.5M DecompGEP1.VarIndices.empty()324k ) |
1280 | 83.8k | return MustAlias; |
1281 | 19.4M | |
1282 | 19.4M | // If there is a constant difference between the pointers, but the difference |
1283 | 19.4M | // is less than the size of the associated memory object, then we know |
1284 | 19.4M | // that the objects are partially overlapping. If the difference is |
1285 | 19.4M | // greater, we know they do not overlap. |
1286 | 19.4M | if (19.4M GEP1BaseOffset != 0 && 19.4M DecompGEP1.VarIndices.empty()19.2M ) { |
1287 | 18.6M | if (GEP1BaseOffset >= 018.6M ) { |
1288 | 3.35M | if (V2Size != MemoryLocation::UnknownSize3.35M ) { |
1289 | 3.12M | if ((uint64_t)GEP1BaseOffset < V2Size) |
1290 | 61.4k | return PartialAlias; |
1291 | 3.06M | return NoAlias; |
1292 | 3.06M | } |
1293 | 0 | } else { |
1294 | 15.3M | // We have the situation where: |
1295 | 15.3M | // + + |
1296 | 15.3M | // | BaseOffset | |
1297 | 15.3M | // ---------------->| |
1298 | 15.3M | // |-->V1Size |-------> V2Size |
1299 | 15.3M | // GEP1 V2 |
1300 | 15.3M | // We need to know that V2Size is not unknown, otherwise we might have |
1301 | 15.3M | // stripped a gep with negative index ('gep <ptr>, -1, ...). |
1302 | 15.3M | if (V1Size != MemoryLocation::UnknownSize && |
1303 | 15.3M | V2Size != MemoryLocation::UnknownSize15.2M ) { |
1304 | 15.2M | if (-(uint64_t)GEP1BaseOffset < V1Size) |
1305 | 4.40k | return PartialAlias; |
1306 | 15.2M | return NoAlias; |
1307 | 15.2M | } |
1308 | 15.3M | } |
1309 | 18.6M | } |
1310 | 1.13M | |
1311 | 1.13M | if (1.13M !DecompGEP1.VarIndices.empty()1.13M ) { |
1312 | 812k | uint64_t Modulo = 0; |
1313 | 812k | bool AllPositive = true; |
1314 | 2.30M | for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e2.30M ; ++i1.48M ) { |
1315 | 1.48M | |
1316 | 1.48M | // Try to distinguish something like &A[i][1] against &A[42][0]. |
1317 | 1.48M | // Grab the least significant bit set in any of the scales. We |
1318 | 1.48M | // don't need std::abs here (even if the scale's negative) as we'll |
1319 | 1.48M | // be ^'ing Modulo with itself later. |
1320 | 1.48M | Modulo |= (uint64_t)DecompGEP1.VarIndices[i].Scale; |
1321 | 1.48M | |
1322 | 1.48M | if (AllPositive1.48M ) { |
1323 | 911k | // If the Value could change between cycles, then any reasoning about |
1324 | 911k | // the Value this cycle may not hold in the next cycle. We'll just |
1325 | 911k | // give up if we can't determine conditions that hold for every cycle: |
1326 | 911k | const Value *V = DecompGEP1.VarIndices[i].V; |
1327 | 911k | |
1328 | 911k | KnownBits Known = computeKnownBits(V, DL, 0, &AC, nullptr, DT); |
1329 | 911k | bool SignKnownZero = Known.isNonNegative(); |
1330 | 911k | bool SignKnownOne = Known.isNegative(); |
1331 | 911k | |
1332 | 911k | // Zero-extension widens the variable, and so forces the sign |
1333 | 911k | // bit to zero. |
1334 | 722k | bool IsZExt = DecompGEP1.VarIndices[i].ZExtBits > 0 || isa<ZExtInst>(V); |
1335 | 911k | SignKnownZero |= IsZExt; |
1336 | 911k | SignKnownOne &= !IsZExt; |
1337 | 911k | |
1338 | 911k | // If the variable begins with a zero then we know it's |
1339 | 911k | // positive, regardless of whether the value is signed or |
1340 | 911k | // unsigned. |
1341 | 911k | int64_t Scale = DecompGEP1.VarIndices[i].Scale; |
1342 | 911k | AllPositive = |
1343 | 911k | (SignKnownZero && Scale >= 0249k ) || (SignKnownOne && 753k Scale < 010 ); |
1344 | 911k | } |
1345 | 1.48M | } |
1346 | 812k | |
1347 | 812k | Modulo = Modulo ^ (Modulo & (Modulo - 1)); |
1348 | 812k | |
1349 | 812k | // We can compute the difference between the two addresses |
1350 | 812k | // mod Modulo. Check whether that difference guarantees that the |
1351 | 812k | // two locations do not alias. |
1352 | 812k | uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1); |
1353 | 812k | if (V1Size != MemoryLocation::UnknownSize && |
1354 | 812k | V2Size != MemoryLocation::UnknownSize794k && ModOffset >= V2Size791k && |
1355 | 63.9k | V1Size <= Modulo - ModOffset) |
1356 | 60.4k | return NoAlias; |
1357 | 752k | |
1358 | 752k | // If we know all the variables are positive, then GEP1 >= GEP1BasePtr. |
1359 | 752k | // If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers |
1360 | 752k | // don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr. |
1361 | 752k | if (752k AllPositive && 752k GEP1BaseOffset > 055.1k && V2Size <= (uint64_t)GEP1BaseOffset28.8k ) |
1362 | 28.5k | return NoAlias; |
1363 | 723k | |
1364 | 723k | if (723k constantOffsetHeuristic(DecompGEP1.VarIndices, V1Size, V2Size, |
1365 | 723k | GEP1BaseOffset, &AC, DT)) |
1366 | 10.0k | return NoAlias; |
1367 | 1.03M | } |
1368 | 1.03M | |
1369 | 1.03M | // Statically, we can see that the base objects are the same, but the |
1370 | 1.03M | // pointers have dynamic offsets which we can't resolve. And none of our |
1371 | 1.03M | // little tricks above worked. |
1372 | 1.03M | return MayAlias; |
1373 | 1.03M | } |
1374 | | |
1375 | 3.76M | static AliasResult MergeAliasResults(AliasResult A, AliasResult B) { |
1376 | 3.76M | // If the results agree, take it. |
1377 | 3.76M | if (A == B) |
1378 | 2.65M | return A; |
1379 | 1.10M | // A mix of PartialAlias and MustAlias is PartialAlias. |
1380 | 1.10M | if (1.10M (A == PartialAlias && 1.10M B == MustAlias157 ) || |
1381 | 1.10M | (B == PartialAlias && 1.10M A == MustAlias16 )) |
1382 | 2 | return PartialAlias; |
1383 | 1.10M | // Otherwise, we don't know anything. |
1384 | 1.10M | return MayAlias; |
1385 | 1.10M | } |
1386 | | |
/// Provides a bunch of ad-hoc rules to disambiguate a Select instruction
/// against another.
AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
                                       const AAMDNodes &SIAAInfo,
                                       const Value *V2, uint64_t V2Size,
                                       const AAMDNodes &V2AAInfo,
                                       const Value *UnderV2) {
  // When both values are selects over the same condition only matching arms
  // can be chosen together, so compare the arms pairwise; the answer is the
  // merge of the two arm-vs-arm queries.
  if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
    if (SI->getCondition() == SI2->getCondition()) {
      AliasResult TrueArms = aliasCheck(SI->getTrueValue(), SISize, SIAAInfo,
                                        SI2->getTrueValue(), V2Size, V2AAInfo);
      if (TrueArms == MayAlias)
        return MayAlias;
      AliasResult FalseArms =
          aliasCheck(SI->getFalseValue(), SISize, SIAAInfo,
                     SI2->getFalseValue(), V2Size, V2AAInfo);
      return MergeAliasResults(FalseArms, TrueArms);
    }

  // Otherwise compare V2 against each arm of the select. Only if both arms
  // agree on NoAlias / MustAlias can we report anything stronger than
  // MayAlias, so bail out early as soon as the first arm is inconclusive.
  AliasResult TrueSide = aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(),
                                    SISize, SIAAInfo, UnderV2);
  if (TrueSide == MayAlias)
    return MayAlias;

  AliasResult FalseSide = aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(),
                                     SISize, SIAAInfo, UnderV2);
  return MergeAliasResults(FalseSide, TrueSide);
}
1421 | | |
/// Provide a bunch of ad-hoc rules to disambiguate a PHI instruction against
/// another.
AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
                                    const AAMDNodes &PNAAInfo, const Value *V2,
                                    uint64_t V2Size, const AAMDNodes &V2AAInfo,
                                    const Value *UnderV2) {
  // Track phi nodes we have visited. We use this information when we determine
  // value equivalence.
  VisitedPhiBBs.insert(PN->getParent());

  // If the values are PHIs in the same block, we can do a more precise
  // as well as efficient check: just check for aliases between the values
  // on corresponding edges.
  if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
    if (PN2->getParent() == PN->getParent()) {
      LocPair Locs(MemoryLocation(PN, PNSize, PNAAInfo),
                   MemoryLocation(V2, V2Size, V2AAInfo));
      if (PN > V2)
        std::swap(Locs.first, Locs.second);
      // Analyse the PHIs' inputs under the assumption that the PHIs are
      // NoAlias.
      // If the PHIs are May/MustAlias there must be (recursively) an input
      // operand from outside the PHIs' cycle that is MayAlias/MustAlias or
      // there must be an operation on the PHIs within the PHIs' value cycle
      // that causes a MayAlias.
      // Pretend the phis do not alias.
      AliasResult Alias = NoAlias;
      assert(AliasCache.count(Locs) &&
             "There must exist an entry for the phi node");
      AliasResult OrigAliasResult = AliasCache[Locs];
      AliasCache[Locs] = NoAlias;

      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
        AliasResult ThisAlias =
            aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo,
                       PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
                       V2Size, V2AAInfo);
        Alias = MergeAliasResults(ThisAlias, Alias);
        if (Alias == MayAlias)
          break;
      }

      // Reset if speculation failed.
      if (Alias != NoAlias)
        AliasCache[Locs] = OrigAliasResult;

      return Alias;
    }

  SmallPtrSet<Value *, 4> UniqueSrc;
  SmallVector<Value *, 4> V1Srcs;
  bool isRecursive = false;
  for (Value *PV1 : PN->incoming_values()) {
    if (isa<PHINode>(PV1))
      // If any of the source itself is a PHI, return MayAlias conservatively
      // to avoid compile time explosion. The worst possible case is if both
      // sides are PHI nodes. In which case, this is O(m x n) time where 'm'
      // and 'n' are the number of PHI sources.
      return MayAlias;

    if (EnableRecPhiAnalysis)
      if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) {
        // Check whether the incoming value is a GEP that advances the pointer
        // result of this PHI node (e.g. in a loop). If this is the case, we
        // would recurse and always get a MayAlias. Handle this case specially
        // below.
        if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 &&
            isa<ConstantInt>(PV1GEP->idx_begin())) {
          isRecursive = true;
          continue;
        }
      }

    if (UniqueSrc.insert(PV1).second)
      V1Srcs.push_back(PV1);
  }

  // BUGFIX: if every incoming value was recognized as a recursive GEP over
  // this PHI, the loop above pushed nothing into V1Srcs and the unchecked
  // V1Srcs[0] access below would read out of bounds. With no non-phi
  // underlying value to compare against, conservatively answer MayAlias.
  if (V1Srcs.empty())
    return MayAlias;

  // If this PHI node is recursive, set the size of the accessed memory to
  // unknown to represent all the possible values the GEP could advance the
  // pointer to.
  if (isRecursive)
    PNSize = MemoryLocation::UnknownSize;

  AliasResult Alias =
      aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize, PNAAInfo, UnderV2);

  // Early exit if the check of the first PHI source against V2 is MayAlias.
  // Other results are not possible.
  if (Alias == MayAlias)
    return MayAlias;

  // If all sources of the PHI node NoAlias or MustAlias V2, then returns
  // NoAlias / MustAlias. Otherwise, returns MayAlias.
  for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
    Value *V = V1Srcs[i];

    AliasResult ThisAlias =
        aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo, UnderV2);
    Alias = MergeAliasResults(ThisAlias, Alias);
    if (Alias == MayAlias)
      break;
  }

  return Alias;
}
1528 | | |
/// Provides a bunch of ad-hoc rules to disambiguate in common cases, such as
/// array references.
AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
                                      AAMDNodes V1AAInfo, const Value *V2,
                                      uint64_t V2Size, AAMDNodes V2AAInfo,
                                      const Value *O1, const Value *O2) {
  // If either of the memory references is empty, it doesn't matter what the
  // pointer values are.
  if (V1Size == 0 || V2Size == 0)
    return NoAlias;

  // Strip off any casts if they exist.
  V1 = V1->stripPointerCastsAndBarriers();
  V2 = V2->stripPointerCastsAndBarriers();

  // If V1 or V2 is undef, the result is NoAlias because we can always pick a
  // value for undef that aliases nothing in the program.
  if (isa<UndefValue>(V1) || isa<UndefValue>(V2))
    return NoAlias;

  // Are we checking for alias of the same value?
  // Because we look 'through' phi nodes, we could look at "Value" pointers from
  // different iterations. We must therefore make sure that this is not the
  // case. The function isValueEqualInPotentialCycles ensures that this cannot
  // happen by looking at the visited phi nodes and making sure they cannot
  // reach the value.
  if (isValueEqualInPotentialCycles(V1, V2))
    return MustAlias;

  if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
    return NoAlias; // Scalars cannot alias each other

  // Figure out what objects these things are pointing to if we can. O1/O2 may
  // be supplied by the caller to avoid recomputing the underlying objects.
  if (O1 == nullptr)
    O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);

  if (O2 == nullptr)
    O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);

  // Null values in the default address space don't point to any object, so they
  // don't alias any other pointer.
  if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O1))
    if (CPN->getType()->getAddressSpace() == 0)
      return NoAlias;
  if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O2))
    if (CPN->getType()->getAddressSpace() == 0)
      return NoAlias;

  if (O1 != O2) {
    // If V1/V2 point to two different objects, we know that we have no alias.
    if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
      return NoAlias;

    // Constant pointers can't alias with non-const isIdentifiedObject objects.
    if ((isa<Constant>(O1) && isIdentifiedObject(O2) && !isa<Constant>(O2)) ||
        (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1)))
      return NoAlias;

    // Function arguments can't alias with things that are known to be
    // unambigously identified at the function level.
    if ((isa<Argument>(O1) && isIdentifiedFunctionLocal(O2)) ||
        (isa<Argument>(O2) && isIdentifiedFunctionLocal(O1)))
      return NoAlias;

    // If one pointer is the result of a call/invoke or load and the other is a
    // non-escaping local object within the same function, then we know the
    // object couldn't escape to a point where the call could return it.
    //
    // Note that if the pointers are in different functions, there are a
    // variety of complications. A call with a nocapture argument may still
    // temporary store the nocapture argument's value in a temporary memory
    // location if that memory location doesn't escape. Or it may pass a
    // nocapture value to other functions as long as they don't capture it.
    if (isEscapeSource(O1) && isNonEscapingLocalObject(O2))
      return NoAlias;
    if (isEscapeSource(O2) && isNonEscapingLocalObject(O1))
      return NoAlias;
  }

  // If the size of one access is larger than the entire object on the other
  // side, then we know such behavior is undefined and can assume no alias.
  if ((V1Size != MemoryLocation::UnknownSize &&
       isObjectSmallerThan(O2, V1Size, DL, TLI)) ||
      (V2Size != MemoryLocation::UnknownSize &&
       isObjectSmallerThan(O1, V2Size, DL, TLI)))
    return NoAlias;

  // Check the cache before climbing up use-def chains. This also terminates
  // otherwise infinitely recursive queries. The pair is canonicalized (lower
  // pointer first) so both query orders hit the same cache slot, and a
  // MayAlias entry is pre-inserted so recursive queries below terminate.
  LocPair Locs(MemoryLocation(V1, V1Size, V1AAInfo),
               MemoryLocation(V2, V2Size, V2AAInfo));
  if (V1 > V2)
    std::swap(Locs.first, Locs.second);
  std::pair<AliasCacheTy::iterator, bool> Pair =
      AliasCache.insert(std::make_pair(Locs, MayAlias));
  if (!Pair.second)
    return Pair.first->second;

  // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the
  // GEP can't simplify, we don't even look at the PHI cases.
  // Canonicalize so the GEP (if any) is in V1; O1/O2 and sizes swap in step.
  if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) {
    std::swap(V1, V2);
    std::swap(V1Size, V2Size);
    std::swap(O1, O2);
    std::swap(V1AAInfo, V2AAInfo);
  }
  if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
    AliasResult Result =
        aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2);
    if (Result != MayAlias)
      return AliasCache[Locs] = Result;
  }

  // Canonicalize so the PHI (if any) is in V1.
  if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
    std::swap(V1, V2);
    std::swap(O1, O2);
    std::swap(V1Size, V2Size);
    std::swap(V1AAInfo, V2AAInfo);
  }
  if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
    AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo,
                                  V2, V2Size, V2AAInfo, O2);
    if (Result != MayAlias)
      return AliasCache[Locs] = Result;
  }

  // Canonicalize so the select (if any) is in V1.
  if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
    std::swap(V1, V2);
    std::swap(O1, O2);
    std::swap(V1Size, V2Size);
    std::swap(V1AAInfo, V2AAInfo);
  }
  if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
    AliasResult Result =
        aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2);
    if (Result != MayAlias)
      return AliasCache[Locs] = Result;
  }

  // If both pointers are pointing into the same object and one of them
  // accesses the entire object, then the accesses must overlap in some way.
  if (O1 == O2)
    if ((V1Size != MemoryLocation::UnknownSize &&
         isObjectSize(O1, V1Size, DL, TLI)) ||
        (V2Size != MemoryLocation::UnknownSize &&
         isObjectSize(O2, V2Size, DL, TLI)))
      return AliasCache[Locs] = PartialAlias;

  // Recurse back into the best AA results we have, potentially with refined
  // memory locations. We have already ensured that BasicAA has a MayAlias
  // cache result for these, so any recursion back into BasicAA won't loop.
  AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second);
  return AliasCache[Locs] = Result;
}
1683 | | |
1684 | | /// Check whether two Values can be considered equivalent. |
1685 | | /// |
1686 | | /// In addition to pointer equivalence of \p V1 and \p V2 this checks whether |
1687 | | /// they can not be part of a cycle in the value graph by looking at all |
1688 | | /// visited phi nodes an making sure that the phis cannot reach the value. We |
1689 | | /// have to do this because we are looking through phi nodes (That is we say |
1690 | | /// noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB). |
1691 | | bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V, |
1692 | 131M | const Value *V2) { |
1693 | 131M | if (V != V2) |
1694 | 98.4M | return false; |
1695 | 33.0M | |
1696 | 33.0M | const Instruction *Inst = dyn_cast<Instruction>(V); |
1697 | 33.0M | if (!Inst) |
1698 | 13.8M | return true; |
1699 | 19.1M | |
1700 | 19.1M | if (19.1M VisitedPhiBBs.empty()19.1M ) |
1701 | 18.7M | return true; |
1702 | 453k | |
1703 | 453k | if (453k VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck453k ) |
1704 | 0 | return false; |
1705 | 453k | |
1706 | 453k | // Make sure that the visited phis cannot reach the Value. This ensures that |
1707 | 453k | // the Values cannot come from different iterations of a potential cycle the |
1708 | 453k | // phi nodes could be involved in. |
1709 | 453k | for (auto *P : VisitedPhiBBs) |
1710 | 477k | if (477k isPotentiallyReachable(&P->front(), Inst, DT, LI)477k ) |
1711 | 393k | return false; |
1712 | 60.1k | |
1713 | 60.1k | return true; |
1714 | 60.1k | } |
1715 | | |
/// Computes the symbolic difference between two de-composed GEPs.
///
/// Dest and Src are the variable indices from two decomposed GetElementPtr
/// instructions GEP1 and GEP2 which have common base pointers. On return,
/// Dest holds (Dest - Src) as a list of scaled variable terms.
void BasicAAResult::GetIndexDifference(
    SmallVectorImpl<VariableGEPIndex> &Dest,
    const SmallVectorImpl<VariableGEPIndex> &Src) {
  if (Src.empty())
    return;

  for (const VariableGEPIndex &SrcEntry : Src) {
    const Value *V = SrcEntry.V;
    unsigned ZExtBits = SrcEntry.ZExtBits, SExtBits = SrcEntry.SExtBits;
    int64_t Scale = SrcEntry.Scale;

    // Find a matching term (same value and same extension widths) in Dest.
    // This is quadratic, but pointer indices almost never carry more than a
    // couple of variable components.
    for (unsigned j = 0, NumDest = Dest.size(); j != NumDest; ++j) {
      if (!isValueEqualInPotentialCycles(Dest[j].V, V) ||
          Dest[j].ZExtBits != ZExtBits || Dest[j].SExtBits != SExtBits)
        continue;

      // Subtract the source scale from the matching Dest entry; if the
      // scales were equal the terms cancel completely and the entry is
      // dropped.
      if (Dest[j].Scale != Scale)
        Dest[j].Scale -= Scale;
      else
        Dest.erase(Dest.begin() + j);
      // Mark this source term as consumed.
      Scale = 0;
      break;
    }

    // No cancellation happened: carry the negated source term into Dest.
    if (Scale) {
      VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale};
      Dest.push_back(Entry);
    }
  }
}
1755 | | |
/// Heuristic for two variable GEP indices of the form (%x + c0) and
/// -(%x + c1): the two terms then differ by a known constant, and the
/// accesses cannot overlap if both access sizes fit inside the minimum gap
/// that constant implies.
bool BasicAAResult::constantOffsetHeuristic(
    const SmallVectorImpl<VariableGEPIndex> &VarIndices, uint64_t V1Size,
    uint64_t V2Size, int64_t BaseOffset, AssumptionCache *AC,
    DominatorTree *DT) {
  // Only handle exactly two variable indices, and only with known sizes.
  if (VarIndices.size() != 2 || V1Size == MemoryLocation::UnknownSize ||
      V2Size == MemoryLocation::UnknownSize)
    return false;

  const VariableGEPIndex &Var0 = VarIndices[0], &Var1 = VarIndices[1];

  // The two terms must use identical extensions and exactly opposite scales,
  // otherwise their difference is not a simple constant.
  if (Var0.ZExtBits != Var1.ZExtBits || Var0.SExtBits != Var1.SExtBits ||
      Var0.Scale != -Var1.Scale)
    return false;

  unsigned Width = Var1.V->getType()->getIntegerBitWidth();

  // Strip the extensions off Var0 and Var1 and run another round of
  // GetLinearExpression decomposition: e.g. if Var0 is zext(%x + 1) we
  // should end up with V0 == %x and V0Offset == 1.
  APInt V0Scale(Width, 0), V0Offset(Width, 0), V1Scale(Width, 0),
      V1Offset(Width, 0);
  bool NSW = true, NUW = true;
  unsigned V0ZExtBits = 0, V0SExtBits = 0, V1ZExtBits = 0, V1SExtBits = 0;
  const Value *V0 = GetLinearExpression(Var0.V, V0Scale, V0Offset, V0ZExtBits,
                                        V0SExtBits, DL, 0, AC, DT, NSW, NUW);
  NSW = true;
  NUW = true;
  const Value *V1 = GetLinearExpression(Var1.V, V1Scale, V1Offset, V1ZExtBits,
                                        V1SExtBits, DL, 0, AC, DT, NSW, NUW);

  // Both decompositions must agree on everything but the constant offset.
  if (V0Scale != V1Scale || V0ZExtBits != V1ZExtBits ||
      V0SExtBits != V1SExtBits || !isValueEqualInPotentialCycles(V0, V1))
    return false;

  // We have a hit - Var0 and Var1 only differ by a constant offset!

  // Because the arithmetic may wrap, the smallest distance between the two
  // index values is the minimum of the difference and its modular
  // complement: e.g. for "add i3 %i, 5", %i == 7 gives (7 + 5) mod 8 == 4,
  // so the minimum distance between %i and %i + 5 is 3, not 5.
  APInt MinDiff = V0Offset - V1Offset, Wrapped = -MinDiff;
  MinDiff = APIntOps::umin(MinDiff, Wrapped);
  uint64_t MinDiffBytes = MinDiff.getZExtValue() * std::abs(Var0.Scale);

  // We can't definitely say whether GEP1 is before or after V2 due to
  // wrapping arithmetic, so only declare NoAlias if both V1Size and V2Size
  // (shifted by the constant base offset) fit in the MinDiffBytes gap.
  return V1Size + std::abs(BaseOffset) <= MinDiffBytes &&
         V2Size + std::abs(BaseOffset) <= MinDiffBytes;
}
1809 | | |
1810 | | //===----------------------------------------------------------------------===// |
1811 | | // BasicAliasAnalysis Pass |
1812 | | //===----------------------------------------------------------------------===// |
1813 | | |
1814 | | AnalysisKey BasicAA::Key; |
1815 | | |
1816 | 661 | BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) { |
1817 | 661 | return BasicAAResult(F.getParent()->getDataLayout(), |
1818 | 661 | AM.getResult<TargetLibraryAnalysis>(F), |
1819 | 661 | AM.getResult<AssumptionAnalysis>(F), |
1820 | 661 | &AM.getResult<DominatorTreeAnalysis>(F), |
1821 | 661 | AM.getCachedResult<LoopAnalysis>(F)); |
1822 | 661 | } |
1823 | | |
1824 | 368k | BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) { |
1825 | 368k | initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry()); |
1826 | 368k | } |
1827 | | |
// Legacy pass identification token; the legacy pass manager keys on the
// address of this variable, not its value.
char BasicAAWrapperPass::ID = 0;
1829 | | |
// Out-of-line virtual method to anchor the class's vtable to this
// translation unit.
void BasicAAWrapperPass::anchor() {}
1831 | | |
// Register BasicAAWrapperPass with the legacy pass manager under the
// command-line name "basicaa", declaring its required analyses.  The two
// trailing booleans are the macro's cfg-only and is-analysis flags.
INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa",
                      "Basic Alias Analysis (stateless AA impl)", true, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa",
                    "Basic Alias Analysis (stateless AA impl)", true, true)
1839 | | |
1840 | 33.5k | FunctionPass *llvm::createBasicAAWrapperPass() { |
1841 | 33.5k | return new BasicAAWrapperPass(); |
1842 | 33.5k | } |
1843 | | |
1844 | 9.69M | bool BasicAAWrapperPass::runOnFunction(Function &F) { |
1845 | 9.69M | auto &ACT = getAnalysis<AssumptionCacheTracker>(); |
1846 | 9.69M | auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>(); |
1847 | 9.69M | auto &DTWP = getAnalysis<DominatorTreeWrapperPass>(); |
1848 | 9.69M | auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); |
1849 | 9.69M | |
1850 | 9.69M | Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), TLIWP.getTLI(), |
1851 | 9.69M | ACT.getAssumptionCache(F), &DTWP.getDomTree(), |
1852 | 9.69M | LIWP ? &LIWP->getLoopInfo()2.85M : nullptr6.83M )); |
1853 | 9.69M | |
1854 | 9.69M | return false; |
1855 | 9.69M | } |
1856 | | |
1857 | 367k | void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { |
1858 | 367k | AU.setPreservesAll(); |
1859 | 367k | AU.addRequired<AssumptionCacheTracker>(); |
1860 | 367k | AU.addRequired<DominatorTreeWrapperPass>(); |
1861 | 367k | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
1862 | 367k | } |
1863 | | |
1864 | 996k | BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) { |
1865 | 996k | return BasicAAResult( |
1866 | 996k | F.getParent()->getDataLayout(), |
1867 | 996k | P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), |
1868 | 996k | P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F)); |
1869 | 996k | } |