//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines several CodeGen-specific LLVM IR analysis utilities.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Analysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"

using namespace llvm;

/// Compute the linearized index of a member in a nested aggregate/struct/array
/// by recursing and accumulating CurIndex as long as there are indices in the
/// index list.
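///
/// For example (an illustrative sketch of the leaf numbering, not a normative
/// spec): given the type { i32, { float, double }, i8 }, the scalar leaves are
/// numbered i32 -> 0, float -> 1, double -> 2, i8 -> 3, so the index list
/// {1, 1} (selecting the double field) linearizes to 2.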
unsigned llvm::ComputeLinearIndex(Type *Ty,
                                  const unsigned *Indices,
                                  const unsigned *IndicesEnd,
                                  unsigned CurIndex) {
  // Base case: We're done.
  if (Indices && Indices == IndicesEnd)
    return CurIndex;

  // Given a struct type, recursively traverse the elements.
  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (StructType::element_iterator EB = STy->element_begin(),
                                      EI = EB,
                                      EE = STy->element_end();
         EI != EE; ++EI) {
      if (Indices && *Indices == unsigned(EI - EB))
        return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
      CurIndex = ComputeLinearIndex(*EI, nullptr, nullptr, CurIndex);
    }
    assert(!Indices && "Unexpected out of bound");
    return CurIndex;
  }
  // Given an array type, recursively traverse the elements.
  else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Type *EltTy = ATy->getElementType();
    unsigned NumElts = ATy->getNumElements();
    // Compute the linear offset when jumping one element of the array.
    unsigned EltLinearOffset = ComputeLinearIndex(EltTy, nullptr, nullptr, 0);
    if (Indices) {
      assert(*Indices < NumElts && "Unexpected out of bound");
      // If the index is inside the array, compute the offset of the requested
      // element and recurse into it with the rest of the index list.
      CurIndex += EltLinearOffset * *Indices;
      return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
    }
    CurIndex += EltLinearOffset*NumElts;
    return CurIndex;
  }
  // We haven't found the type we're looking for, so keep searching.
  return CurIndex + 1;
}

/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
/// EVTs that represent all the individual underlying
/// non-aggregate types that comprise it.
///
/// If Offsets is non-null, it points to a vector to be filled in
/// with the in-memory offsets of each of the individual values.
///
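/// For example (an illustrative sketch, assuming a data layout where i32 and
/// float are both 4 bytes): for the type { i32, [2 x float] }, ValueVTs
/// receives { i32, f32, f32 } and, if non-null, Offsets receives { 0, 4, 8 }.
///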
void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                           SmallVectorImpl<uint64_t> *Offsets,
                           uint64_t StartingOffset) {
  // Given a struct type, recursively traverse the elements.
  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    const StructLayout *SL = DL.getStructLayout(STy);
    for (StructType::element_iterator EB = STy->element_begin(),
                                      EI = EB,
                                      EE = STy->element_end();
         EI != EE; ++EI)
      ComputeValueVTs(TLI, DL, *EI, ValueVTs, Offsets,
                      StartingOffset + SL->getElementOffset(EI - EB));
    return;
  }
  // Given an array type, recursively traverse the elements.
  if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Type *EltTy = ATy->getElementType();
    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
      ComputeValueVTs(TLI, DL, EltTy, ValueVTs, Offsets,
                      StartingOffset + i * EltSize);
    return;
  }
  // Interpret void as zero return values.
  if (Ty->isVoidTy())
    return;
  // Base case: we can get an EVT for this LLVM IR type.
  ValueVTs.push_back(TLI.getValueType(DL, Ty));
  if (Offsets)
    Offsets->push_back(StartingOffset);
}

/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
GlobalValue *llvm::ExtractTypeInfo(Value *V) {
  V = V->stripPointerCasts();
  GlobalValue *GV = dyn_cast<GlobalValue>(V);
  GlobalVariable *Var = dyn_cast<GlobalVariable>(V);

  if (Var && Var->getName() == "llvm.eh.catch.all.value") {
    assert(Var->hasInitializer() &&
           "The EH catch-all value must have an initializer");
    Value *Init = Var->getInitializer();
    GV = dyn_cast<GlobalValue>(Init);
    if (!GV) V = cast<ConstantPointerNull>(Init);
  }

  assert((GV || isa<ConstantPointerNull>(V)) &&
         "TypeInfo must be a global variable or NULL");
  return GV;
}

/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
/// processed uses a memory 'm' constraint.
bool
llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
                                const TargetLowering &TLI) {
  for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
    InlineAsm::ConstraintInfo &CI = CInfos[i];
    for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
      TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
      if (CType == TargetLowering::C_Memory)
        return true;
    }

    // Indirect operand accesses access memory.
    if (CI.isIndirect)
      return true;
  }

  return false;
}

/// getFCmpCondCode - Return the ISD condition code corresponding to
/// the given LLVM IR floating-point condition code. This includes
/// consideration of global floating-point math flags.
///
ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
  switch (Pred) {
  case FCmpInst::FCMP_FALSE: return ISD::SETFALSE;
  case FCmpInst::FCMP_OEQ:   return ISD::SETOEQ;
  case FCmpInst::FCMP_OGT:   return ISD::SETOGT;
  case FCmpInst::FCMP_OGE:   return ISD::SETOGE;
  case FCmpInst::FCMP_OLT:   return ISD::SETOLT;
  case FCmpInst::FCMP_OLE:   return ISD::SETOLE;
  case FCmpInst::FCMP_ONE:   return ISD::SETONE;
  case FCmpInst::FCMP_ORD:   return ISD::SETO;
  case FCmpInst::FCMP_UNO:   return ISD::SETUO;
  case FCmpInst::FCMP_UEQ:   return ISD::SETUEQ;
  case FCmpInst::FCMP_UGT:   return ISD::SETUGT;
  case FCmpInst::FCMP_UGE:   return ISD::SETUGE;
  case FCmpInst::FCMP_ULT:   return ISD::SETULT;
  case FCmpInst::FCMP_ULE:   return ISD::SETULE;
  case FCmpInst::FCMP_UNE:   return ISD::SETUNE;
  case FCmpInst::FCMP_TRUE:  return ISD::SETTRUE;
  default: llvm_unreachable("Invalid FCmp predicate opcode!");
  }
}

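/// getFCmpCodeWithoutNaN - Given an ISD floating-point condition code, return
/// the equivalent code that ignores the ordered/unordered (NaN) distinction;
/// for example, both SETOLT and SETULT map to SETLT.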
ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
  switch (CC) {
    case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ;
    case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE;
    case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT;
    case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE;
    case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT;
    case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE;
    default: return CC;
  }
}

/// getICmpCondCode - Return the ISD condition code corresponding to
/// the given LLVM IR integer condition code.
///
ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:  return ISD::SETEQ;
  case ICmpInst::ICMP_NE:  return ISD::SETNE;
  case ICmpInst::ICMP_SLE: return ISD::SETLE;
  case ICmpInst::ICMP_ULE: return ISD::SETULE;
  case ICmpInst::ICMP_SGE: return ISD::SETGE;
  case ICmpInst::ICMP_UGE: return ISD::SETUGE;
  case ICmpInst::ICMP_SLT: return ISD::SETLT;
  case ICmpInst::ICMP_ULT: return ISD::SETULT;
  case ICmpInst::ICMP_SGT: return ISD::SETGT;
  case ICmpInst::ICMP_UGT: return ISD::SETUGT;
  default:
    llvm_unreachable("Invalid ICmp predicate opcode!");
  }
}

static bool isNoopBitcast(Type *T1, Type *T2,
                          const TargetLoweringBase& TLI) {
  return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) ||
         (isa<VectorType>(T1) && isa<VectorType>(T2) &&
          TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2)));
}

/// Look through operations that will be free to find the earliest source of
/// this value.
///
/// @param ValLoc If V has aggregate type, we will be interested in a particular
/// scalar component. This records its address; the reverse of this list gives a
/// sequence of indices appropriate for an extractvalue to locate the important
/// value. This value is updated during the function and on exit will indicate
/// similar information for the Value returned.
///
/// @param DataBits If this function looks through truncate instructions, this
/// will record the smallest size attained.
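///
/// For example (an illustrative sketch, assuming pointer-to-pointer bitcasts
/// are free, as isNoopBitcast above treats them):
///   %p = call i8* @foo()
///   %q = bitcast i8* %p to i32*
///   ret i32* %q
/// Starting from %q, this function looks through the bitcast and returns the
/// call instruction defining %p.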
static const Value *getNoopInput(const Value *V,
                                 SmallVectorImpl<unsigned> &ValLoc,
                                 unsigned &DataBits,
                                 const TargetLoweringBase &TLI,
                                 const DataLayout &DL) {
  while (true) {
    // Try to look through V; if V is not an instruction, it can't be looked
    // through.
    const Instruction *I = dyn_cast<Instruction>(V);
    if (!I || I->getNumOperands() == 0) return V;
    const Value *NoopInput = nullptr;

    Value *Op = I->getOperand(0);
    if (isa<BitCastInst>(I)) {
      // Look through truly no-op bitcasts.
      if (isNoopBitcast(Op->getType(), I->getType(), TLI))
        NoopInput = Op;
    } else if (isa<GetElementPtrInst>(I)) {
      // Look through getelementptr
      if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
        NoopInput = Op;
    } else if (isa<IntToPtrInst>(I)) {
      // Look through inttoptr.
      // Make sure this isn't a truncating or extending cast. We could
      // support this eventually, but don't bother for now.
      if (!isa<VectorType>(I->getType()) &&
          DL.getPointerSizeInBits() ==
              cast<IntegerType>(Op->getType())->getBitWidth())
        NoopInput = Op;
    } else if (isa<PtrToIntInst>(I)) {
      // Look through ptrtoint.
      // Make sure this isn't a truncating or extending cast. We could
      // support this eventually, but don't bother for now.
      if (!isa<VectorType>(I->getType()) &&
          DL.getPointerSizeInBits() ==
              cast<IntegerType>(I->getType())->getBitWidth())
        NoopInput = Op;
    } else if (isa<TruncInst>(I) &&
               TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
      DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits());
      NoopInput = Op;
    } else if (auto CS = ImmutableCallSite(I)) {
      const Value *ReturnedOp = CS.getReturnedArgOperand();
      if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI))
        NoopInput = ReturnedOp;
    } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) {
      // Value may come from either the aggregate or the scalar
      ArrayRef<unsigned> InsertLoc = IVI->getIndices();
      if (ValLoc.size() >= InsertLoc.size() &&
          std::equal(InsertLoc.begin(), InsertLoc.end(), ValLoc.rbegin())) {
        // The type being inserted is a nested sub-type of the aggregate; we
        // have to remove those initial indices to get the location we're
        // interested in for the operand.
        ValLoc.resize(ValLoc.size() - InsertLoc.size());
        NoopInput = IVI->getInsertedValueOperand();
      } else {
        // The struct we're inserting into has the value we're interested in, no
        // change of address.
        NoopInput = Op;
      }
    } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) {
      // The part we're interested in will inevitably be some sub-section of the
      // previous aggregate. Combine the two paths to obtain the true address of
      // our element.
      ArrayRef<unsigned> ExtractLoc = EVI->getIndices();
      ValLoc.append(ExtractLoc.rbegin(), ExtractLoc.rend());
      NoopInput = Op;
    }
    // Terminate if we couldn't find anything to look through.
    if (!NoopInput)
      return V;

    V = NoopInput;
  }
}

/// Return true if this scalar return value only has bits discarded on its path
/// from the "tail call" to the "ret". This includes the obvious noop
/// instructions handled by getNoopInput above as well as free truncations (or
/// extensions prior to the call).
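///
/// For example (an illustrative sketch, assuming the target reports the
/// truncate as free via allowTruncateForTailCall):
///   %x = tail call i64 @g()
///   %t = trunc i64 %x to i32
///   ret i32 %t
/// Both traces meet at the call and only bits are discarded on the way to the
/// ret, so the slot qualifies (given AllowDifferingSizes permits the
/// i64 -> i32 width mismatch).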
static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
                                 SmallVectorImpl<unsigned> &RetIndices,
                                 SmallVectorImpl<unsigned> &CallIndices,
                                 bool AllowDifferingSizes,
                                 const TargetLoweringBase &TLI,
                                 const DataLayout &DL) {

  // Trace the sub-value needed by the return value as far back up the graph as
  // possible, in the hope that it will intersect with the value produced by the
  // call. In the simple case with no "returned" attribute, the hope is actually
  // that we end up back at the tail call instruction itself.
  unsigned BitsRequired = UINT_MAX;
  RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI, DL);

  // If this slot in the value returned is undef, it doesn't matter what the
  // call puts there, it'll be fine.
  if (isa<UndefValue>(RetVal))
    return true;

  // Now do a similar search up through the graph to find where the value
  // actually returned by the "tail call" comes from. In the simple case
  // without a "returned" attribute, the search is blocked immediately and the
  // loop is a no-op.
  unsigned BitsProvided = UINT_MAX;
  CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI, DL);

  // There's no hope if we can't actually trace them to (the same part of!) the
  // same value.
  if (CallVal != RetVal || CallIndices != RetIndices)
    return false;

  // However, intervening truncates may have made the call non-tail. Make sure
  // all the bits that are needed by the "ret" have been provided by the "tail
  // call". FIXME: with sufficiently cunning bit-tracking, we could look through
  // extensions too.
  if (BitsProvided < BitsRequired ||
      (!AllowDifferingSizes && BitsProvided != BitsRequired))
    return false;

  return true;
}

/// For an aggregate type, determine whether a given index is within bounds or
/// not.
static bool indexReallyValid(CompositeType *T, unsigned Idx) {
  if (ArrayType *AT = dyn_cast<ArrayType>(T))
    return Idx < AT->getNumElements();

  return Idx < cast<StructType>(T)->getNumElements();
}

/// Move the given iterators to the next leaf type in depth first traversal.
///
/// Performs a depth-first traversal of the type as specified by its arguments,
/// stopping at the next leaf node (which may be a legitimate scalar type or an
/// empty struct or array).
///
/// @param SubTypes List of the partial components making up the type from
/// outermost to innermost non-empty aggregate. The element currently
/// represented is SubTypes.back()->getTypeAtIndex(Path.back() - 1).
///
/// @param Path Set of extractvalue indices leading from the outermost type
/// (SubTypes[0]) to the leaf node currently represented.
///
/// @returns true if a new type was found, false otherwise. Calling this
/// function again on a finished iterator will repeatedly return
/// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty
/// aggregate or a non-aggregate.
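///
/// For example (an illustrative walk-through): for the type
/// {{i32, i64}, float} with SubTypes = [outer, {i32, i64}] and Path = [0, 0]
/// (the i32 leaf), one call advances to Path = [0, 1] (the i64), the next pops
/// back up and lands on Path = [1] (the float), and a third call returns
/// false.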
static bool advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes,
                                  SmallVectorImpl<unsigned> &Path) {
  // First march back up the tree until we can successfully increment one of the
  // coordinates in Path.
  while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) {
    Path.pop_back();
    SubTypes.pop_back();
  }

  // If we reached the top, then the iterator is done.
  if (Path.empty())
    return false;

  // We know there's *some* valid leaf now, so march back down the tree picking
  // out the left-most element at each node.
  ++Path.back();
  Type *DeeperType = SubTypes.back()->getTypeAtIndex(Path.back());
  while (DeeperType->isAggregateType()) {
    CompositeType *CT = cast<CompositeType>(DeeperType);
    if (!indexReallyValid(CT, 0))
      return true;

    SubTypes.push_back(CT);
    Path.push_back(0);

    DeeperType = CT->getTypeAtIndex(0U);
  }

  return true;
}

/// Find the first non-empty, scalar-like type in Next and set up the iterator
/// components.
///
/// Assuming Next is an aggregate of some kind, this function will traverse the
/// tree from left to right (i.e. depth-first) looking for the first
/// non-aggregate type which will play a role in function return.
///
/// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would set
/// up Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the
/// first i32 in that type.
static bool firstRealType(Type *Next,
                          SmallVectorImpl<CompositeType *> &SubTypes,
                          SmallVectorImpl<unsigned> &Path) {
  // First initialise the iterator components to the first "leaf" node
  // (i.e. node with no valid sub-type at any index, so {} does count as a leaf
  // despite nominally being an aggregate).
  while (Next->isAggregateType() &&
         indexReallyValid(cast<CompositeType>(Next), 0)) {
    SubTypes.push_back(cast<CompositeType>(Next));
    Path.push_back(0);
    Next = cast<CompositeType>(Next)->getTypeAtIndex(0U);
  }

  // If there's no Path now, Next was originally scalar already (or an empty
  // leaf). We're done.
  if (Path.empty())
    return true;

  // Otherwise, use normal iteration to keep looking through the tree until we
  // find a non-aggregate type.
  while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()) {
    if (!advanceToNextLeafType(SubTypes, Path))
      return false;
  }

  return true;
}

/// Set the iterator data-structures to the next non-empty, non-aggregate
/// subtype.
static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
                         SmallVectorImpl<unsigned> &Path) {
  do {
    if (!advanceToNextLeafType(SubTypes, Path))
      return false;

    assert(!Path.empty() && "found a leaf but didn't set the path?");
  } while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType());

  return true;
}


/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
/// between it and the return.
///
/// This function only tests target-independent requirements.
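///
/// For example (an illustrative sketch), in
///   %r = tail call i32 @callee()
///   ret i32 %r
/// the call is in tail position, whereas an intervening store between the
/// call and the ret would disqualify it.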
bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
  const Instruction *I = CS.getInstruction();
  const BasicBlock *ExitBB = I->getParent();
  const TerminatorInst *Term = ExitBB->getTerminator();
  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);

  // The block must end in a return statement or unreachable.
  //
  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
  // an unreachable, for now. The way tailcall optimization is currently
  // implemented means it will add an epilogue followed by a jump. That is
  // not profitable. Also, if the callee is a special function (e.g.
  // longjmp on x86), it can end up causing miscompilation that has not
  // been fully understood.
  if (!Ret &&
      (!TM.Options.GuaranteedTailCallOpt || !isa<UnreachableInst>(Term)))
    return false;

  // If I will have a chain, make sure no other instruction that will have a
  // chain interposes between I and the return.
  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
      !isSafeToSpeculativelyExecute(I))
    for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) {
      if (&*BBI == I)
        break;
      // Debug info intrinsics do not get in the way of tail call optimization.
      if (isa<DbgInfoIntrinsic>(BBI))
        continue;
      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
          !isSafeToSpeculativelyExecute(&*BBI))
        return false;
    }

  const Function *F = ExitBB->getParent();
  return returnTypeIsEligibleForTailCall(
      F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
}

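/// Check whether the ABI attributes attached to the caller's return value and
/// to the call site permit a tail call. For example, a caller whose return
/// value is marked zeroext can only tail-call a callee that also promises a
/// zeroext return. When both sides carry a matching zext/sext promise,
/// *AllowDifferingSizes is cleared, since the extension makes the exact width
/// of the returned value significant.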
bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
                                    const ReturnInst *Ret,
                                    const TargetLoweringBase &TLI,
                                    bool *AllowDifferingSizes) {
  // ADS may be null, so don't write to it directly.
  bool DummyADS;
  bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS;
  ADS = true;

  AttrBuilder CallerAttrs(F->getAttributes(), AttributeList::ReturnIndex);
  AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
                          AttributeList::ReturnIndex);

  // Noalias is completely benign as far as calling convention goes, it
  // shouldn't affect whether the call is a tail call.
  CallerAttrs.removeAttribute(Attribute::NoAlias);
  CalleeAttrs.removeAttribute(Attribute::NoAlias);

  if (CallerAttrs.contains(Attribute::ZExt)) {
    if (!CalleeAttrs.contains(Attribute::ZExt))
      return false;

    ADS = false;
    CallerAttrs.removeAttribute(Attribute::ZExt);
    CalleeAttrs.removeAttribute(Attribute::ZExt);
  } else if (CallerAttrs.contains(Attribute::SExt)) {
    if (!CalleeAttrs.contains(Attribute::SExt))
      return false;

    ADS = false;
    CallerAttrs.removeAttribute(Attribute::SExt);
    CalleeAttrs.removeAttribute(Attribute::SExt);
  }

  // If they're still different, there's some facet we don't understand
  // (currently only "inreg", but in future who knows). It may be OK but the
  // only safe option is to reject the tail call.
  return CallerAttrs == CalleeAttrs;
}

bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
                                           const Instruction *I,
                                           const ReturnInst *Ret,
                                           const TargetLoweringBase &TLI) {
  // If the block ends with a void return or unreachable, it doesn't matter
  // what the call's return type is.
  if (!Ret || Ret->getNumOperands() == 0) return true;

  // If the return value is undef, it doesn't matter what the call's
  // return type is.
  if (isa<UndefValue>(Ret->getOperand(0))) return true;

  // Make sure the attributes attached to each return are compatible.
  bool AllowDifferingSizes;
  if (!attributesPermitTailCall(F, I, Ret, TLI, &AllowDifferingSizes))
    return false;

  const Value *RetVal = Ret->getOperand(0), *CallVal = I;
  // Intrinsics like llvm.memcpy have no return value, but the expanded
  // libcall may or may not have a return value. On most platforms, it
  // will be expanded as memcpy in libc, which returns the first
  // argument. On other platforms like arm-none-eabi, memcpy may be
  // expanded as a library call without a return value, like __aeabi_memcpy.
  const CallInst *Call = cast<CallInst>(I);
  if (Function *F = Call->getCalledFunction()) {
    Intrinsic::ID IID = F->getIntrinsicID();
    if (((IID == Intrinsic::memcpy &&
          TLI.getLibcallName(RTLIB::MEMCPY) == StringRef("memcpy")) ||
         (IID == Intrinsic::memmove &&
          TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) ||
         (IID == Intrinsic::memset &&
          TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) &&
        RetVal == Call->getArgOperand(0))
      return true;
  }

  SmallVector<unsigned, 4> RetPath, CallPath;
  SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes;

  bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath);
  bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath);

  // Nothing's actually returned; it doesn't matter what the callee put there,
  // it's a valid tail call.
  if (RetEmpty)
    return true;

  // Iterate pairwise through each of the value types making up the tail call
  // and the corresponding return. For each one we want to know whether it's
  // essentially going directly from the tail call to the ret, via operations
  // that end up not generating any code.
  //
  // We allow a certain amount of covariance here. For example it's permitted
  // for the tail call to define more bits than the ret actually cares about
  // (e.g. via a truncate).
  do {
    if (CallEmpty) {
      // We've exhausted the values produced by the tail call instruction, the
      // rest are essentially undef. The type doesn't really matter, but we need
      // *something*.
      Type *SlotType = RetSubTypes.back()->getTypeAtIndex(RetPath.back());
      CallVal = UndefValue::get(SlotType);
    }

    // The manipulations performed when we're looking through an insertvalue or
    // an extractvalue would happen at the front of the RetPath list, so since
    // we have to copy it anyway it's more efficient to create a reversed copy.
    SmallVector<unsigned, 4> TmpRetPath(RetPath.rbegin(), RetPath.rend());
    SmallVector<unsigned, 4> TmpCallPath(CallPath.rbegin(), CallPath.rend());

    // Finally, we can check whether the value produced by the tail call at this
    // index is compatible with the value we return.
    if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath,
                              AllowDifferingSizes, TLI,
                              F->getParent()->getDataLayout()))
      return false;

    CallEmpty = !nextRealType(CallSubTypes, CallPath);
  } while (nextRealType(RetSubTypes, RetPath));

  return true;
}

static void collectFuncletMembers(
    DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet,
    const MachineBasicBlock *MBB) {
  SmallVector<const MachineBasicBlock *, 16> Worklist = {MBB};
  while (!Worklist.empty()) {
    const MachineBasicBlock *Visiting = Worklist.pop_back_val();
    // Don't follow blocks which start new funclets.
    if (Visiting->isEHPad() && Visiting != MBB)
      continue;

    // Add this MBB to our funclet.
    auto P = FuncletMembership.insert(std::make_pair(Visiting, Funclet));

    // Don't revisit blocks.
    if (!P.second) {
      assert(P.first->second == Funclet && "MBB is part of two funclets!");
      continue;
    }

    // Returns are boundaries where funclet transfer can occur, don't follow
    // successors.
    if (Visiting->isReturnBlock())
      continue;

    for (const MachineBasicBlock *Succ : Visiting->successors())
      Worklist.push_back(Succ);
  }
}

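/// Partition the basic blocks of MF into funclets: each block is mapped to the
/// block number of the funclet entry it executes under (or of the function
/// entry, for blocks in the parent function). For functions without EH
/// funclets the returned map is empty.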
DenseMap<const MachineBasicBlock *, int>
llvm::getFuncletMembership(const MachineFunction &MF) {
  DenseMap<const MachineBasicBlock *, int> FuncletMembership;

  // We don't have anything to do if there aren't any EH pads.
  if (!MF.hasEHFunclets())
    return FuncletMembership;

  int EntryBBNumber = MF.front().getNumber();
  bool IsSEH = isAsynchronousEHPersonality(
      classifyEHPersonality(MF.getFunction()->getPersonalityFn()));

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  SmallVector<const MachineBasicBlock *, 16> FuncletBlocks;
  SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks;
  SmallVector<const MachineBasicBlock *, 16> SEHCatchPads;
  SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors;
  for (const MachineBasicBlock &MBB : MF) {
    if (MBB.isEHFuncletEntry()) {
      FuncletBlocks.push_back(&MBB);
    } else if (IsSEH && MBB.isEHPad()) {
      SEHCatchPads.push_back(&MBB);
    } else if (MBB.pred_empty()) {
      UnreachableBlocks.push_back(&MBB);
    }

    MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();

    // CatchPads are not funclets for SEH so do not consider CatchRet to
    // transfer control to another funclet.
    if (MBBI == MBB.end() || MBBI->getOpcode() != TII->getCatchReturnOpcode())
      continue;

    // FIXME: SEH CatchPads are not necessarily in the parent function:
    // they could be inside a finally block.
    const MachineBasicBlock *Successor = MBBI->getOperand(0).getMBB();
    const MachineBasicBlock *SuccessorColor = MBBI->getOperand(1).getMBB();
    CatchRetSuccessors.push_back(
        {Successor, IsSEH ? EntryBBNumber : SuccessorColor->getNumber()});
  }

  // We don't have anything to do if there aren't any funclet entry blocks.
  if (FuncletBlocks.empty())
    return FuncletMembership;

  // Identify all the basic blocks reachable from the function entry.
  collectFuncletMembers(FuncletMembership, EntryBBNumber, &MF.front());
  // All blocks not part of a funclet are in the parent function.
  for (const MachineBasicBlock *MBB : UnreachableBlocks)
    collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB);
  // Next, identify all the blocks inside the funclets.
  for (const MachineBasicBlock *MBB : FuncletBlocks)
    collectFuncletMembers(FuncletMembership, MBB->getNumber(), MBB);
  // SEH CatchPads aren't really funclets, handle them separately.
  for (const MachineBasicBlock *MBB : SEHCatchPads)
    collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB);
  // Finally, identify all the targets of a catchret.
  for (std::pair<const MachineBasicBlock *, int> CatchRetPair :
       CatchRetSuccessors)
    collectFuncletMembers(FuncletMembership, CatchRetPair.second,
                          CatchRetPair.first);
  return FuncletMembership;
}