Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/Analysis.cpp
//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines several CodeGen-specific LLVM IR analysis utilities.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Analysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"

using namespace llvm;

/// Compute the linearized index of a member in a nested aggregate/struct/array
/// by recursing and accumulating CurIndex as long as there are indices in the
/// index list.
unsigned llvm::ComputeLinearIndex(Type *Ty,
                                  const unsigned *Indices,
                                  const unsigned *IndicesEnd,
                                  unsigned CurIndex) {
  // Base case: We're done.
  if (Indices && Indices == IndicesEnd)
    return CurIndex;

  // Given a struct type, recursively traverse the elements.
  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (StructType::element_iterator EB = STy->element_begin(),
                                      EI = EB,
                                      EE = STy->element_end();
        EI != EE; ++EI) {
      if (Indices && *Indices == unsigned(EI - EB))
        return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
      CurIndex = ComputeLinearIndex(*EI, nullptr, nullptr, CurIndex);
    }
    assert(!Indices && "Unexpected out of bound");
    return CurIndex;
  }
  // Given an array type, recursively traverse the elements.
  else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Type *EltTy = ATy->getElementType();
    unsigned NumElts = ATy->getNumElements();
    // Compute the linear offset from jumping one element forward in the array.
    unsigned EltLinearOffset = ComputeLinearIndex(EltTy, nullptr, nullptr, 0);
    if (Indices) {
      assert(*Indices < NumElts && "Unexpected out of bound");
      // If the index is within bounds, compute the index of the requested
      // element and recurse into it with the rest of the index list.
      CurIndex += EltLinearOffset * *Indices;
      return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
    }
    CurIndex += EltLinearOffset * NumElts;
    return CurIndex;
  }
  // We haven't found the type we're looking for, so keep searching.
  return CurIndex + 1;
}
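
// Example (illustrative, not part of the original source): for the type
// { i32, { float, double } } the scalar leaves are numbered in order, so
// i32 -> 0, float -> 1, double -> 2. Thus:
//
//   unsigned Idx[] = {1, 1};
//   ComputeLinearIndex(Ty, Idx, Idx + 2, 0);   // returns 2 (the double)
//
// Passing a null index list makes the function simply count the leaves,
// which is how the array case above computes EltLinearOffset.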

/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
/// EVTs that represent all the individual underlying
/// non-aggregate types that comprise it.
///
/// If Offsets is non-null, it points to a vector to be filled in
/// with the in-memory offsets of each of the individual values.
///
void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                           SmallVectorImpl<uint64_t> *Offsets,
                           uint64_t StartingOffset) {
  // Given a struct type, recursively traverse the elements.
  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    const StructLayout *SL = DL.getStructLayout(STy);
    for (StructType::element_iterator EB = STy->element_begin(),
                                      EI = EB,
                                      EE = STy->element_end();
         EI != EE; ++EI)
      ComputeValueVTs(TLI, DL, *EI, ValueVTs, Offsets,
                      StartingOffset + SL->getElementOffset(EI - EB));
    return;
  }
  // Given an array type, recursively traverse the elements.
  if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Type *EltTy = ATy->getElementType();
    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
      ComputeValueVTs(TLI, DL, EltTy, ValueVTs, Offsets,
                      StartingOffset + i * EltSize);
    return;
  }
  // Interpret void as zero return values.
  if (Ty->isVoidTy())
    return;
  // Base case: we can get an EVT for this LLVM IR type.
  ValueVTs.push_back(TLI.getValueType(DL, Ty));
  if (Offsets)
    Offsets->push_back(StartingOffset);
}
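
// Example (illustrative): for { i32, [2 x float] } on a typical target where
// i32 and float are both 4 bytes, this appends the value types i32, f32, f32
// to ValueVTs and, when Offsets is non-null, the offsets 0, 4 and 8 (taken
// from the struct layout and the element allocation size respectively).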

/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
GlobalValue *llvm::ExtractTypeInfo(Value *V) {
  V = V->stripPointerCasts();
  GlobalValue *GV = dyn_cast<GlobalValue>(V);
  GlobalVariable *Var = dyn_cast<GlobalVariable>(V);

  if (Var && Var->getName() == "llvm.eh.catch.all.value") {
    assert(Var->hasInitializer() &&
           "The EH catch-all value must have an initializer");
    Value *Init = Var->getInitializer();
    GV = dyn_cast<GlobalValue>(Init);
    if (!GV) V = cast<ConstantPointerNull>(Init);
  }

  assert((GV || isa<ConstantPointerNull>(V)) &&
         "TypeInfo must be a global variable or NULL");
  return GV;
}

/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
/// processed uses a memory 'm' constraint.
bool
llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
                                const TargetLowering &TLI) {
  for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
    InlineAsm::ConstraintInfo &CI = CInfos[i];
    for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
      TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
      if (CType == TargetLowering::C_Memory)
        return true;
    }

    // Indirect operands access memory.
    if (CI.isIndirect)
      return true;
  }

  return false;
}

/// getFCmpCondCode - Return the ISD condition code corresponding to
/// the given LLVM IR floating-point condition code.  This includes
/// consideration of global floating-point math flags.
///
ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
  switch (Pred) {
  case FCmpInst::FCMP_FALSE: return ISD::SETFALSE;
  case FCmpInst::FCMP_OEQ:   return ISD::SETOEQ;
  case FCmpInst::FCMP_OGT:   return ISD::SETOGT;
  case FCmpInst::FCMP_OGE:   return ISD::SETOGE;
  case FCmpInst::FCMP_OLT:   return ISD::SETOLT;
  case FCmpInst::FCMP_OLE:   return ISD::SETOLE;
  case FCmpInst::FCMP_ONE:   return ISD::SETONE;
  case FCmpInst::FCMP_ORD:   return ISD::SETO;
  case FCmpInst::FCMP_UNO:   return ISD::SETUO;
  case FCmpInst::FCMP_UEQ:   return ISD::SETUEQ;
  case FCmpInst::FCMP_UGT:   return ISD::SETUGT;
  case FCmpInst::FCMP_UGE:   return ISD::SETUGE;
  case FCmpInst::FCMP_ULT:   return ISD::SETULT;
  case FCmpInst::FCMP_ULE:   return ISD::SETULE;
  case FCmpInst::FCMP_UNE:   return ISD::SETUNE;
  case FCmpInst::FCMP_TRUE:  return ISD::SETTRUE;
  default: llvm_unreachable("Invalid FCmp predicate opcode!");
  }
}

ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
  switch (CC) {
    case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ;
    case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE;
    case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT;
    case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE;
    case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT;
    case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE;
    default: return CC;
  }
}
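
// Example (illustrative): when fast-math flags guarantee that neither operand
// is a NaN, the two helpers compose as
//
//   ISD::CondCode CC = getFCmpCondCode(FCmpInst::FCMP_OLT); // ISD::SETOLT
//   CC = getFCmpCodeWithoutNaN(CC);                         // ISD::SETLT
//
// dropping the ordered/unordered distinction, which usually maps to a cheaper
// comparison on the target.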

/// getICmpCondCode - Return the ISD condition code corresponding to
/// the given LLVM IR integer condition code.
///
ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:  return ISD::SETEQ;
  case ICmpInst::ICMP_NE:  return ISD::SETNE;
  case ICmpInst::ICMP_SLE: return ISD::SETLE;
  case ICmpInst::ICMP_ULE: return ISD::SETULE;
  case ICmpInst::ICMP_SGE: return ISD::SETGE;
  case ICmpInst::ICMP_UGE: return ISD::SETUGE;
  case ICmpInst::ICMP_SLT: return ISD::SETLT;
  case ICmpInst::ICMP_ULT: return ISD::SETULT;
  case ICmpInst::ICMP_SGT: return ISD::SETGT;
  case ICmpInst::ICMP_UGT: return ISD::SETUGT;
  default:
    llvm_unreachable("Invalid ICmp predicate opcode!");
  }
}

static bool isNoopBitcast(Type *T1, Type *T2,
                          const TargetLoweringBase& TLI) {
  return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) ||
         (isa<VectorType>(T1) && isa<VectorType>(T2) &&
          TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2)));
}
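
// Example (illustrative): a bitcast from i8* to i32* is a no-op here because
// both sides are pointers, and a bitcast between legal vector types such as
// <4 x i32> and <4 x float> is a no-op because no code needs to be generated
// for it; an i64-to-double bitcast, by contrast, is not matched.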

/// Look through operations that will be free to find the earliest source of
/// this value.
///
/// @param ValLoc If V has aggregate type, we will be interested in a particular
/// scalar component. This records its address; the reverse of this list gives a
/// sequence of indices appropriate for an extractvalue to locate the important
/// value. This value is updated during the function and on exit will indicate
/// similar information for the Value returned.
///
/// @param DataBits If this function looks through truncate instructions, this
/// will record the smallest size attained.
static const Value *getNoopInput(const Value *V,
                                 SmallVectorImpl<unsigned> &ValLoc,
                                 unsigned &DataBits,
                                 const TargetLoweringBase &TLI,
                                 const DataLayout &DL) {
  while (true) {
    // Try to look through V; if V is not an instruction, it can't be looked
    // through.
    const Instruction *I = dyn_cast<Instruction>(V);
    if (!I || I->getNumOperands() == 0) return V;
    const Value *NoopInput = nullptr;

    Value *Op = I->getOperand(0);
    if (isa<BitCastInst>(I)) {
      // Look through truly no-op bitcasts.
      if (isNoopBitcast(Op->getType(), I->getType(), TLI))
        NoopInput = Op;
    } else if (isa<GetElementPtrInst>(I)) {
      // Look through getelementptr
      if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
        NoopInput = Op;
    } else if (isa<IntToPtrInst>(I)) {
      // Look through inttoptr.
      // Make sure this isn't a truncating or extending cast.  We could
      // support this eventually, but don't bother for now.
      if (!isa<VectorType>(I->getType()) &&
          DL.getPointerSizeInBits() ==
              cast<IntegerType>(Op->getType())->getBitWidth())
        NoopInput = Op;
    } else if (isa<PtrToIntInst>(I)) {
      // Look through ptrtoint.
      // Make sure this isn't a truncating or extending cast.  We could
      // support this eventually, but don't bother for now.
      if (!isa<VectorType>(I->getType()) &&
          DL.getPointerSizeInBits() ==
              cast<IntegerType>(I->getType())->getBitWidth())
        NoopInput = Op;
    } else if (isa<TruncInst>(I) &&
               TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
      DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits());
      NoopInput = Op;
    } else if (auto CS = ImmutableCallSite(I)) {
      const Value *ReturnedOp = CS.getReturnedArgOperand();
      if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI))
        NoopInput = ReturnedOp;
    } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) {
      // Value may come from either the aggregate or the scalar
      ArrayRef<unsigned> InsertLoc = IVI->getIndices();
      if (ValLoc.size() >= InsertLoc.size() &&
          std::equal(InsertLoc.begin(), InsertLoc.end(), ValLoc.rbegin())) {
        // The type being inserted is a nested sub-type of the aggregate; we
        // have to remove those initial indices to get the location we're
        // interested in for the operand.
        ValLoc.resize(ValLoc.size() - InsertLoc.size());
        NoopInput = IVI->getInsertedValueOperand();
      } else {
        // The struct we're inserting into has the value we're interested in, no
        // change of address.
        NoopInput = Op;
      }
    } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) {
      // The part we're interested in will inevitably be some sub-section of the
      // previous aggregate. Combine the two paths to obtain the true address of
      // our element.
      ArrayRef<unsigned> ExtractLoc = EVI->getIndices();
      ValLoc.append(ExtractLoc.rbegin(), ExtractLoc.rend());
      NoopInput = Op;
    }
    // Terminate if we couldn't find anything to look through.
    if (!NoopInput)
      return V;

    V = NoopInput;
  }
}
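
// Example (illustrative): given IR such as
//
//   %r0 = tail call i8* @f()
//   %r1 = bitcast i8* %r0 to i32*
//   ret i32* %r1
//
// getNoopInput on %r1 walks through the no-op bitcast and returns the call
// itself, which is what lets the tail-call checks below match the returned
// value against the call.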

/// Return true if this scalar return value only has bits discarded on its path
/// from the "tail call" to the "ret". This includes the obvious noop
/// instructions handled by getNoopInput above as well as free truncations (or
/// extensions prior to the call).
static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
                                 SmallVectorImpl<unsigned> &RetIndices,
                                 SmallVectorImpl<unsigned> &CallIndices,
                                 bool AllowDifferingSizes,
                                 const TargetLoweringBase &TLI,
                                 const DataLayout &DL) {

  // Trace the sub-value needed by the return value as far back up the graph as
  // possible, in the hope that it will intersect with the value produced by the
  // call. In the simple case with no "returned" attribute, the hope is actually
  // that we end up back at the tail call instruction itself.
  unsigned BitsRequired = UINT_MAX;
  RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI, DL);

  // If this slot in the value returned is undef, it doesn't matter what the
  // call puts there, it'll be fine.
  if (isa<UndefValue>(RetVal))
    return true;

  // Now do a similar search up through the graph to find where the value
  // actually returned by the "tail call" comes from. In the simple case without
  // a "returned" attribute, the search is blocked immediately and the loop is
  // a no-op.
  unsigned BitsProvided = UINT_MAX;
  CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI, DL);

  // There's no hope if we can't actually trace them to (the same part of!) the
  // same value.
  if (CallVal != RetVal || CallIndices != RetIndices)
    return false;

  // However, intervening truncates may have made the call non-tail. Make sure
  // all the bits that are needed by the "ret" have been provided by the "tail
  // call". FIXME: with sufficiently cunning bit-tracking, we could look through
  // extensions too.
  if (BitsProvided < BitsRequired ||
      (!AllowDifferingSizes && BitsProvided != BitsRequired))
    return false;

  return true;
}
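
// Example (illustrative): for
//
//   %v = tail call i64 @g()
//   %t = trunc i64 %v to i32
//   ret i32 %t
//
// tracing the ret operand looks through the trunc (on targets whose TLI
// allows truncation for tail calls, recording BitsRequired = 32) and reaches
// the call, while tracing the call value stops at the call immediately. The
// slot therefore only discards data, and the call remains eligible as long
// as AllowDifferingSizes holds (i.e. no zeroext/signext attribute demands an
// exact width match).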

/// For an aggregate type, determine whether a given index is within bounds or
/// not.
static bool indexReallyValid(CompositeType *T, unsigned Idx) {
  if (ArrayType *AT = dyn_cast<ArrayType>(T))
    return Idx < AT->getNumElements();

  return Idx < cast<StructType>(T)->getNumElements();
}

/// Move the given iterators to the next leaf type in depth first traversal.
///
/// Performs a depth-first traversal of the type as specified by its arguments,
/// stopping at the next leaf node (which may be a legitimate scalar type or an
/// empty struct or array).
///
/// @param SubTypes List of the partial components making up the type from
/// outermost to innermost non-empty aggregate. The element currently
/// represented is SubTypes.back()->getTypeAtIndex(Path.back() - 1).
///
/// @param Path Set of extractvalue indices leading from the outermost type
/// (SubTypes[0]) to the leaf node currently represented.
///
/// @returns true if a new type was found, false otherwise. Calling this
/// function again on a finished iterator will repeatedly return
/// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty
/// aggregate or a non-aggregate.
static bool advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes,
                                  SmallVectorImpl<unsigned> &Path) {
  // First march back up the tree until we can successfully increment one of the
  // coordinates in Path.
  while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) {
    Path.pop_back();
    SubTypes.pop_back();
  }

  // If we reached the top, then the iterator is done.
  if (Path.empty())
    return false;

  // We know there's *some* valid leaf now, so march back down the tree picking
  // out the left-most element at each node.
  ++Path.back();
  Type *DeeperType = SubTypes.back()->getTypeAtIndex(Path.back());
  while (DeeperType->isAggregateType()) {
    CompositeType *CT = cast<CompositeType>(DeeperType);
    if (!indexReallyValid(CT, 0))
      return true;

    SubTypes.push_back(CT);
    Path.push_back(0);

    DeeperType = CT->getTypeAtIndex(0U);
  }

  return true;
}

/// Find the first non-empty, scalar-like type in Next and setup the iterator
/// components.
///
/// Assuming Next is an aggregate of some kind, this function will traverse the
/// tree from left to right (i.e. depth-first) looking for the first
/// non-aggregate type which will play a role in function return.
///
/// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would setup
/// Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the first
/// i32 in that type.
static bool firstRealType(Type *Next,
                          SmallVectorImpl<CompositeType *> &SubTypes,
                          SmallVectorImpl<unsigned> &Path) {
  // First initialise the iterator components to the first "leaf" node
  // (i.e. node with no valid sub-type at any index, so {} does count as a leaf
  // despite nominally being an aggregate).
  while (Next->isAggregateType() &&
         indexReallyValid(cast<CompositeType>(Next), 0)) {
    SubTypes.push_back(cast<CompositeType>(Next));
    Path.push_back(0);
    Next = cast<CompositeType>(Next)->getTypeAtIndex(0U);
  }

  // If there's no Path now, Next was originally scalar already (or empty
  // leaf). We're done.
  if (Path.empty())
    return true;

  // Otherwise, use normal iteration to keep looking through the tree until we
  // find a non-aggregate type.
  while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()) {
    if (!advanceToNextLeafType(SubTypes, Path))
      return false;
  }

  return true;
}
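
// Worked trace (illustrative) of the example in the comment above, with
// Next = {[0 x i64], {{}, i32, {}}, i32}:
//   - the left-most descent stops at Path = [0], because [0 x i64] has no
//     valid index 0 and so counts as an (empty) leaf;
//   - advanceToNextLeafType moves to Path = [1] and descends into
//     {{}, i32, {}}, giving Path = [1, 0], the empty struct {};
//   - one more advance yields Path = [1, 1], the first real i32.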

/// Set the iterator data-structures to the next non-empty, non-aggregate
/// subtype.
static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
                         SmallVectorImpl<unsigned> &Path) {
  do {
    if (!advanceToNextLeafType(SubTypes, Path))
      return false;

    assert(!Path.empty() && "found a leaf but didn't set the path?");
  } while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType());

  return true;
}


/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
/// between it and the return.
///
/// This function only tests target-independent requirements.
bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
  const Instruction *I = CS.getInstruction();
  const BasicBlock *ExitBB = I->getParent();
  const TerminatorInst *Term = ExitBB->getTerminator();
  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);

  // The block must end in a return statement or unreachable.
  //
  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
  // an unreachable, for now. The way tailcall optimization is currently
  // implemented means it will add an epilogue followed by a jump. That is
  // not profitable. Also, if the callee is a special function (e.g.
  // longjmp on x86), it can end up causing miscompilation that has not
  // been fully understood.
  if (!Ret &&
      (!TM.Options.GuaranteedTailCallOpt || !isa<UnreachableInst>(Term)))
    return false;

  // If I will have a chain, make sure no other instruction that will have a
  // chain interposes between I and the return.
  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
      !isSafeToSpeculativelyExecute(I))
    for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) {
      if (&*BBI == I)
        break;
      // Debug info intrinsics do not get in the way of tail call optimization.
      if (isa<DbgInfoIntrinsic>(BBI))
        continue;
      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
          !isSafeToSpeculativelyExecute(&*BBI))
        return false;
    }

  const Function *F = ExitBB->getParent();
  return returnTypeIsEligibleForTailCall(
      F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
}
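
// Example (illustrative): the call below satisfies these target-independent
// requirements, since only the return stands between it and the end of the
// block; inserting any load or store after the call would disqualify it:
//
//   define i32 @caller(i32 %x) {
//     %r = tail call i32 @callee(i32 %x)
//     ret i32 %r
//   }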

bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
                                    const ReturnInst *Ret,
                                    const TargetLoweringBase &TLI,
                                    bool *AllowDifferingSizes) {
  // ADS may be null, so don't write to it directly.
  bool DummyADS;
  bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS;
  ADS = true;

  AttrBuilder CallerAttrs(F->getAttributes(), AttributeList::ReturnIndex);
  AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
                          AttributeList::ReturnIndex);

  // Noalias is completely benign as far as calling convention goes, it
  // shouldn't affect whether the call is a tail call.
  CallerAttrs.removeAttribute(Attribute::NoAlias);
  CalleeAttrs.removeAttribute(Attribute::NoAlias);

  if (CallerAttrs.contains(Attribute::ZExt)) {
    if (!CalleeAttrs.contains(Attribute::ZExt))
      return false;

    ADS = false;
    CallerAttrs.removeAttribute(Attribute::ZExt);
    CalleeAttrs.removeAttribute(Attribute::ZExt);
  } else if (CallerAttrs.contains(Attribute::SExt)) {
    if (!CalleeAttrs.contains(Attribute::SExt))
      return false;

    ADS = false;
    CallerAttrs.removeAttribute(Attribute::SExt);
    CalleeAttrs.removeAttribute(Attribute::SExt);
  }

  // If they're still different, there's some facet we don't understand
  // (currently only "inreg", but in future who knows). It may be OK but the
  // only safe option is to reject the tail call.
  return CallerAttrs == CalleeAttrs;
}
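
// Example (illustrative): a caller declared as
//
//   define zeroext i8 @caller() {
//     %r = tail call i8 @callee()   ; no zeroext on the call site
//     ret i8 %r
//   }
//
// is rejected here: the caller promises zero-extended return bits that the
// callee never guarantees. Adding zeroext to the call site makes the
// attribute sets match again (and clears AllowDifferingSizes, forcing an
// exact width match in slotOnlyDiscardsData).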

bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
                                           const Instruction *I,
                                           const ReturnInst *Ret,
                                           const TargetLoweringBase &TLI) {
  // If the block ends with a void return or unreachable, it doesn't matter
  // what the call's return type is.
  if (!Ret || Ret->getNumOperands() == 0) return true;

  // If the return value is undef, it doesn't matter what the call's
  // return type is.
  if (isa<UndefValue>(Ret->getOperand(0))) return true;

  // Make sure the attributes attached to each return are compatible.
  bool AllowDifferingSizes;
  if (!attributesPermitTailCall(F, I, Ret, TLI, &AllowDifferingSizes))
    return false;

  const Value *RetVal = Ret->getOperand(0), *CallVal = I;
  // Intrinsics like llvm.memcpy have no return value, but the expanded
  // libcall may or may not have a return value. On most platforms, it
  // will be expanded as memcpy in libc, which returns the first
  // argument. On other platforms like arm-none-eabi, memcpy may be
  // expanded as a library call without a return value, like __aeabi_memcpy.
  const CallInst *Call = cast<CallInst>(I);
  if (Function *F = Call->getCalledFunction()) {
    Intrinsic::ID IID = F->getIntrinsicID();
    if (((IID == Intrinsic::memcpy &&
          TLI.getLibcallName(RTLIB::MEMCPY) == StringRef("memcpy")) ||
         (IID == Intrinsic::memmove &&
          TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) ||
         (IID == Intrinsic::memset &&
          TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) &&
        RetVal == Call->getArgOperand(0))
      return true;
  }

  SmallVector<unsigned, 4> RetPath, CallPath;
  SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes;

  bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath);
  bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath);

  // Nothing's actually returned; it doesn't matter what the callee put there,
  // it's a valid tail call.
  if (RetEmpty)
    return true;

  // Iterate pairwise through each of the value types making up the tail call
  // and the corresponding return. For each one we want to know whether it's
  // essentially going directly from the tail call to the ret, via operations
  // that end up not generating any code.
  //
  // We allow a certain amount of covariance here. For example it's permitted
  // for the tail call to define more bits than the ret actually cares about
  // (e.g. via a truncate).
  do {
    if (CallEmpty) {
      // We've exhausted the values produced by the tail call instruction, the
      // rest are essentially undef. The type doesn't really matter, but we need
      // *something*.
      Type *SlotType = RetSubTypes.back()->getTypeAtIndex(RetPath.back());
      CallVal = UndefValue::get(SlotType);
    }

    // The manipulations performed when we're looking through an insertvalue or
    // an extractvalue would happen at the front of the RetPath list, so since
    // we have to copy it anyway it's more efficient to create a reversed copy.
    SmallVector<unsigned, 4> TmpRetPath(RetPath.rbegin(), RetPath.rend());
    SmallVector<unsigned, 4> TmpCallPath(CallPath.rbegin(), CallPath.rend());

    // Finally, we can check whether the value produced by the tail call at this
    // index is compatible with the value we return.
    if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath,
                              AllowDifferingSizes, TLI,
                              F->getParent()->getDataLayout()))
      return false;

    CallEmpty = !nextRealType(CallSubTypes, CallPath);
  } while (nextRealType(RetSubTypes, RetPath));

  return true;
}
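
// Example (illustrative): returning an aggregate rebuilt from the call's
// result still qualifies, because each scalar slot of the ret traces back to
// the same slot of the call:
//
//   %pair = tail call { i32, i32 } @make_pair()
//   %a = extractvalue { i32, i32 } %pair, 0
//   %b = extractvalue { i32, i32 } %pair, 1
//   %r0 = insertvalue { i32, i32 } undef, i32 %a, 0
//   %r1 = insertvalue { i32, i32 } %r0, i32 %b, 1
//   ret { i32, i32 } %r1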

static void collectFuncletMembers(
    DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet,
    const MachineBasicBlock *MBB) {
  SmallVector<const MachineBasicBlock *, 16> Worklist = {MBB};
  while (!Worklist.empty()) {
    const MachineBasicBlock *Visiting = Worklist.pop_back_val();
    // Don't follow blocks which start new funclets.
    if (Visiting->isEHPad() && Visiting != MBB)
      continue;

    // Add this MBB to our funclet.
    auto P = FuncletMembership.insert(std::make_pair(Visiting, Funclet));

    // Don't revisit blocks.
    if (!P.second) {
      assert(P.first->second == Funclet && "MBB is part of two funclets!");
      continue;
    }

    // Returns are boundaries where funclet transfer can occur, don't follow
    // successors.
    if (Visiting->isReturnBlock())
      continue;

    for (const MachineBasicBlock *Succ : Visiting->successors())
      Worklist.push_back(Succ);
  }
}

DenseMap<const MachineBasicBlock *, int>
llvm::getFuncletMembership(const MachineFunction &MF) {
  DenseMap<const MachineBasicBlock *, int> FuncletMembership;

  // We don't have anything to do if there aren't any EH pads.
  if (!MF.hasEHFunclets())
    return FuncletMembership;

  int EntryBBNumber = MF.front().getNumber();
  bool IsSEH = isAsynchronousEHPersonality(
      classifyEHPersonality(MF.getFunction()->getPersonalityFn()));

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  SmallVector<const MachineBasicBlock *, 16> FuncletBlocks;
  SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks;
  SmallVector<const MachineBasicBlock *, 16> SEHCatchPads;
  SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors;
  for (const MachineBasicBlock &MBB : MF) {
    if (MBB.isEHFuncletEntry()) {
      FuncletBlocks.push_back(&MBB);
    } else if (IsSEH && MBB.isEHPad()) {
      SEHCatchPads.push_back(&MBB);
    } else if (MBB.pred_empty()) {
      UnreachableBlocks.push_back(&MBB);
    }

    MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();

    // CatchPads are not funclets for SEH so do not consider CatchRet to
    // transfer control to another funclet.
    if (MBBI == MBB.end() || MBBI->getOpcode() != TII->getCatchReturnOpcode())
      continue;

    // FIXME: SEH CatchPads are not necessarily in the parent function:
    // they could be inside a finally block.
    const MachineBasicBlock *Successor = MBBI->getOperand(0).getMBB();
    const MachineBasicBlock *SuccessorColor = MBBI->getOperand(1).getMBB();
    CatchRetSuccessors.push_back(
        {Successor, IsSEH ? EntryBBNumber : SuccessorColor->getNumber()});
  }

  // We don't have anything to do if there aren't any EH pads.
  if (FuncletBlocks.empty())
    return FuncletMembership;

  // Identify all the basic blocks reachable from the function entry.
  collectFuncletMembers(FuncletMembership, EntryBBNumber, &MF.front());
  // All blocks not part of a funclet are in the parent function.
  for (const MachineBasicBlock *MBB : UnreachableBlocks)
    collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB);
  // Next, identify all the blocks inside the funclets.
  for (const MachineBasicBlock *MBB : FuncletBlocks)
    collectFuncletMembers(FuncletMembership, MBB->getNumber(), MBB);
  // SEH CatchPads aren't really funclets, handle them separately.
  for (const MachineBasicBlock *MBB : SEHCatchPads)
    collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB);
  // Finally, identify all the targets of a catchret.
  for (std::pair<const MachineBasicBlock *, int> CatchRetPair :
       CatchRetSuccessors)
    collectFuncletMembers(FuncletMembership, CatchRetPair.second,
                          CatchRetPair.first);
  return FuncletMembership;
}
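
// Example (illustrative): for a function lowered from C++ such as
//
//   try { may_throw(); } catch (int) { handle(); }
//
// on a Windows EH target, the catch handler becomes a separate funclet. The
// returned map colors the normal-path blocks with the entry block's number
// and the handler's blocks with the funclet entry's number, so later passes
// know which frame each block belongs to.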