Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Analysis/TargetTransformInfo.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "llvm/Analysis/TargetTransformInfo.h"
10
#include "llvm/Analysis/TargetTransformInfoImpl.h"
11
#include "llvm/IR/CallSite.h"
12
#include "llvm/IR/DataLayout.h"
13
#include "llvm/IR/Instruction.h"
14
#include "llvm/IR/Instructions.h"
15
#include "llvm/IR/IntrinsicInst.h"
16
#include "llvm/IR/Module.h"
17
#include "llvm/IR/Operator.h"
18
#include "llvm/IR/PatternMatch.h"
19
#include "llvm/Support/CommandLine.h"
20
#include "llvm/Support/ErrorHandling.h"
21
#include "llvm/Analysis/CFG.h"
22
#include "llvm/Analysis/LoopIterator.h"
23
#include <utility>
24
25
using namespace llvm;
26
using namespace PatternMatch;
27
28
#define DEBUG_TYPE "tti"
29
30
static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
31
                                     cl::Hidden,
32
                                     cl::desc("Recognize reduction patterns."));
33
34
namespace {
35
/// No-op implementation of the TTI interface using the utility base
36
/// classes.
37
///
38
/// This is used when no target specific information is available.
39
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
40
  explicit NoTTIImpl(const DataLayout &DL)
41
18.6k
      : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
42
};
43
}
44
45
5.53k
bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
46
5.53k
  // If the loop has irreducible control flow, it can not be converted to
47
5.53k
  // Hardware loop.
48
5.53k
  LoopBlocksRPO RPOT(L);  
49
5.53k
  RPOT.perform(&LI);
50
5.53k
  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
51
0
    return false;
52
5.53k
  return true;
53
5.53k
}
54
55
bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
56
                                               LoopInfo &LI, DominatorTree &DT,
57
                                               bool ForceNestedLoop,
58
675
                                               bool ForceHardwareLoopPHI) {
59
675
  SmallVector<BasicBlock *, 4> ExitingBlocks;
60
675
  L->getExitingBlocks(ExitingBlocks);
61
675
62
675
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
63
675
                                               IE = ExitingBlocks.end();
64
868
       I != IE; 
++I193
) {
65
713
    BasicBlock *BB = *I;
66
713
67
713
    // If we pass the updated counter back through a phi, we need to know
68
713
    // which latch the updated value will be coming from.
69
713
    if (!L->isLoopLatch(BB)) {
70
96
      if (ForceHardwareLoopPHI || 
CounterInReg93
)
71
6
        continue;
72
707
    }
73
707
74
707
    const SCEV *EC = SE.getExitCount(L, BB);
75
707
    if (isa<SCEVCouldNotCompute>(EC))
76
179
      continue;
77
528
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
78
229
      if (ConstEC->getValue()->isZero())
79
4
        continue;
80
299
    } else if (!SE.isLoopInvariant(EC, L))
81
0
      continue;
82
524
83
524
    if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth())
84
3
      continue;
85
521
86
521
    // If this exiting block is contained in a nested loop, it is not eligible
87
521
    // for insertion of the branch-and-decrement since the inner loop would
88
521
    // end up messing up the value in the CTR.
89
521
    if (!IsNestingLegal && LI.getLoopFor(BB) != L && 
!ForceNestedLoop1
)
90
1
      continue;
91
520
92
520
    // We now have a loop-invariant count of loop iterations (which is not the
93
520
    // constant zero) for which we know that this loop will not exit via this
94
520
    // existing block.
95
520
96
520
    // We need to make sure that this block will run on every loop iteration.
97
520
    // For this to be true, we must dominate all blocks with backedges. Such
98
520
    // blocks are in-loop predecessors to the header block.
99
520
    bool NotAlways = false;
100
520
    for (pred_iterator PI = pred_begin(L->getHeader()),
101
520
                       PIE = pred_end(L->getHeader());
102
1.56k
         PI != PIE; 
++PI1.04k
) {
103
1.04k
      if (!L->contains(*PI))
104
520
        continue;
105
520
106
520
      if (!DT.dominates(*I, *PI)) {
107
0
        NotAlways = true;
108
0
        break;
109
0
      }
110
520
    }
111
520
112
520
    if (NotAlways)
113
0
      continue;
114
520
115
520
    // Make sure this blocks ends with a conditional branch.
116
520
    Instruction *TI = BB->getTerminator();
117
520
    if (!TI)
118
0
      continue;
119
520
120
520
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
121
520
      if (!BI->isConditional())
122
0
        continue;
123
520
124
520
      ExitBranch = BI;
125
520
    } else
126
0
      continue;
127
520
128
520
    // Note that this block may not be the loop latch block, even if the loop
129
520
    // has a latch block.
130
520
    ExitBlock = *I;
131
520
    ExitCount = EC;
132
520
    break;
133
520
  }
134
675
135
675
  if (!ExitBlock)
136
155
    return false;
137
520
  return true;
138
520
}
139
140
TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
141
18.6k
    : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
142
143
16.1M
TargetTransformInfo::~TargetTransformInfo() {}
144
145
TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
146
72.0k
    : TTIImpl(std::move(Arg.TTIImpl)) {}
147
148
15.7M
TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
149
15.7M
  TTIImpl = std::move(RHS.TTIImpl);
150
15.7M
  return *this;
151
15.7M
}
152
153
int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
154
0
                                          Type *OpTy) const {
155
0
  int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy);
156
0
  assert(Cost >= 0 && "TTI should not produce negative costs!");
157
0
  return Cost;
158
0
}
159
160
int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs,
161
0
                                     const User *U) const {
162
0
  int Cost = TTIImpl->getCallCost(FTy, NumArgs, U);
163
0
  assert(Cost >= 0 && "TTI should not produce negative costs!");
164
0
  return Cost;
165
0
}
166
167
int TargetTransformInfo::getCallCost(const Function *F,
168
                                     ArrayRef<const Value *> Arguments,
169
0
                                     const User *U) const {
170
0
  int Cost = TTIImpl->getCallCost(F, Arguments, U);
171
0
  assert(Cost >= 0 && "TTI should not produce negative costs!");
172
0
  return Cost;
173
0
}
174
175
791k
unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
176
791k
  return TTIImpl->getInliningThresholdMultiplier();
177
791k
}
178
179
791k
int TargetTransformInfo::getInlinerVectorBonusPercent() const {
180
791k
  return TTIImpl->getInlinerVectorBonusPercent();
181
791k
}
182
183
int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
184
39.6k
                                    ArrayRef<const Value *> Operands) const {
185
39.6k
  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
186
39.6k
}
187
188
int TargetTransformInfo::getExtCost(const Instruction *I,
189
0
                                    const Value *Src) const {
190
0
  return TTIImpl->getExtCost(I, Src);
191
0
}
192
193
int TargetTransformInfo::getIntrinsicCost(
194
    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments,
195
4
    const User *U) const {
196
4
  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U);
197
4
  assert(Cost >= 0 && "TTI should not produce negative costs!");
198
4
  return Cost;
199
4
}
200
201
unsigned
202
TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
203
31.4k
                                                      unsigned &JTSize) const {
204
31.4k
  return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
205
31.4k
}
206
207
int TargetTransformInfo::getUserCost(const User *U,
208
42.7M
    ArrayRef<const Value *> Operands) const {
209
42.7M
  int Cost = TTIImpl->getUserCost(U, Operands);
210
42.7M
  assert(Cost >= 0 && "TTI should not produce negative costs!");
211
42.7M
  return Cost;
212
42.7M
}
213
214
598k
bool TargetTransformInfo::hasBranchDivergence() const {
215
598k
  return TTIImpl->hasBranchDivergence();
216
598k
}
217
218
1.79M
bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
219
1.79M
  return TTIImpl->isSourceOfDivergence(V);
220
1.79M
}
221
222
341k
bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const {
223
341k
  return TTIImpl->isAlwaysUniform(V);
224
341k
}
225
226
7.12k
unsigned TargetTransformInfo::getFlatAddressSpace() const {
227
7.12k
  return TTIImpl->getFlatAddressSpace();
228
7.12k
}
229
230
3.49M
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
231
3.49M
  return TTIImpl->isLoweredToCall(F);
232
3.49M
}
233
234
bool TargetTransformInfo::isHardwareLoopProfitable(
235
  Loop *L, ScalarEvolution &SE, AssumptionCache &AC,
236
5.23k
  TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {
237
5.23k
  return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
238
5.23k
}
239
240
void TargetTransformInfo::getUnrollingPreferences(
241
390k
    Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
242
390k
  return TTIImpl->getUnrollingPreferences(L, SE, UP);
243
390k
}
244
245
2.63M
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
246
2.63M
  return TTIImpl->isLegalAddImmediate(Imm);
247
2.63M
}
248
249
536k
bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
250
536k
  return TTIImpl->isLegalICmpImmediate(Imm);
251
536k
}
252
253
bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
254
                                                int64_t BaseOffset,
255
                                                bool HasBaseReg,
256
                                                int64_t Scale,
257
                                                unsigned AddrSpace,
258
53.2M
                                                Instruction *I) const {
259
53.2M
  return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
260
53.2M
                                        Scale, AddrSpace, I);
261
53.2M
}
262
263
4.65M
bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
264
4.65M
  return TTIImpl->isLSRCostLess(C1, C2);
265
4.65M
}
266
267
866k
bool TargetTransformInfo::canMacroFuseCmp() const {
268
866k
  return TTIImpl->canMacroFuseCmp();
269
866k
}
270
271
bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI,
272
                                     ScalarEvolution *SE, LoopInfo *LI,
273
                                     DominatorTree *DT, AssumptionCache *AC,
274
103k
                                     TargetLibraryInfo *LibInfo) const {
275
103k
  return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
276
103k
}
277
278
12.0M
bool TargetTransformInfo::shouldFavorPostInc() const {
279
12.0M
  return TTIImpl->shouldFavorPostInc();
280
12.0M
}
281
282
7.35M
bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const {
283
7.35M
  return TTIImpl->shouldFavorBackedgeIndex(L);
284
7.35M
}
285
286
3.79k
bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
287
3.79k
  return TTIImpl->isLegalMaskedStore(DataType);
288
3.79k
}
289
290
2.41k
bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
291
2.41k
  return TTIImpl->isLegalMaskedLoad(DataType);
292
2.41k
}
293
294
bool TargetTransformInfo::isLegalNTStore(Type *DataType,
295
1
                                         unsigned Alignment) const {
296
1
  return TTIImpl->isLegalNTStore(DataType, Alignment);
297
1
}
298
299
bool TargetTransformInfo::isLegalNTLoad(Type *DataType,
300
2
                                        unsigned Alignment) const {
301
2
  return TTIImpl->isLegalNTLoad(DataType, Alignment);
302
2
}
303
304
14.1k
bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
305
14.1k
  return TTIImpl->isLegalMaskedGather(DataType);
306
14.1k
}
307
308
10.8k
bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
309
10.8k
  return TTIImpl->isLegalMaskedScatter(DataType);
310
10.8k
}
311
312
229
bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
313
229
  return TTIImpl->isLegalMaskedCompressStore(DataType);
314
229
}
315
316
314
bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
317
314
  return TTIImpl->isLegalMaskedExpandLoad(DataType);
318
314
}
319
320
1.76k
bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
321
1.76k
  return TTIImpl->hasDivRemOp(DataType, IsSigned);
322
1.76k
}
323
324
bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
325
595
                                             unsigned AddrSpace) const {
326
595
  return TTIImpl->hasVolatileVariant(I, AddrSpace);
327
595
}
328
329
42.5k
bool TargetTransformInfo::prefersVectorizedAddressing() const {
330
42.5k
  return TTIImpl->prefersVectorizedAddressing();
331
42.5k
}
332
333
int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
334
                                              int64_t BaseOffset,
335
                                              bool HasBaseReg,
336
                                              int64_t Scale,
337
6.08M
                                              unsigned AddrSpace) const {
338
6.08M
  int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
339
6.08M
                                           Scale, AddrSpace);
340
6.08M
  assert(Cost >= 0 && "TTI should not produce negative costs!");
341
6.08M
  return Cost;
342
6.08M
}
343
344
4.66M
bool TargetTransformInfo::LSRWithInstrQueries() const {
345
4.66M
  return TTIImpl->LSRWithInstrQueries();
346
4.66M
}
347
348
1.48M
bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
349
1.48M
  return TTIImpl->isTruncateFree(Ty1, Ty2);
350
1.48M
}
351
352
121k
bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
353
121k
  return TTIImpl->isProfitableToHoist(I);
354
121k
}
355
356
674
bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }
357
358
18.5k
bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
359
18.5k
  return TTIImpl->isTypeLegal(Ty);
360
18.5k
}
361
362
0
unsigned TargetTransformInfo::getJumpBufAlignment() const {
363
0
  return TTIImpl->getJumpBufAlignment();
364
0
}
365
366
0
unsigned TargetTransformInfo::getJumpBufSize() const {
367
0
  return TTIImpl->getJumpBufSize();
368
0
}
369
370
61.4k
bool TargetTransformInfo::shouldBuildLookupTables() const {
371
61.4k
  return TTIImpl->shouldBuildLookupTables();
372
61.4k
}
373
80.7k
bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
374
80.7k
  return TTIImpl->shouldBuildLookupTablesForConstant(C);
375
80.7k
}
376
377
24.8k
bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
378
24.8k
  return TTIImpl->useColdCCForColdCall(F);
379
24.8k
}
380
381
unsigned TargetTransformInfo::
382
13.1k
getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
383
13.1k
  return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
384
13.1k
}
385
386
unsigned TargetTransformInfo::
387
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
388
18.1k
                                 unsigned VF) const {
389
18.1k
  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
390
18.1k
}
391
392
16.5k
bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
393
16.5k
  return TTIImpl->supportsEfficientVectorElementLoadStore();
394
16.5k
}
395
396
1.31k
bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
397
1.31k
  return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
398
1.31k
}
399
400
TargetTransformInfo::MemCmpExpansionOptions
401
490k
TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
402
490k
  return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp);
403
490k
}
404
405
19.9k
bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
406
19.9k
  return TTIImpl->enableInterleavedAccessVectorization();
407
19.9k
}
408
409
19.5k
bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
410
19.5k
  return TTIImpl->enableMaskedInterleavedAccessVectorization();
411
19.5k
}
412
413
10.7k
bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
414
10.7k
  return TTIImpl->isFPVectorizationPotentiallyUnsafe();
415
10.7k
}
416
417
bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
418
                                                         unsigned BitWidth,
419
                                                         unsigned AddressSpace,
420
                                                         unsigned Alignment,
421
9.42k
                                                         bool *Fast) const {
422
9.42k
  return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
423
9.42k
                                                 Alignment, Fast);
424
9.42k
}
425
426
TargetTransformInfo::PopcntSupportKind
427
141k
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
428
141k
  return TTIImpl->getPopcntSupport(IntTyWidthInBit);
429
141k
}
430
431
54
bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
432
54
  return TTIImpl->haveFastSqrt(Ty);
433
54
}
434
435
27
bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
436
27
  return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
437
27
}
438
439
383k
int TargetTransformInfo::getFPOpCost(Type *Ty) const {
440
383k
  int Cost = TTIImpl->getFPOpCost(Ty);
441
383k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
442
383k
  return Cost;
443
383k
}
444
445
int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
446
                                               const APInt &Imm,
447
2.50k
                                               Type *Ty) const {
448
2.50k
  int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
449
2.50k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
450
2.50k
  return Cost;
451
2.50k
}
452
453
109
int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
454
109
  int Cost = TTIImpl->getIntImmCost(Imm, Ty);
455
109
  assert(Cost >= 0 && "TTI should not produce negative costs!");
456
109
  return Cost;
457
109
}
458
459
int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
460
4.49M
                                       const APInt &Imm, Type *Ty) const {
461
4.49M
  int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
462
4.49M
  assert(Cost >= 0 && "TTI should not produce negative costs!");
463
4.49M
  return Cost;
464
4.49M
}
465
466
int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
467
335k
                                       const APInt &Imm, Type *Ty) const {
468
335k
  int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
469
335k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
470
335k
  return Cost;
471
335k
}
472
473
9.34M
unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
474
9.34M
  return TTIImpl->getNumberOfRegisters(Vector);
475
9.34M
}
476
477
314k
unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
478
314k
  return TTIImpl->getRegisterBitWidth(Vector);
479
314k
}
480
481
275k
unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
482
275k
  return TTIImpl->getMinVectorRegisterBitWidth();
483
275k
}
484
485
19.8k
bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const {
486
19.8k
  return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
487
19.8k
}
488
489
2
unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
490
2
  return TTIImpl->getMinimumVF(ElemWidth);
491
2
}
492
493
bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
494
808k
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
495
808k
  return TTIImpl->shouldConsiderAddressTypePromotion(
496
808k
      I, AllowPromotionWithoutCommonHeader);
497
808k
}
498
499
69
unsigned TargetTransformInfo::getCacheLineSize() const {
500
69
  return TTIImpl->getCacheLineSize();
501
69
}
502
503
llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level)
504
4
  const {
505
4
  return TTIImpl->getCacheSize(Level);
506
4
}
507
508
llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity(
509
5
  CacheLevel Level) const {
510
5
  return TTIImpl->getCacheAssociativity(Level);
511
5
}
512
513
421k
unsigned TargetTransformInfo::getPrefetchDistance() const {
514
421k
  return TTIImpl->getPrefetchDistance();
515
421k
}
516
517
9.62k
unsigned TargetTransformInfo::getMinPrefetchStride() const {
518
9.62k
  return TTIImpl->getMinPrefetchStride();
519
9.62k
}
520
521
155k
unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
522
155k
  return TTIImpl->getMaxPrefetchIterationsAhead();
523
155k
}
524
525
21.3k
unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
526
21.3k
  return TTIImpl->getMaxInterleaveFactor(VF);
527
21.3k
}
528
529
TargetTransformInfo::OperandValueKind
530
557k
TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
531
557k
  OperandValueKind OpInfo = OK_AnyValue;
532
557k
  OpProps = OP_None;
533
557k
534
557k
  if (auto *CI = dyn_cast<ConstantInt>(V)) {
535
155k
    if (CI->getValue().isPowerOf2())
536
74.8k
      OpProps = OP_PowerOf2;
537
155k
    return OK_UniformConstantValue;
538
155k
  }
539
402k
540
402k
  // A broadcast shuffle creates a uniform value.
541
402k
  // TODO: Add support for non-zero index broadcasts.
542
402k
  // TODO: Add support for different source vector width.
543
402k
  if (auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
544
1.37k
    if (ShuffleInst->isZeroEltSplat())
545
1.06k
      OpInfo = OK_UniformValue;
546
402k
547
402k
  const Value *Splat = getSplatValue(V);
548
402k
549
402k
  // Check for a splat of a constant or for a non uniform vector of constants
550
402k
  // and check if the constant(s) are all powers of two.
551
402k
  if (isa<ConstantVector>(V) || 
isa<ConstantDataVector>(V)402k
) {
552
4.48k
    OpInfo = OK_NonUniformConstantValue;
553
4.48k
    if (Splat) {
554
2.29k
      OpInfo = OK_UniformConstantValue;
555
2.29k
      if (auto *CI = dyn_cast<ConstantInt>(Splat))
556
2.22k
        if (CI->getValue().isPowerOf2())
557
508
          OpProps = OP_PowerOf2;
558
2.29k
    } else 
if (auto *2.19k
CDS2.19k
= dyn_cast<ConstantDataSequential>(V)) {
559
2.17k
      OpProps = OP_PowerOf2;
560
11.8k
      for (unsigned I = 0, E = CDS->getNumElements(); I != E; 
++I9.67k
) {
561
11.3k
        if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
562
11.3k
          if (CI->getValue().isPowerOf2())
563
9.67k
            continue;
564
1.69k
        OpProps = OP_None;
565
1.69k
        break;
566
1.69k
      }
567
2.17k
    }
568
4.48k
  }
569
402k
570
402k
  // Check for a splat of a uniform value. This is not loop aware, so return
571
402k
  // true only for the obviously uniform cases (argument, globalvalue)
572
402k
  if (Splat && 
(2.83k
isa<Argument>(Splat)2.83k
||
isa<GlobalValue>(Splat)2.29k
))
573
543
    OpInfo = OK_UniformValue;
574
402k
575
402k
  return OpInfo;
576
402k
}
577
578
int TargetTransformInfo::getArithmeticInstrCost(
579
    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
580
    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
581
    OperandValueProperties Opd2PropInfo,
582
918k
    ArrayRef<const Value *> Args) const {
583
918k
  int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
584
918k
                                             Opd1PropInfo, Opd2PropInfo, Args);
585
918k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
586
918k
  return Cost;
587
918k
}
588
589
int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
590
163k
                                        Type *SubTp) const {
591
163k
  int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
592
163k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
593
163k
  return Cost;
594
163k
}
595
596
int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
597
144k
                                 Type *Src, const Instruction *I) const {
598
144k
  assert ((I == nullptr || I->getOpcode() == Opcode) &&
599
144k
          "Opcode should reflect passed instruction.");
600
144k
  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
601
144k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
602
144k
  return Cost;
603
144k
}
604
605
int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
606
                                                  VectorType *VecTy,
607
6.58k
                                                  unsigned Index) const {
608
6.58k
  int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
609
6.58k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
610
6.58k
  return Cost;
611
6.58k
}
612
613
135k
int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
614
135k
  int Cost = TTIImpl->getCFInstrCost(Opcode);
615
135k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
616
135k
  return Cost;
617
135k
}
618
619
int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
620
178k
                                 Type *CondTy, const Instruction *I) const {
621
178k
  assert ((I == nullptr || I->getOpcode() == Opcode) &&
622
178k
          "Opcode should reflect passed instruction.");
623
178k
  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
624
178k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
625
178k
  return Cost;
626
178k
}
627
628
int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
629
1.73M
                                            unsigned Index) const {
630
1.73M
  int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
631
1.73M
  assert(Cost >= 0 && "TTI should not produce negative costs!");
632
1.73M
  return Cost;
633
1.73M
}
634
635
int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
636
                                         unsigned Alignment,
637
                                         unsigned AddressSpace,
638
402k
                                         const Instruction *I) const {
639
402k
  assert ((I == nullptr || I->getOpcode() == Opcode) &&
640
402k
          "Opcode should reflect passed instruction.");
641
402k
  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
642
402k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
643
402k
  return Cost;
644
402k
}
645
646
int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
647
                                               unsigned Alignment,
648
185
                                               unsigned AddressSpace) const {
649
185
  int Cost =
650
185
      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
651
185
  assert(Cost >= 0 && "TTI should not produce negative costs!");
652
185
  return Cost;
653
185
}
654
655
int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
656
                                                Value *Ptr, bool VariableMask,
657
132
                                                unsigned Alignment) const {
658
132
  int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
659
132
                                             Alignment);
660
132
  assert(Cost >= 0 && "TTI should not produce negative costs!");
661
132
  return Cost;
662
132
}
663
664
int TargetTransformInfo::getInterleavedMemoryOpCost(
665
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
666
    unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
667
2.27k
    bool UseMaskForGaps) const {
668
2.27k
  int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
669
2.27k
                                                 Alignment, AddressSpace,
670
2.27k
                                                 UseMaskForCond,
671
2.27k
                                                 UseMaskForGaps);
672
2.27k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
673
2.27k
  return Cost;
674
2.27k
}
675
676
int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
677
                                    ArrayRef<Type *> Tys, FastMathFlags FMF,
678
2.83k
                                    unsigned ScalarizationCostPassed) const {
679
2.83k
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
680
2.83k
                                            ScalarizationCostPassed);
681
2.83k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
682
2.83k
  return Cost;
683
2.83k
}
684
685
int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
686
16.5k
           ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
687
16.5k
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
688
16.5k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
689
16.5k
  return Cost;
690
16.5k
}
691
692
int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
693
2.31k
                                          ArrayRef<Type *> Tys) const {
694
2.31k
  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
695
2.31k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
696
2.31k
  return Cost;
697
2.31k
}
698
699
637k
unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
700
637k
  return TTIImpl->getNumberOfParts(Tp);
701
637k
}
702
703
int TargetTransformInfo::getAddressComputationCost(Type *Tp,
704
                                                   ScalarEvolution *SE,
705
56.8k
                                                   const SCEV *Ptr) const {
706
56.8k
  int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
707
56.8k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
708
56.8k
  return Cost;
709
56.8k
}
710
711
0
int TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
712
0
  int Cost = TTIImpl->getMemcpyCost(I);
713
0
  assert(Cost >= 0 && "TTI should not produce negative costs!");
714
0
  return Cost;
715
0
}
716
717
int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
718
4.86k
                                                    bool IsPairwiseForm) const {
719
4.86k
  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
720
4.86k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
721
4.86k
  return Cost;
722
4.86k
}
723
724
int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy,
725
                                                bool IsPairwiseForm,
726
2.96k
                                                bool IsUnsigned) const {
727
2.96k
  int Cost =
728
2.96k
      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
729
2.96k
  assert(Cost >= 0 && "TTI should not produce negative costs!");
730
2.96k
  return Cost;
731
2.96k
}
732
733
// Cost of keeping values of the given types live across a call; pure forward
// to the target implementation.
unsigned
TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
  return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
}
737
738
// Returns true and fills \p Info if \p Inst is a target-specific memory
// intrinsic the target knows how to describe.
bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                             MemIntrinsicInfo &Info) const {
  return TTIImpl->getTgtMemIntrinsic(Inst, Info);
}
742
743
9
// Maximum element size (in bytes) supported when lowering atomic memory
// intrinsics; pure forward to the target implementation.
unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
  return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
}
746
747
// Extract (or materialize) a value of \p ExpectedType from the target memory
// intrinsic \p Inst; forwards to the target implementation.
Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
    IntrinsicInst *Inst, Type *ExpectedType) const {
  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}
751
752
// Preferred element type for the main loop when expanding a memcpy of
// \p Length bytes with the given source/destination alignments.
Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context,
                                                     Value *Length,
                                                     unsigned SrcAlign,
                                                     unsigned DestAlign) const {
  return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign,
                                            DestAlign);
}
759
760
// Populate \p OpsOut with the operand types to use for the residual (tail)
// copies after the main memcpy expansion loop.
void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const {
  TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                             SrcAlign, DestAlign);
}
766
767
// True if the target considers inlining \p Callee into \p Caller compatible
// (e.g. matching subtarget features); forwards to the target implementation.
bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
                                              const Function *Callee) const {
  return TTIImpl->areInlineCompatible(Caller, Callee);
}
771
772
// True if the given arguments can be passed between \p Caller and \p Callee
// without ABI-changing transformations; forwards to the target implementation.
bool TargetTransformInfo::areFunctionArgsABICompatible(
    const Function *Caller, const Function *Callee,
    SmallPtrSetImpl<Argument *> &Args) const {
  return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
}
777
778
// True if the target supports an indexed (pre/post-inc) load of type \p Ty
// in addressing mode \p Mode.
bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
                                             Type *Ty) const {
  return TTIImpl->isIndexedLoadLegal(Mode, Ty);
}
782
783
// True if the target supports an indexed (pre/post-inc) store of type \p Ty
// in addressing mode \p Mode.
bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
                                              Type *Ty) const {
  return TTIImpl->isIndexedStoreLegal(Mode, Ty);
}
787
788
82.7k
// Bit width of the widest vector load/store register for address space \p AS.
unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
  return TTIImpl->getLoadStoreVecRegBitWidth(AS);
}
791
792
52.1k
// True if the target allows \p LI to be merged into a vector load.
bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
  return TTIImpl->isLegalToVectorizeLoad(LI);
}
795
796
20.0k
// True if the target allows \p SI to be merged into a vector store.
bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
  return TTIImpl->isLegalToVectorizeStore(SI);
}
799
800
// True if a chain of loads totaling \p ChainSizeInBytes with the given
// alignment may be vectorized in address space \p AddrSpace.
bool TargetTransformInfo::isLegalToVectorizeLoadChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                              AddrSpace);
}
805
806
// True if a chain of stores totaling \p ChainSizeInBytes with the given
// alignment may be vectorized in address space \p AddrSpace.
bool TargetTransformInfo::isLegalToVectorizeStoreChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                               AddrSpace);
}
811
812
// Target-preferred vectorization factor for a load chain; forwards the
// candidate VF, element size, total chain size, and vector type.
unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
                                                  unsigned LoadSize,
                                                  unsigned ChainSizeInBytes,
                                                  VectorType *VecTy) const {
  return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}
818
819
// Target-preferred vectorization factor for a store chain; forwards the
// candidate VF, element size, total chain size, and vector type.
unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
                                                   unsigned StoreSize,
                                                   unsigned ChainSizeInBytes,
                                                   VectorType *VecTy) const {
  return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}
825
826
// True if the target prefers an experimental.vector.reduce intrinsic for the
// reduction described by \p Opcode / \p Ty / \p Flags.
bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode,
                                                Type *Ty, ReductionFlags Flags) const {
  return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
}
830
831
389k
// True if the reduction intrinsic \p II should be expanded to a shuffle
// sequence rather than lowered directly.
bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
  return TTIImpl->shouldExpandReduction(II);
}
834
835
56
// GlobalISel cost of rematerializing a global value; pure forward.
unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
  return TTIImpl->getGISelRematGlobalCost();
}
838
839
119
// Latency-based cost of instruction \p I; pure forward to the target
// implementation (used by the TCK_Latency cost kind).
int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
  return TTIImpl->getInstructionLatency(I);
}
842
843
/// Check whether \p SI is the left/right shuffle of one level of a pairwise
/// reduction tree: at \p Level, the left shuffle selects elements
/// 0,2,4,... and the right shuffle selects 1,3,5,..., for 2^Level lanes,
/// with the remaining lanes undef (-1).
static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
                                     unsigned Level) {
  // We don't need a shuffle if we just want to have element 0 in position 0 of
  // the vector.
  if (!SI && Level == 0 && IsLeft)
    return true;
  else if (!SI)
    return false;

  // Start from an all-undef mask; only the first 2^Level lanes are selected.
  SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);

  // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
  // we look at the left or right side.
  for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
    Mask[i] = val;

  SmallVector<int, 16> ActualMask = SI->getShuffleMask();
  return Mask == ActualMask;
}
862
863
namespace {
/// Kind of the reduction data.
enum ReductionKind {
  RK_None,           /// Not a reduction.
  RK_Arithmetic,     /// Binary reduction data.
  RK_MinMax,         /// Min/max reduction data.
  RK_UnsignedMinMax, /// Unsigned min/max reduction data.
};
/// Contains opcode + LHS/RHS parts of the reduction operations.
struct ReductionData {
  ReductionData() = delete;
  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
    assert(Kind != RK_None && "expected binary or min/max reduction only.");
  }
  // Opcode of the reduction operation (binop opcode, or the compare opcode
  // for min/max kinds).
  unsigned Opcode = 0;
  // Operands of the reduction step.
  Value *LHS = nullptr;
  Value *RHS = nullptr;
  ReductionKind Kind = RK_None;
  /// Two reduction steps belong to the same tree iff their kind and opcode
  /// agree; operands are checked separately by the matchers.
  bool hasSameData(ReductionData &RD) const {
    return Kind == RD.Kind && Opcode == RD.Opcode;
  }
};
} // namespace
887
888
550
/// Classify \p I as a potential reduction step: a plain binary operator, or a
/// select implementing a signed/ordered/unordered min/max, or an unsigned
/// min/max. Returns None when \p I matches none of these patterns.
static Optional<ReductionData> getReductionData(Instruction *I) {
  Value *L, *R;
  // Any binary operator forms an arithmetic reduction step.
  if (m_BinOp(m_Value(L), m_Value(R)).match(I))
    return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
  if (auto *SI = dyn_cast<SelectInst>(I)) {
    // Signed and floating-point (ordered/unordered) min/max selects.
    if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
        m_SMax(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
    }
    // Unsigned min/max selects are reported as a distinct kind so the cost
    // query can pick the unsigned variant.
    if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
    }
  }
  return llvm::None;
}
910
911
/// Recursively match one level of a pairwise reduction tree rooted at \p I,
/// then descend until \p NumLevels levels have been matched. Returns the
/// reduction kind on success, RK_None otherwise.
static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
                                                   unsigned Level,
                                                   unsigned NumLevels) {
  // Match one level of pairwise operations.
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  if (!I)
    return RK_None;

  assert(I->getType()->isVectorTy() && "Expecting a vector type");

  Optional<ReductionData> RD = getReductionData(I);
  if (!RD)
    return RK_None;

  // Both operands of the reduction binop should be shuffles, except on
  // level 0 where one of them may be omitted (element 0 passes through).
  ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS);
  if (!LS && Level)
    return RK_None;
  ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS);
  if (!RS && Level)
    return RK_None;

  // On level 0 we can omit one shufflevector instruction.
  if (!Level && !RS && !LS)
    return RK_None;

  // Shuffle inputs must match.
  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
  Value *NextLevelOp = nullptr;
  if (NextLevelOpR && NextLevelOpL) {
    // If we have two shuffles their operands must match.
    if (NextLevelOpL != NextLevelOpR)
      return RK_None;

    NextLevelOp = NextLevelOpL;
  } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
    // On the first level we can omit the shufflevector <0, undef,...>. So the
    // input to the other shufflevector <1, undef> must match with one of the
    // inputs to the current binary operation.
    // Example:
    //  %NextLevelOpL = shufflevector %R, <1, undef ...>
    //  %BinOp        = fadd          %NextLevelOpL, %R
    if (NextLevelOpL && NextLevelOpL != RD->RHS)
      return RK_None;
    else if (NextLevelOpR && NextLevelOpR != RD->LHS)
      return RK_None;

    NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
  } else
    return RK_None;

  // Check that the next levels binary operation exists and matches with the
  // current one.
  if (Level + 1 != NumLevels) {
    Optional<ReductionData> NextLevelRD =
        getReductionData(cast<Instruction>(NextLevelOp));
    if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
      return RK_None;
  }

  // Shuffle mask for pairwise operation must match. Either shuffle may play
  // the "left" (even lanes) role; the other must then be the "right" one.
  if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
      return RK_None;
  } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
      return RK_None;
  } else {
    return RK_None;
  }

  if (++Level == NumLevels)
    return RD->Kind;

  // Match next level.
  return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
                                       NumLevels);
}
993
994
/// Try to match the extractelement \p ReduxRoot as the root of a pairwise
/// reduction tree. On success, sets \p Opcode and \p Ty to the reduction
/// opcode and vector type and returns the kind; otherwise returns RK_None.
/// Matching is gated on the -costmodel-reduxcost flag.
static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
                                            unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  // A full pairwise tree needs a power-of-two lane count (one level per
  // halving).
  Type *VecTy = RdxStart->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffle,shuffle,add triples like the following
  // that builds a pairwise reduction tree.
  //
  //  (X0, X1, X2, X3)
  //   (X0 + X1, X2 + X3, undef, undef)
  //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
  //
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
  // %r = extractelement <4 x float> %bin.rdx8, i32 0
  if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
      RK_None)
    return RK_None;

  Opcode = RD->Opcode;
  Ty = VecTy;

  return RD->Kind;
}
1046
1047
/// Split the operand pair (\p L, \p R) of a reduction step into the non-shuffle
/// operand and the shufflevector operand (null if neither side is a shuffle).
static std::pair<Value *, ShuffleVectorInst *>
getShuffleAndOtherOprd(Value *L, Value *R) {
  // If the left operand is the shuffle, the right one is the "other" value.
  if (auto *LeftShuffle = dyn_cast<ShuffleVectorInst>(L))
    return std::make_pair(R, LeftShuffle);

  // Otherwise the shuffle, if present at all, must be on the right.
  return std::make_pair(L, dyn_cast<ShuffleVectorInst>(R));
}
1057
1058
/// Try to match \p ReduxRoot as the root of a vector-splitting (log2 halving)
/// reduction: each step adds the vector to a shuffle of its own top half. On
/// success, sets \p Opcode and \p Ty and returns the reduction kind; otherwise
/// returns RK_None. Matching is gated on the -costmodel-reduxcost flag.
static ReductionKind
matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
                              unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  // Halving requires a power-of-two lane count.
  Type *VecTy = ReduxRoot->getOperand(0)->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffles and adds like the following matching one
  // fadd, shuffle vector pair at a time.
  //
  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
  //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
  //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
  // %r = extractelement <4 x float> %bin.rdx8, i32 0

  unsigned MaskStart = 1;
  Instruction *RdxOp = RdxStart;
  SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
  unsigned NumVecElemsRemain = NumVecElems;
  // Walk from the root toward the leaves, halving the live element count at
  // each step; the loop runs log2(NumVecElems) times.
  while (NumVecElemsRemain - 1) {
    // Check for the right reduction operation.
    if (!RdxOp)
      return RK_None;
    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
    if (!RDLevel || !RDLevel->hasSameData(*RD))
      return RK_None;

    Value *NextRdxOp;
    ShuffleVectorInst *Shuffle;
    std::tie(NextRdxOp, Shuffle) =
        getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);

    // Check the current reduction operation and the shuffle use the same value.
    if (Shuffle == nullptr)
      return RK_None;
    if (Shuffle->getOperand(0) != NextRdxOp)
      return RK_None;

    // Check that shuffle masks matches.
    for (unsigned j = 0; j != MaskStart; ++j)
      ShuffleMask[j] = MaskStart + j;
    // Fill the rest of the mask with -1 for undef.
    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);

    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
    if (ShuffleMask != Mask)
      return RK_None;

    RdxOp = dyn_cast<Instruction>(NextRdxOp);
    NumVecElemsRemain /= 2;
    MaskStart *= 2;
  }

  Opcode = RD->Opcode;
  Ty = VecTy;
  return RD->Kind;
}
1137
1138
237k
/// Reciprocal-throughput cost of instruction \p I: dispatch on the opcode to
/// the appropriate specific cost hook. ExtractElement additionally tries to
/// recognize full reduction patterns (splitting, then pairwise) and price them
/// as a single reduction. Returns -1 for opcodes with no cost information.
int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
    return getUserCost(I);

  case Instruction::Ret:
  case Instruction::PHI:
  case Instruction::Br: {
    return getCFInstrCost(I->getOpcode());
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Operand kinds/properties (constant, power-of-two, ...) let targets
    // price special cases like shifts by a uniform constant.
    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
    Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
                                  Op1VP, Op2VP, Operands);
  }
  case Instruction::FNeg: {
    // Unary: only the first operand carries value info.
    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
    Op2VK = OK_AnyValue;
    Op2VP = OP_None;
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
                                  Op1VP, Op2VP, Operands);
  }
  case Instruction::Select: {
    const SelectInst *SI = cast<SelectInst>(I);
    Type *CondTy = SI->getCondition()->getType();
    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    Type *ValTy = I->getOperand(0)->getType();
    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
  }
  case Instruction::Store: {
    const StoreInst *SI = cast<StoreInst>(I);
    Type *ValTy = SI->getValueOperand()->getType();
    return getMemoryOpCost(I->getOpcode(), ValTy,
                                SI->getAlignment(),
                                SI->getPointerAddressSpace(), I);
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(I);
    return getMemoryOpCost(I->getOpcode(), I->getType(),
                                LI->getAlignment(),
                                LI->getPointerAddressSpace(), I);
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
  }
  case Instruction::ExtractElement: {
    const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();

    // Try to match a reduction sequence (series of shufflevector and vector
    // adds followed by a extractelement).
    unsigned ReduxOpCode;
    Type *ReduxType;

    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                             /*IsPairwiseForm=*/false);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                             /*IsPairwiseForm=*/true);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    // Not a reduction root: price a plain element extract.
    return getVectorInstrCost(I->getOpcode(),
                                   EEI->getOperand(0)->getType(), Idx);
  }
  case Instruction::InsertElement: {
    const InsertElementInst * IE = cast<InsertElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();
    return getVectorInstrCost(I->getOpcode(),
                                   IE->getType(), Idx);
  }
  case Instruction::ShuffleVector: {
    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    Type *Ty = Shuffle->getType();
    Type *SrcTy = Shuffle->getOperand(0)->getType();

    // TODO: Identify and add costs for insert subvector, etc.
    int SubIndex;
    if (Shuffle->isExtractSubvectorMask(SubIndex))
      return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty);

    if (Shuffle->changesLength())
      return -1;

    if (Shuffle->isIdentity())
      return 0;

    if (Shuffle->isReverse())
      return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);

    if (Shuffle->isSelect())
      return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr);

    if (Shuffle->isTranspose())
      return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr);

    if (Shuffle->isZeroEltSplat())
      return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr);

    if (Shuffle->isSingleSource())
      return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr);

    return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
  }
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      SmallVector<Value *, 4> Args(II->arg_operands());

      FastMathFlags FMF;
      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
        FMF = FPMO->getFastMathFlags();

      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
                                        Args, FMF);
    }
    // Non-intrinsic calls have no throughput information.
    return -1;
  default:
    // We don't have any information on this instruction.
    return -1;
  }
}
1329
1330
16.0M
// Out-of-line virtual destructor anchors the Concept vtable in this TU.
TargetTransformInfo::Concept::~Concept() {}
1331
1332
7.06k
// Default-constructed analysis falls back to the no-op TTI implementation.
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
1333
1334
// Construct with a target-provided callback that builds the per-function TTI.
TargetIRAnalysis::TargetIRAnalysis(
    std::function<Result(const Function &)> TTICallback)
    : TTICallback(std::move(TTICallback)) {}
1337
1338
// New PassManager entry point: produce the TTI result for \p F by invoking
// the stored callback (the FunctionAnalysisManager is unused).
TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
                                               FunctionAnalysisManager &) {
  return TTICallback(F);
}
1342
1343
// Unique identity token used by the new PassManager's analysis caching.
AnalysisKey TargetIRAnalysis::Key;
1344
1345
15.5k
// Fallback TTI when no target is registered: build the no-op implementation
// from the module's DataLayout only.
TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
  return Result(F.getParent()->getDataLayout());
}
1348
1349
// Register the wrapper pass with the legacy PassManager under the name "tti"
// so dependent passes can require it.
INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
                "Target Transform Information", false, true)
char TargetTransformInfoWrapperPass::ID = 0;
1353
1354
0
// Out-of-line definition pins the wrapper pass's vtable to this file.
void TargetTransformInfoWrapperPass::anchor() {}
1355
1356
// Default ctor: uses a default-constructed TargetIRAnalysis (no-op TTI).
TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
    : ImmutablePass(ID) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}
1361
1362
// Ctor used by targets: wraps the supplied TargetIRAnalysis so the legacy
// PassManager can hand out per-function TTI results.
TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
    TargetIRAnalysis TIRA)
    : ImmutablePass(ID), TIRA(std::move(TIRA)) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}
1368
1369
15.7M
// Compute (and cache in the member) the TTI for \p F. The dummy analysis
// manager satisfies the new-PM run() signature from legacy-PM code.
TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
  FunctionAnalysisManager DummyFAM;
  TTI = TIRA.run(F, DummyFAM);
  return *TTI;
}
1374
1375
// Factory used by targets to create the legacy wrapper pass around their
// TargetIRAnalysis; caller (the legacy PassManager) takes ownership.
ImmutablePass *
llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
  return new TargetTransformInfoWrapperPass(std::move(TIRA));
}