Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file describes a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

namespace llvm {

class AMDGPUTargetLowering;
class Loop;
class ScalarEvolution;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      TargetTriple(TM->getTargetTriple()) {}

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
};
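
For context, here is a minimal sketch (not part of the header) of how a loop pass reaches the getUnrollingPreferences override above through the TargetTransformInfo wrapper. The helper name unrollThresholdFor and the starting threshold value are illustrative assumptions, not LLVM API.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Hypothetical helper: the unroller fills UP with generic defaults first,
// then lets the target hook adjust them.
static unsigned unrollThresholdFor(Loop *L, ScalarEvolution &SE,
                                   const TargetTransformInfo &TTI) {
  TargetTransformInfo::UnrollingPreferences UP;
  UP.Threshold = 150; // assumed generic starting point; the real caller
                      // initializes every field of UP
  TTI.getUnrollingPreferences(L, SE, UP);
  return UP.Threshold;
}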

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphicsShader;

  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,

    // Properties of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,
    AMDGPU::FeatureCodeObjectV3,

    // The default assumption needs to be that ECC is enabled, but no directly
    // exposed operations depend on it, so it can be safely inlined.
    AMDGPU::FeatureSRAMECC,

    // Perf-tuning features.
    AMDGPU::FeatureFastFMAF32,
    AMDGPU::HalfRate64Ops
  };

  const GCNSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and on others
  // quarter rate. This also applies to some integer operations.
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
    unsigned Opcode, Type *Ty,
    TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
    TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
    TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
    ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running the InferAddressSpaces pass on graphics shaders,
    // which don't use flat addressing.
    if (IsGraphicsShader)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }

  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 7; }

  int getInlinerVectorBonusPercent() { return 0; }

  int getArithmeticReductionCost(unsigned Opcode,
                                 Type *Ty,
                                 bool IsPairwise);
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm,
                             bool IsUnsigned);
};
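
To make the rate arithmetic above concrete, here is a standalone mirror of the cost helpers, for illustration only: full rate costs 1x TCC_Basic, half rate 2x, quarter rate 3x, and 64-bit ops pick between the last two based on the subtarget. The free functions and the bool parameter standing in for GCNSubtarget::hasHalfRate64Ops() are assumptions of this sketch, not the header's API.

namespace {
constexpr int TCC_Basic = 1; // matches TargetTransformInfo::TCC_Basic

constexpr int fullRateCost()    { return 1 * TCC_Basic; }
constexpr int halfRateCost()    { return 2 * TCC_Basic; }
constexpr int quarterRateCost() { return 3 * TCC_Basic; }

// `hasHalfRate64Ops` stands in for GCNSubtarget::hasHalfRate64Ops().
constexpr int cost64Bit(bool hasHalfRate64Ops) {
  return hasHalfRate64Ops ? halfRateCost() : quarterRateCost();
}
} // namespace

static_assert(fullRateCost() == 1 && cost64Bit(true) == 2 &&
              cost64Bit(false) == 3,
              "64-bit ops cost 2x or 3x the basic unit");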

class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F) {}

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
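
As a closing illustration, here is a hedged sketch of how a mid-end consumer exercises the hooks declared above. Every query goes through the TargetTransformInfo facade rather than calling GCNTTIImpl directly; the function name querySample is hypothetical, and the TTI and Ctx arguments are assumed to be supplied by the caller.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Hypothetical consumer, not part of the header above.
static void querySample(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
  // GCNTTIImpl always reports divergent branches, so divergence-aware
  // passes stay enabled for GCN.
  bool Divergent = TTI.hasBranchDivergence();

  // An fp64 add is costed via get64BitInstrCost(): 2x or 3x TCC_Basic,
  // depending on GCNSubtarget::hasHalfRate64Ops().
  int FAddCost = TTI.getArithmeticInstrCost(Instruction::FAdd,
                                            Type::getDoubleTy(Ctx));

  // InferAddressSpaces keys off this; graphics shaders return -1 to opt out.
  unsigned FlatAS = TTI.getFlatAddressSpace();

  (void)Divergent; (void)FAddCost; (void)FlatAS;
}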