Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
Line
Count
Source (jump to first uncovered line)
1
//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
/// \file
11
/// This file describes a TargetTransformInfo::Concept conforming object specific to the
12
/// AMDGPU target machine. It uses the target's detailed information to
13
/// provide more precise answers to certain TTI queries, while letting the
14
/// target independent and default TTI implementations handle the rest.
15
//
16
//===----------------------------------------------------------------------===//
17
18
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
19
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
20
21
#include "AMDGPU.h"
22
#include "AMDGPUSubtarget.h"
23
#include "AMDGPUTargetMachine.h"
24
#include "Utils/AMDGPUBaseInfo.h"
25
#include "llvm/ADT/ArrayRef.h"
26
#include "llvm/Analysis/TargetTransformInfo.h"
27
#include "llvm/CodeGen/BasicTTIImpl.h"
28
#include "llvm/IR/Function.h"
29
#include "llvm/MC/SubtargetFeature.h"
30
#include "llvm/Support/MathExtras.h"
31
#include <cassert>
32
33
namespace llvm {
34
35
class AMDGPUTargetLowering;
36
class Loop;
37
class ScalarEvolution;
38
class Type;
39
class Value;
40
41
class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
42
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
43
  using TTI = TargetTransformInfo;
44
45
  friend BaseT;
46
47
  const AMDGPUSubtarget *ST;
48
  const AMDGPUTargetLowering *TLI;
49
  bool IsGraphicsShader;
50
51
  const FeatureBitset InlineFeatureIgnoreList = {
52
    // Codegen control options which don't matter.
53
    AMDGPU::FeatureEnableLoadStoreOpt,
54
    AMDGPU::FeatureEnableSIScheduler,
55
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
56
    AMDGPU::FeatureFlatForGlobal,
57
    AMDGPU::FeaturePromoteAlloca,
58
    AMDGPU::FeatureUnalignedBufferAccess,
59
    AMDGPU::FeatureUnalignedScratchAccess,
60
61
    AMDGPU::FeatureAutoWaitcntBeforeBarrier,
62
    AMDGPU::FeatureDebuggerEmitPrologue,
63
    AMDGPU::FeatureDebuggerInsertNops,
64
    AMDGPU::FeatureDebuggerReserveRegs,
65
66
    // Property of the kernel/environment which can't actually differ.
67
    AMDGPU::FeatureSGPRInitBug,
68
    AMDGPU::FeatureXNACK,
69
    AMDGPU::FeatureTrapHandler,
70
71
    // Perf-tuning features
72
    AMDGPU::FeatureFastFMAF32,
73
    AMDGPU::HalfRate64Ops
74
  };
75
76
0
  const AMDGPUSubtarget *getST() const { return ST; }
77
39.1k
  const AMDGPUTargetLowering *getTLI() const { return TLI; }
78
79
198
  static inline int getFullRateInstrCost() {
80
198
    return TargetTransformInfo::TCC_Basic;
81
198
  }
82
83
24
  static inline int getHalfRateInstrCost() {
84
24
    return 2 * TargetTransformInfo::TCC_Basic;
85
24
  }
86
87
  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
88
  // should be 2 or 4.
89
117
  static inline int getQuarterRateInstrCost() {
90
117
    return 3 * TargetTransformInfo::TCC_Basic;
91
117
  }
92
93
   // On some parts, normal fp64 operations are half rate, and others
94
   // quarter. This also applies to some integer operations.
95
48
  inline int get64BitInstrCost() const {
96
48
    return ST->hasHalfRate64Ops() ?
97
48
      
getHalfRateInstrCost()24
:
getQuarterRateInstrCost()24
;
98
48
  }
99
100
public:
101
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
102
    : BaseT(TM, F.getParent()->getDataLayout()),
103
      ST(TM->getSubtargetImpl(F)),
104
      TLI(ST->getTargetLowering()),
105
290k
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
106
107
61.2k
  bool hasBranchDivergence() { return true; }
108
109
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
110
                               TTI::UnrollingPreferences &UP);
111
112
7
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
113
7
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
114
7
    return TTI::PSK_FastHardware;
115
7
  }
116
117
  unsigned getHardwareNumberOfRegisters(bool Vector) const;
118
  unsigned getNumberOfRegisters(bool Vector) const;
119
  unsigned getRegisterBitWidth(bool Vector) const;
120
  unsigned getMinVectorRegisterBitWidth() const;
121
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
122
123
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
124
                                  unsigned Alignment,
125
                                  unsigned AddrSpace) const;
126
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
127
                                   unsigned Alignment,
128
                                   unsigned AddrSpace) const;
129
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
130
                                    unsigned Alignment,
131
                                    unsigned AddrSpace) const;
132
133
  unsigned getMaxInterleaveFactor(unsigned VF);
134
135
  int getArithmeticInstrCost(
136
    unsigned Opcode, Type *Ty,
137
    TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
138
    TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
139
    TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
140
    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
141
    ArrayRef<const Value *> Args = ArrayRef<const Value *>());
142
143
  unsigned getCFInstrCost(unsigned Opcode);
144
145
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
146
  bool isSourceOfDivergence(const Value *V) const;
147
  bool isAlwaysUniform(const Value *V) const;
148
149
17.5k
  unsigned getFlatAddressSpace() const {
150
17.5k
    // Don't bother running InferAddressSpaces pass on graphics shaders which
151
17.5k
    // don't use flat addressing.
152
17.5k
    if (IsGraphicsShader)
153
597
      return -1;
154
16.9k
    return ST->hasFlatAddressSpace() ?
155
16.9k
      
ST->getAMDGPUAS().FLAT_ADDRESS9.40k
:
ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE7.52k
;
156
17.5k
  }
157
158
4
  unsigned getVectorSplitCost() { return 0; }
159
160
  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
161
                          Type *SubTp);
162
163
  bool areInlineCompatible(const Function *Caller,
164
                           const Function *Callee) const;
165
166
24
  unsigned getInliningThresholdMultiplier() { return 9; }
167
};
168
169
} // end namespace llvm
170
171
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H