/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// \file |
/// This file describes a TargetTransformInfo::Concept conforming object
10 | | /// NVPTX target machine. It uses the target's detailed information to |
11 | | /// provide more precise answers to certain TTI queries, while letting the |
12 | | /// target independent and default TTI implementations handle the rest. |
13 | | /// |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H |
17 | | #define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H |
18 | | |
19 | | #include "NVPTXTargetMachine.h" |
20 | | #include "MCTargetDesc/NVPTXBaseInfo.h" |
21 | | #include "llvm/Analysis/TargetTransformInfo.h" |
22 | | #include "llvm/CodeGen/BasicTTIImpl.h" |
23 | | #include "llvm/CodeGen/TargetLowering.h" |
24 | | |
25 | | namespace llvm { |
26 | | |
27 | | class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> { |
28 | | typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT; |
29 | | typedef TargetTransformInfo TTI; |
30 | | friend BaseT; |
31 | | |
32 | | const NVPTXSubtarget *ST; |
33 | | const NVPTXTargetLowering *TLI; |
34 | | |
35 | 2 | const NVPTXSubtarget *getST() const { return ST; }; |
36 | 2.22k | const NVPTXTargetLowering *getTLI() const { return TLI; }; |
37 | | |
38 | | public: |
39 | | explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F) |
40 | | : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()), |
41 | 19.6k | TLI(ST->getTargetLowering()) {} |
42 | | |
43 | 38 | bool hasBranchDivergence() { return true; } |
44 | | |
45 | | bool isSourceOfDivergence(const Value *V); |
46 | | |
47 | 193 | unsigned getFlatAddressSpace() const { |
48 | 193 | return AddressSpace::ADDRESS_SPACE_GENERIC; |
49 | 193 | } |
50 | | |
51 | | // Loads and stores can be vectorized if the alignment is at least as big as |
52 | | // the load/store we want to vectorize. |
53 | | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, |
54 | | unsigned Alignment, |
55 | 16 | unsigned AddrSpace) const { |
56 | 16 | return Alignment >= ChainSizeInBytes; |
57 | 16 | } |
58 | | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, |
59 | | unsigned Alignment, |
60 | 3 | unsigned AddrSpace) const { |
61 | 3 | return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace); |
62 | 3 | } |
63 | | |
64 | | // NVPTX has infinite registers of all kinds, but the actual machine doesn't. |
65 | | // We conservatively return 1 here which is just enough to enable the |
66 | | // vectorizers but disables heuristics based on the number of registers. |
67 | | // FIXME: Return a more reasonable number, while keeping an eye on |
68 | | // LoopVectorizer's unrolling heuristics. |
69 | 285 | unsigned getNumberOfRegisters(bool Vector) const { return 1; } |
70 | | |
71 | | // Only <2 x half> should be vectorized, so always return 32 for the vector |
72 | | // register size. |
73 | 9 | unsigned getRegisterBitWidth(bool Vector) const { return 32; } |
74 | 7 | unsigned getMinVectorRegisterBitWidth() const { return 32; } |
75 | | |
76 | | // We don't want to prevent inlining because of target-cpu and -features |
77 | | // attributes that were added to newer versions of LLVM/Clang: There are |
78 | | // no incompatible functions in PTX, ptxas will throw errors in such cases. |
79 | | bool areInlineCompatible(const Function *Caller, |
80 | 0 | const Function *Callee) const { |
81 | 0 | return true; |
82 | 0 | } |
83 | | |
84 | | // Increase the inlining cost threshold by a factor of 5, reflecting that |
85 | | // calls are particularly expensive in NVPTX. |
86 | 0 | unsigned getInliningThresholdMultiplier() { return 5; } |
87 | | |
88 | | int getArithmeticInstrCost( |
89 | | unsigned Opcode, Type *Ty, |
90 | | TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, |
91 | | TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, |
92 | | TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, |
93 | | TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, |
94 | | ArrayRef<const Value *> Args = ArrayRef<const Value *>()); |
95 | | |
96 | | void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, |
97 | | TTI::UnrollingPreferences &UP); |
98 | 338 | bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { |
99 | 338 | // Volatile loads/stores are only supported for shared and global address |
100 | 338 | // spaces, or for generic AS that maps to them. |
101 | 338 | if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC || |
102 | 338 | AddrSpace == llvm::ADDRESS_SPACE_GLOBAL0 || |
103 | 338 | AddrSpace == llvm::ADDRESS_SPACE_SHARED0 )) |
104 | 0 | return false; |
105 | 338 | |
106 | 338 | switch(I->getOpcode()){ |
107 | 338 | default: |
108 | 93 | return false; |
109 | 338 | case Instruction::Load: |
110 | 245 | case Instruction::Store: |
111 | 245 | return true; |
112 | 338 | } |
113 | 338 | } |
114 | | }; |
115 | | |
116 | | } // end namespace llvm |
117 | | |
118 | | #endif |