/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/include/llvm/CodeGen/BasicTTIImpl.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | /// \file |
10 | | /// This file provides a helper that implements much of the TTI interface in |
11 | | /// terms of the target-independent code generator and TargetLowering |
12 | | /// interfaces. |
13 | | // |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #ifndef LLVM_CODEGEN_BASICTTIIMPL_H |
17 | | #define LLVM_CODEGEN_BASICTTIIMPL_H |
18 | | |
19 | | #include "llvm/ADT/APInt.h" |
20 | | #include "llvm/ADT/ArrayRef.h" |
21 | | #include "llvm/ADT/BitVector.h" |
22 | | #include "llvm/ADT/SmallPtrSet.h" |
23 | | #include "llvm/ADT/SmallVector.h" |
24 | | #include "llvm/Analysis/LoopInfo.h" |
25 | | #include "llvm/Analysis/TargetTransformInfo.h" |
26 | | #include "llvm/Analysis/TargetTransformInfoImpl.h" |
27 | | #include "llvm/CodeGen/ISDOpcodes.h" |
28 | | #include "llvm/CodeGen/TargetLowering.h" |
29 | | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
30 | | #include "llvm/CodeGen/ValueTypes.h" |
31 | | #include "llvm/IR/BasicBlock.h" |
32 | | #include "llvm/IR/CallSite.h" |
33 | | #include "llvm/IR/Constant.h" |
34 | | #include "llvm/IR/Constants.h" |
35 | | #include "llvm/IR/DataLayout.h" |
36 | | #include "llvm/IR/DerivedTypes.h" |
37 | | #include "llvm/IR/InstrTypes.h" |
38 | | #include "llvm/IR/Instruction.h" |
39 | | #include "llvm/IR/Instructions.h" |
40 | | #include "llvm/IR/Intrinsics.h" |
41 | | #include "llvm/IR/Operator.h" |
42 | | #include "llvm/IR/Type.h" |
43 | | #include "llvm/IR/Value.h" |
44 | | #include "llvm/MC/MCSchedule.h" |
45 | | #include "llvm/Support/Casting.h" |
46 | | #include "llvm/Support/CommandLine.h" |
47 | | #include "llvm/Support/ErrorHandling.h" |
48 | | #include "llvm/Support/MachineValueType.h" |
49 | | #include "llvm/Support/MathExtras.h" |
50 | | #include <algorithm> |
51 | | #include <cassert> |
52 | | #include <cstdint> |
53 | | #include <limits> |
54 | | #include <utility> |
55 | | |
56 | | namespace llvm { |
57 | | |
58 | | class Function; |
59 | | class GlobalValue; |
60 | | class LLVMContext; |
61 | | class ScalarEvolution; |
62 | | class SCEV; |
63 | | class TargetMachine; |
64 | | |
65 | | extern cl::opt<unsigned> PartialUnrollingThreshold; |
66 | | |
67 | | /// Base class which can be used to help build a TTI implementation. |
68 | | /// |
69 | | /// This class provides as much implementation of the TTI interface as is |
70 | | /// possible using the target independent parts of the code generator. |
71 | | /// |
72 | | /// In order to subclass it, your class must implement a getST() method to |
73 | | /// return the subtarget, and a getTLI() method to return the target lowering. |
74 | | /// We need these methods implemented in the derived class so that this class |
75 | | /// doesn't have to duplicate storage for them. |
76 | | template <typename T> |
77 | | class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { |
78 | | private: |
79 | | using BaseT = TargetTransformInfoImplCRTPBase<T>; |
80 | | using TTI = TargetTransformInfo; |
81 | | |
82 | | /// Estimate a cost of Broadcast as an extract and sequence of insert |
83 | | /// operations. |
84 | 51 | unsigned getBroadcastShuffleOverhead(Type *Ty) { |
85 | 51 | assert(Ty->isVectorTy() && "Can only shuffle vectors"); |
86 | 51 | unsigned Cost = 0; |
87 | 51 | // Broadcast cost is equal to the cost of extracting the zero'th element |
88 | 51 | // plus the cost of inserting it into every element of the result vector. |
89 | 51 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
90 | 51 | Instruction::ExtractElement, Ty, 0); |
91 | 51 | |
92 | 153 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i102 ) { |
93 | 102 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
94 | 102 | Instruction::InsertElement, Ty, i); |
95 | 102 | } |
96 | 51 | return Cost; |
97 | 51 | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getBroadcastShuffleOverhead(llvm::Type*) Line | Count | Source | 84 | 2 | unsigned getBroadcastShuffleOverhead(Type *Ty) { | 85 | 2 | assert(Ty->isVectorTy() && "Can only shuffle vectors"); | 86 | 2 | unsigned Cost = 0; | 87 | 2 | // Broadcast cost is equal to the cost of extracting the zero'th element | 88 | 2 | // plus the cost of inserting it into every element of the result vector. | 89 | 2 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 90 | 2 | Instruction::ExtractElement, Ty, 0); | 91 | 2 | | 92 | 6 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i4 ) { | 93 | 4 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 94 | 4 | Instruction::InsertElement, Ty, i); | 95 | 4 | } | 96 | 2 | return Cost; | 97 | 2 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getBroadcastShuffleOverhead(llvm::Type*) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*) Line | Count | Source | 84 | 2 | unsigned getBroadcastShuffleOverhead(Type *Ty) { | 85 | 2 | assert(Ty->isVectorTy() && "Can only shuffle vectors"); | 86 | 2 | unsigned Cost = 0; | 87 | 2 | // Broadcast cost is equal to the cost of extracting the zero'th element | 88 | 2 | // plus the cost of inserting it into every element of the result vector. | 89 | 2 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 90 | 2 | Instruction::ExtractElement, Ty, 0); | 91 | 2 | | 92 | 6 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i4 ) { | 93 | 4 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 94 | 4 | Instruction::InsertElement, Ty, i); | 95 | 4 | } | 96 | 2 | return Cost; | 97 | 2 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getBroadcastShuffleOverhead(llvm::Type*) Line | Count | Source | 84 | 47 | unsigned getBroadcastShuffleOverhead(Type *Ty) { | 85 | 47 | assert(Ty->isVectorTy() && "Can only shuffle vectors"); | 86 | 47 | unsigned Cost = 0; | 87 | 47 | // Broadcast cost is equal to the cost of extracting the zero'th element | 88 | 47 | // plus the cost of inserting it into every element of the result vector. | 89 | 47 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 90 | 47 | Instruction::ExtractElement, Ty, 0); | 91 | 47 | | 92 | 141 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i94 ) { | 93 | 94 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 94 | 94 | Instruction::InsertElement, Ty, i); | 95 | 94 | } | 96 | 47 | return Cost; | 97 | 47 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*) |
98 | | |
99 | | /// Estimate a cost of shuffle as a sequence of extract and insert |
100 | | /// operations. |
101 | 3.96k | unsigned getPermuteShuffleOverhead(Type *Ty) { |
102 | 3.96k | assert(Ty->isVectorTy() && "Can only shuffle vectors"); |
103 | 3.96k | unsigned Cost = 0; |
104 | 3.96k | // Shuffle cost is equal to the cost of extracting element from its argument |
105 | 3.96k | // plus the cost of inserting them onto the result vector. |
106 | 3.96k | |
107 | 3.96k | // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from |
108 | 3.96k | // index 0 of first vector, index 1 of second vector,index 2 of first |
109 | 3.96k | // vector and finally index 3 of second vector and insert them at index |
110 | 3.96k | // <0,1,2,3> of result vector. |
111 | 22.1k | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i18.1k ) { |
112 | 18.1k | Cost += static_cast<T *>(this) |
113 | 18.1k | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); |
114 | 18.1k | Cost += static_cast<T *>(this) |
115 | 18.1k | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); |
116 | 18.1k | } |
117 | 3.96k | return Cost; |
118 | 3.96k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getPermuteShuffleOverhead(llvm::Type*) Line | Count | Source | 101 | 3.32k | unsigned getPermuteShuffleOverhead(Type *Ty) { | 102 | 3.32k | assert(Ty->isVectorTy() && "Can only shuffle vectors"); | 103 | 3.32k | unsigned Cost = 0; | 104 | 3.32k | // Shuffle cost is equal to the cost of extracting element from its argument | 105 | 3.32k | // plus the cost of inserting them onto the result vector. | 106 | 3.32k | | 107 | 3.32k | // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from | 108 | 3.32k | // index 0 of first vector, index 1 of second vector,index 2 of first | 109 | 3.32k | // vector and finally index 3 of second vector and insert them at index | 110 | 3.32k | // <0,1,2,3> of result vector. | 111 | 17.8k | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i14.5k ) { | 112 | 14.5k | Cost += static_cast<T *>(this) | 113 | 14.5k | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); | 114 | 14.5k | Cost += static_cast<T *>(this) | 115 | 14.5k | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); | 116 | 14.5k | } | 117 | 3.32k | return Cost; | 118 | 3.32k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getPermuteShuffleOverhead(llvm::Type*) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getPermuteShuffleOverhead(llvm::Type*) Line | Count | Source | 101 | 54 | unsigned getPermuteShuffleOverhead(Type *Ty) { | 102 | 54 | assert(Ty->isVectorTy() && "Can only shuffle vectors"); | 103 | 54 | unsigned Cost = 0; | 104 | 54 | // Shuffle cost is equal to the cost of extracting element from its argument | 105 | 54 | // plus the cost of inserting them onto the result vector. | 106 | 54 | | 107 | 54 | // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from | 108 | 54 | // index 0 of first vector, index 1 of second vector,index 2 of first | 109 | 54 | // vector and finally index 3 of second vector and insert them at index | 110 | 54 | // <0,1,2,3> of result vector. | 111 | 254 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i200 ) { | 112 | 200 | Cost += static_cast<T *>(this) | 113 | 200 | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); | 114 | 200 | Cost += static_cast<T *>(this) | 115 | 200 | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); | 116 | 200 | } | 117 | 54 | return Cost; | 118 | 54 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getPermuteShuffleOverhead(llvm::Type*) Line | Count | Source | 101 | 96 | unsigned getPermuteShuffleOverhead(Type *Ty) { | 102 | 96 | assert(Ty->isVectorTy() && "Can only shuffle vectors"); | 103 | 96 | unsigned Cost = 0; | 104 | 96 | // Shuffle cost is equal to the cost of extracting element from its argument | 105 | 96 | // plus the cost of inserting them onto the result vector. | 106 | 96 | | 107 | 96 | // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from | 108 | 96 | // index 0 of first vector, index 1 of second vector,index 2 of first | 109 | 96 | // vector and finally index 3 of second vector and insert them at index | 110 | 96 | // <0,1,2,3> of result vector. | 111 | 480 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i384 ) { | 112 | 384 | Cost += static_cast<T *>(this) | 113 | 384 | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); | 114 | 384 | Cost += static_cast<T *>(this) | 115 | 384 | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); | 116 | 384 | } | 117 | 96 | return Cost; | 118 | 96 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getPermuteShuffleOverhead(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getPermuteShuffleOverhead(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getPermuteShuffleOverhead(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getPermuteShuffleOverhead(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getPermuteShuffleOverhead(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getPermuteShuffleOverhead(llvm::Type*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getPermuteShuffleOverhead(llvm::Type*) Line | Count | Source | 101 | 487 | unsigned getPermuteShuffleOverhead(Type *Ty) { | 102 | 487 | assert(Ty->isVectorTy() && "Can only shuffle vectors"); | 103 | 487 | unsigned Cost = 0; | 104 | 487 | // Shuffle cost is equal to the cost of extracting element from its argument | 105 | 487 | // plus the cost of inserting them onto the result vector. | 106 | 487 | | 107 | 487 | // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from | 108 | 487 | // index 0 of first vector, index 1 of second vector,index 2 of first | 109 | 487 | // vector and finally index 3 of second vector and insert them at index | 110 | 487 | // <0,1,2,3> of result vector. | 111 | 3.53k | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i3.04k ) { | 112 | 3.04k | Cost += static_cast<T *>(this) | 113 | 3.04k | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); | 114 | 3.04k | Cost += static_cast<T *>(this) | 115 | 3.04k | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); | 116 | 3.04k | } | 117 | 487 | return Cost; | 118 | 487 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getPermuteShuffleOverhead(llvm::Type*) |
119 | | |
120 | | /// Estimate a cost of subvector extraction as a sequence of extract and |
121 | | /// insert operations. |
122 | 3.93k | unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) { |
123 | 3.93k | assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && |
124 | 3.93k | "Can only extract subvectors from vectors"); |
125 | 3.93k | int NumSubElts = SubTy->getVectorNumElements(); |
126 | 3.93k | assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() && |
127 | 3.93k | "SK_ExtractSubvector index out of range"); |
128 | 3.93k | |
129 | 3.93k | unsigned Cost = 0; |
130 | 3.93k | // Subvector extraction cost is equal to the cost of extracting element from |
131 | 3.93k | // the source type plus the cost of inserting them into the result vector |
132 | 3.93k | // type. |
133 | 13.9k | for (int i = 0; i != NumSubElts; ++i9.99k ) { |
134 | 9.99k | Cost += static_cast<T *>(this)->getVectorInstrCost( |
135 | 9.99k | Instruction::ExtractElement, Ty, i + Index); |
136 | 9.99k | Cost += static_cast<T *>(this)->getVectorInstrCost( |
137 | 9.99k | Instruction::InsertElement, SubTy, i); |
138 | 9.99k | } |
139 | 3.93k | return Cost; |
140 | 3.93k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*) Line | Count | Source | 122 | 3.89k | unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) { | 123 | 3.89k | assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && | 124 | 3.89k | "Can only extract subvectors from vectors"); | 125 | 3.89k | int NumSubElts = SubTy->getVectorNumElements(); | 126 | 3.89k | assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() && | 127 | 3.89k | "SK_ExtractSubvector index out of range"); | 128 | 3.89k | | 129 | 3.89k | unsigned Cost = 0; | 130 | 3.89k | // Subvector extraction cost is equal to the cost of extracting element from | 131 | 3.89k | // the source type plus the cost of inserting them into the result vector | 132 | 3.89k | // type. | 133 | 13.7k | for (int i = 0; i != NumSubElts; ++i9.82k ) { | 134 | 9.82k | Cost += static_cast<T *>(this)->getVectorInstrCost( | 135 | 9.82k | Instruction::ExtractElement, Ty, i + Index); | 136 | 9.82k | Cost += static_cast<T *>(this)->getVectorInstrCost( | 137 | 9.82k | Instruction::InsertElement, SubTy, i); | 138 | 9.82k | } | 139 | 3.89k | return Cost; | 140 | 3.89k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*) Line | Count | Source | 122 | 25 | unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) { | 123 | 25 | assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && | 124 | 25 | "Can only extract subvectors from vectors"); | 125 | 25 | int NumSubElts = SubTy->getVectorNumElements(); | 126 | 25 | assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() && | 127 | 25 | "SK_ExtractSubvector index out of range"); | 128 | 25 | | 129 | 25 | unsigned Cost = 0; | 130 | 25 | // Subvector extraction cost is equal to the cost of extracting element from | 131 | 25 | // the source type plus the cost of inserting them into the result vector | 132 | 25 | // type. | 133 | 141 | for (int i = 0; i != NumSubElts; ++i116 ) { | 134 | 116 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 135 | 116 | Instruction::ExtractElement, Ty, i + Index); | 136 | 116 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 137 | 116 | Instruction::InsertElement, SubTy, i); | 138 | 116 | } | 139 | 25 | return Cost; | 140 | 25 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*) Line | Count | Source | 122 | 12 | unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) { | 123 | 12 | assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && | 124 | 12 | "Can only extract subvectors from vectors"); | 125 | 12 | int NumSubElts = SubTy->getVectorNumElements(); | 126 | 12 | assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() && | 127 | 12 | "SK_ExtractSubvector index out of range"); | 128 | 12 | | 129 | 12 | unsigned Cost = 0; | 130 | 12 | // Subvector extraction cost is equal to the cost of extracting element from | 131 | 12 | // the source type plus the cost of inserting them into the result vector | 132 | 12 | // type. 
| 133 | 60 | for (int i = 0; i != NumSubElts; ++i48 ) { | 134 | 48 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 135 | 48 | Instruction::ExtractElement, Ty, i + Index); | 136 | 48 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 137 | 48 | Instruction::InsertElement, SubTy, i); | 138 | 48 | } | 139 | 12 | return Cost; | 140 | 12 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*) |
141 | | |
142 | | /// Estimate a cost of subvector insertion as a sequence of extract and |
143 | | /// insert operations. |
144 | 73 | unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) { |
145 | 73 | assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && |
146 | 73 | "Can only insert subvectors into vectors"); |
147 | 73 | int NumSubElts = SubTy->getVectorNumElements(); |
148 | 73 | assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() && |
149 | 73 | "SK_InsertSubvector index out of range"); |
150 | 73 | |
151 | 73 | unsigned Cost = 0; |
152 | 73 | // Subvector insertion cost is equal to the cost of extracting element from |
153 | 73 | // the source type plus the cost of inserting them into the result vector |
154 | 73 | // type. |
155 | 235 | for (int i = 0; i != NumSubElts; ++i162 ) { |
156 | 162 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
157 | 162 | Instruction::ExtractElement, SubTy, i); |
158 | 162 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
159 | 162 | Instruction::InsertElement, Ty, i + Index); |
160 | 162 | } |
161 | 73 | return Cost; |
162 | 73 | } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*) Line | Count | Source | 144 | 73 | unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) { | 145 | 73 | assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && | 146 | 73 | "Can only insert subvectors into vectors"); | 147 | 73 | int NumSubElts = SubTy->getVectorNumElements(); | 148 | 73 | assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() && | 149 | 73 | "SK_InsertSubvector index out of range"); | 150 | 73 | | 151 | 73 | unsigned Cost = 0; | 152 | 73 | // Subvector insertion cost is equal to the cost of extracting element from | 153 | 73 | // the source 
type plus the cost of inserting them into the result vector | 154 | 73 | // type. | 155 | 235 | for (int i = 0; i != NumSubElts; ++i162 ) { | 156 | 162 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 157 | 162 | Instruction::ExtractElement, SubTy, i); | 158 | 162 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 159 | 162 | Instruction::InsertElement, Ty, i + Index); | 160 | 162 | } | 161 | 73 | return Cost; | 162 | 73 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*) |
163 | | |
164 | | /// Local query method delegates up to T which *must* implement this! |
165 | 390k | const TargetSubtargetInfo *getST() const { |
166 | 390k | return static_cast<const T *>(this)->getST(); |
167 | 390k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getST() const Line | Count | Source | 165 | 359k | const TargetSubtargetInfo *getST() const { | 166 | 359k | return static_cast<const T *>(this)->getST(); | 167 | 359k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getST() const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getST() const llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getST() const Line | Count | Source | 165 | 6.89k | const TargetSubtargetInfo *getST() const { | 166 | 6.89k | return static_cast<const T *>(this)->getST(); | 167 | 6.89k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getST() const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getST() const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getST() const llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getST() const Line | Count | Source | 165 | 2 | const TargetSubtargetInfo *getST() const { | 166 | 2 | return static_cast<const T *>(this)->getST(); | 167 | 2 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getST() const Line | Count | Source | 165 | 654 | const TargetSubtargetInfo *getST() const { | 166 | 654 | return static_cast<const T *>(this)->getST(); | 167 | 654 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getST() const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getST() const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getST() const llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getST() const Line | Count | Source | 165 | 22.9k | const TargetSubtargetInfo *getST() const { | 166 | 22.9k | return static_cast<const T *>(this)->getST(); | 167 | 22.9k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getST() const |
168 | | |
169 | | /// Local query method delegates up to T which *must* implement this! |
170 | 117M | const TargetLoweringBase *getTLI() const { |
171 | 117M | return static_cast<const T *>(this)->getTLI(); |
172 | 117M | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getTLI() const Line | Count | Source | 170 | 104M | const TargetLoweringBase *getTLI() const { | 171 | 104M | return static_cast<const T *>(this)->getTLI(); | 172 | 104M | } |
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getTLI() const Line | Count | Source | 170 | 3.64k | const TargetLoweringBase *getTLI() const { | 171 | 3.64k | return static_cast<const T *>(this)->getTLI(); | 172 | 3.64k | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getTLI() const Line | Count | Source | 170 | 98.3k | const TargetLoweringBase *getTLI() const { | 171 | 98.3k | return static_cast<const T *>(this)->getTLI(); | 172 | 98.3k | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getTLI() const Line | Count | Source | 170 | 2.07M | const TargetLoweringBase *getTLI() const { | 171 | 2.07M | return static_cast<const T *>(this)->getTLI(); | 172 | 2.07M | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getTLI() const Line | Count | Source | 170 | 40.9k | const TargetLoweringBase *getTLI() const { | 171 | 40.9k | return static_cast<const T *>(this)->getTLI(); | 172 | 40.9k | } |
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getTLI() const Line | Count | Source | 170 | 268 | const TargetLoweringBase *getTLI() const { | 171 | 268 | return static_cast<const T *>(this)->getTLI(); | 172 | 268 | } |
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getTLI() const Line | Count | Source | 170 | 1.36k | const TargetLoweringBase *getTLI() const { | 171 | 1.36k | return static_cast<const T *>(this)->getTLI(); | 172 | 1.36k | } |
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getTLI() const Line | Count | Source | 170 | 2.22k | const TargetLoweringBase *getTLI() const { | 171 | 2.22k | return static_cast<const T *>(this)->getTLI(); | 172 | 2.22k | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getTLI() const Line | Count | Source | 170 | 155k | const TargetLoweringBase *getTLI() const { | 171 | 155k | return static_cast<const T *>(this)->getTLI(); | 172 | 155k | } |
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getTLI() const Line | Count | Source | 170 | 655 | const TargetLoweringBase *getTLI() const { | 171 | 655 | return static_cast<const T *>(this)->getTLI(); | 172 | 655 | } |
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getTLI() const Line | Count | Source | 170 | 12.6k | const TargetLoweringBase *getTLI() const { | 171 | 12.6k | return static_cast<const T *>(this)->getTLI(); | 172 | 12.6k | } |
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getTLI() const Line | Count | Source | 170 | 966 | const TargetLoweringBase *getTLI() const { | 171 | 966 | return static_cast<const T *>(this)->getTLI(); | 172 | 966 | } |
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getTLI() const Line | Count | Source | 170 | 10.0M | const TargetLoweringBase *getTLI() const { | 171 | 10.0M | return static_cast<const T *>(this)->getTLI(); | 172 | 10.0M | } |
llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getTLI() const Line | Count | Source | 170 | 26 | const TargetLoweringBase *getTLI() const { | 171 | 26 | return static_cast<const T *>(this)->getTLI(); | 172 | 26 | } |
|
173 | | |
174 | 8.49M | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { |
175 | 8.49M | switch (M) { |
176 | 8.49M | case TTI::MIM_Unindexed: |
177 | 0 | return ISD::UNINDEXED; |
178 | 8.49M | case TTI::MIM_PreInc: |
179 | 0 | return ISD::PRE_INC; |
180 | 8.49M | case TTI::MIM_PreDec: |
181 | 0 | return ISD::PRE_DEC; |
182 | 8.49M | case TTI::MIM_PostInc: |
183 | 8.49M | return ISD::POST_INC; |
184 | 8.49M | case TTI::MIM_PostDec: |
185 | 0 | return ISD::POST_DEC; |
186 | 0 | } |
187 | 0 | llvm_unreachable("Unexpected MemIndexedMode"); |
188 | 0 | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode) Line | Count | Source | 174 | 6.95M | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { | 175 | 6.95M | switch (M) { | 176 | 6.95M | case TTI::MIM_Unindexed: | 177 | 0 | return ISD::UNINDEXED; | 178 | 6.95M | case TTI::MIM_PreInc: | 179 | 0 | return ISD::PRE_INC; | 180 | 6.95M | case TTI::MIM_PreDec: | 181 | 0 | return ISD::PRE_DEC; | 182 | 6.95M | case TTI::MIM_PostInc: | 183 | 6.95M | return ISD::POST_INC; | 184 | 6.95M | case TTI::MIM_PostDec: | 185 | 0 | return ISD::POST_DEC; | 186 | 0 | } | 187 | 0 | llvm_unreachable("Unexpected MemIndexedMode"); | 188 | 0 | } |
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode) Line | Count | Source | 174 | 304 | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { | 175 | 304 | switch (M) { | 176 | 304 | case TTI::MIM_Unindexed: | 177 | 0 | return ISD::UNINDEXED; | 178 | 304 | case TTI::MIM_PreInc: | 179 | 0 | return ISD::PRE_INC; | 180 | 304 | case TTI::MIM_PreDec: | 181 | 0 | return ISD::PRE_DEC; | 182 | 304 | case TTI::MIM_PostInc: | 183 | 304 | return ISD::POST_INC; | 184 | 304 | case TTI::MIM_PostDec: | 185 | 0 | return ISD::POST_DEC; | 186 | 0 | } | 187 | 0 | llvm_unreachable("Unexpected MemIndexedMode"); | 188 | 0 | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode) Line | Count | Source | 174 | 7.27k | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { | 175 | 7.27k | switch (M) { | 176 | 7.27k | case TTI::MIM_Unindexed: | 177 | 0 | return ISD::UNINDEXED; | 178 | 7.27k | case TTI::MIM_PreInc: | 179 | 0 | return ISD::PRE_INC; | 180 | 7.27k | case TTI::MIM_PreDec: | 181 | 0 | return ISD::PRE_DEC; | 182 | 7.27k | case TTI::MIM_PostInc: | 183 | 7.27k | return ISD::POST_INC; | 184 | 7.27k | case TTI::MIM_PostDec: | 185 | 0 | return ISD::POST_DEC; | 186 | 0 | } | 187 | 0 | llvm_unreachable("Unexpected MemIndexedMode"); | 188 | 0 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode) Line | Count | Source | 174 | 177k | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { | 175 | 177k | switch (M) { | 176 | 177k | case TTI::MIM_Unindexed: | 177 | 0 | return ISD::UNINDEXED; | 178 | 177k | case TTI::MIM_PreInc: | 179 | 0 | return ISD::PRE_INC; | 180 | 177k | case TTI::MIM_PreDec: | 181 | 0 | return ISD::PRE_DEC; | 182 | 177k | case TTI::MIM_PostInc: | 183 | 177k | return ISD::POST_INC; | 184 | 177k | case TTI::MIM_PostDec: | 185 | 0 | return ISD::POST_DEC; | 186 | 0 | } | 187 | 0 | llvm_unreachable("Unexpected MemIndexedMode"); | 188 | 0 | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode) Line | Count | Source | 174 | 8.07k | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { | 175 | 8.07k | switch (M) { | 176 | 8.07k | case TTI::MIM_Unindexed: | 177 | 0 | return ISD::UNINDEXED; | 178 | 8.07k | case TTI::MIM_PreInc: | 179 | 0 | return ISD::PRE_INC; | 180 | 8.07k | case TTI::MIM_PreDec: | 181 | 0 | return ISD::PRE_DEC; | 182 | 8.07k | case TTI::MIM_PostInc: | 183 | 8.07k | return ISD::POST_INC; | 184 | 8.07k | case TTI::MIM_PostDec: | 185 | 0 | return ISD::POST_DEC; | 186 | 0 | } | 187 | 0 | llvm_unreachable("Unexpected MemIndexedMode"); | 188 | 0 | } |
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode) Line | Count | Source | 174 | 112 | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { | 175 | 112 | switch (M) { | 176 | 112 | case TTI::MIM_Unindexed: | 177 | 0 | return ISD::UNINDEXED; | 178 | 112 | case TTI::MIM_PreInc: | 179 | 0 | return ISD::PRE_INC; | 180 | 112 | case TTI::MIM_PreDec: | 181 | 0 | return ISD::PRE_DEC; | 182 | 112 | case TTI::MIM_PostInc: | 183 | 112 | return ISD::POST_INC; | 184 | 112 | case TTI::MIM_PostDec: | 185 | 0 | return ISD::POST_DEC; | 186 | 0 | } | 187 | 0 | llvm_unreachable("Unexpected MemIndexedMode"); | 188 | 0 | } |
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode) Line | Count | Source | 174 | 454 | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { | 175 | 454 | switch (M) { | 176 | 454 | case TTI::MIM_Unindexed: | 177 | 0 | return ISD::UNINDEXED; | 178 | 454 | case TTI::MIM_PreInc: | 179 | 0 | return ISD::PRE_INC; | 180 | 454 | case TTI::MIM_PreDec: | 181 | 0 | return ISD::PRE_DEC; | 182 | 454 | case TTI::MIM_PostInc: | 183 | 454 | return ISD::POST_INC; | 184 | 454 | case TTI::MIM_PostDec: | 185 | 0 | return ISD::POST_DEC; | 186 | 0 | } | 187 | 0 | llvm_unreachable("Unexpected MemIndexedMode"); | 188 | 0 | } |
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode) Line | Count | Source | 174 | 494 | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { | 175 | 494 | switch (M) { | 176 | 494 | case TTI::MIM_Unindexed: | 177 | 0 | return ISD::UNINDEXED; | 178 | 494 | case TTI::MIM_PreInc: | 179 | 0 | return ISD::PRE_INC; | 180 | 494 | case TTI::MIM_PreDec: | 181 | 0 | return ISD::PRE_DEC; | 182 | 494 | case TTI::MIM_PostInc: | 183 | 494 | return ISD::POST_INC; | 184 | 494 | case TTI::MIM_PostDec: | 185 | 0 | return ISD::POST_DEC; | 186 | 0 | } | 187 | 0 | llvm_unreachable("Unexpected MemIndexedMode"); | 188 | 0 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode) Line | Count | Source | 174 | 23.3k | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { | 175 | 23.3k | switch (M) { | 176 | 23.3k | case TTI::MIM_Unindexed: | 177 | 0 | return ISD::UNINDEXED; | 178 | 23.3k | case TTI::MIM_PreInc: | 179 | 0 | return ISD::PRE_INC; | 180 | 23.3k | case TTI::MIM_PreDec: | 181 | 0 | return ISD::PRE_DEC; | 182 | 23.3k | case TTI::MIM_PostInc: | 183 | 23.3k | return ISD::POST_INC; | 184 | 23.3k | case TTI::MIM_PostDec: | 185 | 0 | return ISD::POST_DEC; | 186 | 0 | } | 187 | 0 | llvm_unreachable("Unexpected MemIndexedMode"); | 188 | 0 | } |
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode) Line | Count | Source | 174 | 108 | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { | 175 | 108 | switch (M) { | 176 | 108 | case TTI::MIM_Unindexed: | 177 | 0 | return ISD::UNINDEXED; | 178 | 108 | case TTI::MIM_PreInc: | 179 | 0 | return ISD::PRE_INC; | 180 | 108 | case TTI::MIM_PreDec: | 181 | 0 | return ISD::PRE_DEC; | 182 | 108 | case TTI::MIM_PostInc: | 183 | 108 | return ISD::POST_INC; | 184 | 108 | case TTI::MIM_PostDec: | 185 | 0 | return ISD::POST_DEC; | 186 | 0 | } | 187 | 0 | llvm_unreachable("Unexpected MemIndexedMode"); | 188 | 0 | } |
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode) Line | Count | Source | 174 | 1.54k | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { | 175 | 1.54k | switch (M) { | 176 | 1.54k | case TTI::MIM_Unindexed: | 177 | 0 | return ISD::UNINDEXED; | 178 | 1.54k | case TTI::MIM_PreInc: | 179 | 0 | return ISD::PRE_INC; | 180 | 1.54k | case TTI::MIM_PreDec: | 181 | 0 | return ISD::PRE_DEC; | 182 | 1.54k | case TTI::MIM_PostInc: | 183 | 1.54k | return ISD::POST_INC; | 184 | 1.54k | case TTI::MIM_PostDec: | 185 | 0 | return ISD::POST_DEC; | 186 | 0 | } | 187 | 0 | llvm_unreachable("Unexpected MemIndexedMode"); | 188 | 0 | } |
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode) Line | Count | Source | 174 | 368 | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { | 175 | 368 | switch (M) { | 176 | 368 | case TTI::MIM_Unindexed: | 177 | 0 | return ISD::UNINDEXED; | 178 | 368 | case TTI::MIM_PreInc: | 179 | 0 | return ISD::PRE_INC; | 180 | 368 | case TTI::MIM_PreDec: | 181 | 0 | return ISD::PRE_DEC; | 182 | 368 | case TTI::MIM_PostInc: | 183 | 368 | return ISD::POST_INC; | 184 | 368 | case TTI::MIM_PostDec: | 185 | 0 | return ISD::POST_DEC; | 186 | 0 | } | 187 | 0 | llvm_unreachable("Unexpected MemIndexedMode"); | 188 | 0 | } |
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode) Line | Count | Source | 174 | 1.31M | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { | 175 | 1.31M | switch (M) { | 176 | 1.31M | case TTI::MIM_Unindexed: | 177 | 0 | return ISD::UNINDEXED; | 178 | 1.31M | case TTI::MIM_PreInc: | 179 | 0 | return ISD::PRE_INC; | 180 | 1.31M | case TTI::MIM_PreDec: | 181 | 0 | return ISD::PRE_DEC; | 182 | 1.31M | case TTI::MIM_PostInc: | 183 | 1.31M | return ISD::POST_INC; | 184 | 1.31M | case TTI::MIM_PostDec: | 185 | 0 | return ISD::POST_DEC; | 186 | 0 | } | 187 | 0 | llvm_unreachable("Unexpected MemIndexedMode"); | 188 | 0 | } |
llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode) Line | Count | Source | 174 | 8 | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { | 175 | 8 | switch (M) { | 176 | 8 | case TTI::MIM_Unindexed: | 177 | 0 | return ISD::UNINDEXED; | 178 | 8 | case TTI::MIM_PreInc: | 179 | 0 | return ISD::PRE_INC; | 180 | 8 | case TTI::MIM_PreDec: | 181 | 0 | return ISD::PRE_DEC; | 182 | 8 | case TTI::MIM_PostInc: | 183 | 8 | return ISD::POST_INC; | 184 | 8 | case TTI::MIM_PostDec: | 185 | 0 | return ISD::POST_DEC; | 186 | 0 | } | 187 | 0 | llvm_unreachable("Unexpected MemIndexedMode"); | 188 | 0 | } |
|
189 | | |
190 | | protected: |
191 | | explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) |
192 | 16.5M | : BaseT(DL) {} llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&) Line | Count | Source | 192 | 11.9M | : BaseT(DL) {} |
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&) Line | Count | Source | 192 | 37.9k | : BaseT(DL) {} |
llvm::BasicTTIImplBase<llvm::AMDGPUTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&) Line | Count | Source | 192 | 551k | : BaseT(DL) {} |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&) Line | Count | Source | 192 | 513k | : BaseT(DL) {} |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&) Line | Count | Source | 192 | 656k | : BaseT(DL) {} |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&) Line | Count | Source | 192 | 30.6k | : BaseT(DL) {} |
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&) Line | Count | Source | 192 | 648 | : BaseT(DL) {} |
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&) Line | Count | Source | 192 | 19.6k | : BaseT(DL) {} |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&) Line | Count | Source | 192 | 97.9k | : BaseT(DL) {} |
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&) Line | Count | Source | 192 | 23.8k | : BaseT(DL) {} |
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&) Line | Count | Source | 192 | 107k | : BaseT(DL) {} |
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&) Line | Count | Source | 192 | 33.7k | : BaseT(DL) {} |
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&) Line | Count | Source | 192 | 2.43M | : BaseT(DL) {} |
llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&) Line | Count | Source | 192 | 1.89k | : BaseT(DL) {} |
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&) Line | Count | Source | 192 | 95.5k | : BaseT(DL) {} |
|
193 | | |
194 | | using TargetTransformInfoImplBase::DL; |
195 | | |
196 | | public: |
197 | | /// \name Scalar TTI Implementations |
198 | | /// @{ |
199 | | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, |
200 | | unsigned AddressSpace, unsigned Alignment, |
201 | 9.42k | bool *Fast) const { |
202 | 9.42k | EVT E = EVT::getIntegerVT(Context, BitWidth); |
203 | 9.42k | return getTLI()->allowsMisalignedMemoryAccesses( |
204 | 9.42k | E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast); |
205 | 9.42k | } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const llvm::BasicTTIImplBase<llvm::R600TTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const Line | Count | Source | 201 | 118 | bool *Fast) const { | 202 | 118 | EVT E = EVT::getIntegerVT(Context, BitWidth); | 203 | 118 | return getTLI()->allowsMisalignedMemoryAccesses( | 204 | 118 | E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast); | 205 | 118 | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const Line | Count | Source | 201 | 9.24k | bool *Fast) const { | 202 | 9.24k | EVT E = EVT::getIntegerVT(Context, BitWidth); | 203 | 9.24k | return getTLI()->allowsMisalignedMemoryAccesses( | 204 | 9.24k | E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast); | 205 | 9.24k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const Line | Count | Source | 201 | 21 | bool *Fast) const { | 202 | 21 | EVT E = EVT::getIntegerVT(Context, BitWidth); | 203 | 21 | return getTLI()->allowsMisalignedMemoryAccesses( | 204 | 21 | E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast); | 205 | 21 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const llvm::BasicTTIImplBase<llvm::X86TTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const Line | Count | Source | 201 | 47 | bool *Fast) const { | 202 | 47 | EVT E = EVT::getIntegerVT(Context, BitWidth); | 203 | 47 | return getTLI()->allowsMisalignedMemoryAccesses( | 204 | 47 | E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast); | 205 | 47 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const |
206 | | |
207 | 469k | bool hasBranchDivergence() { return false; } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::hasBranchDivergence() Line | Count | Source | 207 | 359k | bool hasBranchDivergence() { return false; } |
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::hasBranchDivergence() Line | Count | Source | 207 | 2.44k | bool hasBranchDivergence() { return false; } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::hasBranchDivergence() Line | Count | Source | 207 | 22.3k | bool hasBranchDivergence() { return false; } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::hasBranchDivergence() llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::hasBranchDivergence() Line | Count | Source | 207 | 49 | bool hasBranchDivergence() { return false; } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::hasBranchDivergence() Line | Count | Source | 207 | 81 | bool hasBranchDivergence() { return false; } |
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::hasBranchDivergence() Line | Count | Source | 207 | 6 | bool hasBranchDivergence() { return false; } |
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::hasBranchDivergence() Line | Count | Source | 207 | 3.43k | bool hasBranchDivergence() { return false; } |
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::hasBranchDivergence() Line | Count | Source | 207 | 170 | bool hasBranchDivergence() { return false; } |
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::hasBranchDivergence() Line | Count | Source | 207 | 81.2k | bool hasBranchDivergence() { return false; } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::hasBranchDivergence() |
208 | | |
209 | 0 | bool isSourceOfDivergence(const Value *V) { return false; } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isSourceOfDivergence(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isSourceOfDivergence(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isSourceOfDivergence(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isSourceOfDivergence(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isSourceOfDivergence(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isSourceOfDivergence(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isSourceOfDivergence(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isSourceOfDivergence(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isSourceOfDivergence(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isSourceOfDivergence(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isSourceOfDivergence(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isSourceOfDivergence(llvm::Value const*) |
210 | | |
211 | 108 | bool isAlwaysUniform(const Value *V) { return false; } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isAlwaysUniform(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isAlwaysUniform(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isAlwaysUniform(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isAlwaysUniform(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isAlwaysUniform(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isAlwaysUniform(llvm::Value const*) llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isAlwaysUniform(llvm::Value const*) Line | Count | Source | 211 | 108 | bool isAlwaysUniform(const Value *V) { return false; } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isAlwaysUniform(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isAlwaysUniform(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isAlwaysUniform(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isAlwaysUniform(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isAlwaysUniform(llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isAlwaysUniform(llvm::Value const*) |
212 | | |
213 | 2.36k | unsigned getFlatAddressSpace() { |
214 | 2.36k | // Return an invalid address space. |
215 | 2.36k | return -1; |
216 | 2.36k | } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getFlatAddressSpace() llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getFlatAddressSpace() Line | Count | Source | 213 | 2.36k | unsigned getFlatAddressSpace() { | 214 | 2.36k | // Return an invalid address space. | 215 | 2.36k | return -1; | 216 | 2.36k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getFlatAddressSpace() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getFlatAddressSpace() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getFlatAddressSpace() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getFlatAddressSpace() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getFlatAddressSpace() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getFlatAddressSpace() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getFlatAddressSpace() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getFlatAddressSpace() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getFlatAddressSpace() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getFlatAddressSpace() |
217 | | |
218 | 2.61M | bool isLegalAddImmediate(int64_t imm) { |
219 | 2.61M | return getTLI()->isLegalAddImmediate(imm); |
220 | 2.61M | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isLegalAddImmediate(long long) Line | Count | Source | 218 | 2.54M | bool isLegalAddImmediate(int64_t imm) { | 219 | 2.54M | return getTLI()->isLegalAddImmediate(imm); | 220 | 2.54M | } |
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isLegalAddImmediate(long long) Line | Count | Source | 218 | 14 | bool isLegalAddImmediate(int64_t imm) { | 219 | 14 | return getTLI()->isLegalAddImmediate(imm); | 220 | 14 | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isLegalAddImmediate(long long) Line | Count | Source | 218 | 660 | bool isLegalAddImmediate(int64_t imm) { | 219 | 660 | return getTLI()->isLegalAddImmediate(imm); | 220 | 660 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isLegalAddImmediate(long long) Line | Count | Source | 218 | 29.4k | bool isLegalAddImmediate(int64_t imm) { | 219 | 29.4k | return getTLI()->isLegalAddImmediate(imm); | 220 | 29.4k | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isLegalAddImmediate(long long) Line | Count | Source | 218 | 1.15k | bool isLegalAddImmediate(int64_t imm) { | 219 | 1.15k | return getTLI()->isLegalAddImmediate(imm); | 220 | 1.15k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isLegalAddImmediate(long long) llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isLegalAddImmediate(long long) Line | Count | Source | 218 | 5 | bool isLegalAddImmediate(int64_t imm) { | 219 | 5 | return getTLI()->isLegalAddImmediate(imm); | 220 | 5 | } |
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isLegalAddImmediate(long long) Line | Count | Source | 218 | 19 | bool isLegalAddImmediate(int64_t imm) { | 219 | 19 | return getTLI()->isLegalAddImmediate(imm); | 220 | 19 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isLegalAddImmediate(long long) Line | Count | Source | 218 | 4.39k | bool isLegalAddImmediate(int64_t imm) { | 219 | 4.39k | return getTLI()->isLegalAddImmediate(imm); | 220 | 4.39k | } |
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isLegalAddImmediate(long long) Line | Count | Source | 218 | 38 | bool isLegalAddImmediate(int64_t imm) { | 219 | 38 | return getTLI()->isLegalAddImmediate(imm); | 220 | 38 | } |
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isLegalAddImmediate(long long) Line | Count | Source | 218 | 22 | bool isLegalAddImmediate(int64_t imm) { | 219 | 22 | return getTLI()->isLegalAddImmediate(imm); | 220 | 22 | } |
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isLegalAddImmediate(long long) Line | Count | Source | 218 | 12 | bool isLegalAddImmediate(int64_t imm) { | 219 | 12 | return getTLI()->isLegalAddImmediate(imm); | 220 | 12 | } |
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isLegalAddImmediate(long long) Line | Count | Source | 218 | 34.0k | bool isLegalAddImmediate(int64_t imm) { | 219 | 34.0k | return getTLI()->isLegalAddImmediate(imm); | 220 | 34.0k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isLegalAddImmediate(long long) |
221 | | |
222 | 535k | bool isLegalICmpImmediate(int64_t imm) { |
223 | 535k | return getTLI()->isLegalICmpImmediate(imm); |
224 | 535k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isLegalICmpImmediate(long long) Line | Count | Source | 222 | 452k | bool isLegalICmpImmediate(int64_t imm) { | 223 | 452k | return getTLI()->isLegalICmpImmediate(imm); | 224 | 452k | } |
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isLegalICmpImmediate(long long) Line | Count | Source | 222 | 74 | bool isLegalICmpImmediate(int64_t imm) { | 223 | 74 | return getTLI()->isLegalICmpImmediate(imm); | 224 | 74 | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isLegalICmpImmediate(long long) Line | Count | Source | 222 | 470 | bool isLegalICmpImmediate(int64_t imm) { | 223 | 470 | return getTLI()->isLegalICmpImmediate(imm); | 224 | 470 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isLegalICmpImmediate(long long) Line | Count | Source | 222 | 12.9k | bool isLegalICmpImmediate(int64_t imm) { | 223 | 12.9k | return getTLI()->isLegalICmpImmediate(imm); | 224 | 12.9k | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isLegalICmpImmediate(long long) Line | Count | Source | 222 | 1.27k | bool isLegalICmpImmediate(int64_t imm) { | 223 | 1.27k | return getTLI()->isLegalICmpImmediate(imm); | 224 | 1.27k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isLegalICmpImmediate(long long) llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isLegalICmpImmediate(long long) Line | Count | Source | 222 | 22 | bool isLegalICmpImmediate(int64_t imm) { | 223 | 22 | return getTLI()->isLegalICmpImmediate(imm); | 224 | 22 | } |
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isLegalICmpImmediate(long long) Line | Count | Source | 222 | 48 | bool isLegalICmpImmediate(int64_t imm) { | 223 | 48 | return getTLI()->isLegalICmpImmediate(imm); | 224 | 48 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isLegalICmpImmediate(long long) Line | Count | Source | 222 | 836 | bool isLegalICmpImmediate(int64_t imm) { | 223 | 836 | return getTLI()->isLegalICmpImmediate(imm); | 224 | 836 | } |
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isLegalICmpImmediate(long long) Line | Count | Source | 222 | 19 | bool isLegalICmpImmediate(int64_t imm) { | 223 | 19 | return getTLI()->isLegalICmpImmediate(imm); | 224 | 19 | } |
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isLegalICmpImmediate(long long) Line | Count | Source | 222 | 222 | bool isLegalICmpImmediate(int64_t imm) { | 223 | 222 | return getTLI()->isLegalICmpImmediate(imm); | 224 | 222 | } |
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isLegalICmpImmediate(long long) Line | Count | Source | 222 | 50 | bool isLegalICmpImmediate(int64_t imm) { | 223 | 50 | return getTLI()->isLegalICmpImmediate(imm); | 224 | 50 | } |
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isLegalICmpImmediate(long long) Line | Count | Source | 222 | 66.9k | bool isLegalICmpImmediate(int64_t imm) { | 223 | 66.9k | return getTLI()->isLegalICmpImmediate(imm); | 224 | 66.9k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isLegalICmpImmediate(long long) |
225 | | |
226 | | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
227 | | bool HasBaseReg, int64_t Scale, |
228 | 59.0M | unsigned AddrSpace, Instruction *I = nullptr) { |
229 | 59.0M | TargetLoweringBase::AddrMode AM; |
230 | 59.0M | AM.BaseGV = BaseGV; |
231 | 59.0M | AM.BaseOffs = BaseOffset; |
232 | 59.0M | AM.HasBaseReg = HasBaseReg; |
233 | 59.0M | AM.Scale = Scale; |
234 | 59.0M | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); |
235 | 59.0M | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*) Line | Count | Source | 228 | 53.7M | unsigned AddrSpace, Instruction *I = nullptr) { | 229 | 53.7M | TargetLoweringBase::AddrMode AM; | 230 | 53.7M | AM.BaseGV = BaseGV; | 231 | 53.7M | AM.BaseOffs = BaseOffset; | 232 | 53.7M | AM.HasBaseReg = HasBaseReg; | 233 | 53.7M | AM.Scale = Scale; | 234 | 53.7M | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); | 235 | 53.7M | } |
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*) Line | Count | Source | 228 | 2.39k | unsigned AddrSpace, Instruction *I = nullptr) { | 229 | 2.39k | TargetLoweringBase::AddrMode AM; | 230 | 2.39k | AM.BaseGV = BaseGV; | 231 | 2.39k | AM.BaseOffs = BaseOffset; | 232 | 2.39k | AM.HasBaseReg = HasBaseReg; | 233 | 2.39k | AM.Scale = Scale; | 234 | 2.39k | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); | 235 | 2.39k | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*) Line | Count | Source | 228 | 62.9k | unsigned AddrSpace, Instruction *I = nullptr) { | 229 | 62.9k | TargetLoweringBase::AddrMode AM; | 230 | 62.9k | AM.BaseGV = BaseGV; | 231 | 62.9k | AM.BaseOffs = BaseOffset; | 232 | 62.9k | AM.HasBaseReg = HasBaseReg; | 233 | 62.9k | AM.Scale = Scale; | 234 | 62.9k | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); | 235 | 62.9k | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*) Line | Count | Source | 228 | 866k | unsigned AddrSpace, Instruction *I = nullptr) { | 229 | 866k | TargetLoweringBase::AddrMode AM; | 230 | 866k | AM.BaseGV = BaseGV; | 231 | 866k | AM.BaseOffs = BaseOffset; | 232 | 866k | AM.HasBaseReg = HasBaseReg; | 233 | 866k | AM.Scale = Scale; | 234 | 866k | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); | 235 | 866k | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*) Line | Count | Source | 228 | 21.4k | unsigned AddrSpace, Instruction *I = nullptr) { | 229 | 21.4k | TargetLoweringBase::AddrMode AM; | 230 | 21.4k | AM.BaseGV = BaseGV; | 231 | 21.4k | AM.BaseOffs = BaseOffset; | 232 | 21.4k | AM.HasBaseReg = HasBaseReg; | 233 | 21.4k | AM.Scale = Scale; | 234 | 21.4k | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); | 235 | 21.4k | } |
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*) Line | Count | Source | 228 | 32 | unsigned AddrSpace, Instruction *I = nullptr) { | 229 | 32 | TargetLoweringBase::AddrMode AM; | 230 | 32 | AM.BaseGV = BaseGV; | 231 | 32 | AM.BaseOffs = BaseOffset; | 232 | 32 | AM.HasBaseReg = HasBaseReg; | 233 | 32 | AM.Scale = Scale; | 234 | 32 | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); | 235 | 32 | } |
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*) Line | Count | Source | 228 | 333 | unsigned AddrSpace, Instruction *I = nullptr) { | 229 | 333 | TargetLoweringBase::AddrMode AM; | 230 | 333 | AM.BaseGV = BaseGV; | 231 | 333 | AM.BaseOffs = BaseOffset; | 232 | 333 | AM.HasBaseReg = HasBaseReg; | 233 | 333 | AM.Scale = Scale; | 234 | 333 | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); | 235 | 333 | } |
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*) Line | Count | Source | 228 | 899 | unsigned AddrSpace, Instruction *I = nullptr) { | 229 | 899 | TargetLoweringBase::AddrMode AM; | 230 | 899 | AM.BaseGV = BaseGV; | 231 | 899 | AM.BaseOffs = BaseOffset; | 232 | 899 | AM.HasBaseReg = HasBaseReg; | 233 | 899 | AM.Scale = Scale; | 234 | 899 | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); | 235 | 899 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*) Line | Count | Source | 228 | 75.9k | unsigned AddrSpace, Instruction *I = nullptr) { | 229 | 75.9k | TargetLoweringBase::AddrMode AM; | 230 | 75.9k | AM.BaseGV = BaseGV; | 231 | 75.9k | AM.BaseOffs = BaseOffset; | 232 | 75.9k | AM.HasBaseReg = HasBaseReg; | 233 | 75.9k | AM.Scale = Scale; | 234 | 75.9k | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); | 235 | 75.9k | } |
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*) Line | Count | Source | 228 | 194 | unsigned AddrSpace, Instruction *I = nullptr) { | 229 | 194 | TargetLoweringBase::AddrMode AM; | 230 | 194 | AM.BaseGV = BaseGV; | 231 | 194 | AM.BaseOffs = BaseOffset; | 232 | 194 | AM.HasBaseReg = HasBaseReg; | 233 | 194 | AM.Scale = Scale; | 234 | 194 | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); | 235 | 194 | } |
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*) Line | Count | Source | 228 | 4.13k | unsigned AddrSpace, Instruction *I = nullptr) { | 229 | 4.13k | TargetLoweringBase::AddrMode AM; | 230 | 4.13k | AM.BaseGV = BaseGV; | 231 | 4.13k | AM.BaseOffs = BaseOffset; | 232 | 4.13k | AM.HasBaseReg = HasBaseReg; | 233 | 4.13k | AM.Scale = Scale; | 234 | 4.13k | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); | 235 | 4.13k | } |
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*) Line | Count | Source | 228 | 116 | unsigned AddrSpace, Instruction *I = nullptr) { | 229 | 116 | TargetLoweringBase::AddrMode AM; | 230 | 116 | AM.BaseGV = BaseGV; | 231 | 116 | AM.BaseOffs = BaseOffset; | 232 | 116 | AM.HasBaseReg = HasBaseReg; | 233 | 116 | AM.Scale = Scale; | 234 | 116 | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); | 235 | 116 | } |
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*) Line | Count | Source | 228 | 4.26M | unsigned AddrSpace, Instruction *I = nullptr) { | 229 | 4.26M | TargetLoweringBase::AddrMode AM; | 230 | 4.26M | AM.BaseGV = BaseGV; | 231 | 4.26M | AM.BaseOffs = BaseOffset; | 232 | 4.26M | AM.HasBaseReg = HasBaseReg; | 233 | 4.26M | AM.Scale = Scale; | 234 | 4.26M | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); | 235 | 4.26M | } |
llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*) Line | Count | Source | 228 | 10 | unsigned AddrSpace, Instruction *I = nullptr) { | 229 | 10 | TargetLoweringBase::AddrMode AM; | 230 | 10 | AM.BaseGV = BaseGV; | 231 | 10 | AM.BaseOffs = BaseOffset; | 232 | 10 | AM.HasBaseReg = HasBaseReg; | 233 | 10 | AM.Scale = Scale; | 234 | 10 | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); | 235 | 10 | } |
|
236 | | |
237 | | bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, |
238 | 7.82M | const DataLayout &DL) const { |
239 | 7.82M | EVT VT = getTLI()->getValueType(DL, Ty); |
240 | 7.82M | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); |
241 | 7.82M | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 238 | 6.95M | const DataLayout &DL) const { | 239 | 6.95M | EVT VT = getTLI()->getValueType(DL, Ty); | 240 | 6.95M | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); | 241 | 6.95M | } |
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 238 | 152 | const DataLayout &DL) const { | 239 | 152 | EVT VT = getTLI()->getValueType(DL, Ty); | 240 | 152 | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); | 241 | 152 | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 238 | 3.63k | const DataLayout &DL) const { | 239 | 3.63k | EVT VT = getTLI()->getValueType(DL, Ty); | 240 | 3.63k | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); | 241 | 3.63k | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 238 | 177k | const DataLayout &DL) const { | 239 | 177k | EVT VT = getTLI()->getValueType(DL, Ty); | 240 | 177k | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); | 241 | 177k | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 238 | 8.07k | const DataLayout &DL) const { | 239 | 8.07k | EVT VT = getTLI()->getValueType(DL, Ty); | 240 | 8.07k | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); | 241 | 8.07k | } |
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 238 | 56 | const DataLayout &DL) const { | 239 | 56 | EVT VT = getTLI()->getValueType(DL, Ty); | 240 | 56 | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); | 241 | 56 | } |
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 238 | 267 | const DataLayout &DL) const { | 239 | 267 | EVT VT = getTLI()->getValueType(DL, Ty); | 240 | 267 | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); | 241 | 267 | } |
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 238 | 247 | const DataLayout &DL) const { | 239 | 247 | EVT VT = getTLI()->getValueType(DL, Ty); | 240 | 247 | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); | 241 | 247 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 238 | 11.6k | const DataLayout &DL) const { | 239 | 11.6k | EVT VT = getTLI()->getValueType(DL, Ty); | 240 | 11.6k | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); | 241 | 11.6k | } |
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 238 | 54 | const DataLayout &DL) const { | 239 | 54 | EVT VT = getTLI()->getValueType(DL, Ty); | 240 | 54 | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); | 241 | 54 | } |
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 238 | 773 | const DataLayout &DL) const { | 239 | 773 | EVT VT = getTLI()->getValueType(DL, Ty); | 240 | 773 | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); | 241 | 773 | } |
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 238 | 184 | const DataLayout &DL) const { | 239 | 184 | EVT VT = getTLI()->getValueType(DL, Ty); | 240 | 184 | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); | 241 | 184 | } |
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 238 | 659k | const DataLayout &DL) const { | 239 | 659k | EVT VT = getTLI()->getValueType(DL, Ty); | 240 | 659k | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); | 241 | 659k | } |
llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 238 | 4 | const DataLayout &DL) const { | 239 | 4 | EVT VT = getTLI()->getValueType(DL, Ty); | 240 | 4 | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); | 241 | 4 | } |
|
242 | | |
243 | | bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, |
244 | 676k | const DataLayout &DL) const { |
245 | 676k | EVT VT = getTLI()->getValueType(DL, Ty); |
246 | 676k | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); |
247 | 676k | } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 244 | 152 | const DataLayout &DL) const { | 245 | 152 | EVT VT = getTLI()->getValueType(DL, Ty); | 246 | 152 | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); | 247 | 152 | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 244 | 3.63k | const DataLayout &DL) const { | 245 | 3.63k | EVT VT = getTLI()->getValueType(DL, Ty); | 246 | 3.63k | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); | 247 | 3.63k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 244 | 56 | const DataLayout &DL) const { | 245 | 56 | EVT VT = getTLI()->getValueType(DL, Ty); | 246 | 56 | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); | 247 | 56 | } |
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 244 | 187 | const DataLayout &DL) const { | 245 | 187 | EVT VT = getTLI()->getValueType(DL, Ty); | 246 | 187 | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); | 247 | 187 | } |
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 244 | 247 | const DataLayout &DL) const { | 245 | 247 | EVT VT = getTLI()->getValueType(DL, Ty); | 246 | 247 | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); | 247 | 247 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 244 | 11.6k | const DataLayout &DL) const { | 245 | 11.6k | EVT VT = getTLI()->getValueType(DL, Ty); | 246 | 11.6k | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); | 247 | 11.6k | } |
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 244 | 54 | const DataLayout &DL) const { | 245 | 54 | EVT VT = getTLI()->getValueType(DL, Ty); | 246 | 54 | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); | 247 | 54 | } |
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 244 | 773 | const DataLayout &DL) const { | 245 | 773 | EVT VT = getTLI()->getValueType(DL, Ty); | 246 | 773 | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); | 247 | 773 | } |
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 244 | 184 | const DataLayout &DL) const { | 245 | 184 | EVT VT = getTLI()->getValueType(DL, Ty); | 246 | 184 | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); | 247 | 184 | } |
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 244 | 659k | const DataLayout &DL) const { | 245 | 659k | EVT VT = getTLI()->getValueType(DL, Ty); | 246 | 659k | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); | 247 | 659k | } |
llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const Line | Count | Source | 244 | 4 | const DataLayout &DL) const { | 245 | 4 | EVT VT = getTLI()->getValueType(DL, Ty); | 246 | 4 | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); | 247 | 4 | } |
|
248 | | |
249 | 4.21M | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { |
250 | 4.21M | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); |
251 | 4.21M | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost) Line | Count | Source | 249 | 4.07M | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { | 250 | 4.07M | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); | 251 | 4.07M | } |
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost) Line | Count | Source | 249 | 86 | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { | 250 | 86 | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); | 251 | 86 | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost) Line | Count | Source | 249 | 1.02k | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { | 250 | 1.02k | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); | 251 | 1.02k | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost) Line | Count | Source | 249 | 123k | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { | 250 | 123k | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); | 251 | 123k | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost) Line | Count | Source | 249 | 4.38k | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { | 250 | 4.38k | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); | 251 | 4.38k | } |
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost) Line | Count | Source | 249 | 28 | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { | 250 | 28 | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); | 251 | 28 | } |
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost) Line | Count | Source | 249 | 142 | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { | 250 | 142 | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); | 251 | 142 | } |
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost) Line | Count | Source | 249 | 130 | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { | 250 | 130 | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); | 251 | 130 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost) Line | Count | Source | 249 | 4.90k | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { | 250 | 4.90k | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); | 251 | 4.90k | } |
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost) Line | Count | Source | 249 | 39 | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { | 250 | 39 | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); | 251 | 39 | } |
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost) Line | Count | Source | 249 | 96 | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { | 250 | 96 | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); | 251 | 96 | } |
llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost) Line | Count | Source | 249 | 2 | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { | 250 | 2 | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); | 251 | 2 | } |
|
252 | | |
253 | | int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
254 | 6.05M | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { |
255 | 6.05M | TargetLoweringBase::AddrMode AM; |
256 | 6.05M | AM.BaseGV = BaseGV; |
257 | 6.05M | AM.BaseOffs = BaseOffset; |
258 | 6.05M | AM.HasBaseReg = HasBaseReg; |
259 | 6.05M | AM.Scale = Scale; |
260 | 6.05M | return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); |
261 | 6.05M | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int) Line | Count | Source | 254 | 5.24M | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { | 255 | 5.24M | TargetLoweringBase::AddrMode AM; | 256 | 5.24M | AM.BaseGV = BaseGV; | 257 | 5.24M | AM.BaseOffs = BaseOffset; | 258 | 5.24M | AM.HasBaseReg = HasBaseReg; | 259 | 5.24M | AM.Scale = Scale; | 260 | 5.24M | return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); | 261 | 5.24M | } |
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int) Line | Count | Source | 254 | 10 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { | 255 | 10 | TargetLoweringBase::AddrMode AM; | 256 | 10 | AM.BaseGV = BaseGV; | 257 | 10 | AM.BaseOffs = BaseOffset; | 258 | 10 | AM.HasBaseReg = HasBaseReg; | 259 | 10 | AM.Scale = Scale; | 260 | 10 | return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); | 261 | 10 | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int) Line | Count | Source | 254 | 446 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { | 255 | 446 | TargetLoweringBase::AddrMode AM; | 256 | 446 | AM.BaseGV = BaseGV; | 257 | 446 | AM.BaseOffs = BaseOffset; | 258 | 446 | AM.HasBaseReg = HasBaseReg; | 259 | 446 | AM.Scale = Scale; | 260 | 446 | return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); | 261 | 446 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int) Line | Count | Source | 254 | 105k | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { | 255 | 105k | TargetLoweringBase::AddrMode AM; | 256 | 105k | AM.BaseGV = BaseGV; | 257 | 105k | AM.BaseOffs = BaseOffset; | 258 | 105k | AM.HasBaseReg = HasBaseReg; | 259 | 105k | AM.Scale = Scale; | 260 | 105k | return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); | 261 | 105k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int) llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int) Line | Count | Source | 254 | 4 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { | 255 | 4 | TargetLoweringBase::AddrMode AM; | 256 | 4 | AM.BaseGV = BaseGV; | 257 | 4 | AM.BaseOffs = BaseOffset; | 258 | 4 | AM.HasBaseReg = HasBaseReg; | 259 | 4 | AM.Scale = Scale; | 260 | 4 | return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); | 261 | 4 | } |
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int) Line | Count | Source | 254 | 14 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { | 255 | 14 | TargetLoweringBase::AddrMode AM; | 256 | 14 | AM.BaseGV = BaseGV; | 257 | 14 | AM.BaseOffs = BaseOffset; | 258 | 14 | AM.HasBaseReg = HasBaseReg; | 259 | 14 | AM.Scale = Scale; | 260 | 14 | return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); | 261 | 14 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int) llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int) Line | Count | Source | 254 | 11.9k | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { | 255 | 11.9k | TargetLoweringBase::AddrMode AM; | 256 | 11.9k | AM.BaseGV = BaseGV; | 257 | 11.9k | AM.BaseOffs = BaseOffset; | 258 | 11.9k | AM.HasBaseReg = HasBaseReg; | 259 | 11.9k | AM.Scale = Scale; | 260 | 11.9k | return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); | 261 | 11.9k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int) Line | Count | Source | 254 | 662 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { | 255 | 662 | TargetLoweringBase::AddrMode AM; | 256 | 662 | AM.BaseGV = BaseGV; | 257 | 662 | AM.BaseOffs = BaseOffset; | 258 | 662 | AM.HasBaseReg = HasBaseReg; | 259 | 662 | AM.Scale = Scale; | 260 | 662 | return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); | 261 | 662 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int) Line | Count | Source | 254 | 690k | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { | 255 | 690k | TargetLoweringBase::AddrMode AM; | 256 | 690k | AM.BaseGV = BaseGV; | 257 | 690k | AM.BaseOffs = BaseOffset; | 258 | 690k | AM.HasBaseReg = HasBaseReg; | 259 | 690k | AM.Scale = Scale; | 260 | 690k | return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); | 261 | 690k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int) |
262 | | |
263 | 1.46M | bool isTruncateFree(Type *Ty1, Type *Ty2) { |
264 | 1.46M | return getTLI()->isTruncateFree(Ty1, Ty2); |
265 | 1.46M | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*) Line | Count | Source | 263 | 1.43M | bool isTruncateFree(Type *Ty1, Type *Ty2) { | 264 | 1.43M | return getTLI()->isTruncateFree(Ty1, Ty2); | 265 | 1.43M | } |
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*) Line | Count | Source | 263 | 30 | bool isTruncateFree(Type *Ty1, Type *Ty2) { | 264 | 30 | return getTLI()->isTruncateFree(Ty1, Ty2); | 265 | 30 | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*) Line | Count | Source | 263 | 1.33k | bool isTruncateFree(Type *Ty1, Type *Ty2) { | 264 | 1.33k | return getTLI()->isTruncateFree(Ty1, Ty2); | 265 | 1.33k | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*) Line | Count | Source | 263 | 4.18k | bool isTruncateFree(Type *Ty1, Type *Ty2) { | 264 | 4.18k | return getTLI()->isTruncateFree(Ty1, Ty2); | 265 | 4.18k | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*) Line | Count | Source | 263 | 443 | bool isTruncateFree(Type *Ty1, Type *Ty2) { | 264 | 443 | return getTLI()->isTruncateFree(Ty1, Ty2); | 265 | 443 | } |
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*) Line | Count | Source | 263 | 8 | bool isTruncateFree(Type *Ty1, Type *Ty2) { | 264 | 8 | return getTLI()->isTruncateFree(Ty1, Ty2); | 265 | 8 | } |
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*) Line | Count | Source | 263 | 65 | bool isTruncateFree(Type *Ty1, Type *Ty2) { | 264 | 65 | return getTLI()->isTruncateFree(Ty1, Ty2); | 265 | 65 | } |
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*) Line | Count | Source | 263 | 98 | bool isTruncateFree(Type *Ty1, Type *Ty2) { | 264 | 98 | return getTLI()->isTruncateFree(Ty1, Ty2); | 265 | 98 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*) Line | Count | Source | 263 | 3.88k | bool isTruncateFree(Type *Ty1, Type *Ty2) { | 264 | 3.88k | return getTLI()->isTruncateFree(Ty1, Ty2); | 265 | 3.88k | } |
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*) Line | Count | Source | 263 | 184 | bool isTruncateFree(Type *Ty1, Type *Ty2) { | 264 | 184 | return getTLI()->isTruncateFree(Ty1, Ty2); | 265 | 184 | } |
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*) Line | Count | Source | 263 | 209 | bool isTruncateFree(Type *Ty1, Type *Ty2) { | 264 | 209 | return getTLI()->isTruncateFree(Ty1, Ty2); | 265 | 209 | } |
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*) Line | Count | Source | 263 | 52 | bool isTruncateFree(Type *Ty1, Type *Ty2) { | 264 | 52 | return getTLI()->isTruncateFree(Ty1, Ty2); | 265 | 52 | } |
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*) Line | Count | Source | 263 | 21.6k | bool isTruncateFree(Type *Ty1, Type *Ty2) { | 264 | 21.6k | return getTLI()->isTruncateFree(Ty1, Ty2); | 265 | 21.6k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*) |
266 | | |
267 | 121k | bool isProfitableToHoist(Instruction *I) { |
268 | 121k | return getTLI()->isProfitableToHoist(I); |
269 | 121k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isProfitableToHoist(llvm::Instruction*) Line | Count | Source | 267 | 78.6k | bool isProfitableToHoist(Instruction *I) { | 268 | 78.6k | return getTLI()->isProfitableToHoist(I); | 269 | 78.6k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isProfitableToHoist(llvm::Instruction*) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isProfitableToHoist(llvm::Instruction*) Line | Count | Source | 267 | 24 | bool isProfitableToHoist(Instruction *I) { | 268 | 24 | return getTLI()->isProfitableToHoist(I); | 269 | 24 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isProfitableToHoist(llvm::Instruction*) Line | Count | Source | 267 | 8.67k | bool isProfitableToHoist(Instruction *I) { | 268 | 8.67k | return getTLI()->isProfitableToHoist(I); | 269 | 8.67k | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isProfitableToHoist(llvm::Instruction*) Line | Count | Source | 267 | 16 | bool isProfitableToHoist(Instruction *I) { | 268 | 16 | return getTLI()->isProfitableToHoist(I); | 269 | 16 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isProfitableToHoist(llvm::Instruction*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isProfitableToHoist(llvm::Instruction*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isProfitableToHoist(llvm::Instruction*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isProfitableToHoist(llvm::Instruction*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isProfitableToHoist(llvm::Instruction*) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isProfitableToHoist(llvm::Instruction*) Line | Count | Source | 267 | 64 | bool isProfitableToHoist(Instruction *I) { | 268 | 64 | return getTLI()->isProfitableToHoist(I); | 269 | 64 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isProfitableToHoist(llvm::Instruction*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isProfitableToHoist(llvm::Instruction*) Line | Count | Source | 267 | 33.6k | bool isProfitableToHoist(Instruction *I) { | 268 | 33.6k | return getTLI()->isProfitableToHoist(I); | 269 | 33.6k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isProfitableToHoist(llvm::Instruction*) |
270 | | |
271 | 674 | bool useAA() const { return getST()->useAA(); } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::useAA() const Line | Count | Source | 271 | 35 | bool useAA() const { return getST()->useAA(); } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::useAA() const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::useAA() const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::useAA() const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::useAA() const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::useAA() const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::useAA() const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::useAA() const llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::useAA() const Line | Count | Source | 271 | 639 | bool useAA() const { return getST()->useAA(); } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::useAA() const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::useAA() const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::useAA() const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::X86TTIImpl>::useAA() const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::useAA() const |
272 | | |
273 | 18.0k | bool isTypeLegal(Type *Ty) { |
274 | 18.0k | EVT VT = getTLI()->getValueType(DL, Ty); |
275 | 18.0k | return getTLI()->isTypeLegal(VT); |
276 | 18.0k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isTypeLegal(llvm::Type*) Line | Count | Source | 273 | 17.2k | bool isTypeLegal(Type *Ty) { | 274 | 17.2k | EVT VT = getTLI()->getValueType(DL, Ty); | 275 | 17.2k | return getTLI()->isTypeLegal(VT); | 276 | 17.2k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isTypeLegal(llvm::Type*) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isTypeLegal(llvm::Type*) Line | Count | Source | 273 | 2 | bool isTypeLegal(Type *Ty) { | 274 | 2 | EVT VT = getTLI()->getValueType(DL, Ty); | 275 | 2 | return getTLI()->isTypeLegal(VT); | 276 | 2 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isTypeLegal(llvm::Type*) Line | Count | Source | 273 | 160 | bool isTypeLegal(Type *Ty) { | 274 | 160 | EVT VT = getTLI()->getValueType(DL, Ty); | 275 | 160 | return getTLI()->isTypeLegal(VT); | 276 | 160 | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isTypeLegal(llvm::Type*) Line | Count | Source | 273 | 3 | bool isTypeLegal(Type *Ty) { | 274 | 3 | EVT VT = getTLI()->getValueType(DL, Ty); | 275 | 3 | return getTLI()->isTypeLegal(VT); | 276 | 3 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isTypeLegal(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isTypeLegal(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isTypeLegal(llvm::Type*) llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isTypeLegal(llvm::Type*) Line | Count | Source | 273 | 75 | bool isTypeLegal(Type *Ty) { | 274 | 75 | EVT VT = getTLI()->getValueType(DL, Ty); | 275 | 75 | return getTLI()->isTypeLegal(VT); | 276 | 75 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isTypeLegal(llvm::Type*) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isTypeLegal(llvm::Type*) Line | Count | Source | 273 | 5 | bool isTypeLegal(Type *Ty) { | 274 | 5 | EVT VT = getTLI()->getValueType(DL, Ty); | 275 | 5 | return getTLI()->isTypeLegal(VT); | 276 | 5 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isTypeLegal(llvm::Type*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isTypeLegal(llvm::Type*) Line | Count | Source | 273 | 624 | bool isTypeLegal(Type *Ty) { | 274 | 624 | EVT VT = getTLI()->getValueType(DL, Ty); | 275 | 624 | return getTLI()->isTypeLegal(VT); | 276 | 624 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isTypeLegal(llvm::Type*) |
277 | | |
278 | | int getGEPCost(Type *PointeeType, const Value *Ptr, |
279 | 6.06M | ArrayRef<const Value *> Operands) { |
280 | 6.06M | return BaseT::getGEPCost(PointeeType, Ptr, Operands); |
281 | 6.06M | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 279 | 5.47M | ArrayRef<const Value *> Operands) { | 280 | 5.47M | return BaseT::getGEPCost(PointeeType, Ptr, Operands); | 281 | 5.47M | } |
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 279 | 1.84k | ArrayRef<const Value *> Operands) { | 280 | 1.84k | return BaseT::getGEPCost(PointeeType, Ptr, Operands); | 281 | 1.84k | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 279 | 38.1k | ArrayRef<const Value *> Operands) { | 280 | 38.1k | return BaseT::getGEPCost(PointeeType, Ptr, Operands); | 281 | 38.1k | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 279 | 92.2k | ArrayRef<const Value *> Operands) { | 280 | 92.2k | return BaseT::getGEPCost(PointeeType, Ptr, Operands); | 281 | 92.2k | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 279 | 29 | ArrayRef<const Value *> Operands) { | 280 | 29 | return BaseT::getGEPCost(PointeeType, Ptr, Operands); | 281 | 29 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>) llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 279 | 335 | ArrayRef<const Value *> Operands) { | 280 | 335 | return BaseT::getGEPCost(PointeeType, Ptr, Operands); | 281 | 335 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 279 | 1.08k | ArrayRef<const Value *> Operands) { | 280 | 1.08k | return BaseT::getGEPCost(PointeeType, Ptr, Operands); | 281 | 1.08k | } |
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 279 | 132 | ArrayRef<const Value *> Operands) { | 280 | 132 | return BaseT::getGEPCost(PointeeType, Ptr, Operands); | 281 | 132 | } |
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 279 | 75 | ArrayRef<const Value *> Operands) { | 280 | 75 | return BaseT::getGEPCost(PointeeType, Ptr, Operands); | 281 | 75 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 279 | 461k | ArrayRef<const Value *> Operands) { | 280 | 461k | return BaseT::getGEPCost(PointeeType, Ptr, Operands); | 281 | 461k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>) |
282 | | |
283 | 1.56M | int getExtCost(const Instruction *I, const Value *Src) { |
284 | 1.56M | if (getTLI()->isExtFree(I)) |
285 | 592k | return TargetTransformInfo::TCC_Free; |
286 | 974k | |
287 | 974k | if (isa<ZExtInst>(I) || isa<SExtInst>(I)599k ) |
288 | 901k | if (const LoadInst *LI = dyn_cast<LoadInst>(Src)) |
289 | 592k | if (getTLI()->isExtLoad(LI, I, DL)) |
290 | 583k | return TargetTransformInfo::TCC_Free; |
291 | 390k | |
292 | 390k | return TargetTransformInfo::TCC_Basic; |
293 | 390k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*) Line | Count | Source | 283 | 1.37M | int getExtCost(const Instruction *I, const Value *Src) { | 284 | 1.37M | if (getTLI()->isExtFree(I)) | 285 | 566k | return TargetTransformInfo::TCC_Free; | 286 | 811k | | 287 | 811k | if (isa<ZExtInst>(I) || isa<SExtInst>(I)504k ) | 288 | 738k | if (const LoadInst *LI = dyn_cast<LoadInst>(Src)) | 289 | 485k | if (getTLI()->isExtLoad(LI, I, DL)) | 290 | 477k | return TargetTransformInfo::TCC_Free; | 291 | 334k | | 292 | 334k | return TargetTransformInfo::TCC_Basic; | 293 | 334k | } |
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*) Line | Count | Source | 283 | 7 | int getExtCost(const Instruction *I, const Value *Src) { | 284 | 7 | if (getTLI()->isExtFree(I)) | 285 | 0 | return TargetTransformInfo::TCC_Free; | 286 | 7 | | 287 | 7 | if (isa<ZExtInst>(I) || isa<SExtInst>(I)) | 288 | 7 | if (const LoadInst *LI = dyn_cast<LoadInst>(Src)) | 289 | 0 | if (getTLI()->isExtLoad(LI, I, DL)) | 290 | 0 | return TargetTransformInfo::TCC_Free; | 291 | 7 | | 292 | 7 | return TargetTransformInfo::TCC_Basic; | 293 | 7 | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*) Line | Count | Source | 283 | 221 | int getExtCost(const Instruction *I, const Value *Src) { | 284 | 221 | if (getTLI()->isExtFree(I)) | 285 | 109 | return TargetTransformInfo::TCC_Free; | 286 | 112 | | 287 | 112 | if (isa<ZExtInst>(I) || isa<SExtInst>(I)103 ) | 288 | 112 | if (const LoadInst *LI = dyn_cast<LoadInst>(Src)) | 289 | 34 | if (getTLI()->isExtLoad(LI, I, DL)) | 290 | 30 | return TargetTransformInfo::TCC_Free; | 291 | 82 | | 292 | 82 | return TargetTransformInfo::TCC_Basic; | 293 | 82 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*) Line | Count | Source | 283 | 69.0k | int getExtCost(const Instruction *I, const Value *Src) { | 284 | 69.0k | if (getTLI()->isExtFree(I)) | 285 | 0 | return TargetTransformInfo::TCC_Free; | 286 | 69.0k | | 287 | 69.0k | if (isa<ZExtInst>(I) || isa<SExtInst>(I)39.0k ) | 288 | 69.0k | if (const LoadInst *LI = dyn_cast<LoadInst>(Src)) | 289 | 50.9k | if (getTLI()->isExtLoad(LI, I, DL)) | 290 | 50.5k | return TargetTransformInfo::TCC_Free; | 291 | 18.4k | | 292 | 18.4k | return TargetTransformInfo::TCC_Basic; | 293 | 18.4k | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*) Line | Count | Source | 283 | 12 | int getExtCost(const Instruction *I, const Value *Src) { | 284 | 12 | if (getTLI()->isExtFree(I)) | 285 | 0 | return TargetTransformInfo::TCC_Free; | 286 | 12 | | 287 | 12 | if (isa<ZExtInst>(I) || isa<SExtInst>(I)2 ) | 288 | 12 | if (const LoadInst *LI = dyn_cast<LoadInst>(Src)) | 289 | 2 | if (getTLI()->isExtLoad(LI, I, DL)) | 290 | 2 | return TargetTransformInfo::TCC_Free; | 291 | 10 | | 292 | 10 | return TargetTransformInfo::TCC_Basic; | 293 | 10 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*) llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*) Line | Count | Source | 283 | 1 | int getExtCost(const Instruction *I, const Value *Src) { | 284 | 1 | if (getTLI()->isExtFree(I)) | 285 | 0 | return TargetTransformInfo::TCC_Free; | 286 | 1 | | 287 | 1 | if (isa<ZExtInst>(I) || isa<SExtInst>(I)0 ) | 288 | 1 | if (const LoadInst *LI = dyn_cast<LoadInst>(Src)) | 289 | 0 | if (getTLI()->isExtLoad(LI, I, DL)) | 290 | 0 | return TargetTransformInfo::TCC_Free; | 291 | 1 | | 292 | 1 | return TargetTransformInfo::TCC_Basic; | 293 | 1 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*) Line | Count | Source | 283 | 245 | int getExtCost(const Instruction *I, const Value *Src) { | 284 | 245 | if (getTLI()->isExtFree(I)) | 285 | 6 | return TargetTransformInfo::TCC_Free; | 286 | 239 | | 287 | 239 | if (isa<ZExtInst>(I) || isa<SExtInst>(I)104 ) | 288 | 239 | if (const LoadInst *LI = dyn_cast<LoadInst>(Src)) | 289 | 124 | if (getTLI()->isExtLoad(LI, I, DL)) | 290 | 119 | return TargetTransformInfo::TCC_Free; | 291 | 120 | | 292 | 120 | return TargetTransformInfo::TCC_Basic; | 293 | 120 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*) Line | Count | Source | 283 | 23 | int getExtCost(const Instruction *I, const Value *Src) { | 284 | 23 | if (getTLI()->isExtFree(I)) | 285 | 0 | return TargetTransformInfo::TCC_Free; | 286 | 23 | | 287 | 23 | if (isa<ZExtInst>(I) || isa<SExtInst>(I)8 ) | 288 | 23 | if (const LoadInst *LI = dyn_cast<LoadInst>(Src)) | 289 | 9 | if (getTLI()->isExtLoad(LI, I, DL)) | 290 | 9 | return TargetTransformInfo::TCC_Free; | 291 | 14 | | 292 | 14 | return TargetTransformInfo::TCC_Basic; | 293 | 14 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*) Line | Count | Source | 283 | 118k | int getExtCost(const Instruction *I, const Value *Src) { | 284 | 118k | if (getTLI()->isExtFree(I)) | 285 | 25.3k | return TargetTransformInfo::TCC_Free; | 286 | 93.1k | | 287 | 93.1k | if (isa<ZExtInst>(I) || isa<SExtInst>(I)55.4k ) | 288 | 93.1k | if (const LoadInst *LI = dyn_cast<LoadInst>(Src)) | 289 | 55.8k | if (getTLI()->isExtLoad(LI, I, DL)) | 290 | 55.6k | return TargetTransformInfo::TCC_Free; | 291 | 37.5k | | 292 | 37.5k | return TargetTransformInfo::TCC_Basic; | 293 | 37.5k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*) |
294 | | |
295 | | unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, |
296 | 4 | ArrayRef<const Value *> Arguments, const User *U) { |
297 | 4 | return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U); |
298 | 4 | } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*) llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*) Line | Count | Source | 296 | 2 | ArrayRef<const Value *> Arguments, const User *U) { | 297 | 2 | return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U); | 298 | 2 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*) Line | Count | Source | 296 | 2 | ArrayRef<const Value *> Arguments, const User *U) { | 297 | 2 | return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U); | 298 | 2 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*) |
299 | | |
300 | | unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, |
301 | 968k | ArrayRef<Type *> ParamTys, const User *U) { |
302 | 968k | if (IID == Intrinsic::cttz) { |
303 | 6.85k | if (getTLI()->isCheapToSpeculateCttz()) |
304 | 3.70k | return TargetTransformInfo::TCC_Basic; |
305 | 3.14k | return TargetTransformInfo::TCC_Expensive; |
306 | 3.14k | } |
307 | 962k | |
308 | 962k | if (IID == Intrinsic::ctlz) { |
309 | 10.3k | if (getTLI()->isCheapToSpeculateCtlz()) |
310 | 6.37k | return TargetTransformInfo::TCC_Basic; |
311 | 3.94k | return TargetTransformInfo::TCC_Expensive; |
312 | 3.94k | } |
313 | 951k | |
314 | 951k | return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); |
315 | 951k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*) Line | Count | Source | 301 | 755k | ArrayRef<Type *> ParamTys, const User *U) { | 302 | 755k | if (IID == Intrinsic::cttz) { | 303 | 688 | if (getTLI()->isCheapToSpeculateCttz()) | 304 | 688 | return TargetTransformInfo::TCC_Basic; | 305 | 0 | return TargetTransformInfo::TCC_Expensive; | 306 | 0 | } | 307 | 754k | | 308 | 754k | if (IID == Intrinsic::ctlz) { | 309 | 1.51k | if (getTLI()->isCheapToSpeculateCtlz()) | 310 | 1.51k | return TargetTransformInfo::TCC_Basic; | 311 | 0 | return TargetTransformInfo::TCC_Expensive; | 312 | 0 | } | 313 | 753k | | 314 | 753k | return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); | 315 | 753k | } |
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*) Line | Count | Source | 301 | 2 | ArrayRef<Type *> ParamTys, const User *U) { | 302 | 2 | if (IID == Intrinsic::cttz) { | 303 | 0 | if (getTLI()->isCheapToSpeculateCttz()) | 304 | 0 | return TargetTransformInfo::TCC_Basic; | 305 | 0 | return TargetTransformInfo::TCC_Expensive; | 306 | 0 | } | 307 | 2 | | 308 | 2 | if (IID == Intrinsic::ctlz) { | 309 | 0 | if (getTLI()->isCheapToSpeculateCtlz()) | 310 | 0 | return TargetTransformInfo::TCC_Basic; | 311 | 0 | return TargetTransformInfo::TCC_Expensive; | 312 | 0 | } | 313 | 2 | | 314 | 2 | return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); | 315 | 2 | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*) Line | Count | Source | 301 | 537 | ArrayRef<Type *> ParamTys, const User *U) { | 302 | 537 | if (IID == Intrinsic::cttz) { | 303 | 12 | if (getTLI()->isCheapToSpeculateCttz()) | 304 | 12 | return TargetTransformInfo::TCC_Basic; | 305 | 0 | return TargetTransformInfo::TCC_Expensive; | 306 | 0 | } | 307 | 525 | | 308 | 525 | if (IID == Intrinsic::ctlz) { | 309 | 12 | if (getTLI()->isCheapToSpeculateCtlz()) | 310 | 12 | return TargetTransformInfo::TCC_Basic; | 311 | 0 | return TargetTransformInfo::TCC_Expensive; | 312 | 0 | } | 313 | 513 | | 314 | 513 | return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); | 315 | 513 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*) Line | Count | Source | 301 | 57.5k | ArrayRef<Type *> ParamTys, const User *U) { | 302 | 57.5k | if (IID == Intrinsic::cttz) { | 303 | 2.03k | if (getTLI()->isCheapToSpeculateCttz()) | 304 | 1.97k | return TargetTransformInfo::TCC_Basic; | 305 | 60 | return TargetTransformInfo::TCC_Expensive; | 306 | 60 | } | 307 | 55.4k | | 308 | 55.4k | if (IID == Intrinsic::ctlz) { | 309 | 4.23k | if (getTLI()->isCheapToSpeculateCtlz()) | 310 | 3.80k | return TargetTransformInfo::TCC_Basic; | 311 | 437 | return TargetTransformInfo::TCC_Expensive; | 312 | 437 | } | 313 | 51.2k | | 314 | 51.2k | return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); | 315 | 51.2k | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*) Line | Count | Source | 301 | 14 | ArrayRef<Type *> ParamTys, const User *U) { | 302 | 14 | if (IID == Intrinsic::cttz) { | 303 | 0 | if (getTLI()->isCheapToSpeculateCttz()) | 304 | 0 | return TargetTransformInfo::TCC_Basic; | 305 | 0 | return TargetTransformInfo::TCC_Expensive; | 306 | 0 | } | 307 | 14 | | 308 | 14 | if (IID == Intrinsic::ctlz) { | 309 | 0 | if (getTLI()->isCheapToSpeculateCtlz()) | 310 | 0 | return TargetTransformInfo::TCC_Basic; | 311 | 0 | return TargetTransformInfo::TCC_Expensive; | 312 | 0 | } | 313 | 14 | | 314 | 14 | return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); | 315 | 14 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*) llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*) Line | Count | Source | 301 | 5 | ArrayRef<Type *> ParamTys, const User *U) { | 302 | 5 | if (IID == Intrinsic::cttz) { | 303 | 1 | if (getTLI()->isCheapToSpeculateCttz()) | 304 | 1 | return TargetTransformInfo::TCC_Basic; | 305 | 0 | return TargetTransformInfo::TCC_Expensive; | 306 | 0 | } | 307 | 4 | | 308 | 4 | if (IID == Intrinsic::ctlz) { | 309 | 1 | if (getTLI()->isCheapToSpeculateCtlz()) | 310 | 1 | return TargetTransformInfo::TCC_Basic; | 311 | 0 | return TargetTransformInfo::TCC_Expensive; | 312 | 0 | } | 313 | 3 | | 314 | 3 | return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); | 315 | 3 | } |
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*) Line | Count | Source | 301 | 16 | ArrayRef<Type *> ParamTys, const User *U) { | 302 | 16 | if (IID == Intrinsic::cttz) { | 303 | 0 | if (getTLI()->isCheapToSpeculateCttz()) | 304 | 0 | return TargetTransformInfo::TCC_Basic; | 305 | 0 | return TargetTransformInfo::TCC_Expensive; | 306 | 0 | } | 307 | 16 | | 308 | 16 | if (IID == Intrinsic::ctlz) { | 309 | 0 | if (getTLI()->isCheapToSpeculateCtlz()) | 310 | 0 | return TargetTransformInfo::TCC_Basic; | 311 | 0 | return TargetTransformInfo::TCC_Expensive; | 312 | 0 | } | 313 | 16 | | 314 | 16 | return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); | 315 | 16 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*) Line | Count | Source | 301 | 59 | ArrayRef<Type *> ParamTys, const User *U) { | 302 | 59 | if (IID == Intrinsic::cttz) { | 303 | 1 | if (getTLI()->isCheapToSpeculateCttz()) | 304 | 1 | return TargetTransformInfo::TCC_Basic; | 305 | 0 | return TargetTransformInfo::TCC_Expensive; | 306 | 0 | } | 307 | 58 | | 308 | 58 | if (IID == Intrinsic::ctlz) { | 309 | 1 | if (getTLI()->isCheapToSpeculateCtlz()) | 310 | 1 | return TargetTransformInfo::TCC_Basic; | 311 | 0 | return TargetTransformInfo::TCC_Expensive; | 312 | 0 | } | 313 | 57 | | 314 | 57 | return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); | 315 | 57 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*) Line | Count | Source | 301 | 20 | ArrayRef<Type *> ParamTys, const User *U) { | 302 | 20 | if (IID == Intrinsic::cttz) { | 303 | 0 | if (getTLI()->isCheapToSpeculateCttz()) | 304 | 0 | return TargetTransformInfo::TCC_Basic; | 305 | 0 | return TargetTransformInfo::TCC_Expensive; | 306 | 0 | } | 307 | 20 | | 308 | 20 | if (IID == Intrinsic::ctlz) { | 309 | 0 | if (getTLI()->isCheapToSpeculateCtlz()) | 310 | 0 | return TargetTransformInfo::TCC_Basic; | 311 | 0 | return TargetTransformInfo::TCC_Expensive; | 312 | 0 | } | 313 | 20 | | 314 | 20 | return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); | 315 | 20 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*) Line | Count | Source | 301 | 155k | ArrayRef<Type *> ParamTys, const User *U) { | 302 | 155k | if (IID == Intrinsic::cttz) { | 303 | 4.11k | if (getTLI()->isCheapToSpeculateCttz()) | 304 | 1.02k | return TargetTransformInfo::TCC_Basic; | 305 | 3.08k | return TargetTransformInfo::TCC_Expensive; | 306 | 3.08k | } | 307 | 151k | | 308 | 151k | if (IID == Intrinsic::ctlz) { | 309 | 4.55k | if (getTLI()->isCheapToSpeculateCtlz()) | 310 | 1.04k | return TargetTransformInfo::TCC_Basic; | 311 | 3.51k | return TargetTransformInfo::TCC_Expensive; | 312 | 3.51k | } | 313 | 146k | | 314 | 146k | return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); | 315 | 146k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*) |
316 | | |
317 | | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, |
318 | 31.4k | unsigned &JumpTableSize) { |
319 | 31.4k | /// Try to find the estimated number of clusters. Note that the number of |
320 | 31.4k | /// clusters identified in this function could be different from the actural |
321 | 31.4k | /// numbers found in lowering. This function ignore switches that are |
322 | 31.4k | /// lowered with a mix of jump table / bit test / BTree. This function was |
323 | 31.4k | /// initially intended to be used when estimating the cost of switch in |
324 | 31.4k | /// inline cost heuristic, but it's a generic cost model to be used in other |
325 | 31.4k | /// places (e.g., in loop unrolling). |
326 | 31.4k | unsigned N = SI.getNumCases(); |
327 | 31.4k | const TargetLoweringBase *TLI = getTLI(); |
328 | 31.4k | const DataLayout &DL = this->getDataLayout(); |
329 | 31.4k | |
330 | 31.4k | JumpTableSize = 0; |
331 | 31.4k | bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent()); |
332 | 31.4k | |
333 | 31.4k | // Early exit if both a jump table and bit test are not allowed. |
334 | 31.4k | if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N0 )) |
335 | 0 | return N; |
336 | 31.4k | |
337 | 31.4k | APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue(); |
338 | 31.4k | APInt MinCaseVal = MaxCaseVal; |
339 | 303k | for (auto CI : SI.cases()) { |
340 | 303k | const APInt &CaseVal = CI.getCaseValue()->getValue(); |
341 | 303k | if (CaseVal.sgt(MaxCaseVal)) |
342 | 59.0k | MaxCaseVal = CaseVal; |
343 | 303k | if (CaseVal.slt(MinCaseVal)) |
344 | 30.0k | MinCaseVal = CaseVal; |
345 | 303k | } |
346 | 31.4k | |
347 | 31.4k | // Check if suitable for a bit test |
348 | 31.4k | if (N <= DL.getIndexSizeInBits(0u)) { |
349 | 30.8k | SmallPtrSet<const BasicBlock *, 4> Dests; |
350 | 30.8k | for (auto I : SI.cases()) |
351 | 196k | Dests.insert(I.getCaseSuccessor()); |
352 | 30.8k | |
353 | 30.8k | if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal, |
354 | 30.8k | DL)) |
355 | 3.38k | return 1; |
356 | 28.0k | } |
357 | 28.0k | |
358 | 28.0k | // Check if suitable for a jump table. |
359 | 28.0k | if (IsJTAllowed) { |
360 | 28.0k | if (N < 2 || N < TLI->getMinimumJumpTableEntries()) |
361 | 17.5k | return N; |
362 | 10.5k | uint64_t Range = |
363 | 10.5k | (MaxCaseVal - MinCaseVal) |
364 | 10.5k | .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1; |
365 | 10.5k | // Check whether a range of clusters is dense enough for a jump table |
366 | 10.5k | if (TLI->isSuitableForJumpTable(&SI, N, Range)) { |
367 | 8.59k | JumpTableSize = Range; |
368 | 8.59k | return 1; |
369 | 8.59k | } |
370 | 1.90k | } |
371 | 1.90k | return N; |
372 | 1.90k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&) Line | Count | Source | 318 | 23.7k | unsigned &JumpTableSize) { | 319 | 23.7k | /// Try to find the estimated number of clusters. Note that the number of | 320 | 23.7k | /// clusters identified in this function could be different from the actural | 321 | 23.7k | /// numbers found in lowering. This function ignore switches that are | 322 | 23.7k | /// lowered with a mix of jump table / bit test / BTree. This function was | 323 | 23.7k | /// initially intended to be used when estimating the cost of switch in | 324 | 23.7k | /// inline cost heuristic, but it's a generic cost model to be used in other | 325 | 23.7k | /// places (e.g., in loop unrolling). | 326 | 23.7k | unsigned N = SI.getNumCases(); | 327 | 23.7k | const TargetLoweringBase *TLI = getTLI(); | 328 | 23.7k | const DataLayout &DL = this->getDataLayout(); | 329 | 23.7k | | 330 | 23.7k | JumpTableSize = 0; | 331 | 23.7k | bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent()); | 332 | 23.7k | | 333 | 23.7k | // Early exit if both a jump table and bit test are not allowed. 
| 334 | 23.7k | if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N0 )) | 335 | 0 | return N; | 336 | 23.7k | | 337 | 23.7k | APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue(); | 338 | 23.7k | APInt MinCaseVal = MaxCaseVal; | 339 | 270k | for (auto CI : SI.cases()) { | 340 | 270k | const APInt &CaseVal = CI.getCaseValue()->getValue(); | 341 | 270k | if (CaseVal.sgt(MaxCaseVal)) | 342 | 42.7k | MaxCaseVal = CaseVal; | 343 | 270k | if (CaseVal.slt(MinCaseVal)) | 344 | 25.3k | MinCaseVal = CaseVal; | 345 | 270k | } | 346 | 23.7k | | 347 | 23.7k | // Check if suitable for a bit test | 348 | 23.7k | if (N <= DL.getIndexSizeInBits(0u)) { | 349 | 23.0k | SmallPtrSet<const BasicBlock *, 4> Dests; | 350 | 23.0k | for (auto I : SI.cases()) | 351 | 163k | Dests.insert(I.getCaseSuccessor()); | 352 | 23.0k | | 353 | 23.0k | if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal, | 354 | 23.0k | DL)) | 355 | 2.11k | return 1; | 356 | 21.5k | } | 357 | 21.5k | | 358 | 21.5k | // Check if suitable for a jump table. | 359 | 21.5k | if (IsJTAllowed) { | 360 | 21.5k | if (N < 2 || N < TLI->getMinimumJumpTableEntries()) | 361 | 13.4k | return N; | 362 | 8.16k | uint64_t Range = | 363 | 8.16k | (MaxCaseVal - MinCaseVal) | 364 | 8.16k | .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1; | 365 | 8.16k | // Check whether a range of clusters is dense enough for a jump table | 366 | 8.16k | if (TLI->isSuitableForJumpTable(&SI, N, Range)) { | 367 | 6.66k | JumpTableSize = Range; | 368 | 6.66k | return 1; | 369 | 6.66k | } | 370 | 1.49k | } | 371 | 1.49k | return N; | 372 | 1.49k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&) llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&) Line | Count | Source | 318 | 1.95k | unsigned &JumpTableSize) { | 319 | 1.95k | /// Try to find the estimated number of clusters. Note that the number of | 320 | 1.95k | /// clusters identified in this function could be different from the actural | 321 | 1.95k | /// numbers found in lowering. This function ignore switches that are | 322 | 1.95k | /// lowered with a mix of jump table / bit test / BTree. This function was | 323 | 1.95k | /// initially intended to be used when estimating the cost of switch in | 324 | 1.95k | /// inline cost heuristic, but it's a generic cost model to be used in other | 325 | 1.95k | /// places (e.g., in loop unrolling). | 326 | 1.95k | unsigned N = SI.getNumCases(); | 327 | 1.95k | const TargetLoweringBase *TLI = getTLI(); | 328 | 1.95k | const DataLayout &DL = this->getDataLayout(); | 329 | 1.95k | | 330 | 1.95k | JumpTableSize = 0; | 331 | 1.95k | bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent()); | 332 | 1.95k | | 333 | 1.95k | // Early exit if both a jump table and bit test are not allowed. 
| 334 | 1.95k | if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N0 )) | 335 | 0 | return N; | 336 | 1.95k | | 337 | 1.95k | APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue(); | 338 | 1.95k | APInt MinCaseVal = MaxCaseVal; | 339 | 7.86k | for (auto CI : SI.cases()) { | 340 | 7.86k | const APInt &CaseVal = CI.getCaseValue()->getValue(); | 341 | 7.86k | if (CaseVal.sgt(MaxCaseVal)) | 342 | 3.60k | MaxCaseVal = CaseVal; | 343 | 7.86k | if (CaseVal.slt(MinCaseVal)) | 344 | 1.34k | MinCaseVal = CaseVal; | 345 | 7.86k | } | 346 | 1.95k | | 347 | 1.95k | // Check if suitable for a bit test | 348 | 1.95k | if (N <= DL.getIndexSizeInBits(0u)) { | 349 | 1.95k | SmallPtrSet<const BasicBlock *, 4> Dests; | 350 | 1.95k | for (auto I : SI.cases()) | 351 | 7.86k | Dests.insert(I.getCaseSuccessor()); | 352 | 1.95k | | 353 | 1.95k | if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal, | 354 | 1.95k | DL)) | 355 | 90 | return 1; | 356 | 1.86k | } | 357 | 1.86k | | 358 | 1.86k | // Check if suitable for a jump table. | 359 | 1.86k | if (IsJTAllowed) { | 360 | 1.86k | if (N < 2 || N < TLI->getMinimumJumpTableEntries()) | 361 | 1.17k | return N; | 362 | 690 | uint64_t Range = | 363 | 690 | (MaxCaseVal - MinCaseVal) | 364 | 690 | .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1; | 365 | 690 | // Check whether a range of clusters is dense enough for a jump table | 366 | 690 | if (TLI->isSuitableForJumpTable(&SI, N, Range)) { | 367 | 567 | JumpTableSize = Range; | 368 | 567 | return 1; | 369 | 567 | } | 370 | 123 | } | 371 | 123 | return N; | 372 | 123 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&) Line | Count | Source | 318 | 5.78k | unsigned &JumpTableSize) { | 319 | 5.78k | /// Try to find the estimated number of clusters. Note that the number of | 320 | 5.78k | /// clusters identified in this function could be different from the actural | 321 | 5.78k | /// numbers found in lowering. This function ignore switches that are | 322 | 5.78k | /// lowered with a mix of jump table / bit test / BTree. This function was | 323 | 5.78k | /// initially intended to be used when estimating the cost of switch in | 324 | 5.78k | /// inline cost heuristic, but it's a generic cost model to be used in other | 325 | 5.78k | /// places (e.g., in loop unrolling). 
| 326 | 5.78k | unsigned N = SI.getNumCases(); | 327 | 5.78k | const TargetLoweringBase *TLI = getTLI(); | 328 | 5.78k | const DataLayout &DL = this->getDataLayout(); | 329 | 5.78k | | 330 | 5.78k | JumpTableSize = 0; | 331 | 5.78k | bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent()); | 332 | 5.78k | | 333 | 5.78k | // Early exit if both a jump table and bit test are not allowed. | 334 | 5.78k | if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N0 )) | 335 | 0 | return N; | 336 | 5.78k | | 337 | 5.78k | APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue(); | 338 | 5.78k | APInt MinCaseVal = MaxCaseVal; | 339 | 24.8k | for (auto CI : SI.cases()) { | 340 | 24.8k | const APInt &CaseVal = CI.getCaseValue()->getValue(); | 341 | 24.8k | if (CaseVal.sgt(MaxCaseVal)) | 342 | 12.7k | MaxCaseVal = CaseVal; | 343 | 24.8k | if (CaseVal.slt(MinCaseVal)) | 344 | 3.39k | MinCaseVal = CaseVal; | 345 | 24.8k | } | 346 | 5.78k | | 347 | 5.78k | // Check if suitable for a bit test | 348 | 5.78k | if (N <= DL.getIndexSizeInBits(0u)) { | 349 | 5.78k | SmallPtrSet<const BasicBlock *, 4> Dests; | 350 | 5.78k | for (auto I : SI.cases()) | 351 | 24.7k | Dests.insert(I.getCaseSuccessor()); | 352 | 5.78k | | 353 | 5.78k | if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal, | 354 | 5.78k | DL)) | 355 | 1.17k | return 1; | 356 | 4.60k | } | 357 | 4.60k | | 358 | 4.60k | // Check if suitable for a jump table. 
| 359 | 4.60k | if (IsJTAllowed) { | 360 | 4.60k | if (N < 2 || N < TLI->getMinimumJumpTableEntries()) | 361 | 2.96k | return N; | 362 | 1.64k | uint64_t Range = | 363 | 1.64k | (MaxCaseVal - MinCaseVal) | 364 | 1.64k | .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1; | 365 | 1.64k | // Check whether a range of clusters is dense enough for a jump table | 366 | 1.64k | if (TLI->isSuitableForJumpTable(&SI, N, Range)) { | 367 | 1.36k | JumpTableSize = Range; | 368 | 1.36k | return 1; | 369 | 1.36k | } | 370 | 284 | } | 371 | 284 | return N; | 372 | 284 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&) |
373 | | |
374 | 0 | unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getJumpBufAlignment() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getJumpBufAlignment() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getJumpBufAlignment() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getJumpBufAlignment() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getJumpBufAlignment() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getJumpBufAlignment() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getJumpBufAlignment() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getJumpBufAlignment() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getJumpBufAlignment() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getJumpBufAlignment() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getJumpBufAlignment() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getJumpBufAlignment() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getJumpBufAlignment() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getJumpBufAlignment() |
375 | | |
376 | 0 | unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getJumpBufSize() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getJumpBufSize() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getJumpBufSize() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getJumpBufSize() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getJumpBufSize() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getJumpBufSize() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getJumpBufSize() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getJumpBufSize() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getJumpBufSize() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getJumpBufSize() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getJumpBufSize() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getJumpBufSize() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getJumpBufSize() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getJumpBufSize() |
377 | | |
378 | 61.3k | bool shouldBuildLookupTables() { |
379 | 61.3k | const TargetLoweringBase *TLI = getTLI(); |
380 | 61.3k | return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || |
381 | 61.3k | TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)2.53k ; |
382 | 61.3k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::shouldBuildLookupTables() Line | Count | Source | 378 | 57.7k | bool shouldBuildLookupTables() { | 379 | 57.7k | const TargetLoweringBase *TLI = getTLI(); | 380 | 57.7k | return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || | 381 | 57.7k | TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)0 ; | 382 | 57.7k | } |
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::shouldBuildLookupTables() Line | Count | Source | 378 | 6 | bool shouldBuildLookupTables() { | 379 | 6 | const TargetLoweringBase *TLI = getTLI(); | 380 | 6 | return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || | 381 | 6 | TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); | 382 | 6 | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::shouldBuildLookupTables() Line | Count | Source | 378 | 6 | bool shouldBuildLookupTables() { | 379 | 6 | const TargetLoweringBase *TLI = getTLI(); | 380 | 6 | return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || | 381 | 6 | TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); | 382 | 6 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::shouldBuildLookupTables() Line | Count | Source | 378 | 1.11k | bool shouldBuildLookupTables() { | 379 | 1.11k | const TargetLoweringBase *TLI = getTLI(); | 380 | 1.11k | return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || | 381 | 1.11k | TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)0 ; | 382 | 1.11k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::shouldBuildLookupTables() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::shouldBuildLookupTables() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::shouldBuildLookupTables() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::shouldBuildLookupTables() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::shouldBuildLookupTables() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::shouldBuildLookupTables() llvm::BasicTTIImplBase<llvm::X86TTIImpl>::shouldBuildLookupTables() Line | Count | Source | 378 | 2.52k | bool shouldBuildLookupTables() { | 379 | 2.52k | const TargetLoweringBase *TLI = getTLI(); | 380 | 2.52k | return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || | 381 | 2.52k | TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); | 382 | 2.52k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::shouldBuildLookupTables() |
383 | | |
384 | 50 | bool haveFastSqrt(Type *Ty) { |
385 | 50 | const TargetLoweringBase *TLI = getTLI(); |
386 | 50 | EVT VT = TLI->getValueType(DL, Ty); |
387 | 50 | return TLI->isTypeLegal(VT) && |
388 | 50 | TLI->isOperationLegalOrCustom(ISD::FSQRT, VT)48 ; |
389 | 50 | } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::haveFastSqrt(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::haveFastSqrt(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::haveFastSqrt(llvm::Type*) llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::haveFastSqrt(llvm::Type*) Line | Count | Source | 384 | 2 | bool haveFastSqrt(Type *Ty) { | 385 | 2 | const TargetLoweringBase *TLI = getTLI(); | 386 | 2 | EVT VT = TLI->getValueType(DL, Ty); | 387 | 2 | return TLI->isTypeLegal(VT) && | 388 | 2 | TLI->isOperationLegalOrCustom(ISD::FSQRT, VT)0 ; | 389 | 2 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::haveFastSqrt(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::haveFastSqrt(llvm::Type*) llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::haveFastSqrt(llvm::Type*) Line | Count | Source | 384 | 14 | bool haveFastSqrt(Type *Ty) { | 385 | 14 | const TargetLoweringBase *TLI = getTLI(); | 386 | 14 | EVT VT = TLI->getValueType(DL, Ty); | 387 | 14 | return TLI->isTypeLegal(VT) && | 388 | 14 | TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); | 389 | 14 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::haveFastSqrt(llvm::Type*) llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::haveFastSqrt(llvm::Type*) Line | Count | Source | 384 | 4 | bool haveFastSqrt(Type *Ty) { | 385 | 4 | const TargetLoweringBase *TLI = getTLI(); | 386 | 4 | EVT VT = TLI->getValueType(DL, Ty); | 387 | 4 | return TLI->isTypeLegal(VT) && | 388 | 4 | TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); | 389 | 4 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::haveFastSqrt(llvm::Type*) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::haveFastSqrt(llvm::Type*) Line | Count | Source | 384 | 4 | bool haveFastSqrt(Type *Ty) { | 385 | 4 | const TargetLoweringBase *TLI = getTLI(); | 386 | 4 | EVT VT = TLI->getValueType(DL, Ty); | 387 | 4 | return TLI->isTypeLegal(VT) && | 388 | 4 | TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); | 389 | 4 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::haveFastSqrt(llvm::Type*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::haveFastSqrt(llvm::Type*) Line | Count | Source | 384 | 26 | bool haveFastSqrt(Type *Ty) { | 385 | 26 | const TargetLoweringBase *TLI = getTLI(); | 386 | 26 | EVT VT = TLI->getValueType(DL, Ty); | 387 | 26 | return TLI->isTypeLegal(VT) && | 388 | 26 | TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); | 389 | 26 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::haveFastSqrt(llvm::Type*) |
390 | | |
391 | 22 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { |
392 | 22 | return true; |
393 | 22 | } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*) llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*) Line | Count | Source | 391 | 14 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { | 392 | 14 | return true; | 393 | 14 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*) llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*) Line | Count | Source | 391 | 4 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { | 392 | 4 | return true; | 393 | 4 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*) Line | Count | Source | 391 | 4 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { | 392 | 4 | return true; | 393 | 4 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*) |
394 | | |
395 | 383k | unsigned getFPOpCost(Type *Ty) { |
396 | 383k | // Check whether FADD is available, as a proxy for floating-point in |
397 | 383k | // general. |
398 | 383k | const TargetLoweringBase *TLI = getTLI(); |
399 | 383k | EVT VT = TLI->getValueType(DL, Ty); |
400 | 383k | if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT)) |
401 | 382k | return TargetTransformInfo::TCC_Basic; |
402 | 1.27k | return TargetTransformInfo::TCC_Expensive; |
403 | 1.27k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getFPOpCost(llvm::Type*) Line | Count | Source | 395 | 381k | unsigned getFPOpCost(Type *Ty) { | 396 | 381k | // Check whether FADD is available, as a proxy for floating-point in | 397 | 381k | // general. | 398 | 381k | const TargetLoweringBase *TLI = getTLI(); | 399 | 381k | EVT VT = TLI->getValueType(DL, Ty); | 400 | 381k | if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT)) | 401 | 380k | return TargetTransformInfo::TCC_Basic; | 402 | 1.07k | return TargetTransformInfo::TCC_Expensive; | 403 | 1.07k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getFPOpCost(llvm::Type*) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getFPOpCost(llvm::Type*) Line | Count | Source | 395 | 36 | unsigned getFPOpCost(Type *Ty) { | 396 | 36 | // Check whether FADD is available, as a proxy for floating-point in | 397 | 36 | // general. | 398 | 36 | const TargetLoweringBase *TLI = getTLI(); | 399 | 36 | EVT VT = TLI->getValueType(DL, Ty); | 400 | 36 | if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT)) | 401 | 36 | return TargetTransformInfo::TCC_Basic; | 402 | 0 | return TargetTransformInfo::TCC_Expensive; | 403 | 0 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getFPOpCost(llvm::Type*) Line | Count | Source | 395 | 312 | unsigned getFPOpCost(Type *Ty) { | 396 | 312 | // Check whether FADD is available, as a proxy for floating-point in | 397 | 312 | // general. | 398 | 312 | const TargetLoweringBase *TLI = getTLI(); | 399 | 312 | EVT VT = TLI->getValueType(DL, Ty); | 400 | 312 | if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT)) | 401 | 114 | return TargetTransformInfo::TCC_Basic; | 402 | 198 | return TargetTransformInfo::TCC_Expensive; | 403 | 198 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getFPOpCost(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getFPOpCost(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getFPOpCost(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getFPOpCost(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getFPOpCost(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getFPOpCost(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getFPOpCost(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getFPOpCost(llvm::Type*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getFPOpCost(llvm::Type*) Line | Count | Source | 395 | 2.15k | unsigned getFPOpCost(Type *Ty) { | 396 | 2.15k | // Check whether FADD is available, as a proxy for floating-point in | 397 | 2.15k | // general. | 398 | 2.15k | const TargetLoweringBase *TLI = getTLI(); | 399 | 2.15k | EVT VT = TLI->getValueType(DL, Ty); | 400 | 2.15k | if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT)) | 401 | 2.15k | return TargetTransformInfo::TCC_Basic; | 402 | 0 | return TargetTransformInfo::TCC_Expensive; | 403 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getFPOpCost(llvm::Type*) |
404 | | |
405 | 26.0M | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { |
406 | 26.0M | const TargetLoweringBase *TLI = getTLI(); |
407 | 26.0M | switch (Opcode) { |
408 | 26.0M | default: break25.3M ; |
409 | 26.0M | case Instruction::Trunc: |
410 | 710k | if (TLI->isTruncateFree(OpTy, Ty)) |
411 | 664k | return TargetTransformInfo::TCC_Free; |
412 | 45.8k | return TargetTransformInfo::TCC_Basic; |
413 | 45.8k | case Instruction::ZExt: |
414 | 0 | if (TLI->isZExtFree(OpTy, Ty)) |
415 | 0 | return TargetTransformInfo::TCC_Free; |
416 | 0 | return TargetTransformInfo::TCC_Basic; |
417 | 0 |
|
418 | 204 | case Instruction::AddrSpaceCast: |
419 | 204 | if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(), |
420 | 204 | Ty->getPointerAddressSpace())) |
421 | 132 | return TargetTransformInfo::TCC_Free; |
422 | 72 | return TargetTransformInfo::TCC_Basic; |
423 | 25.3M | } |
424 | 25.3M | |
425 | 25.3M | return BaseT::getOperationCost(Opcode, Ty, OpTy); |
426 | 25.3M | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*) Line | Count | Source | 405 | 23.8M | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { | 406 | 23.8M | const TargetLoweringBase *TLI = getTLI(); | 407 | 23.8M | switch (Opcode) { | 408 | 23.8M | default: break23.2M ; | 409 | 23.8M | case Instruction::Trunc: | 410 | 650k | if (TLI->isTruncateFree(OpTy, Ty)) | 411 | 608k | return TargetTransformInfo::TCC_Free; | 412 | 42.6k | return TargetTransformInfo::TCC_Basic; | 413 | 42.6k | case Instruction::ZExt: | 414 | 0 | if (TLI->isZExtFree(OpTy, Ty)) | 415 | 0 | return TargetTransformInfo::TCC_Free; | 416 | 0 | return TargetTransformInfo::TCC_Basic; | 417 | 0 |
| 418 | 0 | case Instruction::AddrSpaceCast: | 419 | 0 | if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(), | 420 | 0 | Ty->getPointerAddressSpace())) | 421 | 0 | return TargetTransformInfo::TCC_Free; | 422 | 0 | return TargetTransformInfo::TCC_Basic; | 423 | 23.2M | } | 424 | 23.2M | | 425 | 23.2M | return BaseT::getOperationCost(Opcode, Ty, OpTy); | 426 | 23.2M | } |
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*) Line | Count | Source | 405 | 386 | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { | 406 | 386 | const TargetLoweringBase *TLI = getTLI(); | 407 | 386 | switch (Opcode) { | 408 | 386 | default: break; | 409 | 386 | case Instruction::Trunc: | 410 | 0 | if (TLI->isTruncateFree(OpTy, Ty)) | 411 | 0 | return TargetTransformInfo::TCC_Free; | 412 | 0 | return TargetTransformInfo::TCC_Basic; | 413 | 0 | case Instruction::ZExt: | 414 | 0 | if (TLI->isZExtFree(OpTy, Ty)) | 415 | 0 | return TargetTransformInfo::TCC_Free; | 416 | 0 | return TargetTransformInfo::TCC_Basic; | 417 | 0 |
| 418 | 0 | case Instruction::AddrSpaceCast: | 419 | 0 | if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(), | 420 | 0 | Ty->getPointerAddressSpace())) | 421 | 0 | return TargetTransformInfo::TCC_Free; | 422 | 0 | return TargetTransformInfo::TCC_Basic; | 423 | 386 | } | 424 | 386 | | 425 | 386 | return BaseT::getOperationCost(Opcode, Ty, OpTy); | 426 | 386 | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*) Line | Count | Source | 405 | 6.53k | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { | 406 | 6.53k | const TargetLoweringBase *TLI = getTLI(); | 407 | 6.53k | switch (Opcode) { | 408 | 6.53k | default: break6.28k ; | 409 | 6.53k | case Instruction::Trunc: | 410 | 39 | if (TLI->isTruncateFree(OpTy, Ty)) | 411 | 33 | return TargetTransformInfo::TCC_Free; | 412 | 6 | return TargetTransformInfo::TCC_Basic; | 413 | 6 | case Instruction::ZExt: | 414 | 0 | if (TLI->isZExtFree(OpTy, Ty)) | 415 | 0 | return TargetTransformInfo::TCC_Free; | 416 | 0 | return TargetTransformInfo::TCC_Basic; | 417 | 0 |
| 418 | 204 | case Instruction::AddrSpaceCast: | 419 | 204 | if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(), | 420 | 204 | Ty->getPointerAddressSpace())) | 421 | 132 | return TargetTransformInfo::TCC_Free; | 422 | 72 | return TargetTransformInfo::TCC_Basic; | 423 | 6.28k | } | 424 | 6.28k | | 425 | 6.28k | return BaseT::getOperationCost(Opcode, Ty, OpTy); | 426 | 6.28k | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*) Line | Count | Source | 405 | 508k | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { | 406 | 508k | const TargetLoweringBase *TLI = getTLI(); | 407 | 508k | switch (Opcode) { | 408 | 508k | default: break499k ; | 409 | 508k | case Instruction::Trunc: | 410 | 8.90k | if (TLI->isTruncateFree(OpTy, Ty)) | 411 | 5.81k | return TargetTransformInfo::TCC_Free; | 412 | 3.09k | return TargetTransformInfo::TCC_Basic; | 413 | 3.09k | case Instruction::ZExt: | 414 | 0 | if (TLI->isZExtFree(OpTy, Ty)) | 415 | 0 | return TargetTransformInfo::TCC_Free; | 416 | 0 | return TargetTransformInfo::TCC_Basic; | 417 | 0 |
| 418 | 0 | case Instruction::AddrSpaceCast: | 419 | 0 | if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(), | 420 | 0 | Ty->getPointerAddressSpace())) | 421 | 0 | return TargetTransformInfo::TCC_Free; | 422 | 0 | return TargetTransformInfo::TCC_Basic; | 423 | 499k | } | 424 | 499k | | 425 | 499k | return BaseT::getOperationCost(Opcode, Ty, OpTy); | 426 | 499k | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*) Line | Count | Source | 405 | 197 | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { | 406 | 197 | const TargetLoweringBase *TLI = getTLI(); | 407 | 197 | switch (Opcode) { | 408 | 197 | default: break185 ; | 409 | 197 | case Instruction::Trunc: | 410 | 12 | if (TLI->isTruncateFree(OpTy, Ty)) | 411 | 1 | return TargetTransformInfo::TCC_Free; | 412 | 11 | return TargetTransformInfo::TCC_Basic; | 413 | 11 | case Instruction::ZExt: | 414 | 0 | if (TLI->isZExtFree(OpTy, Ty)) | 415 | 0 | return TargetTransformInfo::TCC_Free; | 416 | 0 | return TargetTransformInfo::TCC_Basic; | 417 | 0 |
| 418 | 0 | case Instruction::AddrSpaceCast: | 419 | 0 | if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(), | 420 | 0 | Ty->getPointerAddressSpace())) | 421 | 0 | return TargetTransformInfo::TCC_Free; | 422 | 0 | return TargetTransformInfo::TCC_Basic; | 423 | 185 | } | 424 | 185 | | 425 | 185 | return BaseT::getOperationCost(Opcode, Ty, OpTy); | 426 | 185 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*) llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*) Line | Count | Source | 405 | 102 | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { | 406 | 102 | const TargetLoweringBase *TLI = getTLI(); | 407 | 102 | switch (Opcode) { | 408 | 102 | default: break; | 409 | 102 | case Instruction::Trunc: | 410 | 0 | if (TLI->isTruncateFree(OpTy, Ty)) | 411 | 0 | return TargetTransformInfo::TCC_Free; | 412 | 0 | return TargetTransformInfo::TCC_Basic; | 413 | 0 | case Instruction::ZExt: | 414 | 0 | if (TLI->isZExtFree(OpTy, Ty)) | 415 | 0 | return TargetTransformInfo::TCC_Free; | 416 | 0 | return TargetTransformInfo::TCC_Basic; | 417 | 0 |
| 418 | 0 | case Instruction::AddrSpaceCast: | 419 | 0 | if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(), | 420 | 0 | Ty->getPointerAddressSpace())) | 421 | 0 | return TargetTransformInfo::TCC_Free; | 422 | 0 | return TargetTransformInfo::TCC_Basic; | 423 | 102 | } | 424 | 102 | | 425 | 102 | return BaseT::getOperationCost(Opcode, Ty, OpTy); | 426 | 102 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*) Line | Count | Source | 405 | 6.46k | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { | 406 | 6.46k | const TargetLoweringBase *TLI = getTLI(); | 407 | 6.46k | switch (Opcode) { | 408 | 6.46k | default: break6.18k ; | 409 | 6.46k | case Instruction::Trunc: | 410 | 272 | if (TLI->isTruncateFree(OpTy, Ty)) | 411 | 217 | return TargetTransformInfo::TCC_Free; | 412 | 55 | return TargetTransformInfo::TCC_Basic; | 413 | 55 | case Instruction::ZExt: | 414 | 0 | if (TLI->isZExtFree(OpTy, Ty)) | 415 | 0 | return TargetTransformInfo::TCC_Free; | 416 | 0 | return TargetTransformInfo::TCC_Basic; | 417 | 0 |
| 418 | 0 | case Instruction::AddrSpaceCast: | 419 | 0 | if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(), | 420 | 0 | Ty->getPointerAddressSpace())) | 421 | 0 | return TargetTransformInfo::TCC_Free; | 422 | 0 | return TargetTransformInfo::TCC_Basic; | 423 | 6.18k | } | 424 | 6.18k | | 425 | 6.18k | return BaseT::getOperationCost(Opcode, Ty, OpTy); | 426 | 6.18k | } |
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*) Line | Count | Source | 405 | 4 | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { | 406 | 4 | const TargetLoweringBase *TLI = getTLI(); | 407 | 4 | switch (Opcode) { | 408 | 4 | default: break3 ; | 409 | 4 | case Instruction::Trunc: | 410 | 1 | if (TLI->isTruncateFree(OpTy, Ty)) | 411 | 1 | return TargetTransformInfo::TCC_Free; | 412 | 0 | return TargetTransformInfo::TCC_Basic; | 413 | 0 | case Instruction::ZExt: | 414 | 0 | if (TLI->isZExtFree(OpTy, Ty)) | 415 | 0 | return TargetTransformInfo::TCC_Free; | 416 | 0 | return TargetTransformInfo::TCC_Basic; | 417 | 0 |
| 418 | 0 | case Instruction::AddrSpaceCast: | 419 | 0 | if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(), | 420 | 0 | Ty->getPointerAddressSpace())) | 421 | 0 | return TargetTransformInfo::TCC_Free; | 422 | 0 | return TargetTransformInfo::TCC_Basic; | 423 | 3 | } | 424 | 3 | | 425 | 3 | return BaseT::getOperationCost(Opcode, Ty, OpTy); | 426 | 3 | } |
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*) Line | Count | Source | 405 | 1.08k | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { | 406 | 1.08k | const TargetLoweringBase *TLI = getTLI(); | 407 | 1.08k | switch (Opcode) { | 408 | 1.08k | default: break1.06k ; | 409 | 1.08k | case Instruction::Trunc: | 410 | 18 | if (TLI->isTruncateFree(OpTy, Ty)) | 411 | 15 | return TargetTransformInfo::TCC_Free; | 412 | 3 | return TargetTransformInfo::TCC_Basic; | 413 | 3 | case Instruction::ZExt: | 414 | 0 | if (TLI->isZExtFree(OpTy, Ty)) | 415 | 0 | return TargetTransformInfo::TCC_Free; | 416 | 0 | return TargetTransformInfo::TCC_Basic; | 417 | 0 |
| 418 | 0 | case Instruction::AddrSpaceCast: | 419 | 0 | if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(), | 420 | 0 | Ty->getPointerAddressSpace())) | 421 | 0 | return TargetTransformInfo::TCC_Free; | 422 | 0 | return TargetTransformInfo::TCC_Basic; | 423 | 1.06k | } | 424 | 1.06k | | 425 | 1.06k | return BaseT::getOperationCost(Opcode, Ty, OpTy); | 426 | 1.06k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*) Line | Count | Source | 405 | 1.63M | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { | 406 | 1.63M | const TargetLoweringBase *TLI = getTLI(); | 407 | 1.63M | switch (Opcode) { | 408 | 1.63M | default: break1.58M ; | 409 | 1.63M | case Instruction::Trunc: | 410 | 50.1k | if (TLI->isTruncateFree(OpTy, Ty)) | 411 | 50.1k | return TargetTransformInfo::TCC_Free; | 412 | 54 | return TargetTransformInfo::TCC_Basic; | 413 | 54 | case Instruction::ZExt: | 414 | 0 | if (TLI->isZExtFree(OpTy, Ty)) | 415 | 0 | return TargetTransformInfo::TCC_Free; | 416 | 0 | return TargetTransformInfo::TCC_Basic; | 417 | 0 |
| 418 | 0 | case Instruction::AddrSpaceCast: | 419 | 0 | if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(), | 420 | 0 | Ty->getPointerAddressSpace())) | 421 | 0 | return TargetTransformInfo::TCC_Free; | 422 | 0 | return TargetTransformInfo::TCC_Basic; | 423 | 1.58M | } | 424 | 1.58M | | 425 | 1.58M | return BaseT::getOperationCost(Opcode, Ty, OpTy); | 426 | 1.58M | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*) |
427 | | |
428 | 790k | unsigned getInliningThresholdMultiplier() { return 1; } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getInliningThresholdMultiplier() Line | Count | Source | 428 | 527k | unsigned getInliningThresholdMultiplier() { return 1; } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getInliningThresholdMultiplier() llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getInliningThresholdMultiplier() Line | Count | Source | 428 | 61.9k | unsigned getInliningThresholdMultiplier() { return 1; } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getInliningThresholdMultiplier() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getInliningThresholdMultiplier() llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getInliningThresholdMultiplier() Line | Count | Source | 428 | 7 | unsigned getInliningThresholdMultiplier() { return 1; } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getInliningThresholdMultiplier() Line | Count | Source | 428 | 17 | unsigned getInliningThresholdMultiplier() { return 1; } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getInliningThresholdMultiplier() llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getInliningThresholdMultiplier() Line | Count | Source | 428 | 4 | unsigned getInliningThresholdMultiplier() { return 1; } |
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getInliningThresholdMultiplier() Line | Count | Source | 428 | 200k | unsigned getInliningThresholdMultiplier() { return 1; } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getInliningThresholdMultiplier() |
429 | | |
430 | 790k | int getInlinerVectorBonusPercent() { return 150; } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getInlinerVectorBonusPercent() Line | Count | Source | 430 | 527k | int getInlinerVectorBonusPercent() { return 150; } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getInlinerVectorBonusPercent() llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getInlinerVectorBonusPercent() Line | Count | Source | 430 | 61.9k | int getInlinerVectorBonusPercent() { return 150; } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getInlinerVectorBonusPercent() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getInlinerVectorBonusPercent() llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getInlinerVectorBonusPercent() Line | Count | Source | 430 | 7 | int getInlinerVectorBonusPercent() { return 150; } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getInlinerVectorBonusPercent() llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getInlinerVectorBonusPercent() Line | Count | Source | 430 | 17 | int getInlinerVectorBonusPercent() { return 150; } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getInlinerVectorBonusPercent() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getInlinerVectorBonusPercent() llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getInlinerVectorBonusPercent() Line | Count | Source | 430 | 4 | int getInlinerVectorBonusPercent() { return 150; } |
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getInlinerVectorBonusPercent() Line | Count | Source | 430 | 200k | int getInlinerVectorBonusPercent() { return 150; } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getInlinerVectorBonusPercent() |
431 | | |
432 | | void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, |
433 | 389k | TTI::UnrollingPreferences &UP) { |
434 | 389k | // This unrolling functionality is target independent, but to provide some |
435 | 389k | // motivation for its intended use, for x86: |
436 | 389k | |
437 | 389k | // According to the Intel 64 and IA-32 Architectures Optimization Reference |
438 | 389k | // Manual, Intel Core models and later have a loop stream detector (and |
439 | 389k | // associated uop queue) that can benefit from partial unrolling. |
440 | 389k | // The relevant requirements are: |
441 | 389k | // - The loop must have no more than 4 (8 for Nehalem and later) branches |
442 | 389k | // taken, and none of them may be calls. |
443 | 389k | // - The loop can have no more than 18 (28 for Nehalem and later) uops. |
444 | 389k | |
445 | 389k | // According to the Software Optimization Guide for AMD Family 15h |
446 | 389k | // Processors, models 30h-4fh (Steamroller and later) have a loop predictor |
447 | 389k | // and loop buffer which can benefit from partial unrolling. |
448 | 389k | // The relevant requirements are: |
449 | 389k | // - The loop must have fewer than 16 branches |
450 | 389k | // - The loop must have less than 40 uops in all executed loop branches |
451 | 389k | |
452 | 389k | // The number of taken branches in a loop is hard to estimate here, and |
453 | 389k | // benchmarking has revealed that it is better not to be conservative when |
454 | 389k | // estimating the branch count. As a result, we'll ignore the branch limits |
455 | 389k | // until someone finds a case where it matters in practice. |
456 | 389k | |
457 | 389k | unsigned MaxOps; |
458 | 389k | const TargetSubtargetInfo *ST = getST(); |
459 | 389k | if (PartialUnrollingThreshold.getNumOccurrences() > 0) |
460 | 0 | MaxOps = PartialUnrollingThreshold; |
461 | 389k | else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) |
462 | 22.8k | MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; |
463 | 366k | else |
464 | 366k | return; |
465 | 22.8k | |
466 | 22.8k | // Scan the loop: don't unroll loops with calls. |
467 | 69.1k | for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 22.8k I != E; |
468 | 56.4k | ++I46.2k ) { |
469 | 56.4k | BasicBlock *BB = *I; |
470 | 56.4k | |
471 | 380k | for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J323k ) |
472 | 333k | if (isa<CallInst>(J) || isa<InvokeInst>(J)318k ) { |
473 | 15.1k | ImmutableCallSite CS(&*J); |
474 | 15.1k | if (const Function *F = CS.getCalledFunction()) { |
475 | 13.8k | if (!static_cast<T *>(this)->isLoweredToCall(F)) |
476 | 4.92k | continue; |
477 | 10.2k | } |
478 | 10.2k | |
479 | 10.2k | return; |
480 | 10.2k | } |
481 | 56.4k | } |
482 | 22.8k | |
483 | 22.8k | // Enable runtime and partial unrolling up to the specified size. |
484 | 22.8k | // Enable using trip count upper bound to unroll loops. |
485 | 22.8k | UP.Partial = UP.Runtime = UP.UpperBound = true; |
486 | 12.6k | UP.PartialThreshold = MaxOps; |
487 | 12.6k | |
488 | 12.6k | // Avoid unrolling when optimizing for size. |
489 | 12.6k | UP.OptSizeThreshold = 0; |
490 | 12.6k | UP.PartialOptSizeThreshold = 0; |
491 | 12.6k | |
492 | 12.6k | // Set number of instructions optimized when "back edge" |
493 | 12.6k | // becomes "fall through" to default value of 2. |
494 | 12.6k | UP.BEInsns = 2; |
495 | 12.6k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&) Line | Count | Source | 433 | 359k | TTI::UnrollingPreferences &UP) { | 434 | 359k | // This unrolling functionality is target independent, but to provide some | 435 | 359k | // motivation for its intended use, for x86: | 436 | 359k | | 437 | 359k | // According to the Intel 64 and IA-32 Architectures Optimization Reference | 438 | 359k | // Manual, Intel Core models and later have a loop stream detector (and | 439 | 359k | // associated uop queue) that can benefit from partial unrolling. | 440 | 359k | // The relevant requirements are: | 441 | 359k | // - The loop must have no more than 4 (8 for Nehalem and later) branches | 442 | 359k | // taken, and none of them may be calls. | 443 | 359k | // - The loop can have no more than 18 (28 for Nehalem and later) uops. | 444 | 359k | | 445 | 359k | // According to the Software Optimization Guide for AMD Family 15h | 446 | 359k | // Processors, models 30h-4fh (Steamroller and later) have a loop predictor | 447 | 359k | // and loop buffer which can benefit from partial unrolling. | 448 | 359k | // The relevant requirements are: | 449 | 359k | // - The loop must have fewer than 16 branches | 450 | 359k | // - The loop must have less than 40 uops in all executed loop branches | 451 | 359k | | 452 | 359k | // The number of taken branches in a loop is hard to estimate here, and | 453 | 359k | // benchmarking has revealed that it is better not to be conservative when | 454 | 359k | // estimating the branch count. As a result, we'll ignore the branch limits | 455 | 359k | // until someone finds a case where it matters in practice. 
| 456 | 359k | | 457 | 359k | unsigned MaxOps; | 458 | 359k | const TargetSubtargetInfo *ST = getST(); | 459 | 359k | if (PartialUnrollingThreshold.getNumOccurrences() > 0) | 460 | 0 | MaxOps = PartialUnrollingThreshold; | 461 | 359k | else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) | 462 | 28 | MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; | 463 | 359k | else | 464 | 359k | return; | 465 | 28 | | 466 | 28 | // Scan the loop: don't unroll loops with calls. | 467 | 68 | for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 28 I != E; | 468 | 40 | ++I) { | 469 | 40 | BasicBlock *BB = *I; | 470 | 40 | | 471 | 279 | for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J239 ) | 472 | 239 | if (isa<CallInst>(J) || isa<InvokeInst>(J)209 ) { | 473 | 30 | ImmutableCallSite CS(&*J); | 474 | 30 | if (const Function *F = CS.getCalledFunction()) { | 475 | 30 | if (!static_cast<T *>(this)->isLoweredToCall(F)) | 476 | 30 | continue; | 477 | 0 | } | 478 | 0 | | 479 | 0 | return; | 480 | 0 | } | 481 | 40 | } | 482 | 28 | | 483 | 28 | // Enable runtime and partial unrolling up to the specified size. | 484 | 28 | // Enable using trip count upper bound to unroll loops. | 485 | 28 | UP.Partial = UP.Runtime = UP.UpperBound = true; | 486 | 28 | UP.PartialThreshold = MaxOps; | 487 | 28 | | 488 | 28 | // Avoid unrolling when optimizing for size. | 489 | 28 | UP.OptSizeThreshold = 0; | 490 | 28 | UP.PartialOptSizeThreshold = 0; | 491 | 28 | | 492 | 28 | // Set number of instructions optimized when "back edge" | 493 | 28 | // becomes "fall through" to default value of 2. | 494 | 28 | UP.BEInsns = 2; | 495 | 28 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&) Line | Count | Source | 433 | 6.89k | TTI::UnrollingPreferences &UP) { | 434 | 6.89k | // This unrolling functionality is target independent, but to provide some | 435 | 6.89k | // motivation for its intended use, for x86: | 436 | 6.89k | | 437 | 6.89k | // According to the Intel 64 and IA-32 Architectures Optimization Reference | 438 | 6.89k | // Manual, Intel Core models and later have a loop stream detector (and | 439 | 6.89k | // associated uop queue) that can benefit from partial unrolling. | 440 | 6.89k | // The relevant requirements are: | 441 | 6.89k | // - The loop must have no more than 4 (8 for Nehalem and later) branches | 442 | 6.89k | // taken, and none of them may be calls. | 443 | 6.89k | // - The loop can have no more than 18 (28 for Nehalem and later) uops. | 444 | 6.89k | | 445 | 6.89k | // According to the Software Optimization Guide for AMD Family 15h | 446 | 6.89k | // Processors, models 30h-4fh (Steamroller and later) have a loop predictor | 447 | 6.89k | // and loop buffer which can benefit from partial unrolling. | 448 | 6.89k | // The relevant requirements are: | 449 | 6.89k | // - The loop must have fewer than 16 branches | 450 | 6.89k | // - The loop must have less than 40 uops in all executed loop branches | 451 | 6.89k | | 452 | 6.89k | // The number of taken branches in a loop is hard to estimate here, and | 453 | 6.89k | // benchmarking has revealed that it is better not to be conservative when | 454 | 6.89k | // estimating the branch count. As a result, we'll ignore the branch limits | 455 | 6.89k | // until someone finds a case where it matters in practice. 
| 456 | 6.89k | | 457 | 6.89k | unsigned MaxOps; | 458 | 6.89k | const TargetSubtargetInfo *ST = getST(); | 459 | 6.89k | if (PartialUnrollingThreshold.getNumOccurrences() > 0) | 460 | 0 | MaxOps = PartialUnrollingThreshold; | 461 | 6.89k | else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) | 462 | 24 | MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; | 463 | 6.87k | else | 464 | 6.87k | return; | 465 | 24 | | 466 | 24 | // Scan the loop: don't unroll loops with calls. | 467 | 51 | for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 24 I != E; | 468 | 30 | ++I27 ) { | 469 | 30 | BasicBlock *BB = *I; | 470 | 30 | | 471 | 276 | for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J246 ) | 472 | 249 | if (isa<CallInst>(J) || isa<InvokeInst>(J)234 ) { | 473 | 15 | ImmutableCallSite CS(&*J); | 474 | 15 | if (const Function *F = CS.getCalledFunction()) { | 475 | 15 | if (!static_cast<T *>(this)->isLoweredToCall(F)) | 476 | 12 | continue; | 477 | 3 | } | 478 | 3 | | 479 | 3 | return; | 480 | 3 | } | 481 | 30 | } | 482 | 24 | | 483 | 24 | // Enable runtime and partial unrolling up to the specified size. | 484 | 24 | // Enable using trip count upper bound to unroll loops. | 485 | 24 | UP.Partial = UP.Runtime = UP.UpperBound = true; | 486 | 21 | UP.PartialThreshold = MaxOps; | 487 | 21 | | 488 | 21 | // Avoid unrolling when optimizing for size. | 489 | 21 | UP.OptSizeThreshold = 0; | 490 | 21 | UP.PartialOptSizeThreshold = 0; | 491 | 21 | | 492 | 21 | // Set number of instructions optimized when "back edge" | 493 | 21 | // becomes "fall through" to default value of 2. | 494 | 21 | UP.BEInsns = 2; | 495 | 21 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&) llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&) Line | Count | Source | 433 | 2 | TTI::UnrollingPreferences &UP) { | 434 | 2 | // This unrolling functionality is target independent, but to provide some | 435 | 2 | // motivation for its intended use, for x86: | 436 | 2 | | 437 | 2 | // According to the Intel 64 and IA-32 Architectures Optimization Reference | 438 | 2 | // Manual, Intel Core models and later have a loop stream detector (and | 439 | 2 | // associated uop queue) that can benefit from partial unrolling. | 440 | 2 | // The relevant requirements are: | 441 | 2 | // - The loop must have no more than 4 (8 for Nehalem and later) branches | 442 | 2 | // taken, and none of them may be calls. | 443 | 2 | // - The loop can have no more than 18 (28 for Nehalem and later) uops. | 444 | 2 | | 445 | 2 | // According to the Software Optimization Guide for AMD Family 15h | 446 | 2 | // Processors, models 30h-4fh (Steamroller and later) have a loop predictor | 447 | 2 | // and loop buffer which can benefit from partial unrolling. | 448 | 2 | // The relevant requirements are: | 449 | 2 | // - The loop must have fewer than 16 branches | 450 | 2 | // - The loop must have less than 40 uops in all executed loop branches | 451 | 2 | | 452 | 2 | // The number of taken branches in a loop is hard to estimate here, and | 453 | 2 | // benchmarking has revealed that it is better not to be conservative when | 454 | 2 | // estimating the branch count. 
As a result, we'll ignore the branch limits | 455 | 2 | // until someone finds a case where it matters in practice. | 456 | 2 | | 457 | 2 | unsigned MaxOps; | 458 | 2 | const TargetSubtargetInfo *ST = getST(); | 459 | 2 | if (PartialUnrollingThreshold.getNumOccurrences() > 0) | 460 | 0 | MaxOps = PartialUnrollingThreshold; | 461 | 2 | else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) | 462 | 0 | MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; | 463 | 2 | else | 464 | 2 | return; | 465 | 0 | | 466 | 0 | // Scan the loop: don't unroll loops with calls. | 467 | 0 | for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; | 468 | 0 | ++I) { | 469 | 0 | BasicBlock *BB = *I; | 470 | 0 |
| 471 | 0 | for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) | 472 | 0 | if (isa<CallInst>(J) || isa<InvokeInst>(J)) { | 473 | 0 | ImmutableCallSite CS(&*J); | 474 | 0 | if (const Function *F = CS.getCalledFunction()) { | 475 | 0 | if (!static_cast<T *>(this)->isLoweredToCall(F)) | 476 | 0 | continue; | 477 | 0 | } | 478 | 0 | | 479 | 0 | return; | 480 | 0 | } | 481 | 0 | } | 482 | 0 |
| 483 | 0 | // Enable runtime and partial unrolling up to the specified size. | 484 | 0 | // Enable using trip count upper bound to unroll loops. | 485 | 0 | UP.Partial = UP.Runtime = UP.UpperBound = true; | 486 | 0 | UP.PartialThreshold = MaxOps; | 487 | 0 |
| 488 | 0 | // Avoid unrolling when optimizing for size. | 489 | 0 | UP.OptSizeThreshold = 0; | 490 | 0 | UP.PartialOptSizeThreshold = 0; | 491 | 0 |
| 492 | 0 | // Set number of instructions optimized when "back edge" | 493 | 0 | // becomes "fall through" to default value of 2. | 494 | 0 | UP.BEInsns = 2; | 495 | 0 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&) Line | Count | Source | 433 | 15 | TTI::UnrollingPreferences &UP) { | 434 | 15 | // This unrolling functionality is target independent, but to provide some | 435 | 15 | // motivation for its intended use, for x86: | 436 | 15 | | 437 | 15 | // According to the Intel 64 and IA-32 Architectures Optimization Reference | 438 | 15 | // Manual, Intel Core models and later have a loop stream detector (and | 439 | 15 | // associated uop queue) that can benefit from partial unrolling. | 440 | 15 | // The relevant requirements are: | 441 | 15 | // - The loop must have no more than 4 (8 for Nehalem and later) branches | 442 | 15 | // taken, and none of them may be calls. | 443 | 15 | // - The loop can have no more than 18 (28 for Nehalem and later) uops. | 444 | 15 | | 445 | 15 | // According to the Software Optimization Guide for AMD Family 15h | 446 | 15 | // Processors, models 30h-4fh (Steamroller and later) have a loop predictor | 447 | 15 | // and loop buffer which can benefit from partial unrolling. | 448 | 15 | // The relevant requirements are: | 449 | 15 | // - The loop must have fewer than 16 branches | 450 | 15 | // - The loop must have less than 40 uops in all executed loop branches | 451 | 15 | | 452 | 15 | // The number of taken branches in a loop is hard to estimate here, and | 453 | 15 | // benchmarking has revealed that it is better not to be conservative when | 454 | 15 | // estimating the branch count. As a result, we'll ignore the branch limits | 455 | 15 | // until someone finds a case where it matters in practice. 
| 456 | 15 | | 457 | 15 | unsigned MaxOps; | 458 | 15 | const TargetSubtargetInfo *ST = getST(); | 459 | 15 | if (PartialUnrollingThreshold.getNumOccurrences() > 0) | 460 | 0 | MaxOps = PartialUnrollingThreshold; | 461 | 15 | else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) | 462 | 9 | MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; | 463 | 6 | else | 464 | 6 | return; | 465 | 9 | | 466 | 9 | // Scan the loop: don't unroll loops with calls. | 467 | 18 | for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 9 I != E; | 468 | 9 | ++I) { | 469 | 9 | BasicBlock *BB = *I; | 470 | 9 | | 471 | 93 | for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J84 ) | 472 | 84 | if (isa<CallInst>(J) || isa<InvokeInst>(J)) { | 473 | 0 | ImmutableCallSite CS(&*J); | 474 | 0 | if (const Function *F = CS.getCalledFunction()) { | 475 | 0 | if (!static_cast<T *>(this)->isLoweredToCall(F)) | 476 | 0 | continue; | 477 | 0 | } | 478 | 0 | | 479 | 0 | return; | 480 | 0 | } | 481 | 9 | } | 482 | 9 | | 483 | 9 | // Enable runtime and partial unrolling up to the specified size. | 484 | 9 | // Enable using trip count upper bound to unroll loops. | 485 | 9 | UP.Partial = UP.Runtime = UP.UpperBound = true; | 486 | 9 | UP.PartialThreshold = MaxOps; | 487 | 9 | | 488 | 9 | // Avoid unrolling when optimizing for size. | 489 | 9 | UP.OptSizeThreshold = 0; | 490 | 9 | UP.PartialOptSizeThreshold = 0; | 491 | 9 | | 492 | 9 | // Set number of instructions optimized when "back edge" | 493 | 9 | // becomes "fall through" to default value of 2. | 494 | 9 | UP.BEInsns = 2; | 495 | 9 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&) Line | Count | Source | 433 | 22.9k | TTI::UnrollingPreferences &UP) { | 434 | 22.9k | // This unrolling functionality is target independent, but to provide some | 435 | 22.9k | // motivation for its intended use, for x86: | 436 | 22.9k | | 437 | 22.9k | // According to the Intel 64 and IA-32 Architectures Optimization Reference | 438 | 22.9k | // Manual, Intel Core models and later have a loop stream detector (and | 439 | 22.9k | // associated uop queue) that can benefit from partial unrolling. | 440 | 22.9k | // The relevant requirements are: | 441 | 22.9k | // - The loop must have no more than 4 (8 for Nehalem and later) branches | 442 | 22.9k | // taken, and none of them may be calls. | 443 | 22.9k | // - The loop can have no more than 18 (28 for Nehalem and later) uops. | 444 | 22.9k | | 445 | 22.9k | // According to the Software Optimization Guide for AMD Family 15h | 446 | 22.9k | // Processors, models 30h-4fh (Steamroller and later) have a loop predictor | 447 | 22.9k | // and loop buffer which can benefit from partial unrolling. 
| 448 | 22.9k | // The relevant requirements are: | 449 | 22.9k | // - The loop must have fewer than 16 branches | 450 | 22.9k | // - The loop must have less than 40 uops in all executed loop branches | 451 | 22.9k | | 452 | 22.9k | // The number of taken branches in a loop is hard to estimate here, and | 453 | 22.9k | // benchmarking has revealed that it is better not to be conservative when | 454 | 22.9k | // estimating the branch count. As a result, we'll ignore the branch limits | 455 | 22.9k | // until someone finds a case where it matters in practice. | 456 | 22.9k | | 457 | 22.9k | unsigned MaxOps; | 458 | 22.9k | const TargetSubtargetInfo *ST = getST(); | 459 | 22.9k | if (PartialUnrollingThreshold.getNumOccurrences() > 0) | 460 | 0 | MaxOps = PartialUnrollingThreshold; | 461 | 22.9k | else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) | 462 | 22.8k | MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; | 463 | 157 | else | 464 | 157 | return; | 465 | 22.8k | | 466 | 22.8k | // Scan the loop: don't unroll loops with calls. | 467 | 68.9k | for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 22.8k I != E; | 468 | 56.3k | ++I46.1k ) { | 469 | 56.3k | BasicBlock *BB = *I; | 470 | 56.3k | | 471 | 379k | for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J323k ) | 472 | 333k | if (isa<CallInst>(J) || isa<InvokeInst>(J)318k ) { | 473 | 15.1k | ImmutableCallSite CS(&*J); | 474 | 15.1k | if (const Function *F = CS.getCalledFunction()) { | 475 | 13.7k | if (!static_cast<T *>(this)->isLoweredToCall(F)) | 476 | 4.88k | continue; | 477 | 10.2k | } | 478 | 10.2k | | 479 | 10.2k | return; | 480 | 10.2k | } | 481 | 56.3k | } | 482 | 22.8k | | 483 | 22.8k | // Enable runtime and partial unrolling up to the specified size. | 484 | 22.8k | // Enable using trip count upper bound to unroll loops. 
| 485 | 22.8k | UP.Partial = UP.Runtime = UP.UpperBound = true; | 486 | 12.5k | UP.PartialThreshold = MaxOps; | 487 | 12.5k | | 488 | 12.5k | // Avoid unrolling when optimizing for size. | 489 | 12.5k | UP.OptSizeThreshold = 0; | 490 | 12.5k | UP.PartialOptSizeThreshold = 0; | 491 | 12.5k | | 492 | 12.5k | // Set number of instructions optimized when "back edge" | 493 | 12.5k | // becomes "fall through" to default value of 2. | 494 | 12.5k | UP.BEInsns = 2; | 495 | 12.5k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&) |
496 | | |
497 | | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, |
498 | | AssumptionCache &AC, |
499 | | TargetLibraryInfo *LibInfo, |
500 | 0 | HardwareLoopInfo &HWLoopInfo) { |
501 | 0 | return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); |
502 | 0 | } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, 
llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&) |
503 | | |
504 | 119 | int getInstructionLatency(const Instruction *I) { |
505 | 119 | if (isa<LoadInst>(I)) |
506 | 41 | return getST()->getSchedModel().DefaultLoadLatency; |
507 | 78 | |
508 | 78 | return BaseT::getInstructionLatency(I); |
509 | 78 | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getInstructionLatency(llvm::Instruction const*) Line | Count | Source | 504 | 108 | int getInstructionLatency(const Instruction *I) { | 505 | 108 | if (isa<LoadInst>(I)) | 506 | 40 | return getST()->getSchedModel().DefaultLoadLatency; | 507 | 68 | | 508 | 68 | return BaseT::getInstructionLatency(I); | 509 | 68 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getInstructionLatency(llvm::Instruction const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getInstructionLatency(llvm::Instruction const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getInstructionLatency(llvm::Instruction const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getInstructionLatency(llvm::Instruction const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getInstructionLatency(llvm::Instruction const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getInstructionLatency(llvm::Instruction const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getInstructionLatency(llvm::Instruction const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getInstructionLatency(llvm::Instruction const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getInstructionLatency(llvm::Instruction const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getInstructionLatency(llvm::Instruction const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getInstructionLatency(llvm::Instruction const*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getInstructionLatency(llvm::Instruction const*) Line | Count | Source | 504 | 11 | int getInstructionLatency(const Instruction *I) { | 505 | 11 | if (isa<LoadInst>(I)) | 506 | 1 | return getST()->getSchedModel().DefaultLoadLatency; | 507 | 10 | | 508 | 10 | return BaseT::getInstructionLatency(I); | 509 | 10 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getInstructionLatency(llvm::Instruction const*) |
510 | | |
511 | | /// @} |
512 | | |
513 | | /// \name Vector TTI Implementations |
514 | | /// @{ |
515 | | |
516 | 867 | unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0286 : 1581 ; } llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getNumberOfRegisters(bool) Line | Count | Source | 516 | 56 | unsigned getNumberOfRegisters(bool Vector) { return Vector ? 00 : 1; } |
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getNumberOfRegisters(bool) Line | Count | Source | 516 | 335 | unsigned getNumberOfRegisters(bool Vector) { return Vector ? 060 : 1275 ; } |
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getNumberOfRegisters(bool) Line | Count | Source | 516 | 68 | unsigned getNumberOfRegisters(bool Vector) { return Vector ? 06 : 162 ; } |
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getNumberOfRegisters(bool) Line | Count | Source | 516 | 408 | unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0220 : 1188 ; } |
|
517 | | |
518 | 0 | unsigned getRegisterBitWidth(bool Vector) const { return 32; } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getRegisterBitWidth(bool) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getRegisterBitWidth(bool) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getRegisterBitWidth(bool) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getRegisterBitWidth(bool) const |
519 | | |
520 | | /// Estimate the overhead of scalarizing an instruction. Insert and Extract |
521 | | /// are set if the result needs to be inserted and/or extracted from vectors. |
522 | 88.3k | unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { |
523 | 88.3k | assert(Ty->isVectorTy() && "Can only scalarize vectors"); |
524 | 88.3k | unsigned Cost = 0; |
525 | 88.3k | |
526 | 395k | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i307k ) { |
527 | 307k | if (Insert) |
528 | 167k | Cost += static_cast<T *>(this) |
529 | 167k | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); |
530 | 307k | if (Extract) |
531 | 144k | Cost += static_cast<T *>(this) |
532 | 144k | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); |
533 | 307k | } |
534 | 88.3k | |
535 | 88.3k | return Cost; |
536 | 88.3k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool) Line | Count | Source | 522 | 69.6k | unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { | 523 | 69.6k | assert(Ty->isVectorTy() && "Can only scalarize vectors"); | 524 | 69.6k | unsigned Cost = 0; | 525 | 69.6k | | 526 | 254k | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i185k ) { | 527 | 185k | if (Insert) | 528 | 105k | Cost += static_cast<T *>(this) | 529 | 105k | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); | 530 | 185k | if (Extract) | 531 | 80.0k | Cost += static_cast<T *>(this) | 532 | 80.0k | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); | 533 | 185k | } | 534 | 69.6k | | 535 | 69.6k | return Cost; | 536 | 69.6k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool) Line | Count | Source | 522 | 170 | unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { | 523 | 170 | assert(Ty->isVectorTy() && "Can only scalarize vectors"); | 524 | 170 | unsigned Cost = 0; | 525 | 170 | | 526 | 720 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i550 ) { | 527 | 550 | if (Insert) | 528 | 437 | Cost += static_cast<T *>(this) | 529 | 437 | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); | 530 | 550 | if (Extract) | 531 | 117 | Cost += static_cast<T *>(this) | 532 | 117 | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); | 533 | 550 | } | 534 | 170 | | 535 | 170 | return Cost; | 536 | 170 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool) Line | Count | Source | 522 | 2.83k | unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { | 523 | 2.83k | assert(Ty->isVectorTy() && "Can only scalarize vectors"); | 524 | 2.83k | unsigned Cost = 0; | 525 | 2.83k | | 526 | 9.78k | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i6.95k ) { | 527 | 6.95k | if (Insert) | 528 | 2.60k | Cost += static_cast<T *>(this) | 529 | 2.60k | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); | 530 | 6.95k | if (Extract) | 531 | 4.58k | Cost += static_cast<T *>(this) | 532 | 4.58k | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); | 533 | 6.95k | } | 534 | 2.83k | | 535 | 2.83k | return Cost; | 536 | 2.83k | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool) Line | Count | Source | 522 | 23 | unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { | 523 | 23 | assert(Ty->isVectorTy() && "Can only scalarize vectors"); | 524 | 23 | unsigned Cost = 0; | 525 | 23 | | 526 | 311 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i288 ) { | 527 | 288 | if (Insert) | 528 | 124 | Cost += static_cast<T *>(this) | 529 | 124 | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); | 530 | 288 | if (Extract) | 531 | 164 | Cost += static_cast<T *>(this) | 532 | 164 | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); | 533 | 288 | } | 534 | 23 | | 535 | 23 | return Cost; | 536 | 23 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool) llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool) Line | Count | Source | 522 | 4 | unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { | 523 | 4 | assert(Ty->isVectorTy() && "Can only scalarize vectors"); | 524 | 4 | unsigned Cost = 0; | 525 | 4 | | 526 | 12 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i8 ) { | 527 | 8 | if (Insert) | 528 | 4 | Cost += static_cast<T *>(this) | 529 | 4 | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); | 530 | 8 | if (Extract) | 531 | 4 | Cost += static_cast<T *>(this) | 532 | 4 | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); | 533 | 8 | } | 534 | 4 | | 535 | 4 | return Cost; | 536 | 4 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool) Line | Count | Source | 522 | 788 | unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { | 523 | 788 | assert(Ty->isVectorTy() && "Can only scalarize vectors"); | 524 | 788 | unsigned Cost = 0; | 525 | 788 | | 526 | 2.99k | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i2.21k ) { | 527 | 2.21k | if (Insert) | 528 | 1.07k | Cost += static_cast<T *>(this) | 529 | 1.07k | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); | 530 | 2.21k | if (Extract) | 531 | 1.13k | Cost += static_cast<T *>(this) | 532 | 1.13k | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); | 533 | 2.21k | } | 534 | 788 | | 535 | 788 | return Cost; | 536 | 788 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool) Line | Count | Source | 522 | 758 | unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { | 523 | 758 | assert(Ty->isVectorTy() && "Can only scalarize vectors"); | 524 | 758 | unsigned Cost = 0; | 525 | 758 | | 526 | 5.53k | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i4.77k ) { | 527 | 4.77k | if (Insert) | 528 | 2.21k | Cost += static_cast<T *>(this) | 529 | 2.21k | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); | 530 | 4.77k | if (Extract) | 531 | 2.11k | Cost += static_cast<T *>(this) | 532 | 2.11k | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); | 533 | 4.77k | } | 534 | 758 | | 535 | 758 | return Cost; | 536 | 758 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool) Line | Count | Source | 522 | 14.1k | unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { | 523 | 14.1k | assert(Ty->isVectorTy() && "Can only scalarize vectors"); | 524 | 14.1k | unsigned Cost = 0; | 525 | 14.1k | | 526 | 121k | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i107k ) { | 527 | 107k | if (Insert) | 528 | 55.7k | Cost += static_cast<T *>(this) | 529 | 55.7k | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); | 530 | 107k | if (Extract) | 531 | 55.9k | Cost += static_cast<T *>(this) | 532 | 55.9k | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); | 533 | 107k | } | 534 | 14.1k | | 535 | 14.1k | return Cost; | 536 | 14.1k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool) |
537 | | |
538 | | /// Estimate the overhead of scalarizing an instructions unique |
539 | | /// non-constant operands. The types of the arguments are ordinarily |
540 | | /// scalar, in which case the costs are multiplied with VF. |
541 | | unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
542 | 24.4k | unsigned VF) { |
543 | 24.4k | unsigned Cost = 0; |
544 | 24.4k | SmallPtrSet<const Value*, 4> UniqueOperands; |
545 | 34.0k | for (const Value *A : Args) { |
546 | 34.0k | if (!isa<Constant>(A) && UniqueOperands.insert(A).second28.0k ) { |
547 | 28.0k | Type *VecTy = nullptr; |
548 | 28.0k | if (A->getType()->isVectorTy()) { |
549 | 823 | VecTy = A->getType(); |
550 | 823 | // If A is a vector operand, VF should be 1 or correspond to A. |
551 | 823 | assert((VF == 1 || VF == VecTy->getVectorNumElements()) && |
552 | 823 | "Vector argument does not match VF"); |
553 | 823 | } |
554 | 27.2k | else |
555 | 27.2k | VecTy = VectorType::get(A->getType(), VF); |
556 | 28.0k | |
557 | 28.0k | Cost += getScalarizationOverhead(VecTy, false, true); |
558 | 28.0k | } |
559 | 34.0k | } |
560 | 24.4k | |
561 | 24.4k | return Cost; |
562 | 24.4k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int) Line | Count | Source | 542 | 17.6k | unsigned VF) { | 543 | 17.6k | unsigned Cost = 0; | 544 | 17.6k | SmallPtrSet<const Value*, 4> UniqueOperands; | 545 | 21.5k | for (const Value *A : Args) { | 546 | 21.5k | if (!isa<Constant>(A) && UniqueOperands.insert(A).second21.4k ) { | 547 | 21.4k | Type *VecTy = nullptr; | 548 | 21.4k | if (A->getType()->isVectorTy()) { | 549 | 5 | VecTy = A->getType(); | 550 | 5 | // If A is a vector operand, VF should be 1 or correspond to A. | 551 | 5 | assert((VF == 1 || VF == VecTy->getVectorNumElements()) && | 552 | 5 | "Vector argument does not match VF"); | 553 | 5 | } | 554 | 21.4k | else | 555 | 21.4k | VecTy = VectorType::get(A->getType(), VF); | 556 | 21.4k | | 557 | 21.4k | Cost += getScalarizationOverhead(VecTy, false, true); | 558 | 21.4k | } | 559 | 21.5k | } | 560 | 17.6k | | 561 | 17.6k | return Cost; | 562 | 17.6k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int) Line | Count | Source | 542 | 19 | unsigned VF) { | 543 | 19 | unsigned Cost = 0; | 544 | 19 | SmallPtrSet<const Value*, 4> UniqueOperands; | 545 | 31 | for (const Value *A : Args) { | 546 | 31 | if (!isa<Constant>(A) && UniqueOperands.insert(A).second) { | 547 | 31 | Type *VecTy = nullptr; | 548 | 31 | if (A->getType()->isVectorTy()) { | 549 | 7 | VecTy = A->getType(); | 550 | 7 | // If A is a vector operand, VF should be 1 or correspond to A. | 551 | 7 | assert((VF == 1 || VF == VecTy->getVectorNumElements()) && | 552 | 7 | "Vector argument does not match VF"); | 553 | 7 | } | 554 | 24 | else | 555 | 24 | VecTy = VectorType::get(A->getType(), VF); | 556 | 31 | | 557 | 31 | Cost += getScalarizationOverhead(VecTy, false, true); | 558 | 31 | } | 559 | 31 | } | 560 | 19 | | 561 | 19 | return Cost; | 562 | 19 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int) Line | Count | Source | 542 | 613 | unsigned VF) { | 543 | 613 | unsigned Cost = 0; | 544 | 613 | SmallPtrSet<const Value*, 4> UniqueOperands; | 545 | 943 | for (const Value *A : Args) { | 546 | 943 | if (!isa<Constant>(A) && UniqueOperands.insert(A).second835 ) { | 547 | 835 | Type *VecTy = nullptr; | 548 | 835 | if (A->getType()->isVectorTy()) { | 549 | 0 | VecTy = A->getType(); | 550 | 0 | // If A is a vector operand, VF should be 1 or correspond to A. | 551 | 0 | assert((VF == 1 || VF == VecTy->getVectorNumElements()) && | 552 | 0 | "Vector argument does not match VF"); | 553 | 0 | } | 554 | 835 | else | 555 | 835 | VecTy = VectorType::get(A->getType(), VF); | 556 | 835 | | 557 | 835 | Cost += getScalarizationOverhead(VecTy, false, true); | 558 | 835 | } | 559 | 943 | } | 560 | 613 | | 561 | 613 | return Cost; | 562 | 613 | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int) Line | Count | Source | 542 | 5 | unsigned VF) { | 543 | 5 | unsigned Cost = 0; | 544 | 5 | SmallPtrSet<const Value*, 4> UniqueOperands; | 545 | 10 | for (const Value *A : Args) { | 546 | 10 | if (!isa<Constant>(A) && UniqueOperands.insert(A).second) { | 547 | 10 | Type *VecTy = nullptr; | 548 | 10 | if (A->getType()->isVectorTy()) { | 549 | 0 | VecTy = A->getType(); | 550 | 0 | // If A is a vector operand, VF should be 1 or correspond to A. | 551 | 0 | assert((VF == 1 || VF == VecTy->getVectorNumElements()) && | 552 | 0 | "Vector argument does not match VF"); | 553 | 0 | } | 554 | 10 | else | 555 | 10 | VecTy = VectorType::get(A->getType(), VF); | 556 | 10 | | 557 | 10 | Cost += getScalarizationOverhead(VecTy, false, true); | 558 | 10 | } | 559 | 10 | } | 560 | 5 | | 561 | 5 | return Cost; | 562 | 5 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int) llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int) Line | Count | Source | 542 | 354 | unsigned VF) { | 543 | 354 | unsigned Cost = 0; | 544 | 354 | SmallPtrSet<const Value*, 4> UniqueOperands; | 545 | 379 | for (const Value *A : Args) { | 546 | 379 | if (!isa<Constant>(A) && UniqueOperands.insert(A).second) { | 547 | 379 | Type *VecTy = nullptr; | 548 | 379 | if (A->getType()->isVectorTy()) { | 549 | 1 | VecTy = A->getType(); | 550 | 1 | // If A is a vector operand, VF should be 1 or correspond to A. | 551 | 1 | assert((VF == 1 || VF == VecTy->getVectorNumElements()) && | 552 | 1 | "Vector argument does not match VF"); | 553 | 1 | } | 554 | 378 | else | 555 | 378 | VecTy = VectorType::get(A->getType(), VF); | 556 | 379 | | 557 | 379 | Cost += getScalarizationOverhead(VecTy, false, true); | 558 | 379 | } | 559 | 379 | } | 560 | 354 | | 561 | 354 | return Cost; | 562 | 354 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int) Line | Count | Source | 542 | 72 | unsigned VF) { | 543 | 72 | unsigned Cost = 0; | 544 | 72 | SmallPtrSet<const Value*, 4> UniqueOperands; | 545 | 136 | for (const Value *A : Args) { | 546 | 136 | if (!isa<Constant>(A) && UniqueOperands.insert(A).second60 ) { | 547 | 60 | Type *VecTy = nullptr; | 548 | 60 | if (A->getType()->isVectorTy()) { | 549 | 60 | VecTy = A->getType(); | 550 | 60 | // If A is a vector operand, VF should be 1 or correspond to A. | 551 | 60 | assert((VF == 1 || VF == VecTy->getVectorNumElements()) && | 552 | 60 | "Vector argument does not match VF"); | 553 | 60 | } | 554 | 0 | else | 555 | 0 | VecTy = VectorType::get(A->getType(), VF); | 556 | 60 | | 557 | 60 | Cost += getScalarizationOverhead(VecTy, false, true); | 558 | 60 | } | 559 | 136 | } | 560 | 72 | | 561 | 72 | return Cost; | 562 | 72 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int) Line | Count | Source | 542 | 5.77k | unsigned VF) { | 543 | 5.77k | unsigned Cost = 0; | 544 | 5.77k | SmallPtrSet<const Value*, 4> UniqueOperands; | 545 | 10.9k | for (const Value *A : Args) { | 546 | 10.9k | if (!isa<Constant>(A) && UniqueOperands.insert(A).second5.28k ) { | 547 | 5.28k | Type *VecTy = nullptr; | 548 | 5.28k | if (A->getType()->isVectorTy()) { | 549 | 750 | VecTy = A->getType(); | 550 | 750 | // If A is a vector operand, VF should be 1 or correspond to A. | 551 | 750 | assert((VF == 1 || VF == VecTy->getVectorNumElements()) && | 552 | 750 | "Vector argument does not match VF"); | 553 | 750 | } | 554 | 4.53k | else | 555 | 4.53k | VecTy = VectorType::get(A->getType(), VF); | 556 | 5.28k | | 557 | 5.28k | Cost += getScalarizationOverhead(VecTy, false, true); | 558 | 5.28k | } | 559 | 10.9k | } | 560 | 5.77k | | 561 | 5.77k | return Cost; | 562 | 5.77k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int) |
563 | | |
564 | 6.23k | unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) { |
565 | 6.23k | assert(VecTy->isVectorTy()); |
566 | 6.23k | |
567 | 6.23k | unsigned Cost = 0; |
568 | 6.23k | |
569 | 6.23k | Cost += getScalarizationOverhead(VecTy, true, false); |
570 | 6.23k | if (!Args.empty()) |
571 | 64 | Cost += getOperandsScalarizationOverhead(Args, |
572 | 64 | VecTy->getVectorNumElements()); |
573 | 6.17k | else |
574 | 6.17k | // When no information on arguments is provided, we add the cost |
575 | 6.17k | // associated with one argument as a heuristic. |
576 | 6.17k | Cost += getScalarizationOverhead(VecTy, false, true); |
577 | 6.23k | |
578 | 6.23k | return Cost; |
579 | 6.23k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 564 | 5.40k | unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) { | 565 | 5.40k | assert(VecTy->isVectorTy()); | 566 | 5.40k | | 567 | 5.40k | unsigned Cost = 0; | 568 | 5.40k | | 569 | 5.40k | Cost += getScalarizationOverhead(VecTy, true, false); | 570 | 5.40k | if (!Args.empty()) | 571 | 0 | Cost += getOperandsScalarizationOverhead(Args, | 572 | 0 | VecTy->getVectorNumElements()); | 573 | 5.40k | else | 574 | 5.40k | // When no information on arguments is provided, we add the cost | 575 | 5.40k | // associated with one argument as a heuristic. | 576 | 5.40k | Cost += getScalarizationOverhead(VecTy, false, true); | 577 | 5.40k | | 578 | 5.40k | return Cost; | 579 | 5.40k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 564 | 15 | unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) { | 565 | 15 | assert(VecTy->isVectorTy()); | 566 | 15 | | 567 | 15 | unsigned Cost = 0; | 568 | 15 | | 569 | 15 | Cost += getScalarizationOverhead(VecTy, true, false); | 570 | 15 | if (!Args.empty()) | 571 | 0 | Cost += getOperandsScalarizationOverhead(Args, | 572 | 0 | VecTy->getVectorNumElements()); | 573 | 15 | else | 574 | 15 | // When no information on arguments is provided, we add the cost | 575 | 15 | // associated with one argument as a heuristic. | 576 | 15 | Cost += getScalarizationOverhead(VecTy, false, true); | 577 | 15 | | 578 | 15 | return Cost; | 579 | 15 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 564 | 694 | unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) { | 565 | 694 | assert(VecTy->isVectorTy()); | 566 | 694 | | 567 | 694 | unsigned Cost = 0; | 568 | 694 | | 569 | 694 | Cost += getScalarizationOverhead(VecTy, true, false); | 570 | 694 | if (!Args.empty()) | 571 | 0 | Cost += getOperandsScalarizationOverhead(Args, | 572 | 0 | VecTy->getVectorNumElements()); | 573 | 694 | else | 574 | 694 | // When no information on arguments is provided, we add the cost | 575 | 694 | // associated with one argument as a heuristic. | 576 | 694 | Cost += getScalarizationOverhead(VecTy, false, true); | 577 | 694 | | 578 | 694 | return Cost; | 579 | 694 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>) llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 564 | 2 | unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) { | 565 | 2 | assert(VecTy->isVectorTy()); | 566 | 2 | | 567 | 2 | unsigned Cost = 0; | 568 | 2 | | 569 | 2 | Cost += getScalarizationOverhead(VecTy, true, false); | 570 | 2 | if (!Args.empty()) | 571 | 0 | Cost += getOperandsScalarizationOverhead(Args, | 572 | 0 | VecTy->getVectorNumElements()); | 573 | 2 | else | 574 | 2 | // When no information on arguments is provided, we add the cost | 575 | 2 | // associated with one argument as a heuristic. | 576 | 2 | Cost += getScalarizationOverhead(VecTy, false, true); | 577 | 2 | | 578 | 2 | return Cost; | 579 | 2 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 564 | 64 | unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) { | 565 | 64 | assert(VecTy->isVectorTy()); | 566 | 64 | | 567 | 64 | unsigned Cost = 0; | 568 | 64 | | 569 | 64 | Cost += getScalarizationOverhead(VecTy, true, false); | 570 | 64 | if (!Args.empty()) | 571 | 64 | Cost += getOperandsScalarizationOverhead(Args, | 572 | 64 | VecTy->getVectorNumElements()); | 573 | 0 | else | 574 | 0 | // When no information on arguments is provided, we add the cost | 575 | 0 | // associated with one argument as a heuristic. | 576 | 0 | Cost += getScalarizationOverhead(VecTy, false, true); | 577 | 64 | | 578 | 64 | return Cost; | 579 | 64 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 564 | 60 | unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) { | 565 | 60 | assert(VecTy->isVectorTy()); | 566 | 60 | | 567 | 60 | unsigned Cost = 0; | 568 | 60 | | 569 | 60 | Cost += getScalarizationOverhead(VecTy, true, false); | 570 | 60 | if (!Args.empty()) | 571 | 0 | Cost += getOperandsScalarizationOverhead(Args, | 572 | 0 | VecTy->getVectorNumElements()); | 573 | 60 | else | 574 | 60 | // When no information on arguments is provided, we add the cost | 575 | 60 | // associated with one argument as a heuristic. | 576 | 60 | Cost += getScalarizationOverhead(VecTy, false, true); | 577 | 60 | | 578 | 60 | return Cost; | 579 | 60 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>) |
580 | | |
581 | 76 | unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getMaxInterleaveFactor(unsigned int) llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getMaxInterleaveFactor(unsigned int) Line | Count | Source | 581 | 46 | unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } |
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getMaxInterleaveFactor(unsigned int) Line | Count | Source | 581 | 1 | unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } |
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getMaxInterleaveFactor(unsigned int) Line | Count | Source | 581 | 6 | unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } |
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getMaxInterleaveFactor(unsigned int) Line | Count | Source | 581 | 22 | unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getMaxInterleaveFactor(unsigned int) llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getMaxInterleaveFactor(unsigned int) Line | Count | Source | 581 | 1 | unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } |
|
582 | | |
583 | | unsigned getArithmeticInstrCost( |
584 | | unsigned Opcode, Type *Ty, |
585 | | TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, |
586 | | TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, |
587 | | TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, |
588 | | TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, |
589 | 530k | ArrayRef<const Value *> Args = ArrayRef<const Value *>()) { |
590 | 530k | // Check if any of the operands are vector operands. |
591 | 530k | const TargetLoweringBase *TLI = getTLI(); |
592 | 530k | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
593 | 530k | assert(ISD && "Invalid opcode"); |
594 | 530k | |
595 | 530k | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); |
596 | 530k | |
597 | 530k | bool IsFloat = Ty->isFPOrFPVectorTy(); |
598 | 530k | // Assume that floating point arithmetic operations cost twice as much as |
599 | 530k | // integer operations. |
600 | 530k | unsigned OpCost = (IsFloat ? 2264k : 1265k ); |
601 | 530k | |
602 | 530k | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { |
603 | 465k | // The operation is legal. Assume it costs 1. |
604 | 465k | // TODO: Once we have extract/insert subvector cost we need to use them. |
605 | 465k | return LT.first * OpCost; |
606 | 465k | } |
607 | 65.1k | |
608 | 65.1k | if (!TLI->isOperationExpand(ISD, LT.second)) { |
609 | 50.7k | // If the operation is custom lowered, then assume that the code is twice |
610 | 50.7k | // as expensive. |
611 | 50.7k | return LT.first * 2 * OpCost; |
612 | 50.7k | } |
613 | 14.3k | |
614 | 14.3k | // Else, assume that we need to scalarize this op. |
615 | 14.3k | // TODO: If one of the types get legalized by splitting, handle this |
616 | 14.3k | // similarly to what getCastInstrCost() does. |
617 | 14.3k | if (Ty->isVectorTy()) { |
618 | 6.19k | unsigned Num = Ty->getVectorNumElements(); |
619 | 6.19k | unsigned Cost = static_cast<T *>(this) |
620 | 6.19k | ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); |
621 | 6.19k | // Return the cost of multiple scalar invocation plus the cost of |
622 | 6.19k | // inserting and extracting the values. |
623 | 6.19k | return getScalarizationOverhead(Ty, Args) + Num * Cost; |
624 | 6.19k | } |
625 | 8.14k | |
626 | 8.14k | // We don't know anything about this scalar instruction. |
627 | 8.14k | return OpCost; |
628 | 8.14k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 589 | 420k | ArrayRef<const Value *> Args = ArrayRef<const Value *>()) { | 590 | 420k | // Check if any of the operands are vector operands. | 591 | 420k | const TargetLoweringBase *TLI = getTLI(); | 592 | 420k | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 593 | 420k | assert(ISD && "Invalid opcode"); | 594 | 420k | | 595 | 420k | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); | 596 | 420k | | 597 | 420k | bool IsFloat = Ty->isFPOrFPVectorTy(); | 598 | 420k | // Assume that floating point arithmetic operations cost twice as much as | 599 | 420k | // integer operations. | 600 | 420k | unsigned OpCost = (IsFloat ? 2260k : 1160k ); | 601 | 420k | | 602 | 420k | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { | 603 | 364k | // The operation is legal. Assume it costs 1. | 604 | 364k | // TODO: Once we have extract/insert subvector cost we need to use them. | 605 | 364k | return LT.first * OpCost; | 606 | 364k | } | 607 | 55.6k | | 608 | 55.6k | if (!TLI->isOperationExpand(ISD, LT.second)) { | 609 | 48.6k | // If the operation is custom lowered, then assume that the code is twice | 610 | 48.6k | // as expensive. | 611 | 48.6k | return LT.first * 2 * OpCost; | 612 | 48.6k | } | 613 | 7.01k | | 614 | 7.01k | // Else, assume that we need to scalarize this op. | 615 | 7.01k | // TODO: If one of the types get legalized by splitting, handle this | 616 | 7.01k | // similarly to what getCastInstrCost() does. 
| 617 | 7.01k | if (Ty->isVectorTy()) { | 618 | 5.40k | unsigned Num = Ty->getVectorNumElements(); | 619 | 5.40k | unsigned Cost = static_cast<T *>(this) | 620 | 5.40k | ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); | 621 | 5.40k | // Return the cost of multiple scalar invocation plus the cost of | 622 | 5.40k | // inserting and extracting the values. | 623 | 5.40k | return getScalarizationOverhead(Ty, Args) + Num * Cost; | 624 | 5.40k | } | 625 | 1.61k | | 626 | 1.61k | // We don't know anything about this scalar instruction. | 627 | 1.61k | return OpCost; | 628 | 1.61k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 589 | 16 | ArrayRef<const Value *> Args = ArrayRef<const Value *>()) { | 590 | 16 | // Check if any of the operands are vector operands. | 591 | 16 | const TargetLoweringBase *TLI = getTLI(); | 592 | 16 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 593 | 16 | assert(ISD && "Invalid opcode"); | 594 | 16 | | 595 | 16 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); | 596 | 16 | | 597 | 16 | bool IsFloat = Ty->isFPOrFPVectorTy(); | 598 | 16 | // Assume that floating point arithmetic operations cost twice as much as | 599 | 16 | // integer operations. | 600 | 16 | unsigned OpCost = (IsFloat ? 212 : 14 ); | 601 | 16 | | 602 | 16 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { | 603 | 1 | // The operation is legal. Assume it costs 1. | 604 | 1 | // TODO: Once we have extract/insert subvector cost we need to use them. | 605 | 1 | return LT.first * OpCost; | 606 | 1 | } | 607 | 15 | | 608 | 15 | if (!TLI->isOperationExpand(ISD, LT.second)) { | 609 | 0 | // If the operation is custom lowered, then assume that the code is twice | 610 | 0 | // as expensive. | 611 | 0 | return LT.first * 2 * OpCost; | 612 | 0 | } | 613 | 15 | | 614 | 15 | // Else, assume that we need to scalarize this op. 
| 615 | 15 | // TODO: If one of the types get legalized by splitting, handle this | 616 | 15 | // similarly to what getCastInstrCost() does. | 617 | 15 | if (Ty->isVectorTy()) { | 618 | 15 | unsigned Num = Ty->getVectorNumElements(); | 619 | 15 | unsigned Cost = static_cast<T *>(this) | 620 | 15 | ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); | 621 | 15 | // Return the cost of multiple scalar invocation plus the cost of | 622 | 15 | // inserting and extracting the values. | 623 | 15 | return getScalarizationOverhead(Ty, Args) + Num * Cost; | 624 | 15 | } | 625 | 0 | | 626 | 0 | // We don't know anything about this scalar instruction. | 627 | 0 | return OpCost; | 628 | 0 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 589 | 20.4k | ArrayRef<const Value *> Args = ArrayRef<const Value *>()) { | 590 | 20.4k | // Check if any of the operands are vector operands. | 591 | 20.4k | const TargetLoweringBase *TLI = getTLI(); | 592 | 20.4k | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 593 | 20.4k | assert(ISD && "Invalid opcode"); | 594 | 20.4k | | 595 | 20.4k | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); | 596 | 20.4k | | 597 | 20.4k | bool IsFloat = Ty->isFPOrFPVectorTy(); | 598 | 20.4k | // Assume that floating point arithmetic operations cost twice as much as | 599 | 20.4k | // integer operations. | 600 | 20.4k | unsigned OpCost = (IsFloat ? 22.93k : 117.4k ); | 601 | 20.4k | | 602 | 20.4k | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { | 603 | 17.6k | // The operation is legal. Assume it costs 1. | 604 | 17.6k | // TODO: Once we have extract/insert subvector cost we need to use them. | 605 | 17.6k | return LT.first * OpCost; | 606 | 17.6k | } | 607 | 2.76k | | 608 | 2.76k | if (!TLI->isOperationExpand(ISD, LT.second)) { | 609 | 2.05k | // If the operation is custom lowered, then assume that the code is twice | 610 | 2.05k | // as expensive. | 611 | 2.05k | return LT.first * 2 * OpCost; | 612 | 2.05k | } | 613 | 712 | | 614 | 712 | // Else, assume that we need to scalarize this op. | 615 | 712 | // TODO: If one of the types get legalized by splitting, handle this | 616 | 712 | // similarly to what getCastInstrCost() does. 
| 617 | 712 | if (Ty->isVectorTy()) { | 618 | 694 | unsigned Num = Ty->getVectorNumElements(); | 619 | 694 | unsigned Cost = static_cast<T *>(this) | 620 | 694 | ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); | 621 | 694 | // Return the cost of multiple scalar invocation plus the cost of | 622 | 694 | // inserting and extracting the values. | 623 | 694 | return getScalarizationOverhead(Ty, Args) + Num * Cost; | 624 | 694 | } | 625 | 18 | | 626 | 18 | // We don't know anything about this scalar instruction. | 627 | 18 | return OpCost; | 628 | 18 | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 589 | 42 | ArrayRef<const Value *> Args = ArrayRef<const Value *>()) { | 590 | 42 | // Check if any of the operands are vector operands. | 591 | 42 | const TargetLoweringBase *TLI = getTLI(); | 592 | 42 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 593 | 42 | assert(ISD && "Invalid opcode"); | 594 | 42 | | 595 | 42 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); | 596 | 42 | | 597 | 42 | bool IsFloat = Ty->isFPOrFPVectorTy(); | 598 | 42 | // Assume that floating point arithmetic operations cost twice as much as | 599 | 42 | // integer operations. | 600 | 42 | unsigned OpCost = (IsFloat ? 20 : 1); | 601 | 42 | | 602 | 42 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { | 603 | 42 | // The operation is legal. Assume it costs 1. | 604 | 42 | // TODO: Once we have extract/insert subvector cost we need to use them. | 605 | 42 | return LT.first * OpCost; | 606 | 42 | } | 607 | 0 | | 608 | 0 | if (!TLI->isOperationExpand(ISD, LT.second)) { | 609 | 0 | // If the operation is custom lowered, then assume that the code is twice | 610 | 0 | // as expensive. | 611 | 0 | return LT.first * 2 * OpCost; | 612 | 0 | } | 613 | 0 | | 614 | 0 | // Else, assume that we need to scalarize this op. | 615 | 0 | // TODO: If one of the types get legalized by splitting, handle this | 616 | 0 | // similarly to what getCastInstrCost() does. 
| 617 | 0 | if (Ty->isVectorTy()) { | 618 | 0 | unsigned Num = Ty->getVectorNumElements(); | 619 | 0 | unsigned Cost = static_cast<T *>(this) | 620 | 0 | ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); | 621 | 0 | // Return the cost of multiple scalar invocation plus the cost of | 622 | 0 | // inserting and extracting the values. | 623 | 0 | return getScalarizationOverhead(Ty, Args) + Num * Cost; | 624 | 0 | } | 625 | 0 | | 626 | 0 | // We don't know anything about this scalar instruction. | 627 | 0 | return OpCost; | 628 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>) llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 589 | 13 | ArrayRef<const Value *> Args = ArrayRef<const Value *>()) { | 590 | 13 | // Check if any of the operands are vector operands. | 591 | 13 | const TargetLoweringBase *TLI = getTLI(); | 592 | 13 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 593 | 13 | assert(ISD && "Invalid opcode"); | 594 | 13 | | 595 | 13 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); | 596 | 13 | | 597 | 13 | bool IsFloat = Ty->isFPOrFPVectorTy(); | 598 | 13 | // Assume that floating point arithmetic operations cost twice as much as | 599 | 13 | // integer operations. | 600 | 13 | unsigned OpCost = (IsFloat ? 210 : 13 ); | 601 | 13 | | 602 | 13 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { | 603 | 11 | // The operation is legal. Assume it costs 1. | 604 | 11 | // TODO: Once we have extract/insert subvector cost we need to use them. 
| 605 | 11 | return LT.first * OpCost; | 606 | 11 | } | 607 | 2 | | 608 | 2 | if (!TLI->isOperationExpand(ISD, LT.second)) { | 609 | 0 | // If the operation is custom lowered, then assume that the code is twice | 610 | 0 | // as expensive. | 611 | 0 | return LT.first * 2 * OpCost; | 612 | 0 | } | 613 | 2 | | 614 | 2 | // Else, assume that we need to scalarize this op. | 615 | 2 | // TODO: If one of the types get legalized by splitting, handle this | 616 | 2 | // similarly to what getCastInstrCost() does. | 617 | 2 | if (Ty->isVectorTy()) { | 618 | 2 | unsigned Num = Ty->getVectorNumElements(); | 619 | 2 | unsigned Cost = static_cast<T *>(this) | 620 | 2 | ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); | 621 | 2 | // Return the cost of multiple scalar invocation plus the cost of | 622 | 2 | // inserting and extracting the values. | 623 | 2 | return getScalarizationOverhead(Ty, Args) + Num * Cost; | 624 | 2 | } | 625 | 0 | | 626 | 0 | // We don't know anything about this scalar instruction. | 627 | 0 | return OpCost; | 628 | 0 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 589 | 288 | ArrayRef<const Value *> Args = ArrayRef<const Value *>()) { | 590 | 288 | // Check if any of the operands are vector operands. | 591 | 288 | const TargetLoweringBase *TLI = getTLI(); | 592 | 288 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 593 | 288 | assert(ISD && "Invalid opcode"); | 594 | 288 | | 595 | 288 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); | 596 | 288 | | 597 | 288 | bool IsFloat = Ty->isFPOrFPVectorTy(); | 598 | 288 | // Assume that floating point arithmetic operations cost twice as much as | 599 | 288 | // integer operations. | 600 | 288 | unsigned OpCost = (IsFloat ? 270 : 1218 ); | 601 | 288 | | 602 | 288 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { | 603 | 288 | // The operation is legal. Assume it costs 1. | 604 | 288 | // TODO: Once we have extract/insert subvector cost we need to use them. | 605 | 288 | return LT.first * OpCost; | 606 | 288 | } | 607 | 0 | | 608 | 0 | if (!TLI->isOperationExpand(ISD, LT.second)) { | 609 | 0 | // If the operation is custom lowered, then assume that the code is twice | 610 | 0 | // as expensive. | 611 | 0 | return LT.first * 2 * OpCost; | 612 | 0 | } | 613 | 0 | | 614 | 0 | // Else, assume that we need to scalarize this op. | 615 | 0 | // TODO: If one of the types get legalized by splitting, handle this | 616 | 0 | // similarly to what getCastInstrCost() does. 
| 617 | 0 | if (Ty->isVectorTy()) { | 618 | 0 | unsigned Num = Ty->getVectorNumElements(); | 619 | 0 | unsigned Cost = static_cast<T *>(this) | 620 | 0 | ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); | 621 | 0 | // Return the cost of multiple scalar invocation plus the cost of | 622 | 0 | // inserting and extracting the values. | 623 | 0 | return getScalarizationOverhead(Ty, Args) + Num * Cost; | 624 | 0 | } | 625 | 0 | | 626 | 0 | // We don't know anything about this scalar instruction. | 627 | 0 | return OpCost; | 628 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 589 | 378 | ArrayRef<const Value *> Args = ArrayRef<const Value *>()) { | 590 | 378 | // Check if any of the operands are vector operands. | 591 | 378 | const TargetLoweringBase *TLI = getTLI(); | 592 | 378 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 593 | 378 | assert(ISD && "Invalid opcode"); | 594 | 378 | | 595 | 378 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); | 596 | 378 | | 597 | 378 | bool IsFloat = Ty->isFPOrFPVectorTy(); | 598 | 378 | // Assume that floating point arithmetic operations cost twice as much as | 599 | 378 | // integer operations. | 600 | 378 | unsigned OpCost = (IsFloat ? 20 : 1); | 601 | 378 | | 602 | 378 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { | 603 | 358 | // The operation is legal. Assume it costs 1. | 604 | 358 | // TODO: Once we have extract/insert subvector cost we need to use them. | 605 | 358 | return LT.first * OpCost; | 606 | 358 | } | 607 | 20 | | 608 | 20 | if (!TLI->isOperationExpand(ISD, LT.second)) { | 609 | 0 | // If the operation is custom lowered, then assume that the code is twice | 610 | 0 | // as expensive. | 611 | 0 | return LT.first * 2 * OpCost; | 612 | 0 | } | 613 | 20 | | 614 | 20 | // Else, assume that we need to scalarize this op. 
| 615 | 20 | // TODO: If one of the types get legalized by splitting, handle this | 616 | 20 | // similarly to what getCastInstrCost() does. | 617 | 20 | if (Ty->isVectorTy()) { | 618 | 20 | unsigned Num = Ty->getVectorNumElements(); | 619 | 20 | unsigned Cost = static_cast<T *>(this) | 620 | 20 | ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); | 621 | 20 | // Return the cost of multiple scalar invocation plus the cost of | 622 | 20 | // inserting and extracting the values. | 623 | 20 | return getScalarizationOverhead(Ty, Args) + Num * Cost; | 624 | 20 | } | 625 | 0 | | 626 | 0 | // We don't know anything about this scalar instruction. | 627 | 0 | return OpCost; | 628 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>) Line | Count | Source | 589 | 88.5k | ArrayRef<const Value *> Args = ArrayRef<const Value *>()) { | 590 | 88.5k | // Check if any of the operands are vector operands. | 591 | 88.5k | const TargetLoweringBase *TLI = getTLI(); | 592 | 88.5k | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 593 | 88.5k | assert(ISD && "Invalid opcode"); | 594 | 88.5k | | 595 | 88.5k | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); | 596 | 88.5k | | 597 | 88.5k | bool IsFloat = Ty->isFPOrFPVectorTy(); | 598 | 88.5k | // Assume that floating point arithmetic operations cost twice as much as | 599 | 88.5k | // integer operations. | 600 | 88.5k | unsigned OpCost = (IsFloat ? 21.15k : 187.3k ); | 601 | 88.5k | | 602 | 88.5k | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { | 603 | 81.8k | // The operation is legal. Assume it costs 1. | 604 | 81.8k | // TODO: Once we have extract/insert subvector cost we need to use them. | 605 | 81.8k | return LT.first * OpCost; | 606 | 81.8k | } | 607 | 6.65k | | 608 | 6.65k | if (!TLI->isOperationExpand(ISD, LT.second)) { | 609 | 82 | // If the operation is custom lowered, then assume that the code is twice | 610 | 82 | // as expensive. 
| 611 | 82 | return LT.first * 2 * OpCost; | 612 | 82 | } | 613 | 6.56k | | 614 | 6.56k | // Else, assume that we need to scalarize this op. | 615 | 6.56k | // TODO: If one of the types get legalized by splitting, handle this | 616 | 6.56k | // similarly to what getCastInstrCost() does. | 617 | 6.56k | if (Ty->isVectorTy()) { | 618 | 60 | unsigned Num = Ty->getVectorNumElements(); | 619 | 60 | unsigned Cost = static_cast<T *>(this) | 620 | 60 | ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); | 621 | 60 | // Return the cost of multiple scalar invocation plus the cost of | 622 | 60 | // inserting and extracting the values. | 623 | 60 | return getScalarizationOverhead(Ty, Args) + Num * Cost; | 624 | 60 | } | 625 | 6.50k | | 626 | 6.50k | // We don't know anything about this scalar instruction. | 627 | 6.50k | return OpCost; | 628 | 6.50k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>) |
629 | | |
630 | | unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, |
631 | 8.01k | Type *SubTp) { |
632 | 8.01k | switch (Kind) { |
633 | 8.01k | case TTI::SK_Broadcast: |
634 | 51 | return getBroadcastShuffleOverhead(Tp); |
635 | 8.01k | case TTI::SK_Select: |
636 | 3.96k | case TTI::SK_Reverse: |
637 | 3.96k | case TTI::SK_Transpose: |
638 | 3.96k | case TTI::SK_PermuteSingleSrc: |
639 | 3.96k | case TTI::SK_PermuteTwoSrc: |
640 | 3.96k | return getPermuteShuffleOverhead(Tp); |
641 | 3.96k | case TTI::SK_ExtractSubvector: |
642 | 3.93k | return getExtractSubvectorOverhead(Tp, Index, SubTp); |
643 | 3.96k | case TTI::SK_InsertSubvector: |
644 | 73 | return getInsertSubvectorOverhead(Tp, Index, SubTp); |
645 | 0 | } |
646 | 0 | llvm_unreachable("Unknown TTI::ShuffleKind"); |
647 | 0 | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*) Line | Count | Source | 631 | 7.21k | Type *SubTp) { | 632 | 7.21k | switch (Kind) { | 633 | 7.21k | case TTI::SK_Broadcast: | 634 | 2 | return getBroadcastShuffleOverhead(Tp); | 635 | 7.21k | case TTI::SK_Select: | 636 | 3.32k | case TTI::SK_Reverse: | 637 | 3.32k | case TTI::SK_Transpose: | 638 | 3.32k | case TTI::SK_PermuteSingleSrc: | 639 | 3.32k | case TTI::SK_PermuteTwoSrc: | 640 | 3.32k | return getPermuteShuffleOverhead(Tp); | 641 | 3.89k | case TTI::SK_ExtractSubvector: | 642 | 3.89k | return getExtractSubvectorOverhead(Tp, Index, SubTp); | 643 | 3.32k | case TTI::SK_InsertSubvector: | 644 | 0 | return getInsertSubvectorOverhead(Tp, Index, SubTp); | 645 | 0 | } | 646 | 0 | llvm_unreachable("Unknown TTI::ShuffleKind"); | 647 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*) Line | Count | Source | 631 | 81 | Type *SubTp) { | 632 | 81 | switch (Kind) { | 633 | 81 | case TTI::SK_Broadcast: | 634 | 2 | return getBroadcastShuffleOverhead(Tp); | 635 | 81 | case TTI::SK_Select: | 636 | 54 | case TTI::SK_Reverse: | 637 | 54 | case TTI::SK_Transpose: | 638 | 54 | case TTI::SK_PermuteSingleSrc: | 639 | 54 | case TTI::SK_PermuteTwoSrc: | 640 | 54 | return getPermuteShuffleOverhead(Tp); | 641 | 54 | case TTI::SK_ExtractSubvector: | 642 | 25 | return getExtractSubvectorOverhead(Tp, Index, SubTp); | 643 | 54 | case TTI::SK_InsertSubvector: | 644 | 0 | return getInsertSubvectorOverhead(Tp, Index, SubTp); | 645 | 0 | } | 646 | 0 | llvm_unreachable("Unknown TTI::ShuffleKind"); | 647 | 0 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*) Line | Count | Source | 631 | 96 | Type *SubTp) { | 632 | 96 | switch (Kind) { | 633 | 96 | case TTI::SK_Broadcast: | 634 | 0 | return getBroadcastShuffleOverhead(Tp); | 635 | 96 | case TTI::SK_Select: | 636 | 96 | case TTI::SK_Reverse: | 637 | 96 | case TTI::SK_Transpose: | 638 | 96 | case TTI::SK_PermuteSingleSrc: | 639 | 96 | case TTI::SK_PermuteTwoSrc: | 640 | 96 | return getPermuteShuffleOverhead(Tp); | 641 | 96 | case TTI::SK_ExtractSubvector: | 642 | 0 | return getExtractSubvectorOverhead(Tp, Index, SubTp); | 643 | 96 | case TTI::SK_InsertSubvector: | 644 | 0 | return getInsertSubvectorOverhead(Tp, Index, SubTp); | 645 | 0 | } | 646 | 0 | llvm_unreachable("Unknown TTI::ShuffleKind"); | 647 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*) Line | Count | Source | 631 | 619 | Type *SubTp) { | 632 | 619 | switch (Kind) { | 633 | 619 | case TTI::SK_Broadcast: | 634 | 47 | return getBroadcastShuffleOverhead(Tp); | 635 | 619 | case TTI::SK_Select: | 636 | 487 | case TTI::SK_Reverse: | 637 | 487 | case TTI::SK_Transpose: | 638 | 487 | case TTI::SK_PermuteSingleSrc: | 639 | 487 | case TTI::SK_PermuteTwoSrc: | 640 | 487 | return getPermuteShuffleOverhead(Tp); | 641 | 487 | case TTI::SK_ExtractSubvector: | 642 | 12 | return getExtractSubvectorOverhead(Tp, Index, SubTp); | 643 | 487 | case TTI::SK_InsertSubvector: | 644 | 73 | return getInsertSubvectorOverhead(Tp, Index, SubTp); | 645 | 0 | } | 646 | 0 | llvm_unreachable("Unknown TTI::ShuffleKind"); | 647 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*) |
648 | | |
649 | | unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
650 | 120k | const Instruction *I = nullptr) { |
651 | 120k | const TargetLoweringBase *TLI = getTLI(); |
652 | 120k | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
653 | 120k | assert(ISD && "Invalid opcode"); |
654 | 120k | std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src); |
655 | 120k | std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst); |
656 | 120k | |
657 | 120k | // Check for NOOP conversions. |
658 | 120k | if (SrcLT.first == DstLT.first && |
659 | 120k | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()113k ) { |
660 | 41.2k | |
661 | 41.2k | // Bitcast between types that are legalized to the same type are free. |
662 | 41.2k | if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc29.3k ) |
663 | 14.2k | return 0; |
664 | 106k | } |
665 | 106k | |
666 | 106k | if (Opcode == Instruction::Trunc && |
667 | 106k | TLI->isTruncateFree(SrcLT.second, DstLT.second)44.4k ) |
668 | 24.4k | return 0; |
669 | 81.6k | |
670 | 81.6k | if (Opcode == Instruction::ZExt && |
671 | 81.6k | TLI->isZExtFree(SrcLT.second, DstLT.second)20.6k ) |
672 | 4.28k | return 0; |
673 | 77.3k | |
674 | 77.3k | if (Opcode == Instruction::AddrSpaceCast && |
675 | 77.3k | TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(), |
676 | 15 | Dst->getPointerAddressSpace())) |
677 | 6 | return 0; |
678 | 77.3k | |
679 | 77.3k | // If this is a zext/sext of a load, return 0 if the corresponding |
680 | 77.3k | // extending load exists on target. |
681 | 77.3k | if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt60.9k ) && |
682 | 77.3k | I30.8k && isa<LoadInst>(I->getOperand(0))3.10k ) { |
683 | 785 | EVT ExtVT = EVT::getEVT(Dst); |
684 | 785 | EVT LoadVT = EVT::getEVT(Src); |
685 | 785 | unsigned LType = |
686 | 785 | ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD519 : ISD::SEXTLOAD266 ); |
687 | 785 | if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT)) |
688 | 572 | return 0; |
689 | 76.7k | } |
690 | 76.7k | |
691 | 76.7k | // If the cast is marked as legal (or promote) then assume low cost. |
692 | 76.7k | if (SrcLT.first == DstLT.first && |
693 | 76.7k | TLI->isOperationLegalOrPromote(ISD, DstLT.second)69.8k ) |
694 | 65.4k | return 1; |
695 | 11.3k | |
696 | 11.3k | // Handle scalar conversions. |
697 | 11.3k | if (!Src->isVectorTy() && !Dst->isVectorTy()3.34k ) { |
698 | 3.34k | // Scalar bitcasts are usually free. |
699 | 3.34k | if (Opcode == Instruction::BitCast) |
700 | 67 | return 0; |
701 | 3.28k | |
702 | 3.28k | // Just check the op cost. If the operation is legal then assume it costs |
703 | 3.28k | // 1. |
704 | 3.28k | if (!TLI->isOperationExpand(ISD, DstLT.second)) |
705 | 2.98k | return 1; |
706 | 298 | |
707 | 298 | // Assume that illegal scalar instruction are expensive. |
708 | 298 | return 4; |
709 | 298 | } |
710 | 7.99k | |
711 | 7.99k | // Check vector-to-vector casts. |
712 | 7.99k | if (Dst->isVectorTy() && Src->isVectorTy()) { |
713 | 7.99k | // If the cast is between same-sized registers, then the check is simple. |
714 | 7.99k | if (SrcLT.first == DstLT.first && |
715 | 7.99k | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()2.59k ) { |
716 | 2.22k | |
717 | 2.22k | // Assume that Zext is done using AND. |
718 | 2.22k | if (Opcode == Instruction::ZExt) |
719 | 1.07k | return 1; |
720 | 1.15k | |
721 | 1.15k | // Assume that sext is done using SHL and SRA. |
722 | 1.15k | if (Opcode == Instruction::SExt) |
723 | 256 | return 2; |
724 | 896 | |
725 | 896 | // Just check the op cost. If the operation is legal then assume it |
726 | 896 | // costs |
727 | 896 | // 1 and multiply by the type-legalization overhead. |
728 | 896 | if (!TLI->isOperationExpand(ISD, DstLT.second)) |
729 | 36 | return SrcLT.first * 1; |
730 | 6.62k | } |
731 | 6.62k | |
732 | 6.62k | // If we are legalizing by splitting, query the concrete TTI for the cost |
733 | 6.62k | // of casting the original vector twice. We also need to factor in the |
734 | 6.62k | // cost of the split itself. Count that as 1, to be consistent with |
735 | 6.62k | // TLI->getTypeLegalizationCost(). |
736 | 6.62k | if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == |
737 | 6.62k | TargetLowering::TypeSplitVector) || |
738 | 6.62k | (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == |
739 | 4.79k | TargetLowering::TypeSplitVector)) { |
740 | 4.79k | Type *SplitDst = VectorType::get(Dst->getVectorElementType(), |
741 | 4.79k | Dst->getVectorNumElements() / 2); |
742 | 4.79k | Type *SplitSrc = VectorType::get(Src->getVectorElementType(), |
743 | 4.79k | Src->getVectorNumElements() / 2); |
744 | 4.79k | T *TTI = static_cast<T *>(this); |
745 | 4.79k | return TTI->getVectorSplitCost() + |
746 | 4.79k | (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I)); |
747 | 4.79k | } |
748 | 1.83k | |
749 | 1.83k | // In other cases where the source or destination are illegal, assume |
750 | 1.83k | // the operation will get scalarized. |
751 | 1.83k | unsigned Num = Dst->getVectorNumElements(); |
752 | 1.83k | unsigned Cost = static_cast<T *>(this)->getCastInstrCost( |
753 | 1.83k | Opcode, Dst->getScalarType(), Src->getScalarType(), I); |
754 | 1.83k | |
755 | 1.83k | // Return the cost of multiple scalar invocation plus the cost of |
756 | 1.83k | // inserting and extracting the values. |
757 | 1.83k | return getScalarizationOverhead(Dst, true, true) + Num * Cost; |
758 | 1.83k | } |
759 | 0 | |
760 | 0 | // We already handled vector-to-vector and scalar-to-scalar conversions. |
761 | 0 | // This |
762 | 0 | // is where we handle bitcast between vectors and scalars. We need to assume |
763 | 0 | // that the conversion is scalarized in one way or another. |
764 | 0 | if (Opcode == Instruction::BitCast) |
765 | 0 | // Illegal bitcasts are done by storing and loading from a stack slot. |
766 | 0 | return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true) |
767 | 0 | : 0) + |
768 | 0 | (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false) |
769 | 0 | : 0); |
770 | 0 |
|
771 | 0 | llvm_unreachable("Unhandled cast"); |
772 | 0 | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Line | Count | Source | 650 | 102k | const Instruction *I = nullptr) { | 651 | 102k | const TargetLoweringBase *TLI = getTLI(); | 652 | 102k | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 653 | 102k | assert(ISD && "Invalid opcode"); | 654 | 102k | std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src); | 655 | 102k | std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst); | 656 | 102k | | 657 | 102k | // Check for NOOP conversions. | 658 | 102k | if (SrcLT.first == DstLT.first && | 659 | 102k | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()100k ) { | 660 | 34.0k | | 661 | 34.0k | // Bitcast between types that are legalized to the same type are free. | 662 | 34.0k | if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc23.3k ) | 663 | 12.3k | return 0; | 664 | 89.9k | } | 665 | 89.9k | | 666 | 89.9k | if (Opcode == Instruction::Trunc && | 667 | 89.9k | TLI->isTruncateFree(SrcLT.second, DstLT.second)40.1k ) | 668 | 23.3k | return 0; | 669 | 66.6k | | 670 | 66.6k | if (Opcode == Instruction::ZExt && | 671 | 66.6k | TLI->isZExtFree(SrcLT.second, DstLT.second)16.9k ) | 672 | 3.97k | return 0; | 673 | 62.6k | | 674 | 62.6k | if (Opcode == Instruction::AddrSpaceCast && | 675 | 62.6k | TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(), | 676 | 0 | Dst->getPointerAddressSpace())) | 677 | 0 | return 0; | 678 | 62.6k | | 679 | 62.6k | // If this is a zext/sext of a load, return 0 if the corresponding | 680 | 62.6k | // extending load exists on target. | 681 | 62.6k | if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt49.7k ) && | 682 | 62.6k | I26.0k && isa<LoadInst>(I->getOperand(0))0 ) { | 683 | 0 | EVT ExtVT = EVT::getEVT(Dst); | 684 | 0 | EVT LoadVT = EVT::getEVT(Src); | 685 | 0 | unsigned LType = | 686 | 0 | ((Opcode == Instruction::ZExt) ? 
ISD::ZEXTLOAD : ISD::SEXTLOAD); | 687 | 0 | if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT)) | 688 | 0 | return 0; | 689 | 62.6k | } | 690 | 62.6k | | 691 | 62.6k | // If the cast is marked as legal (or promote) then assume low cost. | 692 | 62.6k | if (SrcLT.first == DstLT.first && | 693 | 62.6k | TLI->isOperationLegalOrPromote(ISD, DstLT.second)61.1k ) | 694 | 60.1k | return 1; | 695 | 2.52k | | 696 | 2.52k | // Handle scalar conversions. | 697 | 2.52k | if (!Src->isVectorTy() && !Dst->isVectorTy()1.10k ) { | 698 | 1.10k | // Scalar bitcasts are usually free. | 699 | 1.10k | if (Opcode == Instruction::BitCast) | 700 | 0 | return 0; | 701 | 1.10k | | 702 | 1.10k | // Just check the op cost. If the operation is legal then assume it costs | 703 | 1.10k | // 1. | 704 | 1.10k | if (!TLI->isOperationExpand(ISD, DstLT.second)) | 705 | 1.10k | return 1; | 706 | 0 | | 707 | 0 | // Assume that illegal scalar instruction are expensive. | 708 | 0 | return 4; | 709 | 0 | } | 710 | 1.42k | | 711 | 1.42k | // Check vector-to-vector casts. | 712 | 1.42k | if (Dst->isVectorTy() && Src->isVectorTy()) { | 713 | 1.42k | // If the cast is between same-sized registers, then the check is simple. | 714 | 1.42k | if (SrcLT.first == DstLT.first && | 715 | 1.42k | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()0 ) { | 716 | 0 |
| 717 | 0 | // Assume that Zext is done using AND. | 718 | 0 | if (Opcode == Instruction::ZExt) | 719 | 0 | return 1; | 720 | 0 | | 721 | 0 | // Assume that sext is done using SHL and SRA. | 722 | 0 | if (Opcode == Instruction::SExt) | 723 | 0 | return 2; | 724 | 0 | | 725 | 0 | // Just check the op cost. If the operation is legal then assume it | 726 | 0 | // costs | 727 | 0 | // 1 and multiply by the type-legalization overhead. | 728 | 0 | if (!TLI->isOperationExpand(ISD, DstLT.second)) | 729 | 0 | return SrcLT.first * 1; | 730 | 1.42k | } | 731 | 1.42k | | 732 | 1.42k | // If we are legalizing by splitting, query the concrete TTI for the cost | 733 | 1.42k | // of casting the original vector twice. We also need to factor in the | 734 | 1.42k | // cost of the split itself. Count that as 1, to be consistent with | 735 | 1.42k | // TLI->getTypeLegalizationCost(). | 736 | 1.42k | if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == | 737 | 1.42k | TargetLowering::TypeSplitVector) || | 738 | 1.42k | (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == | 739 | 1.38k | TargetLowering::TypeSplitVector)) { | 740 | 1.38k | Type *SplitDst = VectorType::get(Dst->getVectorElementType(), | 741 | 1.38k | Dst->getVectorNumElements() / 2); | 742 | 1.38k | Type *SplitSrc = VectorType::get(Src->getVectorElementType(), | 743 | 1.38k | Src->getVectorNumElements() / 2); | 744 | 1.38k | T *TTI = static_cast<T *>(this); | 745 | 1.38k | return TTI->getVectorSplitCost() + | 746 | 1.38k | (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I)); | 747 | 1.38k | } | 748 | 36 | | 749 | 36 | // In other cases where the source or destination are illegal, assume | 750 | 36 | // the operation will get scalarized. 
| 751 | 36 | unsigned Num = Dst->getVectorNumElements(); | 752 | 36 | unsigned Cost = static_cast<T *>(this)->getCastInstrCost( | 753 | 36 | Opcode, Dst->getScalarType(), Src->getScalarType(), I); | 754 | 36 | | 755 | 36 | // Return the cost of multiple scalar invocation plus the cost of | 756 | 36 | // inserting and extracting the values. | 757 | 36 | return getScalarizationOverhead(Dst, true, true) + Num * Cost; | 758 | 36 | } | 759 | 0 | | 760 | 0 | // We already handled vector-to-vector and scalar-to-scalar conversions. | 761 | 0 | // This | 762 | 0 | // is where we handle bitcast between vectors and scalars. We need to assume | 763 | 0 | // that the conversion is scalarized in one way or another. | 764 | 0 | if (Opcode == Instruction::BitCast) | 765 | 0 | // Illegal bitcasts are done by storing and loading from a stack slot. | 766 | 0 | return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true) | 767 | 0 | : 0) + | 768 | 0 | (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false) | 769 | 0 | : 0); | 770 | 0 |
| 771 | 0 | llvm_unreachable("Unhandled cast"); | 772 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Line | Count | Source | 650 | 17 | const Instruction *I = nullptr) { | 651 | 17 | const TargetLoweringBase *TLI = getTLI(); | 652 | 17 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 653 | 17 | assert(ISD && "Invalid opcode"); | 654 | 17 | std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src); | 655 | 17 | std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst); | 656 | 17 | | 657 | 17 | // Check for NOOP conversions. | 658 | 17 | if (SrcLT.first == DstLT.first && | 659 | 17 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { | 660 | 15 | | 661 | 15 | // Bitcast between types that are legalized to the same type are free. | 662 | 15 | if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) | 663 | 0 | return 0; | 664 | 17 | } | 665 | 17 | | 666 | 17 | if (Opcode == Instruction::Trunc && | 667 | 17 | TLI->isTruncateFree(SrcLT.second, DstLT.second)0 ) | 668 | 0 | return 0; | 669 | 17 | | 670 | 17 | if (Opcode == Instruction::ZExt && | 671 | 17 | TLI->isZExtFree(SrcLT.second, DstLT.second)2 ) | 672 | 2 | return 0; | 673 | 15 | | 674 | 15 | if (Opcode == Instruction::AddrSpaceCast && | 675 | 15 | TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(), | 676 | 15 | Dst->getPointerAddressSpace())) | 677 | 6 | return 0; | 678 | 9 | | 679 | 9 | // If this is a zext/sext of a load, return 0 if the corresponding | 680 | 9 | // extending load exists on target. 
| 681 | 9 | if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && | 682 | 9 | I0 && isa<LoadInst>(I->getOperand(0))0 ) { | 683 | 0 | EVT ExtVT = EVT::getEVT(Dst); | 684 | 0 | EVT LoadVT = EVT::getEVT(Src); | 685 | 0 | unsigned LType = | 686 | 0 | ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD); | 687 | 0 | if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT)) | 688 | 0 | return 0; | 689 | 9 | } | 690 | 9 | | 691 | 9 | // If the cast is marked as legal (or promote) then assume low cost. | 692 | 9 | if (SrcLT.first == DstLT.first && | 693 | 9 | TLI->isOperationLegalOrPromote(ISD, DstLT.second)) | 694 | 0 | return 1; | 695 | 9 | | 696 | 9 | // Handle scalar conversions. | 697 | 9 | if (!Src->isVectorTy() && !Dst->isVectorTy()3 ) { | 698 | 3 | // Scalar bitcasts are usually free. | 699 | 3 | if (Opcode == Instruction::BitCast) | 700 | 0 | return 0; | 701 | 3 | | 702 | 3 | // Just check the op cost. If the operation is legal then assume it costs | 703 | 3 | // 1. | 704 | 3 | if (!TLI->isOperationExpand(ISD, DstLT.second)) | 705 | 3 | return 1; | 706 | 0 | | 707 | 0 | // Assume that illegal scalar instruction are expensive. | 708 | 0 | return 4; | 709 | 0 | } | 710 | 6 | | 711 | 6 | // Check vector-to-vector casts. | 712 | 6 | if (Dst->isVectorTy() && Src->isVectorTy()) { | 713 | 6 | // If the cast is between same-sized registers, then the check is simple. | 714 | 6 | if (SrcLT.first == DstLT.first && | 715 | 6 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { | 716 | 6 | | 717 | 6 | // Assume that Zext is done using AND. | 718 | 6 | if (Opcode == Instruction::ZExt) | 719 | 0 | return 1; | 720 | 6 | | 721 | 6 | // Assume that sext is done using SHL and SRA. | 722 | 6 | if (Opcode == Instruction::SExt) | 723 | 0 | return 2; | 724 | 6 | | 725 | 6 | // Just check the op cost. If the operation is legal then assume it | 726 | 6 | // costs | 727 | 6 | // 1 and multiply by the type-legalization overhead. 
| 728 | 6 | if (!TLI->isOperationExpand(ISD, DstLT.second)) | 729 | 0 | return SrcLT.first * 1; | 730 | 6 | } | 731 | 6 | | 732 | 6 | // If we are legalizing by splitting, query the concrete TTI for the cost | 733 | 6 | // of casting the original vector twice. We also need to factor in the | 734 | 6 | // cost of the split itself. Count that as 1, to be consistent with | 735 | 6 | // TLI->getTypeLegalizationCost(). | 736 | 6 | if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == | 737 | 6 | TargetLowering::TypeSplitVector) || | 738 | 6 | (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == | 739 | 4 | TargetLowering::TypeSplitVector)) { | 740 | 4 | Type *SplitDst = VectorType::get(Dst->getVectorElementType(), | 741 | 4 | Dst->getVectorNumElements() / 2); | 742 | 4 | Type *SplitSrc = VectorType::get(Src->getVectorElementType(), | 743 | 4 | Src->getVectorNumElements() / 2); | 744 | 4 | T *TTI = static_cast<T *>(this); | 745 | 4 | return TTI->getVectorSplitCost() + | 746 | 4 | (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I)); | 747 | 4 | } | 748 | 2 | | 749 | 2 | // In other cases where the source or destination are illegal, assume | 750 | 2 | // the operation will get scalarized. | 751 | 2 | unsigned Num = Dst->getVectorNumElements(); | 752 | 2 | unsigned Cost = static_cast<T *>(this)->getCastInstrCost( | 753 | 2 | Opcode, Dst->getScalarType(), Src->getScalarType(), I); | 754 | 2 | | 755 | 2 | // Return the cost of multiple scalar invocation plus the cost of | 756 | 2 | // inserting and extracting the values. | 757 | 2 | return getScalarizationOverhead(Dst, true, true) + Num * Cost; | 758 | 2 | } | 759 | 0 | | 760 | 0 | // We already handled vector-to-vector and scalar-to-scalar conversions. | 761 | 0 | // This | 762 | 0 | // is where we handle bitcast between vectors and scalars. We need to assume | 763 | 0 | // that the conversion is scalarized in one way or another. 
| 764 | 0 | if (Opcode == Instruction::BitCast) | 765 | 0 | // Illegal bitcasts are done by storing and loading from a stack slot. | 766 | 0 | return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true) | 767 | 0 | : 0) + | 768 | 0 | (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false) | 769 | 0 | : 0); | 770 | 0 |
| 771 | 0 | llvm_unreachable("Unhandled cast"); | 772 | 0 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Line | Count | Source | 650 | 1.18k | const Instruction *I = nullptr) { | 651 | 1.18k | const TargetLoweringBase *TLI = getTLI(); | 652 | 1.18k | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 653 | 1.18k | assert(ISD && "Invalid opcode"); | 654 | 1.18k | std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src); | 655 | 1.18k | std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst); | 656 | 1.18k | | 657 | 1.18k | // Check for NOOP conversions. | 658 | 1.18k | if (SrcLT.first == DstLT.first && | 659 | 1.18k | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()856 ) { | 660 | 606 | | 661 | 606 | // Bitcast between types that are legalized to the same type are free. | 662 | 606 | if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc302 ) | 663 | 316 | return 0; | 664 | 865 | } | 665 | 865 | | 666 | 865 | if (Opcode == Instruction::Trunc && | 667 | 865 | TLI->isTruncateFree(SrcLT.second, DstLT.second)168 ) | 668 | 0 | return 0; | 669 | 865 | | 670 | 865 | if (Opcode == Instruction::ZExt && | 671 | 865 | TLI->isZExtFree(SrcLT.second, DstLT.second)258 ) | 672 | 0 | return 0; | 673 | 865 | | 674 | 865 | if (Opcode == Instruction::AddrSpaceCast && | 675 | 865 | TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(), | 676 | 0 | Dst->getPointerAddressSpace())) | 677 | 0 | return 0; | 678 | 865 | | 679 | 865 | // If this is a zext/sext of a load, return 0 if the corresponding | 680 | 865 | // extending load exists on target. | 681 | 865 | if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt607 ) && | 682 | 865 | I335 && isa<LoadInst>(I->getOperand(0))0 ) { | 683 | 0 | EVT ExtVT = EVT::getEVT(Dst); | 684 | 0 | EVT LoadVT = EVT::getEVT(Src); | 685 | 0 | unsigned LType = | 686 | 0 | ((Opcode == Instruction::ZExt) ? 
ISD::ZEXTLOAD : ISD::SEXTLOAD); | 687 | 0 | if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT)) | 688 | 0 | return 0; | 689 | 865 | } | 690 | 865 | | 691 | 865 | // If the cast is marked as legal (or promote) then assume low cost. | 692 | 865 | if (SrcLT.first == DstLT.first && | 693 | 865 | TLI->isOperationLegalOrPromote(ISD, DstLT.second)540 ) | 694 | 395 | return 1; | 695 | 470 | | 696 | 470 | // Handle scalar conversions. | 697 | 470 | if (!Src->isVectorTy() && !Dst->isVectorTy()116 ) { | 698 | 116 | // Scalar bitcasts are usually free. | 699 | 116 | if (Opcode == Instruction::BitCast) | 700 | 58 | return 0; | 701 | 58 | | 702 | 58 | // Just check the op cost. If the operation is legal then assume it costs | 703 | 58 | // 1. | 704 | 58 | if (!TLI->isOperationExpand(ISD, DstLT.second)) | 705 | 58 | return 1; | 706 | 0 | | 707 | 0 | // Assume that illegal scalar instruction are expensive. | 708 | 0 | return 4; | 709 | 0 | } | 710 | 354 | | 711 | 354 | // Check vector-to-vector casts. | 712 | 354 | if (Dst->isVectorTy() && Src->isVectorTy()) { | 713 | 354 | // If the cast is between same-sized registers, then the check is simple. | 714 | 354 | if (SrcLT.first == DstLT.first && | 715 | 354 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()144 ) { | 716 | 66 | | 717 | 66 | // Assume that Zext is done using AND. | 718 | 66 | if (Opcode == Instruction::ZExt) | 719 | 0 | return 1; | 720 | 66 | | 721 | 66 | // Assume that sext is done using SHL and SRA. | 722 | 66 | if (Opcode == Instruction::SExt) | 723 | 0 | return 2; | 724 | 66 | | 725 | 66 | // Just check the op cost. If the operation is legal then assume it | 726 | 66 | // costs | 727 | 66 | // 1 and multiply by the type-legalization overhead. 
| 728 | 66 | if (!TLI->isOperationExpand(ISD, DstLT.second)) | 729 | 12 | return SrcLT.first * 1; | 730 | 342 | } | 731 | 342 | | 732 | 342 | // If we are legalizing by splitting, query the concrete TTI for the cost | 733 | 342 | // of casting the original vector twice. We also need to factor in the | 734 | 342 | // cost of the split itself. Count that as 1, to be consistent with | 735 | 342 | // TLI->getTypeLegalizationCost(). | 736 | 342 | if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == | 737 | 342 | TargetLowering::TypeSplitVector) || | 738 | 342 | (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == | 739 | 228 | TargetLowering::TypeSplitVector)) { | 740 | 228 | Type *SplitDst = VectorType::get(Dst->getVectorElementType(), | 741 | 228 | Dst->getVectorNumElements() / 2); | 742 | 228 | Type *SplitSrc = VectorType::get(Src->getVectorElementType(), | 743 | 228 | Src->getVectorNumElements() / 2); | 744 | 228 | T *TTI = static_cast<T *>(this); | 745 | 228 | return TTI->getVectorSplitCost() + | 746 | 228 | (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I)); | 747 | 228 | } | 748 | 114 | | 749 | 114 | // In other cases where the source or destination are illegal, assume | 750 | 114 | // the operation will get scalarized. | 751 | 114 | unsigned Num = Dst->getVectorNumElements(); | 752 | 114 | unsigned Cost = static_cast<T *>(this)->getCastInstrCost( | 753 | 114 | Opcode, Dst->getScalarType(), Src->getScalarType(), I); | 754 | 114 | | 755 | 114 | // Return the cost of multiple scalar invocation plus the cost of | 756 | 114 | // inserting and extracting the values. | 757 | 114 | return getScalarizationOverhead(Dst, true, true) + Num * Cost; | 758 | 114 | } | 759 | 0 | | 760 | 0 | // We already handled vector-to-vector and scalar-to-scalar conversions. | 761 | 0 | // This | 762 | 0 | // is where we handle bitcast between vectors and scalars. 
We need to assume | 763 | 0 | // that the conversion is scalarized in one way or another. | 764 | 0 | if (Opcode == Instruction::BitCast) | 765 | 0 | // Illegal bitcasts are done by storing and loading from a stack slot. | 766 | 0 | return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true) | 767 | 0 | : 0) + | 768 | 0 | (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false) | 769 | 0 | : 0); | 770 | 0 |
| 771 | 0 | llvm_unreachable("Unhandled cast"); | 772 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Line | Count | Source | 650 | 373 | const Instruction *I = nullptr) { | 651 | 373 | const TargetLoweringBase *TLI = getTLI(); | 652 | 373 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 653 | 373 | assert(ISD && "Invalid opcode"); | 654 | 373 | std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src); | 655 | 373 | std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst); | 656 | 373 | | 657 | 373 | // Check for NOOP conversions. | 658 | 373 | if (SrcLT.first == DstLT.first && | 659 | 373 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()372 ) { | 660 | 159 | | 661 | 159 | // Bitcast between types that are legalized to the same type are free. 
| 662 | 159 | if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) | 663 | 0 | return 0; | 664 | 373 | } | 665 | 373 | | 666 | 373 | if (Opcode == Instruction::Trunc && | 667 | 373 | TLI->isTruncateFree(SrcLT.second, DstLT.second)172 ) | 668 | 172 | return 0; | 669 | 201 | | 670 | 201 | if (Opcode == Instruction::ZExt && | 671 | 201 | TLI->isZExtFree(SrcLT.second, DstLT.second)16 ) | 672 | 0 | return 0; | 673 | 201 | | 674 | 201 | if (Opcode == Instruction::AddrSpaceCast && | 675 | 201 | TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(), | 676 | 0 | Dst->getPointerAddressSpace())) | 677 | 0 | return 0; | 678 | 201 | | 679 | 201 | // If this is a zext/sext of a load, return 0 if the corresponding | 680 | 201 | // extending load exists on target. | 681 | 201 | if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt185 ) && | 682 | 201 | I21 && isa<LoadInst>(I->getOperand(0))0 ) { | 683 | 0 | EVT ExtVT = EVT::getEVT(Dst); | 684 | 0 | EVT LoadVT = EVT::getEVT(Src); | 685 | 0 | unsigned LType = | 686 | 0 | ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD); | 687 | 0 | if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT)) | 688 | 0 | return 0; | 689 | 201 | } | 690 | 201 | | 691 | 201 | // If the cast is marked as legal (or promote) then assume low cost. | 692 | 201 | if (SrcLT.first == DstLT.first && | 693 | 201 | TLI->isOperationLegalOrPromote(ISD, DstLT.second)200 ) | 694 | 200 | return 1; | 695 | 1 | | 696 | 1 | // Handle scalar conversions. | 697 | 1 | if (!Src->isVectorTy() && !Dst->isVectorTy()0 ) { | 698 | 0 | // Scalar bitcasts are usually free. | 699 | 0 | if (Opcode == Instruction::BitCast) | 700 | 0 | return 0; | 701 | 0 | | 702 | 0 | // Just check the op cost. If the operation is legal then assume it costs | 703 | 0 | // 1. | 704 | 0 | if (!TLI->isOperationExpand(ISD, DstLT.second)) | 705 | 0 | return 1; | 706 | 0 | | 707 | 0 | // Assume that illegal scalar instruction are expensive. 
| 708 | 0 | return 4; | 709 | 0 | } | 710 | 1 | | 711 | 1 | // Check vector-to-vector casts. | 712 | 1 | if (Dst->isVectorTy() && Src->isVectorTy()) { | 713 | 1 | // If the cast is between same-sized registers, then the check is simple. | 714 | 1 | if (SrcLT.first == DstLT.first && | 715 | 1 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()0 ) { | 716 | 0 |
| 717 | 0 | // Assume that Zext is done using AND. | 718 | 0 | if (Opcode == Instruction::ZExt) | 719 | 0 | return 1; | 720 | 0 | | 721 | 0 | // Assume that sext is done using SHL and SRA. | 722 | 0 | if (Opcode == Instruction::SExt) | 723 | 0 | return 2; | 724 | 0 | | 725 | 0 | // Just check the op cost. If the operation is legal then assume it | 726 | 0 | // costs | 727 | 0 | // 1 and multiply by the type-legalization overhead. | 728 | 0 | if (!TLI->isOperationExpand(ISD, DstLT.second)) | 729 | 0 | return SrcLT.first * 1; | 730 | 1 | } | 731 | 1 | | 732 | 1 | // If we are legalizing by splitting, query the concrete TTI for the cost | 733 | 1 | // of casting the original vector twice. We also need to factor in the | 734 | 1 | // cost of the split itself. Count that as 1, to be consistent with | 735 | 1 | // TLI->getTypeLegalizationCost(). | 736 | 1 | if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == | 737 | 1 | TargetLowering::TypeSplitVector) || | 738 | 1 | (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == | 739 | 1 | TargetLowering::TypeSplitVector)) { | 740 | 1 | Type *SplitDst = VectorType::get(Dst->getVectorElementType(), | 741 | 1 | Dst->getVectorNumElements() / 2); | 742 | 1 | Type *SplitSrc = VectorType::get(Src->getVectorElementType(), | 743 | 1 | Src->getVectorNumElements() / 2); | 744 | 1 | T *TTI = static_cast<T *>(this); | 745 | 1 | return TTI->getVectorSplitCost() + | 746 | 1 | (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I)); | 747 | 1 | } | 748 | 0 | | 749 | 0 | // In other cases where the source or destination are illegal, assume | 750 | 0 | // the operation will get scalarized. | 751 | 0 | unsigned Num = Dst->getVectorNumElements(); | 752 | 0 | unsigned Cost = static_cast<T *>(this)->getCastInstrCost( | 753 | 0 | Opcode, Dst->getScalarType(), Src->getScalarType(), I); | 754 | 0 |
| 755 | 0 | // Return the cost of multiple scalar invocation plus the cost of | 756 | 0 | // inserting and extracting the values. | 757 | 0 | return getScalarizationOverhead(Dst, true, true) + Num * Cost; | 758 | 0 | } | 759 | 0 | | 760 | 0 | // We already handled vector-to-vector and scalar-to-scalar conversions. | 761 | 0 | // This | 762 | 0 | // is where we handle bitcast between vectors and scalars. We need to assume | 763 | 0 | // that the conversion is scalarized in one way or another. | 764 | 0 | if (Opcode == Instruction::BitCast) | 765 | 0 | // Illegal bitcasts are done by storing and loading from a stack slot. | 766 | 0 | return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true) | 767 | 0 | : 0) + | 768 | 0 | (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false) | 769 | 0 | : 0); | 770 | 0 |
| 771 | 0 | llvm_unreachable("Unhandled cast"); | 772 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Line | Count | Source | 650 | 351 | const Instruction *I = nullptr) { | 651 | 351 | const TargetLoweringBase *TLI = getTLI(); | 652 | 351 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 653 | 351 | assert(ISD && "Invalid opcode"); | 654 | 351 | std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src); | 655 | 351 | std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst); | 656 | 351 | | 657 | 351 | // Check for NOOP conversions. | 658 | 351 | if (SrcLT.first == DstLT.first && | 659 | 351 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { | 660 | 104 | | 661 | 104 | // Bitcast between types that are legalized to the same type are free. | 662 | 104 | if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) | 663 | 10 | return 0; | 664 | 341 | } | 665 | 341 | | 666 | 341 | if (Opcode == Instruction::Trunc && | 667 | 341 | TLI->isTruncateFree(SrcLT.second, DstLT.second)41 ) | 668 | 41 | return 0; | 669 | 300 | | 670 | 300 | if (Opcode == Instruction::ZExt && | 671 | 300 | TLI->isZExtFree(SrcLT.second, DstLT.second)22 ) | 672 | 0 | return 0; | 673 | 300 | | 674 | 300 | if (Opcode == Instruction::AddrSpaceCast && | 675 | 300 | TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(), | 676 | 0 | Dst->getPointerAddressSpace())) | 677 | 0 | return 0; | 678 | 300 | | 679 | 300 | // If this is a zext/sext of a load, return 0 if the corresponding | 680 | 300 | // extending load exists on target. 
| 681 | 300 | if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt278 ) && | 682 | 300 | I88 && isa<LoadInst>(I->getOperand(0))88 ) { | 683 | 76 | EVT ExtVT = EVT::getEVT(Dst); | 684 | 76 | EVT LoadVT = EVT::getEVT(Src); | 685 | 76 | unsigned LType = | 686 | 76 | ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD16 : ISD::SEXTLOAD60 ); | 687 | 76 | if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT)) | 688 | 76 | return 0; | 689 | 224 | } | 690 | 224 | | 691 | 224 | // If the cast is marked as legal (or promote) then assume low cost. | 692 | 224 | if (SrcLT.first == DstLT.first && | 693 | 224 | TLI->isOperationLegalOrPromote(ISD, DstLT.second)) | 694 | 224 | return 1; | 695 | 0 | | 696 | 0 | // Handle scalar conversions. | 697 | 0 | if (!Src->isVectorTy() && !Dst->isVectorTy()) { | 698 | 0 | // Scalar bitcasts are usually free. | 699 | 0 | if (Opcode == Instruction::BitCast) | 700 | 0 | return 0; | 701 | 0 | | 702 | 0 | // Just check the op cost. If the operation is legal then assume it costs | 703 | 0 | // 1. | 704 | 0 | if (!TLI->isOperationExpand(ISD, DstLT.second)) | 705 | 0 | return 1; | 706 | 0 | | 707 | 0 | // Assume that illegal scalar instruction are expensive. | 708 | 0 | return 4; | 709 | 0 | } | 710 | 0 | | 711 | 0 | // Check vector-to-vector casts. | 712 | 0 | if (Dst->isVectorTy() && Src->isVectorTy()) { | 713 | 0 | // If the cast is between same-sized registers, then the check is simple. | 714 | 0 | if (SrcLT.first == DstLT.first && | 715 | 0 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { | 716 | 0 |
| 717 | 0 | // Assume that Zext is done using AND. | 718 | 0 | if (Opcode == Instruction::ZExt) | 719 | 0 | return 1; | 720 | 0 | | 721 | 0 | // Assume that sext is done using SHL and SRA. | 722 | 0 | if (Opcode == Instruction::SExt) | 723 | 0 | return 2; | 724 | 0 | | 725 | 0 | // Just check the op cost. If the operation is legal then assume it | 726 | 0 | // costs | 727 | 0 | // 1 and multiply by the type-legalization overhead. | 728 | 0 | if (!TLI->isOperationExpand(ISD, DstLT.second)) | 729 | 0 | return SrcLT.first * 1; | 730 | 0 | } | 731 | 0 | | 732 | 0 | // If we are legalizing by splitting, query the concrete TTI for the cost | 733 | 0 | // of casting the original vector twice. We also need to factor in the | 734 | 0 | // cost of the split itself. Count that as 1, to be consistent with | 735 | 0 | // TLI->getTypeLegalizationCost(). | 736 | 0 | if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == | 737 | 0 | TargetLowering::TypeSplitVector) || | 738 | 0 | (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == | 739 | 0 | TargetLowering::TypeSplitVector)) { | 740 | 0 | Type *SplitDst = VectorType::get(Dst->getVectorElementType(), | 741 | 0 | Dst->getVectorNumElements() / 2); | 742 | 0 | Type *SplitSrc = VectorType::get(Src->getVectorElementType(), | 743 | 0 | Src->getVectorNumElements() / 2); | 744 | 0 | T *TTI = static_cast<T *>(this); | 745 | 0 | return TTI->getVectorSplitCost() + | 746 | 0 | (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I)); | 747 | 0 | } | 748 | 0 | | 749 | 0 | // In other cases where the source or destination are illegal, assume | 750 | 0 | // the operation will get scalarized. | 751 | 0 | unsigned Num = Dst->getVectorNumElements(); | 752 | 0 | unsigned Cost = static_cast<T *>(this)->getCastInstrCost( | 753 | 0 | Opcode, Dst->getScalarType(), Src->getScalarType(), I); | 754 | 0 |
| 755 | 0 | // Return the cost of multiple scalar invocation plus the cost of | 756 | 0 | // inserting and extracting the values. | 757 | 0 | return getScalarizationOverhead(Dst, true, true) + Num * Cost; | 758 | 0 | } | 759 | 0 | | 760 | 0 | // We already handled vector-to-vector and scalar-to-scalar conversions. | 761 | 0 | // This | 762 | 0 | // is where we handle bitcast between vectors and scalars. We need to assume | 763 | 0 | // that the conversion is scalarized in one way or another. | 764 | 0 | if (Opcode == Instruction::BitCast) | 765 | 0 | // Illegal bitcasts are done by storing and loading from a stack slot. | 766 | 0 | return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true) | 767 | 0 | : 0) + | 768 | 0 | (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false) | 769 | 0 | : 0); | 770 | 0 |
| 771 | 0 | llvm_unreachable("Unhandled cast"); | 772 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Line | Count | Source | 650 | 16.0k | const Instruction *I = nullptr) { | 651 | 16.0k | const TargetLoweringBase *TLI = getTLI(); | 652 | 16.0k | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 653 | 16.0k | assert(ISD && "Invalid opcode"); | 654 | 16.0k | std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src); | 655 | 16.0k | std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst); | 656 | 16.0k | | 657 | 16.0k | // Check for NOOP conversions. | 658 | 16.0k | if (SrcLT.first == DstLT.first && | 659 | 16.0k | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()10.8k ) { | 660 | 6.33k | | 661 | 6.33k | // Bitcast between types that are legalized to the same type are free. | 662 | 6.33k | if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc5.44k ) | 663 | 1.51k | return 0; | 664 | 14.5k | } | 665 | 14.5k | | 666 | 14.5k | if (Opcode == Instruction::Trunc && | 667 | 14.5k | TLI->isTruncateFree(SrcLT.second, DstLT.second)3.89k ) | 668 | 922 | return 0; | 669 | 13.5k | | 670 | 13.5k | if (Opcode == Instruction::ZExt && | 671 | 13.5k | TLI->isZExtFree(SrcLT.second, DstLT.second)3.46k ) | 672 | 308 | return 0; | 673 | 13.2k | | 674 | 13.2k | if (Opcode == Instruction::AddrSpaceCast && | 675 | 13.2k | TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(), | 676 | 0 | Dst->getPointerAddressSpace())) | 677 | 0 | return 0; | 678 | 13.2k | | 679 | 13.2k | // If this is a zext/sext of a load, return 0 if the corresponding | 680 | 13.2k | // extending load exists on target. 
| 681 | 13.2k | if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt10.1k ) && | 682 | 13.2k | I4.37k && isa<LoadInst>(I->getOperand(0))3.02k ) { | 683 | 709 | EVT ExtVT = EVT::getEVT(Dst); | 684 | 709 | EVT LoadVT = EVT::getEVT(Src); | 685 | 709 | unsigned LType = | 686 | 709 | ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD503 : ISD::SEXTLOAD206 ); | 687 | 709 | if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT)) | 688 | 496 | return 0; | 689 | 12.7k | } | 690 | 12.7k | | 691 | 12.7k | // If the cast is marked as legal (or promote) then assume low cost. | 692 | 12.7k | if (SrcLT.first == DstLT.first && | 693 | 12.7k | TLI->isOperationLegalOrPromote(ISD, DstLT.second)7.70k ) | 694 | 4.45k | return 1; | 695 | 8.33k | | 696 | 8.33k | // Handle scalar conversions. | 697 | 8.33k | if (!Src->isVectorTy() && !Dst->isVectorTy()2.12k ) { | 698 | 2.12k | // Scalar bitcasts are usually free. | 699 | 2.12k | if (Opcode == Instruction::BitCast) | 700 | 9 | return 0; | 701 | 2.12k | | 702 | 2.12k | // Just check the op cost. If the operation is legal then assume it costs | 703 | 2.12k | // 1. | 704 | 2.12k | if (!TLI->isOperationExpand(ISD, DstLT.second)) | 705 | 1.82k | return 1; | 706 | 298 | | 707 | 298 | // Assume that illegal scalar instruction are expensive. | 708 | 298 | return 4; | 709 | 298 | } | 710 | 6.20k | | 711 | 6.20k | // Check vector-to-vector casts. | 712 | 6.20k | if (Dst->isVectorTy() && Src->isVectorTy()) { | 713 | 6.20k | // If the cast is between same-sized registers, then the check is simple. | 714 | 6.20k | if (SrcLT.first == DstLT.first && | 715 | 6.20k | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()2.44k ) { | 716 | 2.15k | | 717 | 2.15k | // Assume that Zext is done using AND. | 718 | 2.15k | if (Opcode == Instruction::ZExt) | 719 | 1.07k | return 1; | 720 | 1.08k | | 721 | 1.08k | // Assume that sext is done using SHL and SRA. 
| 722 | 1.08k | if (Opcode == Instruction::SExt) | 723 | 256 | return 2; | 724 | 824 | | 725 | 824 | // Just check the op cost. If the operation is legal then assume it | 726 | 824 | // costs | 727 | 824 | // 1 and multiply by the type-legalization overhead. | 728 | 824 | if (!TLI->isOperationExpand(ISD, DstLT.second)) | 729 | 24 | return SrcLT.first * 1; | 730 | 4.85k | } | 731 | 4.85k | | 732 | 4.85k | // If we are legalizing by splitting, query the concrete TTI for the cost | 733 | 4.85k | // of casting the original vector twice. We also need to factor in the | 734 | 4.85k | // cost of the split itself. Count that as 1, to be consistent with | 735 | 4.85k | // TLI->getTypeLegalizationCost(). | 736 | 4.85k | if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == | 737 | 4.85k | TargetLowering::TypeSplitVector) || | 738 | 4.85k | (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == | 739 | 3.17k | TargetLowering::TypeSplitVector)) { | 740 | 3.17k | Type *SplitDst = VectorType::get(Dst->getVectorElementType(), | 741 | 3.17k | Dst->getVectorNumElements() / 2); | 742 | 3.17k | Type *SplitSrc = VectorType::get(Src->getVectorElementType(), | 743 | 3.17k | Src->getVectorNumElements() / 2); | 744 | 3.17k | T *TTI = static_cast<T *>(this); | 745 | 3.17k | return TTI->getVectorSplitCost() + | 746 | 3.17k | (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I)); | 747 | 3.17k | } | 748 | 1.68k | | 749 | 1.68k | // In other cases where the source or destination are illegal, assume | 750 | 1.68k | // the operation will get scalarized. | 751 | 1.68k | unsigned Num = Dst->getVectorNumElements(); | 752 | 1.68k | unsigned Cost = static_cast<T *>(this)->getCastInstrCost( | 753 | 1.68k | Opcode, Dst->getScalarType(), Src->getScalarType(), I); | 754 | 1.68k | | 755 | 1.68k | // Return the cost of multiple scalar invocation plus the cost of | 756 | 1.68k | // inserting and extracting the values. 
| 757 | 1.68k | return getScalarizationOverhead(Dst, true, true) + Num * Cost; | 758 | 1.68k | } | 759 | 0 | | 760 | 0 | // We already handled vector-to-vector and scalar-to-scalar conversions. | 761 | 0 | // This | 762 | 0 | // is where we handle bitcast between vectors and scalars. We need to assume | 763 | 0 | // that the conversion is scalarized in one way or another. | 764 | 0 | if (Opcode == Instruction::BitCast) | 765 | 0 | // Illegal bitcasts are done by storing and loading from a stack slot. | 766 | 0 | return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true) | 767 | 0 | : 0) + | 768 | 0 | (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false) | 769 | 0 | : 0); | 770 | 0 |
| 771 | 0 | llvm_unreachable("Unhandled cast"); | 772 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) |
773 | | |
774 | | unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, |
775 | 8 | VectorType *VecTy, unsigned Index) { |
776 | 8 | return static_cast<T *>(this)->getVectorInstrCost( |
777 | 8 | Instruction::ExtractElement, VecTy, Index) + |
778 | 8 | static_cast<T *>(this)->getCastInstrCost(Opcode, Dst, |
779 | 8 | VecTy->getElementType()); |
780 | 8 | } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int) Line | Count | Source | 775 | 8 | VectorType *VecTy, unsigned Index) { | 776 | 8 | return static_cast<T *>(this)->getVectorInstrCost( | 777 | 8 | 
Instruction::ExtractElement, VecTy, Index) + | 778 | 8 | static_cast<T *>(this)->getCastInstrCost(Opcode, Dst, | 779 | 8 | VecTy->getElementType()); | 780 | 8 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int) |
781 | | |
782 | 134k | unsigned getCFInstrCost(unsigned Opcode) { |
783 | 134k | // Branches are assumed to be predicted. |
784 | 134k | return 0; |
785 | 134k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getCFInstrCost(unsigned int) Line | Count | Source | 782 | 111k | unsigned getCFInstrCost(unsigned Opcode) { | 783 | 111k | // Branches are assumed to be predicted. | 784 | 111k | return 0; | 785 | 111k | } |
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getCFInstrCost(unsigned int) Line | Count | Source | 782 | 15 | unsigned getCFInstrCost(unsigned Opcode) { | 783 | 15 | // Branches are assumed to be predicted. | 784 | 15 | return 0; | 785 | 15 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getCFInstrCost(unsigned int) llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getCFInstrCost(unsigned int) Line | Count | Source | 782 | 3.08k | unsigned getCFInstrCost(unsigned Opcode) { | 783 | 3.08k | // Branches are assumed to be predicted. | 784 | 3.08k | return 0; | 785 | 3.08k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getCFInstrCost(unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getCFInstrCost(unsigned int) llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getCFInstrCost(unsigned int) Line | Count | Source | 782 | 6 | unsigned getCFInstrCost(unsigned Opcode) { | 783 | 6 | // Branches are assumed to be predicted. | 784 | 6 | return 0; | 785 | 6 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getCFInstrCost(unsigned int) Line | Count | Source | 782 | 503 | unsigned getCFInstrCost(unsigned Opcode) { | 783 | 503 | // Branches are assumed to be predicted. | 784 | 503 | return 0; | 785 | 503 | } |
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getCFInstrCost(unsigned int) Line | Count | Source | 782 | 14 | unsigned getCFInstrCost(unsigned Opcode) { | 783 | 14 | // Branches are assumed to be predicted. | 784 | 14 | return 0; | 785 | 14 | } |
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getCFInstrCost(unsigned int) Line | Count | Source | 782 | 957 | unsigned getCFInstrCost(unsigned Opcode) { | 783 | 957 | // Branches are assumed to be predicted. | 784 | 957 | return 0; | 785 | 957 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getCFInstrCost(unsigned int) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getCFInstrCost(unsigned int) Line | Count | Source | 782 | 18.3k | unsigned getCFInstrCost(unsigned Opcode) { | 783 | 18.3k | // Branches are assumed to be predicted. | 784 | 18.3k | return 0; | 785 | 18.3k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getCFInstrCost(unsigned int) |
786 | | |
787 | | unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, |
788 | 204k | const Instruction *I) { |
789 | 204k | const TargetLoweringBase *TLI = getTLI(); |
790 | 204k | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
791 | 204k | assert(ISD && "Invalid opcode"); |
792 | 204k | |
793 | 204k | // Selects on vectors are actually vector selects. |
794 | 204k | if (ISD == ISD::SELECT) { |
795 | 69.1k | assert(CondTy && "CondTy must exist"); |
796 | 69.1k | if (CondTy->isVectorTy()) |
797 | 25.5k | ISD = ISD::VSELECT; |
798 | 69.1k | } |
799 | 204k | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); |
800 | 204k | |
801 | 204k | if (!(ValTy->isVectorTy() && !LT.second.isVector()66.5k ) && |
802 | 204k | !TLI->isOperationExpand(ISD, LT.second)203k ) { |
803 | 177k | // The operation is legal. Assume it costs 1. Multiply |
804 | 177k | // by the type-legalization overhead. |
805 | 177k | return LT.first * 1; |
806 | 177k | } |
807 | 26.8k | |
808 | 26.8k | // Otherwise, assume that the cast is scalarized. |
809 | 26.8k | // TODO: If one of the types get legalized by splitting, handle this |
810 | 26.8k | // similarly to what getCastInstrCost() does. |
811 | 26.8k | if (ValTy->isVectorTy()) { |
812 | 24.4k | unsigned Num = ValTy->getVectorNumElements(); |
813 | 24.4k | if (CondTy) |
814 | 24.4k | CondTy = CondTy->getScalarType(); |
815 | 24.4k | unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost( |
816 | 24.4k | Opcode, ValTy->getScalarType(), CondTy, I); |
817 | 24.4k | |
818 | 24.4k | // Return the cost of multiple scalar invocation plus the cost of |
819 | 24.4k | // inserting and extracting the values. |
820 | 24.4k | return getScalarizationOverhead(ValTy, true, false) + Num * Cost; |
821 | 24.4k | } |
822 | 2.41k | |
823 | 2.41k | // Unknown scalar opcode. |
824 | 2.41k | return 1; |
825 | 2.41k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Line | Count | Source | 788 | 189k | const Instruction *I) { | 789 | 189k | const TargetLoweringBase *TLI = getTLI(); | 790 | 189k | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 791 | 189k | assert(ISD && "Invalid opcode"); | 792 | 189k | | 793 | 189k | // Selects on vectors are actually vector selects. | 794 | 189k | if (ISD == ISD::SELECT) { | 795 | 66.6k | assert(CondTy && "CondTy must exist"); | 796 | 66.6k | if (CondTy->isVectorTy()) | 797 | 25.1k | ISD = ISD::VSELECT; | 798 | 66.6k | } | 799 | 189k | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); | 800 | 189k | | 801 | 189k | if (!(ValTy->isVectorTy() && !LT.second.isVector()65.0k ) && | 802 | 189k | !TLI->isOperationExpand(ISD, LT.second)189k ) { | 803 | 165k | // The operation is legal. Assume it costs 1. Multiply | 804 | 165k | // by the type-legalization overhead. | 805 | 165k | return LT.first * 1; | 806 | 165k | } | 807 | 23.9k | | 808 | 23.9k | // Otherwise, assume that the cast is scalarized. | 809 | 23.9k | // TODO: If one of the types get legalized by splitting, handle this | 810 | 23.9k | // similarly to what getCastInstrCost() does. | 811 | 23.9k | if (ValTy->isVectorTy()) { | 812 | 23.9k | unsigned Num = ValTy->getVectorNumElements(); | 813 | 23.9k | if (CondTy) | 814 | 23.9k | CondTy = CondTy->getScalarType(); | 815 | 23.9k | unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost( | 816 | 23.9k | Opcode, ValTy->getScalarType(), CondTy, I); | 817 | 23.9k | | 818 | 23.9k | // Return the cost of multiple scalar invocation plus the cost of | 819 | 23.9k | // inserting and extracting the values. | 820 | 23.9k | return getScalarizationOverhead(ValTy, true, false) + Num * Cost; | 821 | 23.9k | } | 822 | 0 | | 823 | 0 | // Unknown scalar opcode. | 824 | 0 | return 1; | 825 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Line | Count | Source | 788 | 223 | const Instruction *I) { | 789 | 223 | const TargetLoweringBase *TLI = getTLI(); | 790 | 223 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 791 | 223 | assert(ISD && "Invalid opcode"); | 792 | 223 | | 793 | 223 | // Selects on vectors are actually vector selects. | 794 | 223 | if (ISD == ISD::SELECT) { | 795 | 107 | assert(CondTy && "CondTy must exist"); | 796 | 107 | if (CondTy->isVectorTy()) | 797 | 44 | ISD = ISD::VSELECT; | 798 | 107 | } | 799 | 223 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); | 800 | 223 | | 801 | 223 | if (!(ValTy->isVectorTy() && !LT.second.isVector()88 ) && | 802 | 223 | !TLI->isOperationExpand(ISD, LT.second)) { | 803 | 135 | // The operation is legal. Assume it costs 1. Multiply | 804 | 135 | // by the type-legalization overhead. | 805 | 135 | return LT.first * 1; | 806 | 135 | } | 807 | 88 | | 808 | 88 | // Otherwise, assume that the cast is scalarized. | 809 | 88 | // TODO: If one of the types get legalized by splitting, handle this | 810 | 88 | // similarly to what getCastInstrCost() does. | 811 | 88 | if (ValTy->isVectorTy()) { | 812 | 88 | unsigned Num = ValTy->getVectorNumElements(); | 813 | 88 | if (CondTy) | 814 | 88 | CondTy = CondTy->getScalarType(); | 815 | 88 | unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost( | 816 | 88 | Opcode, ValTy->getScalarType(), CondTy, I); | 817 | 88 | | 818 | 88 | // Return the cost of multiple scalar invocation plus the cost of | 819 | 88 | // inserting and extracting the values. | 820 | 88 | return getScalarizationOverhead(ValTy, true, false) + Num * Cost; | 821 | 88 | } | 822 | 0 | | 823 | 0 | // Unknown scalar opcode. 
| 824 | 0 | return 1; | 825 | 0 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Line | Count | Source | 788 | 3.95k | const Instruction *I) { | 789 | 3.95k | const TargetLoweringBase *TLI = getTLI(); | 790 | 3.95k | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 791 | 3.95k | assert(ISD && "Invalid opcode"); | 792 | 3.95k | | 793 | 3.95k | // Selects on vectors are actually vector selects. | 794 | 3.95k | if (ISD == ISD::SELECT) { | 795 | 426 | assert(CondTy && "CondTy must exist"); | 796 | 426 | if (CondTy->isVectorTy()) | 797 | 96 | ISD = ISD::VSELECT; | 798 | 426 | } | 799 | 3.95k | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); | 800 | 3.95k | | 801 | 3.95k | if (!(ValTy->isVectorTy() && !LT.second.isVector()1.11k ) && | 802 | 3.95k | !TLI->isOperationExpand(ISD, LT.second)) { | 803 | 1.37k | // The operation is legal. Assume it costs 1. Multiply | 804 | 1.37k | // by the type-legalization overhead. | 805 | 1.37k | return LT.first * 1; | 806 | 1.37k | } | 807 | 2.57k | | 808 | 2.57k | // Otherwise, assume that the cast is scalarized. | 809 | 2.57k | // TODO: If one of the types get legalized by splitting, handle this | 810 | 2.57k | // similarly to what getCastInstrCost() does. | 811 | 2.57k | if (ValTy->isVectorTy()) { | 812 | 162 | unsigned Num = ValTy->getVectorNumElements(); | 813 | 162 | if (CondTy) | 814 | 162 | CondTy = CondTy->getScalarType(); | 815 | 162 | unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost( | 816 | 162 | Opcode, ValTy->getScalarType(), CondTy, I); | 817 | 162 | | 818 | 162 | // Return the cost of multiple scalar invocation plus the cost of | 819 | 162 | // inserting and extracting the values. | 820 | 162 | return getScalarizationOverhead(ValTy, true, false) + Num * Cost; | 821 | 162 | } | 822 | 2.41k | | 823 | 2.41k | // Unknown scalar opcode. | 824 | 2.41k | return 1; | 825 | 2.41k | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Line | Count | Source | 788 | 12 | const Instruction *I) { | 789 | 12 | const TargetLoweringBase *TLI = getTLI(); | 790 | 12 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 791 | 12 | assert(ISD && "Invalid opcode"); | 792 | 12 | | 793 | 12 | // Selects on vectors are actually vector selects. | 794 | 12 | if (ISD == ISD::SELECT) { | 795 | 0 | assert(CondTy && "CondTy must exist"); | 796 | 0 | if (CondTy->isVectorTy()) | 797 | 0 | ISD = ISD::VSELECT; | 798 | 0 | } | 799 | 12 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); | 800 | 12 | | 801 | 12 | if (!(ValTy->isVectorTy() && !LT.second.isVector()0 ) && | 802 | 12 | !TLI->isOperationExpand(ISD, LT.second)) { | 803 | 12 | // The operation is legal. Assume it costs 1. Multiply | 804 | 12 | // by the type-legalization overhead. | 805 | 12 | return LT.first * 1; | 806 | 12 | } | 807 | 0 | | 808 | 0 | // Otherwise, assume that the cast is scalarized. | 809 | 0 | // TODO: If one of the types get legalized by splitting, handle this | 810 | 0 | // similarly to what getCastInstrCost() does. | 811 | 0 | if (ValTy->isVectorTy()) { | 812 | 0 | unsigned Num = ValTy->getVectorNumElements(); | 813 | 0 | if (CondTy) | 814 | 0 | CondTy = CondTy->getScalarType(); | 815 | 0 | unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost( | 816 | 0 | Opcode, ValTy->getScalarType(), CondTy, I); | 817 | 0 |
| 818 | 0 | // Return the cost of multiple scalar invocation plus the cost of | 819 | 0 | // inserting and extracting the values. | 820 | 0 | return getScalarizationOverhead(ValTy, true, false) + Num * Cost; | 821 | 0 | } | 822 | 0 |
| 823 | 0 | // Unknown scalar opcode. | 824 | 0 | return 1; | 825 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Line | Count | Source | 788 | 3 | const Instruction *I) { | 789 | 3 | const TargetLoweringBase *TLI = getTLI(); | 790 | 3 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 791 | 3 | assert(ISD && "Invalid opcode"); | 792 | 3 | | 793 | 3 | // Selects on vectors are actually vector selects. | 794 | 3 | if (ISD == ISD::SELECT) { | 795 | 0 | assert(CondTy && "CondTy must exist"); | 796 | 0 | if (CondTy->isVectorTy()) | 797 | 0 | ISD = ISD::VSELECT; | 798 | 0 | } | 799 | 3 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); | 800 | 3 | | 801 | 3 | if (!(ValTy->isVectorTy() && !LT.second.isVector()0 ) && | 802 | 3 | !TLI->isOperationExpand(ISD, LT.second)) { | 803 | 3 | // The operation is legal. Assume it costs 1. Multiply | 804 | 3 | // by the type-legalization overhead. | 805 | 3 | return LT.first * 1; | 806 | 3 | } | 807 | 0 | | 808 | 0 | // Otherwise, assume that the cast is scalarized. | 809 | 0 | // TODO: If one of the types get legalized by splitting, handle this | 810 | 0 | // similarly to what getCastInstrCost() does. | 811 | 0 | if (ValTy->isVectorTy()) { | 812 | 0 | unsigned Num = ValTy->getVectorNumElements(); | 813 | 0 | if (CondTy) | 814 | 0 | CondTy = CondTy->getScalarType(); | 815 | 0 | unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost( | 816 | 0 | Opcode, ValTy->getScalarType(), CondTy, I); | 817 | 0 |
| 818 | 0 | // Return the cost of multiple scalar invocation plus the cost of | 819 | 0 | // inserting and extracting the values. | 820 | 0 | return getScalarizationOverhead(ValTy, true, false) + Num * Cost; | 821 | 0 | } | 822 | 0 |
| 823 | 0 | // Unknown scalar opcode. | 824 | 0 | return 1; | 825 | 0 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Line | Count | Source | 788 | 217 | const Instruction *I) { | 789 | 217 | const TargetLoweringBase *TLI = getTLI(); | 790 | 217 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 791 | 217 | assert(ISD && "Invalid opcode"); | 792 | 217 | | 793 | 217 | // Selects on vectors are actually vector selects. | 794 | 217 | if (ISD == ISD::SELECT) { | 795 | 0 | assert(CondTy && "CondTy must exist"); | 796 | 0 | if (CondTy->isVectorTy()) | 797 | 0 | ISD = ISD::VSELECT; | 798 | 0 | } | 799 | 217 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); | 800 | 217 | | 801 | 217 | if (!(ValTy->isVectorTy() && !LT.second.isVector()11 ) && | 802 | 217 | !TLI->isOperationExpand(ISD, LT.second)216 ) { | 803 | 216 | // The operation is legal. Assume it costs 1. Multiply | 804 | 216 | // by the type-legalization overhead. | 805 | 216 | return LT.first * 1; | 806 | 216 | } | 807 | 1 | | 808 | 1 | // Otherwise, assume that the cast is scalarized. | 809 | 1 | // TODO: If one of the types get legalized by splitting, handle this | 810 | 1 | // similarly to what getCastInstrCost() does. | 811 | 1 | if (ValTy->isVectorTy()) { | 812 | 1 | unsigned Num = ValTy->getVectorNumElements(); | 813 | 1 | if (CondTy) | 814 | 1 | CondTy = CondTy->getScalarType(); | 815 | 1 | unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost( | 816 | 1 | Opcode, ValTy->getScalarType(), CondTy, I); | 817 | 1 | | 818 | 1 | // Return the cost of multiple scalar invocation plus the cost of | 819 | 1 | // inserting and extracting the values. | 820 | 1 | return getScalarizationOverhead(ValTy, true, false) + Num * Cost; | 821 | 1 | } | 822 | 0 | | 823 | 0 | // Unknown scalar opcode. | 824 | 0 | return 1; | 825 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Line | Count | Source | 788 | 188 | const Instruction *I) { | 789 | 188 | const TargetLoweringBase *TLI = getTLI(); | 790 | 188 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 791 | 188 | assert(ISD && "Invalid opcode"); | 792 | 188 | | 793 | 188 | // Selects on vectors are actually vector selects. | 794 | 188 | if (ISD == ISD::SELECT) { | 795 | 0 | assert(CondTy && "CondTy must exist"); | 796 | 0 | if (CondTy->isVectorTy()) | 797 | 0 | ISD = ISD::VSELECT; | 798 | 0 | } | 799 | 188 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); | 800 | 188 | | 801 | 188 | if (!(ValTy->isVectorTy() && !LT.second.isVector()0 ) && | 802 | 188 | !TLI->isOperationExpand(ISD, LT.second)) { | 803 | 188 | // The operation is legal. Assume it costs 1. Multiply | 804 | 188 | // by the type-legalization overhead. | 805 | 188 | return LT.first * 1; | 806 | 188 | } | 807 | 0 | | 808 | 0 | // Otherwise, assume that the cast is scalarized. | 809 | 0 | // TODO: If one of the types get legalized by splitting, handle this | 810 | 0 | // similarly to what getCastInstrCost() does. | 811 | 0 | if (ValTy->isVectorTy()) { | 812 | 0 | unsigned Num = ValTy->getVectorNumElements(); | 813 | 0 | if (CondTy) | 814 | 0 | CondTy = CondTy->getScalarType(); | 815 | 0 | unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost( | 816 | 0 | Opcode, ValTy->getScalarType(), CondTy, I); | 817 | 0 |
| 818 | 0 | // Return the cost of multiple scalar invocation plus the cost of | 819 | 0 | // inserting and extracting the values. | 820 | 0 | return getScalarizationOverhead(ValTy, true, false) + Num * Cost; | 821 | 0 | } | 822 | 0 |
| 823 | 0 | // Unknown scalar opcode. | 824 | 0 | return 1; | 825 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) Line | Count | Source | 788 | 10.1k | const Instruction *I) { | 789 | 10.1k | const TargetLoweringBase *TLI = getTLI(); | 790 | 10.1k | int ISD = TLI->InstructionOpcodeToISD(Opcode); | 791 | 10.1k | assert(ISD && "Invalid opcode"); | 792 | 10.1k | | 793 | 10.1k | // Selects on vectors are actually vector selects. | 794 | 10.1k | if (ISD == ISD::SELECT) { | 795 | 1.99k | assert(CondTy && "CondTy must exist"); | 796 | 1.99k | if (CondTy->isVectorTy()) | 797 | 310 | ISD = ISD::VSELECT; | 798 | 1.99k | } | 799 | 10.1k | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); | 800 | 10.1k | | 801 | 10.1k | if (!(ValTy->isVectorTy() && !LT.second.isVector()332 ) && | 802 | 10.1k | !TLI->isOperationExpand(ISD, LT.second)9.98k ) { | 803 | 9.98k | // The operation is legal. Assume it costs 1. Multiply | 804 | 9.98k | // by the type-legalization overhead. | 805 | 9.98k | return LT.first * 1; | 806 | 9.98k | } | 807 | 208 | | 808 | 208 | // Otherwise, assume that the cast is scalarized. | 809 | 208 | // TODO: If one of the types get legalized by splitting, handle this | 810 | 208 | // similarly to what getCastInstrCost() does. | 811 | 208 | if (ValTy->isVectorTy()) { | 812 | 208 | unsigned Num = ValTy->getVectorNumElements(); | 813 | 208 | if (CondTy) | 814 | 208 | CondTy = CondTy->getScalarType(); | 815 | 208 | unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost( | 816 | 208 | Opcode, ValTy->getScalarType(), CondTy, I); | 817 | 208 | | 818 | 208 | // Return the cost of multiple scalar invocation plus the cost of | 819 | 208 | // inserting and extracting the values. 
| 820 | 208 | return getScalarizationOverhead(ValTy, true, false) + Num * Cost; | 821 | 208 | } | 822 | 0 | | 823 | 0 | // Unknown scalar opcode. | 824 | 0 | return 1; | 825 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*) |
826 | | |
827 | 324k | unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { |
828 | 324k | std::pair<unsigned, MVT> LT = |
829 | 324k | getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); |
830 | 324k | |
831 | 324k | return LT.first; |
832 | 324k | } llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int) Line | Count | Source | 827 | 763 | unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { | 828 | 763 | std::pair<unsigned, MVT> LT = | 829 | 763 | getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); | 830 | 763 | | 831 | 763 | return LT.first; | 832 | 763 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int) llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int) Line | Count | Source | 827 | 12.5k | unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { | 828 | 12.5k | std::pair<unsigned, MVT> LT = | 829 | 12.5k | getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); | 830 | 12.5k | | 831 | 12.5k | return LT.first; | 832 | 12.5k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int) llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int) Line | Count | Source | 827 | 14 | unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { | 828 | 14 | std::pair<unsigned, MVT> LT = | 829 | 14 | getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); | 830 | 14 | | 831 | 14 | return LT.first; | 832 | 14 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int) Line | Count | Source | 827 | 2.32k | unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { | 828 | 2.32k | std::pair<unsigned, MVT> LT = | 829 | 2.32k | getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); | 830 | 2.32k | | 831 | 2.32k | return LT.first; | 832 | 2.32k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int) Line | Count | Source | 827 | 2.00k | unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { | 828 | 2.00k | std::pair<unsigned, MVT> LT = | 829 | 2.00k | getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); | 830 | 2.00k | | 831 | 2.00k | return LT.first; | 832 | 2.00k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int) Line | Count | Source | 827 | 306k | unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { | 828 | 306k | std::pair<unsigned, MVT> LT = | 829 | 306k | getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); | 830 | 306k | | 831 | 306k | return LT.first; | 832 | 306k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int) |
833 | | |
834 | | unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, |
835 | 1.49k | unsigned AddressSpace, const Instruction *I = nullptr) { |
836 | 1.49k | assert(!Src->isVoidTy() && "Invalid type"); |
837 | 1.49k | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src); |
838 | 1.49k | |
839 | 1.49k | // Assuming that all loads of legal types cost 1. |
840 | 1.49k | unsigned Cost = LT.first; |
841 | 1.49k | |
842 | 1.49k | if (Src->isVectorTy() && |
843 | 1.49k | Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()637 ) { |
844 | 62 | // This is a vector load that legalizes to a larger type than the vector |
845 | 62 | // itself. Unless the corresponding extending load or truncating store is |
846 | 62 | // legal, then this will scalarize. |
847 | 62 | TargetLowering::LegalizeAction LA = TargetLowering::Expand; |
848 | 62 | EVT MemVT = getTLI()->getValueType(DL, Src); |
849 | 62 | if (Opcode == Instruction::Store) |
850 | 38 | LA = getTLI()->getTruncStoreAction(LT.second, MemVT); |
851 | 24 | else |
852 | 24 | LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); |
853 | 62 | |
854 | 62 | if (LA != TargetLowering::Legal && LA != TargetLowering::Custom57 ) { |
855 | 57 | // This is a vector load/store for some illegal type that is scalarized. |
856 | 57 | // We must account for the cost of building or decomposing the vector. |
857 | 57 | Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, |
858 | 57 | Opcode == Instruction::Store); |
859 | 57 | } |
860 | 62 | } |
861 | 1.49k | |
862 | 1.49k | return Cost; |
863 | 1.49k | } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*) Line | Count | Source | 835 | 719 | unsigned AddressSpace, const Instruction *I = nullptr) { | 836 | 719 | assert(!Src->isVoidTy() && "Invalid type"); | 837 | 719 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src); | 838 | 719 | | 839 | 719 | // Assuming that all loads of legal types cost 1. | 840 | 719 | unsigned Cost = LT.first; | 841 | 719 | | 842 | 719 | if (Src->isVectorTy() && | 843 | 719 | Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()423 ) { | 844 | 2 | // This is a vector load that legalizes to a larger type than the vector | 845 | 2 | // itself. Unless the corresponding extending load or truncating store is | 846 | 2 | // legal, then this will scalarize. | 847 | 2 | TargetLowering::LegalizeAction LA = TargetLowering::Expand; | 848 | 2 | EVT MemVT = getTLI()->getValueType(DL, Src); | 849 | 2 | if (Opcode == Instruction::Store) | 850 | 1 | LA = getTLI()->getTruncStoreAction(LT.second, MemVT); | 851 | 1 | else | 852 | 1 | LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); | 853 | 2 | | 854 | 2 | if (LA != TargetLowering::Legal && LA != TargetLowering::Custom0 ) { | 855 | 0 | // This is a vector load/store for some illegal type that is scalarized. | 856 | 0 | // We must account for the cost of building or decomposing the vector. | 857 | 0 | Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, | 858 | 0 | Opcode == Instruction::Store); | 859 | 0 | } | 860 | 2 | } | 861 | 719 | | 862 | 719 | return Cost; | 863 | 719 | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*) Line | Count | Source | 835 | 42 | unsigned AddressSpace, const Instruction *I = nullptr) { | 836 | 42 | assert(!Src->isVoidTy() && "Invalid type"); | 837 | 42 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src); | 838 | 42 | | 839 | 42 | // Assuming that all loads of legal types cost 1. | 840 | 42 | unsigned Cost = LT.first; | 841 | 42 | | 842 | 42 | if (Src->isVectorTy() && | 843 | 42 | Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()15 ) { | 844 | 3 | // This is a vector load that legalizes to a larger type than the vector | 845 | 3 | // itself. Unless the corresponding extending load or truncating store is | 846 | 3 | // legal, then this will scalarize. | 847 | 3 | TargetLowering::LegalizeAction LA = TargetLowering::Expand; | 848 | 3 | EVT MemVT = getTLI()->getValueType(DL, Src); | 849 | 3 | if (Opcode == Instruction::Store) | 850 | 3 | LA = getTLI()->getTruncStoreAction(LT.second, MemVT); | 851 | 0 | else | 852 | 0 | LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); | 853 | 3 | | 854 | 3 | if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { | 855 | 3 | // This is a vector load/store for some illegal type that is scalarized. | 856 | 3 | // We must account for the cost of building or decomposing the vector. | 857 | 3 | Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, | 858 | 3 | Opcode == Instruction::Store); | 859 | 3 | } | 860 | 3 | } | 861 | 42 | | 862 | 42 | return Cost; | 863 | 42 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*) llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*) Line | Count | Source | 835 | 8 | unsigned AddressSpace, const Instruction *I = nullptr) { | 836 | 8 | assert(!Src->isVoidTy() && "Invalid type"); | 837 | 8 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src); | 838 | 8 | | 839 | 8 | // Assuming that all loads of legal types cost 1. | 840 | 8 | unsigned Cost = LT.first; | 841 | 8 | | 842 | 8 | if (Src->isVectorTy() && | 843 | 8 | Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()4 ) { | 844 | 0 | // This is a vector load that legalizes to a larger type than the vector | 845 | 0 | // itself. Unless the corresponding extending load or truncating store is | 846 | 0 | // legal, then this will scalarize. | 847 | 0 | TargetLowering::LegalizeAction LA = TargetLowering::Expand; | 848 | 0 | EVT MemVT = getTLI()->getValueType(DL, Src); | 849 | 0 | if (Opcode == Instruction::Store) | 850 | 0 | LA = getTLI()->getTruncStoreAction(LT.second, MemVT); | 851 | 0 | else | 852 | 0 | LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); | 853 | 0 |
| 854 | 0 | if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { | 855 | 0 | // This is a vector load/store for some illegal type that is scalarized. | 856 | 0 | // We must account for the cost of building or decomposing the vector. | 857 | 0 | Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, | 858 | 0 | Opcode == Instruction::Store); | 859 | 0 | } | 860 | 0 | } | 861 | 8 | | 862 | 8 | return Cost; | 863 | 8 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*) Line | Count | Source | 835 | 324 | unsigned AddressSpace, const Instruction *I = nullptr) { | 836 | 324 | assert(!Src->isVoidTy() && "Invalid type"); | 837 | 324 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src); | 838 | 324 | | 839 | 324 | // Assuming that all loads of legal types cost 1. | 840 | 324 | unsigned Cost = LT.first; | 841 | 324 | | 842 | 324 | if (Src->isVectorTy() && | 843 | 324 | Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()195 ) { | 844 | 57 | // This is a vector load that legalizes to a larger type than the vector | 845 | 57 | // itself. Unless the corresponding extending load or truncating store is | 846 | 57 | // legal, then this will scalarize. | 847 | 57 | TargetLowering::LegalizeAction LA = TargetLowering::Expand; | 848 | 57 | EVT MemVT = getTLI()->getValueType(DL, Src); | 849 | 57 | if (Opcode == Instruction::Store) | 850 | 34 | LA = getTLI()->getTruncStoreAction(LT.second, MemVT); | 851 | 23 | else | 852 | 23 | LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); | 853 | 57 | | 854 | 57 | if (LA != TargetLowering::Legal && LA != TargetLowering::Custom54 ) { | 855 | 54 | // This is a vector load/store for some illegal type that is scalarized. | 856 | 54 | // We must account for the cost of building or decomposing the vector. | 857 | 54 | Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, | 858 | 54 | Opcode == Instruction::Store); | 859 | 54 | } | 860 | 57 | } | 861 | 324 | | 862 | 324 | return Cost; | 863 | 324 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*) Line | Count | Source | 835 | 397 | unsigned AddressSpace, const Instruction *I = nullptr) { | 836 | 397 | assert(!Src->isVoidTy() && "Invalid type"); | 837 | 397 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src); | 838 | 397 | | 839 | 397 | // Assuming that all loads of legal types cost 1. | 840 | 397 | unsigned Cost = LT.first; | 841 | 397 | | 842 | 397 | if (Src->isVectorTy() && | 843 | 397 | Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()0 ) { | 844 | 0 | // This is a vector load that legalizes to a larger type than the vector | 845 | 0 | // itself. Unless the corresponding extending load or truncating store is | 846 | 0 | // legal, then this will scalarize. | 847 | 0 | TargetLowering::LegalizeAction LA = TargetLowering::Expand; | 848 | 0 | EVT MemVT = getTLI()->getValueType(DL, Src); | 849 | 0 | if (Opcode == Instruction::Store) | 850 | 0 | LA = getTLI()->getTruncStoreAction(LT.second, MemVT); | 851 | 0 | else | 852 | 0 | LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); | 853 | 0 |
| 854 | 0 | if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { | 855 | 0 | // This is a vector load/store for some illegal type that is scalarized. | 856 | 0 | // We must account for the cost of building or decomposing the vector. | 857 | 0 | Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, | 858 | 0 | Opcode == Instruction::Store); | 859 | 0 | } | 860 | 0 | } | 861 | 397 | | 862 | 397 | return Cost; | 863 | 397 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*) |
864 | | |
865 | | unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, |
866 | | unsigned Factor, |
867 | | ArrayRef<unsigned> Indices, |
868 | | unsigned Alignment, unsigned AddressSpace, |
869 | | bool UseMaskForCond = false, |
870 | 584 | bool UseMaskForGaps = false) { |
871 | 584 | VectorType *VT = dyn_cast<VectorType>(VecTy); |
872 | 584 | assert(VT && "Expect a vector type for interleaved memory op"); |
873 | 584 | |
874 | 584 | unsigned NumElts = VT->getNumElements(); |
875 | 584 | assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor"); |
876 | 584 | |
877 | 584 | unsigned NumSubElts = NumElts / Factor; |
878 | 584 | VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts); |
879 | 584 | |
880 | 584 | // Firstly, the cost of load/store operation. |
881 | 584 | unsigned Cost; |
882 | 584 | if (UseMaskForCond || UseMaskForGaps573 ) |
883 | 12 | Cost = static_cast<T *>(this)->getMaskedMemoryOpCost( |
884 | 12 | Opcode, VecTy, Alignment, AddressSpace); |
885 | 572 | else |
886 | 572 | Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment, |
887 | 572 | AddressSpace); |
888 | 584 | |
889 | 584 | // Legalize the vector type, and get the legalized and unlegalized type |
890 | 584 | // sizes. |
891 | 584 | MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; |
892 | 584 | unsigned VecTySize = |
893 | 584 | static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy); |
894 | 584 | unsigned VecTyLTSize = VecTyLT.getStoreSize(); |
895 | 584 | |
896 | 584 | // Return the ceiling of dividing A by B. |
897 | 584 | auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }434 ; llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const Line | Count | Source | 897 | 416 | auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }; |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned 
int)::operator()(unsigned int, unsigned int) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const Line | Count | Source | 897 | 18 | auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }; |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const |
898 | 584 | |
899 | 584 | // Scale the cost of the memory operation by the fraction of legalized |
900 | 584 | // instructions that will actually be used. We shouldn't account for the |
901 | 584 | // cost of dead instructions since they will be removed. |
902 | 584 | // |
903 | 584 | // E.g., An interleaved load of factor 8: |
904 | 584 | // %vec = load <16 x i64>, <16 x i64>* %ptr |
905 | 584 | // %v0 = shufflevector %vec, undef, <0, 8> |
906 | 584 | // |
907 | 584 | // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be |
908 | 584 | // used (those corresponding to elements [0:1] and [8:9] of the unlegalized |
909 | 584 | // type). The other loads are unused. |
910 | 584 | // |
911 | 584 | // We only scale the cost of loads since interleaved store groups aren't |
912 | 584 | // allowed to have gaps. |
913 | 584 | if (Opcode == Instruction::Load && VecTySize > VecTyLTSize366 ) { |
914 | 217 | // The number of loads of a legal type it will take to represent a load |
915 | 217 | // of the unlegalized vector type. |
916 | 217 | unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize); |
917 | 217 | |
918 | 217 | // The number of elements of the unlegalized type that correspond to a |
919 | 217 | // single legal instruction. |
920 | 217 | unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts); |
921 | 217 | |
922 | 217 | // Determine which legal instructions will be used. |
923 | 217 | BitVector UsedInsts(NumLegalInsts, false); |
924 | 217 | for (unsigned Index : Indices) |
925 | 3.24k | for (unsigned Elt = 0; 851 Elt < NumSubElts; ++Elt2.39k ) |
926 | 2.39k | UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst); |
927 | 217 | |
928 | 217 | // Scale the cost of the load by the fraction of legal instructions that |
929 | 217 | // will be used. |
930 | 217 | Cost *= UsedInsts.count() / NumLegalInsts; |
931 | 217 | } |
932 | 584 | |
933 | 584 | // Then plus the cost of interleave operation. |
934 | 584 | if (Opcode == Instruction::Load) { |
935 | 366 | // The interleave cost is similar to extract sub vectors' elements |
936 | 366 | // from the wide vector, and insert them into sub vectors. |
937 | 366 | // |
938 | 366 | // E.g. An interleaved load of factor 2 (with one member of index 0): |
939 | 366 | // %vec = load <8 x i32>, <8 x i32>* %ptr |
940 | 366 | // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0 |
941 | 366 | // The cost is estimated as extract elements at 0, 2, 4, 6 from the |
942 | 366 | // <8 x i32> vector and insert them into a <4 x i32> vector. |
943 | 366 | |
944 | 366 | assert(Indices.size() <= Factor && |
945 | 366 | "Interleaved memory op has too many members"); |
946 | 366 | |
947 | 1.19k | for (unsigned Index : Indices) { |
948 | 1.19k | assert(Index < Factor && "Invalid index for interleaved memory op"); |
949 | 1.19k | |
950 | 1.19k | // Extract elements from loaded vector for each sub vector. |
951 | 4.45k | for (unsigned i = 0; i < NumSubElts; i++3.25k ) |
952 | 3.25k | Cost += static_cast<T *>(this)->getVectorInstrCost( |
953 | 3.25k | Instruction::ExtractElement, VT, Index + i * Factor); |
954 | 1.19k | } |
955 | 366 | |
956 | 366 | unsigned InsSubCost = 0; |
957 | 1.37k | for (unsigned i = 0; i < NumSubElts; i++1.01k ) |
958 | 1.01k | InsSubCost += static_cast<T *>(this)->getVectorInstrCost( |
959 | 1.01k | Instruction::InsertElement, SubVT, i); |
960 | 366 | |
961 | 366 | Cost += Indices.size() * InsSubCost; |
962 | 366 | } else { |
963 | 218 | // The interleave cost is extract all elements from sub vectors, and |
964 | 218 | // insert them into the wide vector. |
965 | 218 | // |
966 | 218 | // E.g. An interleaved store of factor 2: |
967 | 218 | // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7> |
968 | 218 | // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr |
969 | 218 | // The cost is estimated as extract all elements from both <4 x i32> |
970 | 218 | // vectors and insert into the <8 x i32> vector. |
971 | 218 | |
972 | 218 | unsigned ExtSubCost = 0; |
973 | 864 | for (unsigned i = 0; i < NumSubElts; i++646 ) |
974 | 646 | ExtSubCost += static_cast<T *>(this)->getVectorInstrCost( |
975 | 646 | Instruction::ExtractElement, SubVT, i); |
976 | 218 | Cost += ExtSubCost * Factor; |
977 | 218 | |
978 | 2.69k | for (unsigned i = 0; i < NumElts; i++2.47k ) |
979 | 2.47k | Cost += static_cast<T *>(this) |
980 | 2.47k | ->getVectorInstrCost(Instruction::InsertElement, VT, i); |
981 | 218 | } |
982 | 584 | |
983 | 584 | if (!UseMaskForCond) |
984 | 573 | return Cost; |
985 | 11 | |
986 | 11 | Type *I8Type = Type::getInt8Ty(VT->getContext()); |
987 | 11 | VectorType *MaskVT = VectorType::get(I8Type, NumElts); |
988 | 11 | SubVT = VectorType::get(I8Type, NumSubElts); |
989 | 11 | |
990 | 11 | // The Mask shuffling cost is extract all the elements of the Mask |
991 | 11 | // and insert each of them Factor times into the wide vector: |
992 | 11 | // |
993 | 11 | // E.g. an interleaved group with factor 3: |
994 | 11 | // %mask = icmp ult <8 x i32> %vec1, %vec2 |
995 | 11 | // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, |
996 | 11 | // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> |
997 | 11 | // The cost is estimated as extract all mask elements from the <8xi1> mask |
998 | 11 | // vector and insert them factor times into the <24xi1> shuffled mask |
999 | 11 | // vector. |
1000 | 99 | for (unsigned i = 0; i < NumSubElts; i++88 ) |
1001 | 88 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
1002 | 88 | Instruction::ExtractElement, SubVT, i); |
1003 | 11 | |
1004 | 195 | for (unsigned i = 0; i < NumElts; i++184 ) |
1005 | 184 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
1006 | 184 | Instruction::InsertElement, MaskVT, i); |
1007 | 11 | |
1008 | 11 | // The Gaps mask is invariant and created outside the loop, therefore the |
1009 | 11 | // cost of creating it is not accounted for here. However if we have both |
1010 | 11 | // a MaskForGaps and some other mask that guards the execution of the |
1011 | 11 | // memory access, we need to account for the cost of And-ing the two masks |
1012 | 11 | // inside the loop. |
1013 | 11 | if (UseMaskForGaps) |
1014 | 4 | Cost += static_cast<T *>(this)->getArithmeticInstrCost( |
1015 | 4 | BinaryOperator::And, MaskVT); |
1016 | 11 | |
1017 | 11 | return Cost; |
1018 | 11 | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool) Line | Count | Source | 870 | 488 | bool UseMaskForGaps = false) { | 871 | 488 | VectorType *VT = dyn_cast<VectorType>(VecTy); | 872 | 488 | assert(VT && "Expect a vector type for interleaved memory op"); | 873 | 488 | | 874 | 488 | unsigned NumElts = VT->getNumElements(); | 875 | 488 | assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor"); | 876 | 488 | | 877 | 488 | unsigned NumSubElts = NumElts / Factor; | 878 | 488 | VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts); | 879 | 488 | | 880 | 488 | // Firstly, the cost of load/store operation. | 881 | 488 | unsigned Cost; | 882 | 488 | if (UseMaskForCond || UseMaskForGaps) | 883 | 0 | Cost = static_cast<T *>(this)->getMaskedMemoryOpCost( | 884 | 0 | Opcode, VecTy, Alignment, AddressSpace); | 885 | 488 | else | 886 | 488 | Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment, | 887 | 488 | AddressSpace); | 888 | 488 | | 889 | 488 | // Legalize the vector type, and get the legalized and unlegalized type | 890 | 488 | // sizes. | 891 | 488 | MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; | 892 | 488 | unsigned VecTySize = | 893 | 488 | static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy); | 894 | 488 | unsigned VecTyLTSize = VecTyLT.getStoreSize(); | 895 | 488 | | 896 | 488 | // Return the ceiling of dividing A by B. | 897 | 488 | auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }; | 898 | 488 | | 899 | 488 | // Scale the cost of the memory operation by the fraction of legalized | 900 | 488 | // instructions that will actually be used. We shouldn't account for the | 901 | 488 | // cost of dead instructions since they will be removed. 
| 902 | 488 | // | 903 | 488 | // E.g., An interleaved load of factor 8: | 904 | 488 | // %vec = load <16 x i64>, <16 x i64>* %ptr | 905 | 488 | // %v0 = shufflevector %vec, undef, <0, 8> | 906 | 488 | // | 907 | 488 | // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be | 908 | 488 | // used (those corresponding to elements [0:1] and [8:9] of the unlegalized | 909 | 488 | // type). The other loads are unused. | 910 | 488 | // | 911 | 488 | // We only scale the cost of loads since interleaved store groups aren't | 912 | 488 | // allowed to have gaps. | 913 | 488 | if (Opcode == Instruction::Load && VecTySize > VecTyLTSize338 ) { | 914 | 208 | // The number of loads of a legal type it will take to represent a load | 915 | 208 | // of the unlegalized vector type. | 916 | 208 | unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize); | 917 | 208 | | 918 | 208 | // The number of elements of the unlegalized type that correspond to a | 919 | 208 | // single legal instruction. | 920 | 208 | unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts); | 921 | 208 | | 922 | 208 | // Determine which legal instructions will be used. | 923 | 208 | BitVector UsedInsts(NumLegalInsts, false); | 924 | 208 | for (unsigned Index : Indices) | 925 | 3.19k | for (unsigned Elt = 0; 840 Elt < NumSubElts; ++Elt2.35k ) | 926 | 2.35k | UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst); | 927 | 208 | | 928 | 208 | // Scale the cost of the load by the fraction of legal instructions that | 929 | 208 | // will be used. | 930 | 208 | Cost *= UsedInsts.count() / NumLegalInsts; | 931 | 208 | } | 932 | 488 | | 933 | 488 | // Then plus the cost of interleave operation. | 934 | 488 | if (Opcode == Instruction::Load) { | 935 | 338 | // The interleave cost is similar to extract sub vectors' elements | 936 | 338 | // from the wide vector, and insert them into sub vectors. | 937 | 338 | // | 938 | 338 | // E.g. 
An interleaved load of factor 2 (with one member of index 0): | 939 | 338 | // %vec = load <8 x i32>, <8 x i32>* %ptr | 940 | 338 | // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0 | 941 | 338 | // The cost is estimated as extract elements at 0, 2, 4, 6 from the | 942 | 338 | // <8 x i32> vector and insert them into a <4 x i32> vector. | 943 | 338 | | 944 | 338 | assert(Indices.size() <= Factor && | 945 | 338 | "Interleaved memory op has too many members"); | 946 | 338 | | 947 | 1.15k | for (unsigned Index : Indices) { | 948 | 1.15k | assert(Index < Factor && "Invalid index for interleaved memory op"); | 949 | 1.15k | | 950 | 1.15k | // Extract elements from loaded vector for each sub vector. | 951 | 4.22k | for (unsigned i = 0; i < NumSubElts; i++3.06k ) | 952 | 3.06k | Cost += static_cast<T *>(this)->getVectorInstrCost( | 953 | 3.06k | Instruction::ExtractElement, VT, Index + i * Factor); | 954 | 1.15k | } | 955 | 338 | | 956 | 338 | unsigned InsSubCost = 0; | 957 | 1.22k | for (unsigned i = 0; i < NumSubElts; i++882 ) | 958 | 882 | InsSubCost += static_cast<T *>(this)->getVectorInstrCost( | 959 | 882 | Instruction::InsertElement, SubVT, i); | 960 | 338 | | 961 | 338 | Cost += Indices.size() * InsSubCost; | 962 | 338 | } else { | 963 | 150 | // The interleave cost is extract all elements from sub vectors, and | 964 | 150 | // insert them into the wide vector. | 965 | 150 | // | 966 | 150 | // E.g. An interleaved store of factor 2: | 967 | 150 | // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7> | 968 | 150 | // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr | 969 | 150 | // The cost is estimated as extract all elements from both <4 x i32> | 970 | 150 | // vectors and insert into the <8 x i32> vector. 
| 971 | 150 | | 972 | 150 | unsigned ExtSubCost = 0; | 973 | 580 | for (unsigned i = 0; i < NumSubElts; i++430 ) | 974 | 430 | ExtSubCost += static_cast<T *>(this)->getVectorInstrCost( | 975 | 430 | Instruction::ExtractElement, SubVT, i); | 976 | 150 | Cost += ExtSubCost * Factor; | 977 | 150 | | 978 | 2.10k | for (unsigned i = 0; i < NumElts; i++1.95k ) | 979 | 1.95k | Cost += static_cast<T *>(this) | 980 | 1.95k | ->getVectorInstrCost(Instruction::InsertElement, VT, i); | 981 | 150 | } | 982 | 488 | | 983 | 488 | if (!UseMaskForCond) | 984 | 488 | return Cost; | 985 | 0 | | 986 | 0 | Type *I8Type = Type::getInt8Ty(VT->getContext()); | 987 | 0 | VectorType *MaskVT = VectorType::get(I8Type, NumElts); | 988 | 0 | SubVT = VectorType::get(I8Type, NumSubElts); | 989 | 0 |
| 990 | 0 | // The Mask shuffling cost is extract all the elements of the Mask | 991 | 0 | // and insert each of them Factor times into the wide vector: | 992 | 0 | // | 993 | 0 | // E.g. an interleaved group with factor 3: | 994 | 0 | // %mask = icmp ult <8 x i32> %vec1, %vec2 | 995 | 0 | // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, | 996 | 0 | // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> | 997 | 0 | // The cost is estimated as extract all mask elements from the <8xi1> mask | 998 | 0 | // vector and insert them factor times into the <24xi1> shuffled mask | 999 | 0 | // vector. | 1000 | 0 | for (unsigned i = 0; i < NumSubElts; i++) | 1001 | 0 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 1002 | 0 | Instruction::ExtractElement, SubVT, i); | 1003 | 0 |
| 1004 | 0 | for (unsigned i = 0; i < NumElts; i++) | 1005 | 0 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 1006 | 0 | Instruction::InsertElement, MaskVT, i); | 1007 | 0 |
| 1008 | 0 | // The Gaps mask is invariant and created outside the loop, therefore the | 1009 | 0 | // cost of creating it is not accounted for here. However if we have both | 1010 | 0 | // a MaskForGaps and some other mask that guards the execution of the | 1011 | 0 | // memory access, we need to account for the cost of And-ing the two masks | 1012 | 0 | // inside the loop. | 1013 | 0 | if (UseMaskForGaps) | 1014 | 0 | Cost += static_cast<T *>(this)->getArithmeticInstrCost( | 1015 | 0 | BinaryOperator::And, MaskVT); | 1016 | 0 |
| 1017 | 0 | return Cost; | 1018 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool) llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool) Line | Count | Source | 870 | 18 | bool UseMaskForGaps = false) { | 871 | 18 | VectorType *VT = dyn_cast<VectorType>(VecTy); | 872 | 18 | assert(VT && "Expect a vector type for interleaved memory op"); | 873 | 18 | | 874 | 18 | unsigned NumElts = VT->getNumElements(); | 875 | 18 | assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor"); | 876 | 18 | | 877 | 18 | unsigned NumSubElts = NumElts / Factor; | 878 | 18 | VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts); | 879 | 18 | | 880 | 18 | // Firstly, the cost of load/store operation. | 881 | 18 | unsigned Cost; | 882 | 18 | if (UseMaskForCond || UseMaskForGaps) | 883 | 0 | Cost = static_cast<T *>(this)->getMaskedMemoryOpCost( | 884 | 0 | Opcode, VecTy, Alignment, AddressSpace); | 885 | 18 | else | 886 | 18 | Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment, | 887 | 18 | AddressSpace); | 888 | 18 | | 889 | 18 | // Legalize the vector type, and get the legalized and unlegalized type | 890 | 18 | // sizes. | 891 | 18 | MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; | 892 | 18 | unsigned VecTySize = | 893 | 18 | static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy); | 894 | 18 | unsigned VecTyLTSize = VecTyLT.getStoreSize(); | 895 | 18 | | 896 | 18 | // Return the ceiling of dividing A by B. 
| 897 | 18 | auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }; | 898 | 18 | | 899 | 18 | // Scale the cost of the memory operation by the fraction of legalized | 900 | 18 | // instructions that will actually be used. We shouldn't account for the | 901 | 18 | // cost of dead instructions since they will be removed. | 902 | 18 | // | 903 | 18 | // E.g., An interleaved load of factor 8: | 904 | 18 | // %vec = load <16 x i64>, <16 x i64>* %ptr | 905 | 18 | // %v0 = shufflevector %vec, undef, <0, 8> | 906 | 18 | // | 907 | 18 | // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be | 908 | 18 | // used (those corresponding to elements [0:1] and [8:9] of the unlegalized | 909 | 18 | // type). The other loads are unused. | 910 | 18 | // | 911 | 18 | // We only scale the cost of loads since interleaved store groups aren't | 912 | 18 | // allowed to have gaps. | 913 | 18 | if (Opcode == Instruction::Load && VecTySize > VecTyLTSize0 ) { | 914 | 0 | // The number of loads of a legal type it will take to represent a load | 915 | 0 | // of the unlegalized vector type. | 916 | 0 | unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize); | 917 | 0 |
| 918 | 0 | // The number of elements of the unlegalized type that correspond to a | 919 | 0 | // single legal instruction. | 920 | 0 | unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts); | 921 | 0 |
| 922 | 0 | // Determine which legal instructions will be used. | 923 | 0 | BitVector UsedInsts(NumLegalInsts, false); | 924 | 0 | for (unsigned Index : Indices) | 925 | 0 | for (unsigned Elt = 0; Elt < NumSubElts; ++Elt) | 926 | 0 | UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst); | 927 | 0 |
| 928 | 0 | // Scale the cost of the load by the fraction of legal instructions that | 929 | 0 | // will be used. | 930 | 0 | Cost *= UsedInsts.count() / NumLegalInsts; | 931 | 0 | } | 932 | 18 | | 933 | 18 | // Then plus the cost of interleave operation. | 934 | 18 | if (Opcode == Instruction::Load) { | 935 | 0 | // The interleave cost is similar to extract sub vectors' elements | 936 | 0 | // from the wide vector, and insert them into sub vectors. | 937 | 0 | // | 938 | 0 | // E.g. An interleaved load of factor 2 (with one member of index 0): | 939 | 0 | // %vec = load <8 x i32>, <8 x i32>* %ptr | 940 | 0 | // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0 | 941 | 0 | // The cost is estimated as extract elements at 0, 2, 4, 6 from the | 942 | 0 | // <8 x i32> vector and insert them into a <4 x i32> vector. | 943 | 0 |
| 944 | 0 | assert(Indices.size() <= Factor && | 945 | 0 | "Interleaved memory op has too many members"); | 946 | 0 |
| 947 | 0 | for (unsigned Index : Indices) { | 948 | 0 | assert(Index < Factor && "Invalid index for interleaved memory op"); | 949 | 0 |
| 950 | 0 | // Extract elements from loaded vector for each sub vector. | 951 | 0 | for (unsigned i = 0; i < NumSubElts; i++) | 952 | 0 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 953 | 0 | Instruction::ExtractElement, VT, Index + i * Factor); | 954 | 0 | } | 955 | 0 |
| 956 | 0 | unsigned InsSubCost = 0; | 957 | 0 | for (unsigned i = 0; i < NumSubElts; i++) | 958 | 0 | InsSubCost += static_cast<T *>(this)->getVectorInstrCost( | 959 | 0 | Instruction::InsertElement, SubVT, i); | 960 | 0 |
| 961 | 0 | Cost += Indices.size() * InsSubCost; | 962 | 18 | } else { | 963 | 18 | // The interleave cost is extract all elements from sub vectors, and | 964 | 18 | // insert them into the wide vector. | 965 | 18 | // | 966 | 18 | // E.g. An interleaved store of factor 2: | 967 | 18 | // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7> | 968 | 18 | // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr | 969 | 18 | // The cost is estimated as extract all elements from both <4 x i32> | 970 | 18 | // vectors and insert into the <8 x i32> vector. | 971 | 18 | | 972 | 18 | unsigned ExtSubCost = 0; | 973 | 54 | for (unsigned i = 0; i < NumSubElts; i++36 ) | 974 | 36 | ExtSubCost += static_cast<T *>(this)->getVectorInstrCost( | 975 | 36 | Instruction::ExtractElement, SubVT, i); | 976 | 18 | Cost += ExtSubCost * Factor; | 977 | 18 | | 978 | 90 | for (unsigned i = 0; i < NumElts; i++72 ) | 979 | 72 | Cost += static_cast<T *>(this) | 980 | 72 | ->getVectorInstrCost(Instruction::InsertElement, VT, i); | 981 | 18 | } | 982 | 18 | | 983 | 18 | if (!UseMaskForCond) | 984 | 18 | return Cost; | 985 | 0 | | 986 | 0 | Type *I8Type = Type::getInt8Ty(VT->getContext()); | 987 | 0 | VectorType *MaskVT = VectorType::get(I8Type, NumElts); | 988 | 0 | SubVT = VectorType::get(I8Type, NumSubElts); | 989 | 0 |
| 990 | 0 | // The Mask shuffling cost is extract all the elements of the Mask | 991 | 0 | // and insert each of them Factor times into the wide vector: | 992 | 0 | // | 993 | 0 | // E.g. an interleaved group with factor 3: | 994 | 0 | // %mask = icmp ult <8 x i32> %vec1, %vec2 | 995 | 0 | // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, | 996 | 0 | // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> | 997 | 0 | // The cost is estimated as extract all mask elements from the <8xi1> mask | 998 | 0 | // vector and insert them factor times into the <24xi1> shuffled mask | 999 | 0 | // vector. | 1000 | 0 | for (unsigned i = 0; i < NumSubElts; i++) | 1001 | 0 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 1002 | 0 | Instruction::ExtractElement, SubVT, i); | 1003 | 0 |
| 1004 | 0 | for (unsigned i = 0; i < NumElts; i++) | 1005 | 0 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 1006 | 0 | Instruction::InsertElement, MaskVT, i); | 1007 | 0 |
| 1008 | 0 | // The Gaps mask is invariant and created outside the loop, therefore the | 1009 | 0 | // cost of creating it is not accounted for here. However if we have both | 1010 | 0 | // a MaskForGaps and some other mask that guards the execution of the | 1011 | 0 | // memory access, we need to account for the cost of And-ing the two masks | 1012 | 0 | // inside the loop. | 1013 | 0 | if (UseMaskForGaps) | 1014 | 0 | Cost += static_cast<T *>(this)->getArithmeticInstrCost( | 1015 | 0 | BinaryOperator::And, MaskVT); | 1016 | 0 |
| 1017 | 0 | return Cost; | 1018 | 0 | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool) Line | Count | Source | 870 | 5 | bool UseMaskForGaps = false) { | 871 | 5 | VectorType *VT = dyn_cast<VectorType>(VecTy); | 872 | 5 | assert(VT && "Expect a vector type for interleaved memory op"); | 873 | 5 | | 874 | 5 | unsigned NumElts = VT->getNumElements(); | 875 | 5 | assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor"); | 876 | 5 | | 877 | 5 | unsigned NumSubElts = NumElts / Factor; | 878 | 5 | VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts); | 879 | 5 | | 880 | 5 | // Firstly, the cost of load/store operation. | 881 | 5 | unsigned Cost; | 882 | 5 | if (UseMaskForCond || UseMaskForGaps) | 883 | 0 | Cost = static_cast<T *>(this)->getMaskedMemoryOpCost( | 884 | 0 | Opcode, VecTy, Alignment, AddressSpace); | 885 | 5 | else | 886 | 5 | Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment, | 887 | 5 | AddressSpace); | 888 | 5 | | 889 | 5 | // Legalize the vector type, and get the legalized and unlegalized type | 890 | 5 | // sizes. | 891 | 5 | MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; | 892 | 5 | unsigned VecTySize = | 893 | 5 | static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy); | 894 | 5 | unsigned VecTyLTSize = VecTyLT.getStoreSize(); | 895 | 5 | | 896 | 5 | // Return the ceiling of dividing A by B. | 897 | 5 | auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }; | 898 | 5 | | 899 | 5 | // Scale the cost of the memory operation by the fraction of legalized | 900 | 5 | // instructions that will actually be used. We shouldn't account for the | 901 | 5 | // cost of dead instructions since they will be removed. 
| 902 | 5 | // | 903 | 5 | // E.g., An interleaved load of factor 8: | 904 | 5 | // %vec = load <16 x i64>, <16 x i64>* %ptr | 905 | 5 | // %v0 = shufflevector %vec, undef, <0, 8> | 906 | 5 | // | 907 | 5 | // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be | 908 | 5 | // used (those corresponding to elements [0:1] and [8:9] of the unlegalized | 909 | 5 | // type). The other loads are unused. | 910 | 5 | // | 911 | 5 | // We only scale the cost of loads since interleaved store groups aren't | 912 | 5 | // allowed to have gaps. | 913 | 5 | if (Opcode == Instruction::Load && VecTySize > VecTyLTSize0 ) { | 914 | 0 | // The number of loads of a legal type it will take to represent a load | 915 | 0 | // of the unlegalized vector type. | 916 | 0 | unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize); | 917 | 0 |
| 918 | 0 | // The number of elements of the unlegalized type that correspond to a | 919 | 0 | // single legal instruction. | 920 | 0 | unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts); | 921 | 0 |
| 922 | 0 | // Determine which legal instructions will be used. | 923 | 0 | BitVector UsedInsts(NumLegalInsts, false); | 924 | 0 | for (unsigned Index : Indices) | 925 | 0 | for (unsigned Elt = 0; Elt < NumSubElts; ++Elt) | 926 | 0 | UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst); | 927 | 0 |
| 928 | 0 | // Scale the cost of the load by the fraction of legal instructions that | 929 | 0 | // will be used. | 930 | 0 | Cost *= UsedInsts.count() / NumLegalInsts; | 931 | 0 | } | 932 | 5 | | 933 | 5 | // Then plus the cost of interleave operation. | 934 | 5 | if (Opcode == Instruction::Load) { | 935 | 0 | // The interleave cost is similar to extract sub vectors' elements | 936 | 0 | // from the wide vector, and insert them into sub vectors. | 937 | 0 | // | 938 | 0 | // E.g. An interleaved load of factor 2 (with one member of index 0): | 939 | 0 | // %vec = load <8 x i32>, <8 x i32>* %ptr | 940 | 0 | // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0 | 941 | 0 | // The cost is estimated as extract elements at 0, 2, 4, 6 from the | 942 | 0 | // <8 x i32> vector and insert them into a <4 x i32> vector. | 943 | 0 |
| 944 | 0 | assert(Indices.size() <= Factor && | 945 | 0 | "Interleaved memory op has too many members"); | 946 | 0 |
| 947 | 0 | for (unsigned Index : Indices) { | 948 | 0 | assert(Index < Factor && "Invalid index for interleaved memory op"); | 949 | 0 |
| 950 | 0 | // Extract elements from loaded vector for each sub vector. | 951 | 0 | for (unsigned i = 0; i < NumSubElts; i++) | 952 | 0 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 953 | 0 | Instruction::ExtractElement, VT, Index + i * Factor); | 954 | 0 | } | 955 | 0 |
| 956 | 0 | unsigned InsSubCost = 0; | 957 | 0 | for (unsigned i = 0; i < NumSubElts; i++) | 958 | 0 | InsSubCost += static_cast<T *>(this)->getVectorInstrCost( | 959 | 0 | Instruction::InsertElement, SubVT, i); | 960 | 0 |
| 961 | 0 | Cost += Indices.size() * InsSubCost; | 962 | 5 | } else { | 963 | 5 | // The interleave cost is extract all elements from sub vectors, and | 964 | 5 | // insert them into the wide vector. | 965 | 5 | // | 966 | 5 | // E.g. An interleaved store of factor 2: | 967 | 5 | // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7> | 968 | 5 | // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr | 969 | 5 | // The cost is estimated as extract all elements from both <4 x i32> | 970 | 5 | // vectors and insert into the <8 x i32> vector. | 971 | 5 | | 972 | 5 | unsigned ExtSubCost = 0; | 973 | 67 | for (unsigned i = 0; i < NumSubElts; i++62 ) | 974 | 62 | ExtSubCost += static_cast<T *>(this)->getVectorInstrCost( | 975 | 62 | Instruction::ExtractElement, SubVT, i); | 976 | 5 | Cost += ExtSubCost * Factor; | 977 | 5 | | 978 | 129 | for (unsigned i = 0; i < NumElts; i++124 ) | 979 | 124 | Cost += static_cast<T *>(this) | 980 | 124 | ->getVectorInstrCost(Instruction::InsertElement, VT, i); | 981 | 5 | } | 982 | 5 | | 983 | 5 | if (!UseMaskForCond) | 984 | 5 | return Cost; | 985 | 0 | | 986 | 0 | Type *I8Type = Type::getInt8Ty(VT->getContext()); | 987 | 0 | VectorType *MaskVT = VectorType::get(I8Type, NumElts); | 988 | 0 | SubVT = VectorType::get(I8Type, NumSubElts); | 989 | 0 |
| 990 | 0 | // The Mask shuffling cost is extract all the elements of the Mask | 991 | 0 | // and insert each of them Factor times into the wide vector: | 992 | 0 | // | 993 | 0 | // E.g. an interleaved group with factor 3: | 994 | 0 | // %mask = icmp ult <8 x i32> %vec1, %vec2 | 995 | 0 | // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, | 996 | 0 | // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> | 997 | 0 | // The cost is estimated as extract all mask elements from the <8xi1> mask | 998 | 0 | // vector and insert them factor times into the <24xi1> shuffled mask | 999 | 0 | // vector. | 1000 | 0 | for (unsigned i = 0; i < NumSubElts; i++) | 1001 | 0 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 1002 | 0 | Instruction::ExtractElement, SubVT, i); | 1003 | 0 |
| 1004 | 0 | for (unsigned i = 0; i < NumElts; i++) | 1005 | 0 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 1006 | 0 | Instruction::InsertElement, MaskVT, i); | 1007 | 0 |
| 1008 | 0 | // The Gaps mask is invariant and created outside the loop, therefore the | 1009 | 0 | // cost of creating it is not accounted for here. However if we have both | 1010 | 0 | // a MaskForGaps and some other mask that guards the execution of the | 1011 | 0 | // memory access, we need to account for the cost of And-ing the two masks | 1012 | 0 | // inside the loop. | 1013 | 0 | if (UseMaskForGaps) | 1014 | 0 | Cost += static_cast<T *>(this)->getArithmeticInstrCost( | 1015 | 0 | BinaryOperator::And, MaskVT); | 1016 | 0 |
| 1017 | 0 | return Cost; | 1018 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool) Line | Count | Source | 870 | 73 | bool UseMaskForGaps = false) { | 871 | 73 | VectorType *VT = dyn_cast<VectorType>(VecTy); | 872 | 73 | assert(VT && "Expect a vector type for interleaved memory op"); | 873 | 73 | | 874 | 73 | unsigned NumElts = VT->getNumElements(); | 875 | 73 | assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor"); 
| 876 | 73 | | 877 | 73 | unsigned NumSubElts = NumElts / Factor; | 878 | 73 | VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts); | 879 | 73 | | 880 | 73 | // Firstly, the cost of load/store operation. | 881 | 73 | unsigned Cost; | 882 | 73 | if (UseMaskForCond || UseMaskForGaps62 ) | 883 | 12 | Cost = static_cast<T *>(this)->getMaskedMemoryOpCost( | 884 | 12 | Opcode, VecTy, Alignment, AddressSpace); | 885 | 61 | else | 886 | 61 | Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment, | 887 | 61 | AddressSpace); | 888 | 73 | | 889 | 73 | // Legalize the vector type, and get the legalized and unlegalized type | 890 | 73 | // sizes. | 891 | 73 | MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; | 892 | 73 | unsigned VecTySize = | 893 | 73 | static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy); | 894 | 73 | unsigned VecTyLTSize = VecTyLT.getStoreSize(); | 895 | 73 | | 896 | 73 | // Return the ceiling of dividing A by B. | 897 | 73 | auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }; | 898 | 73 | | 899 | 73 | // Scale the cost of the memory operation by the fraction of legalized | 900 | 73 | // instructions that will actually be used. We shouldn't account for the | 901 | 73 | // cost of dead instructions since they will be removed. | 902 | 73 | // | 903 | 73 | // E.g., An interleaved load of factor 8: | 904 | 73 | // %vec = load <16 x i64>, <16 x i64>* %ptr | 905 | 73 | // %v0 = shufflevector %vec, undef, <0, 8> | 906 | 73 | // | 907 | 73 | // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be | 908 | 73 | // used (those corresponding to elements [0:1] and [8:9] of the unlegalized | 909 | 73 | // type). The other loads are unused. | 910 | 73 | // | 911 | 73 | // We only scale the cost of loads since interleaved store groups aren't | 912 | 73 | // allowed to have gaps. 
| 913 | 73 | if (Opcode == Instruction::Load && VecTySize > VecTyLTSize28 ) { | 914 | 9 | // The number of loads of a legal type it will take to represent a load | 915 | 9 | // of the unlegalized vector type. | 916 | 9 | unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize); | 917 | 9 | | 918 | 9 | // The number of elements of the unlegalized type that correspond to a | 919 | 9 | // single legal instruction. | 920 | 9 | unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts); | 921 | 9 | | 922 | 9 | // Determine which legal instructions will be used. | 923 | 9 | BitVector UsedInsts(NumLegalInsts, false); | 924 | 9 | for (unsigned Index : Indices) | 925 | 45 | for (unsigned Elt = 0; 11 Elt < NumSubElts; ++Elt34 ) | 926 | 34 | UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst); | 927 | 9 | | 928 | 9 | // Scale the cost of the load by the fraction of legal instructions that | 929 | 9 | // will be used. | 930 | 9 | Cost *= UsedInsts.count() / NumLegalInsts; | 931 | 9 | } | 932 | 73 | | 933 | 73 | // Then plus the cost of interleave operation. | 934 | 73 | if (Opcode == Instruction::Load) { | 935 | 28 | // The interleave cost is similar to extract sub vectors' elements | 936 | 28 | // from the wide vector, and insert them into sub vectors. | 937 | 28 | // | 938 | 28 | // E.g. An interleaved load of factor 2 (with one member of index 0): | 939 | 28 | // %vec = load <8 x i32>, <8 x i32>* %ptr | 940 | 28 | // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0 | 941 | 28 | // The cost is estimated as extract elements at 0, 2, 4, 6 from the | 942 | 28 | // <8 x i32> vector and insert them into a <4 x i32> vector. | 943 | 28 | | 944 | 28 | assert(Indices.size() <= Factor && | 945 | 28 | "Interleaved memory op has too many members"); | 946 | 28 | | 947 | 43 | for (unsigned Index : Indices) { | 948 | 43 | assert(Index < Factor && "Invalid index for interleaved memory op"); | 949 | 43 | | 950 | 43 | // Extract elements from loaded vector for each sub vector. 
| 951 | 233 | for (unsigned i = 0; i < NumSubElts; i++190 ) | 952 | 190 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 953 | 190 | Instruction::ExtractElement, VT, Index + i * Factor); | 954 | 43 | } | 955 | 28 | | 956 | 28 | unsigned InsSubCost = 0; | 957 | 156 | for (unsigned i = 0; i < NumSubElts; i++128 ) | 958 | 128 | InsSubCost += static_cast<T *>(this)->getVectorInstrCost( | 959 | 128 | Instruction::InsertElement, SubVT, i); | 960 | 28 | | 961 | 28 | Cost += Indices.size() * InsSubCost; | 962 | 45 | } else { | 963 | 45 | // The interleave cost is extract all elements from sub vectors, and | 964 | 45 | // insert them into the wide vector. | 965 | 45 | // | 966 | 45 | // E.g. An interleaved store of factor 2: | 967 | 45 | // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7> | 968 | 45 | // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr | 969 | 45 | // The cost is estimated as extract all elements from both <4 x i32> | 970 | 45 | // vectors and insert into the <8 x i32> vector. | 971 | 45 | | 972 | 45 | unsigned ExtSubCost = 0; | 973 | 163 | for (unsigned i = 0; i < NumSubElts; i++118 ) | 974 | 118 | ExtSubCost += static_cast<T *>(this)->getVectorInstrCost( | 975 | 118 | Instruction::ExtractElement, SubVT, i); | 976 | 45 | Cost += ExtSubCost * Factor; | 977 | 45 | | 978 | 367 | for (unsigned i = 0; i < NumElts; i++322 ) | 979 | 322 | Cost += static_cast<T *>(this) | 980 | 322 | ->getVectorInstrCost(Instruction::InsertElement, VT, i); | 981 | 45 | } | 982 | 73 | | 983 | 73 | if (!UseMaskForCond) | 984 | 62 | return Cost; | 985 | 11 | | 986 | 11 | Type *I8Type = Type::getInt8Ty(VT->getContext()); | 987 | 11 | VectorType *MaskVT = VectorType::get(I8Type, NumElts); | 988 | 11 | SubVT = VectorType::get(I8Type, NumSubElts); | 989 | 11 | | 990 | 11 | // The Mask shuffling cost is extract all the elements of the Mask | 991 | 11 | // and insert each of them Factor times into the wide vector: | 992 | 11 | // | 993 | 11 | // E.g. 
an interleaved group with factor 3: | 994 | 11 | // %mask = icmp ult <8 x i32> %vec1, %vec2 | 995 | 11 | // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, | 996 | 11 | // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> | 997 | 11 | // The cost is estimated as extract all mask elements from the <8xi1> mask | 998 | 11 | // vector and insert them factor times into the <24xi1> shuffled mask | 999 | 11 | // vector. | 1000 | 99 | for (unsigned i = 0; i < NumSubElts; i++88 ) | 1001 | 88 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 1002 | 88 | Instruction::ExtractElement, SubVT, i); | 1003 | 11 | | 1004 | 195 | for (unsigned i = 0; i < NumElts; i++184 ) | 1005 | 184 | Cost += static_cast<T *>(this)->getVectorInstrCost( | 1006 | 184 | Instruction::InsertElement, MaskVT, i); | 1007 | 11 | | 1008 | 11 | // The Gaps mask is invariant and created outside the loop, therefore the | 1009 | 11 | // cost of creating it is not accounted for here. However if we have both | 1010 | 11 | // a MaskForGaps and some other mask that guards the execution of the | 1011 | 11 | // memory access, we need to account for the cost of And-ing the two masks | 1012 | 11 | // inside the loop. | 1013 | 11 | if (UseMaskForGaps) | 1014 | 4 | Cost += static_cast<T *>(this)->getArithmeticInstrCost( | 1015 | 4 | BinaryOperator::And, MaskVT); | 1016 | 11 | | 1017 | 11 | return Cost; | 1018 | 11 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool) |
1019 | | |
1020 | | /// Get intrinsic cost based on arguments. |
1021 | | unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, |
1022 | | ArrayRef<Value *> Args, FastMathFlags FMF, |
1023 | 15.6k | unsigned VF = 1) { |
1024 | 15.6k | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements()4.96k : 110.6k ); |
1025 | 15.6k | assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); |
1026 | 15.6k | auto *ConcreteTTI = static_cast<T *>(this); |
1027 | 15.6k | |
1028 | 15.6k | switch (IID) { |
1029 | 15.6k | default: { |
1030 | 9.19k | // Assume that we need to scalarize this intrinsic. |
1031 | 9.19k | SmallVector<Type *, 4> Types; |
1032 | 17.8k | for (Value *Op : Args) { |
1033 | 17.8k | Type *OpTy = Op->getType(); |
1034 | 17.8k | assert(VF == 1 || !OpTy->isVectorTy()); |
1035 | 17.8k | Types.push_back(VF == 1 ? OpTy11.9k : VectorType::get(OpTy, VF)5.88k ); |
1036 | 17.8k | } |
1037 | 9.19k | |
1038 | 9.19k | if (VF > 1 && !RetTy->isVoidTy()3.95k ) |
1039 | 3.94k | RetTy = VectorType::get(RetTy, VF); |
1040 | 9.19k | |
1041 | 9.19k | // Compute the scalarization overhead based on Args for a vector |
1042 | 9.19k | // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while |
1043 | 9.19k | // CostModel will pass a vector RetTy and VF is 1. |
1044 | 9.19k | unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); |
1045 | 9.19k | if (RetVF > 1 || VF > 16.62k ) { |
1046 | 6.52k | ScalarizationCost = 0; |
1047 | 6.52k | if (!RetTy->isVoidTy()) |
1048 | 6.52k | ScalarizationCost += getScalarizationOverhead(RetTy, true, false); |
1049 | 6.52k | ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); |
1050 | 6.52k | } |
1051 | 9.19k | |
1052 | 9.19k | return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF, |
1053 | 9.19k | ScalarizationCost); |
1054 | 15.6k | } |
1055 | 15.6k | case Intrinsic::masked_scatter: { |
1056 | 378 | assert(VF == 1 && "Can't vectorize types here."); |
1057 | 378 | Value *Mask = Args[3]; |
1058 | 378 | bool VarMask = !isa<Constant>(Mask); |
1059 | 378 | unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue(); |
1060 | 378 | return ConcreteTTI->getGatherScatterOpCost( |
1061 | 378 | Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment); |
1062 | 15.6k | } |
1063 | 15.6k | case Intrinsic::masked_gather: { |
1064 | 463 | assert(VF == 1 && "Can't vectorize types here."); |
1065 | 463 | Value *Mask = Args[2]; |
1066 | 463 | bool VarMask = !isa<Constant>(Mask); |
1067 | 463 | unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue(); |
1068 | 463 | return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy, |
1069 | 463 | Args[0], VarMask, Alignment); |
1070 | 15.6k | } |
1071 | 15.6k | case Intrinsic::experimental_vector_reduce_add: |
1072 | 3.71k | case Intrinsic::experimental_vector_reduce_mul: |
1073 | 3.71k | case Intrinsic::experimental_vector_reduce_and: |
1074 | 3.71k | case Intrinsic::experimental_vector_reduce_or: |
1075 | 3.71k | case Intrinsic::experimental_vector_reduce_xor: |
1076 | 3.71k | case Intrinsic::experimental_vector_reduce_v2_fadd: |
1077 | 3.71k | case Intrinsic::experimental_vector_reduce_v2_fmul: |
1078 | 3.71k | case Intrinsic::experimental_vector_reduce_smax: |
1079 | 3.71k | case Intrinsic::experimental_vector_reduce_smin: |
1080 | 3.71k | case Intrinsic::experimental_vector_reduce_fmax: |
1081 | 3.71k | case Intrinsic::experimental_vector_reduce_fmin: |
1082 | 3.71k | case Intrinsic::experimental_vector_reduce_umax: |
1083 | 3.71k | case Intrinsic::experimental_vector_reduce_umin: |
1084 | 3.71k | return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); |
1085 | 3.71k | case Intrinsic::fshl: |
1086 | 1.87k | case Intrinsic::fshr: { |
1087 | 1.87k | Value *X = Args[0]; |
1088 | 1.87k | Value *Y = Args[1]; |
1089 | 1.87k | Value *Z = Args[2]; |
1090 | 1.87k | TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; |
1091 | 1.87k | TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); |
1092 | 1.87k | TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); |
1093 | 1.87k | TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); |
1094 | 1.87k | TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; |
1095 | 1.87k | OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2 |
1096 | 1.87k | : TTI::OP_None0 ; |
1097 | 1.87k | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) |
1098 | 1.87k | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) |
1099 | 1.87k | unsigned Cost = 0; |
1100 | 1.87k | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy); |
1101 | 1.87k | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy); |
1102 | 1.87k | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, |
1103 | 1.87k | OpKindX, OpKindZ, OpPropsX); |
1104 | 1.87k | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy, |
1105 | 1.87k | OpKindY, OpKindZ, OpPropsY); |
1106 | 1.87k | // Non-constant shift amounts requires a modulo. |
1107 | 1.87k | if (OpKindZ != TTI::OK_UniformConstantValue && |
1108 | 1.87k | OpKindZ != TTI::OK_NonUniformConstantValue1.40k ) |
1109 | 936 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy, |
1110 | 936 | OpKindZ, OpKindBW, OpPropsZ, |
1111 | 936 | OpPropsBW); |
1112 | 1.87k | // For non-rotates (X != Y) we must add shift-by-zero handling costs. |
1113 | 1.87k | if (X != Y) { |
1114 | 1.05k | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); |
1115 | 1.05k | if (RetVF > 1) |
1116 | 1.05k | CondTy = VectorType::get(CondTy, RetVF); |
1117 | 1.05k | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, |
1118 | 1.05k | CondTy, nullptr); |
1119 | 1.05k | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1120 | 1.05k | CondTy, nullptr); |
1121 | 1.05k | } |
1122 | 1.87k | return Cost; |
1123 | 1.87k | } |
1124 | 15.6k | } |
1125 | 15.6k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int) Line | Count | Source | 1023 | 1.91k | unsigned VF = 1) { | 1024 | 1.91k | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements()5 : 11.90k ); | 1025 | 1.91k | assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); | 1026 | 1.91k | auto *ConcreteTTI = static_cast<T *>(this); | 1027 | 1.91k | | 1028 | 1.91k | switch (IID) { | 1029 | 1.91k | default: { | 1030 | 1.88k | // Assume that we need to scalarize this intrinsic. | 1031 | 1.88k | SmallVector<Type *, 4> Types; | 1032 | 2.15k | for (Value *Op : Args) { | 1033 | 2.15k | Type *OpTy = Op->getType(); | 1034 | 2.15k | assert(VF == 1 || !OpTy->isVectorTy()); | 1035 | 2.15k | Types.push_back(VF == 1 ? OpTy302 : VectorType::get(OpTy, VF)1.85k ); | 1036 | 2.15k | } | 1037 | 1.88k | | 1038 | 1.88k | if (VF > 1 && !RetTy->isVoidTy()1.60k ) | 1039 | 1.60k | RetTy = VectorType::get(RetTy, VF); | 1040 | 1.88k | | 1041 | 1.88k | // Compute the scalarization overhead based on Args for a vector | 1042 | 1.88k | // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while | 1043 | 1.88k | // CostModel will pass a vector RetTy and VF is 1. 
| 1044 | 1.88k | unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); | 1045 | 1.88k | if (RetVF > 1 || VF > 11.88k ) { | 1046 | 1.61k | ScalarizationCost = 0; | 1047 | 1.61k | if (!RetTy->isVoidTy()) | 1048 | 1.61k | ScalarizationCost += getScalarizationOverhead(RetTy, true, false); | 1049 | 1.61k | ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); | 1050 | 1.61k | } | 1051 | 1.88k | | 1052 | 1.88k | return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF, | 1053 | 1.88k | ScalarizationCost); | 1054 | 1.91k | } | 1055 | 1.91k | case Intrinsic::masked_scatter: { | 1056 | 0 | assert(VF == 1 && "Can't vectorize types here."); | 1057 | 0 | Value *Mask = Args[3]; | 1058 | 0 | bool VarMask = !isa<Constant>(Mask); | 1059 | 0 | unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue(); | 1060 | 0 | return ConcreteTTI->getGatherScatterOpCost( | 1061 | 0 | Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment); | 1062 | 1.91k | } | 1063 | 1.91k | case Intrinsic::masked_gather: { | 1064 | 0 | assert(VF == 1 && "Can't vectorize types here."); | 1065 | 0 | Value *Mask = Args[2]; | 1066 | 0 | bool VarMask = !isa<Constant>(Mask); | 1067 | 0 | unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue(); | 1068 | 0 | return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy, | 1069 | 0 | Args[0], VarMask, Alignment); | 1070 | 1.91k | } | 1071 | 1.91k | case Intrinsic::experimental_vector_reduce_add: | 1072 | 27 | case Intrinsic::experimental_vector_reduce_mul: | 1073 | 27 | case Intrinsic::experimental_vector_reduce_and: | 1074 | 27 | case Intrinsic::experimental_vector_reduce_or: | 1075 | 27 | case Intrinsic::experimental_vector_reduce_xor: | 1076 | 27 | case Intrinsic::experimental_vector_reduce_v2_fadd: | 1077 | 27 | case Intrinsic::experimental_vector_reduce_v2_fmul: | 1078 | 27 | case Intrinsic::experimental_vector_reduce_smax: | 1079 | 27 | case Intrinsic::experimental_vector_reduce_smin: | 1080 | 27 | 
case Intrinsic::experimental_vector_reduce_fmax: | 1081 | 27 | case Intrinsic::experimental_vector_reduce_fmin: | 1082 | 27 | case Intrinsic::experimental_vector_reduce_umax: | 1083 | 27 | case Intrinsic::experimental_vector_reduce_umin: | 1084 | 27 | return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); | 1085 | 27 | case Intrinsic::fshl: | 1086 | 0 | case Intrinsic::fshr: { | 1087 | 0 | Value *X = Args[0]; | 1088 | 0 | Value *Y = Args[1]; | 1089 | 0 | Value *Z = Args[2]; | 1090 | 0 | TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; | 1091 | 0 | TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); | 1092 | 0 | TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); | 1093 | 0 | TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); | 1094 | 0 | TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; | 1095 | 0 | OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2 | 1096 | 0 | : TTI::OP_None; | 1097 | 0 | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) | 1098 | 0 | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) | 1099 | 0 | unsigned Cost = 0; | 1100 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy); | 1101 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy); | 1102 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, | 1103 | 0 | OpKindX, OpKindZ, OpPropsX); | 1104 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy, | 1105 | 0 | OpKindY, OpKindZ, OpPropsY); | 1106 | 0 | // Non-constant shift amounts requires a modulo. 
| 1107 | 0 | if (OpKindZ != TTI::OK_UniformConstantValue && | 1108 | 0 | OpKindZ != TTI::OK_NonUniformConstantValue) | 1109 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy, | 1110 | 0 | OpKindZ, OpKindBW, OpPropsZ, | 1111 | 0 | OpPropsBW); | 1112 | 0 | // For non-rotates (X != Y) we must add shift-by-zero handling costs. | 1113 | 0 | if (X != Y) { | 1114 | 0 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1115 | 0 | if (RetVF > 1) | 1116 | 0 | CondTy = VectorType::get(CondTy, RetVF); | 1117 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, | 1118 | 0 | CondTy, nullptr); | 1119 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1120 | 0 | CondTy, nullptr); | 1121 | 0 | } | 1122 | 0 | return Cost; | 1123 | 0 | } | 1124 | 1.91k | } | 1125 | 1.91k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int) Line | Count | Source | 1023 | 22 | unsigned VF = 1) { | 1024 | 22 | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements()7 : 115 ); | 1025 | 22 | assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); | 1026 | 22 | auto *ConcreteTTI = static_cast<T *>(this); | 1027 | 22 | | 1028 | 22 | switch (IID) { | 1029 | 22 | default: { | 1030 | 22 | // Assume that we need to scalarize this intrinsic. | 1031 | 22 | SmallVector<Type *, 4> Types; | 1032 | 34 | for (Value *Op : Args) { | 1033 | 34 | Type *OpTy = Op->getType(); | 1034 | 34 | assert(VF == 1 || !OpTy->isVectorTy()); | 1035 | 34 | Types.push_back(VF == 1 ? OpTy10 : VectorType::get(OpTy, VF)24 ); | 1036 | 34 | } | 1037 | 22 | | 1038 | 22 | if (VF > 1 && !RetTy->isVoidTy()12 ) | 1039 | 12 | RetTy = VectorType::get(RetTy, VF); | 1040 | 22 | | 1041 | 22 | // Compute the scalarization overhead based on Args for a vector | 1042 | 22 | // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while | 1043 | 22 | // CostModel will pass a vector RetTy and VF is 1. 
| 1044 | 22 | unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); | 1045 | 22 | if (RetVF > 1 || VF > 115 ) { | 1046 | 19 | ScalarizationCost = 0; | 1047 | 19 | if (!RetTy->isVoidTy()) | 1048 | 19 | ScalarizationCost += getScalarizationOverhead(RetTy, true, false); | 1049 | 19 | ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); | 1050 | 19 | } | 1051 | 22 | | 1052 | 22 | return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF, | 1053 | 22 | ScalarizationCost); | 1054 | 22 | } | 1055 | 22 | case Intrinsic::masked_scatter: { | 1056 | 0 | assert(VF == 1 && "Can't vectorize types here."); | 1057 | 0 | Value *Mask = Args[3]; | 1058 | 0 | bool VarMask = !isa<Constant>(Mask); | 1059 | 0 | unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue(); | 1060 | 0 | return ConcreteTTI->getGatherScatterOpCost( | 1061 | 0 | Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment); | 1062 | 22 | } | 1063 | 22 | case Intrinsic::masked_gather: { | 1064 | 0 | assert(VF == 1 && "Can't vectorize types here."); | 1065 | 0 | Value *Mask = Args[2]; | 1066 | 0 | bool VarMask = !isa<Constant>(Mask); | 1067 | 0 | unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue(); | 1068 | 0 | return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy, | 1069 | 0 | Args[0], VarMask, Alignment); | 1070 | 22 | } | 1071 | 22 | case Intrinsic::experimental_vector_reduce_add: | 1072 | 0 | case Intrinsic::experimental_vector_reduce_mul: | 1073 | 0 | case Intrinsic::experimental_vector_reduce_and: | 1074 | 0 | case Intrinsic::experimental_vector_reduce_or: | 1075 | 0 | case Intrinsic::experimental_vector_reduce_xor: | 1076 | 0 | case Intrinsic::experimental_vector_reduce_v2_fadd: | 1077 | 0 | case Intrinsic::experimental_vector_reduce_v2_fmul: | 1078 | 0 | case Intrinsic::experimental_vector_reduce_smax: | 1079 | 0 | case Intrinsic::experimental_vector_reduce_smin: | 1080 | 0 | case Intrinsic::experimental_vector_reduce_fmax: | 1081 | 0 | 
case Intrinsic::experimental_vector_reduce_fmin: | 1082 | 0 | case Intrinsic::experimental_vector_reduce_umax: | 1083 | 0 | case Intrinsic::experimental_vector_reduce_umin: | 1084 | 0 | return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); | 1085 | 0 | case Intrinsic::fshl: | 1086 | 0 | case Intrinsic::fshr: { | 1087 | 0 | Value *X = Args[0]; | 1088 | 0 | Value *Y = Args[1]; | 1089 | 0 | Value *Z = Args[2]; | 1090 | 0 | TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; | 1091 | 0 | TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); | 1092 | 0 | TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); | 1093 | 0 | TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); | 1094 | 0 | TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; | 1095 | 0 | OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2 | 1096 | 0 | : TTI::OP_None; | 1097 | 0 | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) | 1098 | 0 | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) | 1099 | 0 | unsigned Cost = 0; | 1100 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy); | 1101 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy); | 1102 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, | 1103 | 0 | OpKindX, OpKindZ, OpPropsX); | 1104 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy, | 1105 | 0 | OpKindY, OpKindZ, OpPropsY); | 1106 | 0 | // Non-constant shift amounts requires a modulo. | 1107 | 0 | if (OpKindZ != TTI::OK_UniformConstantValue && | 1108 | 0 | OpKindZ != TTI::OK_NonUniformConstantValue) | 1109 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy, | 1110 | 0 | OpKindZ, OpKindBW, OpPropsZ, | 1111 | 0 | OpPropsBW); | 1112 | 0 | // For non-rotates (X != Y) we must add shift-by-zero handling costs. 
| 1113 | 0 | if (X != Y) { | 1114 | 0 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1115 | 0 | if (RetVF > 1) | 1116 | 0 | CondTy = VectorType::get(CondTy, RetVF); | 1117 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, | 1118 | 0 | CondTy, nullptr); | 1119 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1120 | 0 | CondTy, nullptr); | 1121 | 0 | } | 1122 | 0 | return Cost; | 1123 | 0 | } | 1124 | 22 | } | 1125 | 22 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int) Line | Count | Source | 1023 | 270 | unsigned VF = 1) { | 1024 | 270 | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements()0 : 1); | 1025 | 270 | assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); | 1026 | 270 | auto *ConcreteTTI = static_cast<T *>(this); | 1027 | 270 | | 1028 | 270 | switch (IID) { | 1029 | 270 | default: { | 1030 | 270 | // Assume that we need to scalarize this intrinsic. | 1031 | 270 | SmallVector<Type *, 4> Types; | 1032 | 426 | for (Value *Op : Args) { | 1033 | 426 | Type *OpTy = Op->getType(); | 1034 | 426 | assert(VF == 1 || !OpTy->isVectorTy()); | 1035 | 426 | Types.push_back(VF == 1 ? OpTy0 : VectorType::get(OpTy, VF)); | 1036 | 426 | } | 1037 | 270 | | 1038 | 270 | if (VF > 1 && !RetTy->isVoidTy()) | 1039 | 270 | RetTy = VectorType::get(RetTy, VF); | 1040 | 270 | | 1041 | 270 | // Compute the scalarization overhead based on Args for a vector | 1042 | 270 | // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while | 1043 | 270 | // CostModel will pass a vector RetTy and VF is 1. 
| 1044 | 270 | unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); | 1045 | 270 | if (RetVF > 1 || VF > 1) { | 1046 | 270 | ScalarizationCost = 0; | 1047 | 270 | if (!RetTy->isVoidTy()) | 1048 | 270 | ScalarizationCost += getScalarizationOverhead(RetTy, true, false); | 1049 | 270 | ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); | 1050 | 270 | } | 1051 | 270 | | 1052 | 270 | return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF, | 1053 | 270 | ScalarizationCost); | 1054 | 270 | } | 1055 | 270 | case Intrinsic::masked_scatter: { | 1056 | 0 | assert(VF == 1 && "Can't vectorize types here."); | 1057 | 0 | Value *Mask = Args[3]; | 1058 | 0 | bool VarMask = !isa<Constant>(Mask); | 1059 | 0 | unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue(); | 1060 | 0 | return ConcreteTTI->getGatherScatterOpCost( | 1061 | 0 | Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment); | 1062 | 270 | } | 1063 | 270 | case Intrinsic::masked_gather: { | 1064 | 0 | assert(VF == 1 && "Can't vectorize types here."); | 1065 | 0 | Value *Mask = Args[2]; | 1066 | 0 | bool VarMask = !isa<Constant>(Mask); | 1067 | 0 | unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue(); | 1068 | 0 | return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy, | 1069 | 0 | Args[0], VarMask, Alignment); | 1070 | 270 | } | 1071 | 270 | case Intrinsic::experimental_vector_reduce_add: | 1072 | 0 | case Intrinsic::experimental_vector_reduce_mul: | 1073 | 0 | case Intrinsic::experimental_vector_reduce_and: | 1074 | 0 | case Intrinsic::experimental_vector_reduce_or: | 1075 | 0 | case Intrinsic::experimental_vector_reduce_xor: | 1076 | 0 | case Intrinsic::experimental_vector_reduce_v2_fadd: | 1077 | 0 | case Intrinsic::experimental_vector_reduce_v2_fmul: | 1078 | 0 | case Intrinsic::experimental_vector_reduce_smax: | 1079 | 0 | case Intrinsic::experimental_vector_reduce_smin: | 1080 | 0 | case Intrinsic::experimental_vector_reduce_fmax: 
| 1081 | 0 | case Intrinsic::experimental_vector_reduce_fmin: | 1082 | 0 | case Intrinsic::experimental_vector_reduce_umax: | 1083 | 0 | case Intrinsic::experimental_vector_reduce_umin: | 1084 | 0 | return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); | 1085 | 0 | case Intrinsic::fshl: | 1086 | 0 | case Intrinsic::fshr: { | 1087 | 0 | Value *X = Args[0]; | 1088 | 0 | Value *Y = Args[1]; | 1089 | 0 | Value *Z = Args[2]; | 1090 | 0 | TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; | 1091 | 0 | TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); | 1092 | 0 | TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); | 1093 | 0 | TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); | 1094 | 0 | TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; | 1095 | 0 | OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2 | 1096 | 0 | : TTI::OP_None; | 1097 | 0 | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) | 1098 | 0 | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) | 1099 | 0 | unsigned Cost = 0; | 1100 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy); | 1101 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy); | 1102 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, | 1103 | 0 | OpKindX, OpKindZ, OpPropsX); | 1104 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy, | 1105 | 0 | OpKindY, OpKindZ, OpPropsY); | 1106 | 0 | // Non-constant shift amounts requires a modulo. | 1107 | 0 | if (OpKindZ != TTI::OK_UniformConstantValue && | 1108 | 0 | OpKindZ != TTI::OK_NonUniformConstantValue) | 1109 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy, | 1110 | 0 | OpKindZ, OpKindBW, OpPropsZ, | 1111 | 0 | OpPropsBW); | 1112 | 0 | // For non-rotates (X != Y) we must add shift-by-zero handling costs. 
| 1113 | 0 | if (X != Y) { | 1114 | 0 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1115 | 0 | if (RetVF > 1) | 1116 | 0 | CondTy = VectorType::get(CondTy, RetVF); | 1117 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, | 1118 | 0 | CondTy, nullptr); | 1119 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1120 | 0 | CondTy, nullptr); | 1121 | 0 | } | 1122 | 0 | return Cost; | 1123 | 0 | } | 1124 | 270 | } | 1125 | 270 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int) llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int) Line | Count | Source | 1023 | 91 | unsigned VF = 1) { | 1024 | 91 | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements()1 : 190 ); | 1025 | 91 | assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); | 1026 | 91 | auto *ConcreteTTI = static_cast<T *>(this); | 1027 | 91 | | 1028 | 91 | switch (IID) { | 1029 | 91 | default: { | 1030 | 91 | // Assume that we need to scalarize this intrinsic. | 1031 | 91 | SmallVector<Type *, 4> Types; | 1032 | 101 | for (Value *Op : Args) { | 1033 | 101 | Type *OpTy = Op->getType(); | 1034 | 101 | assert(VF == 1 || !OpTy->isVectorTy()); | 1035 | 101 | Types.push_back(VF == 1 ? OpTy21 : VectorType::get(OpTy, VF)80 ); | 1036 | 101 | } | 1037 | 91 | | 1038 | 91 | if (VF > 1 && !RetTy->isVoidTy()72 ) | 1039 | 72 | RetTy = VectorType::get(RetTy, VF); | 1040 | 91 | | 1041 | 91 | // Compute the scalarization overhead based on Args for a vector | 1042 | 91 | // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while | 1043 | 91 | // CostModel will pass a vector RetTy and VF is 1. 
| 1044 | 91 | unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); | 1045 | 91 | if (RetVF > 1 || VF > 190 ) { | 1046 | 73 | ScalarizationCost = 0; | 1047 | 73 | if (!RetTy->isVoidTy()) | 1048 | 73 | ScalarizationCost += getScalarizationOverhead(RetTy, true, false); | 1049 | 73 | ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); | 1050 | 73 | } | 1051 | 91 | | 1052 | 91 | return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF, | 1053 | 91 | ScalarizationCost); | 1054 | 91 | } | 1055 | 91 | case Intrinsic::masked_scatter: { | 1056 | 0 | assert(VF == 1 && "Can't vectorize types here."); | 1057 | 0 | Value *Mask = Args[3]; | 1058 | 0 | bool VarMask = !isa<Constant>(Mask); | 1059 | 0 | unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue(); | 1060 | 0 | return ConcreteTTI->getGatherScatterOpCost( | 1061 | 0 | Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment); | 1062 | 91 | } | 1063 | 91 | case Intrinsic::masked_gather: { | 1064 | 0 | assert(VF == 1 && "Can't vectorize types here."); | 1065 | 0 | Value *Mask = Args[2]; | 1066 | 0 | bool VarMask = !isa<Constant>(Mask); | 1067 | 0 | unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue(); | 1068 | 0 | return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy, | 1069 | 0 | Args[0], VarMask, Alignment); | 1070 | 91 | } | 1071 | 91 | case Intrinsic::experimental_vector_reduce_add: | 1072 | 0 | case Intrinsic::experimental_vector_reduce_mul: | 1073 | 0 | case Intrinsic::experimental_vector_reduce_and: | 1074 | 0 | case Intrinsic::experimental_vector_reduce_or: | 1075 | 0 | case Intrinsic::experimental_vector_reduce_xor: | 1076 | 0 | case Intrinsic::experimental_vector_reduce_v2_fadd: | 1077 | 0 | case Intrinsic::experimental_vector_reduce_v2_fmul: | 1078 | 0 | case Intrinsic::experimental_vector_reduce_smax: | 1079 | 0 | case Intrinsic::experimental_vector_reduce_smin: | 1080 | 0 | case Intrinsic::experimental_vector_reduce_fmax: | 1081 | 0 | 
case Intrinsic::experimental_vector_reduce_fmin: | 1082 | 0 | case Intrinsic::experimental_vector_reduce_umax: | 1083 | 0 | case Intrinsic::experimental_vector_reduce_umin: | 1084 | 0 | return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); | 1085 | 0 | case Intrinsic::fshl: | 1086 | 0 | case Intrinsic::fshr: { | 1087 | 0 | Value *X = Args[0]; | 1088 | 0 | Value *Y = Args[1]; | 1089 | 0 | Value *Z = Args[2]; | 1090 | 0 | TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; | 1091 | 0 | TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); | 1092 | 0 | TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); | 1093 | 0 | TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); | 1094 | 0 | TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; | 1095 | 0 | OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2 | 1096 | 0 | : TTI::OP_None; | 1097 | 0 | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) | 1098 | 0 | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) | 1099 | 0 | unsigned Cost = 0; | 1100 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy); | 1101 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy); | 1102 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, | 1103 | 0 | OpKindX, OpKindZ, OpPropsX); | 1104 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy, | 1105 | 0 | OpKindY, OpKindZ, OpPropsY); | 1106 | 0 | // Non-constant shift amounts requires a modulo. | 1107 | 0 | if (OpKindZ != TTI::OK_UniformConstantValue && | 1108 | 0 | OpKindZ != TTI::OK_NonUniformConstantValue) | 1109 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy, | 1110 | 0 | OpKindZ, OpKindBW, OpPropsZ, | 1111 | 0 | OpPropsBW); | 1112 | 0 | // For non-rotates (X != Y) we must add shift-by-zero handling costs. 
| 1113 | 0 | if (X != Y) { | 1114 | 0 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1115 | 0 | if (RetVF > 1) | 1116 | 0 | CondTy = VectorType::get(CondTy, RetVF); | 1117 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, | 1118 | 0 | CondTy, nullptr); | 1119 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1120 | 0 | CondTy, nullptr); | 1121 | 0 | } | 1122 | 0 | return Cost; | 1123 | 0 | } | 1124 | 91 | } | 1125 | 91 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int) Line | Count | Source | 1023 | 29 | unsigned VF = 1) { | 1024 | 29 | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements()0 : 1); | 1025 | 29 | assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); | 1026 | 29 | auto *ConcreteTTI = static_cast<T *>(this); | 1027 | 29 | | 1028 | 29 | switch (IID) { | 1029 | 29 | default: { | 1030 | 29 | // Assume that we need to scalarize this intrinsic. | 1031 | 29 | SmallVector<Type *, 4> Types; | 1032 | 34 | for (Value *Op : Args) { | 1033 | 34 | Type *OpTy = Op->getType(); | 1034 | 34 | assert(VF == 1 || !OpTy->isVectorTy()); | 1035 | 34 | Types.push_back(VF == 1 ? OpTy26 : VectorType::get(OpTy, VF)8 ); | 1036 | 34 | } | 1037 | 29 | | 1038 | 29 | if (VF > 1 && !RetTy->isVoidTy()4 ) | 1039 | 0 | RetTy = VectorType::get(RetTy, VF); | 1040 | 29 | | 1041 | 29 | // Compute the scalarization overhead based on Args for a vector | 1042 | 29 | // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while | 1043 | 29 | // CostModel will pass a vector RetTy and VF is 1. 
| 1044 | 29 | unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); | 1045 | 29 | if (RetVF > 1 || VF > 1) { | 1046 | 4 | ScalarizationCost = 0; | 1047 | 4 | if (!RetTy->isVoidTy()) | 1048 | 0 | ScalarizationCost += getScalarizationOverhead(RetTy, true, false); | 1049 | 4 | ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); | 1050 | 4 | } | 1051 | 29 | | 1052 | 29 | return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF, | 1053 | 29 | ScalarizationCost); | 1054 | 29 | } | 1055 | 29 | case Intrinsic::masked_scatter: { | 1056 | 0 | assert(VF == 1 && "Can't vectorize types here."); | 1057 | 0 | Value *Mask = Args[3]; | 1058 | 0 | bool VarMask = !isa<Constant>(Mask); | 1059 | 0 | unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue(); | 1060 | 0 | return ConcreteTTI->getGatherScatterOpCost( | 1061 | 0 | Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment); | 1062 | 29 | } | 1063 | 29 | case Intrinsic::masked_gather: { | 1064 | 0 | assert(VF == 1 && "Can't vectorize types here."); | 1065 | 0 | Value *Mask = Args[2]; | 1066 | 0 | bool VarMask = !isa<Constant>(Mask); | 1067 | 0 | unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue(); | 1068 | 0 | return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy, | 1069 | 0 | Args[0], VarMask, Alignment); | 1070 | 29 | } | 1071 | 29 | case Intrinsic::experimental_vector_reduce_add: | 1072 | 0 | case Intrinsic::experimental_vector_reduce_mul: | 1073 | 0 | case Intrinsic::experimental_vector_reduce_and: | 1074 | 0 | case Intrinsic::experimental_vector_reduce_or: | 1075 | 0 | case Intrinsic::experimental_vector_reduce_xor: | 1076 | 0 | case Intrinsic::experimental_vector_reduce_v2_fadd: | 1077 | 0 | case Intrinsic::experimental_vector_reduce_v2_fmul: | 1078 | 0 | case Intrinsic::experimental_vector_reduce_smax: | 1079 | 0 | case Intrinsic::experimental_vector_reduce_smin: | 1080 | 0 | case Intrinsic::experimental_vector_reduce_fmax: | 1081 | 0 | case 
Intrinsic::experimental_vector_reduce_fmin: | 1082 | 0 | case Intrinsic::experimental_vector_reduce_umax: | 1083 | 0 | case Intrinsic::experimental_vector_reduce_umin: | 1084 | 0 | return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); | 1085 | 0 | case Intrinsic::fshl: | 1086 | 0 | case Intrinsic::fshr: { | 1087 | 0 | Value *X = Args[0]; | 1088 | 0 | Value *Y = Args[1]; | 1089 | 0 | Value *Z = Args[2]; | 1090 | 0 | TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; | 1091 | 0 | TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); | 1092 | 0 | TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); | 1093 | 0 | TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); | 1094 | 0 | TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; | 1095 | 0 | OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2 | 1096 | 0 | : TTI::OP_None; | 1097 | 0 | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) | 1098 | 0 | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) | 1099 | 0 | unsigned Cost = 0; | 1100 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy); | 1101 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy); | 1102 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, | 1103 | 0 | OpKindX, OpKindZ, OpPropsX); | 1104 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy, | 1105 | 0 | OpKindY, OpKindZ, OpPropsY); | 1106 | 0 | // Non-constant shift amounts requires a modulo. | 1107 | 0 | if (OpKindZ != TTI::OK_UniformConstantValue && | 1108 | 0 | OpKindZ != TTI::OK_NonUniformConstantValue) | 1109 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy, | 1110 | 0 | OpKindZ, OpKindBW, OpPropsZ, | 1111 | 0 | OpPropsBW); | 1112 | 0 | // For non-rotates (X != Y) we must add shift-by-zero handling costs. 
| 1113 | 0 | if (X != Y) { | 1114 | 0 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1115 | 0 | if (RetVF > 1) | 1116 | 0 | CondTy = VectorType::get(CondTy, RetVF); | 1117 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, | 1118 | 0 | CondTy, nullptr); | 1119 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1120 | 0 | CondTy, nullptr); | 1121 | 0 | } | 1122 | 0 | return Cost; | 1123 | 0 | } | 1124 | 29 | } | 1125 | 29 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int) Line | Count | Source | 1023 | 13.2k | unsigned VF = 1) { | 1024 | 13.2k | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements()4.95k : 18.34k ); | 1025 | 13.2k | assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); | 1026 | 13.2k | auto *ConcreteTTI = static_cast<T *>(this); | 1027 | 13.2k | | 1028 | 13.2k | switch (IID) { | 1029 | 13.2k | default: { | 1030 | 6.89k | // Assume that we need to scalarize this intrinsic. | 1031 | 6.89k | SmallVector<Type *, 4> Types; | 1032 | 15.0k | for (Value *Op : Args) { | 1033 | 15.0k | Type *OpTy = Op->getType(); | 1034 | 15.0k | assert(VF == 1 || !OpTy->isVectorTy()); | 1035 | 15.0k | Types.push_back(VF == 1 ? OpTy11.5k : VectorType::get(OpTy, VF)3.49k ); | 1036 | 15.0k | } | 1037 | 6.89k | | 1038 | 6.89k | if (VF > 1 && !RetTy->isVoidTy()1.98k ) | 1039 | 1.98k | RetTy = VectorType::get(RetTy, VF); | 1040 | 6.89k | | 1041 | 6.89k | // Compute the scalarization overhead based on Args for a vector | 1042 | 6.89k | // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while | 1043 | 6.89k | // CostModel will pass a vector RetTy and VF is 1. 
| 1044 | 6.89k | unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); | 1045 | 6.89k | if (RetVF > 1 || VF > 14.33k ) { | 1046 | 4.54k | ScalarizationCost = 0; | 1047 | 4.54k | if (!RetTy->isVoidTy()) | 1048 | 4.54k | ScalarizationCost += getScalarizationOverhead(RetTy, true, false); | 1049 | 4.54k | ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); | 1050 | 4.54k | } | 1051 | 6.89k | | 1052 | 6.89k | return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF, | 1053 | 6.89k | ScalarizationCost); | 1054 | 13.2k | } | 1055 | 13.2k | case Intrinsic::masked_scatter: { | 1056 | 378 | assert(VF == 1 && "Can't vectorize types here."); | 1057 | 378 | Value *Mask = Args[3]; | 1058 | 378 | bool VarMask = !isa<Constant>(Mask); | 1059 | 378 | unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue(); | 1060 | 378 | return ConcreteTTI->getGatherScatterOpCost( | 1061 | 378 | Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment); | 1062 | 13.2k | } | 1063 | 13.2k | case Intrinsic::masked_gather: { | 1064 | 463 | assert(VF == 1 && "Can't vectorize types here."); | 1065 | 463 | Value *Mask = Args[2]; | 1066 | 463 | bool VarMask = !isa<Constant>(Mask); | 1067 | 463 | unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue(); | 1068 | 463 | return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy, | 1069 | 463 | Args[0], VarMask, Alignment); | 1070 | 13.2k | } | 1071 | 13.2k | case Intrinsic::experimental_vector_reduce_add: | 1072 | 3.68k | case Intrinsic::experimental_vector_reduce_mul: | 1073 | 3.68k | case Intrinsic::experimental_vector_reduce_and: | 1074 | 3.68k | case Intrinsic::experimental_vector_reduce_or: | 1075 | 3.68k | case Intrinsic::experimental_vector_reduce_xor: | 1076 | 3.68k | case Intrinsic::experimental_vector_reduce_v2_fadd: | 1077 | 3.68k | case Intrinsic::experimental_vector_reduce_v2_fmul: | 1078 | 3.68k | case Intrinsic::experimental_vector_reduce_smax: | 1079 | 3.68k | case 
Intrinsic::experimental_vector_reduce_smin: | 1080 | 3.68k | case Intrinsic::experimental_vector_reduce_fmax: | 1081 | 3.68k | case Intrinsic::experimental_vector_reduce_fmin: | 1082 | 3.68k | case Intrinsic::experimental_vector_reduce_umax: | 1083 | 3.68k | case Intrinsic::experimental_vector_reduce_umin: | 1084 | 3.68k | return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); | 1085 | 3.68k | case Intrinsic::fshl: | 1086 | 1.87k | case Intrinsic::fshr: { | 1087 | 1.87k | Value *X = Args[0]; | 1088 | 1.87k | Value *Y = Args[1]; | 1089 | 1.87k | Value *Z = Args[2]; | 1090 | 1.87k | TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; | 1091 | 1.87k | TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); | 1092 | 1.87k | TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); | 1093 | 1.87k | TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); | 1094 | 1.87k | TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; | 1095 | 1.87k | OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2 | 1096 | 1.87k | : TTI::OP_None0 ; | 1097 | 1.87k | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) | 1098 | 1.87k | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) | 1099 | 1.87k | unsigned Cost = 0; | 1100 | 1.87k | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy); | 1101 | 1.87k | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy); | 1102 | 1.87k | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, | 1103 | 1.87k | OpKindX, OpKindZ, OpPropsX); | 1104 | 1.87k | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy, | 1105 | 1.87k | OpKindY, OpKindZ, OpPropsY); | 1106 | 1.87k | // Non-constant shift amounts requires a modulo. 
| 1107 | 1.87k | if (OpKindZ != TTI::OK_UniformConstantValue && | 1108 | 1.87k | OpKindZ != TTI::OK_NonUniformConstantValue1.40k ) | 1109 | 936 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy, | 1110 | 936 | OpKindZ, OpKindBW, OpPropsZ, | 1111 | 936 | OpPropsBW); | 1112 | 1.87k | // For non-rotates (X != Y) we must add shift-by-zero handling costs. | 1113 | 1.87k | if (X != Y) { | 1114 | 1.05k | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1115 | 1.05k | if (RetVF > 1) | 1116 | 1.05k | CondTy = VectorType::get(CondTy, RetVF); | 1117 | 1.05k | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, | 1118 | 1.05k | CondTy, nullptr); | 1119 | 1.05k | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1120 | 1.05k | CondTy, nullptr); | 1121 | 1.05k | } | 1122 | 1.87k | return Cost; | 1123 | 1.87k | } | 1124 | 13.2k | } | 1125 | 13.2k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int) |
1126 | | |
1127 | | /// Get intrinsic cost based on argument types. |
1128 | | /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the |
1129 | | /// cost of scalarizing the arguments and the return value will be computed |
1130 | | /// based on types. |
1131 | | unsigned getIntrinsicInstrCost( |
1132 | | Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF, |
1133 | 16.1k | unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) { |
1134 | 16.1k | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements()4.91k : 111.2k ); |
1135 | 16.1k | auto *ConcreteTTI = static_cast<T *>(this); |
1136 | 16.1k | |
1137 | 16.1k | SmallVector<unsigned, 2> ISDs; |
1138 | 16.1k | unsigned SingleCallCost = 10; // Library call cost. Make it expensive. |
1139 | 16.1k | switch (IID) { |
1140 | 16.1k | default: { |
1141 | 2.36k | // Assume that we need to scalarize this intrinsic. |
1142 | 2.36k | unsigned ScalarizationCost = ScalarizationCostPassed; |
1143 | 2.36k | unsigned ScalarCalls = 1; |
1144 | 2.36k | Type *ScalarRetTy = RetTy; |
1145 | 2.36k | if (RetTy->isVectorTy()) { |
1146 | 581 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1147 | 28 | ScalarizationCost = getScalarizationOverhead(RetTy, true, false); |
1148 | 581 | ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); |
1149 | 581 | ScalarRetTy = RetTy->getScalarType(); |
1150 | 581 | } |
1151 | 2.36k | SmallVector<Type *, 4> ScalarTys; |
1152 | 7.82k | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i5.46k ) { |
1153 | 5.46k | Type *Ty = Tys[i]; |
1154 | 5.46k | if (Ty->isVectorTy()) { |
1155 | 1.63k | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1156 | 728 | ScalarizationCost += getScalarizationOverhead(Ty, false, true); |
1157 | 1.63k | ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements()); |
1158 | 1.63k | Ty = Ty->getScalarType(); |
1159 | 1.63k | } |
1160 | 5.46k | ScalarTys.push_back(Ty); |
1161 | 5.46k | } |
1162 | 2.36k | if (ScalarCalls == 1) |
1163 | 1.50k | return 1; // Return cost of a scalar intrinsic. Assume it to be cheap. |
1164 | 861 | |
1165 | 861 | unsigned ScalarCost = |
1166 | 861 | ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF); |
1167 | 861 | |
1168 | 861 | return ScalarCalls * ScalarCost + ScalarizationCost; |
1169 | 861 | } |
1170 | 861 | // Look for intrinsics that can be lowered directly or turned into a scalar |
1171 | 861 | // intrinsic call. |
1172 | 861 | case Intrinsic::sqrt: |
1173 | 482 | ISDs.push_back(ISD::FSQRT); |
1174 | 482 | break; |
1175 | 861 | case Intrinsic::sin: |
1176 | 165 | ISDs.push_back(ISD::FSIN); |
1177 | 165 | break; |
1178 | 861 | case Intrinsic::cos: |
1179 | 497 | ISDs.push_back(ISD::FCOS); |
1180 | 497 | break; |
1181 | 861 | case Intrinsic::exp: |
1182 | 483 | ISDs.push_back(ISD::FEXP); |
1183 | 483 | break; |
1184 | 861 | case Intrinsic::exp2: |
1185 | 42 | ISDs.push_back(ISD::FEXP2); |
1186 | 42 | break; |
1187 | 861 | case Intrinsic::log: |
1188 | 95 | ISDs.push_back(ISD::FLOG); |
1189 | 95 | break; |
1190 | 861 | case Intrinsic::log10: |
1191 | 46 | ISDs.push_back(ISD::FLOG10); |
1192 | 46 | break; |
1193 | 861 | case Intrinsic::log2: |
1194 | 18 | ISDs.push_back(ISD::FLOG2); |
1195 | 18 | break; |
1196 | 1.47k | case Intrinsic::fabs: |
1197 | 1.47k | ISDs.push_back(ISD::FABS); |
1198 | 1.47k | break; |
1199 | 861 | case Intrinsic::canonicalize: |
1200 | 5 | ISDs.push_back(ISD::FCANONICALIZE); |
1201 | 5 | break; |
1202 | 861 | case Intrinsic::minnum: |
1203 | 0 | ISDs.push_back(ISD::FMINNUM); |
1204 | 0 | if (FMF.noNaNs()) |
1205 | 0 | ISDs.push_back(ISD::FMINIMUM); |
1206 | 0 | break; |
1207 | 861 | case Intrinsic::maxnum: |
1208 | 0 | ISDs.push_back(ISD::FMAXNUM); |
1209 | 0 | if (FMF.noNaNs()) |
1210 | 0 | ISDs.push_back(ISD::FMAXIMUM); |
1211 | 0 | break; |
1212 | 1.08k | case Intrinsic::copysign: |
1213 | 1.08k | ISDs.push_back(ISD::FCOPYSIGN); |
1214 | 1.08k | break; |
1215 | 861 | case Intrinsic::floor: |
1216 | 305 | ISDs.push_back(ISD::FFLOOR); |
1217 | 305 | break; |
1218 | 861 | case Intrinsic::ceil: |
1219 | 270 | ISDs.push_back(ISD::FCEIL); |
1220 | 270 | break; |
1221 | 861 | case Intrinsic::trunc: |
1222 | 256 | ISDs.push_back(ISD::FTRUNC); |
1223 | 256 | break; |
1224 | 861 | case Intrinsic::nearbyint: |
1225 | 259 | ISDs.push_back(ISD::FNEARBYINT); |
1226 | 259 | break; |
1227 | 861 | case Intrinsic::rint: |
1228 | 268 | ISDs.push_back(ISD::FRINT); |
1229 | 268 | break; |
1230 | 861 | case Intrinsic::round: |
1231 | 0 | ISDs.push_back(ISD::FROUND); |
1232 | 0 | break; |
1233 | 861 | case Intrinsic::pow: |
1234 | 139 | ISDs.push_back(ISD::FPOW); |
1235 | 139 | break; |
1236 | 861 | case Intrinsic::fma: |
1237 | 471 | ISDs.push_back(ISD::FMA); |
1238 | 471 | break; |
1239 | 861 | case Intrinsic::fmuladd: |
1240 | 2 | ISDs.push_back(ISD::FMA); |
1241 | 2 | break; |
1242 | 861 | // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. |
1243 | 861 | case Intrinsic::lifetime_start: |
1244 | 5 | case Intrinsic::lifetime_end: |
1245 | 5 | case Intrinsic::sideeffect: |
1246 | 5 | return 0; |
1247 | 378 | case Intrinsic::masked_store: |
1248 | 378 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, |
1249 | 378 | 0); |
1250 | 406 | case Intrinsic::masked_load: |
1251 | 406 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); |
1252 | 373 | case Intrinsic::experimental_vector_reduce_add: |
1253 | 373 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0], |
1254 | 373 | /*IsPairwiseForm=*/false); |
1255 | 368 | case Intrinsic::experimental_vector_reduce_mul: |
1256 | 368 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0], |
1257 | 368 | /*IsPairwiseForm=*/false); |
1258 | 496 | case Intrinsic::experimental_vector_reduce_and: |
1259 | 496 | return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0], |
1260 | 496 | /*IsPairwiseForm=*/false); |
1261 | 496 | case Intrinsic::experimental_vector_reduce_or: |
1262 | 496 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0], |
1263 | 496 | /*IsPairwiseForm=*/false); |
1264 | 496 | case Intrinsic::experimental_vector_reduce_xor: |
1265 | 496 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0], |
1266 | 496 | /*IsPairwiseForm=*/false); |
1267 | 5 | case Intrinsic::experimental_vector_reduce_v2_fadd: |
1268 | 0 | return ConcreteTTI->getArithmeticReductionCost( |
1269 | 0 | Instruction::FAdd, Tys[0], |
1270 | 0 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict |
1271 | 5 | // reductions. |
1272 | 5 | case Intrinsic::experimental_vector_reduce_v2_fmul: |
1273 | 0 | return ConcreteTTI->getArithmeticReductionCost( |
1274 | 0 | Instruction::FMul, Tys[0], |
1275 | 0 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict |
1276 | 5 | // reductions. |
1277 | 740 | case Intrinsic::experimental_vector_reduce_smax: |
1278 | 740 | case Intrinsic::experimental_vector_reduce_smin: |
1279 | 740 | case Intrinsic::experimental_vector_reduce_fmax: |
1280 | 740 | case Intrinsic::experimental_vector_reduce_fmin: |
1281 | 740 | return ConcreteTTI->getMinMaxReductionCost( |
1282 | 740 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, |
1283 | 740 | /*IsUnsigned=*/true); |
1284 | 746 | case Intrinsic::experimental_vector_reduce_umax: |
1285 | 746 | case Intrinsic::experimental_vector_reduce_umin: |
1286 | 746 | return ConcreteTTI->getMinMaxReductionCost( |
1287 | 746 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, |
1288 | 746 | /*IsUnsigned=*/false); |
1289 | 746 | case Intrinsic::sadd_sat: |
1290 | 472 | case Intrinsic::ssub_sat: { |
1291 | 472 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); |
1292 | 472 | if (RetVF > 1) |
1293 | 210 | CondTy = VectorType::get(CondTy, RetVF); |
1294 | 472 | |
1295 | 472 | Type *OpTy = StructType::create({RetTy, CondTy}); |
1296 | 472 | Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat |
1297 | 472 | ? Intrinsic::sadd_with_overflow236 |
1298 | 472 | : Intrinsic::ssub_with_overflow236 ; |
1299 | 472 | |
1300 | 472 | // SatMax -> Overflow && SumDiff < 0 |
1301 | 472 | // SatMin -> Overflow && SumDiff >= 0 |
1302 | 472 | unsigned Cost = 0; |
1303 | 472 | Cost += ConcreteTTI->getIntrinsicInstrCost( |
1304 | 472 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); |
1305 | 472 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, |
1306 | 472 | CondTy, nullptr); |
1307 | 472 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1308 | 472 | CondTy, nullptr); |
1309 | 472 | return Cost; |
1310 | 472 | } |
1311 | 472 | case Intrinsic::uadd_sat: |
1312 | 404 | case Intrinsic::usub_sat: { |
1313 | 404 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); |
1314 | 404 | if (RetVF > 1) |
1315 | 144 | CondTy = VectorType::get(CondTy, RetVF); |
1316 | 404 | |
1317 | 404 | Type *OpTy = StructType::create({RetTy, CondTy}); |
1318 | 404 | Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat |
1319 | 404 | ? Intrinsic::uadd_with_overflow200 |
1320 | 404 | : Intrinsic::usub_with_overflow204 ; |
1321 | 404 | |
1322 | 404 | unsigned Cost = 0; |
1323 | 404 | Cost += ConcreteTTI->getIntrinsicInstrCost( |
1324 | 404 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); |
1325 | 404 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1326 | 404 | CondTy, nullptr); |
1327 | 404 | return Cost; |
1328 | 404 | } |
1329 | 794 | case Intrinsic::smul_fix: |
1330 | 794 | case Intrinsic::umul_fix: { |
1331 | 794 | unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; |
1332 | 794 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); |
1333 | 794 | if (RetVF > 1) |
1334 | 477 | ExtTy = VectorType::get(ExtTy, RetVF); |
1335 | 794 | |
1336 | 794 | unsigned ExtOp = |
1337 | 794 | IID == Intrinsic::smul_fix ? Instruction::SExt388 : Instruction::ZExt406 ; |
1338 | 794 | |
1339 | 794 | unsigned Cost = 0; |
1340 | 794 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy); |
1341 | 794 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); |
1342 | 794 | Cost += |
1343 | 794 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy); |
1344 | 794 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy, |
1345 | 794 | TTI::OK_AnyValue, |
1346 | 794 | TTI::OK_UniformConstantValue); |
1347 | 794 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy, |
1348 | 794 | TTI::OK_AnyValue, |
1349 | 794 | TTI::OK_UniformConstantValue); |
1350 | 794 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy); |
1351 | 794 | return Cost; |
1352 | 794 | } |
1353 | 794 | case Intrinsic::sadd_with_overflow: |
1354 | 450 | case Intrinsic::ssub_with_overflow: { |
1355 | 450 | Type *SumTy = RetTy->getContainedType(0); |
1356 | 450 | Type *OverflowTy = RetTy->getContainedType(1); |
1357 | 450 | unsigned Opcode = IID == Intrinsic::sadd_with_overflow |
1358 | 450 | ? BinaryOperator::Add225 |
1359 | 450 | : BinaryOperator::Sub225 ; |
1360 | 450 | |
1361 | 450 | // LHSSign -> LHS >= 0 |
1362 | 450 | // RHSSign -> RHS >= 0 |
1363 | 450 | // SumSign -> Sum >= 0 |
1364 | 450 | // |
1365 | 450 | // Add: |
1366 | 450 | // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) |
1367 | 450 | // Sub: |
1368 | 450 | // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) |
1369 | 450 | unsigned Cost = 0; |
1370 | 450 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); |
1371 | 450 | Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, |
1372 | 450 | OverflowTy, nullptr); |
1373 | 450 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost( |
1374 | 450 | BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr); |
1375 | 450 | Cost += |
1376 | 450 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy); |
1377 | 450 | return Cost; |
1378 | 450 | } |
1379 | 450 | case Intrinsic::uadd_with_overflow: |
1380 | 402 | case Intrinsic::usub_with_overflow: { |
1381 | 402 | Type *SumTy = RetTy->getContainedType(0); |
1382 | 402 | Type *OverflowTy = RetTy->getContainedType(1); |
1383 | 402 | unsigned Opcode = IID == Intrinsic::uadd_with_overflow |
1384 | 402 | ? BinaryOperator::Add199 |
1385 | 402 | : BinaryOperator::Sub203 ; |
1386 | 402 | |
1387 | 402 | unsigned Cost = 0; |
1388 | 402 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); |
1389 | 402 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, |
1390 | 402 | OverflowTy, nullptr); |
1391 | 402 | return Cost; |
1392 | 402 | } |
1393 | 402 | case Intrinsic::smul_with_overflow: |
1394 | 320 | case Intrinsic::umul_with_overflow: { |
1395 | 320 | Type *MulTy = RetTy->getContainedType(0); |
1396 | 320 | Type *OverflowTy = RetTy->getContainedType(1); |
1397 | 320 | unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; |
1398 | 320 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); |
1399 | 320 | if (MulTy->isVectorTy()) |
1400 | 240 | ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() ); |
1401 | 320 | |
1402 | 320 | unsigned ExtOp = |
1403 | 320 | IID == Intrinsic::smul_fix ? Instruction::SExt0 : Instruction::ZExt; |
1404 | 320 | |
1405 | 320 | unsigned Cost = 0; |
1406 | 320 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy); |
1407 | 320 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); |
1408 | 320 | Cost += |
1409 | 320 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy); |
1410 | 320 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy, |
1411 | 320 | TTI::OK_AnyValue, |
1412 | 320 | TTI::OK_UniformConstantValue); |
1413 | 320 | |
1414 | 320 | if (IID == Intrinsic::smul_with_overflow) |
1415 | 160 | Cost += ConcreteTTI->getArithmeticInstrCost( |
1416 | 160 | Instruction::AShr, MulTy, TTI::OK_AnyValue, |
1417 | 160 | TTI::OK_UniformConstantValue); |
1418 | 320 | |
1419 | 320 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, |
1420 | 320 | OverflowTy, nullptr); |
1421 | 320 | return Cost; |
1422 | 320 | } |
1423 | 320 | case Intrinsic::ctpop: |
1424 | 84 | ISDs.push_back(ISD::CTPOP); |
1425 | 84 | // In case of legalization use TCC_Expensive. This is cheaper than a |
1426 | 84 | // library call but still not a cheap instruction. |
1427 | 84 | SingleCallCost = TargetTransformInfo::TCC_Expensive; |
1428 | 84 | break; |
1429 | 6.44k | // FIXME: ctlz, cttz, ... |
1430 | 6.44k | } |
1431 | 6.44k | |
1432 | 6.44k | const TargetLoweringBase *TLI = getTLI(); |
1433 | 6.44k | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); |
1434 | 6.44k | |
1435 | 6.44k | SmallVector<unsigned, 2> LegalCost; |
1436 | 6.44k | SmallVector<unsigned, 2> CustomCost; |
1437 | 6.44k | for (unsigned ISD : ISDs) { |
1438 | 6.44k | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { |
1439 | 2.53k | if (IID == Intrinsic::fabs && LT.second.isFloatingPoint()1.06k && |
1440 | 2.53k | TLI->isFAbsFree(LT.second)1.06k ) { |
1441 | 14 | return 0; |
1442 | 14 | } |
1443 | 2.51k | |
1444 | 2.51k | // The operation is legal. Assume it costs 1. |
1445 | 2.51k | // If the type is split to multiple registers, assume that there is some |
1446 | 2.51k | // overhead to this. |
1447 | 2.51k | // TODO: Once we have extract/insert subvector cost we need to use them. |
1448 | 2.51k | if (LT.first > 1) |
1449 | 227 | LegalCost.push_back(LT.first * 2); |
1450 | 2.29k | else |
1451 | 2.29k | LegalCost.push_back(LT.first * 1); |
1452 | 3.91k | } else if (!TLI->isOperationExpand(ISD, LT.second)) { |
1453 | 1.26k | // If the operation is custom lowered then assume |
1454 | 1.26k | // that the code is twice as expensive. |
1455 | 1.26k | CustomCost.push_back(LT.first * 2); |
1456 | 1.26k | } |
1457 | 6.44k | } |
1458 | 6.44k | |
1459 | 6.44k | auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); |
1460 | 6.43k | if (MinLegalCostI != LegalCost.end()) |
1461 | 2.51k | return *MinLegalCostI; |
1462 | 3.91k | |
1463 | 3.91k | auto MinCustomCostI = |
1464 | 3.91k | std::min_element(CustomCost.begin(), CustomCost.end()); |
1465 | 3.91k | if (MinCustomCostI != CustomCost.end()) |
1466 | 1.26k | return *MinCustomCostI; |
1467 | 2.65k | |
1468 | 2.65k | // If we can't lower fmuladd into an FMA estimate the cost as a floating |
1469 | 2.65k | // point mul followed by an add. |
1470 | 2.65k | if (IID == Intrinsic::fmuladd) |
1471 | 2 | return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + |
1472 | 2 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); |
1473 | 2.65k | |
1474 | 2.65k | // Else, assume that we need to scalarize this intrinsic. For math builtins |
1475 | 2.65k | // this will emit a costly libcall, adding call overhead and spills. Make it |
1476 | 2.65k | // very expensive. |
1477 | 2.65k | if (RetTy->isVectorTy()) { |
1478 | 1.17k | unsigned ScalarizationCost = |
1479 | 1.17k | ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max()) |
1480 | 1.17k | ? ScalarizationCostPassed |
1481 | 1.17k | : getScalarizationOverhead(RetTy, true, false)0 ); |
1482 | 1.17k | unsigned ScalarCalls = RetTy->getVectorNumElements(); |
1483 | 1.17k | SmallVector<Type *, 4> ScalarTys; |
1484 | 2.79k | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i1.62k ) { |
1485 | 1.62k | Type *Ty = Tys[i]; |
1486 | 1.62k | if (Ty->isVectorTy()) |
1487 | 1.62k | Ty = Ty->getScalarType(); |
1488 | 1.62k | ScalarTys.push_back(Ty); |
1489 | 1.62k | } |
1490 | 1.17k | unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost( |
1491 | 1.17k | IID, RetTy->getScalarType(), ScalarTys, FMF); |
1492 | 2.79k | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i1.62k ) { |
1493 | 1.62k | if (Tys[i]->isVectorTy()) { |
1494 | 1.62k | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1495 | 0 | ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); |
1496 | 1.62k | ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements()); |
1497 | 1.62k | } |
1498 | 1.62k | } |
1499 | 1.17k | |
1500 | 1.17k | return ScalarCalls * ScalarCost + ScalarizationCost; |
1501 | 1.17k | } |
1502 | 1.48k | |
1503 | 1.48k | // This is going to be turned into a library call, make it expensive. |
1504 | 1.48k | return SingleCallCost; |
1505 | 1.48k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int) Line | Count | Source | 1133 | 3.36k | unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) { | 1134 | 3.36k | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements()1.61k : 11.75k ); | 1135 | 3.36k | auto *ConcreteTTI = static_cast<T *>(this); | 1136 | 3.36k | | 1137 | 3.36k | SmallVector<unsigned, 2> ISDs; | 1138 | 3.36k | unsigned SingleCallCost = 10; // Library call cost. Make it expensive. | 1139 | 3.36k | switch (IID) { | 1140 | 3.36k | default: { | 1141 | 548 | // Assume that we need to scalarize this intrinsic. | 1142 | 548 | unsigned ScalarizationCost = ScalarizationCostPassed; | 1143 | 548 | unsigned ScalarCalls = 1; | 1144 | 548 | Type *ScalarRetTy = RetTy; | 1145 | 548 | if (RetTy->isVectorTy()) { | 1146 | 219 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1147 | 0 | ScalarizationCost = getScalarizationOverhead(RetTy, true, false); | 1148 | 219 | ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); | 1149 | 219 | ScalarRetTy = RetTy->getScalarType(); | 1150 | 219 | } | 1151 | 548 | SmallVector<Type *, 4> ScalarTys; | 1152 | 1.14k | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i596 ) { | 1153 | 596 | Type *Ty = Tys[i]; | 1154 | 596 | if (Ty->isVectorTy()) { | 1155 | 235 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1156 | 0 | ScalarizationCost += getScalarizationOverhead(Ty, false, true); | 1157 | 235 | ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements()); | 1158 | 235 | Ty = Ty->getScalarType(); | 1159 | 235 | } | 1160 | 596 | ScalarTys.push_back(Ty); | 1161 | 596 | } | 1162 | 548 | if (ScalarCalls == 1) | 1163 | 329 | return 1; // Return cost of a scalar intrinsic. Assume it to be cheap. 
| 1164 | 219 | | 1165 | 219 | unsigned ScalarCost = | 1166 | 219 | ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF); | 1167 | 219 | | 1168 | 219 | return ScalarCalls * ScalarCost + ScalarizationCost; | 1169 | 219 | } | 1170 | 219 | // Look for intrinsics that can be lowered directly or turned into a scalar | 1171 | 219 | // intrinsic call. | 1172 | 462 | case Intrinsic::sqrt: | 1173 | 462 | ISDs.push_back(ISD::FSQRT); | 1174 | 462 | break; | 1175 | 219 | case Intrinsic::sin: | 1176 | 109 | ISDs.push_back(ISD::FSIN); | 1177 | 109 | break; | 1178 | 441 | case Intrinsic::cos: | 1179 | 441 | ISDs.push_back(ISD::FCOS); | 1180 | 441 | break; | 1181 | 419 | case Intrinsic::exp: | 1182 | 419 | ISDs.push_back(ISD::FEXP); | 1183 | 419 | break; | 1184 | 219 | case Intrinsic::exp2: | 1185 | 21 | ISDs.push_back(ISD::FEXP2); | 1186 | 21 | break; | 1187 | 219 | case Intrinsic::log: | 1188 | 42 | ISDs.push_back(ISD::FLOG); | 1189 | 42 | break; | 1190 | 219 | case Intrinsic::log10: | 1191 | 17 | ISDs.push_back(ISD::FLOG10); | 1192 | 17 | break; | 1193 | 219 | case Intrinsic::log2: | 1194 | 0 | ISDs.push_back(ISD::FLOG2); | 1195 | 0 | break; | 1196 | 739 | case Intrinsic::fabs: | 1197 | 739 | ISDs.push_back(ISD::FABS); | 1198 | 739 | break; | 1199 | 219 | case Intrinsic::canonicalize: | 1200 | 0 | ISDs.push_back(ISD::FCANONICALIZE); | 1201 | 0 | break; | 1202 | 219 | case Intrinsic::minnum: | 1203 | 0 | ISDs.push_back(ISD::FMINNUM); | 1204 | 0 | if (FMF.noNaNs()) | 1205 | 0 | ISDs.push_back(ISD::FMINIMUM); | 1206 | 0 | break; | 1207 | 219 | case Intrinsic::maxnum: | 1208 | 0 | ISDs.push_back(ISD::FMAXNUM); | 1209 | 0 | if (FMF.noNaNs()) | 1210 | 0 | ISDs.push_back(ISD::FMAXIMUM); | 1211 | 0 | break; | 1212 | 266 | case Intrinsic::copysign: | 1213 | 266 | ISDs.push_back(ISD::FCOPYSIGN); | 1214 | 266 | break; | 1215 | 219 | case Intrinsic::floor: | 1216 | 14 | ISDs.push_back(ISD::FFLOOR); | 1217 | 14 | break; | 1218 | 219 | case Intrinsic::ceil: | 1219 | 0 | 
ISDs.push_back(ISD::FCEIL); | 1220 | 0 | break; | 1221 | 219 | case Intrinsic::trunc: | 1222 | 0 | ISDs.push_back(ISD::FTRUNC); | 1223 | 0 | break; | 1224 | 219 | case Intrinsic::nearbyint: | 1225 | 0 | ISDs.push_back(ISD::FNEARBYINT); | 1226 | 0 | break; | 1227 | 219 | case Intrinsic::rint: | 1228 | 12 | ISDs.push_back(ISD::FRINT); | 1229 | 12 | break; | 1230 | 219 | case Intrinsic::round: | 1231 | 0 | ISDs.push_back(ISD::FROUND); | 1232 | 0 | break; | 1233 | 219 | case Intrinsic::pow: | 1234 | 94 | ISDs.push_back(ISD::FPOW); | 1235 | 94 | break; | 1236 | 219 | case Intrinsic::fma: | 1237 | 0 | ISDs.push_back(ISD::FMA); | 1238 | 0 | break; | 1239 | 219 | case Intrinsic::fmuladd: | 1240 | 0 | ISDs.push_back(ISD::FMA); | 1241 | 0 | break; | 1242 | 219 | // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. | 1243 | 219 | case Intrinsic::lifetime_start: | 1244 | 0 | case Intrinsic::lifetime_end: | 1245 | 0 | case Intrinsic::sideeffect: | 1246 | 0 | return 0; | 1247 | 0 | case Intrinsic::masked_store: | 1248 | 0 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, | 1249 | 0 | 0); | 1250 | 0 | case Intrinsic::masked_load: | 1251 | 0 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); | 1252 | 5 | case Intrinsic::experimental_vector_reduce_add: | 1253 | 5 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0], | 1254 | 5 | /*IsPairwiseForm=*/false); | 1255 | 0 | case Intrinsic::experimental_vector_reduce_mul: | 1256 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0], | 1257 | 0 | /*IsPairwiseForm=*/false); | 1258 | 0 | case Intrinsic::experimental_vector_reduce_and: | 1259 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0], | 1260 | 0 | /*IsPairwiseForm=*/false); | 1261 | 0 | case Intrinsic::experimental_vector_reduce_or: | 1262 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0], | 1263 | 0 | 
/*IsPairwiseForm=*/false); | 1264 | 0 | case Intrinsic::experimental_vector_reduce_xor: | 1265 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0], | 1266 | 0 | /*IsPairwiseForm=*/false); | 1267 | 0 | case Intrinsic::experimental_vector_reduce_v2_fadd: | 1268 | 0 | return ConcreteTTI->getArithmeticReductionCost( | 1269 | 0 | Instruction::FAdd, Tys[0], | 1270 | 0 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict | 1271 | 0 | // reductions. | 1272 | 0 | case Intrinsic::experimental_vector_reduce_v2_fmul: | 1273 | 0 | return ConcreteTTI->getArithmeticReductionCost( | 1274 | 0 | Instruction::FMul, Tys[0], | 1275 | 0 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict | 1276 | 0 | // reductions. | 1277 | 12 | case Intrinsic::experimental_vector_reduce_smax: | 1278 | 12 | case Intrinsic::experimental_vector_reduce_smin: | 1279 | 12 | case Intrinsic::experimental_vector_reduce_fmax: | 1280 | 12 | case Intrinsic::experimental_vector_reduce_fmin: | 1281 | 12 | return ConcreteTTI->getMinMaxReductionCost( | 1282 | 12 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, | 1283 | 12 | /*IsUnsigned=*/true); | 1284 | 12 | case Intrinsic::experimental_vector_reduce_umax: | 1285 | 10 | case Intrinsic::experimental_vector_reduce_umin: | 1286 | 10 | return ConcreteTTI->getMinMaxReductionCost( | 1287 | 10 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, | 1288 | 10 | /*IsUnsigned=*/false); | 1289 | 10 | case Intrinsic::sadd_sat: | 1290 | 0 | case Intrinsic::ssub_sat: { | 1291 | 0 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1292 | 0 | if (RetVF > 1) | 1293 | 0 | CondTy = VectorType::get(CondTy, RetVF); | 1294 | 0 |
| 1295 | 0 | Type *OpTy = StructType::create({RetTy, CondTy}); | 1296 | 0 | Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat | 1297 | 0 | ? Intrinsic::sadd_with_overflow | 1298 | 0 | : Intrinsic::ssub_with_overflow; | 1299 | 0 |
| 1300 | 0 | // SatMax -> Overflow && SumDiff < 0 | 1301 | 0 | // SatMin -> Overflow && SumDiff >= 0 | 1302 | 0 | unsigned Cost = 0; | 1303 | 0 | Cost += ConcreteTTI->getIntrinsicInstrCost( | 1304 | 0 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); | 1305 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, | 1306 | 0 | CondTy, nullptr); | 1307 | 0 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1308 | 0 | CondTy, nullptr); | 1309 | 0 | return Cost; | 1310 | 0 | } | 1311 | 76 | case Intrinsic::uadd_sat: | 1312 | 76 | case Intrinsic::usub_sat: { | 1313 | 76 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1314 | 76 | if (RetVF > 1) | 1315 | 58 | CondTy = VectorType::get(CondTy, RetVF); | 1316 | 76 | | 1317 | 76 | Type *OpTy = StructType::create({RetTy, CondTy}); | 1318 | 76 | Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat | 1319 | 76 | ? Intrinsic::uadd_with_overflow36 | 1320 | 76 | : Intrinsic::usub_with_overflow40 ; | 1321 | 76 | | 1322 | 76 | unsigned Cost = 0; | 1323 | 76 | Cost += ConcreteTTI->getIntrinsicInstrCost( | 1324 | 76 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); | 1325 | 76 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1326 | 76 | CondTy, nullptr); | 1327 | 76 | return Cost; | 1328 | 76 | } | 1329 | 76 | case Intrinsic::smul_fix: | 1330 | 0 | case Intrinsic::umul_fix: { | 1331 | 0 | unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; | 1332 | 0 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); | 1333 | 0 | if (RetVF > 1) | 1334 | 0 | ExtTy = VectorType::get(ExtTy, RetVF); | 1335 | 0 |
| 1336 | 0 | unsigned ExtOp = | 1337 | 0 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; | 1338 | 0 |
| 1339 | 0 | unsigned Cost = 0; | 1340 | 0 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy); | 1341 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); | 1342 | 0 | Cost += | 1343 | 0 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy); | 1344 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy, | 1345 | 0 | TTI::OK_AnyValue, | 1346 | 0 | TTI::OK_UniformConstantValue); | 1347 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy, | 1348 | 0 | TTI::OK_AnyValue, | 1349 | 0 | TTI::OK_UniformConstantValue); | 1350 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy); | 1351 | 0 | return Cost; | 1352 | 0 | } | 1353 | 0 | case Intrinsic::sadd_with_overflow: | 1354 | 0 | case Intrinsic::ssub_with_overflow: { | 1355 | 0 | Type *SumTy = RetTy->getContainedType(0); | 1356 | 0 | Type *OverflowTy = RetTy->getContainedType(1); | 1357 | 0 | unsigned Opcode = IID == Intrinsic::sadd_with_overflow | 1358 | 0 | ? BinaryOperator::Add | 1359 | 0 | : BinaryOperator::Sub; | 1360 | 0 |
| 1361 | 0 | // LHSSign -> LHS >= 0 | 1362 | 0 | // RHSSign -> RHS >= 0 | 1363 | 0 | // SumSign -> Sum >= 0 | 1364 | 0 | // | 1365 | 0 | // Add: | 1366 | 0 | // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) | 1367 | 0 | // Sub: | 1368 | 0 | // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) | 1369 | 0 | unsigned Cost = 0; | 1370 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); | 1371 | 0 | Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, | 1372 | 0 | OverflowTy, nullptr); | 1373 | 0 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost( | 1374 | 0 | BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr); | 1375 | 0 | Cost += | 1376 | 0 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy); | 1377 | 0 | return Cost; | 1378 | 0 | } | 1379 | 76 | case Intrinsic::uadd_with_overflow: | 1380 | 76 | case Intrinsic::usub_with_overflow: { | 1381 | 76 | Type *SumTy = RetTy->getContainedType(0); | 1382 | 76 | Type *OverflowTy = RetTy->getContainedType(1); | 1383 | 76 | unsigned Opcode = IID == Intrinsic::uadd_with_overflow | 1384 | 76 | ? BinaryOperator::Add36 | 1385 | 76 | : BinaryOperator::Sub40 ; | 1386 | 76 | | 1387 | 76 | unsigned Cost = 0; | 1388 | 76 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); | 1389 | 76 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, | 1390 | 76 | OverflowTy, nullptr); | 1391 | 76 | return Cost; | 1392 | 76 | } | 1393 | 76 | case Intrinsic::smul_with_overflow: | 1394 | 0 | case Intrinsic::umul_with_overflow: { | 1395 | 0 | Type *MulTy = RetTy->getContainedType(0); | 1396 | 0 | Type *OverflowTy = RetTy->getContainedType(1); | 1397 | 0 | unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; | 1398 | 0 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); | 1399 | 0 | if (MulTy->isVectorTy()) | 1400 | 0 | ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() ); | 1401 | 0 |
| 1402 | 0 | unsigned ExtOp = | 1403 | 0 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; | 1404 | 0 |
| 1405 | 0 | unsigned Cost = 0; | 1406 | 0 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy); | 1407 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); | 1408 | 0 | Cost += | 1409 | 0 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy); | 1410 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy, | 1411 | 0 | TTI::OK_AnyValue, | 1412 | 0 | TTI::OK_UniformConstantValue); | 1413 | 0 |
| 1414 | 0 | if (IID == Intrinsic::smul_with_overflow) | 1415 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost( | 1416 | 0 | Instruction::AShr, MulTy, TTI::OK_AnyValue, | 1417 | 0 | TTI::OK_UniformConstantValue); | 1418 | 0 |
| 1419 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, | 1420 | 0 | OverflowTy, nullptr); | 1421 | 0 | return Cost; | 1422 | 0 | } | 1423 | 0 | case Intrinsic::ctpop: | 1424 | 0 | ISDs.push_back(ISD::CTPOP); | 1425 | 0 | // In case of legalization use TCC_Expensive. This is cheaper than a | 1426 | 0 | // library call but still not a cheap instruction. | 1427 | 0 | SingleCallCost = TargetTransformInfo::TCC_Expensive; | 1428 | 0 | break; | 1429 | 2.63k | // FIXME: ctlz, cttz, ... | 1430 | 2.63k | } | 1431 | 2.63k | | 1432 | 2.63k | const TargetLoweringBase *TLI = getTLI(); | 1433 | 2.63k | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); | 1434 | 2.63k | | 1435 | 2.63k | SmallVector<unsigned, 2> LegalCost; | 1436 | 2.63k | SmallVector<unsigned, 2> CustomCost; | 1437 | 2.63k | for (unsigned ISD : ISDs) { | 1438 | 2.63k | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { | 1439 | 1.22k | if (IID == Intrinsic::fabs && LT.second.isFloatingPoint()739 && | 1440 | 1.22k | TLI->isFAbsFree(LT.second)739 ) { | 1441 | 0 | return 0; | 1442 | 0 | } | 1443 | 1.22k | | 1444 | 1.22k | // The operation is legal. Assume it costs 1. | 1445 | 1.22k | // If the type is split to multiple registers, assume that there is some | 1446 | 1.22k | // overhead to this. | 1447 | 1.22k | // TODO: Once we have extract/insert subvector cost we need to use them. | 1448 | 1.22k | if (LT.first > 1) | 1449 | 87 | LegalCost.push_back(LT.first * 2); | 1450 | 1.14k | else | 1451 | 1.14k | LegalCost.push_back(LT.first * 1); | 1452 | 1.40k | } else if (!TLI->isOperationExpand(ISD, LT.second)) { | 1453 | 266 | // If the operation is custom lowered then assume | 1454 | 266 | // that the code is twice as expensive. 
| 1455 | 266 | CustomCost.push_back(LT.first * 2); | 1456 | 266 | } | 1457 | 2.63k | } | 1458 | 2.63k | | 1459 | 2.63k | auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); | 1460 | 2.63k | if (MinLegalCostI != LegalCost.end()) | 1461 | 1.22k | return *MinLegalCostI; | 1462 | 1.40k | | 1463 | 1.40k | auto MinCustomCostI = | 1464 | 1.40k | std::min_element(CustomCost.begin(), CustomCost.end()); | 1465 | 1.40k | if (MinCustomCostI != CustomCost.end()) | 1466 | 266 | return *MinCustomCostI; | 1467 | 1.14k | | 1468 | 1.14k | // If we can't lower fmuladd into an FMA estimate the cost as a floating | 1469 | 1.14k | // point mul followed by an add. | 1470 | 1.14k | if (IID == Intrinsic::fmuladd) | 1471 | 0 | return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + | 1472 | 0 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); | 1473 | 1.14k | | 1474 | 1.14k | // Else, assume that we need to scalarize this intrinsic. For math builtins | 1475 | 1.14k | // this will emit a costly libcall, adding call overhead and spills. Make it | 1476 | 1.14k | // very expensive. | 1477 | 1.14k | if (RetTy->isVectorTy()) { | 1478 | 463 | unsigned ScalarizationCost = | 1479 | 463 | ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max()) | 1480 | 463 | ? 
ScalarizationCostPassed | 1481 | 463 | : getScalarizationOverhead(RetTy, true, false)0 ); | 1482 | 463 | unsigned ScalarCalls = RetTy->getVectorNumElements(); | 1483 | 463 | SmallVector<Type *, 4> ScalarTys; | 1484 | 962 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i499 ) { | 1485 | 499 | Type *Ty = Tys[i]; | 1486 | 499 | if (Ty->isVectorTy()) | 1487 | 499 | Ty = Ty->getScalarType(); | 1488 | 499 | ScalarTys.push_back(Ty); | 1489 | 499 | } | 1490 | 463 | unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost( | 1491 | 463 | IID, RetTy->getScalarType(), ScalarTys, FMF); | 1492 | 962 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i499 ) { | 1493 | 499 | if (Tys[i]->isVectorTy()) { | 1494 | 499 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1495 | 0 | ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); | 1496 | 499 | ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements()); | 1497 | 499 | } | 1498 | 499 | } | 1499 | 463 | | 1500 | 463 | return ScalarCalls * ScalarCost + ScalarizationCost; | 1501 | 463 | } | 1502 | 680 | | 1503 | 680 | // This is going to be turned into a library call, make it expensive. | 1504 | 680 | return SingleCallCost; | 1505 | 680 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int) Line | Count | Source | 1133 | 43 | unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) { | 1134 | 43 | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements()19 : 124 ); | 1135 | 43 | auto *ConcreteTTI = static_cast<T *>(this); | 1136 | 43 | | 1137 | 43 | SmallVector<unsigned, 2> ISDs; | 1138 | 43 | unsigned SingleCallCost = 10; // Library call cost. Make it expensive. | 1139 | 43 | switch (IID) { | 1140 | 43 | default: { | 1141 | 0 | // Assume that we need to scalarize this intrinsic. | 1142 | 0 | unsigned ScalarizationCost = ScalarizationCostPassed; | 1143 | 0 | unsigned ScalarCalls = 1; | 1144 | 0 | Type *ScalarRetTy = RetTy; | 1145 | 0 | if (RetTy->isVectorTy()) { | 1146 | 0 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1147 | 0 | ScalarizationCost = getScalarizationOverhead(RetTy, true, false); | 1148 | 0 | ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); | 1149 | 0 | ScalarRetTy = RetTy->getScalarType(); | 1150 | 0 | } | 1151 | 0 | SmallVector<Type *, 4> ScalarTys; | 1152 | 0 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { | 1153 | 0 | Type *Ty = Tys[i]; | 1154 | 0 | if (Ty->isVectorTy()) { | 1155 | 0 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1156 | 0 | ScalarizationCost += getScalarizationOverhead(Ty, false, true); | 1157 | 0 | ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements()); | 1158 | 0 | Ty = Ty->getScalarType(); | 1159 | 0 | } | 1160 | 0 | ScalarTys.push_back(Ty); | 1161 | 0 | } | 1162 | 0 | if (ScalarCalls == 1) | 1163 | 0 | return 1; // Return cost of a scalar intrinsic. 
Assume it to be cheap. | 1164 | 0 | | 1165 | 0 | unsigned ScalarCost = | 1166 | 0 | ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF); | 1167 | 0 |
| 1168 | 0 | return ScalarCalls * ScalarCost + ScalarizationCost; | 1169 | 0 | } | 1170 | 0 | // Look for intrinsics that can be lowered directly or turned into a scalar | 1171 | 0 | // intrinsic call. | 1172 | 0 | case Intrinsic::sqrt: | 1173 | 0 | ISDs.push_back(ISD::FSQRT); | 1174 | 0 | break; | 1175 | 0 | case Intrinsic::sin: | 1176 | 0 | ISDs.push_back(ISD::FSIN); | 1177 | 0 | break; | 1178 | 0 | case Intrinsic::cos: | 1179 | 0 | ISDs.push_back(ISD::FCOS); | 1180 | 0 | break; | 1181 | 0 | case Intrinsic::exp: | 1182 | 0 | ISDs.push_back(ISD::FEXP); | 1183 | 0 | break; | 1184 | 0 | case Intrinsic::exp2: | 1185 | 0 | ISDs.push_back(ISD::FEXP2); | 1186 | 0 | break; | 1187 | 0 | case Intrinsic::log: | 1188 | 0 | ISDs.push_back(ISD::FLOG); | 1189 | 0 | break; | 1190 | 0 | case Intrinsic::log10: | 1191 | 0 | ISDs.push_back(ISD::FLOG10); | 1192 | 0 | break; | 1193 | 0 | case Intrinsic::log2: | 1194 | 0 | ISDs.push_back(ISD::FLOG2); | 1195 | 0 | break; | 1196 | 23 | case Intrinsic::fabs: | 1197 | 23 | ISDs.push_back(ISD::FABS); | 1198 | 23 | break; | 1199 | 5 | case Intrinsic::canonicalize: | 1200 | 5 | ISDs.push_back(ISD::FCANONICALIZE); | 1201 | 5 | break; | 1202 | 0 | case Intrinsic::minnum: | 1203 | 0 | ISDs.push_back(ISD::FMINNUM); | 1204 | 0 | if (FMF.noNaNs()) | 1205 | 0 | ISDs.push_back(ISD::FMINIMUM); | 1206 | 0 | break; | 1207 | 0 | case Intrinsic::maxnum: | 1208 | 0 | ISDs.push_back(ISD::FMAXNUM); | 1209 | 0 | if (FMF.noNaNs()) | 1210 | 0 | ISDs.push_back(ISD::FMAXIMUM); | 1211 | 0 | break; | 1212 | 0 | case Intrinsic::copysign: | 1213 | 0 | ISDs.push_back(ISD::FCOPYSIGN); | 1214 | 0 | break; | 1215 | 0 | case Intrinsic::floor: | 1216 | 0 | ISDs.push_back(ISD::FFLOOR); | 1217 | 0 | break; | 1218 | 0 | case Intrinsic::ceil: | 1219 | 0 | ISDs.push_back(ISD::FCEIL); | 1220 | 0 | break; | 1221 | 0 | case Intrinsic::trunc: | 1222 | 0 | ISDs.push_back(ISD::FTRUNC); | 1223 | 0 | break; | 1224 | 0 | case Intrinsic::nearbyint: | 1225 | 0 | 
ISDs.push_back(ISD::FNEARBYINT); | 1226 | 0 | break; | 1227 | 0 | case Intrinsic::rint: | 1228 | 0 | ISDs.push_back(ISD::FRINT); | 1229 | 0 | break; | 1230 | 0 | case Intrinsic::round: | 1231 | 0 | ISDs.push_back(ISD::FROUND); | 1232 | 0 | break; | 1233 | 0 | case Intrinsic::pow: | 1234 | 0 | ISDs.push_back(ISD::FPOW); | 1235 | 0 | break; | 1236 | 15 | case Intrinsic::fma: | 1237 | 15 | ISDs.push_back(ISD::FMA); | 1238 | 15 | break; | 1239 | 0 | case Intrinsic::fmuladd: | 1240 | 0 | ISDs.push_back(ISD::FMA); | 1241 | 0 | break; | 1242 | 0 | // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. | 1243 | 0 | case Intrinsic::lifetime_start: | 1244 | 0 | case Intrinsic::lifetime_end: | 1245 | 0 | case Intrinsic::sideeffect: | 1246 | 0 | return 0; | 1247 | 0 | case Intrinsic::masked_store: | 1248 | 0 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, | 1249 | 0 | 0); | 1250 | 0 | case Intrinsic::masked_load: | 1251 | 0 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); | 1252 | 0 | case Intrinsic::experimental_vector_reduce_add: | 1253 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0], | 1254 | 0 | /*IsPairwiseForm=*/false); | 1255 | 0 | case Intrinsic::experimental_vector_reduce_mul: | 1256 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0], | 1257 | 0 | /*IsPairwiseForm=*/false); | 1258 | 0 | case Intrinsic::experimental_vector_reduce_and: | 1259 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0], | 1260 | 0 | /*IsPairwiseForm=*/false); | 1261 | 0 | case Intrinsic::experimental_vector_reduce_or: | 1262 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0], | 1263 | 0 | /*IsPairwiseForm=*/false); | 1264 | 0 | case Intrinsic::experimental_vector_reduce_xor: | 1265 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0], | 1266 | 0 | /*IsPairwiseForm=*/false); | 1267 | 0 | case 
Intrinsic::experimental_vector_reduce_v2_fadd: | 1268 | 0 | return ConcreteTTI->getArithmeticReductionCost( | 1269 | 0 | Instruction::FAdd, Tys[0], | 1270 | 0 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict | 1271 | 0 | // reductions. | 1272 | 0 | case Intrinsic::experimental_vector_reduce_v2_fmul: | 1273 | 0 | return ConcreteTTI->getArithmeticReductionCost( | 1274 | 0 | Instruction::FMul, Tys[0], | 1275 | 0 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict | 1276 | 0 | // reductions. | 1277 | 0 | case Intrinsic::experimental_vector_reduce_smax: | 1278 | 0 | case Intrinsic::experimental_vector_reduce_smin: | 1279 | 0 | case Intrinsic::experimental_vector_reduce_fmax: | 1280 | 0 | case Intrinsic::experimental_vector_reduce_fmin: | 1281 | 0 | return ConcreteTTI->getMinMaxReductionCost( | 1282 | 0 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, | 1283 | 0 | /*IsUnsigned=*/true); | 1284 | 0 | case Intrinsic::experimental_vector_reduce_umax: | 1285 | 0 | case Intrinsic::experimental_vector_reduce_umin: | 1286 | 0 | return ConcreteTTI->getMinMaxReductionCost( | 1287 | 0 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, | 1288 | 0 | /*IsUnsigned=*/false); | 1289 | 0 | case Intrinsic::sadd_sat: | 1290 | 0 | case Intrinsic::ssub_sat: { | 1291 | 0 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1292 | 0 | if (RetVF > 1) | 1293 | 0 | CondTy = VectorType::get(CondTy, RetVF); | 1294 | 0 |
| 1295 | 0 | Type *OpTy = StructType::create({RetTy, CondTy}); | 1296 | 0 | Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat | 1297 | 0 | ? Intrinsic::sadd_with_overflow | 1298 | 0 | : Intrinsic::ssub_with_overflow; | 1299 | 0 |
| 1300 | 0 | // SatMax -> Overflow && SumDiff < 0 | 1301 | 0 | // SatMin -> Overflow && SumDiff >= 0 | 1302 | 0 | unsigned Cost = 0; | 1303 | 0 | Cost += ConcreteTTI->getIntrinsicInstrCost( | 1304 | 0 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); | 1305 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, | 1306 | 0 | CondTy, nullptr); | 1307 | 0 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1308 | 0 | CondTy, nullptr); | 1309 | 0 | return Cost; | 1310 | 0 | } | 1311 | 0 | case Intrinsic::uadd_sat: | 1312 | 0 | case Intrinsic::usub_sat: { | 1313 | 0 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1314 | 0 | if (RetVF > 1) | 1315 | 0 | CondTy = VectorType::get(CondTy, RetVF); | 1316 | 0 |
| 1317 | 0 | Type *OpTy = StructType::create({RetTy, CondTy}); | 1318 | 0 | Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat | 1319 | 0 | ? Intrinsic::uadd_with_overflow | 1320 | 0 | : Intrinsic::usub_with_overflow; | 1321 | 0 |
| 1322 | 0 | unsigned Cost = 0; | 1323 | 0 | Cost += ConcreteTTI->getIntrinsicInstrCost( | 1324 | 0 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); | 1325 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1326 | 0 | CondTy, nullptr); | 1327 | 0 | return Cost; | 1328 | 0 | } | 1329 | 0 | case Intrinsic::smul_fix: | 1330 | 0 | case Intrinsic::umul_fix: { | 1331 | 0 | unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; | 1332 | 0 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); | 1333 | 0 | if (RetVF > 1) | 1334 | 0 | ExtTy = VectorType::get(ExtTy, RetVF); | 1335 | 0 |
| 1336 | 0 | unsigned ExtOp = | 1337 | 0 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; | 1338 | 0 |
| 1339 | 0 | unsigned Cost = 0; | 1340 | 0 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy); | 1341 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); | 1342 | 0 | Cost += | 1343 | 0 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy); | 1344 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy, | 1345 | 0 | TTI::OK_AnyValue, | 1346 | 0 | TTI::OK_UniformConstantValue); | 1347 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy, | 1348 | 0 | TTI::OK_AnyValue, | 1349 | 0 | TTI::OK_UniformConstantValue); | 1350 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy); | 1351 | 0 | return Cost; | 1352 | 0 | } | 1353 | 0 | case Intrinsic::sadd_with_overflow: | 1354 | 0 | case Intrinsic::ssub_with_overflow: { | 1355 | 0 | Type *SumTy = RetTy->getContainedType(0); | 1356 | 0 | Type *OverflowTy = RetTy->getContainedType(1); | 1357 | 0 | unsigned Opcode = IID == Intrinsic::sadd_with_overflow | 1358 | 0 | ? BinaryOperator::Add | 1359 | 0 | : BinaryOperator::Sub; | 1360 | 0 |
| 1361 | 0 | // LHSSign -> LHS >= 0 | 1362 | 0 | // RHSSign -> RHS >= 0 | 1363 | 0 | // SumSign -> Sum >= 0 | 1364 | 0 | // | 1365 | 0 | // Add: | 1366 | 0 | // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) | 1367 | 0 | // Sub: | 1368 | 0 | // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) | 1369 | 0 | unsigned Cost = 0; | 1370 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); | 1371 | 0 | Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, | 1372 | 0 | OverflowTy, nullptr); | 1373 | 0 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost( | 1374 | 0 | BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr); | 1375 | 0 | Cost += | 1376 | 0 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy); | 1377 | 0 | return Cost; | 1378 | 0 | } | 1379 | 0 | case Intrinsic::uadd_with_overflow: | 1380 | 0 | case Intrinsic::usub_with_overflow: { | 1381 | 0 | Type *SumTy = RetTy->getContainedType(0); | 1382 | 0 | Type *OverflowTy = RetTy->getContainedType(1); | 1383 | 0 | unsigned Opcode = IID == Intrinsic::uadd_with_overflow | 1384 | 0 | ? BinaryOperator::Add | 1385 | 0 | : BinaryOperator::Sub; | 1386 | 0 |
| 1387 | 0 | unsigned Cost = 0; | 1388 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); | 1389 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, | 1390 | 0 | OverflowTy, nullptr); | 1391 | 0 | return Cost; | 1392 | 0 | } | 1393 | 0 | case Intrinsic::smul_with_overflow: | 1394 | 0 | case Intrinsic::umul_with_overflow: { | 1395 | 0 | Type *MulTy = RetTy->getContainedType(0); | 1396 | 0 | Type *OverflowTy = RetTy->getContainedType(1); | 1397 | 0 | unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; | 1398 | 0 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); | 1399 | 0 | if (MulTy->isVectorTy()) | 1400 | 0 | ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() ); | 1401 | 0 |
| 1402 | 0 | unsigned ExtOp = | 1403 | 0 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; | 1404 | 0 |
| 1405 | 0 | unsigned Cost = 0; | 1406 | 0 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy); | 1407 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); | 1408 | 0 | Cost += | 1409 | 0 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy); | 1410 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy, | 1411 | 0 | TTI::OK_AnyValue, | 1412 | 0 | TTI::OK_UniformConstantValue); | 1413 | 0 |
| 1414 | 0 | if (IID == Intrinsic::smul_with_overflow) | 1415 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost( | 1416 | 0 | Instruction::AShr, MulTy, TTI::OK_AnyValue, | 1417 | 0 | TTI::OK_UniformConstantValue); | 1418 | 0 |
| 1419 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, | 1420 | 0 | OverflowTy, nullptr); | 1421 | 0 | return Cost; | 1422 | 0 | } | 1423 | 0 | case Intrinsic::ctpop: | 1424 | 0 | ISDs.push_back(ISD::CTPOP); | 1425 | 0 | // In case of legalization use TCC_Expensive. This is cheaper than a | 1426 | 0 | // library call but still not a cheap instruction. | 1427 | 0 | SingleCallCost = TargetTransformInfo::TCC_Expensive; | 1428 | 0 | break; | 1429 | 43 | // FIXME: ctlz, cttz, ... | 1430 | 43 | } | 1431 | 43 | | 1432 | 43 | const TargetLoweringBase *TLI = getTLI(); | 1433 | 43 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); | 1434 | 43 | | 1435 | 43 | SmallVector<unsigned, 2> LegalCost; | 1436 | 43 | SmallVector<unsigned, 2> CustomCost; | 1437 | 43 | for (unsigned ISD : ISDs) { | 1438 | 43 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { | 1439 | 34 | if (IID == Intrinsic::fabs && LT.second.isFloatingPoint()18 && | 1440 | 34 | TLI->isFAbsFree(LT.second)18 ) { | 1441 | 14 | return 0; | 1442 | 14 | } | 1443 | 20 | | 1444 | 20 | // The operation is legal. Assume it costs 1. | 1445 | 20 | // If the type is split to multiple registers, assume that there is some | 1446 | 20 | // overhead to this. | 1447 | 20 | // TODO: Once we have extract/insert subvector cost we need to use them. | 1448 | 20 | if (LT.first > 1) | 1449 | 0 | LegalCost.push_back(LT.first * 2); | 1450 | 20 | else | 1451 | 20 | LegalCost.push_back(LT.first * 1); | 1452 | 20 | } else if (9 !TLI->isOperationExpand(ISD, LT.second)9 ) { | 1453 | 0 | // If the operation is custom lowered then assume | 1454 | 0 | // that the code is twice as expensive. 
| 1455 | 0 | CustomCost.push_back(LT.first * 2); | 1456 | 0 | } | 1457 | 43 | } | 1458 | 43 | | 1459 | 43 | auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); | 1460 | 29 | if (MinLegalCostI != LegalCost.end()) | 1461 | 20 | return *MinLegalCostI; | 1462 | 9 | | 1463 | 9 | auto MinCustomCostI = | 1464 | 9 | std::min_element(CustomCost.begin(), CustomCost.end()); | 1465 | 9 | if (MinCustomCostI != CustomCost.end()) | 1466 | 0 | return *MinCustomCostI; | 1467 | 9 | | 1468 | 9 | // If we can't lower fmuladd into an FMA estimate the cost as a floating | 1469 | 9 | // point mul followed by an add. | 1470 | 9 | if (IID == Intrinsic::fmuladd) | 1471 | 0 | return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + | 1472 | 0 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); | 1473 | 9 | | 1474 | 9 | // Else, assume that we need to scalarize this intrinsic. For math builtins | 1475 | 9 | // this will emit a costly libcall, adding call overhead and spills. Make it | 1476 | 9 | // very expensive. | 1477 | 9 | if (RetTy->isVectorTy()) { | 1478 | 9 | unsigned ScalarizationCost = | 1479 | 9 | ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max()) | 1480 | 9 | ? 
ScalarizationCostPassed | 1481 | 9 | : getScalarizationOverhead(RetTy, true, false)0 ); | 1482 | 9 | unsigned ScalarCalls = RetTy->getVectorNumElements(); | 1483 | 9 | SmallVector<Type *, 4> ScalarTys; | 1484 | 24 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i15 ) { | 1485 | 15 | Type *Ty = Tys[i]; | 1486 | 15 | if (Ty->isVectorTy()) | 1487 | 15 | Ty = Ty->getScalarType(); | 1488 | 15 | ScalarTys.push_back(Ty); | 1489 | 15 | } | 1490 | 9 | unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost( | 1491 | 9 | IID, RetTy->getScalarType(), ScalarTys, FMF); | 1492 | 24 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i15 ) { | 1493 | 15 | if (Tys[i]->isVectorTy()) { | 1494 | 15 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1495 | 0 | ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); | 1496 | 15 | ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements()); | 1497 | 15 | } | 1498 | 15 | } | 1499 | 9 | | 1500 | 9 | return ScalarCalls * ScalarCost + ScalarizationCost; | 1501 | 9 | } | 1502 | 0 | | 1503 | 0 | // This is going to be turned into a library call, make it expensive. | 1504 | 0 | return SingleCallCost; | 1505 | 0 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int) Line | Count | Source | 1133 | 768 | unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) { | 1134 | 768 | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements()270 : 1498 ); | 1135 | 768 | auto *ConcreteTTI = static_cast<T *>(this); | 1136 | 768 | | 1137 | 768 | SmallVector<unsigned, 2> ISDs; | 1138 | 768 | unsigned SingleCallCost = 10; // Library call cost. Make it expensive. | 1139 | 768 | switch (IID) { | 1140 | 768 | default: { | 1141 | 72 | // Assume that we need to scalarize this intrinsic. | 1142 | 72 | unsigned ScalarizationCost = ScalarizationCostPassed; | 1143 | 72 | unsigned ScalarCalls = 1; | 1144 | 72 | Type *ScalarRetTy = RetTy; | 1145 | 72 | if (RetTy->isVectorTy()) { | 1146 | 24 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1147 | 0 | ScalarizationCost = getScalarizationOverhead(RetTy, true, false); | 1148 | 24 | ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); | 1149 | 24 | ScalarRetTy = RetTy->getScalarType(); | 1150 | 24 | } | 1151 | 72 | SmallVector<Type *, 4> ScalarTys; | 1152 | 216 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i144 ) { | 1153 | 144 | Type *Ty = Tys[i]; | 1154 | 144 | if (Ty->isVectorTy()) { | 1155 | 48 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1156 | 0 | ScalarizationCost += getScalarizationOverhead(Ty, false, true); | 1157 | 48 | ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements()); | 1158 | 48 | Ty = Ty->getScalarType(); | 1159 | 48 | } | 1160 | 144 | ScalarTys.push_back(Ty); | 1161 | 144 | } | 1162 | 72 | if (ScalarCalls == 1) | 1163 | 48 | return 1; // Return cost of a scalar intrinsic. Assume it to be cheap. 
| 1164 | 24 | | 1165 | 24 | unsigned ScalarCost = | 1166 | 24 | ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF); | 1167 | 24 | | 1168 | 24 | return ScalarCalls * ScalarCost + ScalarizationCost; | 1169 | 24 | } | 1170 | 24 | // Look for intrinsics that can be lowered directly or turned into a scalar | 1171 | 24 | // intrinsic call. | 1172 | 24 | case Intrinsic::sqrt: | 1173 | 0 | ISDs.push_back(ISD::FSQRT); | 1174 | 0 | break; | 1175 | 24 | case Intrinsic::sin: | 1176 | 0 | ISDs.push_back(ISD::FSIN); | 1177 | 0 | break; | 1178 | 24 | case Intrinsic::cos: | 1179 | 0 | ISDs.push_back(ISD::FCOS); | 1180 | 0 | break; | 1181 | 24 | case Intrinsic::exp: | 1182 | 0 | ISDs.push_back(ISD::FEXP); | 1183 | 0 | break; | 1184 | 24 | case Intrinsic::exp2: | 1185 | 0 | ISDs.push_back(ISD::FEXP2); | 1186 | 0 | break; | 1187 | 24 | case Intrinsic::log: | 1188 | 0 | ISDs.push_back(ISD::FLOG); | 1189 | 0 | break; | 1190 | 24 | case Intrinsic::log10: | 1191 | 0 | ISDs.push_back(ISD::FLOG10); | 1192 | 0 | break; | 1193 | 24 | case Intrinsic::log2: | 1194 | 0 | ISDs.push_back(ISD::FLOG2); | 1195 | 0 | break; | 1196 | 300 | case Intrinsic::fabs: | 1197 | 300 | ISDs.push_back(ISD::FABS); | 1198 | 300 | break; | 1199 | 24 | case Intrinsic::canonicalize: | 1200 | 0 | ISDs.push_back(ISD::FCANONICALIZE); | 1201 | 0 | break; | 1202 | 24 | case Intrinsic::minnum: | 1203 | 0 | ISDs.push_back(ISD::FMINNUM); | 1204 | 0 | if (FMF.noNaNs()) | 1205 | 0 | ISDs.push_back(ISD::FMINIMUM); | 1206 | 0 | break; | 1207 | 24 | case Intrinsic::maxnum: | 1208 | 0 | ISDs.push_back(ISD::FMAXNUM); | 1209 | 0 | if (FMF.noNaNs()) | 1210 | 0 | ISDs.push_back(ISD::FMAXIMUM); | 1211 | 0 | break; | 1212 | 396 | case Intrinsic::copysign: | 1213 | 396 | ISDs.push_back(ISD::FCOPYSIGN); | 1214 | 396 | break; | 1215 | 24 | case Intrinsic::floor: | 1216 | 0 | ISDs.push_back(ISD::FFLOOR); | 1217 | 0 | break; | 1218 | 24 | case Intrinsic::ceil: | 1219 | 0 | ISDs.push_back(ISD::FCEIL); | 1220 | 0 | break; | 
1221 | 24 | case Intrinsic::trunc: | 1222 | 0 | ISDs.push_back(ISD::FTRUNC); | 1223 | 0 | break; | 1224 | 24 | case Intrinsic::nearbyint: | 1225 | 0 | ISDs.push_back(ISD::FNEARBYINT); | 1226 | 0 | break; | 1227 | 24 | case Intrinsic::rint: | 1228 | 0 | ISDs.push_back(ISD::FRINT); | 1229 | 0 | break; | 1230 | 24 | case Intrinsic::round: | 1231 | 0 | ISDs.push_back(ISD::FROUND); | 1232 | 0 | break; | 1233 | 24 | case Intrinsic::pow: | 1234 | 0 | ISDs.push_back(ISD::FPOW); | 1235 | 0 | break; | 1236 | 24 | case Intrinsic::fma: | 1237 | 0 | ISDs.push_back(ISD::FMA); | 1238 | 0 | break; | 1239 | 24 | case Intrinsic::fmuladd: | 1240 | 0 | ISDs.push_back(ISD::FMA); | 1241 | 0 | break; | 1242 | 24 | // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. | 1243 | 24 | case Intrinsic::lifetime_start: | 1244 | 0 | case Intrinsic::lifetime_end: | 1245 | 0 | case Intrinsic::sideeffect: | 1246 | 0 | return 0; | 1247 | 0 | case Intrinsic::masked_store: | 1248 | 0 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, | 1249 | 0 | 0); | 1250 | 0 | case Intrinsic::masked_load: | 1251 | 0 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); | 1252 | 0 | case Intrinsic::experimental_vector_reduce_add: | 1253 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0], | 1254 | 0 | /*IsPairwiseForm=*/false); | 1255 | 0 | case Intrinsic::experimental_vector_reduce_mul: | 1256 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0], | 1257 | 0 | /*IsPairwiseForm=*/false); | 1258 | 0 | case Intrinsic::experimental_vector_reduce_and: | 1259 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0], | 1260 | 0 | /*IsPairwiseForm=*/false); | 1261 | 0 | case Intrinsic::experimental_vector_reduce_or: | 1262 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0], | 1263 | 0 | /*IsPairwiseForm=*/false); | 1264 | 0 | case 
Intrinsic::experimental_vector_reduce_xor: | 1265 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0], | 1266 | 0 | /*IsPairwiseForm=*/false); | 1267 | 0 | case Intrinsic::experimental_vector_reduce_v2_fadd: | 1268 | 0 | return ConcreteTTI->getArithmeticReductionCost( | 1269 | 0 | Instruction::FAdd, Tys[0], | 1270 | 0 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict | 1271 | 0 | // reductions. | 1272 | 0 | case Intrinsic::experimental_vector_reduce_v2_fmul: | 1273 | 0 | return ConcreteTTI->getArithmeticReductionCost( | 1274 | 0 | Instruction::FMul, Tys[0], | 1275 | 0 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict | 1276 | 0 | // reductions. | 1277 | 0 | case Intrinsic::experimental_vector_reduce_smax: | 1278 | 0 | case Intrinsic::experimental_vector_reduce_smin: | 1279 | 0 | case Intrinsic::experimental_vector_reduce_fmax: | 1280 | 0 | case Intrinsic::experimental_vector_reduce_fmin: | 1281 | 0 | return ConcreteTTI->getMinMaxReductionCost( | 1282 | 0 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, | 1283 | 0 | /*IsUnsigned=*/true); | 1284 | 0 | case Intrinsic::experimental_vector_reduce_umax: | 1285 | 0 | case Intrinsic::experimental_vector_reduce_umin: | 1286 | 0 | return ConcreteTTI->getMinMaxReductionCost( | 1287 | 0 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, | 1288 | 0 | /*IsUnsigned=*/false); | 1289 | 0 | case Intrinsic::sadd_sat: | 1290 | 0 | case Intrinsic::ssub_sat: { | 1291 | 0 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1292 | 0 | if (RetVF > 1) | 1293 | 0 | CondTy = VectorType::get(CondTy, RetVF); | 1294 | 0 |
| 1295 | 0 | Type *OpTy = StructType::create({RetTy, CondTy}); | 1296 | 0 | Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat | 1297 | 0 | ? Intrinsic::sadd_with_overflow | 1298 | 0 | : Intrinsic::ssub_with_overflow; | 1299 | 0 |
| 1300 | 0 | // SatMax -> Overflow && SumDiff < 0 | 1301 | 0 | // SatMin -> Overflow && SumDiff >= 0 | 1302 | 0 | unsigned Cost = 0; | 1303 | 0 | Cost += ConcreteTTI->getIntrinsicInstrCost( | 1304 | 0 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); | 1305 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, | 1306 | 0 | CondTy, nullptr); | 1307 | 0 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1308 | 0 | CondTy, nullptr); | 1309 | 0 | return Cost; | 1310 | 0 | } | 1311 | 0 | case Intrinsic::uadd_sat: | 1312 | 0 | case Intrinsic::usub_sat: { | 1313 | 0 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1314 | 0 | if (RetVF > 1) | 1315 | 0 | CondTy = VectorType::get(CondTy, RetVF); | 1316 | 0 |
| 1317 | 0 | Type *OpTy = StructType::create({RetTy, CondTy}); | 1318 | 0 | Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat | 1319 | 0 | ? Intrinsic::uadd_with_overflow | 1320 | 0 | : Intrinsic::usub_with_overflow; | 1321 | 0 |
| 1322 | 0 | unsigned Cost = 0; | 1323 | 0 | Cost += ConcreteTTI->getIntrinsicInstrCost( | 1324 | 0 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); | 1325 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1326 | 0 | CondTy, nullptr); | 1327 | 0 | return Cost; | 1328 | 0 | } | 1329 | 0 | case Intrinsic::smul_fix: | 1330 | 0 | case Intrinsic::umul_fix: { | 1331 | 0 | unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; | 1332 | 0 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); | 1333 | 0 | if (RetVF > 1) | 1334 | 0 | ExtTy = VectorType::get(ExtTy, RetVF); | 1335 | 0 |
| 1336 | 0 | unsigned ExtOp = | 1337 | 0 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; | 1338 | 0 |
| 1339 | 0 | unsigned Cost = 0; | 1340 | 0 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy); | 1341 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); | 1342 | 0 | Cost += | 1343 | 0 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy); | 1344 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy, | 1345 | 0 | TTI::OK_AnyValue, | 1346 | 0 | TTI::OK_UniformConstantValue); | 1347 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy, | 1348 | 0 | TTI::OK_AnyValue, | 1349 | 0 | TTI::OK_UniformConstantValue); | 1350 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy); | 1351 | 0 | return Cost; | 1352 | 0 | } | 1353 | 0 | case Intrinsic::sadd_with_overflow: | 1354 | 0 | case Intrinsic::ssub_with_overflow: { | 1355 | 0 | Type *SumTy = RetTy->getContainedType(0); | 1356 | 0 | Type *OverflowTy = RetTy->getContainedType(1); | 1357 | 0 | unsigned Opcode = IID == Intrinsic::sadd_with_overflow | 1358 | 0 | ? BinaryOperator::Add | 1359 | 0 | : BinaryOperator::Sub; | 1360 | 0 |
| 1361 | 0 | // LHSSign -> LHS >= 0 | 1362 | 0 | // RHSSign -> RHS >= 0 | 1363 | 0 | // SumSign -> Sum >= 0 | 1364 | 0 | // | 1365 | 0 | // Add: | 1366 | 0 | // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) | 1367 | 0 | // Sub: | 1368 | 0 | // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) | 1369 | 0 | unsigned Cost = 0; | 1370 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); | 1371 | 0 | Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, | 1372 | 0 | OverflowTy, nullptr); | 1373 | 0 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost( | 1374 | 0 | BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr); | 1375 | 0 | Cost += | 1376 | 0 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy); | 1377 | 0 | return Cost; | 1378 | 0 | } | 1379 | 0 | case Intrinsic::uadd_with_overflow: | 1380 | 0 | case Intrinsic::usub_with_overflow: { | 1381 | 0 | Type *SumTy = RetTy->getContainedType(0); | 1382 | 0 | Type *OverflowTy = RetTy->getContainedType(1); | 1383 | 0 | unsigned Opcode = IID == Intrinsic::uadd_with_overflow | 1384 | 0 | ? BinaryOperator::Add | 1385 | 0 | : BinaryOperator::Sub; | 1386 | 0 |
| 1387 | 0 | unsigned Cost = 0; | 1388 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); | 1389 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, | 1390 | 0 | OverflowTy, nullptr); | 1391 | 0 | return Cost; | 1392 | 0 | } | 1393 | 0 | case Intrinsic::smul_with_overflow: | 1394 | 0 | case Intrinsic::umul_with_overflow: { | 1395 | 0 | Type *MulTy = RetTy->getContainedType(0); | 1396 | 0 | Type *OverflowTy = RetTy->getContainedType(1); | 1397 | 0 | unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; | 1398 | 0 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); | 1399 | 0 | if (MulTy->isVectorTy()) | 1400 | 0 | ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() ); | 1401 | 0 |
| 1402 | 0 | unsigned ExtOp = | 1403 | 0 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; | 1404 | 0 |
| 1405 | 0 | unsigned Cost = 0; | 1406 | 0 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy); | 1407 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); | 1408 | 0 | Cost += | 1409 | 0 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy); | 1410 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy, | 1411 | 0 | TTI::OK_AnyValue, | 1412 | 0 | TTI::OK_UniformConstantValue); | 1413 | 0 |
| 1414 | 0 | if (IID == Intrinsic::smul_with_overflow) | 1415 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost( | 1416 | 0 | Instruction::AShr, MulTy, TTI::OK_AnyValue, | 1417 | 0 | TTI::OK_UniformConstantValue); | 1418 | 0 |
| 1419 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, | 1420 | 0 | OverflowTy, nullptr); | 1421 | 0 | return Cost; | 1422 | 0 | } | 1423 | 0 | case Intrinsic::ctpop: | 1424 | 0 | ISDs.push_back(ISD::CTPOP); | 1425 | 0 | // In case of legalization use TCC_Expensive. This is cheaper than a | 1426 | 0 | // library call but still not a cheap instruction. | 1427 | 0 | SingleCallCost = TargetTransformInfo::TCC_Expensive; | 1428 | 0 | break; | 1429 | 696 | // FIXME: ctlz, cttz, ... | 1430 | 696 | } | 1431 | 696 | | 1432 | 696 | const TargetLoweringBase *TLI = getTLI(); | 1433 | 696 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); | 1434 | 696 | | 1435 | 696 | SmallVector<unsigned, 2> LegalCost; | 1436 | 696 | SmallVector<unsigned, 2> CustomCost; | 1437 | 696 | for (unsigned ISD : ISDs) { | 1438 | 696 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { | 1439 | 228 | if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() && | 1440 | 228 | TLI->isFAbsFree(LT.second)) { | 1441 | 0 | return 0; | 1442 | 0 | } | 1443 | 228 | | 1444 | 228 | // The operation is legal. Assume it costs 1. | 1445 | 228 | // If the type is split to multiple registers, assume that there is some | 1446 | 228 | // overhead to this. | 1447 | 228 | // TODO: Once we have extract/insert subvector cost we need to use them. | 1448 | 228 | if (LT.first > 1) | 1449 | 0 | LegalCost.push_back(LT.first * 2); | 1450 | 228 | else | 1451 | 228 | LegalCost.push_back(LT.first * 1); | 1452 | 468 | } else if (!TLI->isOperationExpand(ISD, LT.second)) { | 1453 | 264 | // If the operation is custom lowered then assume | 1454 | 264 | // that the code is twice as expensive. 
| 1455 | 264 | CustomCost.push_back(LT.first * 2); | 1456 | 264 | } | 1457 | 696 | } | 1458 | 696 | | 1459 | 696 | auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); | 1460 | 696 | if (MinLegalCostI != LegalCost.end()) | 1461 | 228 | return *MinLegalCostI; | 1462 | 468 | | 1463 | 468 | auto MinCustomCostI = | 1464 | 468 | std::min_element(CustomCost.begin(), CustomCost.end()); | 1465 | 468 | if (MinCustomCostI != CustomCost.end()) | 1466 | 264 | return *MinCustomCostI; | 1467 | 204 | | 1468 | 204 | // If we can't lower fmuladd into an FMA estimate the cost as a floating | 1469 | 204 | // point mul followed by an add. | 1470 | 204 | if (IID == Intrinsic::fmuladd) | 1471 | 0 | return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + | 1472 | 0 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); | 1473 | 204 | | 1474 | 204 | // Else, assume that we need to scalarize this intrinsic. For math builtins | 1475 | 204 | // this will emit a costly libcall, adding call overhead and spills. Make it | 1476 | 204 | // very expensive. | 1477 | 204 | if (RetTy->isVectorTy()) { | 1478 | 204 | unsigned ScalarizationCost = | 1479 | 204 | ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max()) | 1480 | 204 | ? 
ScalarizationCostPassed | 1481 | 204 | : getScalarizationOverhead(RetTy, true, false)0 ); | 1482 | 204 | unsigned ScalarCalls = RetTy->getVectorNumElements(); | 1483 | 204 | SmallVector<Type *, 4> ScalarTys; | 1484 | 540 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i336 ) { | 1485 | 336 | Type *Ty = Tys[i]; | 1486 | 336 | if (Ty->isVectorTy()) | 1487 | 336 | Ty = Ty->getScalarType(); | 1488 | 336 | ScalarTys.push_back(Ty); | 1489 | 336 | } | 1490 | 204 | unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost( | 1491 | 204 | IID, RetTy->getScalarType(), ScalarTys, FMF); | 1492 | 540 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i336 ) { | 1493 | 336 | if (Tys[i]->isVectorTy()) { | 1494 | 336 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1495 | 0 | ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); | 1496 | 336 | ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements()); | 1497 | 336 | } | 1498 | 336 | } | 1499 | 204 | | 1500 | 204 | return ScalarCalls * ScalarCost + ScalarizationCost; | 1501 | 204 | } | 1502 | 0 | | 1503 | 0 | // This is going to be turned into a library call, make it expensive. | 1504 | 0 | return SingleCallCost; | 1505 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int) llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int) Line | Count | Source | 1133 | 155 | unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) { | 1134 | 155 | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements()73 : 182 ); | 1135 | 155 | auto *ConcreteTTI = static_cast<T *>(this); | 1136 | 155 | | 1137 | 155 | SmallVector<unsigned, 2> ISDs; | 1138 | 155 | unsigned SingleCallCost = 10; // Library call cost. Make it expensive. | 1139 | 155 | switch (IID) { | 1140 | 155 | default: { | 1141 | 0 | // Assume that we need to scalarize this intrinsic. 
| 1142 | 0 | unsigned ScalarizationCost = ScalarizationCostPassed; | 1143 | 0 | unsigned ScalarCalls = 1; | 1144 | 0 | Type *ScalarRetTy = RetTy; | 1145 | 0 | if (RetTy->isVectorTy()) { | 1146 | 0 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1147 | 0 | ScalarizationCost = getScalarizationOverhead(RetTy, true, false); | 1148 | 0 | ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); | 1149 | 0 | ScalarRetTy = RetTy->getScalarType(); | 1150 | 0 | } | 1151 | 0 | SmallVector<Type *, 4> ScalarTys; | 1152 | 0 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { | 1153 | 0 | Type *Ty = Tys[i]; | 1154 | 0 | if (Ty->isVectorTy()) { | 1155 | 0 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1156 | 0 | ScalarizationCost += getScalarizationOverhead(Ty, false, true); | 1157 | 0 | ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements()); | 1158 | 0 | Ty = Ty->getScalarType(); | 1159 | 0 | } | 1160 | 0 | ScalarTys.push_back(Ty); | 1161 | 0 | } | 1162 | 0 | if (ScalarCalls == 1) | 1163 | 0 | return 1; // Return cost of a scalar intrinsic. Assume it to be cheap. | 1164 | 0 | | 1165 | 0 | unsigned ScalarCost = | 1166 | 0 | ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF); | 1167 | 0 |
| 1168 | 0 | return ScalarCalls * ScalarCost + ScalarizationCost; | 1169 | 0 | } | 1170 | 0 | // Look for intrinsics that can be lowered directly or turned into a scalar | 1171 | 0 | // intrinsic call. | 1172 | 10 | case Intrinsic::sqrt: | 1173 | 10 | ISDs.push_back(ISD::FSQRT); | 1174 | 10 | break; | 1175 | 18 | case Intrinsic::sin: | 1176 | 18 | ISDs.push_back(ISD::FSIN); | 1177 | 18 | break; | 1178 | 18 | case Intrinsic::cos: | 1179 | 18 | ISDs.push_back(ISD::FCOS); | 1180 | 18 | break; | 1181 | 18 | case Intrinsic::exp: | 1182 | 18 | ISDs.push_back(ISD::FEXP); | 1183 | 18 | break; | 1184 | 18 | case Intrinsic::exp2: | 1185 | 18 | ISDs.push_back(ISD::FEXP2); | 1186 | 18 | break; | 1187 | 18 | case Intrinsic::log: | 1188 | 18 | ISDs.push_back(ISD::FLOG); | 1189 | 18 | break; | 1190 | 18 | case Intrinsic::log10: | 1191 | 18 | ISDs.push_back(ISD::FLOG10); | 1192 | 18 | break; | 1193 | 18 | case Intrinsic::log2: | 1194 | 18 | ISDs.push_back(ISD::FLOG2); | 1195 | 18 | break; | 1196 | 0 | case Intrinsic::fabs: | 1197 | 0 | ISDs.push_back(ISD::FABS); | 1198 | 0 | break; | 1199 | 0 | case Intrinsic::canonicalize: | 1200 | 0 | ISDs.push_back(ISD::FCANONICALIZE); | 1201 | 0 | break; | 1202 | 0 | case Intrinsic::minnum: | 1203 | 0 | ISDs.push_back(ISD::FMINNUM); | 1204 | 0 | if (FMF.noNaNs()) | 1205 | 0 | ISDs.push_back(ISD::FMINIMUM); | 1206 | 0 | break; | 1207 | 0 | case Intrinsic::maxnum: | 1208 | 0 | ISDs.push_back(ISD::FMAXNUM); | 1209 | 0 | if (FMF.noNaNs()) | 1210 | 0 | ISDs.push_back(ISD::FMAXIMUM); | 1211 | 0 | break; | 1212 | 0 | case Intrinsic::copysign: | 1213 | 0 | ISDs.push_back(ISD::FCOPYSIGN); | 1214 | 0 | break; | 1215 | 0 | case Intrinsic::floor: | 1216 | 0 | ISDs.push_back(ISD::FFLOOR); | 1217 | 0 | break; | 1218 | 0 | case Intrinsic::ceil: | 1219 | 0 | ISDs.push_back(ISD::FCEIL); | 1220 | 0 | break; | 1221 | 0 | case Intrinsic::trunc: | 1222 | 0 | ISDs.push_back(ISD::FTRUNC); | 1223 | 0 | break; | 1224 | 0 | case Intrinsic::nearbyint: | 1225 | 0 | 
ISDs.push_back(ISD::FNEARBYINT); | 1226 | 0 | break; | 1227 | 0 | case Intrinsic::rint: | 1228 | 0 | ISDs.push_back(ISD::FRINT); | 1229 | 0 | break; | 1230 | 0 | case Intrinsic::round: | 1231 | 0 | ISDs.push_back(ISD::FROUND); | 1232 | 0 | break; | 1233 | 18 | case Intrinsic::pow: | 1234 | 18 | ISDs.push_back(ISD::FPOW); | 1235 | 18 | break; | 1236 | 0 | case Intrinsic::fma: | 1237 | 0 | ISDs.push_back(ISD::FMA); | 1238 | 0 | break; | 1239 | 0 | case Intrinsic::fmuladd: | 1240 | 0 | ISDs.push_back(ISD::FMA); | 1241 | 0 | break; | 1242 | 0 | // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. | 1243 | 0 | case Intrinsic::lifetime_start: | 1244 | 0 | case Intrinsic::lifetime_end: | 1245 | 0 | case Intrinsic::sideeffect: | 1246 | 0 | return 0; | 1247 | 0 | case Intrinsic::masked_store: | 1248 | 0 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, | 1249 | 0 | 0); | 1250 | 0 | case Intrinsic::masked_load: | 1251 | 0 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); | 1252 | 0 | case Intrinsic::experimental_vector_reduce_add: | 1253 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0], | 1254 | 0 | /*IsPairwiseForm=*/false); | 1255 | 0 | case Intrinsic::experimental_vector_reduce_mul: | 1256 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0], | 1257 | 0 | /*IsPairwiseForm=*/false); | 1258 | 0 | case Intrinsic::experimental_vector_reduce_and: | 1259 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0], | 1260 | 0 | /*IsPairwiseForm=*/false); | 1261 | 0 | case Intrinsic::experimental_vector_reduce_or: | 1262 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0], | 1263 | 0 | /*IsPairwiseForm=*/false); | 1264 | 0 | case Intrinsic::experimental_vector_reduce_xor: | 1265 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0], | 1266 | 0 | /*IsPairwiseForm=*/false); | 1267 | 0 | case 
Intrinsic::experimental_vector_reduce_v2_fadd: | 1268 | 0 | return ConcreteTTI->getArithmeticReductionCost( | 1269 | 0 | Instruction::FAdd, Tys[0], | 1270 | 0 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict | 1271 | 0 | // reductions. | 1272 | 0 | case Intrinsic::experimental_vector_reduce_v2_fmul: | 1273 | 0 | return ConcreteTTI->getArithmeticReductionCost( | 1274 | 0 | Instruction::FMul, Tys[0], | 1275 | 0 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict | 1276 | 0 | // reductions. | 1277 | 0 | case Intrinsic::experimental_vector_reduce_smax: | 1278 | 0 | case Intrinsic::experimental_vector_reduce_smin: | 1279 | 0 | case Intrinsic::experimental_vector_reduce_fmax: | 1280 | 0 | case Intrinsic::experimental_vector_reduce_fmin: | 1281 | 0 | return ConcreteTTI->getMinMaxReductionCost( | 1282 | 0 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, | 1283 | 0 | /*IsUnsigned=*/true); | 1284 | 0 | case Intrinsic::experimental_vector_reduce_umax: | 1285 | 0 | case Intrinsic::experimental_vector_reduce_umin: | 1286 | 0 | return ConcreteTTI->getMinMaxReductionCost( | 1287 | 0 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, | 1288 | 0 | /*IsUnsigned=*/false); | 1289 | 0 | case Intrinsic::sadd_sat: | 1290 | 0 | case Intrinsic::ssub_sat: { | 1291 | 0 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1292 | 0 | if (RetVF > 1) | 1293 | 0 | CondTy = VectorType::get(CondTy, RetVF); | 1294 | 0 |
| 1295 | 0 | Type *OpTy = StructType::create({RetTy, CondTy}); | 1296 | 0 | Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat | 1297 | 0 | ? Intrinsic::sadd_with_overflow | 1298 | 0 | : Intrinsic::ssub_with_overflow; | 1299 | 0 |
| 1300 | 0 | // SatMax -> Overflow && SumDiff < 0 | 1301 | 0 | // SatMin -> Overflow && SumDiff >= 0 | 1302 | 0 | unsigned Cost = 0; | 1303 | 0 | Cost += ConcreteTTI->getIntrinsicInstrCost( | 1304 | 0 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); | 1305 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, | 1306 | 0 | CondTy, nullptr); | 1307 | 0 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1308 | 0 | CondTy, nullptr); | 1309 | 0 | return Cost; | 1310 | 0 | } | 1311 | 0 | case Intrinsic::uadd_sat: | 1312 | 0 | case Intrinsic::usub_sat: { | 1313 | 0 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1314 | 0 | if (RetVF > 1) | 1315 | 0 | CondTy = VectorType::get(CondTy, RetVF); | 1316 | 0 |
| 1317 | 0 | Type *OpTy = StructType::create({RetTy, CondTy}); | 1318 | 0 | Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat | 1319 | 0 | ? Intrinsic::uadd_with_overflow | 1320 | 0 | : Intrinsic::usub_with_overflow; | 1321 | 0 |
| 1322 | 0 | unsigned Cost = 0; | 1323 | 0 | Cost += ConcreteTTI->getIntrinsicInstrCost( | 1324 | 0 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); | 1325 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1326 | 0 | CondTy, nullptr); | 1327 | 0 | return Cost; | 1328 | 0 | } | 1329 | 0 | case Intrinsic::smul_fix: | 1330 | 0 | case Intrinsic::umul_fix: { | 1331 | 0 | unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; | 1332 | 0 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); | 1333 | 0 | if (RetVF > 1) | 1334 | 0 | ExtTy = VectorType::get(ExtTy, RetVF); | 1335 | 0 |
| 1336 | 0 | unsigned ExtOp = | 1337 | 0 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; | 1338 | 0 |
| 1339 | 0 | unsigned Cost = 0; | 1340 | 0 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy); | 1341 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); | 1342 | 0 | Cost += | 1343 | 0 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy); | 1344 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy, | 1345 | 0 | TTI::OK_AnyValue, | 1346 | 0 | TTI::OK_UniformConstantValue); | 1347 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy, | 1348 | 0 | TTI::OK_AnyValue, | 1349 | 0 | TTI::OK_UniformConstantValue); | 1350 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy); | 1351 | 0 | return Cost; | 1352 | 0 | } | 1353 | 0 | case Intrinsic::sadd_with_overflow: | 1354 | 0 | case Intrinsic::ssub_with_overflow: { | 1355 | 0 | Type *SumTy = RetTy->getContainedType(0); | 1356 | 0 | Type *OverflowTy = RetTy->getContainedType(1); | 1357 | 0 | unsigned Opcode = IID == Intrinsic::sadd_with_overflow | 1358 | 0 | ? BinaryOperator::Add | 1359 | 0 | : BinaryOperator::Sub; | 1360 | 0 |
| 1361 | 0 | // LHSSign -> LHS >= 0 | 1362 | 0 | // RHSSign -> RHS >= 0 | 1363 | 0 | // SumSign -> Sum >= 0 | 1364 | 0 | // | 1365 | 0 | // Add: | 1366 | 0 | // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) | 1367 | 0 | // Sub: | 1368 | 0 | // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) | 1369 | 0 | unsigned Cost = 0; | 1370 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); | 1371 | 0 | Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, | 1372 | 0 | OverflowTy, nullptr); | 1373 | 0 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost( | 1374 | 0 | BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr); | 1375 | 0 | Cost += | 1376 | 0 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy); | 1377 | 0 | return Cost; | 1378 | 0 | } | 1379 | 0 | case Intrinsic::uadd_with_overflow: | 1380 | 0 | case Intrinsic::usub_with_overflow: { | 1381 | 0 | Type *SumTy = RetTy->getContainedType(0); | 1382 | 0 | Type *OverflowTy = RetTy->getContainedType(1); | 1383 | 0 | unsigned Opcode = IID == Intrinsic::uadd_with_overflow | 1384 | 0 | ? BinaryOperator::Add | 1385 | 0 | : BinaryOperator::Sub; | 1386 | 0 |
| 1387 | 0 | unsigned Cost = 0; | 1388 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); | 1389 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, | 1390 | 0 | OverflowTy, nullptr); | 1391 | 0 | return Cost; | 1392 | 0 | } | 1393 | 0 | case Intrinsic::smul_with_overflow: | 1394 | 0 | case Intrinsic::umul_with_overflow: { | 1395 | 0 | Type *MulTy = RetTy->getContainedType(0); | 1396 | 0 | Type *OverflowTy = RetTy->getContainedType(1); | 1397 | 0 | unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; | 1398 | 0 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); | 1399 | 0 | if (MulTy->isVectorTy()) | 1400 | 0 | ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() ); | 1401 | 0 |
| 1402 | 0 | unsigned ExtOp = | 1403 | 0 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; | 1404 | 0 |
| 1405 | 0 | unsigned Cost = 0; | 1406 | 0 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy); | 1407 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); | 1408 | 0 | Cost += | 1409 | 0 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy); | 1410 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy, | 1411 | 0 | TTI::OK_AnyValue, | 1412 | 0 | TTI::OK_UniformConstantValue); | 1413 | 0 |
| 1414 | 0 | if (IID == Intrinsic::smul_with_overflow) | 1415 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost( | 1416 | 0 | Instruction::AShr, MulTy, TTI::OK_AnyValue, | 1417 | 0 | TTI::OK_UniformConstantValue); | 1418 | 0 |
| 1419 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, | 1420 | 0 | OverflowTy, nullptr); | 1421 | 0 | return Cost; | 1422 | 0 | } | 1423 | 1 | case Intrinsic::ctpop: | 1424 | 1 | ISDs.push_back(ISD::CTPOP); | 1425 | 1 | // In case of legalization use TCC_Expensive. This is cheaper than a | 1426 | 1 | // library call but still not a cheap instruction. | 1427 | 1 | SingleCallCost = TargetTransformInfo::TCC_Expensive; | 1428 | 1 | break; | 1429 | 155 | // FIXME: ctlz, cttz, ... | 1430 | 155 | } | 1431 | 155 | | 1432 | 155 | const TargetLoweringBase *TLI = getTLI(); | 1433 | 155 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); | 1434 | 155 | | 1435 | 155 | SmallVector<unsigned, 2> LegalCost; | 1436 | 155 | SmallVector<unsigned, 2> CustomCost; | 1437 | 155 | for (unsigned ISD : ISDs) { | 1438 | 155 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { | 1439 | 11 | if (IID == Intrinsic::fabs && LT.second.isFloatingPoint()0 && | 1440 | 11 | TLI->isFAbsFree(LT.second)0 ) { | 1441 | 0 | return 0; | 1442 | 0 | } | 1443 | 11 | | 1444 | 11 | // The operation is legal. Assume it costs 1. | 1445 | 11 | // If the type is split to multiple registers, assume that there is some | 1446 | 11 | // overhead to this. | 1447 | 11 | // TODO: Once we have extract/insert subvector cost we need to use them. | 1448 | 11 | if (LT.first > 1) | 1449 | 0 | LegalCost.push_back(LT.first * 2); | 1450 | 11 | else | 1451 | 11 | LegalCost.push_back(LT.first * 1); | 1452 | 144 | } else if (!TLI->isOperationExpand(ISD, LT.second)) { | 1453 | 0 | // If the operation is custom lowered then assume | 1454 | 0 | // that the code is twice as expensive. 
| 1455 | 0 | CustomCost.push_back(LT.first * 2); | 1456 | 0 | } | 1457 | 155 | } | 1458 | 155 | | 1459 | 155 | auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); | 1460 | 155 | if (MinLegalCostI != LegalCost.end()) | 1461 | 11 | return *MinLegalCostI; | 1462 | 144 | | 1463 | 144 | auto MinCustomCostI = | 1464 | 144 | std::min_element(CustomCost.begin(), CustomCost.end()); | 1465 | 144 | if (MinCustomCostI != CustomCost.end()) | 1466 | 0 | return *MinCustomCostI; | 1467 | 144 | | 1468 | 144 | // If we can't lower fmuladd into an FMA estimate the cost as a floating | 1469 | 144 | // point mul followed by an add. | 1470 | 144 | if (IID == Intrinsic::fmuladd) | 1471 | 0 | return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + | 1472 | 0 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); | 1473 | 144 | | 1474 | 144 | // Else, assume that we need to scalarize this intrinsic. For math builtins | 1475 | 144 | // this will emit a costly libcall, adding call overhead and spills. Make it | 1476 | 144 | // very expensive. | 1477 | 144 | if (RetTy->isVectorTy()) { | 1478 | 64 | unsigned ScalarizationCost = | 1479 | 64 | ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max()) | 1480 | 64 | ? 
ScalarizationCostPassed | 1481 | 64 | : getScalarizationOverhead(RetTy, true, false)0 ); | 1482 | 64 | unsigned ScalarCalls = RetTy->getVectorNumElements(); | 1483 | 64 | SmallVector<Type *, 4> ScalarTys; | 1484 | 136 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i72 ) { | 1485 | 72 | Type *Ty = Tys[i]; | 1486 | 72 | if (Ty->isVectorTy()) | 1487 | 72 | Ty = Ty->getScalarType(); | 1488 | 72 | ScalarTys.push_back(Ty); | 1489 | 72 | } | 1490 | 64 | unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost( | 1491 | 64 | IID, RetTy->getScalarType(), ScalarTys, FMF); | 1492 | 136 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i72 ) { | 1493 | 72 | if (Tys[i]->isVectorTy()) { | 1494 | 72 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1495 | 0 | ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); | 1496 | 72 | ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements()); | 1497 | 72 | } | 1498 | 72 | } | 1499 | 64 | | 1500 | 64 | return ScalarCalls * ScalarCost + ScalarizationCost; | 1501 | 64 | } | 1502 | 80 | | 1503 | 80 | // This is going to be turned into a library call, make it expensive. | 1504 | 80 | return SingleCallCost; | 1505 | 80 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int) Line | Count | Source | 1133 | 29 | unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) { | 1134 | 29 | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements()0 : 1); | 1135 | 29 | auto *ConcreteTTI = static_cast<T *>(this); | 1136 | 29 | | 1137 | 29 | SmallVector<unsigned, 2> ISDs; | 1138 | 29 | unsigned SingleCallCost = 10; // Library call cost. Make it expensive. | 1139 | 29 | switch (IID) { | 1140 | 29 | default: { | 1141 | 24 | // Assume that we need to scalarize this intrinsic. | 1142 | 24 | unsigned ScalarizationCost = ScalarizationCostPassed; | 1143 | 24 | unsigned ScalarCalls = 1; | 1144 | 24 | Type *ScalarRetTy = RetTy; | 1145 | 24 | if (RetTy->isVectorTy()) { | 1146 | 0 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1147 | 0 | ScalarizationCost = getScalarizationOverhead(RetTy, true, false); | 1148 | 0 | ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); | 1149 | 0 | ScalarRetTy = RetTy->getScalarType(); | 1150 | 0 | } | 1151 | 24 | SmallVector<Type *, 4> ScalarTys; | 1152 | 48 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i24 ) { | 1153 | 24 | Type *Ty = Tys[i]; | 1154 | 24 | if (Ty->isVectorTy()) { | 1155 | 0 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1156 | 0 | ScalarizationCost += getScalarizationOverhead(Ty, false, true); | 1157 | 0 | ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements()); | 1158 | 0 | Ty = Ty->getScalarType(); | 1159 | 0 | } | 1160 | 24 | ScalarTys.push_back(Ty); | 1161 | 24 | } | 1162 | 24 | if (ScalarCalls == 1) | 1163 | 24 | return 1; // Return cost of a 
scalar intrinsic. Assume it to be cheap. | 1164 | 0 | | 1165 | 0 | unsigned ScalarCost = | 1166 | 0 | ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF); | 1167 | 0 |
| 1168 | 0 | return ScalarCalls * ScalarCost + ScalarizationCost; | 1169 | 0 | } | 1170 | 0 | // Look for intrinsics that can be lowered directly or turned into a scalar | 1171 | 0 | // intrinsic call. | 1172 | 0 | case Intrinsic::sqrt: | 1173 | 0 | ISDs.push_back(ISD::FSQRT); | 1174 | 0 | break; | 1175 | 0 | case Intrinsic::sin: | 1176 | 0 | ISDs.push_back(ISD::FSIN); | 1177 | 0 | break; | 1178 | 0 | case Intrinsic::cos: | 1179 | 0 | ISDs.push_back(ISD::FCOS); | 1180 | 0 | break; | 1181 | 0 | case Intrinsic::exp: | 1182 | 0 | ISDs.push_back(ISD::FEXP); | 1183 | 0 | break; | 1184 | 0 | case Intrinsic::exp2: | 1185 | 0 | ISDs.push_back(ISD::FEXP2); | 1186 | 0 | break; | 1187 | 0 | case Intrinsic::log: | 1188 | 0 | ISDs.push_back(ISD::FLOG); | 1189 | 0 | break; | 1190 | 0 | case Intrinsic::log10: | 1191 | 0 | ISDs.push_back(ISD::FLOG10); | 1192 | 0 | break; | 1193 | 0 | case Intrinsic::log2: | 1194 | 0 | ISDs.push_back(ISD::FLOG2); | 1195 | 0 | break; | 1196 | 0 | case Intrinsic::fabs: | 1197 | 0 | ISDs.push_back(ISD::FABS); | 1198 | 0 | break; | 1199 | 0 | case Intrinsic::canonicalize: | 1200 | 0 | ISDs.push_back(ISD::FCANONICALIZE); | 1201 | 0 | break; | 1202 | 0 | case Intrinsic::minnum: | 1203 | 0 | ISDs.push_back(ISD::FMINNUM); | 1204 | 0 | if (FMF.noNaNs()) | 1205 | 0 | ISDs.push_back(ISD::FMINIMUM); | 1206 | 0 | break; | 1207 | 0 | case Intrinsic::maxnum: | 1208 | 0 | ISDs.push_back(ISD::FMAXNUM); | 1209 | 0 | if (FMF.noNaNs()) | 1210 | 0 | ISDs.push_back(ISD::FMAXIMUM); | 1211 | 0 | break; | 1212 | 0 | case Intrinsic::copysign: | 1213 | 0 | ISDs.push_back(ISD::FCOPYSIGN); | 1214 | 0 | break; | 1215 | 0 | case Intrinsic::floor: | 1216 | 0 | ISDs.push_back(ISD::FFLOOR); | 1217 | 0 | break; | 1218 | 0 | case Intrinsic::ceil: | 1219 | 0 | ISDs.push_back(ISD::FCEIL); | 1220 | 0 | break; | 1221 | 0 | case Intrinsic::trunc: | 1222 | 0 | ISDs.push_back(ISD::FTRUNC); | 1223 | 0 | break; | 1224 | 0 | case Intrinsic::nearbyint: | 1225 | 0 | 
ISDs.push_back(ISD::FNEARBYINT); | 1226 | 0 | break; | 1227 | 0 | case Intrinsic::rint: | 1228 | 0 | ISDs.push_back(ISD::FRINT); | 1229 | 0 | break; | 1230 | 0 | case Intrinsic::round: | 1231 | 0 | ISDs.push_back(ISD::FROUND); | 1232 | 0 | break; | 1233 | 0 | case Intrinsic::pow: | 1234 | 0 | ISDs.push_back(ISD::FPOW); | 1235 | 0 | break; | 1236 | 0 | case Intrinsic::fma: | 1237 | 0 | ISDs.push_back(ISD::FMA); | 1238 | 0 | break; | 1239 | 0 | case Intrinsic::fmuladd: | 1240 | 0 | ISDs.push_back(ISD::FMA); | 1241 | 0 | break; | 1242 | 0 | // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. | 1243 | 5 | case Intrinsic::lifetime_start: | 1244 | 5 | case Intrinsic::lifetime_end: | 1245 | 5 | case Intrinsic::sideeffect: | 1246 | 5 | return 0; | 1247 | 5 | case Intrinsic::masked_store: | 1248 | 0 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, | 1249 | 0 | 0); | 1250 | 5 | case Intrinsic::masked_load: | 1251 | 0 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); | 1252 | 5 | case Intrinsic::experimental_vector_reduce_add: | 1253 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0], | 1254 | 0 | /*IsPairwiseForm=*/false); | 1255 | 5 | case Intrinsic::experimental_vector_reduce_mul: | 1256 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0], | 1257 | 0 | /*IsPairwiseForm=*/false); | 1258 | 5 | case Intrinsic::experimental_vector_reduce_and: | 1259 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0], | 1260 | 0 | /*IsPairwiseForm=*/false); | 1261 | 5 | case Intrinsic::experimental_vector_reduce_or: | 1262 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0], | 1263 | 0 | /*IsPairwiseForm=*/false); | 1264 | 5 | case Intrinsic::experimental_vector_reduce_xor: | 1265 | 0 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0], | 1266 | 0 | /*IsPairwiseForm=*/false); | 1267 | 5 | case 
Intrinsic::experimental_vector_reduce_v2_fadd: | 1268 | 0 | return ConcreteTTI->getArithmeticReductionCost( | 1269 | 0 | Instruction::FAdd, Tys[0], | 1270 | 0 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict | 1271 | 5 | // reductions. | 1272 | 5 | case Intrinsic::experimental_vector_reduce_v2_fmul: | 1273 | 0 | return ConcreteTTI->getArithmeticReductionCost( | 1274 | 0 | Instruction::FMul, Tys[0], | 1275 | 0 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict | 1276 | 5 | // reductions. | 1277 | 5 | case Intrinsic::experimental_vector_reduce_smax: | 1278 | 0 | case Intrinsic::experimental_vector_reduce_smin: | 1279 | 0 | case Intrinsic::experimental_vector_reduce_fmax: | 1280 | 0 | case Intrinsic::experimental_vector_reduce_fmin: | 1281 | 0 | return ConcreteTTI->getMinMaxReductionCost( | 1282 | 0 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, | 1283 | 0 | /*IsUnsigned=*/true); | 1284 | 0 | case Intrinsic::experimental_vector_reduce_umax: | 1285 | 0 | case Intrinsic::experimental_vector_reduce_umin: | 1286 | 0 | return ConcreteTTI->getMinMaxReductionCost( | 1287 | 0 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, | 1288 | 0 | /*IsUnsigned=*/false); | 1289 | 0 | case Intrinsic::sadd_sat: | 1290 | 0 | case Intrinsic::ssub_sat: { | 1291 | 0 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1292 | 0 | if (RetVF > 1) | 1293 | 0 | CondTy = VectorType::get(CondTy, RetVF); | 1294 | 0 |
| 1295 | 0 | Type *OpTy = StructType::create({RetTy, CondTy}); | 1296 | 0 | Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat | 1297 | 0 | ? Intrinsic::sadd_with_overflow | 1298 | 0 | : Intrinsic::ssub_with_overflow; | 1299 | 0 |
| 1300 | 0 | // SatMax -> Overflow && SumDiff < 0 | 1301 | 0 | // SatMin -> Overflow && SumDiff >= 0 | 1302 | 0 | unsigned Cost = 0; | 1303 | 0 | Cost += ConcreteTTI->getIntrinsicInstrCost( | 1304 | 0 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); | 1305 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, | 1306 | 0 | CondTy, nullptr); | 1307 | 0 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1308 | 0 | CondTy, nullptr); | 1309 | 0 | return Cost; | 1310 | 0 | } | 1311 | 0 | case Intrinsic::uadd_sat: | 1312 | 0 | case Intrinsic::usub_sat: { | 1313 | 0 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1314 | 0 | if (RetVF > 1) | 1315 | 0 | CondTy = VectorType::get(CondTy, RetVF); | 1316 | 0 |
| 1317 | 0 | Type *OpTy = StructType::create({RetTy, CondTy}); | 1318 | 0 | Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat | 1319 | 0 | ? Intrinsic::uadd_with_overflow | 1320 | 0 | : Intrinsic::usub_with_overflow; | 1321 | 0 |
| 1322 | 0 | unsigned Cost = 0; | 1323 | 0 | Cost += ConcreteTTI->getIntrinsicInstrCost( | 1324 | 0 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); | 1325 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1326 | 0 | CondTy, nullptr); | 1327 | 0 | return Cost; | 1328 | 0 | } | 1329 | 0 | case Intrinsic::smul_fix: | 1330 | 0 | case Intrinsic::umul_fix: { | 1331 | 0 | unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; | 1332 | 0 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); | 1333 | 0 | if (RetVF > 1) | 1334 | 0 | ExtTy = VectorType::get(ExtTy, RetVF); | 1335 | 0 |
| 1336 | 0 | unsigned ExtOp = | 1337 | 0 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; | 1338 | 0 |
| 1339 | 0 | unsigned Cost = 0; | 1340 | 0 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy); | 1341 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); | 1342 | 0 | Cost += | 1343 | 0 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy); | 1344 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy, | 1345 | 0 | TTI::OK_AnyValue, | 1346 | 0 | TTI::OK_UniformConstantValue); | 1347 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy, | 1348 | 0 | TTI::OK_AnyValue, | 1349 | 0 | TTI::OK_UniformConstantValue); | 1350 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy); | 1351 | 0 | return Cost; | 1352 | 0 | } | 1353 | 0 | case Intrinsic::sadd_with_overflow: | 1354 | 0 | case Intrinsic::ssub_with_overflow: { | 1355 | 0 | Type *SumTy = RetTy->getContainedType(0); | 1356 | 0 | Type *OverflowTy = RetTy->getContainedType(1); | 1357 | 0 | unsigned Opcode = IID == Intrinsic::sadd_with_overflow | 1358 | 0 | ? BinaryOperator::Add | 1359 | 0 | : BinaryOperator::Sub; | 1360 | 0 |
| 1361 | 0 | // LHSSign -> LHS >= 0 | 1362 | 0 | // RHSSign -> RHS >= 0 | 1363 | 0 | // SumSign -> Sum >= 0 | 1364 | 0 | // | 1365 | 0 | // Add: | 1366 | 0 | // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) | 1367 | 0 | // Sub: | 1368 | 0 | // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) | 1369 | 0 | unsigned Cost = 0; | 1370 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); | 1371 | 0 | Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, | 1372 | 0 | OverflowTy, nullptr); | 1373 | 0 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost( | 1374 | 0 | BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr); | 1375 | 0 | Cost += | 1376 | 0 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy); | 1377 | 0 | return Cost; | 1378 | 0 | } | 1379 | 0 | case Intrinsic::uadd_with_overflow: | 1380 | 0 | case Intrinsic::usub_with_overflow: { | 1381 | 0 | Type *SumTy = RetTy->getContainedType(0); | 1382 | 0 | Type *OverflowTy = RetTy->getContainedType(1); | 1383 | 0 | unsigned Opcode = IID == Intrinsic::uadd_with_overflow | 1384 | 0 | ? BinaryOperator::Add | 1385 | 0 | : BinaryOperator::Sub; | 1386 | 0 |
| 1387 | 0 | unsigned Cost = 0; | 1388 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); | 1389 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, | 1390 | 0 | OverflowTy, nullptr); | 1391 | 0 | return Cost; | 1392 | 0 | } | 1393 | 0 | case Intrinsic::smul_with_overflow: | 1394 | 0 | case Intrinsic::umul_with_overflow: { | 1395 | 0 | Type *MulTy = RetTy->getContainedType(0); | 1396 | 0 | Type *OverflowTy = RetTy->getContainedType(1); | 1397 | 0 | unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; | 1398 | 0 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); | 1399 | 0 | if (MulTy->isVectorTy()) | 1400 | 0 | ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() ); | 1401 | 0 |
| 1402 | 0 | unsigned ExtOp = | 1403 | 0 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; | 1404 | 0 |
| 1405 | 0 | unsigned Cost = 0; | 1406 | 0 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy); | 1407 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); | 1408 | 0 | Cost += | 1409 | 0 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy); | 1410 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy, | 1411 | 0 | TTI::OK_AnyValue, | 1412 | 0 | TTI::OK_UniformConstantValue); | 1413 | 0 |
| 1414 | 0 | if (IID == Intrinsic::smul_with_overflow) | 1415 | 0 | Cost += ConcreteTTI->getArithmeticInstrCost( | 1416 | 0 | Instruction::AShr, MulTy, TTI::OK_AnyValue, | 1417 | 0 | TTI::OK_UniformConstantValue); | 1418 | 0 |
| 1419 | 0 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, | 1420 | 0 | OverflowTy, nullptr); | 1421 | 0 | return Cost; | 1422 | 0 | } | 1423 | 0 | case Intrinsic::ctpop: | 1424 | 0 | ISDs.push_back(ISD::CTPOP); | 1425 | 0 | // In case of legalization use TCC_Expensive. This is cheaper than a | 1426 | 0 | // library call but still not a cheap instruction. | 1427 | 0 | SingleCallCost = TargetTransformInfo::TCC_Expensive; | 1428 | 0 | break; | 1429 | 0 | // FIXME: ctlz, cttz, ... | 1430 | 0 | } | 1431 | 0 | | 1432 | 0 | const TargetLoweringBase *TLI = getTLI(); | 1433 | 0 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); | 1434 | 0 |
| 1435 | 0 | SmallVector<unsigned, 2> LegalCost; | 1436 | 0 | SmallVector<unsigned, 2> CustomCost; | 1437 | 0 | for (unsigned ISD : ISDs) { | 1438 | 0 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { | 1439 | 0 | if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() && | 1440 | 0 | TLI->isFAbsFree(LT.second)) { | 1441 | 0 | return 0; | 1442 | 0 | } | 1443 | 0 | | 1444 | 0 | // The operation is legal. Assume it costs 1. | 1445 | 0 | // If the type is split to multiple registers, assume that there is some | 1446 | 0 | // overhead to this. | 1447 | 0 | // TODO: Once we have extract/insert subvector cost we need to use them. | 1448 | 0 | if (LT.first > 1) | 1449 | 0 | LegalCost.push_back(LT.first * 2); | 1450 | 0 | else | 1451 | 0 | LegalCost.push_back(LT.first * 1); | 1452 | 0 | } else if (!TLI->isOperationExpand(ISD, LT.second)) { | 1453 | 0 | // If the operation is custom lowered then assume | 1454 | 0 | // that the code is twice as expensive. | 1455 | 0 | CustomCost.push_back(LT.first * 2); | 1456 | 0 | } | 1457 | 0 | } | 1458 | 0 |
| 1459 | 0 | auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); | 1460 | 0 | if (MinLegalCostI != LegalCost.end()) | 1461 | 0 | return *MinLegalCostI; | 1462 | 0 | | 1463 | 0 | auto MinCustomCostI = | 1464 | 0 | std::min_element(CustomCost.begin(), CustomCost.end()); | 1465 | 0 | if (MinCustomCostI != CustomCost.end()) | 1466 | 0 | return *MinCustomCostI; | 1467 | 0 | | 1468 | 0 | // If we can't lower fmuladd into an FMA estimate the cost as a floating | 1469 | 0 | // point mul followed by an add. | 1470 | 0 | if (IID == Intrinsic::fmuladd) | 1471 | 0 | return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + | 1472 | 0 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); | 1473 | 0 | | 1474 | 0 | // Else, assume that we need to scalarize this intrinsic. For math builtins | 1475 | 0 | // this will emit a costly libcall, adding call overhead and spills. Make it | 1476 | 0 | // very expensive. | 1477 | 0 | if (RetTy->isVectorTy()) { | 1478 | 0 | unsigned ScalarizationCost = | 1479 | 0 | ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max()) | 1480 | 0 | ? 
ScalarizationCostPassed | 1481 | 0 | : getScalarizationOverhead(RetTy, true, false)); | 1482 | 0 | unsigned ScalarCalls = RetTy->getVectorNumElements(); | 1483 | 0 | SmallVector<Type *, 4> ScalarTys; | 1484 | 0 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { | 1485 | 0 | Type *Ty = Tys[i]; | 1486 | 0 | if (Ty->isVectorTy()) | 1487 | 0 | Ty = Ty->getScalarType(); | 1488 | 0 | ScalarTys.push_back(Ty); | 1489 | 0 | } | 1490 | 0 | unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost( | 1491 | 0 | IID, RetTy->getScalarType(), ScalarTys, FMF); | 1492 | 0 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { | 1493 | 0 | if (Tys[i]->isVectorTy()) { | 1494 | 0 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1495 | 0 | ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); | 1496 | 0 | ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements()); | 1497 | 0 | } | 1498 | 0 | } | 1499 | 0 |
| 1500 | 0 | return ScalarCalls * ScalarCost + ScalarizationCost; | 1501 | 0 | } | 1502 | 0 |
| 1503 | 0 | // This is going to be turned into a library call, make it expensive. | 1504 | 0 | return SingleCallCost; | 1505 | 0 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int) Line | Count | Source | 1133 | 11.7k | unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) { | 1134 | 11.7k | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements()2.93k : 18.85k ); | 1135 | 11.7k | auto *ConcreteTTI = static_cast<T *>(this); | 1136 | 11.7k | | 1137 | 11.7k | SmallVector<unsigned, 2> ISDs; | 1138 | 11.7k | unsigned SingleCallCost = 10; // Library call cost. Make it expensive. | 1139 | 11.7k | switch (IID) { | 1140 | 11.7k | default: { | 1141 | 1.71k | // Assume that we need to scalarize this intrinsic. | 1142 | 1.71k | unsigned ScalarizationCost = ScalarizationCostPassed; | 1143 | 1.71k | unsigned ScalarCalls = 1; | 1144 | 1.71k | Type *ScalarRetTy = RetTy; | 1145 | 1.71k | if (RetTy->isVectorTy()) { | 1146 | 338 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1147 | 28 | ScalarizationCost = getScalarizationOverhead(RetTy, true, false); | 1148 | 338 | ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); | 1149 | 338 | ScalarRetTy = RetTy->getScalarType(); | 1150 | 338 | } | 1151 | 1.71k | SmallVector<Type *, 4> ScalarTys; | 1152 | 6.41k | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i4.69k ) { | 1153 | 4.69k | Type *Ty = Tys[i]; | 1154 | 4.69k | if (Ty->isVectorTy()) { | 1155 | 1.34k | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1156 | 728 | ScalarizationCost += getScalarizationOverhead(Ty, false, true); | 1157 | 1.34k | ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements()); | 1158 | 1.34k | Ty = Ty->getScalarType(); | 1159 | 1.34k | } | 1160 | 4.69k | ScalarTys.push_back(Ty); | 
1161 | 4.69k | } | 1162 | 1.71k | if (ScalarCalls == 1) | 1163 | 1.10k | return 1; // Return cost of a scalar intrinsic. Assume it to be cheap. | 1164 | 618 | | 1165 | 618 | unsigned ScalarCost = | 1166 | 618 | ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF); | 1167 | 618 | | 1168 | 618 | return ScalarCalls * ScalarCost + ScalarizationCost; | 1169 | 618 | } | 1170 | 618 | // Look for intrinsics that can be lowered directly or turned into a scalar | 1171 | 618 | // intrinsic call. | 1172 | 618 | case Intrinsic::sqrt: | 1173 | 10 | ISDs.push_back(ISD::FSQRT); | 1174 | 10 | break; | 1175 | 618 | case Intrinsic::sin: | 1176 | 38 | ISDs.push_back(ISD::FSIN); | 1177 | 38 | break; | 1178 | 618 | case Intrinsic::cos: | 1179 | 38 | ISDs.push_back(ISD::FCOS); | 1180 | 38 | break; | 1181 | 618 | case Intrinsic::exp: | 1182 | 46 | ISDs.push_back(ISD::FEXP); | 1183 | 46 | break; | 1184 | 618 | case Intrinsic::exp2: | 1185 | 3 | ISDs.push_back(ISD::FEXP2); | 1186 | 3 | break; | 1187 | 618 | case Intrinsic::log: | 1188 | 35 | ISDs.push_back(ISD::FLOG); | 1189 | 35 | break; | 1190 | 618 | case Intrinsic::log10: | 1191 | 11 | ISDs.push_back(ISD::FLOG10); | 1192 | 11 | break; | 1193 | 618 | case Intrinsic::log2: | 1194 | 0 | ISDs.push_back(ISD::FLOG2); | 1195 | 0 | break; | 1196 | 618 | case Intrinsic::fabs: | 1197 | 414 | ISDs.push_back(ISD::FABS); | 1198 | 414 | break; | 1199 | 618 | case Intrinsic::canonicalize: | 1200 | 0 | ISDs.push_back(ISD::FCANONICALIZE); | 1201 | 0 | break; | 1202 | 618 | case Intrinsic::minnum: | 1203 | 0 | ISDs.push_back(ISD::FMINNUM); | 1204 | 0 | if (FMF.noNaNs()) | 1205 | 0 | ISDs.push_back(ISD::FMINIMUM); | 1206 | 0 | break; | 1207 | 618 | case Intrinsic::maxnum: | 1208 | 0 | ISDs.push_back(ISD::FMAXNUM); | 1209 | 0 | if (FMF.noNaNs()) | 1210 | 0 | ISDs.push_back(ISD::FMAXIMUM); | 1211 | 0 | break; | 1212 | 618 | case Intrinsic::copysign: | 1213 | 422 | ISDs.push_back(ISD::FCOPYSIGN); | 1214 | 422 | break; | 1215 | 618 | case 
Intrinsic::floor: | 1216 | 291 | ISDs.push_back(ISD::FFLOOR); | 1217 | 291 | break; | 1218 | 618 | case Intrinsic::ceil: | 1219 | 270 | ISDs.push_back(ISD::FCEIL); | 1220 | 270 | break; | 1221 | 618 | case Intrinsic::trunc: | 1222 | 256 | ISDs.push_back(ISD::FTRUNC); | 1223 | 256 | break; | 1224 | 618 | case Intrinsic::nearbyint: | 1225 | 259 | ISDs.push_back(ISD::FNEARBYINT); | 1226 | 259 | break; | 1227 | 618 | case Intrinsic::rint: | 1228 | 256 | ISDs.push_back(ISD::FRINT); | 1229 | 256 | break; | 1230 | 618 | case Intrinsic::round: | 1231 | 0 | ISDs.push_back(ISD::FROUND); | 1232 | 0 | break; | 1233 | 618 | case Intrinsic::pow: | 1234 | 27 | ISDs.push_back(ISD::FPOW); | 1235 | 27 | break; | 1236 | 618 | case Intrinsic::fma: | 1237 | 456 | ISDs.push_back(ISD::FMA); | 1238 | 456 | break; | 1239 | 618 | case Intrinsic::fmuladd: | 1240 | 2 | ISDs.push_back(ISD::FMA); | 1241 | 2 | break; | 1242 | 618 | // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. | 1243 | 618 | case Intrinsic::lifetime_start: | 1244 | 0 | case Intrinsic::lifetime_end: | 1245 | 0 | case Intrinsic::sideeffect: | 1246 | 0 | return 0; | 1247 | 378 | case Intrinsic::masked_store: | 1248 | 378 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, | 1249 | 378 | 0); | 1250 | 406 | case Intrinsic::masked_load: | 1251 | 406 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); | 1252 | 368 | case Intrinsic::experimental_vector_reduce_add: | 1253 | 368 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0], | 1254 | 368 | /*IsPairwiseForm=*/false); | 1255 | 368 | case Intrinsic::experimental_vector_reduce_mul: | 1256 | 368 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0], | 1257 | 368 | /*IsPairwiseForm=*/false); | 1258 | 496 | case Intrinsic::experimental_vector_reduce_and: | 1259 | 496 | return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0], | 1260 | 496 | 
/*IsPairwiseForm=*/false); | 1261 | 496 | case Intrinsic::experimental_vector_reduce_or: | 1262 | 496 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0], | 1263 | 496 | /*IsPairwiseForm=*/false); | 1264 | 496 | case Intrinsic::experimental_vector_reduce_xor: | 1265 | 496 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0], | 1266 | 496 | /*IsPairwiseForm=*/false); | 1267 | 0 | case Intrinsic::experimental_vector_reduce_v2_fadd: | 1268 | 0 | return ConcreteTTI->getArithmeticReductionCost( | 1269 | 0 | Instruction::FAdd, Tys[0], | 1270 | 0 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict | 1271 | 0 | // reductions. | 1272 | 0 | case Intrinsic::experimental_vector_reduce_v2_fmul: | 1273 | 0 | return ConcreteTTI->getArithmeticReductionCost( | 1274 | 0 | Instruction::FMul, Tys[0], | 1275 | 0 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict | 1276 | 0 | // reductions. | 1277 | 728 | case Intrinsic::experimental_vector_reduce_smax: | 1278 | 728 | case Intrinsic::experimental_vector_reduce_smin: | 1279 | 728 | case Intrinsic::experimental_vector_reduce_fmax: | 1280 | 728 | case Intrinsic::experimental_vector_reduce_fmin: | 1281 | 728 | return ConcreteTTI->getMinMaxReductionCost( | 1282 | 728 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, | 1283 | 728 | /*IsUnsigned=*/true); | 1284 | 736 | case Intrinsic::experimental_vector_reduce_umax: | 1285 | 736 | case Intrinsic::experimental_vector_reduce_umin: | 1286 | 736 | return ConcreteTTI->getMinMaxReductionCost( | 1287 | 736 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, | 1288 | 736 | /*IsUnsigned=*/false); | 1289 | 736 | case Intrinsic::sadd_sat: | 1290 | 472 | case Intrinsic::ssub_sat: { | 1291 | 472 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1292 | 472 | if (RetVF > 1) | 1293 | 210 | CondTy = VectorType::get(CondTy, RetVF); | 1294 | 472 | | 1295 | 472 | Type *OpTy = 
StructType::create({RetTy, CondTy}); | 1296 | 472 | Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat | 1297 | 472 | ? Intrinsic::sadd_with_overflow236 | 1298 | 472 | : Intrinsic::ssub_with_overflow236 ; | 1299 | 472 | | 1300 | 472 | // SatMax -> Overflow && SumDiff < 0 | 1301 | 472 | // SatMin -> Overflow && SumDiff >= 0 | 1302 | 472 | unsigned Cost = 0; | 1303 | 472 | Cost += ConcreteTTI->getIntrinsicInstrCost( | 1304 | 472 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); | 1305 | 472 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, | 1306 | 472 | CondTy, nullptr); | 1307 | 472 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1308 | 472 | CondTy, nullptr); | 1309 | 472 | return Cost; | 1310 | 472 | } | 1311 | 472 | case Intrinsic::uadd_sat: | 1312 | 328 | case Intrinsic::usub_sat: { | 1313 | 328 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); | 1314 | 328 | if (RetVF > 1) | 1315 | 86 | CondTy = VectorType::get(CondTy, RetVF); | 1316 | 328 | | 1317 | 328 | Type *OpTy = StructType::create({RetTy, CondTy}); | 1318 | 328 | Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat | 1319 | 328 | ? 
Intrinsic::uadd_with_overflow164 | 1320 | 328 | : Intrinsic::usub_with_overflow164 ; | 1321 | 328 | | 1322 | 328 | unsigned Cost = 0; | 1323 | 328 | Cost += ConcreteTTI->getIntrinsicInstrCost( | 1324 | 328 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); | 1325 | 328 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | 1326 | 328 | CondTy, nullptr); | 1327 | 328 | return Cost; | 1328 | 328 | } | 1329 | 794 | case Intrinsic::smul_fix: | 1330 | 794 | case Intrinsic::umul_fix: { | 1331 | 794 | unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; | 1332 | 794 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); | 1333 | 794 | if (RetVF > 1) | 1334 | 477 | ExtTy = VectorType::get(ExtTy, RetVF); | 1335 | 794 | | 1336 | 794 | unsigned ExtOp = | 1337 | 794 | IID == Intrinsic::smul_fix ? Instruction::SExt388 : Instruction::ZExt406 ; | 1338 | 794 | | 1339 | 794 | unsigned Cost = 0; | 1340 | 794 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy); | 1341 | 794 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); | 1342 | 794 | Cost += | 1343 | 794 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy); | 1344 | 794 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy, | 1345 | 794 | TTI::OK_AnyValue, | 1346 | 794 | TTI::OK_UniformConstantValue); | 1347 | 794 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy, | 1348 | 794 | TTI::OK_AnyValue, | 1349 | 794 | TTI::OK_UniformConstantValue); | 1350 | 794 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy); | 1351 | 794 | return Cost; | 1352 | 794 | } | 1353 | 794 | case Intrinsic::sadd_with_overflow: | 1354 | 450 | case Intrinsic::ssub_with_overflow: { | 1355 | 450 | Type *SumTy = RetTy->getContainedType(0); | 1356 | 450 | Type *OverflowTy = RetTy->getContainedType(1); | 1357 | 450 | unsigned Opcode = IID == Intrinsic::sadd_with_overflow | 1358 | 450 | ? 
BinaryOperator::Add225 | 1359 | 450 | : BinaryOperator::Sub225 ; | 1360 | 450 | | 1361 | 450 | // LHSSign -> LHS >= 0 | 1362 | 450 | // RHSSign -> RHS >= 0 | 1363 | 450 | // SumSign -> Sum >= 0 | 1364 | 450 | // | 1365 | 450 | // Add: | 1366 | 450 | // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) | 1367 | 450 | // Sub: | 1368 | 450 | // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) | 1369 | 450 | unsigned Cost = 0; | 1370 | 450 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); | 1371 | 450 | Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, | 1372 | 450 | OverflowTy, nullptr); | 1373 | 450 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost( | 1374 | 450 | BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr); | 1375 | 450 | Cost += | 1376 | 450 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy); | 1377 | 450 | return Cost; | 1378 | 450 | } | 1379 | 450 | case Intrinsic::uadd_with_overflow: | 1380 | 326 | case Intrinsic::usub_with_overflow: { | 1381 | 326 | Type *SumTy = RetTy->getContainedType(0); | 1382 | 326 | Type *OverflowTy = RetTy->getContainedType(1); | 1383 | 326 | unsigned Opcode = IID == Intrinsic::uadd_with_overflow | 1384 | 326 | ? 
BinaryOperator::Add163 | 1385 | 326 | : BinaryOperator::Sub163 ; | 1386 | 326 | | 1387 | 326 | unsigned Cost = 0; | 1388 | 326 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); | 1389 | 326 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, | 1390 | 326 | OverflowTy, nullptr); | 1391 | 326 | return Cost; | 1392 | 326 | } | 1393 | 326 | case Intrinsic::smul_with_overflow: | 1394 | 320 | case Intrinsic::umul_with_overflow: { | 1395 | 320 | Type *MulTy = RetTy->getContainedType(0); | 1396 | 320 | Type *OverflowTy = RetTy->getContainedType(1); | 1397 | 320 | unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; | 1398 | 320 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); | 1399 | 320 | if (MulTy->isVectorTy()) | 1400 | 240 | ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() ); | 1401 | 320 | | 1402 | 320 | unsigned ExtOp = | 1403 | 320 | IID == Intrinsic::smul_fix ? Instruction::SExt0 : Instruction::ZExt; | 1404 | 320 | | 1405 | 320 | unsigned Cost = 0; | 1406 | 320 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy); | 1407 | 320 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); | 1408 | 320 | Cost += | 1409 | 320 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy); | 1410 | 320 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy, | 1411 | 320 | TTI::OK_AnyValue, | 1412 | 320 | TTI::OK_UniformConstantValue); | 1413 | 320 | | 1414 | 320 | if (IID == Intrinsic::smul_with_overflow) | 1415 | 160 | Cost += ConcreteTTI->getArithmeticInstrCost( | 1416 | 160 | Instruction::AShr, MulTy, TTI::OK_AnyValue, | 1417 | 160 | TTI::OK_UniformConstantValue); | 1418 | 320 | | 1419 | 320 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, | 1420 | 320 | OverflowTy, nullptr); | 1421 | 320 | return Cost; | 1422 | 320 | } | 1423 | 320 | case Intrinsic::ctpop: | 1424 | 83 | ISDs.push_back(ISD::CTPOP); | 1425 | 83 | // In case of legalization use 
TCC_Expensive. This is cheaper than a | 1426 | 83 | // library call but still not a cheap instruction. | 1427 | 83 | SingleCallCost = TargetTransformInfo::TCC_Expensive; | 1428 | 83 | break; | 1429 | 2.91k | // FIXME: ctlz, cttz, ... | 1430 | 2.91k | } | 1431 | 2.91k | | 1432 | 2.91k | const TargetLoweringBase *TLI = getTLI(); | 1433 | 2.91k | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); | 1434 | 2.91k | | 1435 | 2.91k | SmallVector<unsigned, 2> LegalCost; | 1436 | 2.91k | SmallVector<unsigned, 2> CustomCost; | 1437 | 2.91k | for (unsigned ISD : ISDs) { | 1438 | 2.91k | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { | 1439 | 1.03k | if (IID == Intrinsic::fabs && LT.second.isFloatingPoint()78 && | 1440 | 1.03k | TLI->isFAbsFree(LT.second)76 ) { | 1441 | 0 | return 0; | 1442 | 0 | } | 1443 | 1.03k | | 1444 | 1.03k | // The operation is legal. Assume it costs 1. | 1445 | 1.03k | // If the type is split to multiple registers, assume that there is some | 1446 | 1.03k | // overhead to this. | 1447 | 1.03k | // TODO: Once we have extract/insert subvector cost we need to use them. | 1448 | 1.03k | if (LT.first > 1) | 1449 | 140 | LegalCost.push_back(LT.first * 2); | 1450 | 892 | else | 1451 | 892 | LegalCost.push_back(LT.first * 1); | 1452 | 1.88k | } else if (!TLI->isOperationExpand(ISD, LT.second)) { | 1453 | 730 | // If the operation is custom lowered then assume | 1454 | 730 | // that the code is twice as expensive. 
| 1455 | 730 | CustomCost.push_back(LT.first * 2); | 1456 | 730 | } | 1457 | 2.91k | } | 1458 | 2.91k | | 1459 | 2.91k | auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); | 1460 | 2.91k | if (MinLegalCostI != LegalCost.end()) | 1461 | 1.03k | return *MinLegalCostI; | 1462 | 1.88k | | 1463 | 1.88k | auto MinCustomCostI = | 1464 | 1.88k | std::min_element(CustomCost.begin(), CustomCost.end()); | 1465 | 1.88k | if (MinCustomCostI != CustomCost.end()) | 1466 | 730 | return *MinCustomCostI; | 1467 | 1.15k | | 1468 | 1.15k | // If we can't lower fmuladd into an FMA estimate the cost as a floating | 1469 | 1.15k | // point mul followed by an add. | 1470 | 1.15k | if (IID == Intrinsic::fmuladd) | 1471 | 2 | return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + | 1472 | 2 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); | 1473 | 1.15k | | 1474 | 1.15k | // Else, assume that we need to scalarize this intrinsic. For math builtins | 1475 | 1.15k | // this will emit a costly libcall, adding call overhead and spills. Make it | 1476 | 1.15k | // very expensive. | 1477 | 1.15k | if (RetTy->isVectorTy()) { | 1478 | 431 | unsigned ScalarizationCost = | 1479 | 431 | ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max()) | 1480 | 431 | ? 
ScalarizationCostPassed | 1481 | 431 | : getScalarizationOverhead(RetTy, true, false)0 ); | 1482 | 431 | unsigned ScalarCalls = RetTy->getVectorNumElements(); | 1483 | 431 | SmallVector<Type *, 4> ScalarTys; | 1484 | 1.13k | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i702 ) { | 1485 | 702 | Type *Ty = Tys[i]; | 1486 | 702 | if (Ty->isVectorTy()) | 1487 | 702 | Ty = Ty->getScalarType(); | 1488 | 702 | ScalarTys.push_back(Ty); | 1489 | 702 | } | 1490 | 431 | unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost( | 1491 | 431 | IID, RetTy->getScalarType(), ScalarTys, FMF); | 1492 | 1.13k | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i702 ) { | 1493 | 702 | if (Tys[i]->isVectorTy()) { | 1494 | 702 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) | 1495 | 0 | ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); | 1496 | 702 | ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements()); | 1497 | 702 | } | 1498 | 702 | } | 1499 | 431 | | 1500 | 431 | return ScalarCalls * ScalarCost + ScalarizationCost; | 1501 | 431 | } | 1502 | 722 | | 1503 | 722 | // This is going to be turned into a library call, make it expensive. | 1504 | 722 | return SingleCallCost; | 1505 | 722 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int) |
1506 | | |
1507 | | /// Compute a cost of the given call instruction. |
1508 | | /// |
1509 | | /// Compute the cost of calling function F with return type RetTy and |
1510 | | /// argument types Tys. F might be nullptr, in this case the cost of an |
1511 | | /// arbitrary call with the specified signature will be returned. |
1512 | | /// This is used, for instance, when we estimate call of a vector |
1513 | | /// counterpart of the given function. |
1514 | | /// \param F Called function, might be nullptr. |
1515 | | /// \param RetTy Return value types. |
1516 | | /// \param Tys Argument types. |
1517 | | /// \returns The cost of Call instruction. |
1518 | 2.15k | unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) { |
1519 | 2.15k | return 10; |
1520 | 2.15k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>) Line | Count | Source | 1518 | 1.18k | unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) { | 1519 | 1.18k | return 10; | 1520 | 1.18k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>) llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>) Line | Count | Source | 1518 | 562 | unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) { | 1519 | 562 | return 10; | 1520 | 562 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>) Line | Count | Source | 1518 | 5 | unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) { | 1519 | 5 | return 10; | 1520 | 5 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>) Line | Count | Source | 1518 | 407 | unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) { | 1519 | 407 | return 10; | 1520 | 407 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>) |
1521 | | |
1522 | 635k | unsigned getNumberOfParts(Type *Tp) { |
1523 | 635k | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp); |
1524 | 635k | return LT.first; |
1525 | 635k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getNumberOfParts(llvm::Type*) Line | Count | Source | 1522 | 560k | unsigned getNumberOfParts(Type *Tp) { | 1523 | 560k | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp); | 1524 | 560k | return LT.first; | 1525 | 560k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getNumberOfParts(llvm::Type*) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getNumberOfParts(llvm::Type*) Line | Count | Source | 1522 | 90 | unsigned getNumberOfParts(Type *Tp) { | 1523 | 90 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp); | 1524 | 90 | return LT.first; | 1525 | 90 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getNumberOfParts(llvm::Type*) Line | Count | Source | 1522 | 14.7k | unsigned getNumberOfParts(Type *Tp) { | 1523 | 14.7k | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp); | 1524 | 14.7k | return LT.first; | 1525 | 14.7k | } |
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getNumberOfParts(llvm::Type*) Line | Count | Source | 1522 | 74 | unsigned getNumberOfParts(Type *Tp) { | 1523 | 74 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp); | 1524 | 74 | return LT.first; | 1525 | 74 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getNumberOfParts(llvm::Type*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getNumberOfParts(llvm::Type*) llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getNumberOfParts(llvm::Type*) Line | Count | Source | 1522 | 6 | unsigned getNumberOfParts(Type *Tp) { | 1523 | 6 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp); | 1524 | 6 | return LT.first; | 1525 | 6 | } |
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getNumberOfParts(llvm::Type*) Line | Count | Source | 1522 | 713 | unsigned getNumberOfParts(Type *Tp) { | 1523 | 713 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp); | 1524 | 713 | return LT.first; | 1525 | 713 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getNumberOfParts(llvm::Type*) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getNumberOfParts(llvm::Type*) Line | Count | Source | 1522 | 180 | unsigned getNumberOfParts(Type *Tp) { | 1523 | 180 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp); | 1524 | 180 | return LT.first; | 1525 | 180 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getNumberOfParts(llvm::Type*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getNumberOfParts(llvm::Type*) Line | Count | Source | 1522 | 59.5k | unsigned getNumberOfParts(Type *Tp) { | 1523 | 59.5k | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp); | 1524 | 59.5k | return LT.first; | 1525 | 59.5k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getNumberOfParts(llvm::Type*) |
1526 | | |
1527 | | unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *, |
1528 | 3.28k | const SCEV *) { |
1529 | 3.28k | return 0; |
1530 | 3.28k | } Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*) Line | Count | Source | 1528 | 7 | const SCEV *) { | 1529 | 7 | return 0; | 1530 | 7 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*) llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*) Line | Count | Source | 1528 | 99 | const SCEV *) { | 1529 | 99 | return 0; | 1530 | 99 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*) llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*) Line | Count | Source | 1528 | 6 | const SCEV *) { | 1529 | 6 | return 0; | 1530 | 6 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*) Line | Count | Source | 1528 | 3.17k | const SCEV *) { | 1529 | 3.17k | return 0; | 1530 | 3.17k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*) |
1531 | | |
1532 | | /// Try to calculate arithmetic and shuffle op costs for reduction operations. |
1533 | | /// We're assuming that reduction operation are performing the following way: |
1534 | | /// 1. Non-pairwise reduction |
1535 | | /// %val1 = shufflevector<n x t> %val, <n x t> %undef, |
1536 | | /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef> |
1537 | | /// \----------------v-------------/ \----------v------------/ |
1538 | | /// n/2 elements n/2 elements |
1539 | | /// %red1 = op <n x t> %val, <n x t> val1 |
1540 | | /// After this operation we have a vector %red1 where only the first n/2 |
1541 | | /// elements are meaningful, the second n/2 elements are undefined and can be |
1542 | | /// dropped. All other operations are actually working with the vector of |
1543 | | /// length n/2, not n, though the real vector length is still n. |
1544 | | /// %val2 = shufflevector<n x t> %red1, <n x t> %undef, |
1545 | | /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef> |
1546 | | /// \----------------v-------------/ \----------v------------/ |
1547 | | /// n/4 elements 3*n/4 elements |
1548 | | /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of |
1549 | | /// length n/2, the resulting vector has length n/4 etc. |
1550 | | /// 2. Pairwise reduction: |
1551 | | /// Everything is the same except for an additional shuffle operation which |
1552 | | /// is used to produce operands for pairwise kind of reductions. |
1553 | | /// %val1 = shufflevector<n x t> %val, <n x t> %undef, |
1554 | | /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef> |
1555 | | /// \-------------v----------/ \----------v------------/ |
1556 | | /// n/2 elements n/2 elements |
1557 | | /// %val2 = shufflevector<n x t> %val, <n x t> %undef, |
1558 | | /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef> |
1559 | | /// \-------------v----------/ \----------v------------/ |
1560 | | /// n/2 elements n/2 elements |
1561 | | /// %red1 = op <n x t> %val1, <n x t> val2 |
1562 | | /// Again, the operation is performed on <n x t> vector, but the resulting |
1563 | | /// vector %red1 is <n/2 x t> vector. |
1564 | | /// |
1565 | | /// The cost model should take into account that the actual length of the |
1566 | | /// vector is reduced on each iteration. |
1567 | | unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, |
1568 | 4.98k | bool IsPairwise) { |
1569 | 4.98k | assert(Ty->isVectorTy() && "Expect a vector type"); |
1570 | 4.98k | Type *ScalarTy = Ty->getVectorElementType(); |
1571 | 4.98k | unsigned NumVecElts = Ty->getVectorNumElements(); |
1572 | 4.98k | unsigned NumReduxLevels = Log2_32(NumVecElts); |
1573 | 4.98k | unsigned ArithCost = 0; |
1574 | 4.98k | unsigned ShuffleCost = 0; |
1575 | 4.98k | auto *ConcreteTTI = static_cast<T *>(this); |
1576 | 4.98k | std::pair<unsigned, MVT> LT = |
1577 | 4.98k | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); |
1578 | 4.98k | unsigned LongVectorCount = 0; |
1579 | 4.98k | unsigned MVTLen = |
1580 | 4.98k | LT.second.isVector() ? LT.second.getVectorNumElements()4.87k : 1110 ; |
1581 | 7.26k | while (NumVecElts > MVTLen) { |
1582 | 2.28k | NumVecElts /= 2; |
1583 | 2.28k | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); |
1584 | 2.28k | // Assume the pairwise shuffles add a cost. |
1585 | 2.28k | ShuffleCost += (IsPairwise + 1) * |
1586 | 2.28k | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, |
1587 | 2.28k | NumVecElts, SubTy); |
1588 | 2.28k | ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy); |
1589 | 2.28k | Ty = SubTy; |
1590 | 2.28k | ++LongVectorCount; |
1591 | 2.28k | } |
1592 | 4.98k | |
1593 | 4.98k | NumReduxLevels -= LongVectorCount; |
1594 | 4.98k | |
1595 | 4.98k | // The minimal length of the vector is limited by the real length of vector |
1596 | 4.98k | // operations performed on the current platform. That's why several final |
1597 | 4.98k | // reduction operations are performed on the vectors with the same |
1598 | 4.98k | // architecture-dependent length. |
1599 | 4.98k | |
1600 | 4.98k | // Non pairwise reductions need one shuffle per reduction level. Pairwise |
1601 | 4.98k | // reductions need two shuffles on every level, but the last one. On that |
1602 | 4.98k | // level one of the shuffles is <0, u, u, ...> which is identity. |
1603 | 4.98k | unsigned NumShuffles = NumReduxLevels; |
1604 | 4.98k | if (IsPairwise && NumReduxLevels >= 12.30k ) |
1605 | 2.30k | NumShuffles += NumReduxLevels - 1; |
1606 | 4.98k | ShuffleCost += NumShuffles * |
1607 | 4.98k | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, |
1608 | 4.98k | 0, Ty); |
1609 | 4.98k | ArithCost += NumReduxLevels * |
1610 | 4.98k | ConcreteTTI->getArithmeticInstrCost(Opcode, Ty); |
1611 | 4.98k | return ShuffleCost + ArithCost + |
1612 | 4.98k | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
1613 | 4.98k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool) Line | Count | Source | 1568 | 2.92k | bool IsPairwise) { | 1569 | 2.92k | assert(Ty->isVectorTy() && "Expect a vector type"); | 1570 | 2.92k | Type *ScalarTy = Ty->getVectorElementType(); | 1571 | 2.92k | unsigned NumVecElts = Ty->getVectorNumElements(); | 1572 | 2.92k | unsigned NumReduxLevels = Log2_32(NumVecElts); | 1573 | 2.92k | unsigned ArithCost = 0; | 1574 | 2.92k | unsigned ShuffleCost = 0; | 1575 | 2.92k | auto *ConcreteTTI = static_cast<T *>(this); | 1576 | 2.92k | std::pair<unsigned, MVT> LT = | 1577 | 2.92k | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); | 1578 | 2.92k | unsigned LongVectorCount = 0; | 1579 | 2.92k | unsigned MVTLen = | 1580 | 2.92k | LT.second.isVector() ? LT.second.getVectorNumElements() : 10 ; | 1581 | 3.93k | while (NumVecElts > MVTLen) { | 1582 | 1.01k | NumVecElts /= 2; | 1583 | 1.01k | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); | 1584 | 1.01k | // Assume the pairwise shuffles add a cost. | 1585 | 1.01k | ShuffleCost += (IsPairwise + 1) * | 1586 | 1.01k | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, | 1587 | 1.01k | NumVecElts, SubTy); | 1588 | 1.01k | ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy); | 1589 | 1.01k | Ty = SubTy; | 1590 | 1.01k | ++LongVectorCount; | 1591 | 1.01k | } | 1592 | 2.92k | | 1593 | 2.92k | NumReduxLevels -= LongVectorCount; | 1594 | 2.92k | | 1595 | 2.92k | // The minimal length of the vector is limited by the real length of vector | 1596 | 2.92k | // operations performed on the current platform. That's why several final | 1597 | 2.92k | // reduction operations are performed on the vectors with the same | 1598 | 2.92k | // architecture-dependent length. | 1599 | 2.92k | | 1600 | 2.92k | // Non pairwise reductions need one shuffle per reduction level. 
Pairwise | 1601 | 2.92k | // reductions need two shuffles on every level, but the last one. On that | 1602 | 2.92k | // level one of the shuffles is <0, u, u, ...> which is identity. | 1603 | 2.92k | unsigned NumShuffles = NumReduxLevels; | 1604 | 2.92k | if (IsPairwise && NumReduxLevels >= 12.24k ) | 1605 | 2.24k | NumShuffles += NumReduxLevels - 1; | 1606 | 2.92k | ShuffleCost += NumShuffles * | 1607 | 2.92k | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, | 1608 | 2.92k | 0, Ty); | 1609 | 2.92k | ArithCost += NumReduxLevels * | 1610 | 2.92k | ConcreteTTI->getArithmeticInstrCost(Opcode, Ty); | 1611 | 2.92k | return ShuffleCost + ArithCost + | 1612 | 2.92k | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); | 1613 | 2.92k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool) Line | Count | Source | 1568 | 19 | bool IsPairwise) { | 1569 | 19 | assert(Ty->isVectorTy() && "Expect a vector type"); | 1570 | 19 | Type *ScalarTy = Ty->getVectorElementType(); | 1571 | 19 | unsigned NumVecElts = Ty->getVectorNumElements(); | 1572 | 19 | unsigned NumReduxLevels = Log2_32(NumVecElts); | 1573 | 19 | unsigned ArithCost = 0; | 1574 | 19 | unsigned ShuffleCost = 0; | 1575 | 19 | auto *ConcreteTTI = static_cast<T *>(this); | 1576 | 19 | std::pair<unsigned, MVT> LT = | 1577 | 19 | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); | 1578 | 19 | unsigned LongVectorCount = 0; | 1579 | 19 | unsigned MVTLen = | 1580 | 19 | LT.second.isVector() ? LT.second.getVectorNumElements() : 10 ; | 1581 | 31 | while (NumVecElts > MVTLen) { | 1582 | 12 | NumVecElts /= 2; | 1583 | 12 | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); | 1584 | 12 | // Assume the pairwise shuffles add a cost. | 1585 | 12 | ShuffleCost += (IsPairwise + 1) * | 1586 | 12 | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, | 1587 | 12 | NumVecElts, SubTy); | 1588 | 12 | ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy); | 1589 | 12 | Ty = SubTy; | 1590 | 12 | ++LongVectorCount; | 1591 | 12 | } | 1592 | 19 | | 1593 | 19 | NumReduxLevels -= LongVectorCount; | 1594 | 19 | | 1595 | 19 | // The minimal length of the vector is limited by the real length of vector | 1596 | 19 | // operations performed on the current platform. That's why several final | 1597 | 19 | // reduction operations are performed on the vectors with the same | 1598 | 19 | // architecture-dependent length. | 1599 | 19 | | 1600 | 19 | // Non pairwise reductions need one shuffle per reduction level. 
Pairwise | 1601 | 19 | // reductions need two shuffles on every level, but the last one. On that | 1602 | 19 | // level one of the shuffles is <0, u, u, ...> which is identity. | 1603 | 19 | unsigned NumShuffles = NumReduxLevels; | 1604 | 19 | if (IsPairwise && NumReduxLevels >= 112 ) | 1605 | 12 | NumShuffles += NumReduxLevels - 1; | 1606 | 19 | ShuffleCost += NumShuffles * | 1607 | 19 | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, | 1608 | 19 | 0, Ty); | 1609 | 19 | ArithCost += NumReduxLevels * | 1610 | 19 | ConcreteTTI->getArithmeticInstrCost(Opcode, Ty); | 1611 | 19 | return ShuffleCost + ArithCost + | 1612 | 19 | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); | 1613 | 19 | } |
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool) Line | Count | Source | 1568 | 30 | bool IsPairwise) { | 1569 | 30 | assert(Ty->isVectorTy() && "Expect a vector type"); | 1570 | 30 | Type *ScalarTy = Ty->getVectorElementType(); | 1571 | 30 | unsigned NumVecElts = Ty->getVectorNumElements(); | 1572 | 30 | unsigned NumReduxLevels = Log2_32(NumVecElts); | 1573 | 30 | unsigned ArithCost = 0; | 1574 | 30 | unsigned ShuffleCost = 0; | 1575 | 30 | auto *ConcreteTTI = static_cast<T *>(this); | 1576 | 30 | std::pair<unsigned, MVT> LT = | 1577 | 30 | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); | 1578 | 30 | unsigned LongVectorCount = 0; | 1579 | 30 | unsigned MVTLen = | 1580 | 30 | LT.second.isVector() ? LT.second.getVectorNumElements() : 10 ; | 1581 | 30 | while (NumVecElts > MVTLen) { | 1582 | 0 | NumVecElts /= 2; | 1583 | 0 | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); | 1584 | 0 | // Assume the pairwise shuffles add a cost. | 1585 | 0 | ShuffleCost += (IsPairwise + 1) * | 1586 | 0 | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, | 1587 | 0 | NumVecElts, SubTy); | 1588 | 0 | ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy); | 1589 | 0 | Ty = SubTy; | 1590 | 0 | ++LongVectorCount; | 1591 | 0 | } | 1592 | 30 | | 1593 | 30 | NumReduxLevels -= LongVectorCount; | 1594 | 30 | | 1595 | 30 | // The minimal length of the vector is limited by the real length of vector | 1596 | 30 | // operations performed on the current platform. That's why several final | 1597 | 30 | // reduction operations are performed on the vectors with the same | 1598 | 30 | // architecture-dependent length. | 1599 | 30 | | 1600 | 30 | // Non pairwise reductions need one shuffle per reduction level. Pairwise | 1601 | 30 | // reductions need two shuffles on every level, but the last one. On that | 1602 | 30 | // level one of the shuffles is <0, u, u, ...> which is identity. 
| 1603 | 30 | unsigned NumShuffles = NumReduxLevels; | 1604 | 30 | if (IsPairwise && NumReduxLevels >= 115 ) | 1605 | 15 | NumShuffles += NumReduxLevels - 1; | 1606 | 30 | ShuffleCost += NumShuffles * | 1607 | 30 | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, | 1608 | 30 | 0, Ty); | 1609 | 30 | ArithCost += NumReduxLevels * | 1610 | 30 | ConcreteTTI->getArithmeticInstrCost(Opcode, Ty); | 1611 | 30 | return ShuffleCost + ArithCost + | 1612 | 30 | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); | 1613 | 30 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool) Line | Count | Source | 1568 | 2.01k | bool IsPairwise) { | 1569 | 2.01k | assert(Ty->isVectorTy() && "Expect a vector type"); | 1570 | 2.01k | Type *ScalarTy = Ty->getVectorElementType(); | 1571 | 2.01k | unsigned NumVecElts = Ty->getVectorNumElements(); | 1572 | 2.01k | unsigned NumReduxLevels = Log2_32(NumVecElts); | 1573 | 2.01k | unsigned ArithCost = 0; | 1574 | 2.01k | unsigned ShuffleCost = 0; | 1575 | 2.01k | auto *ConcreteTTI = static_cast<T *>(this); | 1576 | 2.01k | std::pair<unsigned, MVT> LT = | 1577 | 2.01k | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); | 1578 | 2.01k | unsigned LongVectorCount = 0; | 1579 | 2.01k | unsigned MVTLen = | 1580 | 2.01k | LT.second.isVector() ? 
LT.second.getVectorNumElements()1.90k : 1110 ; | 1581 | 3.27k | while (NumVecElts > MVTLen) { | 1582 | 1.26k | NumVecElts /= 2; | 1583 | 1.26k | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); | 1584 | 1.26k | // Assume the pairwise shuffles add a cost. | 1585 | 1.26k | ShuffleCost += (IsPairwise + 1) * | 1586 | 1.26k | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, | 1587 | 1.26k | NumVecElts, SubTy); | 1588 | 1.26k | ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy); | 1589 | 1.26k | Ty = SubTy; | 1590 | 1.26k | ++LongVectorCount; | 1591 | 1.26k | } | 1592 | 2.01k | | 1593 | 2.01k | NumReduxLevels -= LongVectorCount; | 1594 | 2.01k | | 1595 | 2.01k | // The minimal length of the vector is limited by the real length of vector | 1596 | 2.01k | // operations performed on the current platform. That's why several final | 1597 | 2.01k | // reduction operations are performed on the vectors with the same | 1598 | 2.01k | // architecture-dependent length. | 1599 | 2.01k | | 1600 | 2.01k | // Non pairwise reductions need one shuffle per reduction level. Pairwise | 1601 | 2.01k | // reductions need two shuffles on every level, but the last one. On that | 1602 | 2.01k | // level one of the shuffles is <0, u, u, ...> which is identity. | 1603 | 2.01k | unsigned NumShuffles = NumReduxLevels; | 1604 | 2.01k | if (IsPairwise && NumReduxLevels >= 137 ) | 1605 | 37 | NumShuffles += NumReduxLevels - 1; | 1606 | 2.01k | ShuffleCost += NumShuffles * | 1607 | 2.01k | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, | 1608 | 2.01k | 0, Ty); | 1609 | 2.01k | ArithCost += NumReduxLevels * | 1610 | 2.01k | ConcreteTTI->getArithmeticInstrCost(Opcode, Ty); | 1611 | 2.01k | return ShuffleCost + ArithCost + | 1612 | 2.01k | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); | 1613 | 2.01k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool) |
1614 | | |
1615 |  | /// Try to calculate op costs for min/max reduction operations.
1616 |  | /// \param CondTy Conditional type for the Select instruction.
1617 |  | unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1618 | 2.98k |                                   bool) {
1619 | 2.98k |     assert(Ty->isVectorTy() && "Expect a vector type");
1620 | 2.98k |     Type *ScalarTy = Ty->getVectorElementType();
1621 | 2.98k |     Type *ScalarCondTy = CondTy->getVectorElementType();
1622 | 2.98k |     unsigned NumVecElts = Ty->getVectorNumElements();
1623 | 2.98k |     unsigned NumReduxLevels = Log2_32(NumVecElts); // One tree-reduction level per halving of the vector width.
1624 | 2.98k |     unsigned CmpOpcode;
1625 | 2.98k |     if (Ty->isFPOrFPVectorTy()) {
1626 | 12 |       CmpOpcode = Instruction::FCmp;
1627 | 2.97k |     } else {
1628 | 2.97k |       assert(Ty->isIntOrIntVectorTy() &&
1629 | 2.97k |              "expecting floating point or integer type for min/max reduction");
1630 | 2.97k |       CmpOpcode = Instruction::ICmp;
1631 | 2.97k |     }
1632 | 2.98k |     unsigned MinMaxCost = 0;
1633 | 2.98k |     unsigned ShuffleCost = 0;
1634 | 2.98k |     auto *ConcreteTTI = static_cast<T *>(this); // CRTP downcast so target overrides of the cost hooks are used.
1635 | 2.98k |     std::pair<unsigned, MVT> LT =
1636 | 2.98k |         ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1637 | 2.98k |     unsigned LongVectorCount = 0;
1638 | 2.98k |     unsigned MVTLen =
1639 | 2.98k |         LT.second.isVector() ? LT.second.getVectorNumElements()2.92k : 164 ; // NOTE(review): source reads "... : 1;" — "2.92k"/"64" are llvm-cov region counts fused inline, not code.
1640 | 5.81k |     while (NumVecElts > MVTLen) { // Split-in-half phase: shrink until the type fits a legal vector register.
1641 | 2.82k |       NumVecElts /= 2;
1642 | 2.82k |       Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1643 | 2.82k |       CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1644 | 2.82k |
1645 | 2.82k |       // Assume the pairwise shuffles add a cost.
1646 | 2.82k |       ShuffleCost += (IsPairwise + 1) *
1647 | 2.82k |                      ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1648 | 2.82k |                                                  NumVecElts, SubTy);
1649 | 2.82k |       MinMaxCost += // Each level costs one compare plus one select at the sub-vector width.
1650 | 2.82k |           ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1651 | 2.82k |           ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1652 | 2.82k |                                           nullptr);
1653 | 2.82k |       Ty = SubTy;
1654 | 2.82k |       ++LongVectorCount;
1655 | 2.82k |     }
1656 | 2.98k |
1657 | 2.98k |     NumReduxLevels -= LongVectorCount; // Levels already paid for above; the rest happen at legal width.
1658 | 2.98k |
1659 | 2.98k |     // The minimal length of the vector is limited by the real length of vector
1660 | 2.98k |     // operations performed on the current platform. That's why several final
1661 | 2.98k |     // reduction operations are performed on the vectors with the same
1662 | 2.98k |     // architecture-dependent length.
1663 | 2.98k |
1664 | 2.98k |     // Non pairwise reductions need one shuffle per reduction level. Pairwise
1665 | 2.98k |     // reductions need two shuffles on every level, but the last one. On that
1666 | 2.98k |     // level one of the shuffles is <0, u, u, ...> which is identity.
1667 | 2.98k |     unsigned NumShuffles = NumReduxLevels;
1668 | 2.98k |     if (IsPairwise && NumReduxLevels >= 11.43k ) // NOTE(review): source reads ">= 1" — "1.43k" is a fused llvm-cov branch count.
1669 | 1.43k |       NumShuffles += NumReduxLevels - 1;
1670 | 2.98k |     ShuffleCost += NumShuffles *
1671 | 2.98k |                    ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1672 | 2.98k |                                                0, Ty);
1673 | 2.98k |     MinMaxCost +=
1674 | 2.98k |         NumReduxLevels *
1675 | 2.98k |         (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1676 | 2.98k |          ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1677 | 2.98k |                                          nullptr));
1678 | 2.98k |     // The last min/max should be in vector registers and we counted it above.
1679 | 2.98k |     // So just need a single extractelement.
1680 | 2.98k |     return ShuffleCost + MinMaxCost +
1681 | 2.98k |            ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1682 | 2.98k | } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool) Line | Count | Source | 1618 | 2.86k | bool) { | 1619 | 2.86k | assert(Ty->isVectorTy() && "Expect a vector type"); | 1620 | 2.86k | Type *ScalarTy = Ty->getVectorElementType(); | 1621 | 2.86k | Type *ScalarCondTy = CondTy->getVectorElementType(); | 1622 | 2.86k | unsigned NumVecElts = Ty->getVectorNumElements(); | 1623 | 2.86k | unsigned NumReduxLevels = Log2_32(NumVecElts); | 1624 | 2.86k | unsigned CmpOpcode; | 1625 | 2.86k | if (Ty->isFPOrFPVectorTy()) { | 1626 | 2 | CmpOpcode = Instruction::FCmp; | 1627 | 2.86k | } else { | 1628 | 2.86k | assert(Ty->isIntOrIntVectorTy() && | 1629 | 2.86k | "expecting floating point or integer type for min/max reduction"); | 1630 | 2.86k | CmpOpcode = Instruction::ICmp; | 1631 | 2.86k | } | 1632 | 2.86k | unsigned MinMaxCost = 0; | 1633 | 2.86k | unsigned ShuffleCost = 0; | 1634 | 2.86k | auto *ConcreteTTI = static_cast<T *>(this); | 1635 | 2.86k | std::pair<unsigned, MVT> LT = | 1636 | 2.86k | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); | 1637 | 2.86k | unsigned LongVectorCount = 0; | 1638 | 2.86k | unsigned MVTLen = | 1639 | 2.86k | LT.second.isVector() ? LT.second.getVectorNumElements() : 10 ; | 1640 | 5.66k | while (NumVecElts > MVTLen) { | 1641 | 2.80k | NumVecElts /= 2; | 1642 | 2.80k | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); | 1643 | 2.80k | CondTy = VectorType::get(ScalarCondTy, NumVecElts); | 1644 | 2.80k | | 1645 | 2.80k | // Assume the pairwise shuffles add a cost. 
| 1646 | 2.80k | ShuffleCost += (IsPairwise + 1) * | 1647 | 2.80k | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, | 1648 | 2.80k | NumVecElts, SubTy); | 1649 | 2.80k | MinMaxCost += | 1650 | 2.80k | ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) + | 1651 | 2.80k | ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy, | 1652 | 2.80k | nullptr); | 1653 | 2.80k | Ty = SubTy; | 1654 | 2.80k | ++LongVectorCount; | 1655 | 2.80k | } | 1656 | 2.86k | | 1657 | 2.86k | NumReduxLevels -= LongVectorCount; | 1658 | 2.86k | | 1659 | 2.86k | // The minimal length of the vector is limited by the real length of vector | 1660 | 2.86k | // operations performed on the current platform. That's why several final | 1661 | 2.86k | // reduction opertions are perfomed on the vectors with the same | 1662 | 2.86k | // architecture-dependent length. | 1663 | 2.86k | | 1664 | 2.86k | // Non pairwise reductions need one shuffle per reduction level. Pairwise | 1665 | 2.86k | // reductions need two shuffles on every level, but the last one. On that | 1666 | 2.86k | // level one of the shuffles is <0, u, u, ...> which is identity. | 1667 | 2.86k | unsigned NumShuffles = NumReduxLevels; | 1668 | 2.86k | if (IsPairwise && NumReduxLevels >= 11.42k ) | 1669 | 1.42k | NumShuffles += NumReduxLevels - 1; | 1670 | 2.86k | ShuffleCost += NumShuffles * | 1671 | 2.86k | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, | 1672 | 2.86k | 0, Ty); | 1673 | 2.86k | MinMaxCost += | 1674 | 2.86k | NumReduxLevels * | 1675 | 2.86k | (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + | 1676 | 2.86k | ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, | 1677 | 2.86k | nullptr)); | 1678 | 2.86k | // The last min/max should be in vector registers and we counted it above. | 1679 | 2.86k | // So just need a single extractelement. 
| 1680 | 2.86k | return ShuffleCost + MinMaxCost + | 1681 | 2.86k | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); | 1682 | 2.86k | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool) llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool) Line | Count | Source | 1618 | 31 | bool) { | 1619 | 31 | assert(Ty->isVectorTy() && "Expect a vector type"); | 1620 | 31 | Type *ScalarTy = Ty->getVectorElementType(); | 1621 | 31 | Type *ScalarCondTy = CondTy->getVectorElementType(); | 1622 | 31 | unsigned NumVecElts = Ty->getVectorNumElements(); | 1623 | 31 | unsigned NumReduxLevels = Log2_32(NumVecElts); | 1624 | 31 | unsigned CmpOpcode; | 1625 | 31 | if (Ty->isFPOrFPVectorTy()) { | 1626 | 10 | CmpOpcode = Instruction::FCmp; | 1627 | 21 | } else { | 1628 | 21 | assert(Ty->isIntOrIntVectorTy() && | 1629 | 21 | "expecting floating point or integer type for min/max reduction"); | 1630 | 21 | CmpOpcode = Instruction::ICmp; | 1631 | 21 | } | 1632 | 31 | unsigned MinMaxCost = 0; | 1633 | 31 | unsigned ShuffleCost = 0; | 1634 | 31 | auto *ConcreteTTI = static_cast<T *>(this); | 1635 | 31 | std::pair<unsigned, MVT> LT = | 1636 | 31 | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); | 1637 | 31 | unsigned LongVectorCount = 0; | 1638 | 31 | unsigned MVTLen = | 1639 | 31 | LT.second.isVector() ? LT.second.getVectorNumElements() : 10 ; | 1640 | 44 | while (NumVecElts > MVTLen) { | 1641 | 13 | NumVecElts /= 2; | 1642 | 13 | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); | 1643 | 13 | CondTy = VectorType::get(ScalarCondTy, NumVecElts); | 1644 | 13 | | 1645 | 13 | // Assume the pairwise shuffles add a cost. 
| 1646 | 13 | ShuffleCost += (IsPairwise + 1) * | 1647 | 13 | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, | 1648 | 13 | NumVecElts, SubTy); | 1649 | 13 | MinMaxCost += | 1650 | 13 | ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) + | 1651 | 13 | ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy, | 1652 | 13 | nullptr); | 1653 | 13 | Ty = SubTy; | 1654 | 13 | ++LongVectorCount; | 1655 | 13 | } | 1656 | 31 | | 1657 | 31 | NumReduxLevels -= LongVectorCount; | 1658 | 31 | | 1659 | 31 | // The minimal length of the vector is limited by the real length of vector | 1660 | 31 | // operations performed on the current platform. That's why several final | 1661 | 31 | // reduction opertions are perfomed on the vectors with the same | 1662 | 31 | // architecture-dependent length. | 1663 | 31 | | 1664 | 31 | // Non pairwise reductions need one shuffle per reduction level. Pairwise | 1665 | 31 | // reductions need two shuffles on every level, but the last one. On that | 1666 | 31 | // level one of the shuffles is <0, u, u, ...> which is identity. | 1667 | 31 | unsigned NumShuffles = NumReduxLevels; | 1668 | 31 | if (IsPairwise && NumReduxLevels >= 119 ) | 1669 | 19 | NumShuffles += NumReduxLevels - 1; | 1670 | 31 | ShuffleCost += NumShuffles * | 1671 | 31 | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, | 1672 | 31 | 0, Ty); | 1673 | 31 | MinMaxCost += | 1674 | 31 | NumReduxLevels * | 1675 | 31 | (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + | 1676 | 31 | ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, | 1677 | 31 | nullptr)); | 1678 | 31 | // The last min/max should be in vector registers and we counted it above. | 1679 | 31 | // So just need a single extractelement. | 1680 | 31 | return ShuffleCost + MinMaxCost + | 1681 | 31 | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); | 1682 | 31 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool) Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool) llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool) Line | Count | Source | 1618 | 96 | bool) { | 1619 | 96 | assert(Ty->isVectorTy() && "Expect a vector type"); | 1620 | 96 | Type *ScalarTy = Ty->getVectorElementType(); | 1621 | 96 | Type *ScalarCondTy = CondTy->getVectorElementType(); | 1622 | 96 | unsigned NumVecElts = Ty->getVectorNumElements(); | 1623 | 96 | unsigned NumReduxLevels = Log2_32(NumVecElts); | 1624 | 96 | unsigned CmpOpcode; | 1625 | 96 | if (Ty->isFPOrFPVectorTy()) { | 1626 | 0 | CmpOpcode = Instruction::FCmp; | 1627 | 96 | } else { | 1628 | 96 | assert(Ty->isIntOrIntVectorTy() && | 1629 | 96 | "expecting floating point or integer type for min/max reduction"); | 1630 | 96 | CmpOpcode = Instruction::ICmp; | 1631 | 96 | } | 1632 | 96 | 
unsigned MinMaxCost = 0; | 1633 | 96 | unsigned ShuffleCost = 0; | 1634 | 96 | auto *ConcreteTTI = static_cast<T *>(this); | 1635 | 96 | std::pair<unsigned, MVT> LT = | 1636 | 96 | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); | 1637 | 96 | unsigned LongVectorCount = 0; | 1638 | 96 | unsigned MVTLen = | 1639 | 96 | LT.second.isVector() ? LT.second.getVectorNumElements()32 : 164 ; | 1640 | 112 | while (NumVecElts > MVTLen) { | 1641 | 16 | NumVecElts /= 2; | 1642 | 16 | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); | 1643 | 16 | CondTy = VectorType::get(ScalarCondTy, NumVecElts); | 1644 | 16 | | 1645 | 16 | // Assume the pairwise shuffles add a cost. | 1646 | 16 | ShuffleCost += (IsPairwise + 1) * | 1647 | 16 | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, | 1648 | 16 | NumVecElts, SubTy); | 1649 | 16 | MinMaxCost += | 1650 | 16 | ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) + | 1651 | 16 | ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy, | 1652 | 16 | nullptr); | 1653 | 16 | Ty = SubTy; | 1654 | 16 | ++LongVectorCount; | 1655 | 16 | } | 1656 | 96 | | 1657 | 96 | NumReduxLevels -= LongVectorCount; | 1658 | 96 | | 1659 | 96 | // The minimal length of the vector is limited by the real length of vector | 1660 | 96 | // operations performed on the current platform. That's why several final | 1661 | 96 | // reduction opertions are perfomed on the vectors with the same | 1662 | 96 | // architecture-dependent length. | 1663 | 96 | | 1664 | 96 | // Non pairwise reductions need one shuffle per reduction level. Pairwise | 1665 | 96 | // reductions need two shuffles on every level, but the last one. On that | 1666 | 96 | // level one of the shuffles is <0, u, u, ...> which is identity. 
| 1667 | 96 | unsigned NumShuffles = NumReduxLevels; | 1668 | 96 | if (IsPairwise && NumReduxLevels >= 10 ) | 1669 | 0 | NumShuffles += NumReduxLevels - 1; | 1670 | 96 | ShuffleCost += NumShuffles * | 1671 | 96 | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, | 1672 | 96 | 0, Ty); | 1673 | 96 | MinMaxCost += | 1674 | 96 | NumReduxLevels * | 1675 | 96 | (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + | 1676 | 96 | ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, | 1677 | 96 | nullptr)); | 1678 | 96 | // The last min/max should be in vector registers and we counted it above. | 1679 | 96 | // So just need a single extractelement. | 1680 | 96 | return ShuffleCost + MinMaxCost + | 1681 | 96 | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); | 1682 | 96 | } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool) |
1683 | | |
1684 | 4.78k | unsigned getVectorSplitCost() { return 1; } llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getVectorSplitCost() Line | Count | Source | 1684 | 1.38k | unsigned getVectorSplitCost() { return 1; } |  // Flat cost of 1 charged per split when an illegal vector op must be halved during legalization.
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getVectorSplitCost() llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getVectorSplitCost() Line | Count | Source | 1684 | 228 | unsigned getVectorSplitCost() { return 1; } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getVectorSplitCost() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getVectorSplitCost() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getVectorSplitCost() llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getVectorSplitCost() Line | Count | Source | 1684 | 1 | unsigned getVectorSplitCost() { return 1; } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getVectorSplitCost() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getVectorSplitCost() Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getVectorSplitCost() llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getVectorSplitCost() Line | Count | Source | 1684 | 3.17k | unsigned getVectorSplitCost() { return 1; } |
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getVectorSplitCost() |
1685 | | |
1686 | | /// @} |
1687 | | }; |
1688 | | |
1689 |  | /// Concrete BasicTTIImpl that can be used if no further customization
1690 |  | /// is needed.
1691 |  | class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1692 |  |   using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1693 |  |
1694 |  |   friend class BasicTTIImplBase<BasicTTIImpl>; // Lets the CRTP base reach the private getST()/getTLI() hooks below.
1695 |  |
1696 |  |   const TargetSubtargetInfo *ST;  // Subtarget description the base class queries for target costs.
1697 |  |   const TargetLoweringBase *TLI;  // Lowering info used for type-legalization cost queries.
1698 |  |
1699 | 0 |   const TargetSubtargetInfo *getST() const { return ST; } // Hook consumed by BasicTTIImplBase.
1700 | 1.36k |   const TargetLoweringBase *getTLI() const { return TLI; } // Hook consumed by BasicTTIImplBase.
1701 |  |
1702 |  | public:
1703 |  |   explicit BasicTTIImpl(const TargetMachine *TM, const Function &F); // Defined out-of-line; derives ST/TLI from TM for F.
1704 |  | };
1705 | | |
1706 | | } // end namespace llvm |
1707 | | |
1708 | | #endif // LLVM_CODEGEN_BASICTTIIMPL_H |