Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/include/llvm/CodeGen/BasicTTIImpl.h
Line
Count
Source (jump to first uncovered line)
1
//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// This file provides a helper that implements much of the TTI interface in
11
/// terms of the target-independent code generator and TargetLowering
12
/// interfaces.
13
//
14
//===----------------------------------------------------------------------===//
15
16
#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17
#define LLVM_CODEGEN_BASICTTIIMPL_H
18
19
#include "llvm/ADT/APInt.h"
20
#include "llvm/ADT/ArrayRef.h"
21
#include "llvm/ADT/BitVector.h"
22
#include "llvm/ADT/SmallPtrSet.h"
23
#include "llvm/ADT/SmallVector.h"
24
#include "llvm/Analysis/LoopInfo.h"
25
#include "llvm/Analysis/TargetTransformInfo.h"
26
#include "llvm/Analysis/TargetTransformInfoImpl.h"
27
#include "llvm/CodeGen/ISDOpcodes.h"
28
#include "llvm/CodeGen/TargetLowering.h"
29
#include "llvm/CodeGen/TargetSubtargetInfo.h"
30
#include "llvm/CodeGen/ValueTypes.h"
31
#include "llvm/IR/BasicBlock.h"
32
#include "llvm/IR/CallSite.h"
33
#include "llvm/IR/Constant.h"
34
#include "llvm/IR/Constants.h"
35
#include "llvm/IR/DataLayout.h"
36
#include "llvm/IR/DerivedTypes.h"
37
#include "llvm/IR/InstrTypes.h"
38
#include "llvm/IR/Instruction.h"
39
#include "llvm/IR/Instructions.h"
40
#include "llvm/IR/Intrinsics.h"
41
#include "llvm/IR/Operator.h"
42
#include "llvm/IR/Type.h"
43
#include "llvm/IR/Value.h"
44
#include "llvm/MC/MCSchedule.h"
45
#include "llvm/Support/Casting.h"
46
#include "llvm/Support/CommandLine.h"
47
#include "llvm/Support/ErrorHandling.h"
48
#include "llvm/Support/MachineValueType.h"
49
#include "llvm/Support/MathExtras.h"
50
#include <algorithm>
51
#include <cassert>
52
#include <cstdint>
53
#include <limits>
54
#include <utility>
55
56
namespace llvm {
57
58
class Function;
59
class GlobalValue;
60
class LLVMContext;
61
class ScalarEvolution;
62
class SCEV;
63
class TargetMachine;
64
65
extern cl::opt<unsigned> PartialUnrollingThreshold;
66
67
/// Base class which can be used to help build a TTI implementation.
68
///
69
/// This class provides as much implementation of the TTI interface as is
70
/// possible using the target independent parts of the code generator.
71
///
72
/// In order to subclass it, your class must implement a getST() method to
73
/// return the subtarget, and a getTLI() method to return the target lowering.
74
/// We need these methods implemented in the derived class so that this class
75
/// doesn't have to duplicate storage for them.
76
template <typename T>
77
class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
78
private:
79
  using BaseT = TargetTransformInfoImplCRTPBase<T>;
80
  using TTI = TargetTransformInfo;
81
82
  /// Estimate a cost of Broadcast as an extract and sequence of insert
83
  /// operations.
84
51
  unsigned getBroadcastShuffleOverhead(Type *Ty) {
85
51
    assert(Ty->isVectorTy() && "Can only shuffle vectors");
86
51
    unsigned Cost = 0;
87
51
    // Broadcast cost is equal to the cost of extracting the zero'th element
88
51
    // plus the cost of inserting it into every element of the result vector.
89
51
    Cost += static_cast<T *>(this)->getVectorInstrCost(
90
51
        Instruction::ExtractElement, Ty, 0);
91
51
92
153
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i102
) {
93
102
      Cost += static_cast<T *>(this)->getVectorInstrCost(
94
102
          Instruction::InsertElement, Ty, i);
95
102
    }
96
51
    return Cost;
97
51
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getBroadcastShuffleOverhead(llvm::Type*)
Line
Count
Source
84
2
  unsigned getBroadcastShuffleOverhead(Type *Ty) {
85
2
    assert(Ty->isVectorTy() && "Can only shuffle vectors");
86
2
    unsigned Cost = 0;
87
2
    // Broadcast cost is equal to the cost of extracting the zero'th element
88
2
    // plus the cost of inserting it into every element of the result vector.
89
2
    Cost += static_cast<T *>(this)->getVectorInstrCost(
90
2
        Instruction::ExtractElement, Ty, 0);
91
2
92
6
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i4
) {
93
4
      Cost += static_cast<T *>(this)->getVectorInstrCost(
94
4
          Instruction::InsertElement, Ty, i);
95
4
    }
96
2
    return Cost;
97
2
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getBroadcastShuffleOverhead(llvm::Type*)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*)
Line
Count
Source
84
2
  unsigned getBroadcastShuffleOverhead(Type *Ty) {
85
2
    assert(Ty->isVectorTy() && "Can only shuffle vectors");
86
2
    unsigned Cost = 0;
87
2
    // Broadcast cost is equal to the cost of extracting the zero'th element
88
2
    // plus the cost of inserting it into every element of the result vector.
89
2
    Cost += static_cast<T *>(this)->getVectorInstrCost(
90
2
        Instruction::ExtractElement, Ty, 0);
91
2
92
6
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i4
) {
93
4
      Cost += static_cast<T *>(this)->getVectorInstrCost(
94
4
          Instruction::InsertElement, Ty, i);
95
4
    }
96
2
    return Cost;
97
2
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getBroadcastShuffleOverhead(llvm::Type*)
Line
Count
Source
84
47
  unsigned getBroadcastShuffleOverhead(Type *Ty) {
85
47
    assert(Ty->isVectorTy() && "Can only shuffle vectors");
86
47
    unsigned Cost = 0;
87
47
    // Broadcast cost is equal to the cost of extracting the zero'th element
88
47
    // plus the cost of inserting it into every element of the result vector.
89
47
    Cost += static_cast<T *>(this)->getVectorInstrCost(
90
47
        Instruction::ExtractElement, Ty, 0);
91
47
92
141
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i94
) {
93
94
      Cost += static_cast<T *>(this)->getVectorInstrCost(
94
94
          Instruction::InsertElement, Ty, i);
95
94
    }
96
47
    return Cost;
97
47
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getBroadcastShuffleOverhead(llvm::Type*)
98
99
  /// Estimate a cost of shuffle as a sequence of extract and insert
100
  /// operations.
101
3.96k
  unsigned getPermuteShuffleOverhead(Type *Ty) {
102
3.96k
    assert(Ty->isVectorTy() && "Can only shuffle vectors");
103
3.96k
    unsigned Cost = 0;
104
3.96k
    // Shuffle cost is equal to the cost of extracting element from its argument
105
3.96k
    // plus the cost of inserting them onto the result vector.
106
3.96k
107
3.96k
    // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from
108
3.96k
    // index 0 of first vector, index 1 of second vector,index 2 of first
109
3.96k
    // vector and finally index 3 of second vector and insert them at index
110
3.96k
    // <0,1,2,3> of result vector.
111
22.1k
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i18.1k
) {
112
18.1k
      Cost += static_cast<T *>(this)
113
18.1k
                  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114
18.1k
      Cost += static_cast<T *>(this)
115
18.1k
                  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116
18.1k
    }
117
3.96k
    return Cost;
118
3.96k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getPermuteShuffleOverhead(llvm::Type*)
Line
Count
Source
101
3.32k
  unsigned getPermuteShuffleOverhead(Type *Ty) {
102
3.32k
    assert(Ty->isVectorTy() && "Can only shuffle vectors");
103
3.32k
    unsigned Cost = 0;
104
3.32k
    // Shuffle cost is equal to the cost of extracting element from its argument
105
3.32k
    // plus the cost of inserting them onto the result vector.
106
3.32k
107
3.32k
    // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from
108
3.32k
    // index 0 of first vector, index 1 of second vector,index 2 of first
109
3.32k
    // vector and finally index 3 of second vector and insert them at index
110
3.32k
    // <0,1,2,3> of result vector.
111
17.8k
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i14.5k
) {
112
14.5k
      Cost += static_cast<T *>(this)
113
14.5k
                  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114
14.5k
      Cost += static_cast<T *>(this)
115
14.5k
                  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116
14.5k
    }
117
3.32k
    return Cost;
118
3.32k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getPermuteShuffleOverhead(llvm::Type*)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getPermuteShuffleOverhead(llvm::Type*)
Line
Count
Source
101
54
  unsigned getPermuteShuffleOverhead(Type *Ty) {
102
54
    assert(Ty->isVectorTy() && "Can only shuffle vectors");
103
54
    unsigned Cost = 0;
104
54
    // Shuffle cost is equal to the cost of extracting element from its argument
105
54
    // plus the cost of inserting them onto the result vector.
106
54
107
54
    // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from
108
54
    // index 0 of first vector, index 1 of second vector,index 2 of first
109
54
    // vector and finally index 3 of second vector and insert them at index
110
54
    // <0,1,2,3> of result vector.
111
254
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i200
) {
112
200
      Cost += static_cast<T *>(this)
113
200
                  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114
200
      Cost += static_cast<T *>(this)
115
200
                  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116
200
    }
117
54
    return Cost;
118
54
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getPermuteShuffleOverhead(llvm::Type*)
Line
Count
Source
101
96
  unsigned getPermuteShuffleOverhead(Type *Ty) {
102
96
    assert(Ty->isVectorTy() && "Can only shuffle vectors");
103
96
    unsigned Cost = 0;
104
96
    // Shuffle cost is equal to the cost of extracting element from its argument
105
96
    // plus the cost of inserting them onto the result vector.
106
96
107
96
    // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from
108
96
    // index 0 of first vector, index 1 of second vector,index 2 of first
109
96
    // vector and finally index 3 of second vector and insert them at index
110
96
    // <0,1,2,3> of result vector.
111
480
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i384
) {
112
384
      Cost += static_cast<T *>(this)
113
384
                  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114
384
      Cost += static_cast<T *>(this)
115
384
                  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116
384
    }
117
96
    return Cost;
118
96
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getPermuteShuffleOverhead(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getPermuteShuffleOverhead(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getPermuteShuffleOverhead(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getPermuteShuffleOverhead(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getPermuteShuffleOverhead(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getPermuteShuffleOverhead(llvm::Type*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getPermuteShuffleOverhead(llvm::Type*)
Line
Count
Source
101
487
  unsigned getPermuteShuffleOverhead(Type *Ty) {
102
487
    assert(Ty->isVectorTy() && "Can only shuffle vectors");
103
487
    unsigned Cost = 0;
104
487
    // Shuffle cost is equal to the cost of extracting element from its argument
105
487
    // plus the cost of inserting them onto the result vector.
106
487
107
487
    // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from
108
487
    // index 0 of first vector, index 1 of second vector,index 2 of first
109
487
    // vector and finally index 3 of second vector and insert them at index
110
487
    // <0,1,2,3> of result vector.
111
3.53k
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i3.04k
) {
112
3.04k
      Cost += static_cast<T *>(this)
113
3.04k
                  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114
3.04k
      Cost += static_cast<T *>(this)
115
3.04k
                  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116
3.04k
    }
117
487
    return Cost;
118
487
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getPermuteShuffleOverhead(llvm::Type*)
119
120
  /// Estimate a cost of subvector extraction as a sequence of extract and
121
  /// insert operations.
122
3.93k
  unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
123
3.93k
    assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124
3.93k
           "Can only extract subvectors from vectors");
125
3.93k
    int NumSubElts = SubTy->getVectorNumElements();
126
3.93k
    assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127
3.93k
           "SK_ExtractSubvector index out of range");
128
3.93k
129
3.93k
    unsigned Cost = 0;
130
3.93k
    // Subvector extraction cost is equal to the cost of extracting element from
131
3.93k
    // the source type plus the cost of inserting them into the result vector
132
3.93k
    // type.
133
13.9k
    for (int i = 0; i != NumSubElts; 
++i9.99k
) {
134
9.99k
      Cost += static_cast<T *>(this)->getVectorInstrCost(
135
9.99k
          Instruction::ExtractElement, Ty, i + Index);
136
9.99k
      Cost += static_cast<T *>(this)->getVectorInstrCost(
137
9.99k
          Instruction::InsertElement, SubTy, i);
138
9.99k
    }
139
3.93k
    return Cost;
140
3.93k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Line
Count
Source
122
3.89k
  unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
123
3.89k
    assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124
3.89k
           "Can only extract subvectors from vectors");
125
3.89k
    int NumSubElts = SubTy->getVectorNumElements();
126
3.89k
    assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127
3.89k
           "SK_ExtractSubvector index out of range");
128
3.89k
129
3.89k
    unsigned Cost = 0;
130
3.89k
    // Subvector extraction cost is equal to the cost of extracting element from
131
3.89k
    // the source type plus the cost of inserting them into the result vector
132
3.89k
    // type.
133
13.7k
    for (int i = 0; i != NumSubElts; 
++i9.82k
) {
134
9.82k
      Cost += static_cast<T *>(this)->getVectorInstrCost(
135
9.82k
          Instruction::ExtractElement, Ty, i + Index);
136
9.82k
      Cost += static_cast<T *>(this)->getVectorInstrCost(
137
9.82k
          Instruction::InsertElement, SubTy, i);
138
9.82k
    }
139
3.89k
    return Cost;
140
3.89k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Line
Count
Source
122
25
  unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
123
25
    assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124
25
           "Can only extract subvectors from vectors");
125
25
    int NumSubElts = SubTy->getVectorNumElements();
126
25
    assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127
25
           "SK_ExtractSubvector index out of range");
128
25
129
25
    unsigned Cost = 0;
130
25
    // Subvector extraction cost is equal to the cost of extracting element from
131
25
    // the source type plus the cost of inserting them into the result vector
132
25
    // type.
133
141
    for (int i = 0; i != NumSubElts; 
++i116
) {
134
116
      Cost += static_cast<T *>(this)->getVectorInstrCost(
135
116
          Instruction::ExtractElement, Ty, i + Index);
136
116
      Cost += static_cast<T *>(this)->getVectorInstrCost(
137
116
          Instruction::InsertElement, SubTy, i);
138
116
    }
139
25
    return Cost;
140
25
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Line
Count
Source
122
12
  unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
123
12
    assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124
12
           "Can only extract subvectors from vectors");
125
12
    int NumSubElts = SubTy->getVectorNumElements();
126
12
    assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127
12
           "SK_ExtractSubvector index out of range");
128
12
129
12
    unsigned Cost = 0;
130
12
    // Subvector extraction cost is equal to the cost of extracting element from
131
12
    // the source type plus the cost of inserting them into the result vector
132
12
    // type.
133
60
    for (int i = 0; i != NumSubElts; 
++i48
) {
134
48
      Cost += static_cast<T *>(this)->getVectorInstrCost(
135
48
          Instruction::ExtractElement, Ty, i + Index);
136
48
      Cost += static_cast<T *>(this)->getVectorInstrCost(
137
48
          Instruction::InsertElement, SubTy, i);
138
48
    }
139
12
    return Cost;
140
12
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getExtractSubvectorOverhead(llvm::Type*, int, llvm::Type*)
141
142
  /// Estimate a cost of subvector insertion as a sequence of extract and
143
  /// insert operations.
144
73
  unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
145
73
    assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
146
73
           "Can only insert subvectors into vectors");
147
73
    int NumSubElts = SubTy->getVectorNumElements();
148
73
    assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
149
73
           "SK_InsertSubvector index out of range");
150
73
151
73
    unsigned Cost = 0;
152
73
    // Subvector insertion cost is equal to the cost of extracting element from
153
73
    // the source type plus the cost of inserting them into the result vector
154
73
    // type.
155
235
    for (int i = 0; i != NumSubElts; 
++i162
) {
156
162
      Cost += static_cast<T *>(this)->getVectorInstrCost(
157
162
          Instruction::ExtractElement, SubTy, i);
158
162
      Cost += static_cast<T *>(this)->getVectorInstrCost(
159
162
          Instruction::InsertElement, Ty, i + Index);
160
162
    }
161
73
    return Cost;
162
73
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*)
Line
Count
Source
144
73
  unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
145
73
    assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
146
73
           "Can only insert subvectors into vectors");
147
73
    int NumSubElts = SubTy->getVectorNumElements();
148
73
    assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
149
73
           "SK_InsertSubvector index out of range");
150
73
151
73
    unsigned Cost = 0;
152
73
    // Subvector insertion cost is equal to the cost of extracting element from
153
73
    // the source type plus the cost of inserting them into the result vector
154
73
    // type.
155
235
    for (int i = 0; i != NumSubElts; 
++i162
) {
156
162
      Cost += static_cast<T *>(this)->getVectorInstrCost(
157
162
          Instruction::ExtractElement, SubTy, i);
158
162
      Cost += static_cast<T *>(this)->getVectorInstrCost(
159
162
          Instruction::InsertElement, Ty, i + Index);
160
162
    }
161
73
    return Cost;
162
73
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getInsertSubvectorOverhead(llvm::Type*, int, llvm::Type*)
163
164
  /// Local query method delegates up to T which *must* implement this!
165
390k
  const TargetSubtargetInfo *getST() const {
166
390k
    return static_cast<const T *>(this)->getST();
167
390k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getST() const
Line
Count
Source
165
359k
  const TargetSubtargetInfo *getST() const {
166
359k
    return static_cast<const T *>(this)->getST();
167
359k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getST() const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getST() const
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getST() const
Line
Count
Source
165
6.89k
  const TargetSubtargetInfo *getST() const {
166
6.89k
    return static_cast<const T *>(this)->getST();
167
6.89k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getST() const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getST() const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getST() const
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getST() const
Line
Count
Source
165
2
  const TargetSubtargetInfo *getST() const {
166
2
    return static_cast<const T *>(this)->getST();
167
2
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getST() const
Line
Count
Source
165
654
  const TargetSubtargetInfo *getST() const {
166
654
    return static_cast<const T *>(this)->getST();
167
654
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getST() const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getST() const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getST() const
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getST() const
Line
Count
Source
165
22.9k
  const TargetSubtargetInfo *getST() const {
166
22.9k
    return static_cast<const T *>(this)->getST();
167
22.9k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getST() const
168
169
  /// Local query method delegates up to T which *must* implement this!
170
117M
  const TargetLoweringBase *getTLI() const {
171
117M
    return static_cast<const T *>(this)->getTLI();
172
117M
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getTLI() const
Line
Count
Source
170
104M
  const TargetLoweringBase *getTLI() const {
171
104M
    return static_cast<const T *>(this)->getTLI();
172
104M
  }
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getTLI() const
Line
Count
Source
170
3.64k
  const TargetLoweringBase *getTLI() const {
171
3.64k
    return static_cast<const T *>(this)->getTLI();
172
3.64k
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getTLI() const
Line
Count
Source
170
98.3k
  const TargetLoweringBase *getTLI() const {
171
98.3k
    return static_cast<const T *>(this)->getTLI();
172
98.3k
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getTLI() const
Line
Count
Source
170
2.07M
  const TargetLoweringBase *getTLI() const {
171
2.07M
    return static_cast<const T *>(this)->getTLI();
172
2.07M
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getTLI() const
Line
Count
Source
170
40.9k
  const TargetLoweringBase *getTLI() const {
171
40.9k
    return static_cast<const T *>(this)->getTLI();
172
40.9k
  }
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getTLI() const
Line
Count
Source
170
268
  const TargetLoweringBase *getTLI() const {
171
268
    return static_cast<const T *>(this)->getTLI();
172
268
  }
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getTLI() const
Line
Count
Source
170
1.36k
  const TargetLoweringBase *getTLI() const {
171
1.36k
    return static_cast<const T *>(this)->getTLI();
172
1.36k
  }
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getTLI() const
Line
Count
Source
170
2.22k
  const TargetLoweringBase *getTLI() const {
171
2.22k
    return static_cast<const T *>(this)->getTLI();
172
2.22k
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getTLI() const
Line
Count
Source
170
155k
  const TargetLoweringBase *getTLI() const {
171
155k
    return static_cast<const T *>(this)->getTLI();
172
155k
  }
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getTLI() const
Line
Count
Source
170
655
  const TargetLoweringBase *getTLI() const {
171
655
    return static_cast<const T *>(this)->getTLI();
172
655
  }
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getTLI() const
Line
Count
Source
170
12.6k
  const TargetLoweringBase *getTLI() const {
171
12.6k
    return static_cast<const T *>(this)->getTLI();
172
12.6k
  }
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getTLI() const
Line
Count
Source
170
966
  const TargetLoweringBase *getTLI() const {
171
966
    return static_cast<const T *>(this)->getTLI();
172
966
  }
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getTLI() const
Line
Count
Source
170
10.0M
  const TargetLoweringBase *getTLI() const {
171
10.0M
    return static_cast<const T *>(this)->getTLI();
172
10.0M
  }
llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getTLI() const
Line
Count
Source
170
26
  const TargetLoweringBase *getTLI() const {
171
26
    return static_cast<const T *>(this)->getTLI();
172
26
  }
173
174
8.49M
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175
8.49M
    switch (M) {
176
8.49M
      case TTI::MIM_Unindexed:
177
0
        return ISD::UNINDEXED;
178
8.49M
      case TTI::MIM_PreInc:
179
0
        return ISD::PRE_INC;
180
8.49M
      case TTI::MIM_PreDec:
181
0
        return ISD::PRE_DEC;
182
8.49M
      case TTI::MIM_PostInc:
183
8.49M
        return ISD::POST_INC;
184
8.49M
      case TTI::MIM_PostDec:
185
0
        return ISD::POST_DEC;
186
0
    }
187
0
    llvm_unreachable("Unexpected MemIndexedMode");
188
0
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode)
Line
Count
Source
174
6.95M
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175
6.95M
    switch (M) {
176
6.95M
      case TTI::MIM_Unindexed:
177
0
        return ISD::UNINDEXED;
178
6.95M
      case TTI::MIM_PreInc:
179
0
        return ISD::PRE_INC;
180
6.95M
      case TTI::MIM_PreDec:
181
0
        return ISD::PRE_DEC;
182
6.95M
      case TTI::MIM_PostInc:
183
6.95M
        return ISD::POST_INC;
184
6.95M
      case TTI::MIM_PostDec:
185
0
        return ISD::POST_DEC;
186
0
    }
187
0
    llvm_unreachable("Unexpected MemIndexedMode");
188
0
  }
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode)
Line
Count
Source
174
304
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175
304
    switch (M) {
176
304
      case TTI::MIM_Unindexed:
177
0
        return ISD::UNINDEXED;
178
304
      case TTI::MIM_PreInc:
179
0
        return ISD::PRE_INC;
180
304
      case TTI::MIM_PreDec:
181
0
        return ISD::PRE_DEC;
182
304
      case TTI::MIM_PostInc:
183
304
        return ISD::POST_INC;
184
304
      case TTI::MIM_PostDec:
185
0
        return ISD::POST_DEC;
186
0
    }
187
0
    llvm_unreachable("Unexpected MemIndexedMode");
188
0
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode)
Line
Count
Source
174
7.27k
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175
7.27k
    switch (M) {
176
7.27k
      case TTI::MIM_Unindexed:
177
0
        return ISD::UNINDEXED;
178
7.27k
      case TTI::MIM_PreInc:
179
0
        return ISD::PRE_INC;
180
7.27k
      case TTI::MIM_PreDec:
181
0
        return ISD::PRE_DEC;
182
7.27k
      case TTI::MIM_PostInc:
183
7.27k
        return ISD::POST_INC;
184
7.27k
      case TTI::MIM_PostDec:
185
0
        return ISD::POST_DEC;
186
0
    }
187
0
    llvm_unreachable("Unexpected MemIndexedMode");
188
0
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode)
Line
Count
Source
174
177k
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175
177k
    switch (M) {
176
177k
      case TTI::MIM_Unindexed:
177
0
        return ISD::UNINDEXED;
178
177k
      case TTI::MIM_PreInc:
179
0
        return ISD::PRE_INC;
180
177k
      case TTI::MIM_PreDec:
181
0
        return ISD::PRE_DEC;
182
177k
      case TTI::MIM_PostInc:
183
177k
        return ISD::POST_INC;
184
177k
      case TTI::MIM_PostDec:
185
0
        return ISD::POST_DEC;
186
0
    }
187
0
    llvm_unreachable("Unexpected MemIndexedMode");
188
0
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode)
Line
Count
Source
174
8.07k
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175
8.07k
    switch (M) {
176
8.07k
      case TTI::MIM_Unindexed:
177
0
        return ISD::UNINDEXED;
178
8.07k
      case TTI::MIM_PreInc:
179
0
        return ISD::PRE_INC;
180
8.07k
      case TTI::MIM_PreDec:
181
0
        return ISD::PRE_DEC;
182
8.07k
      case TTI::MIM_PostInc:
183
8.07k
        return ISD::POST_INC;
184
8.07k
      case TTI::MIM_PostDec:
185
0
        return ISD::POST_DEC;
186
0
    }
187
0
    llvm_unreachable("Unexpected MemIndexedMode");
188
0
  }
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode)
Line
Count
Source
174
112
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175
112
    switch (M) {
176
112
      case TTI::MIM_Unindexed:
177
0
        return ISD::UNINDEXED;
178
112
      case TTI::MIM_PreInc:
179
0
        return ISD::PRE_INC;
180
112
      case TTI::MIM_PreDec:
181
0
        return ISD::PRE_DEC;
182
112
      case TTI::MIM_PostInc:
183
112
        return ISD::POST_INC;
184
112
      case TTI::MIM_PostDec:
185
0
        return ISD::POST_DEC;
186
0
    }
187
0
    llvm_unreachable("Unexpected MemIndexedMode");
188
0
  }
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode)
Line
Count
Source
174
454
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175
454
    switch (M) {
176
454
      case TTI::MIM_Unindexed:
177
0
        return ISD::UNINDEXED;
178
454
      case TTI::MIM_PreInc:
179
0
        return ISD::PRE_INC;
180
454
      case TTI::MIM_PreDec:
181
0
        return ISD::PRE_DEC;
182
454
      case TTI::MIM_PostInc:
183
454
        return ISD::POST_INC;
184
454
      case TTI::MIM_PostDec:
185
0
        return ISD::POST_DEC;
186
0
    }
187
0
    llvm_unreachable("Unexpected MemIndexedMode");
188
0
  }
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode)
Line
Count
Source
174
494
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175
494
    switch (M) {
176
494
      case TTI::MIM_Unindexed:
177
0
        return ISD::UNINDEXED;
178
494
      case TTI::MIM_PreInc:
179
0
        return ISD::PRE_INC;
180
494
      case TTI::MIM_PreDec:
181
0
        return ISD::PRE_DEC;
182
494
      case TTI::MIM_PostInc:
183
494
        return ISD::POST_INC;
184
494
      case TTI::MIM_PostDec:
185
0
        return ISD::POST_DEC;
186
0
    }
187
0
    llvm_unreachable("Unexpected MemIndexedMode");
188
0
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode)
Line
Count
Source
174
23.3k
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175
23.3k
    switch (M) {
176
23.3k
      case TTI::MIM_Unindexed:
177
0
        return ISD::UNINDEXED;
178
23.3k
      case TTI::MIM_PreInc:
179
0
        return ISD::PRE_INC;
180
23.3k
      case TTI::MIM_PreDec:
181
0
        return ISD::PRE_DEC;
182
23.3k
      case TTI::MIM_PostInc:
183
23.3k
        return ISD::POST_INC;
184
23.3k
      case TTI::MIM_PostDec:
185
0
        return ISD::POST_DEC;
186
0
    }
187
0
    llvm_unreachable("Unexpected MemIndexedMode");
188
0
  }
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode)
Line
Count
Source
174
108
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175
108
    switch (M) {
176
108
      case TTI::MIM_Unindexed:
177
0
        return ISD::UNINDEXED;
178
108
      case TTI::MIM_PreInc:
179
0
        return ISD::PRE_INC;
180
108
      case TTI::MIM_PreDec:
181
0
        return ISD::PRE_DEC;
182
108
      case TTI::MIM_PostInc:
183
108
        return ISD::POST_INC;
184
108
      case TTI::MIM_PostDec:
185
0
        return ISD::POST_DEC;
186
0
    }
187
0
    llvm_unreachable("Unexpected MemIndexedMode");
188
0
  }
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode)
Line
Count
Source
174
1.54k
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175
1.54k
    switch (M) {
176
1.54k
      case TTI::MIM_Unindexed:
177
0
        return ISD::UNINDEXED;
178
1.54k
      case TTI::MIM_PreInc:
179
0
        return ISD::PRE_INC;
180
1.54k
      case TTI::MIM_PreDec:
181
0
        return ISD::PRE_DEC;
182
1.54k
      case TTI::MIM_PostInc:
183
1.54k
        return ISD::POST_INC;
184
1.54k
      case TTI::MIM_PostDec:
185
0
        return ISD::POST_DEC;
186
0
    }
187
0
    llvm_unreachable("Unexpected MemIndexedMode");
188
0
  }
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode)
Line
Count
Source
174
368
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175
368
    switch (M) {
176
368
      case TTI::MIM_Unindexed:
177
0
        return ISD::UNINDEXED;
178
368
      case TTI::MIM_PreInc:
179
0
        return ISD::PRE_INC;
180
368
      case TTI::MIM_PreDec:
181
0
        return ISD::PRE_DEC;
182
368
      case TTI::MIM_PostInc:
183
368
        return ISD::POST_INC;
184
368
      case TTI::MIM_PostDec:
185
0
        return ISD::POST_DEC;
186
0
    }
187
0
    llvm_unreachable("Unexpected MemIndexedMode");
188
0
  }
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode)
Line
Count
Source
174
1.31M
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175
1.31M
    switch (M) {
176
1.31M
      case TTI::MIM_Unindexed:
177
0
        return ISD::UNINDEXED;
178
1.31M
      case TTI::MIM_PreInc:
179
0
        return ISD::PRE_INC;
180
1.31M
      case TTI::MIM_PreDec:
181
0
        return ISD::PRE_DEC;
182
1.31M
      case TTI::MIM_PostInc:
183
1.31M
        return ISD::POST_INC;
184
1.31M
      case TTI::MIM_PostDec:
185
0
        return ISD::POST_DEC;
186
0
    }
187
0
    llvm_unreachable("Unexpected MemIndexedMode");
188
0
  }
llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getISDIndexedMode(llvm::TargetTransformInfo::MemIndexedMode)
Line
Count
Source
174
8
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175
8
    switch (M) {
176
8
      case TTI::MIM_Unindexed:
177
0
        return ISD::UNINDEXED;
178
8
      case TTI::MIM_PreInc:
179
0
        return ISD::PRE_INC;
180
8
      case TTI::MIM_PreDec:
181
0
        return ISD::PRE_DEC;
182
8
      case TTI::MIM_PostInc:
183
8
        return ISD::POST_INC;
184
8
      case TTI::MIM_PostDec:
185
0
        return ISD::POST_DEC;
186
0
    }
187
0
    llvm_unreachable("Unexpected MemIndexedMode");
188
0
  }
189
190
protected:
191
  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
192
16.5M
      : BaseT(DL) {}
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&)
Line
Count
Source
192
11.9M
      : BaseT(DL) {}
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&)
Line
Count
Source
192
37.9k
      : BaseT(DL) {}
llvm::BasicTTIImplBase<llvm::AMDGPUTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&)
Line
Count
Source
192
551k
      : BaseT(DL) {}
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&)
Line
Count
Source
192
513k
      : BaseT(DL) {}
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&)
Line
Count
Source
192
656k
      : BaseT(DL) {}
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&)
Line
Count
Source
192
30.6k
      : BaseT(DL) {}
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&)
Line
Count
Source
192
648
      : BaseT(DL) {}
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&)
Line
Count
Source
192
19.6k
      : BaseT(DL) {}
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&)
Line
Count
Source
192
97.9k
      : BaseT(DL) {}
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&)
Line
Count
Source
192
23.8k
      : BaseT(DL) {}
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&)
Line
Count
Source
192
107k
      : BaseT(DL) {}
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&)
Line
Count
Source
192
33.7k
      : BaseT(DL) {}
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&)
Line
Count
Source
192
2.43M
      : BaseT(DL) {}
llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&)
Line
Count
Source
192
1.89k
      : BaseT(DL) {}
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::BasicTTIImplBase(llvm::TargetMachine const*, llvm::DataLayout const&)
Line
Count
Source
192
95.5k
      : BaseT(DL) {}
193
194
  using TargetTransformInfoImplBase::DL;
195
196
public:
197
  /// \name Scalar TTI Implementations
198
  /// @{
199
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
200
                                      unsigned AddressSpace, unsigned Alignment,
201
9.42k
                                      bool *Fast) const {
202
9.42k
    EVT E = EVT::getIntegerVT(Context, BitWidth);
203
9.42k
    return getTLI()->allowsMisalignedMemoryAccesses(
204
9.42k
        E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
205
9.42k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const
Line
Count
Source
201
118
                                      bool *Fast) const {
202
118
    EVT E = EVT::getIntegerVT(Context, BitWidth);
203
118
    return getTLI()->allowsMisalignedMemoryAccesses(
204
118
        E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
205
118
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const
Line
Count
Source
201
9.24k
                                      bool *Fast) const {
202
9.24k
    EVT E = EVT::getIntegerVT(Context, BitWidth);
203
9.24k
    return getTLI()->allowsMisalignedMemoryAccesses(
204
9.24k
        E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
205
9.24k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const
Line
Count
Source
201
21
                                      bool *Fast) const {
202
21
    EVT E = EVT::getIntegerVT(Context, BitWidth);
203
21
    return getTLI()->allowsMisalignedMemoryAccesses(
204
21
        E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
205
21
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const
Line
Count
Source
201
47
                                      bool *Fast) const {
202
47
    EVT E = EVT::getIntegerVT(Context, BitWidth);
203
47
    return getTLI()->allowsMisalignedMemoryAccesses(
204
47
        E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
205
47
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::allowsMisalignedMemoryAccesses(llvm::LLVMContext&, unsigned int, unsigned int, unsigned int, bool*) const
206
207
469k
  bool hasBranchDivergence() { return false; }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::hasBranchDivergence()
Line
Count
Source
207
359k
  bool hasBranchDivergence() { return false; }
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::hasBranchDivergence()
Line
Count
Source
207
2.44k
  bool hasBranchDivergence() { return false; }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::hasBranchDivergence()
Line
Count
Source
207
22.3k
  bool hasBranchDivergence() { return false; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::hasBranchDivergence()
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::hasBranchDivergence()
Line
Count
Source
207
49
  bool hasBranchDivergence() { return false; }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::hasBranchDivergence()
Line
Count
Source
207
81
  bool hasBranchDivergence() { return false; }
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::hasBranchDivergence()
Line
Count
Source
207
6
  bool hasBranchDivergence() { return false; }
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::hasBranchDivergence()
Line
Count
Source
207
3.43k
  bool hasBranchDivergence() { return false; }
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::hasBranchDivergence()
Line
Count
Source
207
170
  bool hasBranchDivergence() { return false; }
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::hasBranchDivergence()
Line
Count
Source
207
81.2k
  bool hasBranchDivergence() { return false; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::hasBranchDivergence()
208
209
0
  bool isSourceOfDivergence(const Value *V) { return false; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isSourceOfDivergence(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isSourceOfDivergence(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isSourceOfDivergence(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isSourceOfDivergence(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isSourceOfDivergence(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isSourceOfDivergence(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isSourceOfDivergence(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isSourceOfDivergence(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isSourceOfDivergence(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isSourceOfDivergence(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isSourceOfDivergence(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isSourceOfDivergence(llvm::Value const*)
210
211
108
  bool isAlwaysUniform(const Value *V) { return false; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isAlwaysUniform(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isAlwaysUniform(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isAlwaysUniform(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isAlwaysUniform(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isAlwaysUniform(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isAlwaysUniform(llvm::Value const*)
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isAlwaysUniform(llvm::Value const*)
Line
Count
Source
211
108
  bool isAlwaysUniform(const Value *V) { return false; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isAlwaysUniform(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isAlwaysUniform(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isAlwaysUniform(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isAlwaysUniform(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isAlwaysUniform(llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isAlwaysUniform(llvm::Value const*)
212
213
2.36k
  unsigned getFlatAddressSpace() {
214
2.36k
    // Return an invalid address space.
215
2.36k
    return -1;
216
2.36k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getFlatAddressSpace()
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getFlatAddressSpace()
Line
Count
Source
213
2.36k
  unsigned getFlatAddressSpace() {
214
2.36k
    // Return an invalid address space.
215
2.36k
    return -1;
216
2.36k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getFlatAddressSpace()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getFlatAddressSpace()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getFlatAddressSpace()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getFlatAddressSpace()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getFlatAddressSpace()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getFlatAddressSpace()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getFlatAddressSpace()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getFlatAddressSpace()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getFlatAddressSpace()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getFlatAddressSpace()
217
218
2.61M
  bool isLegalAddImmediate(int64_t imm) {
219
2.61M
    return getTLI()->isLegalAddImmediate(imm);
220
2.61M
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isLegalAddImmediate(long long)
Line
Count
Source
218
2.54M
  bool isLegalAddImmediate(int64_t imm) {
219
2.54M
    return getTLI()->isLegalAddImmediate(imm);
220
2.54M
  }
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isLegalAddImmediate(long long)
Line
Count
Source
218
14
  bool isLegalAddImmediate(int64_t imm) {
219
14
    return getTLI()->isLegalAddImmediate(imm);
220
14
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isLegalAddImmediate(long long)
Line
Count
Source
218
660
  bool isLegalAddImmediate(int64_t imm) {
219
660
    return getTLI()->isLegalAddImmediate(imm);
220
660
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isLegalAddImmediate(long long)
Line
Count
Source
218
29.4k
  bool isLegalAddImmediate(int64_t imm) {
219
29.4k
    return getTLI()->isLegalAddImmediate(imm);
220
29.4k
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isLegalAddImmediate(long long)
Line
Count
Source
218
1.15k
  bool isLegalAddImmediate(int64_t imm) {
219
1.15k
    return getTLI()->isLegalAddImmediate(imm);
220
1.15k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isLegalAddImmediate(long long)
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isLegalAddImmediate(long long)
Line
Count
Source
218
5
  bool isLegalAddImmediate(int64_t imm) {
219
5
    return getTLI()->isLegalAddImmediate(imm);
220
5
  }
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isLegalAddImmediate(long long)
Line
Count
Source
218
19
  bool isLegalAddImmediate(int64_t imm) {
219
19
    return getTLI()->isLegalAddImmediate(imm);
220
19
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isLegalAddImmediate(long long)
Line
Count
Source
218
4.39k
  bool isLegalAddImmediate(int64_t imm) {
219
4.39k
    return getTLI()->isLegalAddImmediate(imm);
220
4.39k
  }
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isLegalAddImmediate(long long)
Line
Count
Source
218
38
  bool isLegalAddImmediate(int64_t imm) {
219
38
    return getTLI()->isLegalAddImmediate(imm);
220
38
  }
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isLegalAddImmediate(long long)
Line
Count
Source
218
22
  bool isLegalAddImmediate(int64_t imm) {
219
22
    return getTLI()->isLegalAddImmediate(imm);
220
22
  }
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isLegalAddImmediate(long long)
Line
Count
Source
218
12
  bool isLegalAddImmediate(int64_t imm) {
219
12
    return getTLI()->isLegalAddImmediate(imm);
220
12
  }
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isLegalAddImmediate(long long)
Line
Count
Source
218
34.0k
  bool isLegalAddImmediate(int64_t imm) {
219
34.0k
    return getTLI()->isLegalAddImmediate(imm);
220
34.0k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isLegalAddImmediate(long long)
221
222
535k
  bool isLegalICmpImmediate(int64_t imm) {
223
535k
    return getTLI()->isLegalICmpImmediate(imm);
224
535k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isLegalICmpImmediate(long long)
Line
Count
Source
222
452k
  bool isLegalICmpImmediate(int64_t imm) {
223
452k
    return getTLI()->isLegalICmpImmediate(imm);
224
452k
  }
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isLegalICmpImmediate(long long)
Line
Count
Source
222
74
  bool isLegalICmpImmediate(int64_t imm) {
223
74
    return getTLI()->isLegalICmpImmediate(imm);
224
74
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isLegalICmpImmediate(long long)
Line
Count
Source
222
470
  bool isLegalICmpImmediate(int64_t imm) {
223
470
    return getTLI()->isLegalICmpImmediate(imm);
224
470
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isLegalICmpImmediate(long long)
Line
Count
Source
222
12.9k
  bool isLegalICmpImmediate(int64_t imm) {
223
12.9k
    return getTLI()->isLegalICmpImmediate(imm);
224
12.9k
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isLegalICmpImmediate(long long)
Line
Count
Source
222
1.27k
  bool isLegalICmpImmediate(int64_t imm) {
223
1.27k
    return getTLI()->isLegalICmpImmediate(imm);
224
1.27k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isLegalICmpImmediate(long long)
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isLegalICmpImmediate(long long)
Line
Count
Source
222
22
  bool isLegalICmpImmediate(int64_t imm) {
223
22
    return getTLI()->isLegalICmpImmediate(imm);
224
22
  }
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isLegalICmpImmediate(long long)
Line
Count
Source
222
48
  bool isLegalICmpImmediate(int64_t imm) {
223
48
    return getTLI()->isLegalICmpImmediate(imm);
224
48
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isLegalICmpImmediate(long long)
Line
Count
Source
222
836
  bool isLegalICmpImmediate(int64_t imm) {
223
836
    return getTLI()->isLegalICmpImmediate(imm);
224
836
  }
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isLegalICmpImmediate(long long)
Line
Count
Source
222
19
  bool isLegalICmpImmediate(int64_t imm) {
223
19
    return getTLI()->isLegalICmpImmediate(imm);
224
19
  }
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isLegalICmpImmediate(long long)
Line
Count
Source
222
222
  bool isLegalICmpImmediate(int64_t imm) {
223
222
    return getTLI()->isLegalICmpImmediate(imm);
224
222
  }
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isLegalICmpImmediate(long long)
Line
Count
Source
222
50
  bool isLegalICmpImmediate(int64_t imm) {
223
50
    return getTLI()->isLegalICmpImmediate(imm);
224
50
  }
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isLegalICmpImmediate(long long)
Line
Count
Source
222
66.9k
  bool isLegalICmpImmediate(int64_t imm) {
223
66.9k
    return getTLI()->isLegalICmpImmediate(imm);
224
66.9k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isLegalICmpImmediate(long long)
225
226
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
227
                             bool HasBaseReg, int64_t Scale,
228
59.0M
                             unsigned AddrSpace, Instruction *I = nullptr) {
229
59.0M
    TargetLoweringBase::AddrMode AM;
230
59.0M
    AM.BaseGV = BaseGV;
231
59.0M
    AM.BaseOffs = BaseOffset;
232
59.0M
    AM.HasBaseReg = HasBaseReg;
233
59.0M
    AM.Scale = Scale;
234
59.0M
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235
59.0M
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*)
Line
Count
Source
228
53.7M
                             unsigned AddrSpace, Instruction *I = nullptr) {
229
53.7M
    TargetLoweringBase::AddrMode AM;
230
53.7M
    AM.BaseGV = BaseGV;
231
53.7M
    AM.BaseOffs = BaseOffset;
232
53.7M
    AM.HasBaseReg = HasBaseReg;
233
53.7M
    AM.Scale = Scale;
234
53.7M
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235
53.7M
  }
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*)
Line
Count
Source
228
2.39k
                             unsigned AddrSpace, Instruction *I = nullptr) {
229
2.39k
    TargetLoweringBase::AddrMode AM;
230
2.39k
    AM.BaseGV = BaseGV;
231
2.39k
    AM.BaseOffs = BaseOffset;
232
2.39k
    AM.HasBaseReg = HasBaseReg;
233
2.39k
    AM.Scale = Scale;
234
2.39k
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235
2.39k
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*)
Line
Count
Source
228
62.9k
                             unsigned AddrSpace, Instruction *I = nullptr) {
229
62.9k
    TargetLoweringBase::AddrMode AM;
230
62.9k
    AM.BaseGV = BaseGV;
231
62.9k
    AM.BaseOffs = BaseOffset;
232
62.9k
    AM.HasBaseReg = HasBaseReg;
233
62.9k
    AM.Scale = Scale;
234
62.9k
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235
62.9k
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*)
Line
Count
Source
228
866k
                             unsigned AddrSpace, Instruction *I = nullptr) {
229
866k
    TargetLoweringBase::AddrMode AM;
230
866k
    AM.BaseGV = BaseGV;
231
866k
    AM.BaseOffs = BaseOffset;
232
866k
    AM.HasBaseReg = HasBaseReg;
233
866k
    AM.Scale = Scale;
234
866k
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235
866k
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*)
Line
Count
Source
228
21.4k
                             unsigned AddrSpace, Instruction *I = nullptr) {
229
21.4k
    TargetLoweringBase::AddrMode AM;
230
21.4k
    AM.BaseGV = BaseGV;
231
21.4k
    AM.BaseOffs = BaseOffset;
232
21.4k
    AM.HasBaseReg = HasBaseReg;
233
21.4k
    AM.Scale = Scale;
234
21.4k
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235
21.4k
  }
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*)
Line
Count
Source
228
32
                             unsigned AddrSpace, Instruction *I = nullptr) {
229
32
    TargetLoweringBase::AddrMode AM;
230
32
    AM.BaseGV = BaseGV;
231
32
    AM.BaseOffs = BaseOffset;
232
32
    AM.HasBaseReg = HasBaseReg;
233
32
    AM.Scale = Scale;
234
32
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235
32
  }
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*)
Line
Count
Source
228
333
                             unsigned AddrSpace, Instruction *I = nullptr) {
229
333
    TargetLoweringBase::AddrMode AM;
230
333
    AM.BaseGV = BaseGV;
231
333
    AM.BaseOffs = BaseOffset;
232
333
    AM.HasBaseReg = HasBaseReg;
233
333
    AM.Scale = Scale;
234
333
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235
333
  }
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*)
Line
Count
Source
228
899
                             unsigned AddrSpace, Instruction *I = nullptr) {
229
899
    TargetLoweringBase::AddrMode AM;
230
899
    AM.BaseGV = BaseGV;
231
899
    AM.BaseOffs = BaseOffset;
232
899
    AM.HasBaseReg = HasBaseReg;
233
899
    AM.Scale = Scale;
234
899
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235
899
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*)
Line
Count
Source
228
75.9k
                             unsigned AddrSpace, Instruction *I = nullptr) {
229
75.9k
    TargetLoweringBase::AddrMode AM;
230
75.9k
    AM.BaseGV = BaseGV;
231
75.9k
    AM.BaseOffs = BaseOffset;
232
75.9k
    AM.HasBaseReg = HasBaseReg;
233
75.9k
    AM.Scale = Scale;
234
75.9k
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235
75.9k
  }
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*)
Line
Count
Source
228
194
                             unsigned AddrSpace, Instruction *I = nullptr) {
229
194
    TargetLoweringBase::AddrMode AM;
230
194
    AM.BaseGV = BaseGV;
231
194
    AM.BaseOffs = BaseOffset;
232
194
    AM.HasBaseReg = HasBaseReg;
233
194
    AM.Scale = Scale;
234
194
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235
194
  }
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*)
Line
Count
Source
228
4.13k
                             unsigned AddrSpace, Instruction *I = nullptr) {
229
4.13k
    TargetLoweringBase::AddrMode AM;
230
4.13k
    AM.BaseGV = BaseGV;
231
4.13k
    AM.BaseOffs = BaseOffset;
232
4.13k
    AM.HasBaseReg = HasBaseReg;
233
4.13k
    AM.Scale = Scale;
234
4.13k
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235
4.13k
  }
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*)
Line
Count
Source
228
116
                             unsigned AddrSpace, Instruction *I = nullptr) {
229
116
    TargetLoweringBase::AddrMode AM;
230
116
    AM.BaseGV = BaseGV;
231
116
    AM.BaseOffs = BaseOffset;
232
116
    AM.HasBaseReg = HasBaseReg;
233
116
    AM.Scale = Scale;
234
116
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235
116
  }
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*)
Line
Count
Source
228
4.26M
                             unsigned AddrSpace, Instruction *I = nullptr) {
229
4.26M
    TargetLoweringBase::AddrMode AM;
230
4.26M
    AM.BaseGV = BaseGV;
231
4.26M
    AM.BaseOffs = BaseOffset;
232
4.26M
    AM.HasBaseReg = HasBaseReg;
233
4.26M
    AM.Scale = Scale;
234
4.26M
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235
4.26M
  }
llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isLegalAddressingMode(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int, llvm::Instruction*)
Line
Count
Source
228
10
                             unsigned AddrSpace, Instruction *I = nullptr) {
229
10
    TargetLoweringBase::AddrMode AM;
230
10
    AM.BaseGV = BaseGV;
231
10
    AM.BaseOffs = BaseOffset;
232
10
    AM.HasBaseReg = HasBaseReg;
233
10
    AM.Scale = Scale;
234
10
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235
10
  }
236
237
  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
238
7.82M
                          const DataLayout &DL) const {
239
7.82M
    EVT VT = getTLI()->getValueType(DL, Ty);
240
7.82M
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241
7.82M
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
238
6.95M
                          const DataLayout &DL) const {
239
6.95M
    EVT VT = getTLI()->getValueType(DL, Ty);
240
6.95M
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241
6.95M
  }
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
238
152
                          const DataLayout &DL) const {
239
152
    EVT VT = getTLI()->getValueType(DL, Ty);
240
152
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241
152
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
238
3.63k
                          const DataLayout &DL) const {
239
3.63k
    EVT VT = getTLI()->getValueType(DL, Ty);
240
3.63k
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241
3.63k
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
238
177k
                          const DataLayout &DL) const {
239
177k
    EVT VT = getTLI()->getValueType(DL, Ty);
240
177k
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241
177k
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
238
8.07k
                          const DataLayout &DL) const {
239
8.07k
    EVT VT = getTLI()->getValueType(DL, Ty);
240
8.07k
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241
8.07k
  }
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
238
56
                          const DataLayout &DL) const {
239
56
    EVT VT = getTLI()->getValueType(DL, Ty);
240
56
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241
56
  }
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
238
267
                          const DataLayout &DL) const {
239
267
    EVT VT = getTLI()->getValueType(DL, Ty);
240
267
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241
267
  }
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
238
247
                          const DataLayout &DL) const {
239
247
    EVT VT = getTLI()->getValueType(DL, Ty);
240
247
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241
247
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
238
11.6k
                          const DataLayout &DL) const {
239
11.6k
    EVT VT = getTLI()->getValueType(DL, Ty);
240
11.6k
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241
11.6k
  }
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
238
54
                          const DataLayout &DL) const {
239
54
    EVT VT = getTLI()->getValueType(DL, Ty);
240
54
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241
54
  }
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
238
773
                          const DataLayout &DL) const {
239
773
    EVT VT = getTLI()->getValueType(DL, Ty);
240
773
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241
773
  }
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
238
184
                          const DataLayout &DL) const {
239
184
    EVT VT = getTLI()->getValueType(DL, Ty);
240
184
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241
184
  }
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
238
659k
                          const DataLayout &DL) const {
239
659k
    EVT VT = getTLI()->getValueType(DL, Ty);
240
659k
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241
659k
  }
llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isIndexedLoadLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
238
4
                          const DataLayout &DL) const {
239
4
    EVT VT = getTLI()->getValueType(DL, Ty);
240
4
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241
4
  }
242
243
  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
244
676k
                           const DataLayout &DL) const {
245
676k
    EVT VT = getTLI()->getValueType(DL, Ty);
246
676k
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
247
676k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
244
152
                           const DataLayout &DL) const {
245
152
    EVT VT = getTLI()->getValueType(DL, Ty);
246
152
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
247
152
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
244
3.63k
                           const DataLayout &DL) const {
245
3.63k
    EVT VT = getTLI()->getValueType(DL, Ty);
246
3.63k
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
247
3.63k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
244
56
                           const DataLayout &DL) const {
245
56
    EVT VT = getTLI()->getValueType(DL, Ty);
246
56
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
247
56
  }
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
244
187
                           const DataLayout &DL) const {
245
187
    EVT VT = getTLI()->getValueType(DL, Ty);
246
187
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
247
187
  }
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
244
247
                           const DataLayout &DL) const {
245
247
    EVT VT = getTLI()->getValueType(DL, Ty);
246
247
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
247
247
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
244
11.6k
                           const DataLayout &DL) const {
245
11.6k
    EVT VT = getTLI()->getValueType(DL, Ty);
246
11.6k
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
247
11.6k
  }
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
244
54
                           const DataLayout &DL) const {
245
54
    EVT VT = getTLI()->getValueType(DL, Ty);
246
54
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
247
54
  }
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
244
773
                           const DataLayout &DL) const {
245
773
    EVT VT = getTLI()->getValueType(DL, Ty);
246
773
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
247
773
  }
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
244
184
                           const DataLayout &DL) const {
245
184
    EVT VT = getTLI()->getValueType(DL, Ty);
246
184
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
247
184
  }
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
244
659k
                           const DataLayout &DL) const {
245
659k
    EVT VT = getTLI()->getValueType(DL, Ty);
246
659k
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
247
659k
  }
llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isIndexedStoreLegal(llvm::TargetTransformInfo::MemIndexedMode, llvm::Type*, llvm::DataLayout const&) const
Line
Count
Source
244
4
                           const DataLayout &DL) const {
245
4
    EVT VT = getTLI()->getValueType(DL, Ty);
246
4
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
247
4
  }
248
249
4.21M
  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250
4.21M
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251
4.21M
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost)
Line
Count
Source
249
4.07M
  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250
4.07M
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251
4.07M
  }
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost)
Line
Count
Source
249
86
  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250
86
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251
86
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost)
Line
Count
Source
249
1.02k
  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250
1.02k
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251
1.02k
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost)
Line
Count
Source
249
123k
  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250
123k
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251
123k
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost)
Line
Count
Source
249
4.38k
  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250
4.38k
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251
4.38k
  }
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost)
Line
Count
Source
249
28
  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250
28
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251
28
  }
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost)
Line
Count
Source
249
142
  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250
142
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251
142
  }
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost)
Line
Count
Source
249
130
  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250
130
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251
130
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost)
Line
Count
Source
249
4.90k
  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250
4.90k
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251
4.90k
  }
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost)
Line
Count
Source
249
39
  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250
39
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251
39
  }
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost)
Line
Count
Source
249
96
  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250
96
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251
96
  }
llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isLSRCostLess(llvm::TargetTransformInfo::LSRCost, llvm::TargetTransformInfo::LSRCost)
Line
Count
Source
249
2
  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250
2
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251
2
  }
252
253
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
254
6.05M
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
255
6.05M
    TargetLoweringBase::AddrMode AM;
256
6.05M
    AM.BaseGV = BaseGV;
257
6.05M
    AM.BaseOffs = BaseOffset;
258
6.05M
    AM.HasBaseReg = HasBaseReg;
259
6.05M
    AM.Scale = Scale;
260
6.05M
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
261
6.05M
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int)
Line
Count
Source
254
5.24M
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
255
5.24M
    TargetLoweringBase::AddrMode AM;
256
5.24M
    AM.BaseGV = BaseGV;
257
5.24M
    AM.BaseOffs = BaseOffset;
258
5.24M
    AM.HasBaseReg = HasBaseReg;
259
5.24M
    AM.Scale = Scale;
260
5.24M
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
261
5.24M
  }
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int)
Line
Count
Source
254
10
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
255
10
    TargetLoweringBase::AddrMode AM;
256
10
    AM.BaseGV = BaseGV;
257
10
    AM.BaseOffs = BaseOffset;
258
10
    AM.HasBaseReg = HasBaseReg;
259
10
    AM.Scale = Scale;
260
10
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
261
10
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int)
Line
Count
Source
254
446
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
255
446
    TargetLoweringBase::AddrMode AM;
256
446
    AM.BaseGV = BaseGV;
257
446
    AM.BaseOffs = BaseOffset;
258
446
    AM.HasBaseReg = HasBaseReg;
259
446
    AM.Scale = Scale;
260
446
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
261
446
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int)
Line
Count
Source
254
105k
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
255
105k
    TargetLoweringBase::AddrMode AM;
256
105k
    AM.BaseGV = BaseGV;
257
105k
    AM.BaseOffs = BaseOffset;
258
105k
    AM.HasBaseReg = HasBaseReg;
259
105k
    AM.Scale = Scale;
260
105k
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
261
105k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int)
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int)
Line
Count
Source
254
4
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
255
4
    TargetLoweringBase::AddrMode AM;
256
4
    AM.BaseGV = BaseGV;
257
4
    AM.BaseOffs = BaseOffset;
258
4
    AM.HasBaseReg = HasBaseReg;
259
4
    AM.Scale = Scale;
260
4
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
261
4
  }
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int)
Line
Count
Source
254
14
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
255
14
    TargetLoweringBase::AddrMode AM;
256
14
    AM.BaseGV = BaseGV;
257
14
    AM.BaseOffs = BaseOffset;
258
14
    AM.HasBaseReg = HasBaseReg;
259
14
    AM.Scale = Scale;
260
14
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
261
14
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int)
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int)
Line
Count
Source
254
11.9k
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
255
11.9k
    TargetLoweringBase::AddrMode AM;
256
11.9k
    AM.BaseGV = BaseGV;
257
11.9k
    AM.BaseOffs = BaseOffset;
258
11.9k
    AM.HasBaseReg = HasBaseReg;
259
11.9k
    AM.Scale = Scale;
260
11.9k
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
261
11.9k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int)
Line
Count
Source
254
662
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
255
662
    TargetLoweringBase::AddrMode AM;
256
662
    AM.BaseGV = BaseGV;
257
662
    AM.BaseOffs = BaseOffset;
258
662
    AM.HasBaseReg = HasBaseReg;
259
662
    AM.Scale = Scale;
260
662
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
261
662
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int)
Line
Count
Source
254
690k
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
255
690k
    TargetLoweringBase::AddrMode AM;
256
690k
    AM.BaseGV = BaseGV;
257
690k
    AM.BaseOffs = BaseOffset;
258
690k
    AM.HasBaseReg = HasBaseReg;
259
690k
    AM.Scale = Scale;
260
690k
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
261
690k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getScalingFactorCost(llvm::Type*, llvm::GlobalValue*, long long, bool, long long, unsigned int)
262
263
1.46M
  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264
1.46M
    return getTLI()->isTruncateFree(Ty1, Ty2);
265
1.46M
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*)
Line
Count
Source
263
1.43M
  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264
1.43M
    return getTLI()->isTruncateFree(Ty1, Ty2);
265
1.43M
  }
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*)
Line
Count
Source
263
30
  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264
30
    return getTLI()->isTruncateFree(Ty1, Ty2);
265
30
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*)
Line
Count
Source
263
1.33k
  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264
1.33k
    return getTLI()->isTruncateFree(Ty1, Ty2);
265
1.33k
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*)
Line
Count
Source
263
4.18k
  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264
4.18k
    return getTLI()->isTruncateFree(Ty1, Ty2);
265
4.18k
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*)
Line
Count
Source
263
443
  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264
443
    return getTLI()->isTruncateFree(Ty1, Ty2);
265
443
  }
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*)
Line
Count
Source
263
8
  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264
8
    return getTLI()->isTruncateFree(Ty1, Ty2);
265
8
  }
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*)
Line
Count
Source
263
65
  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264
65
    return getTLI()->isTruncateFree(Ty1, Ty2);
265
65
  }
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*)
Line
Count
Source
263
98
  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264
98
    return getTLI()->isTruncateFree(Ty1, Ty2);
265
98
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*)
Line
Count
Source
263
3.88k
  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264
3.88k
    return getTLI()->isTruncateFree(Ty1, Ty2);
265
3.88k
  }
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*)
Line
Count
Source
263
184
  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264
184
    return getTLI()->isTruncateFree(Ty1, Ty2);
265
184
  }
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*)
Line
Count
Source
263
209
  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264
209
    return getTLI()->isTruncateFree(Ty1, Ty2);
265
209
  }
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*)
Line
Count
Source
263
52
  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264
52
    return getTLI()->isTruncateFree(Ty1, Ty2);
265
52
  }
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*)
Line
Count
Source
263
21.6k
  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264
21.6k
    return getTLI()->isTruncateFree(Ty1, Ty2);
265
21.6k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isTruncateFree(llvm::Type*, llvm::Type*)
266
267
121k
  bool isProfitableToHoist(Instruction *I) {
268
121k
    return getTLI()->isProfitableToHoist(I);
269
121k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isProfitableToHoist(llvm::Instruction*)
Line
Count
Source
267
78.6k
  bool isProfitableToHoist(Instruction *I) {
268
78.6k
    return getTLI()->isProfitableToHoist(I);
269
78.6k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isProfitableToHoist(llvm::Instruction*)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isProfitableToHoist(llvm::Instruction*)
Line
Count
Source
267
24
  bool isProfitableToHoist(Instruction *I) {
268
24
    return getTLI()->isProfitableToHoist(I);
269
24
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isProfitableToHoist(llvm::Instruction*)
Line
Count
Source
267
8.67k
  bool isProfitableToHoist(Instruction *I) {
268
8.67k
    return getTLI()->isProfitableToHoist(I);
269
8.67k
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isProfitableToHoist(llvm::Instruction*)
Line
Count
Source
267
16
  bool isProfitableToHoist(Instruction *I) {
268
16
    return getTLI()->isProfitableToHoist(I);
269
16
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isProfitableToHoist(llvm::Instruction*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isProfitableToHoist(llvm::Instruction*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isProfitableToHoist(llvm::Instruction*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isProfitableToHoist(llvm::Instruction*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isProfitableToHoist(llvm::Instruction*)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isProfitableToHoist(llvm::Instruction*)
Line
Count
Source
267
64
  bool isProfitableToHoist(Instruction *I) {
268
64
    return getTLI()->isProfitableToHoist(I);
269
64
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isProfitableToHoist(llvm::Instruction*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isProfitableToHoist(llvm::Instruction*)
Line
Count
Source
267
33.6k
  bool isProfitableToHoist(Instruction *I) {
268
33.6k
    return getTLI()->isProfitableToHoist(I);
269
33.6k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isProfitableToHoist(llvm::Instruction*)
270
271
674
  bool useAA() const { return getST()->useAA(); }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::useAA() const
Line
Count
Source
271
35
  bool useAA() const { return getST()->useAA(); }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::useAA() const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::useAA() const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::useAA() const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::useAA() const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::useAA() const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::useAA() const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::useAA() const
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::useAA() const
Line
Count
Source
271
639
  bool useAA() const { return getST()->useAA(); }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::useAA() const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::useAA() const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::useAA() const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::X86TTIImpl>::useAA() const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::useAA() const
272
273
18.0k
  bool isTypeLegal(Type *Ty) {
274
18.0k
    EVT VT = getTLI()->getValueType(DL, Ty);
275
18.0k
    return getTLI()->isTypeLegal(VT);
276
18.0k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isTypeLegal(llvm::Type*)
Line
Count
Source
273
17.2k
  bool isTypeLegal(Type *Ty) {
274
17.2k
    EVT VT = getTLI()->getValueType(DL, Ty);
275
17.2k
    return getTLI()->isTypeLegal(VT);
276
17.2k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isTypeLegal(llvm::Type*)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isTypeLegal(llvm::Type*)
Line
Count
Source
273
2
  bool isTypeLegal(Type *Ty) {
274
2
    EVT VT = getTLI()->getValueType(DL, Ty);
275
2
    return getTLI()->isTypeLegal(VT);
276
2
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isTypeLegal(llvm::Type*)
Line
Count
Source
273
160
  bool isTypeLegal(Type *Ty) {
274
160
    EVT VT = getTLI()->getValueType(DL, Ty);
275
160
    return getTLI()->isTypeLegal(VT);
276
160
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isTypeLegal(llvm::Type*)
Line
Count
Source
273
3
  bool isTypeLegal(Type *Ty) {
274
3
    EVT VT = getTLI()->getValueType(DL, Ty);
275
3
    return getTLI()->isTypeLegal(VT);
276
3
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isTypeLegal(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isTypeLegal(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isTypeLegal(llvm::Type*)
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isTypeLegal(llvm::Type*)
Line
Count
Source
273
75
  bool isTypeLegal(Type *Ty) {
274
75
    EVT VT = getTLI()->getValueType(DL, Ty);
275
75
    return getTLI()->isTypeLegal(VT);
276
75
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isTypeLegal(llvm::Type*)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isTypeLegal(llvm::Type*)
Line
Count
Source
273
5
  bool isTypeLegal(Type *Ty) {
274
5
    EVT VT = getTLI()->getValueType(DL, Ty);
275
5
    return getTLI()->isTypeLegal(VT);
276
5
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isTypeLegal(llvm::Type*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isTypeLegal(llvm::Type*)
Line
Count
Source
273
624
  bool isTypeLegal(Type *Ty) {
274
624
    EVT VT = getTLI()->getValueType(DL, Ty);
275
624
    return getTLI()->isTypeLegal(VT);
276
624
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isTypeLegal(llvm::Type*)
277
278
  int getGEPCost(Type *PointeeType, const Value *Ptr,
279
6.06M
                 ArrayRef<const Value *> Operands) {
280
6.06M
    return BaseT::getGEPCost(PointeeType, Ptr, Operands);
281
6.06M
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
279
5.47M
                 ArrayRef<const Value *> Operands) {
280
5.47M
    return BaseT::getGEPCost(PointeeType, Ptr, Operands);
281
5.47M
  }
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
279
1.84k
                 ArrayRef<const Value *> Operands) {
280
1.84k
    return BaseT::getGEPCost(PointeeType, Ptr, Operands);
281
1.84k
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
279
38.1k
                 ArrayRef<const Value *> Operands) {
280
38.1k
    return BaseT::getGEPCost(PointeeType, Ptr, Operands);
281
38.1k
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
279
92.2k
                 ArrayRef<const Value *> Operands) {
280
92.2k
    return BaseT::getGEPCost(PointeeType, Ptr, Operands);
281
92.2k
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
279
29
                 ArrayRef<const Value *> Operands) {
280
29
    return BaseT::getGEPCost(PointeeType, Ptr, Operands);
281
29
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>)
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
279
335
                 ArrayRef<const Value *> Operands) {
280
335
    return BaseT::getGEPCost(PointeeType, Ptr, Operands);
281
335
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
279
1.08k
                 ArrayRef<const Value *> Operands) {
280
1.08k
    return BaseT::getGEPCost(PointeeType, Ptr, Operands);
281
1.08k
  }
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
279
132
                 ArrayRef<const Value *> Operands) {
280
132
    return BaseT::getGEPCost(PointeeType, Ptr, Operands);
281
132
  }
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
279
75
                 ArrayRef<const Value *> Operands) {
280
75
    return BaseT::getGEPCost(PointeeType, Ptr, Operands);
281
75
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
279
461k
                 ArrayRef<const Value *> Operands) {
280
461k
    return BaseT::getGEPCost(PointeeType, Ptr, Operands);
281
461k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getGEPCost(llvm::Type*, llvm::Value const*, llvm::ArrayRef<llvm::Value const*>)
282
283
1.56M
  int getExtCost(const Instruction *I, const Value *Src) {
284
1.56M
    if (getTLI()->isExtFree(I))
285
592k
      return TargetTransformInfo::TCC_Free;
286
974k
287
974k
    if (isa<ZExtInst>(I) || 
isa<SExtInst>(I)599k
)
288
901k
      if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
289
592k
        if (getTLI()->isExtLoad(LI, I, DL))
290
583k
          return TargetTransformInfo::TCC_Free;
291
390k
292
390k
    return TargetTransformInfo::TCC_Basic;
293
390k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*)
Line
Count
Source
283
1.37M
  int getExtCost(const Instruction *I, const Value *Src) {
284
1.37M
    if (getTLI()->isExtFree(I))
285
566k
      return TargetTransformInfo::TCC_Free;
286
811k
287
811k
    if (isa<ZExtInst>(I) || 
isa<SExtInst>(I)504k
)
288
738k
      if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
289
485k
        if (getTLI()->isExtLoad(LI, I, DL))
290
477k
          return TargetTransformInfo::TCC_Free;
291
334k
292
334k
    return TargetTransformInfo::TCC_Basic;
293
334k
  }
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*)
Line
Count
Source
283
7
  int getExtCost(const Instruction *I, const Value *Src) {
284
7
    if (getTLI()->isExtFree(I))
285
0
      return TargetTransformInfo::TCC_Free;
286
7
287
7
    if (isa<ZExtInst>(I) || isa<SExtInst>(I))
288
7
      if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
289
0
        if (getTLI()->isExtLoad(LI, I, DL))
290
0
          return TargetTransformInfo::TCC_Free;
291
7
292
7
    return TargetTransformInfo::TCC_Basic;
293
7
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*)
Line
Count
Source
283
221
  int getExtCost(const Instruction *I, const Value *Src) {
284
221
    if (getTLI()->isExtFree(I))
285
109
      return TargetTransformInfo::TCC_Free;
286
112
287
112
    if (isa<ZExtInst>(I) || 
isa<SExtInst>(I)103
)
288
112
      if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
289
34
        if (getTLI()->isExtLoad(LI, I, DL))
290
30
          return TargetTransformInfo::TCC_Free;
291
82
292
82
    return TargetTransformInfo::TCC_Basic;
293
82
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*)
Line
Count
Source
283
69.0k
  int getExtCost(const Instruction *I, const Value *Src) {
284
69.0k
    if (getTLI()->isExtFree(I))
285
0
      return TargetTransformInfo::TCC_Free;
286
69.0k
287
69.0k
    if (isa<ZExtInst>(I) || 
isa<SExtInst>(I)39.0k
)
288
69.0k
      if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
289
50.9k
        if (getTLI()->isExtLoad(LI, I, DL))
290
50.5k
          return TargetTransformInfo::TCC_Free;
291
18.4k
292
18.4k
    return TargetTransformInfo::TCC_Basic;
293
18.4k
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*)
Line
Count
Source
283
12
  int getExtCost(const Instruction *I, const Value *Src) {
284
12
    if (getTLI()->isExtFree(I))
285
0
      return TargetTransformInfo::TCC_Free;
286
12
287
12
    if (isa<ZExtInst>(I) || 
isa<SExtInst>(I)2
)
288
12
      if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
289
2
        if (getTLI()->isExtLoad(LI, I, DL))
290
2
          return TargetTransformInfo::TCC_Free;
291
10
292
10
    return TargetTransformInfo::TCC_Basic;
293
10
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*)
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*)
Line
Count
Source
283
1
  int getExtCost(const Instruction *I, const Value *Src) {
284
1
    if (getTLI()->isExtFree(I))
285
0
      return TargetTransformInfo::TCC_Free;
286
1
287
1
    if (isa<ZExtInst>(I) || 
isa<SExtInst>(I)0
)
288
1
      if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
289
0
        if (getTLI()->isExtLoad(LI, I, DL))
290
0
          return TargetTransformInfo::TCC_Free;
291
1
292
1
    return TargetTransformInfo::TCC_Basic;
293
1
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*)
Line
Count
Source
283
245
  int getExtCost(const Instruction *I, const Value *Src) {
284
245
    if (getTLI()->isExtFree(I))
285
6
      return TargetTransformInfo::TCC_Free;
286
239
287
239
    if (isa<ZExtInst>(I) || 
isa<SExtInst>(I)104
)
288
239
      if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
289
124
        if (getTLI()->isExtLoad(LI, I, DL))
290
119
          return TargetTransformInfo::TCC_Free;
291
120
292
120
    return TargetTransformInfo::TCC_Basic;
293
120
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*)
Line
Count
Source
283
23
  int getExtCost(const Instruction *I, const Value *Src) {
284
23
    if (getTLI()->isExtFree(I))
285
0
      return TargetTransformInfo::TCC_Free;
286
23
287
23
    if (isa<ZExtInst>(I) || 
isa<SExtInst>(I)8
)
288
23
      if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
289
9
        if (getTLI()->isExtLoad(LI, I, DL))
290
9
          return TargetTransformInfo::TCC_Free;
291
14
292
14
    return TargetTransformInfo::TCC_Basic;
293
14
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*)
Line
Count
Source
283
118k
  int getExtCost(const Instruction *I, const Value *Src) {
284
118k
    if (getTLI()->isExtFree(I))
285
25.3k
      return TargetTransformInfo::TCC_Free;
286
93.1k
287
93.1k
    if (isa<ZExtInst>(I) || 
isa<SExtInst>(I)55.4k
)
288
93.1k
      if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
289
55.8k
        if (getTLI()->isExtLoad(LI, I, DL))
290
55.6k
          return TargetTransformInfo::TCC_Free;
291
37.5k
292
37.5k
    return TargetTransformInfo::TCC_Basic;
293
37.5k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getExtCost(llvm::Instruction const*, llvm::Value const*)
294
295
  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
296
4
                            ArrayRef<const Value *> Arguments, const User *U) {
297
4
    return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
298
4
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*)
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*)
Line
Count
Source
296
2
                            ArrayRef<const Value *> Arguments, const User *U) {
297
2
    return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
298
2
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*)
Line
Count
Source
296
2
                            ArrayRef<const Value *> Arguments, const User *U) {
297
2
    return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
298
2
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value const*>, llvm::User const*)
299
300
  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
301
968k
                            ArrayRef<Type *> ParamTys, const User *U) {
302
968k
    if (IID == Intrinsic::cttz) {
303
6.85k
      if (getTLI()->isCheapToSpeculateCttz())
304
3.70k
        return TargetTransformInfo::TCC_Basic;
305
3.14k
      return TargetTransformInfo::TCC_Expensive;
306
3.14k
    }
307
962k
308
962k
    if (IID == Intrinsic::ctlz) {
309
10.3k
      if (getTLI()->isCheapToSpeculateCtlz())
310
6.37k
        return TargetTransformInfo::TCC_Basic;
311
3.94k
      return TargetTransformInfo::TCC_Expensive;
312
3.94k
    }
313
951k
314
951k
    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
315
951k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*)
Line
Count
Source
301
755k
                            ArrayRef<Type *> ParamTys, const User *U) {
302
755k
    if (IID == Intrinsic::cttz) {
303
688
      if (getTLI()->isCheapToSpeculateCttz())
304
688
        return TargetTransformInfo::TCC_Basic;
305
0
      return TargetTransformInfo::TCC_Expensive;
306
0
    }
307
754k
308
754k
    if (IID == Intrinsic::ctlz) {
309
1.51k
      if (getTLI()->isCheapToSpeculateCtlz())
310
1.51k
        return TargetTransformInfo::TCC_Basic;
311
0
      return TargetTransformInfo::TCC_Expensive;
312
0
    }
313
753k
314
753k
    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
315
753k
  }
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*)
Line
Count
Source
301
2
                            ArrayRef<Type *> ParamTys, const User *U) {
302
2
    if (IID == Intrinsic::cttz) {
303
0
      if (getTLI()->isCheapToSpeculateCttz())
304
0
        return TargetTransformInfo::TCC_Basic;
305
0
      return TargetTransformInfo::TCC_Expensive;
306
0
    }
307
2
308
2
    if (IID == Intrinsic::ctlz) {
309
0
      if (getTLI()->isCheapToSpeculateCtlz())
310
0
        return TargetTransformInfo::TCC_Basic;
311
0
      return TargetTransformInfo::TCC_Expensive;
312
0
    }
313
2
314
2
    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
315
2
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*)
Line
Count
Source
301
537
                            ArrayRef<Type *> ParamTys, const User *U) {
302
537
    if (IID == Intrinsic::cttz) {
303
12
      if (getTLI()->isCheapToSpeculateCttz())
304
12
        return TargetTransformInfo::TCC_Basic;
305
0
      return TargetTransformInfo::TCC_Expensive;
306
0
    }
307
525
308
525
    if (IID == Intrinsic::ctlz) {
309
12
      if (getTLI()->isCheapToSpeculateCtlz())
310
12
        return TargetTransformInfo::TCC_Basic;
311
0
      return TargetTransformInfo::TCC_Expensive;
312
0
    }
313
513
314
513
    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
315
513
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*)
Line
Count
Source
301
57.5k
                            ArrayRef<Type *> ParamTys, const User *U) {
302
57.5k
    if (IID == Intrinsic::cttz) {
303
2.03k
      if (getTLI()->isCheapToSpeculateCttz())
304
1.97k
        return TargetTransformInfo::TCC_Basic;
305
60
      return TargetTransformInfo::TCC_Expensive;
306
60
    }
307
55.4k
308
55.4k
    if (IID == Intrinsic::ctlz) {
309
4.23k
      if (getTLI()->isCheapToSpeculateCtlz())
310
3.80k
        return TargetTransformInfo::TCC_Basic;
311
437
      return TargetTransformInfo::TCC_Expensive;
312
437
    }
313
51.2k
314
51.2k
    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
315
51.2k
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*)
Line
Count
Source
301
14
                            ArrayRef<Type *> ParamTys, const User *U) {
302
14
    if (IID == Intrinsic::cttz) {
303
0
      if (getTLI()->isCheapToSpeculateCttz())
304
0
        return TargetTransformInfo::TCC_Basic;
305
0
      return TargetTransformInfo::TCC_Expensive;
306
0
    }
307
14
308
14
    if (IID == Intrinsic::ctlz) {
309
0
      if (getTLI()->isCheapToSpeculateCtlz())
310
0
        return TargetTransformInfo::TCC_Basic;
311
0
      return TargetTransformInfo::TCC_Expensive;
312
0
    }
313
14
314
14
    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
315
14
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*)
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*)
Line
Count
Source
301
5
                            ArrayRef<Type *> ParamTys, const User *U) {
302
5
    if (IID == Intrinsic::cttz) {
303
1
      if (getTLI()->isCheapToSpeculateCttz())
304
1
        return TargetTransformInfo::TCC_Basic;
305
0
      return TargetTransformInfo::TCC_Expensive;
306
0
    }
307
4
308
4
    if (IID == Intrinsic::ctlz) {
309
1
      if (getTLI()->isCheapToSpeculateCtlz())
310
1
        return TargetTransformInfo::TCC_Basic;
311
0
      return TargetTransformInfo::TCC_Expensive;
312
0
    }
313
3
314
3
    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
315
3
  }
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*)
Line
Count
Source
301
16
                            ArrayRef<Type *> ParamTys, const User *U) {
302
16
    if (IID == Intrinsic::cttz) {
303
0
      if (getTLI()->isCheapToSpeculateCttz())
304
0
        return TargetTransformInfo::TCC_Basic;
305
0
      return TargetTransformInfo::TCC_Expensive;
306
0
    }
307
16
308
16
    if (IID == Intrinsic::ctlz) {
309
0
      if (getTLI()->isCheapToSpeculateCtlz())
310
0
        return TargetTransformInfo::TCC_Basic;
311
0
      return TargetTransformInfo::TCC_Expensive;
312
0
    }
313
16
314
16
    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
315
16
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*)
Line
Count
Source
301
59
                            ArrayRef<Type *> ParamTys, const User *U) {
302
59
    if (IID == Intrinsic::cttz) {
303
1
      if (getTLI()->isCheapToSpeculateCttz())
304
1
        return TargetTransformInfo::TCC_Basic;
305
0
      return TargetTransformInfo::TCC_Expensive;
306
0
    }
307
58
308
58
    if (IID == Intrinsic::ctlz) {
309
1
      if (getTLI()->isCheapToSpeculateCtlz())
310
1
        return TargetTransformInfo::TCC_Basic;
311
0
      return TargetTransformInfo::TCC_Expensive;
312
0
    }
313
57
314
57
    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
315
57
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*)
Line
Count
Source
301
20
                            ArrayRef<Type *> ParamTys, const User *U) {
302
20
    if (IID == Intrinsic::cttz) {
303
0
      if (getTLI()->isCheapToSpeculateCttz())
304
0
        return TargetTransformInfo::TCC_Basic;
305
0
      return TargetTransformInfo::TCC_Expensive;
306
0
    }
307
20
308
20
    if (IID == Intrinsic::ctlz) {
309
0
      if (getTLI()->isCheapToSpeculateCtlz())
310
0
        return TargetTransformInfo::TCC_Basic;
311
0
      return TargetTransformInfo::TCC_Expensive;
312
0
    }
313
20
314
20
    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
315
20
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*)
Line
Count
Source
301
155k
                            ArrayRef<Type *> ParamTys, const User *U) {
302
155k
    if (IID == Intrinsic::cttz) {
303
4.11k
      if (getTLI()->isCheapToSpeculateCttz())
304
1.02k
        return TargetTransformInfo::TCC_Basic;
305
3.08k
      return TargetTransformInfo::TCC_Expensive;
306
3.08k
    }
307
151k
308
151k
    if (IID == Intrinsic::ctlz) {
309
4.55k
      if (getTLI()->isCheapToSpeculateCtlz())
310
1.04k
        return TargetTransformInfo::TCC_Basic;
311
3.51k
      return TargetTransformInfo::TCC_Expensive;
312
3.51k
    }
313
146k
314
146k
    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
315
146k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getIntrinsicCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::User const*)
316
317
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
318
31.4k
                                            unsigned &JumpTableSize) {
319
31.4k
    /// Try to find the estimated number of clusters. Note that the number of
320
31.4k
    /// clusters identified in this function could be different from the actual
321
31.4k
    /// numbers found in lowering. This function ignores switches that are
322
31.4k
    /// lowered with a mix of jump table / bit test / BTree. This function was
323
31.4k
    /// initially intended to be used when estimating the cost of switch in
324
31.4k
    /// inline cost heuristic, but it's a generic cost model to be used in other
325
31.4k
    /// places (e.g., in loop unrolling).
326
31.4k
    unsigned N = SI.getNumCases();
327
31.4k
    const TargetLoweringBase *TLI = getTLI();
328
31.4k
    const DataLayout &DL = this->getDataLayout();
329
31.4k
330
31.4k
    JumpTableSize = 0;
331
31.4k
    bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
332
31.4k
333
31.4k
    // Early exit if both a jump table and bit test are not allowed.
334
31.4k
    if (N < 1 || (!IsJTAllowed && 
DL.getIndexSizeInBits(0u) < N0
))
335
0
      return N;
336
31.4k
337
31.4k
    APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
338
31.4k
    APInt MinCaseVal = MaxCaseVal;
339
303k
    for (auto CI : SI.cases()) {
340
303k
      const APInt &CaseVal = CI.getCaseValue()->getValue();
341
303k
      if (CaseVal.sgt(MaxCaseVal))
342
59.0k
        MaxCaseVal = CaseVal;
343
303k
      if (CaseVal.slt(MinCaseVal))
344
30.0k
        MinCaseVal = CaseVal;
345
303k
    }
346
31.4k
347
31.4k
    // Check if suitable for a bit test
348
31.4k
    if (N <= DL.getIndexSizeInBits(0u)) {
349
30.8k
      SmallPtrSet<const BasicBlock *, 4> Dests;
350
30.8k
      for (auto I : SI.cases())
351
196k
        Dests.insert(I.getCaseSuccessor());
352
30.8k
353
30.8k
      if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
354
30.8k
                                     DL))
355
3.38k
        return 1;
356
28.0k
    }
357
28.0k
358
28.0k
    // Check if suitable for a jump table.
359
28.0k
    if (IsJTAllowed) {
360
28.0k
      if (N < 2 || N < TLI->getMinimumJumpTableEntries())
361
17.5k
        return N;
362
10.5k
      uint64_t Range =
363
10.5k
          (MaxCaseVal - MinCaseVal)
364
10.5k
              .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
365
10.5k
      // Check whether a range of clusters is dense enough for a jump table
366
10.5k
      if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
367
8.59k
        JumpTableSize = Range;
368
8.59k
        return 1;
369
8.59k
      }
370
1.90k
    }
371
1.90k
    return N;
372
1.90k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&)
Line
Count
Source
318
23.7k
                                            unsigned &JumpTableSize) {
319
23.7k
    /// Try to find the estimated number of clusters. Note that the number of
320
23.7k
    /// clusters identified in this function could be different from the actual
321
23.7k
    /// numbers found in lowering. This function ignores switches that are
322
23.7k
    /// lowered with a mix of jump table / bit test / BTree. This function was
323
23.7k
    /// initially intended to be used when estimating the cost of switch in
324
23.7k
    /// inline cost heuristic, but it's a generic cost model to be used in other
325
23.7k
    /// places (e.g., in loop unrolling).
326
23.7k
    unsigned N = SI.getNumCases();
327
23.7k
    const TargetLoweringBase *TLI = getTLI();
328
23.7k
    const DataLayout &DL = this->getDataLayout();
329
23.7k
330
23.7k
    JumpTableSize = 0;
331
23.7k
    bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
332
23.7k
333
23.7k
    // Early exit if both a jump table and bit test are not allowed.
334
23.7k
    if (N < 1 || (!IsJTAllowed && 
DL.getIndexSizeInBits(0u) < N0
))
335
0
      return N;
336
23.7k
337
23.7k
    APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
338
23.7k
    APInt MinCaseVal = MaxCaseVal;
339
270k
    for (auto CI : SI.cases()) {
340
270k
      const APInt &CaseVal = CI.getCaseValue()->getValue();
341
270k
      if (CaseVal.sgt(MaxCaseVal))
342
42.7k
        MaxCaseVal = CaseVal;
343
270k
      if (CaseVal.slt(MinCaseVal))
344
25.3k
        MinCaseVal = CaseVal;
345
270k
    }
346
23.7k
347
23.7k
    // Check if suitable for a bit test
348
23.7k
    if (N <= DL.getIndexSizeInBits(0u)) {
349
23.0k
      SmallPtrSet<const BasicBlock *, 4> Dests;
350
23.0k
      for (auto I : SI.cases())
351
163k
        Dests.insert(I.getCaseSuccessor());
352
23.0k
353
23.0k
      if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
354
23.0k
                                     DL))
355
2.11k
        return 1;
356
21.5k
    }
357
21.5k
358
21.5k
    // Check if suitable for a jump table.
359
21.5k
    if (IsJTAllowed) {
360
21.5k
      if (N < 2 || N < TLI->getMinimumJumpTableEntries())
361
13.4k
        return N;
362
8.16k
      uint64_t Range =
363
8.16k
          (MaxCaseVal - MinCaseVal)
364
8.16k
              .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
365
8.16k
      // Check whether a range of clusters is dense enough for a jump table
366
8.16k
      if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
367
6.66k
        JumpTableSize = Range;
368
6.66k
        return 1;
369
6.66k
      }
370
1.49k
    }
371
1.49k
    return N;
372
1.49k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&)
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&)
Line
Count
Source
318
1.95k
                                            unsigned &JumpTableSize) {
319
1.95k
    /// Try to find the estimated number of clusters. Note that the number of
320
1.95k
    /// clusters identified in this function could be different from the actual
321
1.95k
    /// numbers found in lowering. This function ignores switches that are
322
1.95k
    /// lowered with a mix of jump table / bit test / BTree. This function was
323
1.95k
    /// initially intended to be used when estimating the cost of switch in
324
1.95k
    /// inline cost heuristic, but it's a generic cost model to be used in other
325
1.95k
    /// places (e.g., in loop unrolling).
326
1.95k
    unsigned N = SI.getNumCases();
327
1.95k
    const TargetLoweringBase *TLI = getTLI();
328
1.95k
    const DataLayout &DL = this->getDataLayout();
329
1.95k
330
1.95k
    JumpTableSize = 0;
331
1.95k
    bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
332
1.95k
333
1.95k
    // Early exit if both a jump table and bit test are not allowed.
334
1.95k
    if (N < 1 || (!IsJTAllowed && 
DL.getIndexSizeInBits(0u) < N0
))
335
0
      return N;
336
1.95k
337
1.95k
    APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
338
1.95k
    APInt MinCaseVal = MaxCaseVal;
339
7.86k
    for (auto CI : SI.cases()) {
340
7.86k
      const APInt &CaseVal = CI.getCaseValue()->getValue();
341
7.86k
      if (CaseVal.sgt(MaxCaseVal))
342
3.60k
        MaxCaseVal = CaseVal;
343
7.86k
      if (CaseVal.slt(MinCaseVal))
344
1.34k
        MinCaseVal = CaseVal;
345
7.86k
    }
346
1.95k
347
1.95k
    // Check if suitable for a bit test
348
1.95k
    if (N <= DL.getIndexSizeInBits(0u)) {
349
1.95k
      SmallPtrSet<const BasicBlock *, 4> Dests;
350
1.95k
      for (auto I : SI.cases())
351
7.86k
        Dests.insert(I.getCaseSuccessor());
352
1.95k
353
1.95k
      if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
354
1.95k
                                     DL))
355
90
        return 1;
356
1.86k
    }
357
1.86k
358
1.86k
    // Check if suitable for a jump table.
359
1.86k
    if (IsJTAllowed) {
360
1.86k
      if (N < 2 || N < TLI->getMinimumJumpTableEntries())
361
1.17k
        return N;
362
690
      uint64_t Range =
363
690
          (MaxCaseVal - MinCaseVal)
364
690
              .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
365
690
      // Check whether a range of clusters is dense enough for a jump table
366
690
      if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
367
567
        JumpTableSize = Range;
368
567
        return 1;
369
567
      }
370
123
    }
371
123
    return N;
372
123
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&)
Line
Count
Source
318
5.78k
                                            unsigned &JumpTableSize) {
319
5.78k
    /// Try to find the estimated number of clusters. Note that the number of
320
5.78k
    /// clusters identified in this function could be different from the actual
321
5.78k
    /// numbers found in lowering. This function ignores switches that are
322
5.78k
    /// lowered with a mix of jump table / bit test / BTree. This function was
323
5.78k
    /// initially intended to be used when estimating the cost of switch in
324
5.78k
    /// inline cost heuristic, but it's a generic cost model to be used in other
325
5.78k
    /// places (e.g., in loop unrolling).
326
5.78k
    unsigned N = SI.getNumCases();
327
5.78k
    const TargetLoweringBase *TLI = getTLI();
328
5.78k
    const DataLayout &DL = this->getDataLayout();
329
5.78k
330
5.78k
    JumpTableSize = 0;
331
5.78k
    bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
332
5.78k
333
5.78k
    // Early exit if both a jump table and bit test are not allowed.
334
5.78k
    if (N < 1 || (!IsJTAllowed && 
DL.getIndexSizeInBits(0u) < N0
))
335
0
      return N;
336
5.78k
337
5.78k
    APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
338
5.78k
    APInt MinCaseVal = MaxCaseVal;
339
24.8k
    for (auto CI : SI.cases()) {
340
24.8k
      const APInt &CaseVal = CI.getCaseValue()->getValue();
341
24.8k
      if (CaseVal.sgt(MaxCaseVal))
342
12.7k
        MaxCaseVal = CaseVal;
343
24.8k
      if (CaseVal.slt(MinCaseVal))
344
3.39k
        MinCaseVal = CaseVal;
345
24.8k
    }
346
5.78k
347
5.78k
    // Check if suitable for a bit test
348
5.78k
    if (N <= DL.getIndexSizeInBits(0u)) {
349
5.78k
      SmallPtrSet<const BasicBlock *, 4> Dests;
350
5.78k
      for (auto I : SI.cases())
351
24.7k
        Dests.insert(I.getCaseSuccessor());
352
5.78k
353
5.78k
      if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
354
5.78k
                                     DL))
355
1.17k
        return 1;
356
4.60k
    }
357
4.60k
358
4.60k
    // Check if suitable for a jump table.
359
4.60k
    if (IsJTAllowed) {
360
4.60k
      if (N < 2 || N < TLI->getMinimumJumpTableEntries())
361
2.96k
        return N;
362
1.64k
      uint64_t Range =
363
1.64k
          (MaxCaseVal - MinCaseVal)
364
1.64k
              .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
365
1.64k
      // Check whether a range of clusters is dense enough for a jump table
366
1.64k
      if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
367
1.36k
        JumpTableSize = Range;
368
1.36k
        return 1;
369
1.36k
      }
370
284
    }
371
284
    return N;
372
284
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getEstimatedNumberOfCaseClusters(llvm::SwitchInst const&, unsigned int&)
373
374
0
  unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getJumpBufAlignment()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getJumpBufAlignment()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getJumpBufAlignment()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getJumpBufAlignment()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getJumpBufAlignment()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getJumpBufAlignment()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getJumpBufAlignment()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getJumpBufAlignment()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getJumpBufAlignment()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getJumpBufAlignment()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getJumpBufAlignment()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getJumpBufAlignment()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getJumpBufAlignment()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getJumpBufAlignment()
375
376
0
  unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getJumpBufSize()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getJumpBufSize()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getJumpBufSize()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getJumpBufSize()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getJumpBufSize()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getJumpBufSize()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getJumpBufSize()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getJumpBufSize()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getJumpBufSize()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getJumpBufSize()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getJumpBufSize()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getJumpBufSize()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getJumpBufSize()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getJumpBufSize()
377
378
61.3k
  bool shouldBuildLookupTables() {
379
61.3k
    const TargetLoweringBase *TLI = getTLI();
380
61.3k
    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
381
61.3k
           
TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)2.53k
;
382
61.3k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::shouldBuildLookupTables()
Line
Count
Source
378
57.7k
  bool shouldBuildLookupTables() {
379
57.7k
    const TargetLoweringBase *TLI = getTLI();
380
57.7k
    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
381
57.7k
           
TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)0
;
382
57.7k
  }
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::shouldBuildLookupTables()
Line
Count
Source
378
6
  bool shouldBuildLookupTables() {
379
6
    const TargetLoweringBase *TLI = getTLI();
380
6
    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
381
6
           TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
382
6
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::shouldBuildLookupTables()
Line
Count
Source
378
6
  bool shouldBuildLookupTables() {
379
6
    const TargetLoweringBase *TLI = getTLI();
380
6
    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
381
6
           TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
382
6
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::shouldBuildLookupTables()
Line
Count
Source
378
1.11k
  bool shouldBuildLookupTables() {
379
1.11k
    const TargetLoweringBase *TLI = getTLI();
380
1.11k
    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
381
1.11k
           
TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)0
;
382
1.11k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::shouldBuildLookupTables()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::shouldBuildLookupTables()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::shouldBuildLookupTables()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::shouldBuildLookupTables()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::shouldBuildLookupTables()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::shouldBuildLookupTables()
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::shouldBuildLookupTables()
Line
Count
Source
378
2.52k
  bool shouldBuildLookupTables() {
379
2.52k
    const TargetLoweringBase *TLI = getTLI();
380
2.52k
    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
381
2.52k
           TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
382
2.52k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::shouldBuildLookupTables()
383
384
50
  bool haveFastSqrt(Type *Ty) {
385
50
    const TargetLoweringBase *TLI = getTLI();
386
50
    EVT VT = TLI->getValueType(DL, Ty);
387
50
    return TLI->isTypeLegal(VT) &&
388
50
           
TLI->isOperationLegalOrCustom(ISD::FSQRT, VT)48
;
389
50
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::haveFastSqrt(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::haveFastSqrt(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::haveFastSqrt(llvm::Type*)
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::haveFastSqrt(llvm::Type*)
Line
Count
Source
384
2
  bool haveFastSqrt(Type *Ty) {
385
2
    const TargetLoweringBase *TLI = getTLI();
386
2
    EVT VT = TLI->getValueType(DL, Ty);
387
2
    return TLI->isTypeLegal(VT) &&
388
2
           
TLI->isOperationLegalOrCustom(ISD::FSQRT, VT)0
;
389
2
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::haveFastSqrt(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::haveFastSqrt(llvm::Type*)
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::haveFastSqrt(llvm::Type*)
Line
Count
Source
384
14
  bool haveFastSqrt(Type *Ty) {
385
14
    const TargetLoweringBase *TLI = getTLI();
386
14
    EVT VT = TLI->getValueType(DL, Ty);
387
14
    return TLI->isTypeLegal(VT) &&
388
14
           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
389
14
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::haveFastSqrt(llvm::Type*)
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::haveFastSqrt(llvm::Type*)
Line
Count
Source
384
4
  bool haveFastSqrt(Type *Ty) {
385
4
    const TargetLoweringBase *TLI = getTLI();
386
4
    EVT VT = TLI->getValueType(DL, Ty);
387
4
    return TLI->isTypeLegal(VT) &&
388
4
           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
389
4
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::haveFastSqrt(llvm::Type*)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::haveFastSqrt(llvm::Type*)
Line
Count
Source
384
4
  bool haveFastSqrt(Type *Ty) {
385
4
    const TargetLoweringBase *TLI = getTLI();
386
4
    EVT VT = TLI->getValueType(DL, Ty);
387
4
    return TLI->isTypeLegal(VT) &&
388
4
           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
389
4
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::haveFastSqrt(llvm::Type*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::haveFastSqrt(llvm::Type*)
Line
Count
Source
384
26
  bool haveFastSqrt(Type *Ty) {
385
26
    const TargetLoweringBase *TLI = getTLI();
386
26
    EVT VT = TLI->getValueType(DL, Ty);
387
26
    return TLI->isTypeLegal(VT) &&
388
26
           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
389
26
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::haveFastSqrt(llvm::Type*)
390
391
22
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
392
22
    return true;
393
22
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*)
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*)
Line
Count
Source
391
14
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
392
14
    return true;
393
14
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*)
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*)
Line
Count
Source
391
4
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
392
4
    return true;
393
4
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*)
Line
Count
Source
391
4
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
392
4
    return true;
393
4
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isFCmpOrdCheaperThanFCmpZero(llvm::Type*)
394
395
383k
  unsigned getFPOpCost(Type *Ty) {
396
383k
    // Check whether FADD is available, as a proxy for floating-point in
397
383k
    // general.
398
383k
    const TargetLoweringBase *TLI = getTLI();
399
383k
    EVT VT = TLI->getValueType(DL, Ty);
400
383k
    if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
401
382k
      return TargetTransformInfo::TCC_Basic;
402
1.27k
    return TargetTransformInfo::TCC_Expensive;
403
1.27k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getFPOpCost(llvm::Type*)
Line
Count
Source
395
381k
  unsigned getFPOpCost(Type *Ty) {
396
381k
    // Check whether FADD is available, as a proxy for floating-point in
397
381k
    // general.
398
381k
    const TargetLoweringBase *TLI = getTLI();
399
381k
    EVT VT = TLI->getValueType(DL, Ty);
400
381k
    if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
401
380k
      return TargetTransformInfo::TCC_Basic;
402
1.07k
    return TargetTransformInfo::TCC_Expensive;
403
1.07k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getFPOpCost(llvm::Type*)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getFPOpCost(llvm::Type*)
Line
Count
Source
395
36
  unsigned getFPOpCost(Type *Ty) {
396
36
    // Check whether FADD is available, as a proxy for floating-point in
397
36
    // general.
398
36
    const TargetLoweringBase *TLI = getTLI();
399
36
    EVT VT = TLI->getValueType(DL, Ty);
400
36
    if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
401
36
      return TargetTransformInfo::TCC_Basic;
402
0
    return TargetTransformInfo::TCC_Expensive;
403
0
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getFPOpCost(llvm::Type*)
Line
Count
Source
395
312
  unsigned getFPOpCost(Type *Ty) {
396
312
    // Check whether FADD is available, as a proxy for floating-point in
397
312
    // general.
398
312
    const TargetLoweringBase *TLI = getTLI();
399
312
    EVT VT = TLI->getValueType(DL, Ty);
400
312
    if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
401
114
      return TargetTransformInfo::TCC_Basic;
402
198
    return TargetTransformInfo::TCC_Expensive;
403
198
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getFPOpCost(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getFPOpCost(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getFPOpCost(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getFPOpCost(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getFPOpCost(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getFPOpCost(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getFPOpCost(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getFPOpCost(llvm::Type*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getFPOpCost(llvm::Type*)
Line
Count
Source
395
2.15k
  unsigned getFPOpCost(Type *Ty) {
396
2.15k
    // Check whether FADD is available, as a proxy for floating-point in
397
2.15k
    // general.
398
2.15k
    const TargetLoweringBase *TLI = getTLI();
399
2.15k
    EVT VT = TLI->getValueType(DL, Ty);
400
2.15k
    if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
401
2.15k
      return TargetTransformInfo::TCC_Basic;
402
0
    return TargetTransformInfo::TCC_Expensive;
403
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getFPOpCost(llvm::Type*)
404
405
26.0M
  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
406
26.0M
    const TargetLoweringBase *TLI = getTLI();
407
26.0M
    switch (Opcode) {
408
26.0M
    
default: break25.3M
;
409
26.0M
    case Instruction::Trunc:
410
710k
      if (TLI->isTruncateFree(OpTy, Ty))
411
664k
        return TargetTransformInfo::TCC_Free;
412
45.8k
      return TargetTransformInfo::TCC_Basic;
413
45.8k
    case Instruction::ZExt:
414
0
      if (TLI->isZExtFree(OpTy, Ty))
415
0
        return TargetTransformInfo::TCC_Free;
416
0
      return TargetTransformInfo::TCC_Basic;
417
0
418
204
    case Instruction::AddrSpaceCast:
419
204
      if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
420
204
                                   Ty->getPointerAddressSpace()))
421
132
        return TargetTransformInfo::TCC_Free;
422
72
      return TargetTransformInfo::TCC_Basic;
423
25.3M
    }
424
25.3M
425
25.3M
    return BaseT::getOperationCost(Opcode, Ty, OpTy);
426
25.3M
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*)
Line
Count
Source
405
23.8M
  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
406
23.8M
    const TargetLoweringBase *TLI = getTLI();
407
23.8M
    switch (Opcode) {
408
23.8M
    
default: break23.2M
;
409
23.8M
    case Instruction::Trunc:
410
650k
      if (TLI->isTruncateFree(OpTy, Ty))
411
608k
        return TargetTransformInfo::TCC_Free;
412
42.6k
      return TargetTransformInfo::TCC_Basic;
413
42.6k
    case Instruction::ZExt:
414
0
      if (TLI->isZExtFree(OpTy, Ty))
415
0
        return TargetTransformInfo::TCC_Free;
416
0
      return TargetTransformInfo::TCC_Basic;
417
0
418
0
    case Instruction::AddrSpaceCast:
419
0
      if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
420
0
                                   Ty->getPointerAddressSpace()))
421
0
        return TargetTransformInfo::TCC_Free;
422
0
      return TargetTransformInfo::TCC_Basic;
423
23.2M
    }
424
23.2M
425
23.2M
    return BaseT::getOperationCost(Opcode, Ty, OpTy);
426
23.2M
  }
llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*)
Line
Count
Source
405
386
  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
406
386
    const TargetLoweringBase *TLI = getTLI();
407
386
    switch (Opcode) {
408
386
    default: break;
409
386
    case Instruction::Trunc:
410
0
      if (TLI->isTruncateFree(OpTy, Ty))
411
0
        return TargetTransformInfo::TCC_Free;
412
0
      return TargetTransformInfo::TCC_Basic;
413
0
    case Instruction::ZExt:
414
0
      if (TLI->isZExtFree(OpTy, Ty))
415
0
        return TargetTransformInfo::TCC_Free;
416
0
      return TargetTransformInfo::TCC_Basic;
417
0
418
0
    case Instruction::AddrSpaceCast:
419
0
      if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
420
0
                                   Ty->getPointerAddressSpace()))
421
0
        return TargetTransformInfo::TCC_Free;
422
0
      return TargetTransformInfo::TCC_Basic;
423
386
    }
424
386
425
386
    return BaseT::getOperationCost(Opcode, Ty, OpTy);
426
386
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*)
Line
Count
Source
405
6.53k
  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
406
6.53k
    const TargetLoweringBase *TLI = getTLI();
407
6.53k
    switch (Opcode) {
408
6.53k
    
default: break6.28k
;
409
6.53k
    case Instruction::Trunc:
410
39
      if (TLI->isTruncateFree(OpTy, Ty))
411
33
        return TargetTransformInfo::TCC_Free;
412
6
      return TargetTransformInfo::TCC_Basic;
413
6
    case Instruction::ZExt:
414
0
      if (TLI->isZExtFree(OpTy, Ty))
415
0
        return TargetTransformInfo::TCC_Free;
416
0
      return TargetTransformInfo::TCC_Basic;
417
0
418
204
    case Instruction::AddrSpaceCast:
419
204
      if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
420
204
                                   Ty->getPointerAddressSpace()))
421
132
        return TargetTransformInfo::TCC_Free;
422
72
      return TargetTransformInfo::TCC_Basic;
423
6.28k
    }
424
6.28k
425
6.28k
    return BaseT::getOperationCost(Opcode, Ty, OpTy);
426
6.28k
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*)
Line
Count
Source
405
508k
  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
406
508k
    const TargetLoweringBase *TLI = getTLI();
407
508k
    switch (Opcode) {
408
508k
    
default: break499k
;
409
508k
    case Instruction::Trunc:
410
8.90k
      if (TLI->isTruncateFree(OpTy, Ty))
411
5.81k
        return TargetTransformInfo::TCC_Free;
412
3.09k
      return TargetTransformInfo::TCC_Basic;
413
3.09k
    case Instruction::ZExt:
414
0
      if (TLI->isZExtFree(OpTy, Ty))
415
0
        return TargetTransformInfo::TCC_Free;
416
0
      return TargetTransformInfo::TCC_Basic;
417
0
418
0
    case Instruction::AddrSpaceCast:
419
0
      if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
420
0
                                   Ty->getPointerAddressSpace()))
421
0
        return TargetTransformInfo::TCC_Free;
422
0
      return TargetTransformInfo::TCC_Basic;
423
499k
    }
424
499k
425
499k
    return BaseT::getOperationCost(Opcode, Ty, OpTy);
426
499k
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*)
Line
Count
Source
405
197
  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
406
197
    const TargetLoweringBase *TLI = getTLI();
407
197
    switch (Opcode) {
408
197
    
default: break185
;
409
197
    case Instruction::Trunc:
410
12
      if (TLI->isTruncateFree(OpTy, Ty))
411
1
        return TargetTransformInfo::TCC_Free;
412
11
      return TargetTransformInfo::TCC_Basic;
413
11
    case Instruction::ZExt:
414
0
      if (TLI->isZExtFree(OpTy, Ty))
415
0
        return TargetTransformInfo::TCC_Free;
416
0
      return TargetTransformInfo::TCC_Basic;
417
0
418
0
    case Instruction::AddrSpaceCast:
419
0
      if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
420
0
                                   Ty->getPointerAddressSpace()))
421
0
        return TargetTransformInfo::TCC_Free;
422
0
      return TargetTransformInfo::TCC_Basic;
423
185
    }
424
185
425
185
    return BaseT::getOperationCost(Opcode, Ty, OpTy);
426
185
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*)
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*)
Line
Count
Source
405
102
  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
406
102
    const TargetLoweringBase *TLI = getTLI();
407
102
    switch (Opcode) {
408
102
    default: break;
409
102
    case Instruction::Trunc:
410
0
      if (TLI->isTruncateFree(OpTy, Ty))
411
0
        return TargetTransformInfo::TCC_Free;
412
0
      return TargetTransformInfo::TCC_Basic;
413
0
    case Instruction::ZExt:
414
0
      if (TLI->isZExtFree(OpTy, Ty))
415
0
        return TargetTransformInfo::TCC_Free;
416
0
      return TargetTransformInfo::TCC_Basic;
417
0
418
0
    case Instruction::AddrSpaceCast:
419
0
      if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
420
0
                                   Ty->getPointerAddressSpace()))
421
0
        return TargetTransformInfo::TCC_Free;
422
0
      return TargetTransformInfo::TCC_Basic;
423
102
    }
424
102
425
102
    return BaseT::getOperationCost(Opcode, Ty, OpTy);
426
102
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*)
Line
Count
Source
405
6.46k
  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
406
6.46k
    const TargetLoweringBase *TLI = getTLI();
407
6.46k
    switch (Opcode) {
408
6.46k
    
default: break6.18k
;
409
6.46k
    case Instruction::Trunc:
410
272
      if (TLI->isTruncateFree(OpTy, Ty))
411
217
        return TargetTransformInfo::TCC_Free;
412
55
      return TargetTransformInfo::TCC_Basic;
413
55
    case Instruction::ZExt:
414
0
      if (TLI->isZExtFree(OpTy, Ty))
415
0
        return TargetTransformInfo::TCC_Free;
416
0
      return TargetTransformInfo::TCC_Basic;
417
0
418
0
    case Instruction::AddrSpaceCast:
419
0
      if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
420
0
                                   Ty->getPointerAddressSpace()))
421
0
        return TargetTransformInfo::TCC_Free;
422
0
      return TargetTransformInfo::TCC_Basic;
423
6.18k
    }
424
6.18k
425
6.18k
    return BaseT::getOperationCost(Opcode, Ty, OpTy);
426
6.18k
  }
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*)
Line
Count
Source
405
4
  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
406
4
    const TargetLoweringBase *TLI = getTLI();
407
4
    switch (Opcode) {
408
4
    
default: break3
;
409
4
    case Instruction::Trunc:
410
1
      if (TLI->isTruncateFree(OpTy, Ty))
411
1
        return TargetTransformInfo::TCC_Free;
412
0
      return TargetTransformInfo::TCC_Basic;
413
0
    case Instruction::ZExt:
414
0
      if (TLI->isZExtFree(OpTy, Ty))
415
0
        return TargetTransformInfo::TCC_Free;
416
0
      return TargetTransformInfo::TCC_Basic;
417
0
418
0
    case Instruction::AddrSpaceCast:
419
0
      if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
420
0
                                   Ty->getPointerAddressSpace()))
421
0
        return TargetTransformInfo::TCC_Free;
422
0
      return TargetTransformInfo::TCC_Basic;
423
3
    }
424
3
425
3
    return BaseT::getOperationCost(Opcode, Ty, OpTy);
426
3
  }
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*)
Line
Count
Source
405
1.08k
  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
406
1.08k
    const TargetLoweringBase *TLI = getTLI();
407
1.08k
    switch (Opcode) {
408
1.08k
    
default: break1.06k
;
409
1.08k
    case Instruction::Trunc:
410
18
      if (TLI->isTruncateFree(OpTy, Ty))
411
15
        return TargetTransformInfo::TCC_Free;
412
3
      return TargetTransformInfo::TCC_Basic;
413
3
    case Instruction::ZExt:
414
0
      if (TLI->isZExtFree(OpTy, Ty))
415
0
        return TargetTransformInfo::TCC_Free;
416
0
      return TargetTransformInfo::TCC_Basic;
417
0
418
0
    case Instruction::AddrSpaceCast:
419
0
      if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
420
0
                                   Ty->getPointerAddressSpace()))
421
0
        return TargetTransformInfo::TCC_Free;
422
0
      return TargetTransformInfo::TCC_Basic;
423
1.06k
    }
424
1.06k
425
1.06k
    return BaseT::getOperationCost(Opcode, Ty, OpTy);
426
1.06k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*)
Line
Count
Source
405
1.63M
  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
406
1.63M
    const TargetLoweringBase *TLI = getTLI();
407
1.63M
    switch (Opcode) {
408
1.63M
    
default: break1.58M
;
409
1.63M
    case Instruction::Trunc:
410
50.1k
      if (TLI->isTruncateFree(OpTy, Ty))
411
50.1k
        return TargetTransformInfo::TCC_Free;
412
54
      return TargetTransformInfo::TCC_Basic;
413
54
    case Instruction::ZExt:
414
0
      if (TLI->isZExtFree(OpTy, Ty))
415
0
        return TargetTransformInfo::TCC_Free;
416
0
      return TargetTransformInfo::TCC_Basic;
417
0
418
0
    case Instruction::AddrSpaceCast:
419
0
      if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
420
0
                                   Ty->getPointerAddressSpace()))
421
0
        return TargetTransformInfo::TCC_Free;
422
0
      return TargetTransformInfo::TCC_Basic;
423
1.58M
    }
424
1.58M
425
1.58M
    return BaseT::getOperationCost(Opcode, Ty, OpTy);
426
1.58M
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getOperationCost(unsigned int, llvm::Type*, llvm::Type*)
427
428
790k
  unsigned getInliningThresholdMultiplier() { return 1; }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getInliningThresholdMultiplier()
Line
Count
Source
428
527k
  unsigned getInliningThresholdMultiplier() { return 1; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getInliningThresholdMultiplier()
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getInliningThresholdMultiplier()
Line
Count
Source
428
61.9k
  unsigned getInliningThresholdMultiplier() { return 1; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getInliningThresholdMultiplier()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getInliningThresholdMultiplier()
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getInliningThresholdMultiplier()
Line
Count
Source
428
7
  unsigned getInliningThresholdMultiplier() { return 1; }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getInliningThresholdMultiplier()
Line
Count
Source
428
17
  unsigned getInliningThresholdMultiplier() { return 1; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getInliningThresholdMultiplier()
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getInliningThresholdMultiplier()
Line
Count
Source
428
4
  unsigned getInliningThresholdMultiplier() { return 1; }
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getInliningThresholdMultiplier()
Line
Count
Source
428
200k
  unsigned getInliningThresholdMultiplier() { return 1; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getInliningThresholdMultiplier()
429
430
790k
  int getInlinerVectorBonusPercent() { return 150; }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getInlinerVectorBonusPercent()
Line
Count
Source
430
527k
  int getInlinerVectorBonusPercent() { return 150; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getInlinerVectorBonusPercent()
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getInlinerVectorBonusPercent()
Line
Count
Source
430
61.9k
  int getInlinerVectorBonusPercent() { return 150; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getInlinerVectorBonusPercent()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getInlinerVectorBonusPercent()
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getInlinerVectorBonusPercent()
Line
Count
Source
430
7
  int getInlinerVectorBonusPercent() { return 150; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getInlinerVectorBonusPercent()
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getInlinerVectorBonusPercent()
Line
Count
Source
430
17
  int getInlinerVectorBonusPercent() { return 150; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getInlinerVectorBonusPercent()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getInlinerVectorBonusPercent()
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getInlinerVectorBonusPercent()
Line
Count
Source
430
4
  int getInlinerVectorBonusPercent() { return 150; }
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getInlinerVectorBonusPercent()
Line
Count
Source
430
200k
  int getInlinerVectorBonusPercent() { return 150; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getInlinerVectorBonusPercent()
431
432
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
433
389k
                               TTI::UnrollingPreferences &UP) {
434
389k
    // This unrolling functionality is target independent, but to provide some
435
389k
    // motivation for its intended use, for x86:
436
389k
437
389k
    // According to the Intel 64 and IA-32 Architectures Optimization Reference
438
389k
    // Manual, Intel Core models and later have a loop stream detector (and
439
389k
    // associated uop queue) that can benefit from partial unrolling.
440
389k
    // The relevant requirements are:
441
389k
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
442
389k
    //    taken, and none of them may be calls.
443
389k
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.
444
389k
445
389k
    // According to the Software Optimization Guide for AMD Family 15h
446
389k
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
447
389k
    // and loop buffer which can benefit from partial unrolling.
448
389k
    // The relevant requirements are:
449
389k
    //  - The loop must have fewer than 16 branches
450
389k
    //  - The loop must have less than 40 uops in all executed loop branches
451
389k
452
389k
    // The number of taken branches in a loop is hard to estimate here, and
453
389k
    // benchmarking has revealed that it is better not to be conservative when
454
389k
    // estimating the branch count. As a result, we'll ignore the branch limits
455
389k
    // until someone finds a case where it matters in practice.
456
389k
457
389k
    unsigned MaxOps;
458
389k
    const TargetSubtargetInfo *ST = getST();
459
389k
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
460
0
      MaxOps = PartialUnrollingThreshold;
461
389k
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
462
22.8k
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
463
366k
    else
464
366k
      return;
465
22.8k
466
22.8k
    // Scan the loop: don't unroll loops with calls.
467
69.1k
    
for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 22.8k
I != E;
468
56.4k
         
++I46.2k
) {
469
56.4k
      BasicBlock *BB = *I;
470
56.4k
471
380k
      for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; 
++J323k
)
472
333k
        if (isa<CallInst>(J) || 
isa<InvokeInst>(J)318k
) {
473
15.1k
          ImmutableCallSite CS(&*J);
474
15.1k
          if (const Function *F = CS.getCalledFunction()) {
475
13.8k
            if (!static_cast<T *>(this)->isLoweredToCall(F))
476
4.92k
              continue;
477
10.2k
          }
478
10.2k
479
10.2k
          return;
480
10.2k
        }
481
56.4k
    }
482
22.8k
483
22.8k
    // Enable runtime and partial unrolling up to the specified size.
484
22.8k
    // Enable using trip count upper bound to unroll loops.
485
22.8k
    UP.Partial = UP.Runtime = UP.UpperBound = true;
486
12.6k
    UP.PartialThreshold = MaxOps;
487
12.6k
488
12.6k
    // Avoid unrolling when optimizing for size.
489
12.6k
    UP.OptSizeThreshold = 0;
490
12.6k
    UP.PartialOptSizeThreshold = 0;
491
12.6k
492
12.6k
    // Set number of instructions optimized when "back edge"
493
12.6k
    // becomes "fall through" to default value of 2.
494
12.6k
    UP.BEInsns = 2;
495
12.6k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&)
Line
Count
Source
433
359k
                               TTI::UnrollingPreferences &UP) {
434
359k
    // This unrolling functionality is target independent, but to provide some
435
359k
    // motivation for its intended use, for x86:
436
359k
437
359k
    // According to the Intel 64 and IA-32 Architectures Optimization Reference
438
359k
    // Manual, Intel Core models and later have a loop stream detector (and
439
359k
    // associated uop queue) that can benefit from partial unrolling.
440
359k
    // The relevant requirements are:
441
359k
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
442
359k
    //    taken, and none of them may be calls.
443
359k
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.
444
359k
445
359k
    // According to the Software Optimization Guide for AMD Family 15h
446
359k
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
447
359k
    // and loop buffer which can benefit from partial unrolling.
448
359k
    // The relevant requirements are:
449
359k
    //  - The loop must have fewer than 16 branches
450
359k
    //  - The loop must have less than 40 uops in all executed loop branches
451
359k
452
359k
    // The number of taken branches in a loop is hard to estimate here, and
453
359k
    // benchmarking has revealed that it is better not to be conservative when
454
359k
    // estimating the branch count. As a result, we'll ignore the branch limits
455
359k
    // until someone finds a case where it matters in practice.
456
359k
457
359k
    unsigned MaxOps;
458
359k
    const TargetSubtargetInfo *ST = getST();
459
359k
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
460
0
      MaxOps = PartialUnrollingThreshold;
461
359k
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
462
28
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
463
359k
    else
464
359k
      return;
465
28
466
28
    // Scan the loop: don't unroll loops with calls.
467
68
    
for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 28
I != E;
468
40
         ++I) {
469
40
      BasicBlock *BB = *I;
470
40
471
279
      for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; 
++J239
)
472
239
        if (isa<CallInst>(J) || 
isa<InvokeInst>(J)209
) {
473
30
          ImmutableCallSite CS(&*J);
474
30
          if (const Function *F = CS.getCalledFunction()) {
475
30
            if (!static_cast<T *>(this)->isLoweredToCall(F))
476
30
              continue;
477
0
          }
478
0
479
0
          return;
480
0
        }
481
40
    }
482
28
483
28
    // Enable runtime and partial unrolling up to the specified size.
484
28
    // Enable using trip count upper bound to unroll loops.
485
28
    UP.Partial = UP.Runtime = UP.UpperBound = true;
486
28
    UP.PartialThreshold = MaxOps;
487
28
488
28
    // Avoid unrolling when optimizing for size.
489
28
    UP.OptSizeThreshold = 0;
490
28
    UP.PartialOptSizeThreshold = 0;
491
28
492
28
    // Set number of instructions optimized when "back edge"
493
28
    // becomes "fall through" to default value of 2.
494
28
    UP.BEInsns = 2;
495
28
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&)
Line
Count
Source
433
6.89k
                               TTI::UnrollingPreferences &UP) {
434
6.89k
    // This unrolling functionality is target independent, but to provide some
435
6.89k
    // motivation for its intended use, for x86:
436
6.89k
437
6.89k
    // According to the Intel 64 and IA-32 Architectures Optimization Reference
438
6.89k
    // Manual, Intel Core models and later have a loop stream detector (and
439
6.89k
    // associated uop queue) that can benefit from partial unrolling.
440
6.89k
    // The relevant requirements are:
441
6.89k
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
442
6.89k
    //    taken, and none of them may be calls.
443
6.89k
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.
444
6.89k
445
6.89k
    // According to the Software Optimization Guide for AMD Family 15h
446
6.89k
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
447
6.89k
    // and loop buffer which can benefit from partial unrolling.
448
6.89k
    // The relevant requirements are:
449
6.89k
    //  - The loop must have fewer than 16 branches
450
6.89k
    //  - The loop must have less than 40 uops in all executed loop branches
451
6.89k
452
6.89k
    // The number of taken branches in a loop is hard to estimate here, and
453
6.89k
    // benchmarking has revealed that it is better not to be conservative when
454
6.89k
    // estimating the branch count. As a result, we'll ignore the branch limits
455
6.89k
    // until someone finds a case where it matters in practice.
456
6.89k
457
6.89k
    unsigned MaxOps;
458
6.89k
    const TargetSubtargetInfo *ST = getST();
459
6.89k
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
460
0
      MaxOps = PartialUnrollingThreshold;
461
6.89k
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
462
24
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
463
6.87k
    else
464
6.87k
      return;
465
24
466
24
    // Scan the loop: don't unroll loops with calls.
467
51
    
for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 24
I != E;
468
30
         
++I27
) {
469
30
      BasicBlock *BB = *I;
470
30
471
276
      for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; 
++J246
)
472
249
        if (isa<CallInst>(J) || 
isa<InvokeInst>(J)234
) {
473
15
          ImmutableCallSite CS(&*J);
474
15
          if (const Function *F = CS.getCalledFunction()) {
475
15
            if (!static_cast<T *>(this)->isLoweredToCall(F))
476
12
              continue;
477
3
          }
478
3
479
3
          return;
480
3
        }
481
30
    }
482
24
483
24
    // Enable runtime and partial unrolling up to the specified size.
484
24
    // Enable using trip count upper bound to unroll loops.
485
24
    UP.Partial = UP.Runtime = UP.UpperBound = true;
486
21
    UP.PartialThreshold = MaxOps;
487
21
488
21
    // Avoid unrolling when optimizing for size.
489
21
    UP.OptSizeThreshold = 0;
490
21
    UP.PartialOptSizeThreshold = 0;
491
21
492
21
    // Set number of instructions optimized when "back edge"
493
21
    // becomes "fall through" to default value of 2.
494
21
    UP.BEInsns = 2;
495
21
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&)
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&)
Line
Count
Source
433
2
                               TTI::UnrollingPreferences &UP) {
434
2
    // This unrolling functionality is target independent, but to provide some
435
2
    // motivation for its intended use, for x86:
436
2
437
2
    // According to the Intel 64 and IA-32 Architectures Optimization Reference
438
2
    // Manual, Intel Core models and later have a loop stream detector (and
439
2
    // associated uop queue) that can benefit from partial unrolling.
440
2
    // The relevant requirements are:
441
2
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
442
2
    //    taken, and none of them may be calls.
443
2
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.
444
2
445
2
    // According to the Software Optimization Guide for AMD Family 15h
446
2
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
447
2
    // and loop buffer which can benefit from partial unrolling.
448
2
    // The relevant requirements are:
449
2
    //  - The loop must have fewer than 16 branches
450
2
    //  - The loop must have less than 40 uops in all executed loop branches
451
2
452
2
    // The number of taken branches in a loop is hard to estimate here, and
453
2
    // benchmarking has revealed that it is better not to be conservative when
454
2
    // estimating the branch count. As a result, we'll ignore the branch limits
455
2
    // until someone finds a case where it matters in practice.
456
2
457
2
    unsigned MaxOps;
458
2
    const TargetSubtargetInfo *ST = getST();
459
2
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
460
0
      MaxOps = PartialUnrollingThreshold;
461
2
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
462
0
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
463
2
    else
464
2
      return;
465
0
466
0
    // Scan the loop: don't unroll loops with calls.
467
0
    for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
468
0
         ++I) {
469
0
      BasicBlock *BB = *I;
470
0
471
0
      for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
472
0
        if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
473
0
          ImmutableCallSite CS(&*J);
474
0
          if (const Function *F = CS.getCalledFunction()) {
475
0
            if (!static_cast<T *>(this)->isLoweredToCall(F))
476
0
              continue;
477
0
          }
478
0
479
0
          return;
480
0
        }
481
0
    }
482
0
483
0
    // Enable runtime and partial unrolling up to the specified size.
484
0
    // Enable using trip count upper bound to unroll loops.
485
0
    UP.Partial = UP.Runtime = UP.UpperBound = true;
486
0
    UP.PartialThreshold = MaxOps;
487
0
488
0
    // Avoid unrolling when optimizing for size.
489
0
    UP.OptSizeThreshold = 0;
490
0
    UP.PartialOptSizeThreshold = 0;
491
0
492
0
    // Set number of instructions optimized when "back edge"
493
0
    // becomes "fall through" to default value of 2.
494
0
    UP.BEInsns = 2;
495
0
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&)
Line
Count
Source
433
15
                               TTI::UnrollingPreferences &UP) {
434
15
    // This unrolling functionality is target independent, but to provide some
435
15
    // motivation for its intended use, for x86:
436
15
437
15
    // According to the Intel 64 and IA-32 Architectures Optimization Reference
438
15
    // Manual, Intel Core models and later have a loop stream detector (and
439
15
    // associated uop queue) that can benefit from partial unrolling.
440
15
    // The relevant requirements are:
441
15
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
442
15
    //    taken, and none of them may be calls.
443
15
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.
444
15
445
15
    // According to the Software Optimization Guide for AMD Family 15h
446
15
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
447
15
    // and loop buffer which can benefit from partial unrolling.
448
15
    // The relevant requirements are:
449
15
    //  - The loop must have fewer than 16 branches
450
15
    //  - The loop must have less than 40 uops in all executed loop branches
451
15
452
15
    // The number of taken branches in a loop is hard to estimate here, and
453
15
    // benchmarking has revealed that it is better not to be conservative when
454
15
    // estimating the branch count. As a result, we'll ignore the branch limits
455
15
    // until someone finds a case where it matters in practice.
456
15
457
15
    unsigned MaxOps;
458
15
    const TargetSubtargetInfo *ST = getST();
459
15
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
460
0
      MaxOps = PartialUnrollingThreshold;
461
15
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
462
9
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
463
6
    else
464
6
      return;
465
9
466
9
    // Scan the loop: don't unroll loops with calls.
467
18
    
for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 9
I != E;
468
9
         ++I) {
469
9
      BasicBlock *BB = *I;
470
9
471
93
      for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; 
++J84
)
472
84
        if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
473
0
          ImmutableCallSite CS(&*J);
474
0
          if (const Function *F = CS.getCalledFunction()) {
475
0
            if (!static_cast<T *>(this)->isLoweredToCall(F))
476
0
              continue;
477
0
          }
478
0
479
0
          return;
480
0
        }
481
9
    }
482
9
483
9
    // Enable runtime and partial unrolling up to the specified size.
484
9
    // Enable using trip count upper bound to unroll loops.
485
9
    UP.Partial = UP.Runtime = UP.UpperBound = true;
486
9
    UP.PartialThreshold = MaxOps;
487
9
488
9
    // Avoid unrolling when optimizing for size.
489
9
    UP.OptSizeThreshold = 0;
490
9
    UP.PartialOptSizeThreshold = 0;
491
9
492
9
    // Set number of instructions optimized when "back edge"
493
9
    // becomes "fall through" to default value of 2.
494
9
    UP.BEInsns = 2;
495
9
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&)
Line
Count
Source
433
22.9k
                               TTI::UnrollingPreferences &UP) {
434
22.9k
    // This unrolling functionality is target independent, but to provide some
435
22.9k
    // motivation for its intended use, for x86:
436
22.9k
437
22.9k
    // According to the Intel 64 and IA-32 Architectures Optimization Reference
438
22.9k
    // Manual, Intel Core models and later have a loop stream detector (and
439
22.9k
    // associated uop queue) that can benefit from partial unrolling.
440
22.9k
    // The relevant requirements are:
441
22.9k
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
442
22.9k
    //    taken, and none of them may be calls.
443
22.9k
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.
444
22.9k
445
22.9k
    // According to the Software Optimization Guide for AMD Family 15h
446
22.9k
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
447
22.9k
    // and loop buffer which can benefit from partial unrolling.
448
22.9k
    // The relevant requirements are:
449
22.9k
    //  - The loop must have fewer than 16 branches
450
22.9k
    //  - The loop must have less than 40 uops in all executed loop branches
451
22.9k
452
22.9k
    // The number of taken branches in a loop is hard to estimate here, and
453
22.9k
    // benchmarking has revealed that it is better not to be conservative when
454
22.9k
    // estimating the branch count. As a result, we'll ignore the branch limits
455
22.9k
    // until someone finds a case where it matters in practice.
456
22.9k
457
22.9k
    unsigned MaxOps;
458
22.9k
    const TargetSubtargetInfo *ST = getST();
459
22.9k
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
460
0
      MaxOps = PartialUnrollingThreshold;
461
22.9k
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
462
22.8k
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
463
157
    else
464
157
      return;
465
22.8k
466
22.8k
    // Scan the loop: don't unroll loops with calls.
467
68.9k
    
for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 22.8k
I != E;
468
56.3k
         
++I46.1k
) {
469
56.3k
      BasicBlock *BB = *I;
470
56.3k
471
379k
      for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; 
++J323k
)
472
333k
        if (isa<CallInst>(J) || 
isa<InvokeInst>(J)318k
) {
473
15.1k
          ImmutableCallSite CS(&*J);
474
15.1k
          if (const Function *F = CS.getCalledFunction()) {
475
13.7k
            if (!static_cast<T *>(this)->isLoweredToCall(F))
476
4.88k
              continue;
477
10.2k
          }
478
10.2k
479
10.2k
          return;
480
10.2k
        }
481
56.3k
    }
482
22.8k
483
22.8k
    // Enable runtime and partial unrolling up to the specified size.
484
22.8k
    // Enable using trip count upper bound to unroll loops.
485
22.8k
    UP.Partial = UP.Runtime = UP.UpperBound = true;
486
12.5k
    UP.PartialThreshold = MaxOps;
487
12.5k
488
12.5k
    // Avoid unrolling when optimizing for size.
489
12.5k
    UP.OptSizeThreshold = 0;
490
12.5k
    UP.PartialOptSizeThreshold = 0;
491
12.5k
492
12.5k
    // Set number of instructions optimized when "back edge"
493
12.5k
    // becomes "fall through" to default value of 2.
494
12.5k
    UP.BEInsns = 2;
495
12.5k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&)
496
497
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
498
                                AssumptionCache &AC,
499
                                TargetLibraryInfo *LibInfo,
500
0
                                HardwareLoopInfo &HWLoopInfo) {
501
0
    return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
502
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::X86TTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::isHardwareLoopProfitable(llvm::Loop*, llvm::ScalarEvolution&, llvm::AssumptionCache&, llvm::TargetLibraryInfo*, llvm::HardwareLoopInfo&)
503
504
119
  int getInstructionLatency(const Instruction *I) {
505
119
    if (isa<LoadInst>(I))
506
41
      return getST()->getSchedModel().DefaultLoadLatency;
507
78
508
78
    return BaseT::getInstructionLatency(I);
509
78
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getInstructionLatency(llvm::Instruction const*)
Line
Count
Source
504
108
  int getInstructionLatency(const Instruction *I) {
505
108
    if (isa<LoadInst>(I))
506
40
      return getST()->getSchedModel().DefaultLoadLatency;
507
68
508
68
    return BaseT::getInstructionLatency(I);
509
68
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getInstructionLatency(llvm::Instruction const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getInstructionLatency(llvm::Instruction const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getInstructionLatency(llvm::Instruction const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getInstructionLatency(llvm::Instruction const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getInstructionLatency(llvm::Instruction const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getInstructionLatency(llvm::Instruction const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getInstructionLatency(llvm::Instruction const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getInstructionLatency(llvm::Instruction const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getInstructionLatency(llvm::Instruction const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getInstructionLatency(llvm::Instruction const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getInstructionLatency(llvm::Instruction const*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getInstructionLatency(llvm::Instruction const*)
Line
Count
Source
504
11
  int getInstructionLatency(const Instruction *I) {
505
11
    if (isa<LoadInst>(I))
506
1
      return getST()->getSchedModel().DefaultLoadLatency;
507
10
508
10
    return BaseT::getInstructionLatency(I);
509
10
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getInstructionLatency(llvm::Instruction const*)
510
511
  /// @}
512
513
  /// \name Vector TTI Implementations
514
  /// @{
515
516
867
  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 
0286
:
1581
; }
llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getNumberOfRegisters(bool)
Line
Count
Source
516
56
  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 
00
: 1; }
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getNumberOfRegisters(bool)
Line
Count
Source
516
335
  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 
060
:
1275
; }
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getNumberOfRegisters(bool)
Line
Count
Source
516
68
  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 
06
:
162
; }
llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getNumberOfRegisters(bool)
Line
Count
Source
516
408
  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 
0220
:
1188
; }
517
518
0
  unsigned getRegisterBitWidth(bool Vector) const { return 32; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getRegisterBitWidth(bool) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getRegisterBitWidth(bool) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getRegisterBitWidth(bool) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getRegisterBitWidth(bool) const
519
520
  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
521
  /// are set if the result needs to be inserted and/or extracted from vectors.
522
88.3k
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
523
88.3k
    assert(Ty->isVectorTy() && "Can only scalarize vectors");
524
88.3k
    unsigned Cost = 0;
525
88.3k
526
395k
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i307k
) {
527
307k
      if (Insert)
528
167k
        Cost += static_cast<T *>(this)
529
167k
                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
530
307k
      if (Extract)
531
144k
        Cost += static_cast<T *>(this)
532
144k
                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
533
307k
    }
534
88.3k
535
88.3k
    return Cost;
536
88.3k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool)
Line
Count
Source
522
69.6k
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
523
69.6k
    assert(Ty->isVectorTy() && "Can only scalarize vectors");
524
69.6k
    unsigned Cost = 0;
525
69.6k
526
254k
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i185k
) {
527
185k
      if (Insert)
528
105k
        Cost += static_cast<T *>(this)
529
105k
                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
530
185k
      if (Extract)
531
80.0k
        Cost += static_cast<T *>(this)
532
80.0k
                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
533
185k
    }
534
69.6k
535
69.6k
    return Cost;
536
69.6k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool)
Line
Count
Source
522
170
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
523
170
    assert(Ty->isVectorTy() && "Can only scalarize vectors");
524
170
    unsigned Cost = 0;
525
170
526
720
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i550
) {
527
550
      if (Insert)
528
437
        Cost += static_cast<T *>(this)
529
437
                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
530
550
      if (Extract)
531
117
        Cost += static_cast<T *>(this)
532
117
                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
533
550
    }
534
170
535
170
    return Cost;
536
170
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool)
Line
Count
Source
522
2.83k
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
523
2.83k
    assert(Ty->isVectorTy() && "Can only scalarize vectors");
524
2.83k
    unsigned Cost = 0;
525
2.83k
526
9.78k
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i6.95k
) {
527
6.95k
      if (Insert)
528
2.60k
        Cost += static_cast<T *>(this)
529
2.60k
                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
530
6.95k
      if (Extract)
531
4.58k
        Cost += static_cast<T *>(this)
532
4.58k
                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
533
6.95k
    }
534
2.83k
535
2.83k
    return Cost;
536
2.83k
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool)
Line
Count
Source
522
23
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
523
23
    assert(Ty->isVectorTy() && "Can only scalarize vectors");
524
23
    unsigned Cost = 0;
525
23
526
311
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i288
) {
527
288
      if (Insert)
528
124
        Cost += static_cast<T *>(this)
529
124
                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
530
288
      if (Extract)
531
164
        Cost += static_cast<T *>(this)
532
164
                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
533
288
    }
534
23
535
23
    return Cost;
536
23
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool)
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool)
Line
Count
Source
522
4
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
523
4
    assert(Ty->isVectorTy() && "Can only scalarize vectors");
524
4
    unsigned Cost = 0;
525
4
526
12
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i8
) {
527
8
      if (Insert)
528
4
        Cost += static_cast<T *>(this)
529
4
                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
530
8
      if (Extract)
531
4
        Cost += static_cast<T *>(this)
532
4
                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
533
8
    }
534
4
535
4
    return Cost;
536
4
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool)
Line
Count
Source
522
788
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
523
788
    assert(Ty->isVectorTy() && "Can only scalarize vectors");
524
788
    unsigned Cost = 0;
525
788
526
2.99k
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i2.21k
) {
527
2.21k
      if (Insert)
528
1.07k
        Cost += static_cast<T *>(this)
529
1.07k
                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
530
2.21k
      if (Extract)
531
1.13k
        Cost += static_cast<T *>(this)
532
1.13k
                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
533
2.21k
    }
534
788
535
788
    return Cost;
536
788
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool)
Line
Count
Source
522
758
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
523
758
    assert(Ty->isVectorTy() && "Can only scalarize vectors");
524
758
    unsigned Cost = 0;
525
758
526
5.53k
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i4.77k
) {
527
4.77k
      if (Insert)
528
2.21k
        Cost += static_cast<T *>(this)
529
2.21k
                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
530
4.77k
      if (Extract)
531
2.11k
        Cost += static_cast<T *>(this)
532
2.11k
                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
533
4.77k
    }
534
758
535
758
    return Cost;
536
758
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool)
Line
Count
Source
522
14.1k
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
523
14.1k
    assert(Ty->isVectorTy() && "Can only scalarize vectors");
524
14.1k
    unsigned Cost = 0;
525
14.1k
526
121k
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; 
++i107k
) {
527
107k
      if (Insert)
528
55.7k
        Cost += static_cast<T *>(this)
529
55.7k
                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
530
107k
      if (Extract)
531
55.9k
        Cost += static_cast<T *>(this)
532
55.9k
                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
533
107k
    }
534
14.1k
535
14.1k
    return Cost;
536
14.1k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getScalarizationOverhead(llvm::Type*, bool, bool)
537
538
  /// Estimate the overhead of scalarizing an instruction's unique
539
  /// non-constant operands. The types of the arguments are ordinarily
540
  /// scalar, in which case the costs are multiplied by VF.
541
  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
542
24.4k
                                            unsigned VF) {
543
24.4k
    unsigned Cost = 0;
544
24.4k
    SmallPtrSet<const Value*, 4> UniqueOperands;
545
34.0k
    for (const Value *A : Args) {
546
34.0k
      if (!isa<Constant>(A) && 
UniqueOperands.insert(A).second28.0k
) {
547
28.0k
        Type *VecTy = nullptr;
548
28.0k
        if (A->getType()->isVectorTy()) {
549
823
          VecTy = A->getType();
550
823
          // If A is a vector operand, VF should be 1 or correspond to A.
551
823
          assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
552
823
                 "Vector argument does not match VF");
553
823
        }
554
27.2k
        else
555
27.2k
          VecTy = VectorType::get(A->getType(), VF);
556
28.0k
557
28.0k
        Cost += getScalarizationOverhead(VecTy, false, true);
558
28.0k
      }
559
34.0k
    }
560
24.4k
561
24.4k
    return Cost;
562
24.4k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int)
Line
Count
Source
542
17.6k
                                            unsigned VF) {
543
17.6k
    unsigned Cost = 0;
544
17.6k
    SmallPtrSet<const Value*, 4> UniqueOperands;
545
21.5k
    for (const Value *A : Args) {
546
21.5k
      if (!isa<Constant>(A) && 
UniqueOperands.insert(A).second21.4k
) {
547
21.4k
        Type *VecTy = nullptr;
548
21.4k
        if (A->getType()->isVectorTy()) {
549
5
          VecTy = A->getType();
550
5
          // If A is a vector operand, VF should be 1 or correspond to A.
551
5
          assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
552
5
                 "Vector argument does not match VF");
553
5
        }
554
21.4k
        else
555
21.4k
          VecTy = VectorType::get(A->getType(), VF);
556
21.4k
557
21.4k
        Cost += getScalarizationOverhead(VecTy, false, true);
558
21.4k
      }
559
21.5k
    }
560
17.6k
561
17.6k
    return Cost;
562
17.6k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int)
Line
Count
Source
542
19
                                            unsigned VF) {
543
19
    unsigned Cost = 0;
544
19
    SmallPtrSet<const Value*, 4> UniqueOperands;
545
31
    for (const Value *A : Args) {
546
31
      if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
547
31
        Type *VecTy = nullptr;
548
31
        if (A->getType()->isVectorTy()) {
549
7
          VecTy = A->getType();
550
7
          // If A is a vector operand, VF should be 1 or correspond to A.
551
7
          assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
552
7
                 "Vector argument does not match VF");
553
7
        }
554
24
        else
555
24
          VecTy = VectorType::get(A->getType(), VF);
556
31
557
31
        Cost += getScalarizationOverhead(VecTy, false, true);
558
31
      }
559
31
    }
560
19
561
19
    return Cost;
562
19
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int)
Line
Count
Source
542
613
                                            unsigned VF) {
543
613
    unsigned Cost = 0;
544
613
    SmallPtrSet<const Value*, 4> UniqueOperands;
545
943
    for (const Value *A : Args) {
546
943
      if (!isa<Constant>(A) && 
UniqueOperands.insert(A).second835
) {
547
835
        Type *VecTy = nullptr;
548
835
        if (A->getType()->isVectorTy()) {
549
0
          VecTy = A->getType();
550
0
          // If A is a vector operand, VF should be 1 or correspond to A.
551
0
          assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
552
0
                 "Vector argument does not match VF");
553
0
        }
554
835
        else
555
835
          VecTy = VectorType::get(A->getType(), VF);
556
835
557
835
        Cost += getScalarizationOverhead(VecTy, false, true);
558
835
      }
559
943
    }
560
613
561
613
    return Cost;
562
613
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int)
Line
Count
Source
542
5
                                            unsigned VF) {
543
5
    unsigned Cost = 0;
544
5
    SmallPtrSet<const Value*, 4> UniqueOperands;
545
10
    for (const Value *A : Args) {
546
10
      if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
547
10
        Type *VecTy = nullptr;
548
10
        if (A->getType()->isVectorTy()) {
549
0
          VecTy = A->getType();
550
0
          // If A is a vector operand, VF should be 1 or correspond to A.
551
0
          assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
552
0
                 "Vector argument does not match VF");
553
0
        }
554
10
        else
555
10
          VecTy = VectorType::get(A->getType(), VF);
556
10
557
10
        Cost += getScalarizationOverhead(VecTy, false, true);
558
10
      }
559
10
    }
560
5
561
5
    return Cost;
562
5
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int)
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int)
Line
Count
Source
542
354
                                            unsigned VF) {
543
354
    unsigned Cost = 0;
544
354
    SmallPtrSet<const Value*, 4> UniqueOperands;
545
379
    for (const Value *A : Args) {
546
379
      if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
547
379
        Type *VecTy = nullptr;
548
379
        if (A->getType()->isVectorTy()) {
549
1
          VecTy = A->getType();
550
1
          // If A is a vector operand, VF should be 1 or correspond to A.
551
1
          assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
552
1
                 "Vector argument does not match VF");
553
1
        }
554
378
        else
555
378
          VecTy = VectorType::get(A->getType(), VF);
556
379
557
379
        Cost += getScalarizationOverhead(VecTy, false, true);
558
379
      }
559
379
    }
560
354
561
354
    return Cost;
562
354
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int)
Line
Count
Source
542
72
                                            unsigned VF) {
543
72
    unsigned Cost = 0;
544
72
    SmallPtrSet<const Value*, 4> UniqueOperands;
545
136
    for (const Value *A : Args) {
546
136
      if (!isa<Constant>(A) && 
UniqueOperands.insert(A).second60
) {
547
60
        Type *VecTy = nullptr;
548
60
        if (A->getType()->isVectorTy()) {
549
60
          VecTy = A->getType();
550
60
          // If A is a vector operand, VF should be 1 or correspond to A.
551
60
          assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
552
60
                 "Vector argument does not match VF");
553
60
        }
554
0
        else
555
0
          VecTy = VectorType::get(A->getType(), VF);
556
60
557
60
        Cost += getScalarizationOverhead(VecTy, false, true);
558
60
      }
559
136
    }
560
72
561
72
    return Cost;
562
72
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int)
Line
Count
Source
542
5.77k
                                            unsigned VF) {
543
5.77k
    unsigned Cost = 0;
544
5.77k
    SmallPtrSet<const Value*, 4> UniqueOperands;
545
10.9k
    for (const Value *A : Args) {
546
10.9k
      if (!isa<Constant>(A) && 
UniqueOperands.insert(A).second5.28k
) {
547
5.28k
        Type *VecTy = nullptr;
548
5.28k
        if (A->getType()->isVectorTy()) {
549
750
          VecTy = A->getType();
550
750
          // If A is a vector operand, VF should be 1 or correspond to A.
551
750
          assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
552
750
                 "Vector argument does not match VF");
553
750
        }
554
4.53k
        else
555
4.53k
          VecTy = VectorType::get(A->getType(), VF);
556
5.28k
557
5.28k
        Cost += getScalarizationOverhead(VecTy, false, true);
558
5.28k
      }
559
10.9k
    }
560
5.77k
561
5.77k
    return Cost;
562
5.77k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getOperandsScalarizationOverhead(llvm::ArrayRef<llvm::Value const*>, unsigned int)
563
564
6.23k
  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
565
6.23k
    assert(VecTy->isVectorTy());
566
6.23k
567
6.23k
    unsigned Cost = 0;
568
6.23k
569
6.23k
    Cost += getScalarizationOverhead(VecTy, true, false);
570
6.23k
    if (!Args.empty())
571
64
      Cost += getOperandsScalarizationOverhead(Args,
572
64
                                               VecTy->getVectorNumElements());
573
6.17k
    else
574
6.17k
      // When no information on arguments is provided, we add the cost
575
6.17k
      // associated with one argument as a heuristic.
576
6.17k
      Cost += getScalarizationOverhead(VecTy, false, true);
577
6.23k
578
6.23k
    return Cost;
579
6.23k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
564
5.40k
  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
565
5.40k
    assert(VecTy->isVectorTy());
566
5.40k
567
5.40k
    unsigned Cost = 0;
568
5.40k
569
5.40k
    Cost += getScalarizationOverhead(VecTy, true, false);
570
5.40k
    if (!Args.empty())
571
0
      Cost += getOperandsScalarizationOverhead(Args,
572
0
                                               VecTy->getVectorNumElements());
573
5.40k
    else
574
5.40k
      // When no information on arguments is provided, we add the cost
575
5.40k
      // associated with one argument as a heuristic.
576
5.40k
      Cost += getScalarizationOverhead(VecTy, false, true);
577
5.40k
578
5.40k
    return Cost;
579
5.40k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
564
15
  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
565
15
    assert(VecTy->isVectorTy());
566
15
567
15
    unsigned Cost = 0;
568
15
569
15
    Cost += getScalarizationOverhead(VecTy, true, false);
570
15
    if (!Args.empty())
571
0
      Cost += getOperandsScalarizationOverhead(Args,
572
0
                                               VecTy->getVectorNumElements());
573
15
    else
574
15
      // When no information on arguments is provided, we add the cost
575
15
      // associated with one argument as a heuristic.
576
15
      Cost += getScalarizationOverhead(VecTy, false, true);
577
15
578
15
    return Cost;
579
15
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
564
694
  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
565
694
    assert(VecTy->isVectorTy());
566
694
567
694
    unsigned Cost = 0;
568
694
569
694
    Cost += getScalarizationOverhead(VecTy, true, false);
570
694
    if (!Args.empty())
571
0
      Cost += getOperandsScalarizationOverhead(Args,
572
0
                                               VecTy->getVectorNumElements());
573
694
    else
574
694
      // When no information on arguments is provided, we add the cost
575
694
      // associated with one argument as a heuristic.
576
694
      Cost += getScalarizationOverhead(VecTy, false, true);
577
694
578
694
    return Cost;
579
694
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>)
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
564
2
  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
565
2
    assert(VecTy->isVectorTy());
566
2
567
2
    unsigned Cost = 0;
568
2
569
2
    Cost += getScalarizationOverhead(VecTy, true, false);
570
2
    if (!Args.empty())
571
0
      Cost += getOperandsScalarizationOverhead(Args,
572
0
                                               VecTy->getVectorNumElements());
573
2
    else
574
2
      // When no information on arguments is provided, we add the cost
575
2
      // associated with one argument as a heuristic.
576
2
      Cost += getScalarizationOverhead(VecTy, false, true);
577
2
578
2
    return Cost;
579
2
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
564
64
  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
565
64
    assert(VecTy->isVectorTy());
566
64
567
64
    unsigned Cost = 0;
568
64
569
64
    Cost += getScalarizationOverhead(VecTy, true, false);
570
64
    if (!Args.empty())
571
64
      Cost += getOperandsScalarizationOverhead(Args,
572
64
                                               VecTy->getVectorNumElements());
573
0
    else
574
0
      // When no information on arguments is provided, we add the cost
575
0
      // associated with one argument as a heuristic.
576
0
      Cost += getScalarizationOverhead(VecTy, false, true);
577
64
578
64
    return Cost;
579
64
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
564
60
  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
565
60
    assert(VecTy->isVectorTy());
566
60
567
60
    unsigned Cost = 0;
568
60
569
60
    Cost += getScalarizationOverhead(VecTy, true, false);
570
60
    if (!Args.empty())
571
0
      Cost += getOperandsScalarizationOverhead(Args,
572
0
                                               VecTy->getVectorNumElements());
573
60
    else
574
60
      // When no information on arguments is provided, we add the cost
575
60
      // associated with one argument as a heuristic.
576
60
      Cost += getScalarizationOverhead(VecTy, false, true);
577
60
578
60
    return Cost;
579
60
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getScalarizationOverhead(llvm::Type*, llvm::ArrayRef<llvm::Value const*>)
580
581
76
  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getMaxInterleaveFactor(unsigned int)
llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getMaxInterleaveFactor(unsigned int)
Line
Count
Source
581
46
  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getMaxInterleaveFactor(unsigned int)
Line
Count
Source
581
1
  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getMaxInterleaveFactor(unsigned int)
Line
Count
Source
581
6
  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getMaxInterleaveFactor(unsigned int)
Line
Count
Source
581
22
  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getMaxInterleaveFactor(unsigned int)
llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getMaxInterleaveFactor(unsigned int)
Line
Count
Source
581
1
  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
582
583
  unsigned getArithmeticInstrCost(
584
      unsigned Opcode, Type *Ty,
585
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
586
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
587
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
588
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
589
530k
      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
590
530k
    // Check if any of the operands are vector operands.
591
530k
    const TargetLoweringBase *TLI = getTLI();
592
530k
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
593
530k
    assert(ISD && "Invalid opcode");
594
530k
595
530k
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
596
530k
597
530k
    bool IsFloat = Ty->isFPOrFPVectorTy();
598
530k
    // Assume that floating point arithmetic operations cost twice as much as
599
530k
    // integer operations.
600
530k
    unsigned OpCost = (IsFloat ? 
2264k
:
1265k
);
601
530k
602
530k
    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
603
465k
      // The operation is legal. Assume it costs 1.
604
465k
      // TODO: Once we have extract/insert subvector cost we need to use them.
605
465k
      return LT.first * OpCost;
606
465k
    }
607
65.1k
608
65.1k
    if (!TLI->isOperationExpand(ISD, LT.second)) {
609
50.7k
      // If the operation is custom lowered, then assume that the code is twice
610
50.7k
      // as expensive.
611
50.7k
      return LT.first * 2 * OpCost;
612
50.7k
    }
613
14.3k
614
14.3k
    // Else, assume that we need to scalarize this op.
615
14.3k
    // TODO: If one of the types gets legalized by splitting, handle this
616
14.3k
    // similarly to what getCastInstrCost() does.
617
14.3k
    if (Ty->isVectorTy()) {
618
6.19k
      unsigned Num = Ty->getVectorNumElements();
619
6.19k
      unsigned Cost = static_cast<T *>(this)
620
6.19k
                          ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
621
6.19k
      // Return the cost of multiple scalar invocations plus the cost of
622
6.19k
      // inserting and extracting the values.
623
6.19k
      return getScalarizationOverhead(Ty, Args) + Num * Cost;
624
6.19k
    }
625
8.14k
626
8.14k
    // We don't know anything about this scalar instruction.
627
8.14k
    return OpCost;
628
8.14k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
589
420k
      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
590
420k
    // Check if any of the operands are vector operands.
591
420k
    const TargetLoweringBase *TLI = getTLI();
592
420k
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
593
420k
    assert(ISD && "Invalid opcode");
594
420k
595
420k
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
596
420k
597
420k
    bool IsFloat = Ty->isFPOrFPVectorTy();
598
420k
    // Assume that floating point arithmetic operations cost twice as much as
599
420k
    // integer operations.
600
420k
    unsigned OpCost = (IsFloat ? 
2260k
:
1160k
);
601
420k
602
420k
    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
603
364k
      // The operation is legal. Assume it costs 1.
604
364k
      // TODO: Once we have extract/insert subvector cost we need to use them.
605
364k
      return LT.first * OpCost;
606
364k
    }
607
55.6k
608
55.6k
    if (!TLI->isOperationExpand(ISD, LT.second)) {
609
48.6k
      // If the operation is custom lowered, then assume that the code is twice
610
48.6k
      // as expensive.
611
48.6k
      return LT.first * 2 * OpCost;
612
48.6k
    }
613
7.01k
614
7.01k
    // Else, assume that we need to scalarize this op.
615
7.01k
    // TODO: If one of the types gets legalized by splitting, handle this
616
7.01k
    // similarly to what getCastInstrCost() does.
617
7.01k
    if (Ty->isVectorTy()) {
618
5.40k
      unsigned Num = Ty->getVectorNumElements();
619
5.40k
      unsigned Cost = static_cast<T *>(this)
620
5.40k
                          ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
621
5.40k
      // Return the cost of multiple scalar invocations plus the cost of
622
5.40k
      // inserting and extracting the values.
623
5.40k
      return getScalarizationOverhead(Ty, Args) + Num * Cost;
624
5.40k
    }
625
1.61k
626
1.61k
    // We don't know anything about this scalar instruction.
627
1.61k
    return OpCost;
628
1.61k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
589
16
      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
590
16
    // Check if any of the operands are vector operands.
591
16
    const TargetLoweringBase *TLI = getTLI();
592
16
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
593
16
    assert(ISD && "Invalid opcode");
594
16
595
16
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
596
16
597
16
    bool IsFloat = Ty->isFPOrFPVectorTy();
598
16
    // Assume that floating point arithmetic operations cost twice as much as
599
16
    // integer operations.
600
16
    unsigned OpCost = (IsFloat ? 
212
:
14
);
601
16
602
16
    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
603
1
      // The operation is legal. Assume it costs 1.
604
1
      // TODO: Once we have extract/insert subvector cost we need to use them.
605
1
      return LT.first * OpCost;
606
1
    }
607
15
608
15
    if (!TLI->isOperationExpand(ISD, LT.second)) {
609
0
      // If the operation is custom lowered, then assume that the code is twice
610
0
      // as expensive.
611
0
      return LT.first * 2 * OpCost;
612
0
    }
613
15
614
15
    // Else, assume that we need to scalarize this op.
615
15
    // TODO: If one of the types gets legalized by splitting, handle this
616
15
    // similarly to what getCastInstrCost() does.
617
15
    if (Ty->isVectorTy()) {
618
15
      unsigned Num = Ty->getVectorNumElements();
619
15
      unsigned Cost = static_cast<T *>(this)
620
15
                          ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
621
15
      // Return the cost of multiple scalar invocations plus the cost of
622
15
      // inserting and extracting the values.
623
15
      return getScalarizationOverhead(Ty, Args) + Num * Cost;
624
15
    }
625
0
626
0
    // We don't know anything about this scalar instruction.
627
0
    return OpCost;
628
0
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
589
20.4k
      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
590
20.4k
    // Check if any of the operands are vector operands.
591
20.4k
    const TargetLoweringBase *TLI = getTLI();
592
20.4k
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
593
20.4k
    assert(ISD && "Invalid opcode");
594
20.4k
595
20.4k
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
596
20.4k
597
20.4k
    bool IsFloat = Ty->isFPOrFPVectorTy();
598
20.4k
    // Assume that floating point arithmetic operations cost twice as much as
599
20.4k
    // integer operations.
600
20.4k
    unsigned OpCost = (IsFloat ? 
22.93k
:
117.4k
);
601
20.4k
602
20.4k
    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
603
17.6k
      // The operation is legal. Assume it costs 1.
604
17.6k
      // TODO: Once we have extract/insert subvector cost we need to use them.
605
17.6k
      return LT.first * OpCost;
606
17.6k
    }
607
2.76k
608
2.76k
    if (!TLI->isOperationExpand(ISD, LT.second)) {
609
2.05k
      // If the operation is custom lowered, then assume that the code is twice
610
2.05k
      // as expensive.
611
2.05k
      return LT.first * 2 * OpCost;
612
2.05k
    }
613
712
614
712
    // Else, assume that we need to scalarize this op.
615
712
    // TODO: If one of the types gets legalized by splitting, handle this
616
712
    // similarly to what getCastInstrCost() does.
617
712
    if (Ty->isVectorTy()) {
618
694
      unsigned Num = Ty->getVectorNumElements();
619
694
      unsigned Cost = static_cast<T *>(this)
620
694
                          ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
621
694
      // Return the cost of multiple scalar invocations plus the cost of
622
694
      // inserting and extracting the values.
623
694
      return getScalarizationOverhead(Ty, Args) + Num * Cost;
624
694
    }
625
18
626
18
    // We don't know anything about this scalar instruction.
627
18
    return OpCost;
628
18
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
589
42
      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
590
42
    // Check if any of the operands are vector operands.
591
42
    const TargetLoweringBase *TLI = getTLI();
592
42
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
593
42
    assert(ISD && "Invalid opcode");
594
42
595
42
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
596
42
597
42
    bool IsFloat = Ty->isFPOrFPVectorTy();
598
42
    // Assume that floating point arithmetic operations cost twice as much as
599
42
    // integer operations.
600
42
    unsigned OpCost = (IsFloat ? 
20
: 1);
601
42
602
42
    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
603
42
      // The operation is legal. Assume it costs 1.
604
42
      // TODO: Once we have extract/insert subvector cost we need to use them.
605
42
      return LT.first * OpCost;
606
42
    }
607
0
608
0
    if (!TLI->isOperationExpand(ISD, LT.second)) {
609
0
      // If the operation is custom lowered, then assume that the code is twice
610
0
      // as expensive.
611
0
      return LT.first * 2 * OpCost;
612
0
    }
613
0
614
0
    // Else, assume that we need to scalarize this op.
615
0
    // TODO: If one of the types gets legalized by splitting, handle this
616
0
    // similarly to what getCastInstrCost() does.
617
0
    if (Ty->isVectorTy()) {
618
0
      unsigned Num = Ty->getVectorNumElements();
619
0
      unsigned Cost = static_cast<T *>(this)
620
0
                          ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
621
0
      // Return the cost of multiple scalar invocations plus the cost of
622
0
      // inserting and extracting the values.
623
0
      return getScalarizationOverhead(Ty, Args) + Num * Cost;
624
0
    }
625
0
626
0
    // We don't know anything about this scalar instruction.
627
0
    return OpCost;
628
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>)
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
589
13
      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
590
13
    // Check if any of the operands are vector operands.
591
13
    const TargetLoweringBase *TLI = getTLI();
592
13
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
593
13
    assert(ISD && "Invalid opcode");
594
13
595
13
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
596
13
597
13
    bool IsFloat = Ty->isFPOrFPVectorTy();
598
13
    // Assume that floating point arithmetic operations cost twice as much as
599
13
    // integer operations.
600
13
    unsigned OpCost = (IsFloat ? 
210
:
13
);
601
13
602
13
    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
603
11
      // The operation is legal. Assume it costs 1.
604
11
      // TODO: Once we have extract/insert subvector cost we need to use them.
605
11
      return LT.first * OpCost;
606
11
    }
607
2
608
2
    if (!TLI->isOperationExpand(ISD, LT.second)) {
609
0
      // If the operation is custom lowered, then assume that the code is twice
610
0
      // as expensive.
611
0
      return LT.first * 2 * OpCost;
612
0
    }
613
2
614
2
    // Else, assume that we need to scalarize this op.
615
2
    // TODO: If one of the types get legalized by splitting, handle this
616
2
    // similarly to what getCastInstrCost() does.
617
2
    if (Ty->isVectorTy()) {
618
2
      unsigned Num = Ty->getVectorNumElements();
619
2
      unsigned Cost = static_cast<T *>(this)
620
2
                          ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
621
2
      // Return the cost of multiple scalar invocation plus the cost of
622
2
      // inserting and extracting the values.
623
2
      return getScalarizationOverhead(Ty, Args) + Num * Cost;
624
2
    }
625
0
626
0
    // We don't know anything about this scalar instruction.
627
0
    return OpCost;
628
0
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
589
288
      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
590
288
    // Check if any of the operands are vector operands.
591
288
    const TargetLoweringBase *TLI = getTLI();
592
288
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
593
288
    assert(ISD && "Invalid opcode");
594
288
595
288
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
596
288
597
288
    bool IsFloat = Ty->isFPOrFPVectorTy();
598
288
    // Assume that floating point arithmetic operations cost twice as much as
599
288
    // integer operations.
600
288
    unsigned OpCost = (IsFloat ? 
270
:
1218
);
601
288
602
288
    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
603
288
      // The operation is legal. Assume it costs 1.
604
288
      // TODO: Once we have extract/insert subvector cost we need to use them.
605
288
      return LT.first * OpCost;
606
288
    }
607
0
608
0
    if (!TLI->isOperationExpand(ISD, LT.second)) {
609
0
      // If the operation is custom lowered, then assume that the code is twice
610
0
      // as expensive.
611
0
      return LT.first * 2 * OpCost;
612
0
    }
613
0
614
0
    // Else, assume that we need to scalarize this op.
615
0
    // TODO: If one of the types get legalized by splitting, handle this
616
0
    // similarly to what getCastInstrCost() does.
617
0
    if (Ty->isVectorTy()) {
618
0
      unsigned Num = Ty->getVectorNumElements();
619
0
      unsigned Cost = static_cast<T *>(this)
620
0
                          ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
621
0
      // Return the cost of multiple scalar invocation plus the cost of
622
0
      // inserting and extracting the values.
623
0
      return getScalarizationOverhead(Ty, Args) + Num * Cost;
624
0
    }
625
0
626
0
    // We don't know anything about this scalar instruction.
627
0
    return OpCost;
628
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
589
378
      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
590
378
    // Check if any of the operands are vector operands.
591
378
    const TargetLoweringBase *TLI = getTLI();
592
378
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
593
378
    assert(ISD && "Invalid opcode");
594
378
595
378
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
596
378
597
378
    bool IsFloat = Ty->isFPOrFPVectorTy();
598
378
    // Assume that floating point arithmetic operations cost twice as much as
599
378
    // integer operations.
600
378
    unsigned OpCost = (IsFloat ? 
20
: 1);
601
378
602
378
    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
603
358
      // The operation is legal. Assume it costs 1.
604
358
      // TODO: Once we have extract/insert subvector cost we need to use them.
605
358
      return LT.first * OpCost;
606
358
    }
607
20
608
20
    if (!TLI->isOperationExpand(ISD, LT.second)) {
609
0
      // If the operation is custom lowered, then assume that the code is twice
610
0
      // as expensive.
611
0
      return LT.first * 2 * OpCost;
612
0
    }
613
20
614
20
    // Else, assume that we need to scalarize this op.
615
20
    // TODO: If one of the types get legalized by splitting, handle this
616
20
    // similarly to what getCastInstrCost() does.
617
20
    if (Ty->isVectorTy()) {
618
20
      unsigned Num = Ty->getVectorNumElements();
619
20
      unsigned Cost = static_cast<T *>(this)
620
20
                          ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
621
20
      // Return the cost of multiple scalar invocation plus the cost of
622
20
      // inserting and extracting the values.
623
20
      return getScalarizationOverhead(Ty, Args) + Num * Cost;
624
20
    }
625
0
626
0
    // We don't know anything about this scalar instruction.
627
0
    return OpCost;
628
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>)
Line
Count
Source
589
88.5k
      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
590
88.5k
    // Check if any of the operands are vector operands.
591
88.5k
    const TargetLoweringBase *TLI = getTLI();
592
88.5k
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
593
88.5k
    assert(ISD && "Invalid opcode");
594
88.5k
595
88.5k
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
596
88.5k
597
88.5k
    bool IsFloat = Ty->isFPOrFPVectorTy();
598
88.5k
    // Assume that floating point arithmetic operations cost twice as much as
599
88.5k
    // integer operations.
600
88.5k
    unsigned OpCost = (IsFloat ? 
21.15k
:
187.3k
);
601
88.5k
602
88.5k
    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
603
81.8k
      // The operation is legal. Assume it costs 1.
604
81.8k
      // TODO: Once we have extract/insert subvector cost we need to use them.
605
81.8k
      return LT.first * OpCost;
606
81.8k
    }
607
6.65k
608
6.65k
    if (!TLI->isOperationExpand(ISD, LT.second)) {
609
82
      // If the operation is custom lowered, then assume that the code is twice
610
82
      // as expensive.
611
82
      return LT.first * 2 * OpCost;
612
82
    }
613
6.56k
614
6.56k
    // Else, assume that we need to scalarize this op.
615
6.56k
    // TODO: If one of the types get legalized by splitting, handle this
616
6.56k
    // similarly to what getCastInstrCost() does.
617
6.56k
    if (Ty->isVectorTy()) {
618
60
      unsigned Num = Ty->getVectorNumElements();
619
60
      unsigned Cost = static_cast<T *>(this)
620
60
                          ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
621
60
      // Return the cost of multiple scalar invocation plus the cost of
622
60
      // inserting and extracting the values.
623
60
      return getScalarizationOverhead(Ty, Args) + Num * Cost;
624
60
    }
625
6.50k
626
6.50k
    // We don't know anything about this scalar instruction.
627
6.50k
    return OpCost;
628
6.50k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getArithmeticInstrCost(unsigned int, llvm::Type*, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueKind, llvm::TargetTransformInfo::OperandValueProperties, llvm::TargetTransformInfo::OperandValueProperties, llvm::ArrayRef<llvm::Value const*>)
629
630
  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
631
8.01k
                          Type *SubTp) {
632
8.01k
    switch (Kind) {
633
8.01k
    case TTI::SK_Broadcast:
634
51
      return getBroadcastShuffleOverhead(Tp);
635
8.01k
    case TTI::SK_Select:
636
3.96k
    case TTI::SK_Reverse:
637
3.96k
    case TTI::SK_Transpose:
638
3.96k
    case TTI::SK_PermuteSingleSrc:
639
3.96k
    case TTI::SK_PermuteTwoSrc:
640
3.96k
      return getPermuteShuffleOverhead(Tp);
641
3.96k
    case TTI::SK_ExtractSubvector:
642
3.93k
      return getExtractSubvectorOverhead(Tp, Index, SubTp);
643
3.96k
    case TTI::SK_InsertSubvector:
644
73
      return getInsertSubvectorOverhead(Tp, Index, SubTp);
645
0
    }
646
0
    llvm_unreachable("Unknown TTI::ShuffleKind");
647
0
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*)
Line
Count
Source
631
7.21k
                          Type *SubTp) {
632
7.21k
    switch (Kind) {
633
7.21k
    case TTI::SK_Broadcast:
634
2
      return getBroadcastShuffleOverhead(Tp);
635
7.21k
    case TTI::SK_Select:
636
3.32k
    case TTI::SK_Reverse:
637
3.32k
    case TTI::SK_Transpose:
638
3.32k
    case TTI::SK_PermuteSingleSrc:
639
3.32k
    case TTI::SK_PermuteTwoSrc:
640
3.32k
      return getPermuteShuffleOverhead(Tp);
641
3.89k
    case TTI::SK_ExtractSubvector:
642
3.89k
      return getExtractSubvectorOverhead(Tp, Index, SubTp);
643
3.32k
    case TTI::SK_InsertSubvector:
644
0
      return getInsertSubvectorOverhead(Tp, Index, SubTp);
645
0
    }
646
0
    llvm_unreachable("Unknown TTI::ShuffleKind");
647
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*)
Line
Count
Source
631
81
                          Type *SubTp) {
632
81
    switch (Kind) {
633
81
    case TTI::SK_Broadcast:
634
2
      return getBroadcastShuffleOverhead(Tp);
635
81
    case TTI::SK_Select:
636
54
    case TTI::SK_Reverse:
637
54
    case TTI::SK_Transpose:
638
54
    case TTI::SK_PermuteSingleSrc:
639
54
    case TTI::SK_PermuteTwoSrc:
640
54
      return getPermuteShuffleOverhead(Tp);
641
54
    case TTI::SK_ExtractSubvector:
642
25
      return getExtractSubvectorOverhead(Tp, Index, SubTp);
643
54
    case TTI::SK_InsertSubvector:
644
0
      return getInsertSubvectorOverhead(Tp, Index, SubTp);
645
0
    }
646
0
    llvm_unreachable("Unknown TTI::ShuffleKind");
647
0
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*)
Line
Count
Source
631
96
                          Type *SubTp) {
632
96
    switch (Kind) {
633
96
    case TTI::SK_Broadcast:
634
0
      return getBroadcastShuffleOverhead(Tp);
635
96
    case TTI::SK_Select:
636
96
    case TTI::SK_Reverse:
637
96
    case TTI::SK_Transpose:
638
96
    case TTI::SK_PermuteSingleSrc:
639
96
    case TTI::SK_PermuteTwoSrc:
640
96
      return getPermuteShuffleOverhead(Tp);
641
96
    case TTI::SK_ExtractSubvector:
642
0
      return getExtractSubvectorOverhead(Tp, Index, SubTp);
643
96
    case TTI::SK_InsertSubvector:
644
0
      return getInsertSubvectorOverhead(Tp, Index, SubTp);
645
0
    }
646
0
    llvm_unreachable("Unknown TTI::ShuffleKind");
647
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*)
Line
Count
Source
631
619
                          Type *SubTp) {
632
619
    switch (Kind) {
633
619
    case TTI::SK_Broadcast:
634
47
      return getBroadcastShuffleOverhead(Tp);
635
619
    case TTI::SK_Select:
636
487
    case TTI::SK_Reverse:
637
487
    case TTI::SK_Transpose:
638
487
    case TTI::SK_PermuteSingleSrc:
639
487
    case TTI::SK_PermuteTwoSrc:
640
487
      return getPermuteShuffleOverhead(Tp);
641
487
    case TTI::SK_ExtractSubvector:
642
12
      return getExtractSubvectorOverhead(Tp, Index, SubTp);
643
487
    case TTI::SK_InsertSubvector:
644
73
      return getInsertSubvectorOverhead(Tp, Index, SubTp);
645
0
    }
646
0
    llvm_unreachable("Unknown TTI::ShuffleKind");
647
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getShuffleCost(llvm::TargetTransformInfo::ShuffleKind, llvm::Type*, int, llvm::Type*)
648
649
  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
650
120k
                            const Instruction *I = nullptr) {
651
120k
    const TargetLoweringBase *TLI = getTLI();
652
120k
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
653
120k
    assert(ISD && "Invalid opcode");
654
120k
    std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
655
120k
    std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
656
120k
657
120k
    // Check for NOOP conversions.
658
120k
    if (SrcLT.first == DstLT.first &&
659
120k
        
SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()113k
) {
660
41.2k
661
41.2k
      // Bitcast between types that are legalized to the same type are free.
662
41.2k
      if (Opcode == Instruction::BitCast || 
Opcode == Instruction::Trunc29.3k
)
663
14.2k
        return 0;
664
106k
    }
665
106k
666
106k
    if (Opcode == Instruction::Trunc &&
667
106k
        
TLI->isTruncateFree(SrcLT.second, DstLT.second)44.4k
)
668
24.4k
      return 0;
669
81.6k
670
81.6k
    if (Opcode == Instruction::ZExt &&
671
81.6k
        
TLI->isZExtFree(SrcLT.second, DstLT.second)20.6k
)
672
4.28k
      return 0;
673
77.3k
674
77.3k
    if (Opcode == Instruction::AddrSpaceCast &&
675
77.3k
        TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
676
15
                                 Dst->getPointerAddressSpace()))
677
6
      return 0;
678
77.3k
679
77.3k
    // If this is a zext/sext of a load, return 0 if the corresponding
680
77.3k
    // extending load exists on target.
681
77.3k
    if ((Opcode == Instruction::ZExt || 
Opcode == Instruction::SExt60.9k
) &&
682
77.3k
        
I30.8k
&&
isa<LoadInst>(I->getOperand(0))3.10k
) {
683
785
        EVT ExtVT = EVT::getEVT(Dst);
684
785
        EVT LoadVT = EVT::getEVT(Src);
685
785
        unsigned LType =
686
785
          ((Opcode == Instruction::ZExt) ? 
ISD::ZEXTLOAD519
:
ISD::SEXTLOAD266
);
687
785
        if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
688
572
          return 0;
689
76.7k
    }
690
76.7k
691
76.7k
    // If the cast is marked as legal (or promote) then assume low cost.
692
76.7k
    if (SrcLT.first == DstLT.first &&
693
76.7k
        
TLI->isOperationLegalOrPromote(ISD, DstLT.second)69.8k
)
694
65.4k
      return 1;
695
11.3k
696
11.3k
    // Handle scalar conversions.
697
11.3k
    if (!Src->isVectorTy() && 
!Dst->isVectorTy()3.34k
) {
698
3.34k
      // Scalar bitcasts are usually free.
699
3.34k
      if (Opcode == Instruction::BitCast)
700
67
        return 0;
701
3.28k
702
3.28k
      // Just check the op cost. If the operation is legal then assume it costs
703
3.28k
      // 1.
704
3.28k
      if (!TLI->isOperationExpand(ISD, DstLT.second))
705
2.98k
        return 1;
706
298
707
298
      // Assume that illegal scalar instruction are expensive.
708
298
      return 4;
709
298
    }
710
7.99k
711
7.99k
    // Check vector-to-vector casts.
712
7.99k
    if (Dst->isVectorTy() && Src->isVectorTy()) {
713
7.99k
      // If the cast is between same-sized registers, then the check is simple.
714
7.99k
      if (SrcLT.first == DstLT.first &&
715
7.99k
          
SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()2.59k
) {
716
2.22k
717
2.22k
        // Assume that Zext is done using AND.
718
2.22k
        if (Opcode == Instruction::ZExt)
719
1.07k
          return 1;
720
1.15k
721
1.15k
        // Assume that sext is done using SHL and SRA.
722
1.15k
        if (Opcode == Instruction::SExt)
723
256
          return 2;
724
896
725
896
        // Just check the op cost. If the operation is legal then assume it
726
896
        // costs
727
896
        // 1 and multiply by the type-legalization overhead.
728
896
        if (!TLI->isOperationExpand(ISD, DstLT.second))
729
36
          return SrcLT.first * 1;
730
6.62k
      }
731
6.62k
732
6.62k
      // If we are legalizing by splitting, query the concrete TTI for the cost
733
6.62k
      // of casting the original vector twice. We also need to factor in the
734
6.62k
      // cost of the split itself. Count that as 1, to be consistent with
735
6.62k
      // TLI->getTypeLegalizationCost().
736
6.62k
      if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
737
6.62k
           TargetLowering::TypeSplitVector) ||
738
6.62k
          (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
739
4.79k
           TargetLowering::TypeSplitVector)) {
740
4.79k
        Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
741
4.79k
                                         Dst->getVectorNumElements() / 2);
742
4.79k
        Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
743
4.79k
                                         Src->getVectorNumElements() / 2);
744
4.79k
        T *TTI = static_cast<T *>(this);
745
4.79k
        return TTI->getVectorSplitCost() +
746
4.79k
               (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
747
4.79k
      }
748
1.83k
749
1.83k
      // In other cases where the source or destination are illegal, assume
750
1.83k
      // the operation will get scalarized.
751
1.83k
      unsigned Num = Dst->getVectorNumElements();
752
1.83k
      unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
753
1.83k
          Opcode, Dst->getScalarType(), Src->getScalarType(), I);
754
1.83k
755
1.83k
      // Return the cost of multiple scalar invocation plus the cost of
756
1.83k
      // inserting and extracting the values.
757
1.83k
      return getScalarizationOverhead(Dst, true, true) + Num * Cost;
758
1.83k
    }
759
0
760
0
    // We already handled vector-to-vector and scalar-to-scalar conversions.
761
0
    // This
762
0
    // is where we handle bitcast between vectors and scalars. We need to assume
763
0
    //  that the conversion is scalarized in one way or another.
764
0
    if (Opcode == Instruction::BitCast)
765
0
      // Illegal bitcasts are done by storing and loading from a stack slot.
766
0
      return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
767
0
                                : 0) +
768
0
             (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
769
0
                                : 0);
770
0
771
0
    llvm_unreachable("Unhandled cast");
772
0
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Line
Count
Source
650
102k
                            const Instruction *I = nullptr) {
651
102k
    const TargetLoweringBase *TLI = getTLI();
652
102k
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
653
102k
    assert(ISD && "Invalid opcode");
654
102k
    std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
655
102k
    std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
656
102k
657
102k
    // Check for NOOP conversions.
658
102k
    if (SrcLT.first == DstLT.first &&
659
102k
        
SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()100k
) {
660
34.0k
661
34.0k
      // Bitcast between types that are legalized to the same type are free.
662
34.0k
      if (Opcode == Instruction::BitCast || 
Opcode == Instruction::Trunc23.3k
)
663
12.3k
        return 0;
664
89.9k
    }
665
89.9k
666
89.9k
    if (Opcode == Instruction::Trunc &&
667
89.9k
        
TLI->isTruncateFree(SrcLT.second, DstLT.second)40.1k
)
668
23.3k
      return 0;
669
66.6k
670
66.6k
    if (Opcode == Instruction::ZExt &&
671
66.6k
        
TLI->isZExtFree(SrcLT.second, DstLT.second)16.9k
)
672
3.97k
      return 0;
673
62.6k
674
62.6k
    if (Opcode == Instruction::AddrSpaceCast &&
675
62.6k
        TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
676
0
                                 Dst->getPointerAddressSpace()))
677
0
      return 0;
678
62.6k
679
62.6k
    // If this is a zext/sext of a load, return 0 if the corresponding
680
62.6k
    // extending load exists on target.
681
62.6k
    if ((Opcode == Instruction::ZExt || 
Opcode == Instruction::SExt49.7k
) &&
682
62.6k
        
I26.0k
&&
isa<LoadInst>(I->getOperand(0))0
) {
683
0
        EVT ExtVT = EVT::getEVT(Dst);
684
0
        EVT LoadVT = EVT::getEVT(Src);
685
0
        unsigned LType =
686
0
          ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
687
0
        if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
688
0
          return 0;
689
62.6k
    }
690
62.6k
691
62.6k
    // If the cast is marked as legal (or promote) then assume low cost.
692
62.6k
    if (SrcLT.first == DstLT.first &&
693
62.6k
        
TLI->isOperationLegalOrPromote(ISD, DstLT.second)61.1k
)
694
60.1k
      return 1;
695
2.52k
696
2.52k
    // Handle scalar conversions.
697
2.52k
    if (!Src->isVectorTy() && 
!Dst->isVectorTy()1.10k
) {
698
1.10k
      // Scalar bitcasts are usually free.
699
1.10k
      if (Opcode == Instruction::BitCast)
700
0
        return 0;
701
1.10k
702
1.10k
      // Just check the op cost. If the operation is legal then assume it costs
703
1.10k
      // 1.
704
1.10k
      if (!TLI->isOperationExpand(ISD, DstLT.second))
705
1.10k
        return 1;
706
0
707
0
      // Assume that illegal scalar instruction are expensive.
708
0
      return 4;
709
0
    }
710
1.42k
711
1.42k
    // Check vector-to-vector casts.
712
1.42k
    if (Dst->isVectorTy() && Src->isVectorTy()) {
713
1.42k
      // If the cast is between same-sized registers, then the check is simple.
714
1.42k
      if (SrcLT.first == DstLT.first &&
715
1.42k
          
SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()0
) {
716
0
717
0
        // Assume that Zext is done using AND.
718
0
        if (Opcode == Instruction::ZExt)
719
0
          return 1;
720
0
721
0
        // Assume that sext is done using SHL and SRA.
722
0
        if (Opcode == Instruction::SExt)
723
0
          return 2;
724
0
725
0
        // Just check the op cost. If the operation is legal then assume it
726
0
        // costs
727
0
        // 1 and multiply by the type-legalization overhead.
728
0
        if (!TLI->isOperationExpand(ISD, DstLT.second))
729
0
          return SrcLT.first * 1;
730
1.42k
      }
731
1.42k
732
1.42k
      // If we are legalizing by splitting, query the concrete TTI for the cost
733
1.42k
      // of casting the original vector twice. We also need to factor in the
734
1.42k
      // cost of the split itself. Count that as 1, to be consistent with
735
1.42k
      // TLI->getTypeLegalizationCost().
736
1.42k
      if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
737
1.42k
           TargetLowering::TypeSplitVector) ||
738
1.42k
          (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
739
1.38k
           TargetLowering::TypeSplitVector)) {
740
1.38k
        Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
741
1.38k
                                         Dst->getVectorNumElements() / 2);
742
1.38k
        Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
743
1.38k
                                         Src->getVectorNumElements() / 2);
744
1.38k
        T *TTI = static_cast<T *>(this);
745
1.38k
        return TTI->getVectorSplitCost() +
746
1.38k
               (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
747
1.38k
      }
748
36
749
36
      // In other cases where the source or destination are illegal, assume
750
36
      // the operation will get scalarized.
751
36
      unsigned Num = Dst->getVectorNumElements();
752
36
      unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
753
36
          Opcode, Dst->getScalarType(), Src->getScalarType(), I);
754
36
755
36
      // Return the cost of multiple scalar invocation plus the cost of
756
36
      // inserting and extracting the values.
757
36
      return getScalarizationOverhead(Dst, true, true) + Num * Cost;
758
36
    }
759
0
760
0
    // We already handled vector-to-vector and scalar-to-scalar conversions.
761
0
    // This
762
0
    // is where we handle bitcast between vectors and scalars. We need to assume
763
0
    //  that the conversion is scalarized in one way or another.
764
0
    if (Opcode == Instruction::BitCast)
765
0
      // Illegal bitcasts are done by storing and loading from a stack slot.
766
0
      return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
767
0
                                : 0) +
768
0
             (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
769
0
                                : 0);
770
0
771
0
    llvm_unreachable("Unhandled cast");
772
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Line
Count
Source
650
17
                            const Instruction *I = nullptr) {
651
17
    const TargetLoweringBase *TLI = getTLI();
652
17
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
653
17
    assert(ISD && "Invalid opcode");
654
17
    std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
655
17
    std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
656
17
657
17
    // Check for NOOP conversions.
658
17
    if (SrcLT.first == DstLT.first &&
659
17
        SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
660
15
661
15
      // Bitcast between types that are legalized to the same type are free.
662
15
      if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
663
0
        return 0;
664
17
    }
665
17
666
17
    if (Opcode == Instruction::Trunc &&
667
17
        
TLI->isTruncateFree(SrcLT.second, DstLT.second)0
)
668
0
      return 0;
669
17
670
17
    if (Opcode == Instruction::ZExt &&
671
17
        
TLI->isZExtFree(SrcLT.second, DstLT.second)2
)
672
2
      return 0;
673
15
674
15
    if (Opcode == Instruction::AddrSpaceCast &&
675
15
        TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
676
15
                                 Dst->getPointerAddressSpace()))
677
6
      return 0;
678
9
679
9
    // If this is a zext/sext of a load, return 0 if the corresponding
680
9
    // extending load exists on target.
681
9
    if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
682
9
        
I0
&&
isa<LoadInst>(I->getOperand(0))0
) {
683
0
        EVT ExtVT = EVT::getEVT(Dst);
684
0
        EVT LoadVT = EVT::getEVT(Src);
685
0
        unsigned LType =
686
0
          ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
687
0
        if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
688
0
          return 0;
689
9
    }
690
9
691
9
    // If the cast is marked as legal (or promote) then assume low cost.
692
9
    if (SrcLT.first == DstLT.first &&
693
9
        TLI->isOperationLegalOrPromote(ISD, DstLT.second))
694
0
      return 1;
695
9
696
9
    // Handle scalar conversions.
697
9
    if (!Src->isVectorTy() && 
!Dst->isVectorTy()3
) {
698
3
      // Scalar bitcasts are usually free.
699
3
      if (Opcode == Instruction::BitCast)
700
0
        return 0;
701
3
702
3
      // Just check the op cost. If the operation is legal then assume it costs
703
3
      // 1.
704
3
      if (!TLI->isOperationExpand(ISD, DstLT.second))
705
3
        return 1;
706
0
707
0
      // Assume that illegal scalar instruction are expensive.
708
0
      return 4;
709
0
    }
710
6
711
6
    // Check vector-to-vector casts.
712
6
    if (Dst->isVectorTy() && Src->isVectorTy()) {
713
6
      // If the cast is between same-sized registers, then the check is simple.
714
6
      if (SrcLT.first == DstLT.first &&
715
6
          SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
716
6
717
6
        // Assume that Zext is done using AND.
718
6
        if (Opcode == Instruction::ZExt)
719
0
          return 1;
720
6
721
6
        // Assume that sext is done using SHL and SRA.
722
6
        if (Opcode == Instruction::SExt)
723
0
          return 2;
724
6
725
6
        // Just check the op cost. If the operation is legal then assume it
726
6
        // costs
727
6
        // 1 and multiply by the type-legalization overhead.
728
6
        if (!TLI->isOperationExpand(ISD, DstLT.second))
729
0
          return SrcLT.first * 1;
730
6
      }
731
6
732
6
      // If we are legalizing by splitting, query the concrete TTI for the cost
733
6
      // of casting the original vector twice. We also need to factor in the
734
6
      // cost of the split itself. Count that as 1, to be consistent with
735
6
      // TLI->getTypeLegalizationCost().
736
6
      if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
737
6
           TargetLowering::TypeSplitVector) ||
738
6
          (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
739
4
           TargetLowering::TypeSplitVector)) {
740
4
        Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
741
4
                                         Dst->getVectorNumElements() / 2);
742
4
        Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
743
4
                                         Src->getVectorNumElements() / 2);
744
4
        T *TTI = static_cast<T *>(this);
745
4
        return TTI->getVectorSplitCost() +
746
4
               (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
747
4
      }
748
2
749
2
      // In other cases where the source or destination are illegal, assume
750
2
      // the operation will get scalarized.
751
2
      unsigned Num = Dst->getVectorNumElements();
752
2
      unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
753
2
          Opcode, Dst->getScalarType(), Src->getScalarType(), I);
754
2
755
2
      // Return the cost of multiple scalar invocation plus the cost of
756
2
      // inserting and extracting the values.
757
2
      return getScalarizationOverhead(Dst, true, true) + Num * Cost;
758
2
    }
759
0
760
0
    // We already handled vector-to-vector and scalar-to-scalar conversions.
761
0
    // This
762
0
    // is where we handle bitcast between vectors and scalars. We need to assume
763
0
    //  that the conversion is scalarized in one way or another.
764
0
    if (Opcode == Instruction::BitCast)
765
0
      // Illegal bitcasts are done by storing and loading from a stack slot.
766
0
      return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
767
0
                                : 0) +
768
0
             (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
769
0
                                : 0);
770
0
771
0
    llvm_unreachable("Unhandled cast");
772
0
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Line
Count
Source
650
1.18k
                            const Instruction *I = nullptr) {
651
1.18k
    const TargetLoweringBase *TLI = getTLI();
652
1.18k
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
653
1.18k
    assert(ISD && "Invalid opcode");
654
1.18k
    std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
655
1.18k
    std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
656
1.18k
657
1.18k
    // Check for NOOP conversions.
658
1.18k
    if (SrcLT.first == DstLT.first &&
659
1.18k
        
SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()856
) {
660
606
661
606
      // Bitcast between types that are legalized to the same type are free.
662
606
      if (Opcode == Instruction::BitCast || 
Opcode == Instruction::Trunc302
)
663
316
        return 0;
664
865
    }
665
865
666
865
    if (Opcode == Instruction::Trunc &&
667
865
        
TLI->isTruncateFree(SrcLT.second, DstLT.second)168
)
668
0
      return 0;
669
865
670
865
    if (Opcode == Instruction::ZExt &&
671
865
        
TLI->isZExtFree(SrcLT.second, DstLT.second)258
)
672
0
      return 0;
673
865
674
865
    if (Opcode == Instruction::AddrSpaceCast &&
675
865
        TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
676
0
                                 Dst->getPointerAddressSpace()))
677
0
      return 0;
678
865
679
865
    // If this is a zext/sext of a load, return 0 if the corresponding
680
865
    // extending load exists on target.
681
865
    if ((Opcode == Instruction::ZExt || 
Opcode == Instruction::SExt607
) &&
682
865
        
I335
&&
isa<LoadInst>(I->getOperand(0))0
) {
683
0
        EVT ExtVT = EVT::getEVT(Dst);
684
0
        EVT LoadVT = EVT::getEVT(Src);
685
0
        unsigned LType =
686
0
          ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
687
0
        if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
688
0
          return 0;
689
865
    }
690
865
691
865
    // If the cast is marked as legal (or promote) then assume low cost.
692
865
    if (SrcLT.first == DstLT.first &&
693
865
        
TLI->isOperationLegalOrPromote(ISD, DstLT.second)540
)
694
395
      return 1;
695
470
696
470
    // Handle scalar conversions.
697
470
    if (!Src->isVectorTy() && 
!Dst->isVectorTy()116
) {
698
116
      // Scalar bitcasts are usually free.
699
116
      if (Opcode == Instruction::BitCast)
700
58
        return 0;
701
58
702
58
      // Just check the op cost. If the operation is legal then assume it costs
703
58
      // 1.
704
58
      if (!TLI->isOperationExpand(ISD, DstLT.second))
705
58
        return 1;
706
0
707
0
      // Assume that illegal scalar instruction are expensive.
708
0
      return 4;
709
0
    }
710
354
711
354
    // Check vector-to-vector casts.
712
354
    if (Dst->isVectorTy() && Src->isVectorTy()) {
713
354
      // If the cast is between same-sized registers, then the check is simple.
714
354
      if (SrcLT.first == DstLT.first &&
715
354
          
SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()144
) {
716
66
717
66
        // Assume that Zext is done using AND.
718
66
        if (Opcode == Instruction::ZExt)
719
0
          return 1;
720
66
721
66
        // Assume that sext is done using SHL and SRA.
722
66
        if (Opcode == Instruction::SExt)
723
0
          return 2;
724
66
725
66
        // Just check the op cost. If the operation is legal then assume it
726
66
        // costs
727
66
        // 1 and multiply by the type-legalization overhead.
728
66
        if (!TLI->isOperationExpand(ISD, DstLT.second))
729
12
          return SrcLT.first * 1;
730
342
      }
731
342
732
342
      // If we are legalizing by splitting, query the concrete TTI for the cost
733
342
      // of casting the original vector twice. We also need to factor in the
734
342
      // cost of the split itself. Count that as 1, to be consistent with
735
342
      // TLI->getTypeLegalizationCost().
736
342
      if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
737
342
           TargetLowering::TypeSplitVector) ||
738
342
          (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
739
228
           TargetLowering::TypeSplitVector)) {
740
228
        Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
741
228
                                         Dst->getVectorNumElements() / 2);
742
228
        Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
743
228
                                         Src->getVectorNumElements() / 2);
744
228
        T *TTI = static_cast<T *>(this);
745
228
        return TTI->getVectorSplitCost() +
746
228
               (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
747
228
      }
748
114
749
114
      // In other cases where the source or destination are illegal, assume
750
114
      // the operation will get scalarized.
751
114
      unsigned Num = Dst->getVectorNumElements();
752
114
      unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
753
114
          Opcode, Dst->getScalarType(), Src->getScalarType(), I);
754
114
755
114
      // Return the cost of multiple scalar invocation plus the cost of
756
114
      // inserting and extracting the values.
757
114
      return getScalarizationOverhead(Dst, true, true) + Num * Cost;
758
114
    }
759
0
760
0
    // We already handled vector-to-vector and scalar-to-scalar conversions.
761
0
    // This
762
0
    // is where we handle bitcast between vectors and scalars. We need to assume
763
0
    //  that the conversion is scalarized in one way or another.
764
0
    if (Opcode == Instruction::BitCast)
765
0
      // Illegal bitcasts are done by storing and loading from a stack slot.
766
0
      return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
767
0
                                : 0) +
768
0
             (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
769
0
                                : 0);
770
0
771
0
    llvm_unreachable("Unhandled cast");
772
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Line
Count
Source
650
373
                            const Instruction *I = nullptr) {
651
373
    const TargetLoweringBase *TLI = getTLI();
652
373
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
653
373
    assert(ISD && "Invalid opcode");
654
373
    std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
655
373
    std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
656
373
657
373
    // Check for NOOP conversions.
658
373
    if (SrcLT.first == DstLT.first &&
659
373
        
SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()372
) {
660
159
661
159
      // Bitcast between types that are legalized to the same type are free.
662
159
      if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
663
0
        return 0;
664
373
    }
665
373
666
373
    if (Opcode == Instruction::Trunc &&
667
373
        
TLI->isTruncateFree(SrcLT.second, DstLT.second)172
)
668
172
      return 0;
669
201
670
201
    if (Opcode == Instruction::ZExt &&
671
201
        
TLI->isZExtFree(SrcLT.second, DstLT.second)16
)
672
0
      return 0;
673
201
674
201
    if (Opcode == Instruction::AddrSpaceCast &&
675
201
        TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
676
0
                                 Dst->getPointerAddressSpace()))
677
0
      return 0;
678
201
679
201
    // If this is a zext/sext of a load, return 0 if the corresponding
680
201
    // extending load exists on target.
681
201
    if ((Opcode == Instruction::ZExt || 
Opcode == Instruction::SExt185
) &&
682
201
        
I21
&&
isa<LoadInst>(I->getOperand(0))0
) {
683
0
        EVT ExtVT = EVT::getEVT(Dst);
684
0
        EVT LoadVT = EVT::getEVT(Src);
685
0
        unsigned LType =
686
0
          ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
687
0
        if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
688
0
          return 0;
689
201
    }
690
201
691
201
    // If the cast is marked as legal (or promote) then assume low cost.
692
201
    if (SrcLT.first == DstLT.first &&
693
201
        
TLI->isOperationLegalOrPromote(ISD, DstLT.second)200
)
694
200
      return 1;
695
1
696
1
    // Handle scalar conversions.
697
1
    if (!Src->isVectorTy() && 
!Dst->isVectorTy()0
) {
698
0
      // Scalar bitcasts are usually free.
699
0
      if (Opcode == Instruction::BitCast)
700
0
        return 0;
701
0
702
0
      // Just check the op cost. If the operation is legal then assume it costs
703
0
      // 1.
704
0
      if (!TLI->isOperationExpand(ISD, DstLT.second))
705
0
        return 1;
706
0
707
0
      // Assume that illegal scalar instruction are expensive.
708
0
      return 4;
709
0
    }
710
1
711
1
    // Check vector-to-vector casts.
712
1
    if (Dst->isVectorTy() && Src->isVectorTy()) {
713
1
      // If the cast is between same-sized registers, then the check is simple.
714
1
      if (SrcLT.first == DstLT.first &&
715
1
          
SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()0
) {
716
0
717
0
        // Assume that Zext is done using AND.
718
0
        if (Opcode == Instruction::ZExt)
719
0
          return 1;
720
0
721
0
        // Assume that sext is done using SHL and SRA.
722
0
        if (Opcode == Instruction::SExt)
723
0
          return 2;
724
0
725
0
        // Just check the op cost. If the operation is legal then assume it
726
0
        // costs
727
0
        // 1 and multiply by the type-legalization overhead.
728
0
        if (!TLI->isOperationExpand(ISD, DstLT.second))
729
0
          return SrcLT.first * 1;
730
1
      }
731
1
732
1
      // If we are legalizing by splitting, query the concrete TTI for the cost
733
1
      // of casting the original vector twice. We also need to factor in the
734
1
      // cost of the split itself. Count that as 1, to be consistent with
735
1
      // TLI->getTypeLegalizationCost().
736
1
      if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
737
1
           TargetLowering::TypeSplitVector) ||
738
1
          (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
739
1
           TargetLowering::TypeSplitVector)) {
740
1
        Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
741
1
                                         Dst->getVectorNumElements() / 2);
742
1
        Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
743
1
                                         Src->getVectorNumElements() / 2);
744
1
        T *TTI = static_cast<T *>(this);
745
1
        return TTI->getVectorSplitCost() +
746
1
               (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
747
1
      }
748
0
749
0
      // In other cases where the source or destination are illegal, assume
750
0
      // the operation will get scalarized.
751
0
      unsigned Num = Dst->getVectorNumElements();
752
0
      unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
753
0
          Opcode, Dst->getScalarType(), Src->getScalarType(), I);
754
0
755
0
      // Return the cost of multiple scalar invocation plus the cost of
756
0
      // inserting and extracting the values.
757
0
      return getScalarizationOverhead(Dst, true, true) + Num * Cost;
758
0
    }
759
0
760
0
    // We already handled vector-to-vector and scalar-to-scalar conversions.
761
0
    // This
762
0
    // is where we handle bitcast between vectors and scalars. We need to assume
763
0
    //  that the conversion is scalarized in one way or another.
764
0
    if (Opcode == Instruction::BitCast)
765
0
      // Illegal bitcasts are done by storing and loading from a stack slot.
766
0
      return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
767
0
                                : 0) +
768
0
             (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
769
0
                                : 0);
770
0
771
0
    llvm_unreachable("Unhandled cast");
772
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Line
Count
Source
650
351
                            const Instruction *I = nullptr) {
651
351
    const TargetLoweringBase *TLI = getTLI();
652
351
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
653
351
    assert(ISD && "Invalid opcode");
654
351
    std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
655
351
    std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
656
351
657
351
    // Check for NOOP conversions.
658
351
    if (SrcLT.first == DstLT.first &&
659
351
        SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
660
104
661
104
      // Bitcast between types that are legalized to the same type are free.
662
104
      if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
663
10
        return 0;
664
341
    }
665
341
666
341
    if (Opcode == Instruction::Trunc &&
667
341
        
TLI->isTruncateFree(SrcLT.second, DstLT.second)41
)
668
41
      return 0;
669
300
670
300
    if (Opcode == Instruction::ZExt &&
671
300
        
TLI->isZExtFree(SrcLT.second, DstLT.second)22
)
672
0
      return 0;
673
300
674
300
    if (Opcode == Instruction::AddrSpaceCast &&
675
300
        TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
676
0
                                 Dst->getPointerAddressSpace()))
677
0
      return 0;
678
300
679
300
    // If this is a zext/sext of a load, return 0 if the corresponding
680
300
    // extending load exists on target.
681
300
    if ((Opcode == Instruction::ZExt || 
Opcode == Instruction::SExt278
) &&
682
300
        
I88
&&
isa<LoadInst>(I->getOperand(0))88
) {
683
76
        EVT ExtVT = EVT::getEVT(Dst);
684
76
        EVT LoadVT = EVT::getEVT(Src);
685
76
        unsigned LType =
686
76
          ((Opcode == Instruction::ZExt) ? 
ISD::ZEXTLOAD16
:
ISD::SEXTLOAD60
);
687
76
        if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
688
76
          return 0;
689
224
    }
690
224
691
224
    // If the cast is marked as legal (or promote) then assume low cost.
692
224
    if (SrcLT.first == DstLT.first &&
693
224
        TLI->isOperationLegalOrPromote(ISD, DstLT.second))
694
224
      return 1;
695
0
696
0
    // Handle scalar conversions.
697
0
    if (!Src->isVectorTy() && !Dst->isVectorTy()) {
698
0
      // Scalar bitcasts are usually free.
699
0
      if (Opcode == Instruction::BitCast)
700
0
        return 0;
701
0
702
0
      // Just check the op cost. If the operation is legal then assume it costs
703
0
      // 1.
704
0
      if (!TLI->isOperationExpand(ISD, DstLT.second))
705
0
        return 1;
706
0
707
0
      // Assume that illegal scalar instruction are expensive.
708
0
      return 4;
709
0
    }
710
0
711
0
    // Check vector-to-vector casts.
712
0
    if (Dst->isVectorTy() && Src->isVectorTy()) {
713
0
      // If the cast is between same-sized registers, then the check is simple.
714
0
      if (SrcLT.first == DstLT.first &&
715
0
          SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
716
0
717
0
        // Assume that Zext is done using AND.
718
0
        if (Opcode == Instruction::ZExt)
719
0
          return 1;
720
0
721
0
        // Assume that sext is done using SHL and SRA.
722
0
        if (Opcode == Instruction::SExt)
723
0
          return 2;
724
0
725
0
        // Just check the op cost. If the operation is legal then assume it
726
0
        // costs
727
0
        // 1 and multiply by the type-legalization overhead.
728
0
        if (!TLI->isOperationExpand(ISD, DstLT.second))
729
0
          return SrcLT.first * 1;
730
0
      }
731
0
732
0
      // If we are legalizing by splitting, query the concrete TTI for the cost
733
0
      // of casting the original vector twice. We also need to factor in the
734
0
      // cost of the split itself. Count that as 1, to be consistent with
735
0
      // TLI->getTypeLegalizationCost().
736
0
      if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
737
0
           TargetLowering::TypeSplitVector) ||
738
0
          (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
739
0
           TargetLowering::TypeSplitVector)) {
740
0
        Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
741
0
                                         Dst->getVectorNumElements() / 2);
742
0
        Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
743
0
                                         Src->getVectorNumElements() / 2);
744
0
        T *TTI = static_cast<T *>(this);
745
0
        return TTI->getVectorSplitCost() +
746
0
               (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
747
0
      }
748
0
749
0
      // In other cases where the source or destination are illegal, assume
750
0
      // the operation will get scalarized.
751
0
      unsigned Num = Dst->getVectorNumElements();
752
0
      unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
753
0
          Opcode, Dst->getScalarType(), Src->getScalarType(), I);
754
0
755
0
      // Return the cost of multiple scalar invocation plus the cost of
756
0
      // inserting and extracting the values.
757
0
      return getScalarizationOverhead(Dst, true, true) + Num * Cost;
758
0
    }
759
0
760
0
    // We already handled vector-to-vector and scalar-to-scalar conversions.
761
0
    // This
762
0
    // is where we handle bitcast between vectors and scalars. We need to assume
763
0
    //  that the conversion is scalarized in one way or another.
764
0
    if (Opcode == Instruction::BitCast)
765
0
      // Illegal bitcasts are done by storing and loading from a stack slot.
766
0
      return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
767
0
                                : 0) +
768
0
             (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
769
0
                                : 0);
770
0
771
0
    llvm_unreachable("Unhandled cast");
772
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Line
Count
Source
650
16.0k
                            const Instruction *I = nullptr) {
651
16.0k
    const TargetLoweringBase *TLI = getTLI();
652
16.0k
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
653
16.0k
    assert(ISD && "Invalid opcode");
654
16.0k
    std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
655
16.0k
    std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
656
16.0k
657
16.0k
    // Check for NOOP conversions.
658
16.0k
    if (SrcLT.first == DstLT.first &&
659
16.0k
        
SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()10.8k
) {
660
6.33k
661
6.33k
      // Bitcast between types that are legalized to the same type are free.
662
6.33k
      if (Opcode == Instruction::BitCast || 
Opcode == Instruction::Trunc5.44k
)
663
1.51k
        return 0;
664
14.5k
    }
665
14.5k
666
14.5k
    if (Opcode == Instruction::Trunc &&
667
14.5k
        
TLI->isTruncateFree(SrcLT.second, DstLT.second)3.89k
)
668
922
      return 0;
669
13.5k
670
13.5k
    if (Opcode == Instruction::ZExt &&
671
13.5k
        
TLI->isZExtFree(SrcLT.second, DstLT.second)3.46k
)
672
308
      return 0;
673
13.2k
674
13.2k
    if (Opcode == Instruction::AddrSpaceCast &&
675
13.2k
        TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
676
0
                                 Dst->getPointerAddressSpace()))
677
0
      return 0;
678
13.2k
679
13.2k
    // If this is a zext/sext of a load, return 0 if the corresponding
680
13.2k
    // extending load exists on target.
681
13.2k
    if ((Opcode == Instruction::ZExt || 
Opcode == Instruction::SExt10.1k
) &&
682
13.2k
        
I4.37k
&&
isa<LoadInst>(I->getOperand(0))3.02k
) {
683
709
        EVT ExtVT = EVT::getEVT(Dst);
684
709
        EVT LoadVT = EVT::getEVT(Src);
685
709
        unsigned LType =
686
709
          ((Opcode == Instruction::ZExt) ? 
ISD::ZEXTLOAD503
:
ISD::SEXTLOAD206
);
687
709
        if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
688
496
          return 0;
689
12.7k
    }
690
12.7k
691
12.7k
    // If the cast is marked as legal (or promote) then assume low cost.
692
12.7k
    if (SrcLT.first == DstLT.first &&
693
12.7k
        
TLI->isOperationLegalOrPromote(ISD, DstLT.second)7.70k
)
694
4.45k
      return 1;
695
8.33k
696
8.33k
    // Handle scalar conversions.
697
8.33k
    if (!Src->isVectorTy() && 
!Dst->isVectorTy()2.12k
) {
698
2.12k
      // Scalar bitcasts are usually free.
699
2.12k
      if (Opcode == Instruction::BitCast)
700
9
        return 0;
701
2.12k
702
2.12k
      // Just check the op cost. If the operation is legal then assume it costs
703
2.12k
      // 1.
704
2.12k
      if (!TLI->isOperationExpand(ISD, DstLT.second))
705
1.82k
        return 1;
706
298
707
298
      // Assume that illegal scalar instruction are expensive.
708
298
      return 4;
709
298
    }
710
6.20k
711
6.20k
    // Check vector-to-vector casts.
712
6.20k
    if (Dst->isVectorTy() && Src->isVectorTy()) {
713
6.20k
      // If the cast is between same-sized registers, then the check is simple.
714
6.20k
      if (SrcLT.first == DstLT.first &&
715
6.20k
          
SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()2.44k
) {
716
2.15k
717
2.15k
        // Assume that Zext is done using AND.
718
2.15k
        if (Opcode == Instruction::ZExt)
719
1.07k
          return 1;
720
1.08k
721
1.08k
        // Assume that sext is done using SHL and SRA.
722
1.08k
        if (Opcode == Instruction::SExt)
723
256
          return 2;
724
824
725
824
        // Just check the op cost. If the operation is legal then assume it
726
824
        // costs
727
824
        // 1 and multiply by the type-legalization overhead.
728
824
        if (!TLI->isOperationExpand(ISD, DstLT.second))
729
24
          return SrcLT.first * 1;
730
4.85k
      }
731
4.85k
732
4.85k
      // If we are legalizing by splitting, query the concrete TTI for the cost
733
4.85k
      // of casting the original vector twice. We also need to factor in the
734
4.85k
      // cost of the split itself. Count that as 1, to be consistent with
735
4.85k
      // TLI->getTypeLegalizationCost().
736
4.85k
      if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
737
4.85k
           TargetLowering::TypeSplitVector) ||
738
4.85k
          (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
739
3.17k
           TargetLowering::TypeSplitVector)) {
740
3.17k
        Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
741
3.17k
                                         Dst->getVectorNumElements() / 2);
742
3.17k
        Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
743
3.17k
                                         Src->getVectorNumElements() / 2);
744
3.17k
        T *TTI = static_cast<T *>(this);
745
3.17k
        return TTI->getVectorSplitCost() +
746
3.17k
               (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
747
3.17k
      }
748
1.68k
749
1.68k
      // In other cases where the source or destination are illegal, assume
750
1.68k
      // the operation will get scalarized.
751
1.68k
      unsigned Num = Dst->getVectorNumElements();
752
1.68k
      unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
753
1.68k
          Opcode, Dst->getScalarType(), Src->getScalarType(), I);
754
1.68k
755
1.68k
      // Return the cost of multiple scalar invocation plus the cost of
756
1.68k
      // inserting and extracting the values.
757
1.68k
      return getScalarizationOverhead(Dst, true, true) + Num * Cost;
758
1.68k
    }
759
0
760
0
    // We already handled vector-to-vector and scalar-to-scalar conversions.
761
0
    // This
762
0
    // is where we handle bitcast between vectors and scalars. We need to assume
763
0
    //  that the conversion is scalarized in one way or another.
764
0
    if (Opcode == Instruction::BitCast)
765
0
      // Illegal bitcasts are done by storing and loading from a stack slot.
766
0
      return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
767
0
                                : 0) +
768
0
             (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
769
0
                                : 0);
770
0
771
0
    llvm_unreachable("Unhandled cast");
772
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getCastInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
773
774
  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
775
8
                                    VectorType *VecTy, unsigned Index) {
776
8
    return static_cast<T *>(this)->getVectorInstrCost(
777
8
               Instruction::ExtractElement, VecTy, Index) +
778
8
           static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
779
8
                                                    VecTy->getElementType());
780
8
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int)
Line
Count
Source
775
8
                                    VectorType *VecTy, unsigned Index) {
776
8
    return static_cast<T *>(this)->getVectorInstrCost(
777
8
               Instruction::ExtractElement, VecTy, Index) +
778
8
           static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
779
8
                                                    VecTy->getElementType());
780
8
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getExtractWithExtendCost(unsigned int, llvm::Type*, llvm::VectorType*, unsigned int)
781
782
134k
  unsigned getCFInstrCost(unsigned Opcode) {
783
134k
    // Branches are assumed to be predicted.
784
134k
    return 0;
785
134k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getCFInstrCost(unsigned int)
Line
Count
Source
782
111k
  unsigned getCFInstrCost(unsigned Opcode) {
783
111k
    // Branches are assumed to be predicted.
784
111k
    return 0;
785
111k
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getCFInstrCost(unsigned int)
Line
Count
Source
782
15
  unsigned getCFInstrCost(unsigned Opcode) {
783
15
    // Branches are assumed to be predicted.
784
15
    return 0;
785
15
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getCFInstrCost(unsigned int)
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getCFInstrCost(unsigned int)
Line
Count
Source
782
3.08k
  unsigned getCFInstrCost(unsigned Opcode) {
783
3.08k
    // Branches are assumed to be predicted.
784
3.08k
    return 0;
785
3.08k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getCFInstrCost(unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getCFInstrCost(unsigned int)
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getCFInstrCost(unsigned int)
Line
Count
Source
782
6
  unsigned getCFInstrCost(unsigned Opcode) {
783
6
    // Branches are assumed to be predicted.
784
6
    return 0;
785
6
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getCFInstrCost(unsigned int)
Line
Count
Source
782
503
  unsigned getCFInstrCost(unsigned Opcode) {
783
503
    // Branches are assumed to be predicted.
784
503
    return 0;
785
503
  }
llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getCFInstrCost(unsigned int)
Line
Count
Source
782
14
  unsigned getCFInstrCost(unsigned Opcode) {
783
14
    // Branches are assumed to be predicted.
784
14
    return 0;
785
14
  }
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getCFInstrCost(unsigned int)
Line
Count
Source
782
957
  unsigned getCFInstrCost(unsigned Opcode) {
783
957
    // Branches are assumed to be predicted.
784
957
    return 0;
785
957
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getCFInstrCost(unsigned int)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getCFInstrCost(unsigned int)
Line
Count
Source
782
18.3k
  unsigned getCFInstrCost(unsigned Opcode) {
783
18.3k
    // Branches are assumed to be predicted.
784
18.3k
    return 0;
785
18.3k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getCFInstrCost(unsigned int)
786
787
  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
788
204k
                              const Instruction *I) {
789
204k
    const TargetLoweringBase *TLI = getTLI();
790
204k
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
791
204k
    assert(ISD && "Invalid opcode");
792
204k
793
204k
    // Selects on vectors are actually vector selects.
794
204k
    if (ISD == ISD::SELECT) {
795
69.1k
      assert(CondTy && "CondTy must exist");
796
69.1k
      if (CondTy->isVectorTy())
797
25.5k
        ISD = ISD::VSELECT;
798
69.1k
    }
799
204k
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
800
204k
801
204k
    if (!(ValTy->isVectorTy() && 
!LT.second.isVector()66.5k
) &&
802
204k
        
!TLI->isOperationExpand(ISD, LT.second)203k
) {
803
177k
      // The operation is legal. Assume it costs 1. Multiply
804
177k
      // by the type-legalization overhead.
805
177k
      return LT.first * 1;
806
177k
    }
807
26.8k
808
26.8k
    // Otherwise, assume that the cast is scalarized.
809
26.8k
    // TODO: If one of the types get legalized by splitting, handle this
810
26.8k
    // similarly to what getCastInstrCost() does.
811
26.8k
    if (ValTy->isVectorTy()) {
812
24.4k
      unsigned Num = ValTy->getVectorNumElements();
813
24.4k
      if (CondTy)
814
24.4k
        CondTy = CondTy->getScalarType();
815
24.4k
      unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
816
24.4k
          Opcode, ValTy->getScalarType(), CondTy, I);
817
24.4k
818
24.4k
      // Return the cost of multiple scalar invocation plus the cost of
819
24.4k
      // inserting and extracting the values.
820
24.4k
      return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
821
24.4k
    }
822
2.41k
823
2.41k
    // Unknown scalar opcode.
824
2.41k
    return 1;
825
2.41k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Line
Count
Source
788
189k
                              const Instruction *I) {
789
189k
    const TargetLoweringBase *TLI = getTLI();
790
189k
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
791
189k
    assert(ISD && "Invalid opcode");
792
189k
793
189k
    // Selects on vectors are actually vector selects.
794
189k
    if (ISD == ISD::SELECT) {
795
66.6k
      assert(CondTy && "CondTy must exist");
796
66.6k
      if (CondTy->isVectorTy())
797
25.1k
        ISD = ISD::VSELECT;
798
66.6k
    }
799
189k
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
800
189k
801
189k
    if (!(ValTy->isVectorTy() && 
!LT.second.isVector()65.0k
) &&
802
189k
        
!TLI->isOperationExpand(ISD, LT.second)189k
) {
803
165k
      // The operation is legal. Assume it costs 1. Multiply
804
165k
      // by the type-legalization overhead.
805
165k
      return LT.first * 1;
806
165k
    }
807
23.9k
808
23.9k
    // Otherwise, assume that the cast is scalarized.
809
23.9k
    // TODO: If one of the types get legalized by splitting, handle this
810
23.9k
    // similarly to what getCastInstrCost() does.
811
23.9k
    if (ValTy->isVectorTy()) {
812
23.9k
      unsigned Num = ValTy->getVectorNumElements();
813
23.9k
      if (CondTy)
814
23.9k
        CondTy = CondTy->getScalarType();
815
23.9k
      unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
816
23.9k
          Opcode, ValTy->getScalarType(), CondTy, I);
817
23.9k
818
23.9k
      // Return the cost of multiple scalar invocation plus the cost of
819
23.9k
      // inserting and extracting the values.
820
23.9k
      return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
821
23.9k
    }
822
0
823
0
    // Unknown scalar opcode.
824
0
    return 1;
825
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Line
Count
Source
788
223
                              const Instruction *I) {
789
223
    const TargetLoweringBase *TLI = getTLI();
790
223
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
791
223
    assert(ISD && "Invalid opcode");
792
223
793
223
    // Selects on vectors are actually vector selects.
794
223
    if (ISD == ISD::SELECT) {
795
107
      assert(CondTy && "CondTy must exist");
796
107
      if (CondTy->isVectorTy())
797
44
        ISD = ISD::VSELECT;
798
107
    }
799
223
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
800
223
801
223
    if (!(ValTy->isVectorTy() && 
!LT.second.isVector()88
) &&
802
223
        !TLI->isOperationExpand(ISD, LT.second)) {
803
135
      // The operation is legal. Assume it costs 1. Multiply
804
135
      // by the type-legalization overhead.
805
135
      return LT.first * 1;
806
135
    }
807
88
808
88
    // Otherwise, assume that the cast is scalarized.
809
88
    // TODO: If one of the types get legalized by splitting, handle this
810
88
    // similarly to what getCastInstrCost() does.
811
88
    if (ValTy->isVectorTy()) {
812
88
      unsigned Num = ValTy->getVectorNumElements();
813
88
      if (CondTy)
814
88
        CondTy = CondTy->getScalarType();
815
88
      unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
816
88
          Opcode, ValTy->getScalarType(), CondTy, I);
817
88
818
88
      // Return the cost of multiple scalar invocation plus the cost of
819
88
      // inserting and extracting the values.
820
88
      return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
821
88
    }
822
0
823
0
    // Unknown scalar opcode.
824
0
    return 1;
825
0
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Line
Count
Source
788
3.95k
                              const Instruction *I) {
789
3.95k
    const TargetLoweringBase *TLI = getTLI();
790
3.95k
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
791
3.95k
    assert(ISD && "Invalid opcode");
792
3.95k
793
3.95k
    // Selects on vectors are actually vector selects.
794
3.95k
    if (ISD == ISD::SELECT) {
795
426
      assert(CondTy && "CondTy must exist");
796
426
      if (CondTy->isVectorTy())
797
96
        ISD = ISD::VSELECT;
798
426
    }
799
3.95k
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
800
3.95k
801
3.95k
    if (!(ValTy->isVectorTy() && 
!LT.second.isVector()1.11k
) &&
802
3.95k
        !TLI->isOperationExpand(ISD, LT.second)) {
803
1.37k
      // The operation is legal. Assume it costs 1. Multiply
804
1.37k
      // by the type-legalization overhead.
805
1.37k
      return LT.first * 1;
806
1.37k
    }
807
2.57k
808
2.57k
    // Otherwise, assume that the cast is scalarized.
809
2.57k
    // TODO: If one of the types get legalized by splitting, handle this
810
2.57k
    // similarly to what getCastInstrCost() does.
811
2.57k
    if (ValTy->isVectorTy()) {
812
162
      unsigned Num = ValTy->getVectorNumElements();
813
162
      if (CondTy)
814
162
        CondTy = CondTy->getScalarType();
815
162
      unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
816
162
          Opcode, ValTy->getScalarType(), CondTy, I);
817
162
818
162
      // Return the cost of multiple scalar invocation plus the cost of
819
162
      // inserting and extracting the values.
820
162
      return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
821
162
    }
822
2.41k
823
2.41k
    // Unknown scalar opcode.
824
2.41k
    return 1;
825
2.41k
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Line
Count
Source
788
12
                              const Instruction *I) {
789
12
    const TargetLoweringBase *TLI = getTLI();
790
12
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
791
12
    assert(ISD && "Invalid opcode");
792
12
793
12
    // Selects on vectors are actually vector selects.
794
12
    if (ISD == ISD::SELECT) {
795
0
      assert(CondTy && "CondTy must exist");
796
0
      if (CondTy->isVectorTy())
797
0
        ISD = ISD::VSELECT;
798
0
    }
799
12
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
800
12
801
12
    if (!(ValTy->isVectorTy() && 
!LT.second.isVector()0
) &&
802
12
        !TLI->isOperationExpand(ISD, LT.second)) {
803
12
      // The operation is legal. Assume it costs 1. Multiply
804
12
      // by the type-legalization overhead.
805
12
      return LT.first * 1;
806
12
    }
807
0
808
0
    // Otherwise, assume that the cast is scalarized.
809
0
    // TODO: If one of the types get legalized by splitting, handle this
810
0
    // similarly to what getCastInstrCost() does.
811
0
    if (ValTy->isVectorTy()) {
812
0
      unsigned Num = ValTy->getVectorNumElements();
813
0
      if (CondTy)
814
0
        CondTy = CondTy->getScalarType();
815
0
      unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
816
0
          Opcode, ValTy->getScalarType(), CondTy, I);
817
0
818
0
      // Return the cost of multiple scalar invocation plus the cost of
819
0
      // inserting and extracting the values.
820
0
      return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
821
0
    }
822
0
823
0
    // Unknown scalar opcode.
824
0
    return 1;
825
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Line
Count
Source
788
3
                              const Instruction *I) {
789
3
    const TargetLoweringBase *TLI = getTLI();
790
3
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
791
3
    assert(ISD && "Invalid opcode");
792
3
793
3
    // Selects on vectors are actually vector selects.
794
3
    if (ISD == ISD::SELECT) {
795
0
      assert(CondTy && "CondTy must exist");
796
0
      if (CondTy->isVectorTy())
797
0
        ISD = ISD::VSELECT;
798
0
    }
799
3
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
800
3
801
3
    if (!(ValTy->isVectorTy() && 
!LT.second.isVector()0
) &&
802
3
        !TLI->isOperationExpand(ISD, LT.second)) {
803
3
      // The operation is legal. Assume it costs 1. Multiply
804
3
      // by the type-legalization overhead.
805
3
      return LT.first * 1;
806
3
    }
807
0
808
0
    // Otherwise, assume that the cast is scalarized.
809
0
    // TODO: If one of the types get legalized by splitting, handle this
810
0
    // similarly to what getCastInstrCost() does.
811
0
    if (ValTy->isVectorTy()) {
812
0
      unsigned Num = ValTy->getVectorNumElements();
813
0
      if (CondTy)
814
0
        CondTy = CondTy->getScalarType();
815
0
      unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
816
0
          Opcode, ValTy->getScalarType(), CondTy, I);
817
0
818
0
      // Return the cost of multiple scalar invocation plus the cost of
819
0
      // inserting and extracting the values.
820
0
      return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
821
0
    }
822
0
823
0
    // Unknown scalar opcode.
824
0
    return 1;
825
0
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Line
Count
Source
788
217
                              const Instruction *I) {
789
217
    const TargetLoweringBase *TLI = getTLI();
790
217
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
791
217
    assert(ISD && "Invalid opcode");
792
217
793
217
    // Selects on vectors are actually vector selects.
794
217
    if (ISD == ISD::SELECT) {
795
0
      assert(CondTy && "CondTy must exist");
796
0
      if (CondTy->isVectorTy())
797
0
        ISD = ISD::VSELECT;
798
0
    }
799
217
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
800
217
801
217
    if (!(ValTy->isVectorTy() && 
!LT.second.isVector()11
) &&
802
217
        
!TLI->isOperationExpand(ISD, LT.second)216
) {
803
216
      // The operation is legal. Assume it costs 1. Multiply
804
216
      // by the type-legalization overhead.
805
216
      return LT.first * 1;
806
216
    }
807
1
808
1
    // Otherwise, assume that the cast is scalarized.
809
1
    // TODO: If one of the types get legalized by splitting, handle this
810
1
    // similarly to what getCastInstrCost() does.
811
1
    if (ValTy->isVectorTy()) {
812
1
      unsigned Num = ValTy->getVectorNumElements();
813
1
      if (CondTy)
814
1
        CondTy = CondTy->getScalarType();
815
1
      unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
816
1
          Opcode, ValTy->getScalarType(), CondTy, I);
817
1
818
1
      // Return the cost of multiple scalar invocation plus the cost of
819
1
      // inserting and extracting the values.
820
1
      return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
821
1
    }
822
0
823
0
    // Unknown scalar opcode.
824
0
    return 1;
825
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Line
Count
Source
788
188
                              const Instruction *I) {
789
188
    const TargetLoweringBase *TLI = getTLI();
790
188
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
791
188
    assert(ISD && "Invalid opcode");
792
188
793
188
    // Selects on vectors are actually vector selects.
794
188
    if (ISD == ISD::SELECT) {
795
0
      assert(CondTy && "CondTy must exist");
796
0
      if (CondTy->isVectorTy())
797
0
        ISD = ISD::VSELECT;
798
0
    }
799
188
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
800
188
801
188
    if (!(ValTy->isVectorTy() && 
!LT.second.isVector()0
) &&
802
188
        !TLI->isOperationExpand(ISD, LT.second)) {
803
188
      // The operation is legal. Assume it costs 1. Multiply
804
188
      // by the type-legalization overhead.
805
188
      return LT.first * 1;
806
188
    }
807
0
808
0
    // Otherwise, assume that the cast is scalarized.
809
0
    // TODO: If one of the types get legalized by splitting, handle this
810
0
    // similarly to what getCastInstrCost() does.
811
0
    if (ValTy->isVectorTy()) {
812
0
      unsigned Num = ValTy->getVectorNumElements();
813
0
      if (CondTy)
814
0
        CondTy = CondTy->getScalarType();
815
0
      unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
816
0
          Opcode, ValTy->getScalarType(), CondTy, I);
817
0
818
0
      // Return the cost of multiple scalar invocation plus the cost of
819
0
      // inserting and extracting the values.
820
0
      return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
821
0
    }
822
0
823
0
    // Unknown scalar opcode.
824
0
    return 1;
825
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
Line
Count
Source
788
10.1k
                              const Instruction *I) {
789
10.1k
    const TargetLoweringBase *TLI = getTLI();
790
10.1k
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
791
10.1k
    assert(ISD && "Invalid opcode");
792
10.1k
793
10.1k
    // Selects on vectors are actually vector selects.
794
10.1k
    if (ISD == ISD::SELECT) {
795
1.99k
      assert(CondTy && "CondTy must exist");
796
1.99k
      if (CondTy->isVectorTy())
797
310
        ISD = ISD::VSELECT;
798
1.99k
    }
799
10.1k
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
800
10.1k
801
10.1k
    if (!(ValTy->isVectorTy() && 
!LT.second.isVector()332
) &&
802
10.1k
        
!TLI->isOperationExpand(ISD, LT.second)9.98k
) {
803
9.98k
      // The operation is legal. Assume it costs 1. Multiply
804
9.98k
      // by the type-legalization overhead.
805
9.98k
      return LT.first * 1;
806
9.98k
    }
807
208
808
208
    // Otherwise, assume that the cast is scalarized.
809
208
    // TODO: If one of the types get legalized by splitting, handle this
810
208
    // similarly to what getCastInstrCost() does.
811
208
    if (ValTy->isVectorTy()) {
812
208
      unsigned Num = ValTy->getVectorNumElements();
813
208
      if (CondTy)
814
208
        CondTy = CondTy->getScalarType();
815
208
      unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
816
208
          Opcode, ValTy->getScalarType(), CondTy, I);
817
208
818
208
      // Return the cost of multiple scalar invocation plus the cost of
819
208
      // inserting and extracting the values.
820
208
      return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
821
208
    }
822
0
823
0
    // Unknown scalar opcode.
824
0
    return 1;
825
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getCmpSelInstrCost(unsigned int, llvm::Type*, llvm::Type*, llvm::Instruction const*)
826
827
324k
  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
828
324k
    std::pair<unsigned, MVT> LT =
829
324k
        getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
830
324k
831
324k
    return LT.first;
832
324k
  }
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int)
Line
Count
Source
827
763
  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
828
763
    std::pair<unsigned, MVT> LT =
829
763
        getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
830
763
831
763
    return LT.first;
832
763
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int)
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int)
Line
Count
Source
827
12.5k
  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
828
12.5k
    std::pair<unsigned, MVT> LT =
829
12.5k
        getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
830
12.5k
831
12.5k
    return LT.first;
832
12.5k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int)
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int)
Line
Count
Source
827
14
  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
828
14
    std::pair<unsigned, MVT> LT =
829
14
        getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
830
14
831
14
    return LT.first;
832
14
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int)
Line
Count
Source
827
2.32k
  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
828
2.32k
    std::pair<unsigned, MVT> LT =
829
2.32k
        getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
830
2.32k
831
2.32k
    return LT.first;
832
2.32k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int)
Line
Count
Source
827
2.00k
  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
828
2.00k
    std::pair<unsigned, MVT> LT =
829
2.00k
        getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
830
2.00k
831
2.00k
    return LT.first;
832
2.00k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int)
Line
Count
Source
827
306k
  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
828
306k
    std::pair<unsigned, MVT> LT =
829
306k
        getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
830
306k
831
306k
    return LT.first;
832
306k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getVectorInstrCost(unsigned int, llvm::Type*, unsigned int)
833
834
  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
835
1.49k
                       unsigned AddressSpace, const Instruction *I = nullptr) {
836
1.49k
    assert(!Src->isVoidTy() && "Invalid type");
837
1.49k
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
838
1.49k
839
1.49k
    // Assuming that all loads of legal types cost 1.
840
1.49k
    unsigned Cost = LT.first;
841
1.49k
842
1.49k
    if (Src->isVectorTy() &&
843
1.49k
        
Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()637
) {
844
62
      // This is a vector load that legalizes to a larger type than the vector
845
62
      // itself. Unless the corresponding extending load or truncating store is
846
62
      // legal, then this will scalarize.
847
62
      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
848
62
      EVT MemVT = getTLI()->getValueType(DL, Src);
849
62
      if (Opcode == Instruction::Store)
850
38
        LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
851
24
      else
852
24
        LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
853
62
854
62
      if (LA != TargetLowering::Legal && 
LA != TargetLowering::Custom57
) {
855
57
        // This is a vector load/store for some illegal type that is scalarized.
856
57
        // We must account for the cost of building or decomposing the vector.
857
57
        Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
858
57
                                         Opcode == Instruction::Store);
859
57
      }
860
62
    }
861
1.49k
862
1.49k
    return Cost;
863
1.49k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*)
Line
Count
Source
835
719
                       unsigned AddressSpace, const Instruction *I = nullptr) {
836
719
    assert(!Src->isVoidTy() && "Invalid type");
837
719
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
838
719
839
719
    // Assuming that all loads of legal types cost 1.
840
719
    unsigned Cost = LT.first;
841
719
842
719
    if (Src->isVectorTy() &&
843
719
        
Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()423
) {
844
2
      // This is a vector load that legalizes to a larger type than the vector
845
2
      // itself. Unless the corresponding extending load or truncating store is
846
2
      // legal, then this will scalarize.
847
2
      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
848
2
      EVT MemVT = getTLI()->getValueType(DL, Src);
849
2
      if (Opcode == Instruction::Store)
850
1
        LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
851
1
      else
852
1
        LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
853
2
854
2
      if (LA != TargetLowering::Legal && 
LA != TargetLowering::Custom0
) {
855
0
        // This is a vector load/store for some illegal type that is scalarized.
856
0
        // We must account for the cost of building or decomposing the vector.
857
0
        Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
858
0
                                         Opcode == Instruction::Store);
859
0
      }
860
2
    }
861
719
862
719
    return Cost;
863
719
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*)
Line
Count
Source
835
42
                       unsigned AddressSpace, const Instruction *I = nullptr) {
836
42
    assert(!Src->isVoidTy() && "Invalid type");
837
42
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
838
42
839
42
    // Assuming that all loads of legal types cost 1.
840
42
    unsigned Cost = LT.first;
841
42
842
42
    if (Src->isVectorTy() &&
843
42
        
Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()15
) {
844
3
      // This is a vector load that legalizes to a larger type than the vector
845
3
      // itself. Unless the corresponding extending load or truncating store is
846
3
      // legal, then this will scalarize.
847
3
      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
848
3
      EVT MemVT = getTLI()->getValueType(DL, Src);
849
3
      if (Opcode == Instruction::Store)
850
3
        LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
851
0
      else
852
0
        LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
853
3
854
3
      if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
855
3
        // This is a vector load/store for some illegal type that is scalarized.
856
3
        // We must account for the cost of building or decomposing the vector.
857
3
        Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
858
3
                                         Opcode == Instruction::Store);
859
3
      }
860
3
    }
861
42
862
42
    return Cost;
863
42
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*)
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*)
Line
Count
Source
835
8
                       unsigned AddressSpace, const Instruction *I = nullptr) {
836
8
    assert(!Src->isVoidTy() && "Invalid type");
837
8
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
838
8
839
8
    // Assuming that all loads of legal types cost 1.
840
8
    unsigned Cost = LT.first;
841
8
842
8
    if (Src->isVectorTy() &&
843
8
        
Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()4
) {
844
0
      // This is a vector load that legalizes to a larger type than the vector
845
0
      // itself. Unless the corresponding extending load or truncating store is
846
0
      // legal, then this will scalarize.
847
0
      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
848
0
      EVT MemVT = getTLI()->getValueType(DL, Src);
849
0
      if (Opcode == Instruction::Store)
850
0
        LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
851
0
      else
852
0
        LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
853
0
854
0
      if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
855
0
        // This is a vector load/store for some illegal type that is scalarized.
856
0
        // We must account for the cost of building or decomposing the vector.
857
0
        Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
858
0
                                         Opcode == Instruction::Store);
859
0
      }
860
0
    }
861
8
862
8
    return Cost;
863
8
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*)
Line
Count
Source
835
324
                       unsigned AddressSpace, const Instruction *I = nullptr) {
836
324
    assert(!Src->isVoidTy() && "Invalid type");
837
324
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
838
324
839
324
    // Assuming that all loads of legal types cost 1.
840
324
    unsigned Cost = LT.first;
841
324
842
324
    if (Src->isVectorTy() &&
843
324
        
Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()195
) {
844
57
      // This is a vector load that legalizes to a larger type than the vector
845
57
      // itself. Unless the corresponding extending load or truncating store is
846
57
      // legal, then this will scalarize.
847
57
      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
848
57
      EVT MemVT = getTLI()->getValueType(DL, Src);
849
57
      if (Opcode == Instruction::Store)
850
34
        LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
851
23
      else
852
23
        LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
853
57
854
57
      if (LA != TargetLowering::Legal && 
LA != TargetLowering::Custom54
) {
855
54
        // This is a vector load/store for some illegal type that is scalarized.
856
54
        // We must account for the cost of building or decomposing the vector.
857
54
        Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
858
54
                                         Opcode == Instruction::Store);
859
54
      }
860
57
    }
861
324
862
324
    return Cost;
863
324
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*)
Line
Count
Source
835
397
                       unsigned AddressSpace, const Instruction *I = nullptr) {
836
397
    assert(!Src->isVoidTy() && "Invalid type");
837
397
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
838
397
839
397
    // Assuming that all loads of legal types cost 1.
840
397
    unsigned Cost = LT.first;
841
397
842
397
    if (Src->isVectorTy() &&
843
397
        
Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()0
) {
844
0
      // This is a vector load that legalizes to a larger type than the vector
845
0
      // itself. Unless the corresponding extending load or truncating store is
846
0
      // legal, then this will scalarize.
847
0
      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
848
0
      EVT MemVT = getTLI()->getValueType(DL, Src);
849
0
      if (Opcode == Instruction::Store)
850
0
        LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
851
0
      else
852
0
        LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
853
0
854
0
      if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
855
0
        // This is a vector load/store for some illegal type that is scalarized.
856
0
        // We must account for the cost of building or decomposing the vector.
857
0
        Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
858
0
                                         Opcode == Instruction::Store);
859
0
      }
860
0
    }
861
397
862
397
    return Cost;
863
397
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getMemoryOpCost(unsigned int, llvm::Type*, unsigned int, unsigned int, llvm::Instruction const*)
864
865
  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
866
                                      unsigned Factor,
867
                                      ArrayRef<unsigned> Indices,
868
                                      unsigned Alignment, unsigned AddressSpace,
869
                                      bool UseMaskForCond = false,
870
584
                                      bool UseMaskForGaps = false) {
871
584
    VectorType *VT = dyn_cast<VectorType>(VecTy);
872
584
    assert(VT && "Expect a vector type for interleaved memory op");
873
584
874
584
    unsigned NumElts = VT->getNumElements();
875
584
    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
876
584
877
584
    unsigned NumSubElts = NumElts / Factor;
878
584
    VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
879
584
880
584
    // Firstly, the cost of load/store operation.
881
584
    unsigned Cost;
882
584
    if (UseMaskForCond || 
UseMaskForGaps573
)
883
12
      Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
884
12
          Opcode, VecTy, Alignment, AddressSpace);
885
572
    else
886
572
      Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
887
572
                                                     AddressSpace);
888
584
889
584
    // Legalize the vector type, and get the legalized and unlegalized type
890
584
    // sizes.
891
584
    MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
892
584
    unsigned VecTySize =
893
584
        static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
894
584
    unsigned VecTyLTSize = VecTyLT.getStoreSize();
895
584
896
584
    // Return the ceiling of dividing A by B.
897
584
    auto ceil = [](unsigned A, unsigned B) 
{ return (A + B - 1) / B; }434
;
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const
Line
Count
Source
897
416
    auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const
Line
Count
Source
897
18
    auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)::'lambda'(unsigned int, unsigned int)::operator()(unsigned int, unsigned int) const
898
584
899
584
    // Scale the cost of the memory operation by the fraction of legalized
900
584
    // instructions that will actually be used. We shouldn't account for the
901
584
    // cost of dead instructions since they will be removed.
902
584
    //
903
584
    // E.g., An interleaved load of factor 8:
904
584
    //       %vec = load <16 x i64>, <16 x i64>* %ptr
905
584
    //       %v0 = shufflevector %vec, undef, <0, 8>
906
584
    //
907
584
    // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
908
584
    // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
909
584
    // type). The other loads are unused.
910
584
    //
911
584
    // We only scale the cost of loads since interleaved store groups aren't
912
584
    // allowed to have gaps.
913
584
    if (Opcode == Instruction::Load && 
VecTySize > VecTyLTSize366
) {
914
217
      // The number of loads of a legal type it will take to represent a load
915
217
      // of the unlegalized vector type.
916
217
      unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
917
217
918
217
      // The number of elements of the unlegalized type that correspond to a
919
217
      // single legal instruction.
920
217
      unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
921
217
922
217
      // Determine which legal instructions will be used.
923
217
      BitVector UsedInsts(NumLegalInsts, false);
924
217
      for (unsigned Index : Indices)
925
3.24k
        
for (unsigned Elt = 0; 851
Elt < NumSubElts;
++Elt2.39k
)
926
2.39k
          UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
927
217
928
217
      // Scale the cost of the load by the fraction of legal instructions that
929
217
      // will be used.
930
217
      Cost *= UsedInsts.count() / NumLegalInsts;
931
217
    }
932
584
933
584
    // Then plus the cost of interleave operation.
934
584
    if (Opcode == Instruction::Load) {
935
366
      // The interleave cost is similar to extract sub vectors' elements
936
366
      // from the wide vector, and insert them into sub vectors.
937
366
      //
938
366
      // E.g. An interleaved load of factor 2 (with one member of index 0):
939
366
      //      %vec = load <8 x i32>, <8 x i32>* %ptr
940
366
      //      %v0 = shuffle %vec, undef, <0, 2, 4, 6>         ; Index 0
941
366
      // The cost is estimated as extract elements at 0, 2, 4, 6 from the
942
366
      // <8 x i32> vector and insert them into a <4 x i32> vector.
943
366
944
366
      assert(Indices.size() <= Factor &&
945
366
             "Interleaved memory op has too many members");
946
366
947
1.19k
      for (unsigned Index : Indices) {
948
1.19k
        assert(Index < Factor && "Invalid index for interleaved memory op");
949
1.19k
950
1.19k
        // Extract elements from loaded vector for each sub vector.
951
4.45k
        for (unsigned i = 0; i < NumSubElts; 
i++3.25k
)
952
3.25k
          Cost += static_cast<T *>(this)->getVectorInstrCost(
953
3.25k
              Instruction::ExtractElement, VT, Index + i * Factor);
954
1.19k
      }
955
366
956
366
      unsigned InsSubCost = 0;
957
1.37k
      for (unsigned i = 0; i < NumSubElts; 
i++1.01k
)
958
1.01k
        InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
959
1.01k
            Instruction::InsertElement, SubVT, i);
960
366
961
366
      Cost += Indices.size() * InsSubCost;
962
366
    } else {
963
218
      // The interleave cost is extract all elements from sub vectors, and
964
218
      // insert them into the wide vector.
965
218
      //
966
218
      // E.g. An interleaved store of factor 2:
967
218
      //      %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
968
218
      //      store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
969
218
      // The cost is estimated as extract all elements from both <4 x i32>
970
218
      // vectors and insert into the <8 x i32> vector.
971
218
972
218
      unsigned ExtSubCost = 0;
973
864
      for (unsigned i = 0; i < NumSubElts; 
i++646
)
974
646
        ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
975
646
            Instruction::ExtractElement, SubVT, i);
976
218
      Cost += ExtSubCost * Factor;
977
218
978
2.69k
      for (unsigned i = 0; i < NumElts; 
i++2.47k
)
979
2.47k
        Cost += static_cast<T *>(this)
980
2.47k
                    ->getVectorInstrCost(Instruction::InsertElement, VT, i);
981
218
    }
982
584
983
584
    if (!UseMaskForCond)
984
573
      return Cost;
985
11
986
11
    Type *I8Type = Type::getInt8Ty(VT->getContext());
987
11
    VectorType *MaskVT = VectorType::get(I8Type, NumElts);
988
11
    SubVT = VectorType::get(I8Type, NumSubElts);
989
11
990
11
    // The Mask shuffling cost is extract all the elements of the Mask
991
11
    // and insert each of them Factor times into the wide vector:
992
11
    //
993
11
    // E.g. an interleaved group with factor 3:
994
11
    //    %mask = icmp ult <8 x i32> %vec1, %vec2
995
11
    //    %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
996
11
    //        <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
997
11
    // The cost is estimated as extract all mask elements from the <8xi1> mask
998
11
    // vector and insert them factor times into the <24xi1> shuffled mask
999
11
    // vector.
1000
99
    for (unsigned i = 0; i < NumSubElts; 
i++88
)
1001
88
      Cost += static_cast<T *>(this)->getVectorInstrCost(
1002
88
          Instruction::ExtractElement, SubVT, i);
1003
11
1004
195
    for (unsigned i = 0; i < NumElts; 
i++184
)
1005
184
      Cost += static_cast<T *>(this)->getVectorInstrCost(
1006
184
          Instruction::InsertElement, MaskVT, i);
1007
11
1008
11
    // The Gaps mask is invariant and created outside the loop, therefore the
1009
11
    // cost of creating it is not accounted for here. However if we have both
1010
11
    // a MaskForGaps and some other mask that guards the execution of the
1011
11
    // memory access, we need to account for the cost of And-ing the two masks
1012
11
    // inside the loop.
1013
11
    if (UseMaskForGaps)
1014
4
      Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1015
4
          BinaryOperator::And, MaskVT);
1016
11
1017
11
    return Cost;
1018
11
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)
Line
Count
Source
870
488
                                      bool UseMaskForGaps = false) {
871
488
    VectorType *VT = dyn_cast<VectorType>(VecTy);
872
488
    assert(VT && "Expect a vector type for interleaved memory op");
873
488
874
488
    unsigned NumElts = VT->getNumElements();
875
488
    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
876
488
877
488
    unsigned NumSubElts = NumElts / Factor;
878
488
    VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
879
488
880
488
    // Firstly, the cost of load/store operation.
881
488
    unsigned Cost;
882
488
    if (UseMaskForCond || UseMaskForGaps)
883
0
      Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
884
0
          Opcode, VecTy, Alignment, AddressSpace);
885
488
    else
886
488
      Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
887
488
                                                     AddressSpace);
888
488
889
488
    // Legalize the vector type, and get the legalized and unlegalized type
890
488
    // sizes.
891
488
    MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
892
488
    unsigned VecTySize =
893
488
        static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
894
488
    unsigned VecTyLTSize = VecTyLT.getStoreSize();
895
488
896
488
    // Return the ceiling of dividing A by B.
897
488
    auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
898
488
899
488
    // Scale the cost of the memory operation by the fraction of legalized
900
488
    // instructions that will actually be used. We shouldn't account for the
901
488
    // cost of dead instructions since they will be removed.
902
488
    //
903
488
    // E.g., An interleaved load of factor 8:
904
488
    //       %vec = load <16 x i64>, <16 x i64>* %ptr
905
488
    //       %v0 = shufflevector %vec, undef, <0, 8>
906
488
    //
907
488
    // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
908
488
    // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
909
488
    // type). The other loads are unused.
910
488
    //
911
488
    // We only scale the cost of loads since interleaved store groups aren't
912
488
    // allowed to have gaps.
913
488
    if (Opcode == Instruction::Load && 
VecTySize > VecTyLTSize338
) {
914
208
      // The number of loads of a legal type it will take to represent a load
915
208
      // of the unlegalized vector type.
916
208
      unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
917
208
918
208
      // The number of elements of the unlegalized type that correspond to a
919
208
      // single legal instruction.
920
208
      unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
921
208
922
208
      // Determine which legal instructions will be used.
923
208
      BitVector UsedInsts(NumLegalInsts, false);
924
208
      for (unsigned Index : Indices)
925
3.19k
        
for (unsigned Elt = 0; 840
Elt < NumSubElts;
++Elt2.35k
)
926
2.35k
          UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
927
208
928
208
      // Scale the cost of the load by the fraction of legal instructions that
929
208
      // will be used.
930
208
      Cost *= UsedInsts.count() / NumLegalInsts;
931
208
    }
932
488
933
488
    // Then plus the cost of interleave operation.
934
488
    if (Opcode == Instruction::Load) {
935
338
      // The interleave cost is similar to extract sub vectors' elements
936
338
      // from the wide vector, and insert them into sub vectors.
937
338
      //
938
338
      // E.g. An interleaved load of factor 2 (with one member of index 0):
939
338
      //      %vec = load <8 x i32>, <8 x i32>* %ptr
940
338
      //      %v0 = shuffle %vec, undef, <0, 2, 4, 6>         ; Index 0
941
338
      // The cost is estimated as extract elements at 0, 2, 4, 6 from the
942
338
      // <8 x i32> vector and insert them into a <4 x i32> vector.
943
338
944
338
      assert(Indices.size() <= Factor &&
945
338
             "Interleaved memory op has too many members");
946
338
947
1.15k
      for (unsigned Index : Indices) {
948
1.15k
        assert(Index < Factor && "Invalid index for interleaved memory op");
949
1.15k
950
1.15k
        // Extract elements from loaded vector for each sub vector.
951
4.22k
        for (unsigned i = 0; i < NumSubElts; 
i++3.06k
)
952
3.06k
          Cost += static_cast<T *>(this)->getVectorInstrCost(
953
3.06k
              Instruction::ExtractElement, VT, Index + i * Factor);
954
1.15k
      }
955
338
956
338
      unsigned InsSubCost = 0;
957
1.22k
      for (unsigned i = 0; i < NumSubElts; 
i++882
)
958
882
        InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
959
882
            Instruction::InsertElement, SubVT, i);
960
338
961
338
      Cost += Indices.size() * InsSubCost;
962
338
    } else {
963
150
      // The interleave cost is extract all elements from sub vectors, and
964
150
      // insert them into the wide vector.
965
150
      //
966
150
      // E.g. An interleaved store of factor 2:
967
150
      //      %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
968
150
      //      store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
969
150
      // The cost is estimated as extract all elements from both <4 x i32>
970
150
      // vectors and insert into the <8 x i32> vector.
971
150
972
150
      unsigned ExtSubCost = 0;
973
580
      for (unsigned i = 0; i < NumSubElts; 
i++430
)
974
430
        ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
975
430
            Instruction::ExtractElement, SubVT, i);
976
150
      Cost += ExtSubCost * Factor;
977
150
978
2.10k
      for (unsigned i = 0; i < NumElts; 
i++1.95k
)
979
1.95k
        Cost += static_cast<T *>(this)
980
1.95k
                    ->getVectorInstrCost(Instruction::InsertElement, VT, i);
981
150
    }
982
488
983
488
    if (!UseMaskForCond)
984
488
      return Cost;
985
0
986
0
    Type *I8Type = Type::getInt8Ty(VT->getContext());
987
0
    VectorType *MaskVT = VectorType::get(I8Type, NumElts);
988
0
    SubVT = VectorType::get(I8Type, NumSubElts);
989
0
990
0
    // The Mask shuffling cost is extract all the elements of the Mask
991
0
    // and insert each of them Factor times into the wide vector:
992
0
    //
993
0
    // E.g. an interleaved group with factor 3:
994
0
    //    %mask = icmp ult <8 x i32> %vec1, %vec2
995
0
    //    %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
996
0
    //        <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
997
0
    // The cost is estimated as extract all mask elements from the <8xi1> mask
998
0
    // vector and insert them factor times into the <24xi1> shuffled mask
999
0
    // vector.
1000
0
    for (unsigned i = 0; i < NumSubElts; i++)
1001
0
      Cost += static_cast<T *>(this)->getVectorInstrCost(
1002
0
          Instruction::ExtractElement, SubVT, i);
1003
0
1004
0
    for (unsigned i = 0; i < NumElts; i++)
1005
0
      Cost += static_cast<T *>(this)->getVectorInstrCost(
1006
0
          Instruction::InsertElement, MaskVT, i);
1007
0
1008
0
    // The Gaps mask is invariant and created outside the loop, therefore the
1009
0
    // cost of creating it is not accounted for here. However if we have both
1010
0
    // a MaskForGaps and some other mask that guards the execution of the
1011
0
    // memory access, we need to account for the cost of And-ing the two masks
1012
0
    // inside the loop.
1013
0
    if (UseMaskForGaps)
1014
0
      Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1015
0
          BinaryOperator::And, MaskVT);
1016
0
1017
0
    return Cost;
1018
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)
Line
Count
Source
870
18
                                      bool UseMaskForGaps = false) {
871
18
    VectorType *VT = dyn_cast<VectorType>(VecTy);
872
18
    assert(VT && "Expect a vector type for interleaved memory op");
873
18
874
18
    unsigned NumElts = VT->getNumElements();
875
18
    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
876
18
877
18
    unsigned NumSubElts = NumElts / Factor;
878
18
    VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
879
18
880
18
    // Firstly, the cost of load/store operation.
881
18
    unsigned Cost;
882
18
    if (UseMaskForCond || UseMaskForGaps)
883
0
      Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
884
0
          Opcode, VecTy, Alignment, AddressSpace);
885
18
    else
886
18
      Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
887
18
                                                     AddressSpace);
888
18
889
18
    // Legalize the vector type, and get the legalized and unlegalized type
890
18
    // sizes.
891
18
    MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
892
18
    unsigned VecTySize =
893
18
        static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
894
18
    unsigned VecTyLTSize = VecTyLT.getStoreSize();
895
18
896
18
    // Return the ceiling of dividing A by B.
897
18
    auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
898
18
899
18
    // Scale the cost of the memory operation by the fraction of legalized
900
18
    // instructions that will actually be used. We shouldn't account for the
901
18
    // cost of dead instructions since they will be removed.
902
18
    //
903
18
    // E.g., An interleaved load of factor 8:
904
18
    //       %vec = load <16 x i64>, <16 x i64>* %ptr
905
18
    //       %v0 = shufflevector %vec, undef, <0, 8>
906
18
    //
907
18
    // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
908
18
    // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
909
18
    // type). The other loads are unused.
910
18
    //
911
18
    // We only scale the cost of loads since interleaved store groups aren't
912
18
    // allowed to have gaps.
913
18
    if (Opcode == Instruction::Load && 
VecTySize > VecTyLTSize0
) {
914
0
      // The number of loads of a legal type it will take to represent a load
915
0
      // of the unlegalized vector type.
916
0
      unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
917
0
918
0
      // The number of elements of the unlegalized type that correspond to a
919
0
      // single legal instruction.
920
0
      unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
921
0
922
0
      // Determine which legal instructions will be used.
923
0
      BitVector UsedInsts(NumLegalInsts, false);
924
0
      for (unsigned Index : Indices)
925
0
        for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
926
0
          UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
927
0
928
0
      // Scale the cost of the load by the fraction of legal instructions that
929
0
      // will be used.
930
0
      Cost *= UsedInsts.count() / NumLegalInsts;
931
0
    }
932
18
933
18
    // Then plus the cost of interleave operation.
934
18
    if (Opcode == Instruction::Load) {
935
0
      // The interleave cost is similar to extract sub vectors' elements
936
0
      // from the wide vector, and insert them into sub vectors.
937
0
      //
938
0
      // E.g. An interleaved load of factor 2 (with one member of index 0):
939
0
      //      %vec = load <8 x i32>, <8 x i32>* %ptr
940
0
      //      %v0 = shuffle %vec, undef, <0, 2, 4, 6>         ; Index 0
941
0
      // The cost is estimated as extract elements at 0, 2, 4, 6 from the
942
0
      // <8 x i32> vector and insert them into a <4 x i32> vector.
943
0
944
0
      assert(Indices.size() <= Factor &&
945
0
             "Interleaved memory op has too many members");
946
0
947
0
      for (unsigned Index : Indices) {
948
0
        assert(Index < Factor && "Invalid index for interleaved memory op");
949
0
950
0
        // Extract elements from loaded vector for each sub vector.
951
0
        for (unsigned i = 0; i < NumSubElts; i++)
952
0
          Cost += static_cast<T *>(this)->getVectorInstrCost(
953
0
              Instruction::ExtractElement, VT, Index + i * Factor);
954
0
      }
955
0
956
0
      unsigned InsSubCost = 0;
957
0
      for (unsigned i = 0; i < NumSubElts; i++)
958
0
        InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
959
0
            Instruction::InsertElement, SubVT, i);
960
0
961
0
      Cost += Indices.size() * InsSubCost;
962
18
    } else {
963
18
      // The interleave cost is extract all elements from sub vectors, and
964
18
      // insert them into the wide vector.
965
18
      //
966
18
      // E.g. An interleaved store of factor 2:
967
18
      //      %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
968
18
      //      store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
969
18
      // The cost is estimated as extract all elements from both <4 x i32>
970
18
      // vectors and insert into the <8 x i32> vector.
971
18
972
18
      unsigned ExtSubCost = 0;
973
54
      for (unsigned i = 0; i < NumSubElts; 
i++36
)
974
36
        ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
975
36
            Instruction::ExtractElement, SubVT, i);
976
18
      Cost += ExtSubCost * Factor;
977
18
978
90
      for (unsigned i = 0; i < NumElts; 
i++72
)
979
72
        Cost += static_cast<T *>(this)
980
72
                    ->getVectorInstrCost(Instruction::InsertElement, VT, i);
981
18
    }
982
18
983
18
    if (!UseMaskForCond)
984
18
      return Cost;
985
0
986
0
    Type *I8Type = Type::getInt8Ty(VT->getContext());
987
0
    VectorType *MaskVT = VectorType::get(I8Type, NumElts);
988
0
    SubVT = VectorType::get(I8Type, NumSubElts);
989
0
990
0
    // The Mask shuffling cost is extract all the elements of the Mask
991
0
    // and insert each of them Factor times into the wide vector:
992
0
    //
993
0
    // E.g. an interleaved group with factor 3:
994
0
    //    %mask = icmp ult <8 x i32> %vec1, %vec2
995
0
    //    %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
996
0
    //        <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
997
0
    // The cost is estimated as extract all mask elements from the <8xi1> mask
998
0
    // vector and insert them factor times into the <24xi1> shuffled mask
999
0
    // vector.
1000
0
    for (unsigned i = 0; i < NumSubElts; i++)
1001
0
      Cost += static_cast<T *>(this)->getVectorInstrCost(
1002
0
          Instruction::ExtractElement, SubVT, i);
1003
0
1004
0
    for (unsigned i = 0; i < NumElts; i++)
1005
0
      Cost += static_cast<T *>(this)->getVectorInstrCost(
1006
0
          Instruction::InsertElement, MaskVT, i);
1007
0
1008
0
    // The Gaps mask is invariant and created outside the loop, therefore the
1009
0
    // cost of creating it is not accounted for here. However if we have both
1010
0
    // a MaskForGaps and some other mask that guards the execution of the
1011
0
    // memory access, we need to account for the cost of And-ing the two masks
1012
0
    // inside the loop.
1013
0
    if (UseMaskForGaps)
1014
0
      Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1015
0
          BinaryOperator::And, MaskVT);
1016
0
1017
0
    return Cost;
1018
0
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)
Line
Count
Source
870
5
                                      bool UseMaskForGaps = false) {
871
5
    VectorType *VT = dyn_cast<VectorType>(VecTy);
872
5
    assert(VT && "Expect a vector type for interleaved memory op");
873
5
874
5
    unsigned NumElts = VT->getNumElements();
875
5
    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
876
5
877
5
    unsigned NumSubElts = NumElts / Factor;
878
5
    VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
879
5
880
5
    // Firstly, the cost of load/store operation.
881
5
    unsigned Cost;
882
5
    if (UseMaskForCond || UseMaskForGaps)
883
0
      Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
884
0
          Opcode, VecTy, Alignment, AddressSpace);
885
5
    else
886
5
      Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
887
5
                                                     AddressSpace);
888
5
889
5
    // Legalize the vector type, and get the legalized and unlegalized type
890
5
    // sizes.
891
5
    MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
892
5
    unsigned VecTySize =
893
5
        static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
894
5
    unsigned VecTyLTSize = VecTyLT.getStoreSize();
895
5
896
5
    // Return the ceiling of dividing A by B.
897
5
    auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
898
5
899
5
    // Scale the cost of the memory operation by the fraction of legalized
900
5
    // instructions that will actually be used. We shouldn't account for the
901
5
    // cost of dead instructions since they will be removed.
902
5
    //
903
5
    // E.g., An interleaved load of factor 8:
904
5
    //       %vec = load <16 x i64>, <16 x i64>* %ptr
905
5
    //       %v0 = shufflevector %vec, undef, <0, 8>
906
5
    //
907
5
    // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
908
5
    // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
909
5
    // type). The other loads are unused.
910
5
    //
911
5
    // We only scale the cost of loads since interleaved store groups aren't
912
5
    // allowed to have gaps.
913
5
    if (Opcode == Instruction::Load && 
VecTySize > VecTyLTSize0
) {
914
0
      // The number of loads of a legal type it will take to represent a load
915
0
      // of the unlegalized vector type.
916
0
      unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
917
0
918
0
      // The number of elements of the unlegalized type that correspond to a
919
0
      // single legal instruction.
920
0
      unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
921
0
922
0
      // Determine which legal instructions will be used.
923
0
      BitVector UsedInsts(NumLegalInsts, false);
924
0
      for (unsigned Index : Indices)
925
0
        for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
926
0
          UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
927
0
928
0
      // Scale the cost of the load by the fraction of legal instructions that
929
0
      // will be used.
930
0
      Cost *= UsedInsts.count() / NumLegalInsts;
931
0
    }
932
5
933
5
    // Then plus the cost of interleave operation.
934
5
    if (Opcode == Instruction::Load) {
935
0
      // The interleave cost is similar to extract sub vectors' elements
936
0
      // from the wide vector, and insert them into sub vectors.
937
0
      //
938
0
      // E.g. An interleaved load of factor 2 (with one member of index 0):
939
0
      //      %vec = load <8 x i32>, <8 x i32>* %ptr
940
0
      //      %v0 = shuffle %vec, undef, <0, 2, 4, 6>         ; Index 0
941
0
      // The cost is estimated as extract elements at 0, 2, 4, 6 from the
942
0
      // <8 x i32> vector and insert them into a <4 x i32> vector.
943
0
944
0
      assert(Indices.size() <= Factor &&
945
0
             "Interleaved memory op has too many members");
946
0
947
0
      for (unsigned Index : Indices) {
948
0
        assert(Index < Factor && "Invalid index for interleaved memory op");
949
0
950
0
        // Extract elements from loaded vector for each sub vector.
951
0
        for (unsigned i = 0; i < NumSubElts; i++)
952
0
          Cost += static_cast<T *>(this)->getVectorInstrCost(
953
0
              Instruction::ExtractElement, VT, Index + i * Factor);
954
0
      }
955
0
956
0
      unsigned InsSubCost = 0;
957
0
      for (unsigned i = 0; i < NumSubElts; i++)
958
0
        InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
959
0
            Instruction::InsertElement, SubVT, i);
960
0
961
0
      Cost += Indices.size() * InsSubCost;
962
5
    } else {
963
5
      // The interleave cost is extract all elements from sub vectors, and
964
5
      // insert them into the wide vector.
965
5
      //
966
5
      // E.g. An interleaved store of factor 2:
967
5
      //      %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
968
5
      //      store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
969
5
      // The cost is estimated as extract all elements from both <4 x i32>
970
5
      // vectors and insert into the <8 x i32> vector.
971
5
972
5
      unsigned ExtSubCost = 0;
973
67
      for (unsigned i = 0; i < NumSubElts; 
i++62
)
974
62
        ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
975
62
            Instruction::ExtractElement, SubVT, i);
976
5
      Cost += ExtSubCost * Factor;
977
5
978
129
      for (unsigned i = 0; i < NumElts; 
i++124
)
979
124
        Cost += static_cast<T *>(this)
980
124
                    ->getVectorInstrCost(Instruction::InsertElement, VT, i);
981
5
    }
982
5
983
5
    if (!UseMaskForCond)
984
5
      return Cost;
985
0
986
0
    Type *I8Type = Type::getInt8Ty(VT->getContext());
987
0
    VectorType *MaskVT = VectorType::get(I8Type, NumElts);
988
0
    SubVT = VectorType::get(I8Type, NumSubElts);
989
0
990
0
    // The Mask shuffling cost is extract all the elements of the Mask
991
0
    // and insert each of them Factor times into the wide vector:
992
0
    //
993
0
    // E.g. an interleaved group with factor 3:
994
0
    //    %mask = icmp ult <8 x i32> %vec1, %vec2
995
0
    //    %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
996
0
    //        <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
997
0
    // The cost is estimated as extract all mask elements from the <8xi1> mask
998
0
    // vector and insert them factor times into the <24xi1> shuffled mask
999
0
    // vector.
1000
0
    for (unsigned i = 0; i < NumSubElts; i++)
1001
0
      Cost += static_cast<T *>(this)->getVectorInstrCost(
1002
0
          Instruction::ExtractElement, SubVT, i);
1003
0
1004
0
    for (unsigned i = 0; i < NumElts; i++)
1005
0
      Cost += static_cast<T *>(this)->getVectorInstrCost(
1006
0
          Instruction::InsertElement, MaskVT, i);
1007
0
1008
0
    // The Gaps mask is invariant and created outside the loop, therefore the
1009
0
    // cost of creating it is not accounted for here. However if we have both
1010
0
    // a MaskForGaps and some other mask that guards the execution of the
1011
0
    // memory access, we need to account for the cost of And-ing the two masks
1012
0
    // inside the loop.
1013
0
    if (UseMaskForGaps)
1014
0
      Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1015
0
          BinaryOperator::And, MaskVT);
1016
0
1017
0
    return Cost;
1018
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)
Line
Count
Source
870
73
                                      bool UseMaskForGaps = false) {
871
73
    VectorType *VT = dyn_cast<VectorType>(VecTy);
872
73
    assert(VT && "Expect a vector type for interleaved memory op");
873
73
874
73
    unsigned NumElts = VT->getNumElements();
875
73
    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
876
73
877
73
    unsigned NumSubElts = NumElts / Factor;
878
73
    VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
879
73
880
73
    // Firstly, the cost of load/store operation.
881
73
    unsigned Cost;
882
73
    if (UseMaskForCond || 
UseMaskForGaps62
)
883
12
      Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
884
12
          Opcode, VecTy, Alignment, AddressSpace);
885
61
    else
886
61
      Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
887
61
                                                     AddressSpace);
888
73
889
73
    // Legalize the vector type, and get the legalized and unlegalized type
890
73
    // sizes.
891
73
    MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
892
73
    unsigned VecTySize =
893
73
        static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
894
73
    unsigned VecTyLTSize = VecTyLT.getStoreSize();
895
73
896
73
    // Return the ceiling of dividing A by B.
897
73
    auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
898
73
899
73
    // Scale the cost of the memory operation by the fraction of legalized
900
73
    // instructions that will actually be used. We shouldn't account for the
901
73
    // cost of dead instructions since they will be removed.
902
73
    //
903
73
    // E.g., An interleaved load of factor 8:
904
73
    //       %vec = load <16 x i64>, <16 x i64>* %ptr
905
73
    //       %v0 = shufflevector %vec, undef, <0, 8>
906
73
    //
907
73
    // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
908
73
    // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
909
73
    // type). The other loads are unused.
910
73
    //
911
73
    // We only scale the cost of loads since interleaved store groups aren't
912
73
    // allowed to have gaps.
913
73
    if (Opcode == Instruction::Load && 
VecTySize > VecTyLTSize28
) {
914
9
      // The number of loads of a legal type it will take to represent a load
915
9
      // of the unlegalized vector type.
916
9
      unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
917
9
918
9
      // The number of elements of the unlegalized type that correspond to a
919
9
      // single legal instruction.
920
9
      unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
921
9
922
9
      // Determine which legal instructions will be used.
923
9
      BitVector UsedInsts(NumLegalInsts, false);
924
9
      for (unsigned Index : Indices)
925
45
        
for (unsigned Elt = 0; 11
Elt < NumSubElts;
++Elt34
)
926
34
          UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
927
9
928
9
      // Scale the cost of the load by the fraction of legal instructions that
929
9
      // will be used.
930
9
      Cost *= UsedInsts.count() / NumLegalInsts;
931
9
    }
932
73
933
73
    // Then plus the cost of interleave operation.
934
73
    if (Opcode == Instruction::Load) {
935
28
      // The interleave cost is similar to extract sub vectors' elements
936
28
      // from the wide vector, and insert them into sub vectors.
937
28
      //
938
28
      // E.g. An interleaved load of factor 2 (with one member of index 0):
939
28
      //      %vec = load <8 x i32>, <8 x i32>* %ptr
940
28
      //      %v0 = shuffle %vec, undef, <0, 2, 4, 6>         ; Index 0
941
28
      // The cost is estimated as extract elements at 0, 2, 4, 6 from the
942
28
      // <8 x i32> vector and insert them into a <4 x i32> vector.
943
28
944
28
      assert(Indices.size() <= Factor &&
945
28
             "Interleaved memory op has too many members");
946
28
947
43
      for (unsigned Index : Indices) {
948
43
        assert(Index < Factor && "Invalid index for interleaved memory op");
949
43
950
43
        // Extract elements from loaded vector for each sub vector.
951
233
        for (unsigned i = 0; i < NumSubElts; 
i++190
)
952
190
          Cost += static_cast<T *>(this)->getVectorInstrCost(
953
190
              Instruction::ExtractElement, VT, Index + i * Factor);
954
43
      }
955
28
956
28
      unsigned InsSubCost = 0;
957
156
      for (unsigned i = 0; i < NumSubElts; 
i++128
)
958
128
        InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
959
128
            Instruction::InsertElement, SubVT, i);
960
28
961
28
      Cost += Indices.size() * InsSubCost;
962
45
    } else {
963
45
      // The interleave cost is extract all elements from sub vectors, and
964
45
      // insert them into the wide vector.
965
45
      //
966
45
      // E.g. An interleaved store of factor 2:
967
45
      //      %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
968
45
      //      store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
969
45
      // The cost is estimated as extract all elements from both <4 x i32>
970
45
      // vectors and insert into the <8 x i32> vector.
971
45
972
45
      unsigned ExtSubCost = 0;
973
163
      for (unsigned i = 0; i < NumSubElts; 
i++118
)
974
118
        ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
975
118
            Instruction::ExtractElement, SubVT, i);
976
45
      Cost += ExtSubCost * Factor;
977
45
978
367
      for (unsigned i = 0; i < NumElts; 
i++322
)
979
322
        Cost += static_cast<T *>(this)
980
322
                    ->getVectorInstrCost(Instruction::InsertElement, VT, i);
981
45
    }
982
73
983
73
    if (!UseMaskForCond)
984
62
      return Cost;
985
11
986
11
    Type *I8Type = Type::getInt8Ty(VT->getContext());
987
11
    VectorType *MaskVT = VectorType::get(I8Type, NumElts);
988
11
    SubVT = VectorType::get(I8Type, NumSubElts);
989
11
990
11
    // The Mask shuffling cost is extract all the elements of the Mask
991
11
    // and insert each of them Factor times into the wide vector:
992
11
    //
993
11
    // E.g. an interleaved group with factor 3:
994
11
    //    %mask = icmp ult <8 x i32> %vec1, %vec2
995
11
    //    %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
996
11
    //        <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
997
11
    // The cost is estimated as extract all mask elements from the <8xi1> mask
998
11
    // vector and insert them factor times into the <24xi1> shuffled mask
999
11
    // vector.
1000
99
    for (unsigned i = 0; i < NumSubElts; 
i++88
)
1001
88
      Cost += static_cast<T *>(this)->getVectorInstrCost(
1002
88
          Instruction::ExtractElement, SubVT, i);
1003
11
1004
195
    for (unsigned i = 0; i < NumElts; 
i++184
)
1005
184
      Cost += static_cast<T *>(this)->getVectorInstrCost(
1006
184
          Instruction::InsertElement, MaskVT, i);
1007
11
1008
11
    // The Gaps mask is invariant and created outside the loop, therefore the
1009
11
    // cost of creating it is not accounted for here. However if we have both
1010
11
    // a MaskForGaps and some other mask that guards the execution of the
1011
11
    // memory access, we need to account for the cost of And-ing the two masks
1012
11
    // inside the loop.
1013
11
    if (UseMaskForGaps)
1014
4
      Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1015
4
          BinaryOperator::And, MaskVT);
1016
11
1017
11
    return Cost;
1018
11
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getInterleavedMemoryOpCost(unsigned int, llvm::Type*, unsigned int, llvm::ArrayRef<unsigned int>, unsigned int, unsigned int, bool, bool)
1019
1020
  /// Get intrinsic cost based on arguments.
1021
  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
1022
                                 ArrayRef<Value *> Args, FastMathFlags FMF,
1023
15.6k
                                 unsigned VF = 1) {
1024
15.6k
    unsigned RetVF = (RetTy->isVectorTy() ? 
RetTy->getVectorNumElements()4.96k
:
110.6k
);
1025
15.6k
    assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1026
15.6k
    auto *ConcreteTTI = static_cast<T *>(this);
1027
15.6k
1028
15.6k
    switch (IID) {
1029
15.6k
    default: {
1030
9.19k
      // Assume that we need to scalarize this intrinsic.
1031
9.19k
      SmallVector<Type *, 4> Types;
1032
17.8k
      for (Value *Op : Args) {
1033
17.8k
        Type *OpTy = Op->getType();
1034
17.8k
        assert(VF == 1 || !OpTy->isVectorTy());
1035
17.8k
        Types.push_back(VF == 1 ? 
OpTy11.9k
:
VectorType::get(OpTy, VF)5.88k
);
1036
17.8k
      }
1037
9.19k
1038
9.19k
      if (VF > 1 && 
!RetTy->isVoidTy()3.95k
)
1039
3.94k
        RetTy = VectorType::get(RetTy, VF);
1040
9.19k
1041
9.19k
      // Compute the scalarization overhead based on Args for a vector
1042
9.19k
      // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1043
9.19k
      // CostModel will pass a vector RetTy and VF is 1.
1044
9.19k
      unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1045
9.19k
      if (RetVF > 1 || 
VF > 16.62k
) {
1046
6.52k
        ScalarizationCost = 0;
1047
6.52k
        if (!RetTy->isVoidTy())
1048
6.52k
          ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1049
6.52k
        ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1050
6.52k
      }
1051
9.19k
1052
9.19k
      return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1053
9.19k
                                                ScalarizationCost);
1054
15.6k
    }
1055
15.6k
    case Intrinsic::masked_scatter: {
1056
378
      assert(VF == 1 && "Can't vectorize types here.");
1057
378
      Value *Mask = Args[3];
1058
378
      bool VarMask = !isa<Constant>(Mask);
1059
378
      unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1060
378
      return ConcreteTTI->getGatherScatterOpCost(
1061
378
          Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1062
15.6k
    }
1063
15.6k
    case Intrinsic::masked_gather: {
1064
463
      assert(VF == 1 && "Can't vectorize types here.");
1065
463
      Value *Mask = Args[2];
1066
463
      bool VarMask = !isa<Constant>(Mask);
1067
463
      unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1068
463
      return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1069
463
                                                 Args[0], VarMask, Alignment);
1070
15.6k
    }
1071
15.6k
    case Intrinsic::experimental_vector_reduce_add:
1072
3.71k
    case Intrinsic::experimental_vector_reduce_mul:
1073
3.71k
    case Intrinsic::experimental_vector_reduce_and:
1074
3.71k
    case Intrinsic::experimental_vector_reduce_or:
1075
3.71k
    case Intrinsic::experimental_vector_reduce_xor:
1076
3.71k
    case Intrinsic::experimental_vector_reduce_v2_fadd:
1077
3.71k
    case Intrinsic::experimental_vector_reduce_v2_fmul:
1078
3.71k
    case Intrinsic::experimental_vector_reduce_smax:
1079
3.71k
    case Intrinsic::experimental_vector_reduce_smin:
1080
3.71k
    case Intrinsic::experimental_vector_reduce_fmax:
1081
3.71k
    case Intrinsic::experimental_vector_reduce_fmin:
1082
3.71k
    case Intrinsic::experimental_vector_reduce_umax:
1083
3.71k
    case Intrinsic::experimental_vector_reduce_umin:
1084
3.71k
      return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1085
3.71k
    case Intrinsic::fshl:
1086
1.87k
    case Intrinsic::fshr: {
1087
1.87k
      Value *X = Args[0];
1088
1.87k
      Value *Y = Args[1];
1089
1.87k
      Value *Z = Args[2];
1090
1.87k
      TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1091
1.87k
      TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1092
1.87k
      TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1093
1.87k
      TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1094
1.87k
      TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1095
1.87k
      OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1096
1.87k
                                                              : 
TTI::OP_None0
;
1097
1.87k
      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1098
1.87k
      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1099
1.87k
      unsigned Cost = 0;
1100
1.87k
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1101
1.87k
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1102
1.87k
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1103
1.87k
                                                  OpKindX, OpKindZ, OpPropsX);
1104
1.87k
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1105
1.87k
                                                  OpKindY, OpKindZ, OpPropsY);
1106
1.87k
      // Non-constant shift amounts requires a modulo.
1107
1.87k
      if (OpKindZ != TTI::OK_UniformConstantValue &&
1108
1.87k
          
OpKindZ != TTI::OK_NonUniformConstantValue1.40k
)
1109
936
        Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1110
936
                                                    OpKindZ, OpKindBW, OpPropsZ,
1111
936
                                                    OpPropsBW);
1112
1.87k
      // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1113
1.87k
      if (X != Y) {
1114
1.05k
        Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1115
1.05k
        if (RetVF > 1)
1116
1.05k
          CondTy = VectorType::get(CondTy, RetVF);
1117
1.05k
        Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1118
1.05k
                                                CondTy, nullptr);
1119
1.05k
        Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1120
1.05k
                                                CondTy, nullptr);
1121
1.05k
      }
1122
1.87k
      return Cost;
1123
1.87k
    }
1124
15.6k
    }
1125
15.6k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int)
Line
Count
Source
1023
1.91k
                                 unsigned VF = 1) {
1024
1.91k
    unsigned RetVF = (RetTy->isVectorTy() ? 
RetTy->getVectorNumElements()5
:
11.90k
);
1025
1.91k
    assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1026
1.91k
    auto *ConcreteTTI = static_cast<T *>(this);
1027
1.91k
1028
1.91k
    switch (IID) {
1029
1.91k
    default: {
1030
1.88k
      // Assume that we need to scalarize this intrinsic.
1031
1.88k
      SmallVector<Type *, 4> Types;
1032
2.15k
      for (Value *Op : Args) {
1033
2.15k
        Type *OpTy = Op->getType();
1034
2.15k
        assert(VF == 1 || !OpTy->isVectorTy());
1035
2.15k
        Types.push_back(VF == 1 ? 
OpTy302
:
VectorType::get(OpTy, VF)1.85k
);
1036
2.15k
      }
1037
1.88k
1038
1.88k
      if (VF > 1 && 
!RetTy->isVoidTy()1.60k
)
1039
1.60k
        RetTy = VectorType::get(RetTy, VF);
1040
1.88k
1041
1.88k
      // Compute the scalarization overhead based on Args for a vector
1042
1.88k
      // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1043
1.88k
      // CostModel will pass a vector RetTy and VF is 1.
1044
1.88k
      unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1045
1.88k
      if (RetVF > 1 || 
VF > 11.88k
) {
1046
1.61k
        ScalarizationCost = 0;
1047
1.61k
        if (!RetTy->isVoidTy())
1048
1.61k
          ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1049
1.61k
        ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1050
1.61k
      }
1051
1.88k
1052
1.88k
      return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1053
1.88k
                                                ScalarizationCost);
1054
1.91k
    }
1055
1.91k
    case Intrinsic::masked_scatter: {
1056
0
      assert(VF == 1 && "Can't vectorize types here.");
1057
0
      Value *Mask = Args[3];
1058
0
      bool VarMask = !isa<Constant>(Mask);
1059
0
      unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1060
0
      return ConcreteTTI->getGatherScatterOpCost(
1061
0
          Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1062
1.91k
    }
1063
1.91k
    case Intrinsic::masked_gather: {
1064
0
      assert(VF == 1 && "Can't vectorize types here.");
1065
0
      Value *Mask = Args[2];
1066
0
      bool VarMask = !isa<Constant>(Mask);
1067
0
      unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1068
0
      return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1069
0
                                                 Args[0], VarMask, Alignment);
1070
1.91k
    }
1071
1.91k
    case Intrinsic::experimental_vector_reduce_add:
1072
27
    case Intrinsic::experimental_vector_reduce_mul:
1073
27
    case Intrinsic::experimental_vector_reduce_and:
1074
27
    case Intrinsic::experimental_vector_reduce_or:
1075
27
    case Intrinsic::experimental_vector_reduce_xor:
1076
27
    case Intrinsic::experimental_vector_reduce_v2_fadd:
1077
27
    case Intrinsic::experimental_vector_reduce_v2_fmul:
1078
27
    case Intrinsic::experimental_vector_reduce_smax:
1079
27
    case Intrinsic::experimental_vector_reduce_smin:
1080
27
    case Intrinsic::experimental_vector_reduce_fmax:
1081
27
    case Intrinsic::experimental_vector_reduce_fmin:
1082
27
    case Intrinsic::experimental_vector_reduce_umax:
1083
27
    case Intrinsic::experimental_vector_reduce_umin:
1084
27
      return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1085
27
    case Intrinsic::fshl:
1086
0
    case Intrinsic::fshr: {
1087
0
      Value *X = Args[0];
1088
0
      Value *Y = Args[1];
1089
0
      Value *Z = Args[2];
1090
0
      TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1091
0
      TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1092
0
      TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1093
0
      TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1094
0
      TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1095
0
      OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1096
0
                                                              : TTI::OP_None;
1097
0
      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1098
0
      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1099
0
      unsigned Cost = 0;
1100
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1101
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1102
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1103
0
                                                  OpKindX, OpKindZ, OpPropsX);
1104
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1105
0
                                                  OpKindY, OpKindZ, OpPropsY);
1106
0
      // Non-constant shift amounts requires a modulo.
1107
0
      if (OpKindZ != TTI::OK_UniformConstantValue &&
1108
0
          OpKindZ != TTI::OK_NonUniformConstantValue)
1109
0
        Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1110
0
                                                    OpKindZ, OpKindBW, OpPropsZ,
1111
0
                                                    OpPropsBW);
1112
0
      // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1113
0
      if (X != Y) {
1114
0
        Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1115
0
        if (RetVF > 1)
1116
0
          CondTy = VectorType::get(CondTy, RetVF);
1117
0
        Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1118
0
                                                CondTy, nullptr);
1119
0
        Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1120
0
                                                CondTy, nullptr);
1121
0
      }
1122
0
      return Cost;
1123
0
    }
1124
1.91k
    }
1125
1.91k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int)
Line
Count
Source
1023
22
                                 unsigned VF = 1) {
1024
22
    unsigned RetVF = (RetTy->isVectorTy() ? 
RetTy->getVectorNumElements()7
:
115
);
1025
22
    assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1026
22
    auto *ConcreteTTI = static_cast<T *>(this);
1027
22
1028
22
    switch (IID) {
1029
22
    default: {
1030
22
      // Assume that we need to scalarize this intrinsic.
1031
22
      SmallVector<Type *, 4> Types;
1032
34
      for (Value *Op : Args) {
1033
34
        Type *OpTy = Op->getType();
1034
34
        assert(VF == 1 || !OpTy->isVectorTy());
1035
34
        Types.push_back(VF == 1 ? 
OpTy10
:
VectorType::get(OpTy, VF)24
);
1036
34
      }
1037
22
1038
22
      if (VF > 1 && 
!RetTy->isVoidTy()12
)
1039
12
        RetTy = VectorType::get(RetTy, VF);
1040
22
1041
22
      // Compute the scalarization overhead based on Args for a vector
1042
22
      // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1043
22
      // CostModel will pass a vector RetTy and VF is 1.
1044
22
      unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1045
22
      if (RetVF > 1 || 
VF > 115
) {
1046
19
        ScalarizationCost = 0;
1047
19
        if (!RetTy->isVoidTy())
1048
19
          ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1049
19
        ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1050
19
      }
1051
22
1052
22
      return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1053
22
                                                ScalarizationCost);
1054
22
    }
1055
22
    case Intrinsic::masked_scatter: {
1056
0
      assert(VF == 1 && "Can't vectorize types here.");
1057
0
      Value *Mask = Args[3];
1058
0
      bool VarMask = !isa<Constant>(Mask);
1059
0
      unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1060
0
      return ConcreteTTI->getGatherScatterOpCost(
1061
0
          Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1062
22
    }
1063
22
    case Intrinsic::masked_gather: {
1064
0
      assert(VF == 1 && "Can't vectorize types here.");
1065
0
      Value *Mask = Args[2];
1066
0
      bool VarMask = !isa<Constant>(Mask);
1067
0
      unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1068
0
      return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1069
0
                                                 Args[0], VarMask, Alignment);
1070
22
    }
1071
22
    case Intrinsic::experimental_vector_reduce_add:
1072
0
    case Intrinsic::experimental_vector_reduce_mul:
1073
0
    case Intrinsic::experimental_vector_reduce_and:
1074
0
    case Intrinsic::experimental_vector_reduce_or:
1075
0
    case Intrinsic::experimental_vector_reduce_xor:
1076
0
    case Intrinsic::experimental_vector_reduce_v2_fadd:
1077
0
    case Intrinsic::experimental_vector_reduce_v2_fmul:
1078
0
    case Intrinsic::experimental_vector_reduce_smax:
1079
0
    case Intrinsic::experimental_vector_reduce_smin:
1080
0
    case Intrinsic::experimental_vector_reduce_fmax:
1081
0
    case Intrinsic::experimental_vector_reduce_fmin:
1082
0
    case Intrinsic::experimental_vector_reduce_umax:
1083
0
    case Intrinsic::experimental_vector_reduce_umin:
1084
0
      return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1085
0
    case Intrinsic::fshl:
1086
0
    case Intrinsic::fshr: {
1087
0
      Value *X = Args[0];
1088
0
      Value *Y = Args[1];
1089
0
      Value *Z = Args[2];
1090
0
      TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1091
0
      TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1092
0
      TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1093
0
      TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1094
0
      TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1095
0
      OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1096
0
                                                              : TTI::OP_None;
1097
0
      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1098
0
      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1099
0
      unsigned Cost = 0;
1100
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1101
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1102
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1103
0
                                                  OpKindX, OpKindZ, OpPropsX);
1104
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1105
0
                                                  OpKindY, OpKindZ, OpPropsY);
1106
0
      // Non-constant shift amounts requires a modulo.
1107
0
      if (OpKindZ != TTI::OK_UniformConstantValue &&
1108
0
          OpKindZ != TTI::OK_NonUniformConstantValue)
1109
0
        Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1110
0
                                                    OpKindZ, OpKindBW, OpPropsZ,
1111
0
                                                    OpPropsBW);
1112
0
      // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1113
0
      if (X != Y) {
1114
0
        Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1115
0
        if (RetVF > 1)
1116
0
          CondTy = VectorType::get(CondTy, RetVF);
1117
0
        Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1118
0
                                                CondTy, nullptr);
1119
0
        Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1120
0
                                                CondTy, nullptr);
1121
0
      }
1122
0
      return Cost;
1123
0
    }
1124
22
    }
1125
22
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int)
Line
Count
Source
1023
270
                                 unsigned VF = 1) {
1024
270
    unsigned RetVF = (RetTy->isVectorTy() ? 
RetTy->getVectorNumElements()0
: 1);
1025
270
    assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1026
270
    auto *ConcreteTTI = static_cast<T *>(this);
1027
270
1028
270
    switch (IID) {
1029
270
    default: {
1030
270
      // Assume that we need to scalarize this intrinsic.
1031
270
      SmallVector<Type *, 4> Types;
1032
426
      for (Value *Op : Args) {
1033
426
        Type *OpTy = Op->getType();
1034
426
        assert(VF == 1 || !OpTy->isVectorTy());
1035
426
        Types.push_back(VF == 1 ? 
OpTy0
: VectorType::get(OpTy, VF));
1036
426
      }
1037
270
1038
270
      if (VF > 1 && !RetTy->isVoidTy())
1039
270
        RetTy = VectorType::get(RetTy, VF);
1040
270
1041
270
      // Compute the scalarization overhead based on Args for a vector
1042
270
      // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1043
270
      // CostModel will pass a vector RetTy and VF is 1.
1044
270
      unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1045
270
      if (RetVF > 1 || VF > 1) {
1046
270
        ScalarizationCost = 0;
1047
270
        if (!RetTy->isVoidTy())
1048
270
          ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1049
270
        ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1050
270
      }
1051
270
1052
270
      return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1053
270
                                                ScalarizationCost);
1054
270
    }
1055
270
    case Intrinsic::masked_scatter: {
1056
0
      assert(VF == 1 && "Can't vectorize types here.");
1057
0
      Value *Mask = Args[3];
1058
0
      bool VarMask = !isa<Constant>(Mask);
1059
0
      unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1060
0
      return ConcreteTTI->getGatherScatterOpCost(
1061
0
          Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1062
270
    }
1063
270
    case Intrinsic::masked_gather: {
1064
0
      assert(VF == 1 && "Can't vectorize types here.");
1065
0
      Value *Mask = Args[2];
1066
0
      bool VarMask = !isa<Constant>(Mask);
1067
0
      unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1068
0
      return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1069
0
                                                 Args[0], VarMask, Alignment);
1070
270
    }
1071
270
    case Intrinsic::experimental_vector_reduce_add:
1072
0
    case Intrinsic::experimental_vector_reduce_mul:
1073
0
    case Intrinsic::experimental_vector_reduce_and:
1074
0
    case Intrinsic::experimental_vector_reduce_or:
1075
0
    case Intrinsic::experimental_vector_reduce_xor:
1076
0
    case Intrinsic::experimental_vector_reduce_v2_fadd:
1077
0
    case Intrinsic::experimental_vector_reduce_v2_fmul:
1078
0
    case Intrinsic::experimental_vector_reduce_smax:
1079
0
    case Intrinsic::experimental_vector_reduce_smin:
1080
0
    case Intrinsic::experimental_vector_reduce_fmax:
1081
0
    case Intrinsic::experimental_vector_reduce_fmin:
1082
0
    case Intrinsic::experimental_vector_reduce_umax:
1083
0
    case Intrinsic::experimental_vector_reduce_umin:
1084
0
      return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1085
0
    case Intrinsic::fshl:
1086
0
    case Intrinsic::fshr: {
1087
0
      Value *X = Args[0];
1088
0
      Value *Y = Args[1];
1089
0
      Value *Z = Args[2];
1090
0
      TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1091
0
      TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1092
0
      TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1093
0
      TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1094
0
      TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1095
0
      OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1096
0
                                                              : TTI::OP_None;
1097
0
      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1098
0
      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1099
0
      unsigned Cost = 0;
1100
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1101
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1102
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1103
0
                                                  OpKindX, OpKindZ, OpPropsX);
1104
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1105
0
                                                  OpKindY, OpKindZ, OpPropsY);
1106
0
      // Non-constant shift amounts requires a modulo.
1107
0
      if (OpKindZ != TTI::OK_UniformConstantValue &&
1108
0
          OpKindZ != TTI::OK_NonUniformConstantValue)
1109
0
        Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1110
0
                                                    OpKindZ, OpKindBW, OpPropsZ,
1111
0
                                                    OpPropsBW);
1112
0
      // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1113
0
      if (X != Y) {
1114
0
        Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1115
0
        if (RetVF > 1)
1116
0
          CondTy = VectorType::get(CondTy, RetVF);
1117
0
        Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1118
0
                                                CondTy, nullptr);
1119
0
        Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1120
0
                                                CondTy, nullptr);
1121
0
      }
1122
0
      return Cost;
1123
0
    }
1124
270
    }
1125
270
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int)
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int)
Line
Count
Source
1023
91
                                 unsigned VF = 1) {
1024
91
    unsigned RetVF = (RetTy->isVectorTy() ? 
RetTy->getVectorNumElements()1
:
190
);
1025
91
    assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1026
91
    auto *ConcreteTTI = static_cast<T *>(this);
1027
91
1028
91
    switch (IID) {
1029
91
    default: {
1030
91
      // Assume that we need to scalarize this intrinsic.
1031
91
      SmallVector<Type *, 4> Types;
1032
101
      for (Value *Op : Args) {
1033
101
        Type *OpTy = Op->getType();
1034
101
        assert(VF == 1 || !OpTy->isVectorTy());
1035
101
        Types.push_back(VF == 1 ? 
OpTy21
:
VectorType::get(OpTy, VF)80
);
1036
101
      }
1037
91
1038
91
      if (VF > 1 && 
!RetTy->isVoidTy()72
)
1039
72
        RetTy = VectorType::get(RetTy, VF);
1040
91
1041
91
      // Compute the scalarization overhead based on Args for a vector
1042
91
      // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1043
91
      // CostModel will pass a vector RetTy and VF is 1.
1044
91
      unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1045
91
      if (RetVF > 1 || 
VF > 190
) {
1046
73
        ScalarizationCost = 0;
1047
73
        if (!RetTy->isVoidTy())
1048
73
          ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1049
73
        ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1050
73
      }
1051
91
1052
91
      return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1053
91
                                                ScalarizationCost);
1054
91
    }
1055
91
    case Intrinsic::masked_scatter: {
1056
0
      assert(VF == 1 && "Can't vectorize types here.");
1057
0
      Value *Mask = Args[3];
1058
0
      bool VarMask = !isa<Constant>(Mask);
1059
0
      unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1060
0
      return ConcreteTTI->getGatherScatterOpCost(
1061
0
          Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1062
91
    }
1063
91
    case Intrinsic::masked_gather: {
1064
0
      assert(VF == 1 && "Can't vectorize types here.");
1065
0
      Value *Mask = Args[2];
1066
0
      bool VarMask = !isa<Constant>(Mask);
1067
0
      unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1068
0
      return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1069
0
                                                 Args[0], VarMask, Alignment);
1070
91
    }
1071
91
    case Intrinsic::experimental_vector_reduce_add:
1072
0
    case Intrinsic::experimental_vector_reduce_mul:
1073
0
    case Intrinsic::experimental_vector_reduce_and:
1074
0
    case Intrinsic::experimental_vector_reduce_or:
1075
0
    case Intrinsic::experimental_vector_reduce_xor:
1076
0
    case Intrinsic::experimental_vector_reduce_v2_fadd:
1077
0
    case Intrinsic::experimental_vector_reduce_v2_fmul:
1078
0
    case Intrinsic::experimental_vector_reduce_smax:
1079
0
    case Intrinsic::experimental_vector_reduce_smin:
1080
0
    case Intrinsic::experimental_vector_reduce_fmax:
1081
0
    case Intrinsic::experimental_vector_reduce_fmin:
1082
0
    case Intrinsic::experimental_vector_reduce_umax:
1083
0
    case Intrinsic::experimental_vector_reduce_umin:
1084
0
      return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1085
0
    case Intrinsic::fshl:
1086
0
    case Intrinsic::fshr: {
1087
0
      Value *X = Args[0];
1088
0
      Value *Y = Args[1];
1089
0
      Value *Z = Args[2];
1090
0
      TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1091
0
      TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1092
0
      TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1093
0
      TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1094
0
      TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1095
0
      OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1096
0
                                                              : TTI::OP_None;
1097
0
      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1098
0
      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1099
0
      unsigned Cost = 0;
1100
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1101
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1102
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1103
0
                                                  OpKindX, OpKindZ, OpPropsX);
1104
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1105
0
                                                  OpKindY, OpKindZ, OpPropsY);
1106
0
      // Non-constant shift amounts requires a modulo.
1107
0
      if (OpKindZ != TTI::OK_UniformConstantValue &&
1108
0
          OpKindZ != TTI::OK_NonUniformConstantValue)
1109
0
        Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1110
0
                                                    OpKindZ, OpKindBW, OpPropsZ,
1111
0
                                                    OpPropsBW);
1112
0
      // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1113
0
      if (X != Y) {
1114
0
        Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1115
0
        if (RetVF > 1)
1116
0
          CondTy = VectorType::get(CondTy, RetVF);
1117
0
        Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1118
0
                                                CondTy, nullptr);
1119
0
        Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1120
0
                                                CondTy, nullptr);
1121
0
      }
1122
0
      return Cost;
1123
0
    }
1124
91
    }
1125
91
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int)
Line
Count
Source
1023
29
                                 unsigned VF = 1) {
1024
29
    unsigned RetVF = (RetTy->isVectorTy() ? 
RetTy->getVectorNumElements()0
: 1);
1025
29
    assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1026
29
    auto *ConcreteTTI = static_cast<T *>(this);
1027
29
1028
29
    switch (IID) {
1029
29
    default: {
1030
29
      // Assume that we need to scalarize this intrinsic.
1031
29
      SmallVector<Type *, 4> Types;
1032
34
      for (Value *Op : Args) {
1033
34
        Type *OpTy = Op->getType();
1034
34
        assert(VF == 1 || !OpTy->isVectorTy());
1035
34
        Types.push_back(VF == 1 ? 
OpTy26
:
VectorType::get(OpTy, VF)8
);
1036
34
      }
1037
29
1038
29
      if (VF > 1 && 
!RetTy->isVoidTy()4
)
1039
0
        RetTy = VectorType::get(RetTy, VF);
1040
29
1041
29
      // Compute the scalarization overhead based on Args for a vector
1042
29
      // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1043
29
      // CostModel will pass a vector RetTy and VF is 1.
1044
29
      unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1045
29
      if (RetVF > 1 || VF > 1) {
1046
4
        ScalarizationCost = 0;
1047
4
        if (!RetTy->isVoidTy())
1048
0
          ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1049
4
        ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1050
4
      }
1051
29
1052
29
      return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1053
29
                                                ScalarizationCost);
1054
29
    }
1055
29
    case Intrinsic::masked_scatter: {
1056
0
      assert(VF == 1 && "Can't vectorize types here.");
1057
0
      Value *Mask = Args[3];
1058
0
      bool VarMask = !isa<Constant>(Mask);
1059
0
      unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1060
0
      return ConcreteTTI->getGatherScatterOpCost(
1061
0
          Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1062
29
    }
1063
29
    case Intrinsic::masked_gather: {
1064
0
      assert(VF == 1 && "Can't vectorize types here.");
1065
0
      Value *Mask = Args[2];
1066
0
      bool VarMask = !isa<Constant>(Mask);
1067
0
      unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1068
0
      return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1069
0
                                                 Args[0], VarMask, Alignment);
1070
29
    }
1071
29
    case Intrinsic::experimental_vector_reduce_add:
1072
0
    case Intrinsic::experimental_vector_reduce_mul:
1073
0
    case Intrinsic::experimental_vector_reduce_and:
1074
0
    case Intrinsic::experimental_vector_reduce_or:
1075
0
    case Intrinsic::experimental_vector_reduce_xor:
1076
0
    case Intrinsic::experimental_vector_reduce_v2_fadd:
1077
0
    case Intrinsic::experimental_vector_reduce_v2_fmul:
1078
0
    case Intrinsic::experimental_vector_reduce_smax:
1079
0
    case Intrinsic::experimental_vector_reduce_smin:
1080
0
    case Intrinsic::experimental_vector_reduce_fmax:
1081
0
    case Intrinsic::experimental_vector_reduce_fmin:
1082
0
    case Intrinsic::experimental_vector_reduce_umax:
1083
0
    case Intrinsic::experimental_vector_reduce_umin:
1084
0
      return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1085
0
    case Intrinsic::fshl:
1086
0
    case Intrinsic::fshr: {
1087
0
      Value *X = Args[0];
1088
0
      Value *Y = Args[1];
1089
0
      Value *Z = Args[2];
1090
0
      TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1091
0
      TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1092
0
      TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1093
0
      TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1094
0
      TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1095
0
      OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1096
0
                                                              : TTI::OP_None;
1097
0
      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1098
0
      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1099
0
      unsigned Cost = 0;
1100
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1101
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1102
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1103
0
                                                  OpKindX, OpKindZ, OpPropsX);
1104
0
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1105
0
                                                  OpKindY, OpKindZ, OpPropsY);
1106
0
      // Non-constant shift amounts requires a modulo.
1107
0
      if (OpKindZ != TTI::OK_UniformConstantValue &&
1108
0
          OpKindZ != TTI::OK_NonUniformConstantValue)
1109
0
        Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1110
0
                                                    OpKindZ, OpKindBW, OpPropsZ,
1111
0
                                                    OpPropsBW);
1112
0
      // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1113
0
      if (X != Y) {
1114
0
        Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1115
0
        if (RetVF > 1)
1116
0
          CondTy = VectorType::get(CondTy, RetVF);
1117
0
        Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1118
0
                                                CondTy, nullptr);
1119
0
        Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1120
0
                                                CondTy, nullptr);
1121
0
      }
1122
0
      return Cost;
1123
0
    }
1124
29
    }
1125
29
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int)
Line
Count
Source
1023
13.2k
                                 unsigned VF = 1) {
1024
13.2k
    unsigned RetVF = (RetTy->isVectorTy() ? 
RetTy->getVectorNumElements()4.95k
:
18.34k
);
1025
13.2k
    assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1026
13.2k
    auto *ConcreteTTI = static_cast<T *>(this);
1027
13.2k
1028
13.2k
    switch (IID) {
1029
13.2k
    default: {
1030
6.89k
      // Assume that we need to scalarize this intrinsic.
1031
6.89k
      SmallVector<Type *, 4> Types;
1032
15.0k
      for (Value *Op : Args) {
1033
15.0k
        Type *OpTy = Op->getType();
1034
15.0k
        assert(VF == 1 || !OpTy->isVectorTy());
1035
15.0k
        Types.push_back(VF == 1 ? 
OpTy11.5k
:
VectorType::get(OpTy, VF)3.49k
);
1036
15.0k
      }
1037
6.89k
1038
6.89k
      if (VF > 1 && 
!RetTy->isVoidTy()1.98k
)
1039
1.98k
        RetTy = VectorType::get(RetTy, VF);
1040
6.89k
1041
6.89k
      // Compute the scalarization overhead based on Args for a vector
1042
6.89k
      // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1043
6.89k
      // CostModel will pass a vector RetTy and VF is 1.
1044
6.89k
      unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1045
6.89k
      if (RetVF > 1 || 
VF > 14.33k
) {
1046
4.54k
        ScalarizationCost = 0;
1047
4.54k
        if (!RetTy->isVoidTy())
1048
4.54k
          ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1049
4.54k
        ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1050
4.54k
      }
1051
6.89k
1052
6.89k
      return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1053
6.89k
                                                ScalarizationCost);
1054
13.2k
    }
1055
13.2k
    case Intrinsic::masked_scatter: {
1056
378
      assert(VF == 1 && "Can't vectorize types here.");
1057
378
      Value *Mask = Args[3];
1058
378
      bool VarMask = !isa<Constant>(Mask);
1059
378
      unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1060
378
      return ConcreteTTI->getGatherScatterOpCost(
1061
378
          Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1062
13.2k
    }
1063
13.2k
    case Intrinsic::masked_gather: {
1064
463
      assert(VF == 1 && "Can't vectorize types here.");
1065
463
      Value *Mask = Args[2];
1066
463
      bool VarMask = !isa<Constant>(Mask);
1067
463
      unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1068
463
      return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1069
463
                                                 Args[0], VarMask, Alignment);
1070
13.2k
    }
1071
13.2k
    case Intrinsic::experimental_vector_reduce_add:
1072
3.68k
    case Intrinsic::experimental_vector_reduce_mul:
1073
3.68k
    case Intrinsic::experimental_vector_reduce_and:
1074
3.68k
    case Intrinsic::experimental_vector_reduce_or:
1075
3.68k
    case Intrinsic::experimental_vector_reduce_xor:
1076
3.68k
    case Intrinsic::experimental_vector_reduce_v2_fadd:
1077
3.68k
    case Intrinsic::experimental_vector_reduce_v2_fmul:
1078
3.68k
    case Intrinsic::experimental_vector_reduce_smax:
1079
3.68k
    case Intrinsic::experimental_vector_reduce_smin:
1080
3.68k
    case Intrinsic::experimental_vector_reduce_fmax:
1081
3.68k
    case Intrinsic::experimental_vector_reduce_fmin:
1082
3.68k
    case Intrinsic::experimental_vector_reduce_umax:
1083
3.68k
    case Intrinsic::experimental_vector_reduce_umin:
1084
3.68k
      return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1085
3.68k
    case Intrinsic::fshl:
1086
1.87k
    case Intrinsic::fshr: {
1087
1.87k
      Value *X = Args[0];
1088
1.87k
      Value *Y = Args[1];
1089
1.87k
      Value *Z = Args[2];
1090
1.87k
      TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1091
1.87k
      TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1092
1.87k
      TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1093
1.87k
      TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1094
1.87k
      TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1095
1.87k
      OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1096
1.87k
                                                              : 
TTI::OP_None0
;
1097
1.87k
      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1098
1.87k
      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1099
1.87k
      unsigned Cost = 0;
1100
1.87k
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1101
1.87k
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1102
1.87k
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1103
1.87k
                                                  OpKindX, OpKindZ, OpPropsX);
1104
1.87k
      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1105
1.87k
                                                  OpKindY, OpKindZ, OpPropsY);
1106
1.87k
      // Non-constant shift amounts requires a modulo.
1107
1.87k
      if (OpKindZ != TTI::OK_UniformConstantValue &&
1108
1.87k
          
OpKindZ != TTI::OK_NonUniformConstantValue1.40k
)
1109
936
        Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1110
936
                                                    OpKindZ, OpKindBW, OpPropsZ,
1111
936
                                                    OpPropsBW);
1112
1.87k
      // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1113
1.87k
      if (X != Y) {
1114
1.05k
        Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1115
1.05k
        if (RetVF > 1)
1116
1.05k
          CondTy = VectorType::get(CondTy, RetVF);
1117
1.05k
        Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1118
1.05k
                                                CondTy, nullptr);
1119
1.05k
        Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1120
1.05k
                                                CondTy, nullptr);
1121
1.05k
      }
1122
1.87k
      return Cost;
1123
1.87k
    }
1124
13.2k
    }
1125
13.2k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Value*>, llvm::FastMathFlags, unsigned int)
1126
1127
  /// Get intrinsic cost based on argument types.
1128
  /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1129
  /// cost of scalarizing the arguments and the return value will be computed
1130
  /// based on types.
1131
  unsigned getIntrinsicInstrCost(
1132
      Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
1133
16.1k
      unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1134
16.1k
    unsigned RetVF = (RetTy->isVectorTy() ? 
RetTy->getVectorNumElements()4.91k
:
111.2k
);
1135
16.1k
    auto *ConcreteTTI = static_cast<T *>(this);
1136
16.1k
1137
16.1k
    SmallVector<unsigned, 2> ISDs;
1138
16.1k
    unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1139
16.1k
    switch (IID) {
1140
16.1k
    default: {
1141
2.36k
      // Assume that we need to scalarize this intrinsic.
1142
2.36k
      unsigned ScalarizationCost = ScalarizationCostPassed;
1143
2.36k
      unsigned ScalarCalls = 1;
1144
2.36k
      Type *ScalarRetTy = RetTy;
1145
2.36k
      if (RetTy->isVectorTy()) {
1146
581
        if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1147
28
          ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1148
581
        ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1149
581
        ScalarRetTy = RetTy->getScalarType();
1150
581
      }
1151
2.36k
      SmallVector<Type *, 4> ScalarTys;
1152
7.82k
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i5.46k
) {
1153
5.46k
        Type *Ty = Tys[i];
1154
5.46k
        if (Ty->isVectorTy()) {
1155
1.63k
          if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1156
728
            ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1157
1.63k
          ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1158
1.63k
          Ty = Ty->getScalarType();
1159
1.63k
        }
1160
5.46k
        ScalarTys.push_back(Ty);
1161
5.46k
      }
1162
2.36k
      if (ScalarCalls == 1)
1163
1.50k
        return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1164
861
1165
861
      unsigned ScalarCost =
1166
861
          ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1167
861
1168
861
      return ScalarCalls * ScalarCost + ScalarizationCost;
1169
861
    }
1170
861
    // Look for intrinsics that can be lowered directly or turned into a scalar
1171
861
    // intrinsic call.
1172
861
    case Intrinsic::sqrt:
1173
482
      ISDs.push_back(ISD::FSQRT);
1174
482
      break;
1175
861
    case Intrinsic::sin:
1176
165
      ISDs.push_back(ISD::FSIN);
1177
165
      break;
1178
861
    case Intrinsic::cos:
1179
497
      ISDs.push_back(ISD::FCOS);
1180
497
      break;
1181
861
    case Intrinsic::exp:
1182
483
      ISDs.push_back(ISD::FEXP);
1183
483
      break;
1184
861
    case Intrinsic::exp2:
1185
42
      ISDs.push_back(ISD::FEXP2);
1186
42
      break;
1187
861
    case Intrinsic::log:
1188
95
      ISDs.push_back(ISD::FLOG);
1189
95
      break;
1190
861
    case Intrinsic::log10:
1191
46
      ISDs.push_back(ISD::FLOG10);
1192
46
      break;
1193
861
    case Intrinsic::log2:
1194
18
      ISDs.push_back(ISD::FLOG2);
1195
18
      break;
1196
1.47k
    case Intrinsic::fabs:
1197
1.47k
      ISDs.push_back(ISD::FABS);
1198
1.47k
      break;
1199
861
    case Intrinsic::canonicalize:
1200
5
      ISDs.push_back(ISD::FCANONICALIZE);
1201
5
      break;
1202
861
    case Intrinsic::minnum:
1203
0
      ISDs.push_back(ISD::FMINNUM);
1204
0
      if (FMF.noNaNs())
1205
0
        ISDs.push_back(ISD::FMINIMUM);
1206
0
      break;
1207
861
    case Intrinsic::maxnum:
1208
0
      ISDs.push_back(ISD::FMAXNUM);
1209
0
      if (FMF.noNaNs())
1210
0
        ISDs.push_back(ISD::FMAXIMUM);
1211
0
      break;
1212
1.08k
    case Intrinsic::copysign:
1213
1.08k
      ISDs.push_back(ISD::FCOPYSIGN);
1214
1.08k
      break;
1215
861
    case Intrinsic::floor:
1216
305
      ISDs.push_back(ISD::FFLOOR);
1217
305
      break;
1218
861
    case Intrinsic::ceil:
1219
270
      ISDs.push_back(ISD::FCEIL);
1220
270
      break;
1221
861
    case Intrinsic::trunc:
1222
256
      ISDs.push_back(ISD::FTRUNC);
1223
256
      break;
1224
861
    case Intrinsic::nearbyint:
1225
259
      ISDs.push_back(ISD::FNEARBYINT);
1226
259
      break;
1227
861
    case Intrinsic::rint:
1228
268
      ISDs.push_back(ISD::FRINT);
1229
268
      break;
1230
861
    case Intrinsic::round:
1231
0
      ISDs.push_back(ISD::FROUND);
1232
0
      break;
1233
861
    case Intrinsic::pow:
1234
139
      ISDs.push_back(ISD::FPOW);
1235
139
      break;
1236
861
    case Intrinsic::fma:
1237
471
      ISDs.push_back(ISD::FMA);
1238
471
      break;
1239
861
    case Intrinsic::fmuladd:
1240
2
      ISDs.push_back(ISD::FMA);
1241
2
      break;
1242
861
    // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1243
861
    case Intrinsic::lifetime_start:
1244
5
    case Intrinsic::lifetime_end:
1245
5
    case Intrinsic::sideeffect:
1246
5
      return 0;
1247
378
    case Intrinsic::masked_store:
1248
378
      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1249
378
                                                0);
1250
406
    case Intrinsic::masked_load:
1251
406
      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1252
373
    case Intrinsic::experimental_vector_reduce_add:
1253
373
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1254
373
                                                     /*IsPairwiseForm=*/false);
1255
368
    case Intrinsic::experimental_vector_reduce_mul:
1256
368
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1257
368
                                                     /*IsPairwiseForm=*/false);
1258
496
    case Intrinsic::experimental_vector_reduce_and:
1259
496
      return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1260
496
                                                     /*IsPairwiseForm=*/false);
1261
496
    case Intrinsic::experimental_vector_reduce_or:
1262
496
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1263
496
                                                     /*IsPairwiseForm=*/false);
1264
496
    case Intrinsic::experimental_vector_reduce_xor:
1265
496
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1266
496
                                                     /*IsPairwiseForm=*/false);
1267
5
    case Intrinsic::experimental_vector_reduce_v2_fadd:
1268
0
      return ConcreteTTI->getArithmeticReductionCost(
1269
0
          Instruction::FAdd, Tys[0],
1270
0
          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1271
5
                                     // reductions.
1272
5
    case Intrinsic::experimental_vector_reduce_v2_fmul:
1273
0
      return ConcreteTTI->getArithmeticReductionCost(
1274
0
          Instruction::FMul, Tys[0],
1275
0
          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1276
5
                                     // reductions.
1277
740
    case Intrinsic::experimental_vector_reduce_smax:
1278
740
    case Intrinsic::experimental_vector_reduce_smin:
1279
740
    case Intrinsic::experimental_vector_reduce_fmax:
1280
740
    case Intrinsic::experimental_vector_reduce_fmin:
1281
740
      return ConcreteTTI->getMinMaxReductionCost(
1282
740
          Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1283
740
          /*IsUnsigned=*/true);
1284
746
    case Intrinsic::experimental_vector_reduce_umax:
1285
746
    case Intrinsic::experimental_vector_reduce_umin:
1286
746
      return ConcreteTTI->getMinMaxReductionCost(
1287
746
          Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1288
746
          /*IsUnsigned=*/false);
1289
746
    case Intrinsic::sadd_sat:
1290
472
    case Intrinsic::ssub_sat: {
1291
472
      Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1292
472
      if (RetVF > 1)
1293
210
        CondTy = VectorType::get(CondTy, RetVF);
1294
472
1295
472
      Type *OpTy = StructType::create({RetTy, CondTy});
1296
472
      Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1297
472
                                     ? 
Intrinsic::sadd_with_overflow236
1298
472
                                     : 
Intrinsic::ssub_with_overflow236
;
1299
472
1300
472
      // SatMax -> Overflow && SumDiff < 0
1301
472
      // SatMin -> Overflow && SumDiff >= 0
1302
472
      unsigned Cost = 0;
1303
472
      Cost += ConcreteTTI->getIntrinsicInstrCost(
1304
472
          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1305
472
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1306
472
                                              CondTy, nullptr);
1307
472
      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1308
472
                                                  CondTy, nullptr);
1309
472
      return Cost;
1310
472
    }
1311
472
    case Intrinsic::uadd_sat:
1312
404
    case Intrinsic::usub_sat: {
1313
404
      Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1314
404
      if (RetVF > 1)
1315
144
        CondTy = VectorType::get(CondTy, RetVF);
1316
404
1317
404
      Type *OpTy = StructType::create({RetTy, CondTy});
1318
404
      Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1319
404
                                     ? 
Intrinsic::uadd_with_overflow200
1320
404
                                     : 
Intrinsic::usub_with_overflow204
;
1321
404
1322
404
      unsigned Cost = 0;
1323
404
      Cost += ConcreteTTI->getIntrinsicInstrCost(
1324
404
          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1325
404
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1326
404
                                              CondTy, nullptr);
1327
404
      return Cost;
1328
404
    }
1329
794
    case Intrinsic::smul_fix:
1330
794
    case Intrinsic::umul_fix: {
1331
794
      unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1332
794
      Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1333
794
      if (RetVF > 1)
1334
477
        ExtTy = VectorType::get(ExtTy, RetVF);
1335
794
1336
794
      unsigned ExtOp =
1337
794
          IID == Intrinsic::smul_fix ? 
Instruction::SExt388
:
Instruction::ZExt406
;
1338
794
1339
794
      unsigned Cost = 0;
1340
794
      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1341
794
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1342
794
      Cost +=
1343
794
          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1344
794
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1345
794
                                                  TTI::OK_AnyValue,
1346
794
                                                  TTI::OK_UniformConstantValue);
1347
794
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1348
794
                                                  TTI::OK_AnyValue,
1349
794
                                                  TTI::OK_UniformConstantValue);
1350
794
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1351
794
      return Cost;
1352
794
    }
1353
794
    case Intrinsic::sadd_with_overflow:
1354
450
    case Intrinsic::ssub_with_overflow: {
1355
450
      Type *SumTy = RetTy->getContainedType(0);
1356
450
      Type *OverflowTy = RetTy->getContainedType(1);
1357
450
      unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1358
450
                            ? 
BinaryOperator::Add225
1359
450
                            : 
BinaryOperator::Sub225
;
1360
450
1361
450
      //   LHSSign -> LHS >= 0
1362
450
      //   RHSSign -> RHS >= 0
1363
450
      //   SumSign -> Sum >= 0
1364
450
      //
1365
450
      //   Add:
1366
450
      //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1367
450
      //   Sub:
1368
450
      //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1369
450
      unsigned Cost = 0;
1370
450
      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1371
450
      Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1372
450
                                                  OverflowTy, nullptr);
1373
450
      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1374
450
                      BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1375
450
      Cost +=
1376
450
          ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1377
450
      return Cost;
1378
450
    }
1379
450
    case Intrinsic::uadd_with_overflow:
1380
402
    case Intrinsic::usub_with_overflow: {
1381
402
      Type *SumTy = RetTy->getContainedType(0);
1382
402
      Type *OverflowTy = RetTy->getContainedType(1);
1383
402
      unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1384
402
                            ? 
BinaryOperator::Add199
1385
402
                            : 
BinaryOperator::Sub203
;
1386
402
1387
402
      unsigned Cost = 0;
1388
402
      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1389
402
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1390
402
                                              OverflowTy, nullptr);
1391
402
      return Cost;
1392
402
    }
1393
402
    case Intrinsic::smul_with_overflow:
1394
320
    case Intrinsic::umul_with_overflow: {
1395
320
      Type *MulTy = RetTy->getContainedType(0);
1396
320
      Type *OverflowTy = RetTy->getContainedType(1);
1397
320
      unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1398
320
      Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1399
320
      if (MulTy->isVectorTy())
1400
240
        ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() );
1401
320
1402
320
      unsigned ExtOp =
1403
320
          IID == Intrinsic::smul_fix ? 
Instruction::SExt0
: Instruction::ZExt;
1404
320
1405
320
      unsigned Cost = 0;
1406
320
      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1407
320
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1408
320
      Cost +=
1409
320
          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1410
320
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1411
320
                                                  TTI::OK_AnyValue,
1412
320
                                                  TTI::OK_UniformConstantValue);
1413
320
1414
320
      if (IID == Intrinsic::smul_with_overflow)
1415
160
        Cost += ConcreteTTI->getArithmeticInstrCost(
1416
160
            Instruction::AShr, MulTy, TTI::OK_AnyValue,
1417
160
            TTI::OK_UniformConstantValue);
1418
320
1419
320
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1420
320
                                              OverflowTy, nullptr);
1421
320
      return Cost;
1422
320
    }
1423
320
    case Intrinsic::ctpop:
1424
84
      ISDs.push_back(ISD::CTPOP);
1425
84
      // In case of legalization use TCC_Expensive. This is cheaper than a
1426
84
      // library call but still not a cheap instruction.
1427
84
      SingleCallCost = TargetTransformInfo::TCC_Expensive;
1428
84
      break;
1429
6.44k
    // FIXME: ctlz, cttz, ...
1430
6.44k
    }
1431
6.44k
1432
6.44k
    const TargetLoweringBase *TLI = getTLI();
1433
6.44k
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1434
6.44k
1435
6.44k
    SmallVector<unsigned, 2> LegalCost;
1436
6.44k
    SmallVector<unsigned, 2> CustomCost;
1437
6.44k
    for (unsigned ISD : ISDs) {
1438
6.44k
      if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1439
2.53k
        if (IID == Intrinsic::fabs && 
LT.second.isFloatingPoint()1.06k
&&
1440
2.53k
            
TLI->isFAbsFree(LT.second)1.06k
) {
1441
14
          return 0;
1442
14
        }
1443
2.51k
1444
2.51k
        // The operation is legal. Assume it costs 1.
1445
2.51k
        // If the type is split to multiple registers, assume that there is some
1446
2.51k
        // overhead to this.
1447
2.51k
        // TODO: Once we have extract/insert subvector cost we need to use them.
1448
2.51k
        if (LT.first > 1)
1449
227
          LegalCost.push_back(LT.first * 2);
1450
2.29k
        else
1451
2.29k
          LegalCost.push_back(LT.first * 1);
1452
3.91k
      } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1453
1.26k
        // If the operation is custom lowered then assume
1454
1.26k
        // that the code is twice as expensive.
1455
1.26k
        CustomCost.push_back(LT.first * 2);
1456
1.26k
      }
1457
6.44k
    }
1458
6.44k
1459
6.44k
    auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1460
6.43k
    if (MinLegalCostI != LegalCost.end())
1461
2.51k
      return *MinLegalCostI;
1462
3.91k
1463
3.91k
    auto MinCustomCostI =
1464
3.91k
        std::min_element(CustomCost.begin(), CustomCost.end());
1465
3.91k
    if (MinCustomCostI != CustomCost.end())
1466
1.26k
      return *MinCustomCostI;
1467
2.65k
1468
2.65k
    // If we can't lower fmuladd into an FMA estimate the cost as a floating
1469
2.65k
    // point mul followed by an add.
1470
2.65k
    if (IID == Intrinsic::fmuladd)
1471
2
      return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1472
2
             ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1473
2.65k
1474
2.65k
    // Else, assume that we need to scalarize this intrinsic. For math builtins
1475
2.65k
    // this will emit a costly libcall, adding call overhead and spills. Make it
1476
2.65k
    // very expensive.
1477
2.65k
    if (RetTy->isVectorTy()) {
1478
1.17k
      unsigned ScalarizationCost =
1479
1.17k
          ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1480
1.17k
               ? ScalarizationCostPassed
1481
1.17k
               : 
getScalarizationOverhead(RetTy, true, false)0
);
1482
1.17k
      unsigned ScalarCalls = RetTy->getVectorNumElements();
1483
1.17k
      SmallVector<Type *, 4> ScalarTys;
1484
2.79k
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i1.62k
) {
1485
1.62k
        Type *Ty = Tys[i];
1486
1.62k
        if (Ty->isVectorTy())
1487
1.62k
          Ty = Ty->getScalarType();
1488
1.62k
        ScalarTys.push_back(Ty);
1489
1.62k
      }
1490
1.17k
      unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1491
1.17k
          IID, RetTy->getScalarType(), ScalarTys, FMF);
1492
2.79k
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i1.62k
) {
1493
1.62k
        if (Tys[i]->isVectorTy()) {
1494
1.62k
          if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1495
0
            ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1496
1.62k
          ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1497
1.62k
        }
1498
1.62k
      }
1499
1.17k
1500
1.17k
      return ScalarCalls * ScalarCost + ScalarizationCost;
1501
1.17k
    }
1502
1.48k
1503
1.48k
    // This is going to be turned into a library call, make it expensive.
1504
1.48k
    return SingleCallCost;
1505
1.48k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int)
Line
Count
Source
1133
3.36k
      unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1134
3.36k
    unsigned RetVF = (RetTy->isVectorTy() ? 
RetTy->getVectorNumElements()1.61k
:
11.75k
);
1135
3.36k
    auto *ConcreteTTI = static_cast<T *>(this);
1136
3.36k
1137
3.36k
    SmallVector<unsigned, 2> ISDs;
1138
3.36k
    unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1139
3.36k
    switch (IID) {
1140
3.36k
    default: {
1141
548
      // Assume that we need to scalarize this intrinsic.
1142
548
      unsigned ScalarizationCost = ScalarizationCostPassed;
1143
548
      unsigned ScalarCalls = 1;
1144
548
      Type *ScalarRetTy = RetTy;
1145
548
      if (RetTy->isVectorTy()) {
1146
219
        if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1147
0
          ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1148
219
        ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1149
219
        ScalarRetTy = RetTy->getScalarType();
1150
219
      }
1151
548
      SmallVector<Type *, 4> ScalarTys;
1152
1.14k
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i596
) {
1153
596
        Type *Ty = Tys[i];
1154
596
        if (Ty->isVectorTy()) {
1155
235
          if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1156
0
            ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1157
235
          ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1158
235
          Ty = Ty->getScalarType();
1159
235
        }
1160
596
        ScalarTys.push_back(Ty);
1161
596
      }
1162
548
      if (ScalarCalls == 1)
1163
329
        return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1164
219
1165
219
      unsigned ScalarCost =
1166
219
          ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1167
219
1168
219
      return ScalarCalls * ScalarCost + ScalarizationCost;
1169
219
    }
1170
219
    // Look for intrinsics that can be lowered directly or turned into a scalar
1171
219
    // intrinsic call.
1172
462
    case Intrinsic::sqrt:
1173
462
      ISDs.push_back(ISD::FSQRT);
1174
462
      break;
1175
219
    case Intrinsic::sin:
1176
109
      ISDs.push_back(ISD::FSIN);
1177
109
      break;
1178
441
    case Intrinsic::cos:
1179
441
      ISDs.push_back(ISD::FCOS);
1180
441
      break;
1181
419
    case Intrinsic::exp:
1182
419
      ISDs.push_back(ISD::FEXP);
1183
419
      break;
1184
219
    case Intrinsic::exp2:
1185
21
      ISDs.push_back(ISD::FEXP2);
1186
21
      break;
1187
219
    case Intrinsic::log:
1188
42
      ISDs.push_back(ISD::FLOG);
1189
42
      break;
1190
219
    case Intrinsic::log10:
1191
17
      ISDs.push_back(ISD::FLOG10);
1192
17
      break;
1193
219
    case Intrinsic::log2:
1194
0
      ISDs.push_back(ISD::FLOG2);
1195
0
      break;
1196
739
    case Intrinsic::fabs:
1197
739
      ISDs.push_back(ISD::FABS);
1198
739
      break;
1199
219
    case Intrinsic::canonicalize:
1200
0
      ISDs.push_back(ISD::FCANONICALIZE);
1201
0
      break;
1202
219
    case Intrinsic::minnum:
1203
0
      ISDs.push_back(ISD::FMINNUM);
1204
0
      if (FMF.noNaNs())
1205
0
        ISDs.push_back(ISD::FMINIMUM);
1206
0
      break;
1207
219
    case Intrinsic::maxnum:
1208
0
      ISDs.push_back(ISD::FMAXNUM);
1209
0
      if (FMF.noNaNs())
1210
0
        ISDs.push_back(ISD::FMAXIMUM);
1211
0
      break;
1212
266
    case Intrinsic::copysign:
1213
266
      ISDs.push_back(ISD::FCOPYSIGN);
1214
266
      break;
1215
219
    case Intrinsic::floor:
1216
14
      ISDs.push_back(ISD::FFLOOR);
1217
14
      break;
1218
219
    case Intrinsic::ceil:
1219
0
      ISDs.push_back(ISD::FCEIL);
1220
0
      break;
1221
219
    case Intrinsic::trunc:
1222
0
      ISDs.push_back(ISD::FTRUNC);
1223
0
      break;
1224
219
    case Intrinsic::nearbyint:
1225
0
      ISDs.push_back(ISD::FNEARBYINT);
1226
0
      break;
1227
219
    case Intrinsic::rint:
1228
12
      ISDs.push_back(ISD::FRINT);
1229
12
      break;
1230
219
    case Intrinsic::round:
1231
0
      ISDs.push_back(ISD::FROUND);
1232
0
      break;
1233
219
    case Intrinsic::pow:
1234
94
      ISDs.push_back(ISD::FPOW);
1235
94
      break;
1236
219
    case Intrinsic::fma:
1237
0
      ISDs.push_back(ISD::FMA);
1238
0
      break;
1239
219
    case Intrinsic::fmuladd:
1240
0
      ISDs.push_back(ISD::FMA);
1241
0
      break;
1242
219
    // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1243
219
    case Intrinsic::lifetime_start:
1244
0
    case Intrinsic::lifetime_end:
1245
0
    case Intrinsic::sideeffect:
1246
0
      return 0;
1247
0
    case Intrinsic::masked_store:
1248
0
      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1249
0
                                                0);
1250
0
    case Intrinsic::masked_load:
1251
0
      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1252
5
    case Intrinsic::experimental_vector_reduce_add:
1253
5
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1254
5
                                                     /*IsPairwiseForm=*/false);
1255
0
    case Intrinsic::experimental_vector_reduce_mul:
1256
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1257
0
                                                     /*IsPairwiseForm=*/false);
1258
0
    case Intrinsic::experimental_vector_reduce_and:
1259
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1260
0
                                                     /*IsPairwiseForm=*/false);
1261
0
    case Intrinsic::experimental_vector_reduce_or:
1262
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1263
0
                                                     /*IsPairwiseForm=*/false);
1264
0
    case Intrinsic::experimental_vector_reduce_xor:
1265
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1266
0
                                                     /*IsPairwiseForm=*/false);
1267
0
    case Intrinsic::experimental_vector_reduce_v2_fadd:
1268
0
      return ConcreteTTI->getArithmeticReductionCost(
1269
0
          Instruction::FAdd, Tys[0],
1270
0
          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1271
0
                                     // reductions.
1272
0
    case Intrinsic::experimental_vector_reduce_v2_fmul:
1273
0
      return ConcreteTTI->getArithmeticReductionCost(
1274
0
          Instruction::FMul, Tys[0],
1275
0
          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1276
0
                                     // reductions.
1277
12
    case Intrinsic::experimental_vector_reduce_smax:
1278
12
    case Intrinsic::experimental_vector_reduce_smin:
1279
12
    case Intrinsic::experimental_vector_reduce_fmax:
1280
12
    case Intrinsic::experimental_vector_reduce_fmin:
1281
12
      return ConcreteTTI->getMinMaxReductionCost(
1282
12
          Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1283
12
          /*IsUnsigned=*/true);
1284
12
    case Intrinsic::experimental_vector_reduce_umax:
1285
10
    case Intrinsic::experimental_vector_reduce_umin:
1286
10
      return ConcreteTTI->getMinMaxReductionCost(
1287
10
          Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1288
10
          /*IsUnsigned=*/false);
1289
10
    case Intrinsic::sadd_sat:
1290
0
    case Intrinsic::ssub_sat: {
1291
0
      Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1292
0
      if (RetVF > 1)
1293
0
        CondTy = VectorType::get(CondTy, RetVF);
1294
0
1295
0
      Type *OpTy = StructType::create({RetTy, CondTy});
1296
0
      Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1297
0
                                     ? Intrinsic::sadd_with_overflow
1298
0
                                     : Intrinsic::ssub_with_overflow;
1299
0
1300
0
      // SatMax -> Overflow && SumDiff < 0
1301
0
      // SatMin -> Overflow && SumDiff >= 0
1302
0
      unsigned Cost = 0;
1303
0
      Cost += ConcreteTTI->getIntrinsicInstrCost(
1304
0
          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1305
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1306
0
                                              CondTy, nullptr);
1307
0
      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1308
0
                                                  CondTy, nullptr);
1309
0
      return Cost;
1310
0
    }
1311
76
    case Intrinsic::uadd_sat:
1312
76
    case Intrinsic::usub_sat: {
1313
76
      Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1314
76
      if (RetVF > 1)
1315
58
        CondTy = VectorType::get(CondTy, RetVF);
1316
76
1317
76
      Type *OpTy = StructType::create({RetTy, CondTy});
1318
76
      Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1319
76
                                     ? 
Intrinsic::uadd_with_overflow36
1320
76
                                     : 
Intrinsic::usub_with_overflow40
;
1321
76
1322
76
      unsigned Cost = 0;
1323
76
      Cost += ConcreteTTI->getIntrinsicInstrCost(
1324
76
          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1325
76
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1326
76
                                              CondTy, nullptr);
1327
76
      return Cost;
1328
76
    }
1329
76
    case Intrinsic::smul_fix:
1330
0
    case Intrinsic::umul_fix: {
1331
0
      unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1332
0
      Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1333
0
      if (RetVF > 1)
1334
0
        ExtTy = VectorType::get(ExtTy, RetVF);
1335
0
1336
0
      unsigned ExtOp =
1337
0
          IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1338
0
1339
0
      unsigned Cost = 0;
1340
0
      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1341
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1342
0
      Cost +=
1343
0
          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1344
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1345
0
                                                  TTI::OK_AnyValue,
1346
0
                                                  TTI::OK_UniformConstantValue);
1347
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1348
0
                                                  TTI::OK_AnyValue,
1349
0
                                                  TTI::OK_UniformConstantValue);
1350
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1351
0
      return Cost;
1352
0
    }
1353
0
    case Intrinsic::sadd_with_overflow:
1354
0
    case Intrinsic::ssub_with_overflow: {
1355
0
      Type *SumTy = RetTy->getContainedType(0);
1356
0
      Type *OverflowTy = RetTy->getContainedType(1);
1357
0
      unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1358
0
                            ? BinaryOperator::Add
1359
0
                            : BinaryOperator::Sub;
1360
0
1361
0
      //   LHSSign -> LHS >= 0
1362
0
      //   RHSSign -> RHS >= 0
1363
0
      //   SumSign -> Sum >= 0
1364
0
      //
1365
0
      //   Add:
1366
0
      //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1367
0
      //   Sub:
1368
0
      //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1369
0
      unsigned Cost = 0;
1370
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1371
0
      Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1372
0
                                                  OverflowTy, nullptr);
1373
0
      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1374
0
                      BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1375
0
      Cost +=
1376
0
          ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1377
0
      return Cost;
1378
0
    }
1379
76
    case Intrinsic::uadd_with_overflow:
1380
76
    case Intrinsic::usub_with_overflow: {
1381
76
      Type *SumTy = RetTy->getContainedType(0);
1382
76
      Type *OverflowTy = RetTy->getContainedType(1);
1383
76
      unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1384
76
                            ? 
BinaryOperator::Add36
1385
76
                            : 
BinaryOperator::Sub40
;
1386
76
1387
76
      unsigned Cost = 0;
1388
76
      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1389
76
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1390
76
                                              OverflowTy, nullptr);
1391
76
      return Cost;
1392
76
    }
1393
76
    case Intrinsic::smul_with_overflow:
1394
0
    case Intrinsic::umul_with_overflow: {
1395
0
      Type *MulTy = RetTy->getContainedType(0);
1396
0
      Type *OverflowTy = RetTy->getContainedType(1);
1397
0
      unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1398
0
      Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1399
0
      if (MulTy->isVectorTy())
1400
0
        ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() );
1401
0
1402
0
      unsigned ExtOp =
1403
0
          IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1404
0
1405
0
      unsigned Cost = 0;
1406
0
      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1407
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1408
0
      Cost +=
1409
0
          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1410
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1411
0
                                                  TTI::OK_AnyValue,
1412
0
                                                  TTI::OK_UniformConstantValue);
1413
0
1414
0
      if (IID == Intrinsic::smul_with_overflow)
1415
0
        Cost += ConcreteTTI->getArithmeticInstrCost(
1416
0
            Instruction::AShr, MulTy, TTI::OK_AnyValue,
1417
0
            TTI::OK_UniformConstantValue);
1418
0
1419
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1420
0
                                              OverflowTy, nullptr);
1421
0
      return Cost;
1422
0
    }
1423
0
    case Intrinsic::ctpop:
1424
0
      ISDs.push_back(ISD::CTPOP);
1425
0
      // In case of legalization use TCC_Expensive. This is cheaper than a
1426
0
      // library call but still not a cheap instruction.
1427
0
      SingleCallCost = TargetTransformInfo::TCC_Expensive;
1428
0
      break;
1429
2.63k
    // FIXME: ctlz, cttz, ...
1430
2.63k
    }
1431
2.63k
1432
2.63k
    const TargetLoweringBase *TLI = getTLI();
1433
2.63k
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1434
2.63k
1435
2.63k
    SmallVector<unsigned, 2> LegalCost;
1436
2.63k
    SmallVector<unsigned, 2> CustomCost;
1437
2.63k
    for (unsigned ISD : ISDs) {
1438
2.63k
      if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1439
1.22k
        if (IID == Intrinsic::fabs && 
LT.second.isFloatingPoint()739
&&
1440
1.22k
            
TLI->isFAbsFree(LT.second)739
) {
1441
0
          return 0;
1442
0
        }
1443
1.22k
1444
1.22k
        // The operation is legal. Assume it costs 1.
1445
1.22k
        // If the type is split to multiple registers, assume that there is some
1446
1.22k
        // overhead to this.
1447
1.22k
        // TODO: Once we have extract/insert subvector cost we need to use them.
1448
1.22k
        if (LT.first > 1)
1449
87
          LegalCost.push_back(LT.first * 2);
1450
1.14k
        else
1451
1.14k
          LegalCost.push_back(LT.first * 1);
1452
1.40k
      } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1453
266
        // If the operation is custom lowered then assume
1454
266
        // that the code is twice as expensive.
1455
266
        CustomCost.push_back(LT.first * 2);
1456
266
      }
1457
2.63k
    }
1458
2.63k
1459
2.63k
    auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1460
2.63k
    if (MinLegalCostI != LegalCost.end())
1461
1.22k
      return *MinLegalCostI;
1462
1.40k
1463
1.40k
    auto MinCustomCostI =
1464
1.40k
        std::min_element(CustomCost.begin(), CustomCost.end());
1465
1.40k
    if (MinCustomCostI != CustomCost.end())
1466
266
      return *MinCustomCostI;
1467
1.14k
1468
1.14k
    // If we can't lower fmuladd into an FMA estimate the cost as a floating
1469
1.14k
    // point mul followed by an add.
1470
1.14k
    if (IID == Intrinsic::fmuladd)
1471
0
      return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1472
0
             ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1473
1.14k
1474
1.14k
    // Else, assume that we need to scalarize this intrinsic. For math builtins
1475
1.14k
    // this will emit a costly libcall, adding call overhead and spills. Make it
1476
1.14k
    // very expensive.
1477
1.14k
    if (RetTy->isVectorTy()) {
1478
463
      unsigned ScalarizationCost =
1479
463
          ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1480
463
               ? ScalarizationCostPassed
1481
463
               : 
getScalarizationOverhead(RetTy, true, false)0
);
1482
463
      unsigned ScalarCalls = RetTy->getVectorNumElements();
1483
463
      SmallVector<Type *, 4> ScalarTys;
1484
962
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i499
) {
1485
499
        Type *Ty = Tys[i];
1486
499
        if (Ty->isVectorTy())
1487
499
          Ty = Ty->getScalarType();
1488
499
        ScalarTys.push_back(Ty);
1489
499
      }
1490
463
      unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1491
463
          IID, RetTy->getScalarType(), ScalarTys, FMF);
1492
962
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i499
) {
1493
499
        if (Tys[i]->isVectorTy()) {
1494
499
          if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1495
0
            ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1496
499
          ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1497
499
        }
1498
499
      }
1499
463
1500
463
      return ScalarCalls * ScalarCost + ScalarizationCost;
1501
463
    }
1502
680
1503
680
    // This is going to be turned into a library call, make it expensive.
1504
680
    return SingleCallCost;
1505
680
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int)
Line
Count
Source
1133
43
      unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1134
43
    unsigned RetVF = (RetTy->isVectorTy() ? 
RetTy->getVectorNumElements()19
:
124
);
1135
43
    auto *ConcreteTTI = static_cast<T *>(this);
1136
43
1137
43
    SmallVector<unsigned, 2> ISDs;
1138
43
    unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1139
43
    switch (IID) {
1140
43
    default: {
1141
0
      // Assume that we need to scalarize this intrinsic.
1142
0
      unsigned ScalarizationCost = ScalarizationCostPassed;
1143
0
      unsigned ScalarCalls = 1;
1144
0
      Type *ScalarRetTy = RetTy;
1145
0
      if (RetTy->isVectorTy()) {
1146
0
        if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1147
0
          ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1148
0
        ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1149
0
        ScalarRetTy = RetTy->getScalarType();
1150
0
      }
1151
0
      SmallVector<Type *, 4> ScalarTys;
1152
0
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1153
0
        Type *Ty = Tys[i];
1154
0
        if (Ty->isVectorTy()) {
1155
0
          if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1156
0
            ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1157
0
          ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1158
0
          Ty = Ty->getScalarType();
1159
0
        }
1160
0
        ScalarTys.push_back(Ty);
1161
0
      }
1162
0
      if (ScalarCalls == 1)
1163
0
        return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1164
0
1165
0
      unsigned ScalarCost =
1166
0
          ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1167
0
1168
0
      return ScalarCalls * ScalarCost + ScalarizationCost;
1169
0
    }
1170
0
    // Look for intrinsics that can be lowered directly or turned into a scalar
1171
0
    // intrinsic call.
1172
0
    case Intrinsic::sqrt:
1173
0
      ISDs.push_back(ISD::FSQRT);
1174
0
      break;
1175
0
    case Intrinsic::sin:
1176
0
      ISDs.push_back(ISD::FSIN);
1177
0
      break;
1178
0
    case Intrinsic::cos:
1179
0
      ISDs.push_back(ISD::FCOS);
1180
0
      break;
1181
0
    case Intrinsic::exp:
1182
0
      ISDs.push_back(ISD::FEXP);
1183
0
      break;
1184
0
    case Intrinsic::exp2:
1185
0
      ISDs.push_back(ISD::FEXP2);
1186
0
      break;
1187
0
    case Intrinsic::log:
1188
0
      ISDs.push_back(ISD::FLOG);
1189
0
      break;
1190
0
    case Intrinsic::log10:
1191
0
      ISDs.push_back(ISD::FLOG10);
1192
0
      break;
1193
0
    case Intrinsic::log2:
1194
0
      ISDs.push_back(ISD::FLOG2);
1195
0
      break;
1196
23
    case Intrinsic::fabs:
1197
23
      ISDs.push_back(ISD::FABS);
1198
23
      break;
1199
5
    case Intrinsic::canonicalize:
1200
5
      ISDs.push_back(ISD::FCANONICALIZE);
1201
5
      break;
1202
0
    case Intrinsic::minnum:
1203
0
      ISDs.push_back(ISD::FMINNUM);
1204
0
      if (FMF.noNaNs())
1205
0
        ISDs.push_back(ISD::FMINIMUM);
1206
0
      break;
1207
0
    case Intrinsic::maxnum:
1208
0
      ISDs.push_back(ISD::FMAXNUM);
1209
0
      if (FMF.noNaNs())
1210
0
        ISDs.push_back(ISD::FMAXIMUM);
1211
0
      break;
1212
0
    case Intrinsic::copysign:
1213
0
      ISDs.push_back(ISD::FCOPYSIGN);
1214
0
      break;
1215
0
    case Intrinsic::floor:
1216
0
      ISDs.push_back(ISD::FFLOOR);
1217
0
      break;
1218
0
    case Intrinsic::ceil:
1219
0
      ISDs.push_back(ISD::FCEIL);
1220
0
      break;
1221
0
    case Intrinsic::trunc:
1222
0
      ISDs.push_back(ISD::FTRUNC);
1223
0
      break;
1224
0
    case Intrinsic::nearbyint:
1225
0
      ISDs.push_back(ISD::FNEARBYINT);
1226
0
      break;
1227
0
    case Intrinsic::rint:
1228
0
      ISDs.push_back(ISD::FRINT);
1229
0
      break;
1230
0
    case Intrinsic::round:
1231
0
      ISDs.push_back(ISD::FROUND);
1232
0
      break;
1233
0
    case Intrinsic::pow:
1234
0
      ISDs.push_back(ISD::FPOW);
1235
0
      break;
1236
15
    case Intrinsic::fma:
1237
15
      ISDs.push_back(ISD::FMA);
1238
15
      break;
1239
0
    case Intrinsic::fmuladd:
1240
0
      ISDs.push_back(ISD::FMA);
1241
0
      break;
1242
0
    // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1243
0
    case Intrinsic::lifetime_start:
1244
0
    case Intrinsic::lifetime_end:
1245
0
    case Intrinsic::sideeffect:
1246
0
      return 0;
1247
0
    case Intrinsic::masked_store:
1248
0
      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1249
0
                                                0);
1250
0
    case Intrinsic::masked_load:
1251
0
      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1252
0
    case Intrinsic::experimental_vector_reduce_add:
1253
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1254
0
                                                     /*IsPairwiseForm=*/false);
1255
0
    case Intrinsic::experimental_vector_reduce_mul:
1256
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1257
0
                                                     /*IsPairwiseForm=*/false);
1258
0
    case Intrinsic::experimental_vector_reduce_and:
1259
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1260
0
                                                     /*IsPairwiseForm=*/false);
1261
0
    case Intrinsic::experimental_vector_reduce_or:
1262
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1263
0
                                                     /*IsPairwiseForm=*/false);
1264
0
    case Intrinsic::experimental_vector_reduce_xor:
1265
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1266
0
                                                     /*IsPairwiseForm=*/false);
1267
0
    case Intrinsic::experimental_vector_reduce_v2_fadd:
1268
0
      return ConcreteTTI->getArithmeticReductionCost(
1269
0
          Instruction::FAdd, Tys[0],
1270
0
          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1271
0
                                     // reductions.
1272
0
    case Intrinsic::experimental_vector_reduce_v2_fmul:
1273
0
      return ConcreteTTI->getArithmeticReductionCost(
1274
0
          Instruction::FMul, Tys[0],
1275
0
          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1276
0
                                     // reductions.
1277
0
    case Intrinsic::experimental_vector_reduce_smax:
1278
0
    case Intrinsic::experimental_vector_reduce_smin:
1279
0
    case Intrinsic::experimental_vector_reduce_fmax:
1280
0
    case Intrinsic::experimental_vector_reduce_fmin:
1281
0
      return ConcreteTTI->getMinMaxReductionCost(
1282
0
          Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1283
0
          /*IsUnsigned=*/true);
1284
0
    case Intrinsic::experimental_vector_reduce_umax:
1285
0
    case Intrinsic::experimental_vector_reduce_umin:
1286
0
      return ConcreteTTI->getMinMaxReductionCost(
1287
0
          Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1288
0
          /*IsUnsigned=*/false);
1289
0
    case Intrinsic::sadd_sat:
1290
0
    case Intrinsic::ssub_sat: {
1291
0
      Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1292
0
      if (RetVF > 1)
1293
0
        CondTy = VectorType::get(CondTy, RetVF);
1294
0
1295
0
      Type *OpTy = StructType::create({RetTy, CondTy});
1296
0
      Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1297
0
                                     ? Intrinsic::sadd_with_overflow
1298
0
                                     : Intrinsic::ssub_with_overflow;
1299
0
1300
0
      // SatMax -> Overflow && SumDiff < 0
1301
0
      // SatMin -> Overflow && SumDiff >= 0
1302
0
      unsigned Cost = 0;
1303
0
      Cost += ConcreteTTI->getIntrinsicInstrCost(
1304
0
          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1305
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1306
0
                                              CondTy, nullptr);
1307
0
      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1308
0
                                                  CondTy, nullptr);
1309
0
      return Cost;
1310
0
    }
1311
0
    case Intrinsic::uadd_sat:
1312
0
    case Intrinsic::usub_sat: {
1313
0
      Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1314
0
      if (RetVF > 1)
1315
0
        CondTy = VectorType::get(CondTy, RetVF);
1316
0
1317
0
      Type *OpTy = StructType::create({RetTy, CondTy});
1318
0
      Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1319
0
                                     ? Intrinsic::uadd_with_overflow
1320
0
                                     : Intrinsic::usub_with_overflow;
1321
0
1322
0
      unsigned Cost = 0;
1323
0
      Cost += ConcreteTTI->getIntrinsicInstrCost(
1324
0
          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1325
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1326
0
                                              CondTy, nullptr);
1327
0
      return Cost;
1328
0
    }
1329
0
    case Intrinsic::smul_fix:
1330
0
    case Intrinsic::umul_fix: {
1331
0
      unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1332
0
      Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1333
0
      if (RetVF > 1)
1334
0
        ExtTy = VectorType::get(ExtTy, RetVF);
1335
0
1336
0
      unsigned ExtOp =
1337
0
          IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1338
0
1339
0
      unsigned Cost = 0;
1340
0
      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1341
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1342
0
      Cost +=
1343
0
          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1344
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1345
0
                                                  TTI::OK_AnyValue,
1346
0
                                                  TTI::OK_UniformConstantValue);
1347
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1348
0
                                                  TTI::OK_AnyValue,
1349
0
                                                  TTI::OK_UniformConstantValue);
1350
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1351
0
      return Cost;
1352
0
    }
1353
0
    case Intrinsic::sadd_with_overflow:
1354
0
    case Intrinsic::ssub_with_overflow: {
1355
0
      Type *SumTy = RetTy->getContainedType(0);
1356
0
      Type *OverflowTy = RetTy->getContainedType(1);
1357
0
      unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1358
0
                            ? BinaryOperator::Add
1359
0
                            : BinaryOperator::Sub;
1360
0
1361
0
      //   LHSSign -> LHS >= 0
1362
0
      //   RHSSign -> RHS >= 0
1363
0
      //   SumSign -> Sum >= 0
1364
0
      //
1365
0
      //   Add:
1366
0
      //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1367
0
      //   Sub:
1368
0
      //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1369
0
      unsigned Cost = 0;
1370
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1371
0
      Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1372
0
                                                  OverflowTy, nullptr);
1373
0
      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1374
0
                      BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1375
0
      Cost +=
1376
0
          ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1377
0
      return Cost;
1378
0
    }
1379
0
    case Intrinsic::uadd_with_overflow:
1380
0
    case Intrinsic::usub_with_overflow: {
1381
0
      Type *SumTy = RetTy->getContainedType(0);
1382
0
      Type *OverflowTy = RetTy->getContainedType(1);
1383
0
      unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1384
0
                            ? BinaryOperator::Add
1385
0
                            : BinaryOperator::Sub;
1386
0
1387
0
      unsigned Cost = 0;
1388
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1389
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1390
0
                                              OverflowTy, nullptr);
1391
0
      return Cost;
1392
0
    }
1393
0
    case Intrinsic::smul_with_overflow:
1394
0
    case Intrinsic::umul_with_overflow: {
1395
0
      Type *MulTy = RetTy->getContainedType(0);
1396
0
      Type *OverflowTy = RetTy->getContainedType(1);
1397
0
      unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1398
0
      Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1399
0
      if (MulTy->isVectorTy())
1400
0
        ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() );
1401
0
1402
0
      unsigned ExtOp =
1403
0
          IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1404
0
1405
0
      unsigned Cost = 0;
1406
0
      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1407
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1408
0
      Cost +=
1409
0
          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1410
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1411
0
                                                  TTI::OK_AnyValue,
1412
0
                                                  TTI::OK_UniformConstantValue);
1413
0
1414
0
      if (IID == Intrinsic::smul_with_overflow)
1415
0
        Cost += ConcreteTTI->getArithmeticInstrCost(
1416
0
            Instruction::AShr, MulTy, TTI::OK_AnyValue,
1417
0
            TTI::OK_UniformConstantValue);
1418
0
1419
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1420
0
                                              OverflowTy, nullptr);
1421
0
      return Cost;
1422
0
    }
1423
0
    case Intrinsic::ctpop:
1424
0
      ISDs.push_back(ISD::CTPOP);
1425
0
      // In case of legalization use TCC_Expensive. This is cheaper than a
1426
0
      // library call but still not a cheap instruction.
1427
0
      SingleCallCost = TargetTransformInfo::TCC_Expensive;
1428
0
      break;
1429
43
    // FIXME: ctlz, cttz, ...
1430
43
    }
1431
43
1432
43
    const TargetLoweringBase *TLI = getTLI();
1433
43
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1434
43
1435
43
    SmallVector<unsigned, 2> LegalCost;
1436
43
    SmallVector<unsigned, 2> CustomCost;
1437
43
    for (unsigned ISD : ISDs) {
1438
43
      if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1439
34
        if (IID == Intrinsic::fabs && 
LT.second.isFloatingPoint()18
&&
1440
34
            
TLI->isFAbsFree(LT.second)18
) {
1441
14
          return 0;
1442
14
        }
1443
20
1444
20
        // The operation is legal. Assume it costs 1.
1445
20
        // If the type is split to multiple registers, assume that there is some
1446
20
        // overhead to this.
1447
20
        // TODO: Once we have extract/insert subvector cost we need to use them.
1448
20
        if (LT.first > 1)
1449
0
          LegalCost.push_back(LT.first * 2);
1450
20
        else
1451
20
          LegalCost.push_back(LT.first * 1);
1452
20
      } else 
if (9
!TLI->isOperationExpand(ISD, LT.second)9
) {
1453
0
        // If the operation is custom lowered then assume
1454
0
        // that the code is twice as expensive.
1455
0
        CustomCost.push_back(LT.first * 2);
1456
0
      }
1457
43
    }
1458
43
1459
43
    auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1460
29
    if (MinLegalCostI != LegalCost.end())
1461
20
      return *MinLegalCostI;
1462
9
1463
9
    auto MinCustomCostI =
1464
9
        std::min_element(CustomCost.begin(), CustomCost.end());
1465
9
    if (MinCustomCostI != CustomCost.end())
1466
0
      return *MinCustomCostI;
1467
9
1468
9
    // If we can't lower fmuladd into an FMA estimate the cost as a floating
1469
9
    // point mul followed by an add.
1470
9
    if (IID == Intrinsic::fmuladd)
1471
0
      return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1472
0
             ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1473
9
1474
9
    // Else, assume that we need to scalarize this intrinsic. For math builtins
1475
9
    // this will emit a costly libcall, adding call overhead and spills. Make it
1476
9
    // very expensive.
1477
9
    if (RetTy->isVectorTy()) {
1478
9
      unsigned ScalarizationCost =
1479
9
          ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1480
9
               ? ScalarizationCostPassed
1481
9
               : 
getScalarizationOverhead(RetTy, true, false)0
);
1482
9
      unsigned ScalarCalls = RetTy->getVectorNumElements();
1483
9
      SmallVector<Type *, 4> ScalarTys;
1484
24
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i15
) {
1485
15
        Type *Ty = Tys[i];
1486
15
        if (Ty->isVectorTy())
1487
15
          Ty = Ty->getScalarType();
1488
15
        ScalarTys.push_back(Ty);
1489
15
      }
1490
9
      unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1491
9
          IID, RetTy->getScalarType(), ScalarTys, FMF);
1492
24
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i15
) {
1493
15
        if (Tys[i]->isVectorTy()) {
1494
15
          if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1495
0
            ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1496
15
          ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1497
15
        }
1498
15
      }
1499
9
1500
9
      return ScalarCalls * ScalarCost + ScalarizationCost;
1501
9
    }
1502
0
1503
0
    // This is going to be turned into a library call, make it expensive.
1504
0
    return SingleCallCost;
1505
0
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int)
Line
Count
Source
1133
768
      unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1134
768
    unsigned RetVF = (RetTy->isVectorTy() ? 
RetTy->getVectorNumElements()270
:
1498
);
1135
768
    auto *ConcreteTTI = static_cast<T *>(this);
1136
768
1137
768
    SmallVector<unsigned, 2> ISDs;
1138
768
    unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1139
768
    switch (IID) {
1140
768
    default: {
1141
72
      // Assume that we need to scalarize this intrinsic.
1142
72
      unsigned ScalarizationCost = ScalarizationCostPassed;
1143
72
      unsigned ScalarCalls = 1;
1144
72
      Type *ScalarRetTy = RetTy;
1145
72
      if (RetTy->isVectorTy()) {
1146
24
        if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1147
0
          ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1148
24
        ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1149
24
        ScalarRetTy = RetTy->getScalarType();
1150
24
      }
1151
72
      SmallVector<Type *, 4> ScalarTys;
1152
216
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i144
) {
1153
144
        Type *Ty = Tys[i];
1154
144
        if (Ty->isVectorTy()) {
1155
48
          if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1156
0
            ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1157
48
          ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1158
48
          Ty = Ty->getScalarType();
1159
48
        }
1160
144
        ScalarTys.push_back(Ty);
1161
144
      }
1162
72
      if (ScalarCalls == 1)
1163
48
        return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1164
24
1165
24
      unsigned ScalarCost =
1166
24
          ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1167
24
1168
24
      return ScalarCalls * ScalarCost + ScalarizationCost;
1169
24
    }
1170
24
    // Look for intrinsics that can be lowered directly or turned into a scalar
1171
24
    // intrinsic call.
1172
24
    case Intrinsic::sqrt:
1173
0
      ISDs.push_back(ISD::FSQRT);
1174
0
      break;
1175
24
    case Intrinsic::sin:
1176
0
      ISDs.push_back(ISD::FSIN);
1177
0
      break;
1178
24
    case Intrinsic::cos:
1179
0
      ISDs.push_back(ISD::FCOS);
1180
0
      break;
1181
24
    case Intrinsic::exp:
1182
0
      ISDs.push_back(ISD::FEXP);
1183
0
      break;
1184
24
    case Intrinsic::exp2:
1185
0
      ISDs.push_back(ISD::FEXP2);
1186
0
      break;
1187
24
    case Intrinsic::log:
1188
0
      ISDs.push_back(ISD::FLOG);
1189
0
      break;
1190
24
    case Intrinsic::log10:
1191
0
      ISDs.push_back(ISD::FLOG10);
1192
0
      break;
1193
24
    case Intrinsic::log2:
1194
0
      ISDs.push_back(ISD::FLOG2);
1195
0
      break;
1196
300
    case Intrinsic::fabs:
1197
300
      ISDs.push_back(ISD::FABS);
1198
300
      break;
1199
24
    case Intrinsic::canonicalize:
1200
0
      ISDs.push_back(ISD::FCANONICALIZE);
1201
0
      break;
1202
24
    case Intrinsic::minnum:
1203
0
      ISDs.push_back(ISD::FMINNUM);
1204
0
      if (FMF.noNaNs())
1205
0
        ISDs.push_back(ISD::FMINIMUM);
1206
0
      break;
1207
24
    case Intrinsic::maxnum:
1208
0
      ISDs.push_back(ISD::FMAXNUM);
1209
0
      if (FMF.noNaNs())
1210
0
        ISDs.push_back(ISD::FMAXIMUM);
1211
0
      break;
1212
396
    case Intrinsic::copysign:
1213
396
      ISDs.push_back(ISD::FCOPYSIGN);
1214
396
      break;
1215
24
    case Intrinsic::floor:
1216
0
      ISDs.push_back(ISD::FFLOOR);
1217
0
      break;
1218
24
    case Intrinsic::ceil:
1219
0
      ISDs.push_back(ISD::FCEIL);
1220
0
      break;
1221
24
    case Intrinsic::trunc:
1222
0
      ISDs.push_back(ISD::FTRUNC);
1223
0
      break;
1224
24
    case Intrinsic::nearbyint:
1225
0
      ISDs.push_back(ISD::FNEARBYINT);
1226
0
      break;
1227
24
    case Intrinsic::rint:
1228
0
      ISDs.push_back(ISD::FRINT);
1229
0
      break;
1230
24
    case Intrinsic::round:
1231
0
      ISDs.push_back(ISD::FROUND);
1232
0
      break;
1233
24
    case Intrinsic::pow:
1234
0
      ISDs.push_back(ISD::FPOW);
1235
0
      break;
1236
24
    case Intrinsic::fma:
1237
0
      ISDs.push_back(ISD::FMA);
1238
0
      break;
1239
24
    case Intrinsic::fmuladd:
1240
0
      ISDs.push_back(ISD::FMA);
1241
0
      break;
1242
24
    // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1243
24
    case Intrinsic::lifetime_start:
1244
0
    case Intrinsic::lifetime_end:
1245
0
    case Intrinsic::sideeffect:
1246
0
      return 0;
1247
0
    case Intrinsic::masked_store:
1248
0
      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1249
0
                                                0);
1250
0
    case Intrinsic::masked_load:
1251
0
      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1252
0
    case Intrinsic::experimental_vector_reduce_add:
1253
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1254
0
                                                     /*IsPairwiseForm=*/false);
1255
0
    case Intrinsic::experimental_vector_reduce_mul:
1256
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1257
0
                                                     /*IsPairwiseForm=*/false);
1258
0
    case Intrinsic::experimental_vector_reduce_and:
1259
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1260
0
                                                     /*IsPairwiseForm=*/false);
1261
0
    case Intrinsic::experimental_vector_reduce_or:
1262
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1263
0
                                                     /*IsPairwiseForm=*/false);
1264
0
    case Intrinsic::experimental_vector_reduce_xor:
1265
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1266
0
                                                     /*IsPairwiseForm=*/false);
1267
0
    case Intrinsic::experimental_vector_reduce_v2_fadd:
1268
0
      return ConcreteTTI->getArithmeticReductionCost(
1269
0
          Instruction::FAdd, Tys[0],
1270
0
          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1271
0
                                     // reductions.
1272
0
    case Intrinsic::experimental_vector_reduce_v2_fmul:
1273
0
      return ConcreteTTI->getArithmeticReductionCost(
1274
0
          Instruction::FMul, Tys[0],
1275
0
          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1276
0
                                     // reductions.
1277
0
    case Intrinsic::experimental_vector_reduce_smax:
1278
0
    case Intrinsic::experimental_vector_reduce_smin:
1279
0
    case Intrinsic::experimental_vector_reduce_fmax:
1280
0
    case Intrinsic::experimental_vector_reduce_fmin:
1281
0
      return ConcreteTTI->getMinMaxReductionCost(
1282
0
          Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1283
0
          /*IsUnsigned=*/true);
1284
0
    case Intrinsic::experimental_vector_reduce_umax:
1285
0
    case Intrinsic::experimental_vector_reduce_umin:
1286
0
      return ConcreteTTI->getMinMaxReductionCost(
1287
0
          Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1288
0
          /*IsUnsigned=*/false);
1289
0
    case Intrinsic::sadd_sat:
1290
0
    case Intrinsic::ssub_sat: {
1291
0
      Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1292
0
      if (RetVF > 1)
1293
0
        CondTy = VectorType::get(CondTy, RetVF);
1294
0
1295
0
      Type *OpTy = StructType::create({RetTy, CondTy});
1296
0
      Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1297
0
                                     ? Intrinsic::sadd_with_overflow
1298
0
                                     : Intrinsic::ssub_with_overflow;
1299
0
1300
0
      // SatMax -> Overflow && SumDiff < 0
1301
0
      // SatMin -> Overflow && SumDiff >= 0
1302
0
      unsigned Cost = 0;
1303
0
      Cost += ConcreteTTI->getIntrinsicInstrCost(
1304
0
          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1305
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1306
0
                                              CondTy, nullptr);
1307
0
      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1308
0
                                                  CondTy, nullptr);
1309
0
      return Cost;
1310
0
    }
1311
0
    case Intrinsic::uadd_sat:
1312
0
    case Intrinsic::usub_sat: {
1313
0
      Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1314
0
      if (RetVF > 1)
1315
0
        CondTy = VectorType::get(CondTy, RetVF);
1316
0
1317
0
      Type *OpTy = StructType::create({RetTy, CondTy});
1318
0
      Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1319
0
                                     ? Intrinsic::uadd_with_overflow
1320
0
                                     : Intrinsic::usub_with_overflow;
1321
0
1322
0
      unsigned Cost = 0;
1323
0
      Cost += ConcreteTTI->getIntrinsicInstrCost(
1324
0
          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1325
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1326
0
                                              CondTy, nullptr);
1327
0
      return Cost;
1328
0
    }
1329
0
    case Intrinsic::smul_fix:
1330
0
    case Intrinsic::umul_fix: {
1331
0
      unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1332
0
      Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1333
0
      if (RetVF > 1)
1334
0
        ExtTy = VectorType::get(ExtTy, RetVF);
1335
0
1336
0
      unsigned ExtOp =
1337
0
          IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1338
0
1339
0
      unsigned Cost = 0;
1340
0
      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1341
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1342
0
      Cost +=
1343
0
          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1344
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1345
0
                                                  TTI::OK_AnyValue,
1346
0
                                                  TTI::OK_UniformConstantValue);
1347
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1348
0
                                                  TTI::OK_AnyValue,
1349
0
                                                  TTI::OK_UniformConstantValue);
1350
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1351
0
      return Cost;
1352
0
    }
1353
0
    case Intrinsic::sadd_with_overflow:
1354
0
    case Intrinsic::ssub_with_overflow: {
1355
0
      Type *SumTy = RetTy->getContainedType(0);
1356
0
      Type *OverflowTy = RetTy->getContainedType(1);
1357
0
      unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1358
0
                            ? BinaryOperator::Add
1359
0
                            : BinaryOperator::Sub;
1360
0
1361
0
      //   LHSSign -> LHS >= 0
1362
0
      //   RHSSign -> RHS >= 0
1363
0
      //   SumSign -> Sum >= 0
1364
0
      //
1365
0
      //   Add:
1366
0
      //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1367
0
      //   Sub:
1368
0
      //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1369
0
      unsigned Cost = 0;
1370
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1371
0
      Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1372
0
                                                  OverflowTy, nullptr);
1373
0
      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1374
0
                      BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1375
0
      Cost +=
1376
0
          ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1377
0
      return Cost;
1378
0
    }
1379
0
    case Intrinsic::uadd_with_overflow:
1380
0
    case Intrinsic::usub_with_overflow: {
1381
0
      Type *SumTy = RetTy->getContainedType(0);
1382
0
      Type *OverflowTy = RetTy->getContainedType(1);
1383
0
      unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1384
0
                            ? BinaryOperator::Add
1385
0
                            : BinaryOperator::Sub;
1386
0
1387
0
      unsigned Cost = 0;
1388
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1389
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1390
0
                                              OverflowTy, nullptr);
1391
0
      return Cost;
1392
0
    }
1393
0
    case Intrinsic::smul_with_overflow:
1394
0
    case Intrinsic::umul_with_overflow: {
1395
0
      Type *MulTy = RetTy->getContainedType(0);
1396
0
      Type *OverflowTy = RetTy->getContainedType(1);
1397
0
      unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1398
0
      Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1399
0
      if (MulTy->isVectorTy())
1400
0
        ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() );
1401
0
1402
0
      unsigned ExtOp =
1403
0
          IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1404
0
1405
0
      unsigned Cost = 0;
1406
0
      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1407
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1408
0
      Cost +=
1409
0
          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1410
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1411
0
                                                  TTI::OK_AnyValue,
1412
0
                                                  TTI::OK_UniformConstantValue);
1413
0
1414
0
      if (IID == Intrinsic::smul_with_overflow)
1415
0
        Cost += ConcreteTTI->getArithmeticInstrCost(
1416
0
            Instruction::AShr, MulTy, TTI::OK_AnyValue,
1417
0
            TTI::OK_UniformConstantValue);
1418
0
1419
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1420
0
                                              OverflowTy, nullptr);
1421
0
      return Cost;
1422
0
    }
1423
0
    case Intrinsic::ctpop:
1424
0
      ISDs.push_back(ISD::CTPOP);
1425
0
      // In case of legalization use TCC_Expensive. This is cheaper than a
1426
0
      // library call but still not a cheap instruction.
1427
0
      SingleCallCost = TargetTransformInfo::TCC_Expensive;
1428
0
      break;
1429
696
    // FIXME: ctlz, cttz, ...
1430
696
    }
1431
696
1432
696
    const TargetLoweringBase *TLI = getTLI();
1433
696
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1434
696
1435
696
    SmallVector<unsigned, 2> LegalCost;
1436
696
    SmallVector<unsigned, 2> CustomCost;
1437
696
    for (unsigned ISD : ISDs) {
1438
696
      if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1439
228
        if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1440
228
            TLI->isFAbsFree(LT.second)) {
1441
0
          return 0;
1442
0
        }
1443
228
1444
228
        // The operation is legal. Assume it costs 1.
1445
228
        // If the type is split to multiple registers, assume that there is some
1446
228
        // overhead to this.
1447
228
        // TODO: Once we have extract/insert subvector cost we need to use them.
1448
228
        if (LT.first > 1)
1449
0
          LegalCost.push_back(LT.first * 2);
1450
228
        else
1451
228
          LegalCost.push_back(LT.first * 1);
1452
468
      } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1453
264
        // If the operation is custom lowered then assume
1454
264
        // that the code is twice as expensive.
1455
264
        CustomCost.push_back(LT.first * 2);
1456
264
      }
1457
696
    }
1458
696
1459
696
    auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1460
696
    if (MinLegalCostI != LegalCost.end())
1461
228
      return *MinLegalCostI;
1462
468
1463
468
    auto MinCustomCostI =
1464
468
        std::min_element(CustomCost.begin(), CustomCost.end());
1465
468
    if (MinCustomCostI != CustomCost.end())
1466
264
      return *MinCustomCostI;
1467
204
1468
204
    // If we can't lower fmuladd into an FMA estimate the cost as a floating
1469
204
    // point mul followed by an add.
1470
204
    if (IID == Intrinsic::fmuladd)
1471
0
      return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1472
0
             ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1473
204
1474
204
    // Else, assume that we need to scalarize this intrinsic. For math builtins
1475
204
    // this will emit a costly libcall, adding call overhead and spills. Make it
1476
204
    // very expensive.
1477
204
    if (RetTy->isVectorTy()) {
1478
204
      unsigned ScalarizationCost =
1479
204
          ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1480
204
               ? ScalarizationCostPassed
1481
204
               : 
getScalarizationOverhead(RetTy, true, false)0
);
1482
204
      unsigned ScalarCalls = RetTy->getVectorNumElements();
1483
204
      SmallVector<Type *, 4> ScalarTys;
1484
540
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i336
) {
1485
336
        Type *Ty = Tys[i];
1486
336
        if (Ty->isVectorTy())
1487
336
          Ty = Ty->getScalarType();
1488
336
        ScalarTys.push_back(Ty);
1489
336
      }
1490
204
      unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1491
204
          IID, RetTy->getScalarType(), ScalarTys, FMF);
1492
540
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i336
) {
1493
336
        if (Tys[i]->isVectorTy()) {
1494
336
          if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1495
0
            ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1496
336
          ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1497
336
        }
1498
336
      }
1499
204
1500
204
      return ScalarCalls * ScalarCost + ScalarizationCost;
1501
204
    }
1502
0
1503
0
    // This is going to be turned into a library call, make it expensive.
1504
0
    return SingleCallCost;
1505
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int)
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int)
Line
Count
Source
1133
155
      unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1134
155
    unsigned RetVF = (RetTy->isVectorTy() ? 
RetTy->getVectorNumElements()73
:
182
);
1135
155
    auto *ConcreteTTI = static_cast<T *>(this);
1136
155
1137
155
    SmallVector<unsigned, 2> ISDs;
1138
155
    unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1139
155
    switch (IID) {
1140
155
    default: {
1141
0
      // Assume that we need to scalarize this intrinsic.
1142
0
      unsigned ScalarizationCost = ScalarizationCostPassed;
1143
0
      unsigned ScalarCalls = 1;
1144
0
      Type *ScalarRetTy = RetTy;
1145
0
      if (RetTy->isVectorTy()) {
1146
0
        if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1147
0
          ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1148
0
        ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1149
0
        ScalarRetTy = RetTy->getScalarType();
1150
0
      }
1151
0
      SmallVector<Type *, 4> ScalarTys;
1152
0
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1153
0
        Type *Ty = Tys[i];
1154
0
        if (Ty->isVectorTy()) {
1155
0
          if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1156
0
            ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1157
0
          ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1158
0
          Ty = Ty->getScalarType();
1159
0
        }
1160
0
        ScalarTys.push_back(Ty);
1161
0
      }
1162
0
      if (ScalarCalls == 1)
1163
0
        return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1164
0
1165
0
      unsigned ScalarCost =
1166
0
          ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1167
0
1168
0
      return ScalarCalls * ScalarCost + ScalarizationCost;
1169
0
    }
1170
0
    // Look for intrinsics that can be lowered directly or turned into a scalar
1171
0
    // intrinsic call.
1172
10
    case Intrinsic::sqrt:
1173
10
      ISDs.push_back(ISD::FSQRT);
1174
10
      break;
1175
18
    case Intrinsic::sin:
1176
18
      ISDs.push_back(ISD::FSIN);
1177
18
      break;
1178
18
    case Intrinsic::cos:
1179
18
      ISDs.push_back(ISD::FCOS);
1180
18
      break;
1181
18
    case Intrinsic::exp:
1182
18
      ISDs.push_back(ISD::FEXP);
1183
18
      break;
1184
18
    case Intrinsic::exp2:
1185
18
      ISDs.push_back(ISD::FEXP2);
1186
18
      break;
1187
18
    case Intrinsic::log:
1188
18
      ISDs.push_back(ISD::FLOG);
1189
18
      break;
1190
18
    case Intrinsic::log10:
1191
18
      ISDs.push_back(ISD::FLOG10);
1192
18
      break;
1193
18
    case Intrinsic::log2:
1194
18
      ISDs.push_back(ISD::FLOG2);
1195
18
      break;
1196
0
    case Intrinsic::fabs:
1197
0
      ISDs.push_back(ISD::FABS);
1198
0
      break;
1199
0
    case Intrinsic::canonicalize:
1200
0
      ISDs.push_back(ISD::FCANONICALIZE);
1201
0
      break;
1202
0
    case Intrinsic::minnum:
1203
0
      ISDs.push_back(ISD::FMINNUM);
1204
0
      if (FMF.noNaNs())
1205
0
        ISDs.push_back(ISD::FMINIMUM);
1206
0
      break;
1207
0
    case Intrinsic::maxnum:
1208
0
      ISDs.push_back(ISD::FMAXNUM);
1209
0
      if (FMF.noNaNs())
1210
0
        ISDs.push_back(ISD::FMAXIMUM);
1211
0
      break;
1212
0
    case Intrinsic::copysign:
1213
0
      ISDs.push_back(ISD::FCOPYSIGN);
1214
0
      break;
1215
0
    case Intrinsic::floor:
1216
0
      ISDs.push_back(ISD::FFLOOR);
1217
0
      break;
1218
0
    case Intrinsic::ceil:
1219
0
      ISDs.push_back(ISD::FCEIL);
1220
0
      break;
1221
0
    case Intrinsic::trunc:
1222
0
      ISDs.push_back(ISD::FTRUNC);
1223
0
      break;
1224
0
    case Intrinsic::nearbyint:
1225
0
      ISDs.push_back(ISD::FNEARBYINT);
1226
0
      break;
1227
0
    case Intrinsic::rint:
1228
0
      ISDs.push_back(ISD::FRINT);
1229
0
      break;
1230
0
    case Intrinsic::round:
1231
0
      ISDs.push_back(ISD::FROUND);
1232
0
      break;
1233
18
    case Intrinsic::pow:
1234
18
      ISDs.push_back(ISD::FPOW);
1235
18
      break;
1236
0
    case Intrinsic::fma:
1237
0
      ISDs.push_back(ISD::FMA);
1238
0
      break;
1239
0
    case Intrinsic::fmuladd:
1240
0
      ISDs.push_back(ISD::FMA);
1241
0
      break;
1242
0
    // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1243
0
    case Intrinsic::lifetime_start:
1244
0
    case Intrinsic::lifetime_end:
1245
0
    case Intrinsic::sideeffect:
1246
0
      return 0;
1247
0
    case Intrinsic::masked_store:
1248
0
      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1249
0
                                                0);
1250
0
    case Intrinsic::masked_load:
1251
0
      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1252
0
    case Intrinsic::experimental_vector_reduce_add:
1253
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1254
0
                                                     /*IsPairwiseForm=*/false);
1255
0
    case Intrinsic::experimental_vector_reduce_mul:
1256
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1257
0
                                                     /*IsPairwiseForm=*/false);
1258
0
    case Intrinsic::experimental_vector_reduce_and:
1259
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1260
0
                                                     /*IsPairwiseForm=*/false);
1261
0
    case Intrinsic::experimental_vector_reduce_or:
1262
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1263
0
                                                     /*IsPairwiseForm=*/false);
1264
0
    case Intrinsic::experimental_vector_reduce_xor:
1265
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1266
0
                                                     /*IsPairwiseForm=*/false);
1267
0
    case Intrinsic::experimental_vector_reduce_v2_fadd:
1268
0
      return ConcreteTTI->getArithmeticReductionCost(
1269
0
          Instruction::FAdd, Tys[0],
1270
0
          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1271
0
                                     // reductions.
1272
0
    case Intrinsic::experimental_vector_reduce_v2_fmul:
1273
0
      return ConcreteTTI->getArithmeticReductionCost(
1274
0
          Instruction::FMul, Tys[0],
1275
0
          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1276
0
                                     // reductions.
1277
0
    case Intrinsic::experimental_vector_reduce_smax:
1278
0
    case Intrinsic::experimental_vector_reduce_smin:
1279
0
    case Intrinsic::experimental_vector_reduce_fmax:
1280
0
    case Intrinsic::experimental_vector_reduce_fmin:
1281
0
      return ConcreteTTI->getMinMaxReductionCost(
1282
0
          Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1283
0
          /*IsUnsigned=*/true);
1284
0
    case Intrinsic::experimental_vector_reduce_umax:
1285
0
    case Intrinsic::experimental_vector_reduce_umin:
1286
0
      return ConcreteTTI->getMinMaxReductionCost(
1287
0
          Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1288
0
          /*IsUnsigned=*/false);
1289
0
    case Intrinsic::sadd_sat:
1290
0
    case Intrinsic::ssub_sat: {
1291
0
      Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1292
0
      if (RetVF > 1)
1293
0
        CondTy = VectorType::get(CondTy, RetVF);
1294
0
1295
0
      Type *OpTy = StructType::create({RetTy, CondTy});
1296
0
      Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1297
0
                                     ? Intrinsic::sadd_with_overflow
1298
0
                                     : Intrinsic::ssub_with_overflow;
1299
0
1300
0
      // SatMax -> Overflow && SumDiff < 0
1301
0
      // SatMin -> Overflow && SumDiff >= 0
1302
0
      unsigned Cost = 0;
1303
0
      Cost += ConcreteTTI->getIntrinsicInstrCost(
1304
0
          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1305
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1306
0
                                              CondTy, nullptr);
1307
0
      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1308
0
                                                  CondTy, nullptr);
1309
0
      return Cost;
1310
0
    }
1311
0
    case Intrinsic::uadd_sat:
1312
0
    case Intrinsic::usub_sat: {
1313
0
      Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1314
0
      if (RetVF > 1)
1315
0
        CondTy = VectorType::get(CondTy, RetVF);
1316
0
1317
0
      Type *OpTy = StructType::create({RetTy, CondTy});
1318
0
      Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1319
0
                                     ? Intrinsic::uadd_with_overflow
1320
0
                                     : Intrinsic::usub_with_overflow;
1321
0
1322
0
      unsigned Cost = 0;
1323
0
      Cost += ConcreteTTI->getIntrinsicInstrCost(
1324
0
          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1325
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1326
0
                                              CondTy, nullptr);
1327
0
      return Cost;
1328
0
    }
1329
0
    case Intrinsic::smul_fix:
1330
0
    case Intrinsic::umul_fix: {
1331
0
      unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1332
0
      Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1333
0
      if (RetVF > 1)
1334
0
        ExtTy = VectorType::get(ExtTy, RetVF);
1335
0
1336
0
      unsigned ExtOp =
1337
0
          IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1338
0
1339
0
      unsigned Cost = 0;
1340
0
      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1341
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1342
0
      Cost +=
1343
0
          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1344
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1345
0
                                                  TTI::OK_AnyValue,
1346
0
                                                  TTI::OK_UniformConstantValue);
1347
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1348
0
                                                  TTI::OK_AnyValue,
1349
0
                                                  TTI::OK_UniformConstantValue);
1350
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1351
0
      return Cost;
1352
0
    }
1353
0
    case Intrinsic::sadd_with_overflow:
1354
0
    case Intrinsic::ssub_with_overflow: {
1355
0
      Type *SumTy = RetTy->getContainedType(0);
1356
0
      Type *OverflowTy = RetTy->getContainedType(1);
1357
0
      unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1358
0
                            ? BinaryOperator::Add
1359
0
                            : BinaryOperator::Sub;
1360
0
1361
0
      //   LHSSign -> LHS >= 0
1362
0
      //   RHSSign -> RHS >= 0
1363
0
      //   SumSign -> Sum >= 0
1364
0
      //
1365
0
      //   Add:
1366
0
      //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1367
0
      //   Sub:
1368
0
      //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1369
0
      unsigned Cost = 0;
1370
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1371
0
      Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1372
0
                                                  OverflowTy, nullptr);
1373
0
      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1374
0
                      BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1375
0
      Cost +=
1376
0
          ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1377
0
      return Cost;
1378
0
    }
1379
0
    case Intrinsic::uadd_with_overflow:
1380
0
    case Intrinsic::usub_with_overflow: {
1381
0
      Type *SumTy = RetTy->getContainedType(0);
1382
0
      Type *OverflowTy = RetTy->getContainedType(1);
1383
0
      unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1384
0
                            ? BinaryOperator::Add
1385
0
                            : BinaryOperator::Sub;
1386
0
1387
0
      unsigned Cost = 0;
1388
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1389
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1390
0
                                              OverflowTy, nullptr);
1391
0
      return Cost;
1392
0
    }
1393
0
    case Intrinsic::smul_with_overflow:
1394
0
    case Intrinsic::umul_with_overflow: {
1395
0
      Type *MulTy = RetTy->getContainedType(0);
1396
0
      Type *OverflowTy = RetTy->getContainedType(1);
1397
0
      unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1398
0
      Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1399
0
      if (MulTy->isVectorTy())
1400
0
        ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() );
1401
0
1402
0
      unsigned ExtOp =
1403
0
          IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1404
0
1405
0
      unsigned Cost = 0;
1406
0
      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1407
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1408
0
      Cost +=
1409
0
          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1410
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1411
0
                                                  TTI::OK_AnyValue,
1412
0
                                                  TTI::OK_UniformConstantValue);
1413
0
1414
0
      if (IID == Intrinsic::smul_with_overflow)
1415
0
        Cost += ConcreteTTI->getArithmeticInstrCost(
1416
0
            Instruction::AShr, MulTy, TTI::OK_AnyValue,
1417
0
            TTI::OK_UniformConstantValue);
1418
0
1419
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1420
0
                                              OverflowTy, nullptr);
1421
0
      return Cost;
1422
0
    }
1423
1
    case Intrinsic::ctpop:
1424
1
      ISDs.push_back(ISD::CTPOP);
1425
1
      // In case of legalization use TCC_Expensive. This is cheaper than a
1426
1
      // library call but still not a cheap instruction.
1427
1
      SingleCallCost = TargetTransformInfo::TCC_Expensive;
1428
1
      break;
1429
155
    // FIXME: ctlz, cttz, ...
1430
155
    }
1431
155
1432
155
    const TargetLoweringBase *TLI = getTLI();
1433
155
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1434
155
1435
155
    SmallVector<unsigned, 2> LegalCost;
1436
155
    SmallVector<unsigned, 2> CustomCost;
1437
155
    for (unsigned ISD : ISDs) {
1438
155
      if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1439
11
        if (IID == Intrinsic::fabs && 
LT.second.isFloatingPoint()0
&&
1440
11
            
TLI->isFAbsFree(LT.second)0
) {
1441
0
          return 0;
1442
0
        }
1443
11
1444
11
        // The operation is legal. Assume it costs 1.
1445
11
        // If the type is split to multiple registers, assume that there is some
1446
11
        // overhead to this.
1447
11
        // TODO: Once we have extract/insert subvector cost we need to use them.
1448
11
        if (LT.first > 1)
1449
0
          LegalCost.push_back(LT.first * 2);
1450
11
        else
1451
11
          LegalCost.push_back(LT.first * 1);
1452
144
      } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1453
0
        // If the operation is custom lowered then assume
1454
0
        // that the code is twice as expensive.
1455
0
        CustomCost.push_back(LT.first * 2);
1456
0
      }
1457
155
    }
1458
155
1459
155
    auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1460
155
    if (MinLegalCostI != LegalCost.end())
1461
11
      return *MinLegalCostI;
1462
144
1463
144
    auto MinCustomCostI =
1464
144
        std::min_element(CustomCost.begin(), CustomCost.end());
1465
144
    if (MinCustomCostI != CustomCost.end())
1466
0
      return *MinCustomCostI;
1467
144
1468
144
    // If we can't lower fmuladd into an FMA estimate the cost as a floating
1469
144
    // point mul followed by an add.
1470
144
    if (IID == Intrinsic::fmuladd)
1471
0
      return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1472
0
             ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1473
144
1474
144
    // Else, assume that we need to scalarize this intrinsic. For math builtins
1475
144
    // this will emit a costly libcall, adding call overhead and spills. Make it
1476
144
    // very expensive.
1477
144
    if (RetTy->isVectorTy()) {
1478
64
      unsigned ScalarizationCost =
1479
64
          ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1480
64
               ? ScalarizationCostPassed
1481
64
               : 
getScalarizationOverhead(RetTy, true, false)0
);
1482
64
      unsigned ScalarCalls = RetTy->getVectorNumElements();
1483
64
      SmallVector<Type *, 4> ScalarTys;
1484
136
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i72
) {
1485
72
        Type *Ty = Tys[i];
1486
72
        if (Ty->isVectorTy())
1487
72
          Ty = Ty->getScalarType();
1488
72
        ScalarTys.push_back(Ty);
1489
72
      }
1490
64
      unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1491
64
          IID, RetTy->getScalarType(), ScalarTys, FMF);
1492
136
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i72
) {
1493
72
        if (Tys[i]->isVectorTy()) {
1494
72
          if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1495
0
            ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1496
72
          ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1497
72
        }
1498
72
      }
1499
64
1500
64
      return ScalarCalls * ScalarCost + ScalarizationCost;
1501
64
    }
1502
80
1503
80
    // This is going to be turned into a library call, make it expensive.
1504
80
    return SingleCallCost;
1505
80
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int)
Line
Count
Source
1133
29
      unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1134
29
    unsigned RetVF = (RetTy->isVectorTy() ? 
RetTy->getVectorNumElements()0
: 1);
1135
29
    auto *ConcreteTTI = static_cast<T *>(this);
1136
29
1137
29
    SmallVector<unsigned, 2> ISDs;
1138
29
    unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1139
29
    switch (IID) {
1140
29
    default: {
1141
24
      // Assume that we need to scalarize this intrinsic.
1142
24
      unsigned ScalarizationCost = ScalarizationCostPassed;
1143
24
      unsigned ScalarCalls = 1;
1144
24
      Type *ScalarRetTy = RetTy;
1145
24
      if (RetTy->isVectorTy()) {
1146
0
        if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1147
0
          ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1148
0
        ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1149
0
        ScalarRetTy = RetTy->getScalarType();
1150
0
      }
1151
24
      SmallVector<Type *, 4> ScalarTys;
1152
48
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i24
) {
1153
24
        Type *Ty = Tys[i];
1154
24
        if (Ty->isVectorTy()) {
1155
0
          if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1156
0
            ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1157
0
          ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1158
0
          Ty = Ty->getScalarType();
1159
0
        }
1160
24
        ScalarTys.push_back(Ty);
1161
24
      }
1162
24
      if (ScalarCalls == 1)
1163
24
        return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1164
0
1165
0
      unsigned ScalarCost =
1166
0
          ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1167
0
1168
0
      return ScalarCalls * ScalarCost + ScalarizationCost;
1169
0
    }
1170
0
    // Look for intrinsics that can be lowered directly or turned into a scalar
1171
0
    // intrinsic call.
1172
0
    case Intrinsic::sqrt:
1173
0
      ISDs.push_back(ISD::FSQRT);
1174
0
      break;
1175
0
    case Intrinsic::sin:
1176
0
      ISDs.push_back(ISD::FSIN);
1177
0
      break;
1178
0
    case Intrinsic::cos:
1179
0
      ISDs.push_back(ISD::FCOS);
1180
0
      break;
1181
0
    case Intrinsic::exp:
1182
0
      ISDs.push_back(ISD::FEXP);
1183
0
      break;
1184
0
    case Intrinsic::exp2:
1185
0
      ISDs.push_back(ISD::FEXP2);
1186
0
      break;
1187
0
    case Intrinsic::log:
1188
0
      ISDs.push_back(ISD::FLOG);
1189
0
      break;
1190
0
    case Intrinsic::log10:
1191
0
      ISDs.push_back(ISD::FLOG10);
1192
0
      break;
1193
0
    case Intrinsic::log2:
1194
0
      ISDs.push_back(ISD::FLOG2);
1195
0
      break;
1196
0
    case Intrinsic::fabs:
1197
0
      ISDs.push_back(ISD::FABS);
1198
0
      break;
1199
0
    case Intrinsic::canonicalize:
1200
0
      ISDs.push_back(ISD::FCANONICALIZE);
1201
0
      break;
1202
0
    case Intrinsic::minnum:
1203
0
      ISDs.push_back(ISD::FMINNUM);
1204
0
      if (FMF.noNaNs())
1205
0
        ISDs.push_back(ISD::FMINIMUM);
1206
0
      break;
1207
0
    case Intrinsic::maxnum:
1208
0
      ISDs.push_back(ISD::FMAXNUM);
1209
0
      if (FMF.noNaNs())
1210
0
        ISDs.push_back(ISD::FMAXIMUM);
1211
0
      break;
1212
0
    case Intrinsic::copysign:
1213
0
      ISDs.push_back(ISD::FCOPYSIGN);
1214
0
      break;
1215
0
    case Intrinsic::floor:
1216
0
      ISDs.push_back(ISD::FFLOOR);
1217
0
      break;
1218
0
    case Intrinsic::ceil:
1219
0
      ISDs.push_back(ISD::FCEIL);
1220
0
      break;
1221
0
    case Intrinsic::trunc:
1222
0
      ISDs.push_back(ISD::FTRUNC);
1223
0
      break;
1224
0
    case Intrinsic::nearbyint:
1225
0
      ISDs.push_back(ISD::FNEARBYINT);
1226
0
      break;
1227
0
    case Intrinsic::rint:
1228
0
      ISDs.push_back(ISD::FRINT);
1229
0
      break;
1230
0
    case Intrinsic::round:
1231
0
      ISDs.push_back(ISD::FROUND);
1232
0
      break;
1233
0
    case Intrinsic::pow:
1234
0
      ISDs.push_back(ISD::FPOW);
1235
0
      break;
1236
0
    case Intrinsic::fma:
1237
0
      ISDs.push_back(ISD::FMA);
1238
0
      break;
1239
0
    case Intrinsic::fmuladd:
1240
0
      ISDs.push_back(ISD::FMA);
1241
0
      break;
1242
0
    // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1243
5
    case Intrinsic::lifetime_start:
1244
5
    case Intrinsic::lifetime_end:
1245
5
    case Intrinsic::sideeffect:
1246
5
      return 0;
1247
5
    case Intrinsic::masked_store:
1248
0
      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1249
0
                                                0);
1250
5
    case Intrinsic::masked_load:
1251
0
      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1252
5
    case Intrinsic::experimental_vector_reduce_add:
1253
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1254
0
                                                     /*IsPairwiseForm=*/false);
1255
5
    case Intrinsic::experimental_vector_reduce_mul:
1256
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1257
0
                                                     /*IsPairwiseForm=*/false);
1258
5
    case Intrinsic::experimental_vector_reduce_and:
1259
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1260
0
                                                     /*IsPairwiseForm=*/false);
1261
5
    case Intrinsic::experimental_vector_reduce_or:
1262
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1263
0
                                                     /*IsPairwiseForm=*/false);
1264
5
    case Intrinsic::experimental_vector_reduce_xor:
1265
0
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1266
0
                                                     /*IsPairwiseForm=*/false);
1267
5
    case Intrinsic::experimental_vector_reduce_v2_fadd:
1268
0
      return ConcreteTTI->getArithmeticReductionCost(
1269
0
          Instruction::FAdd, Tys[0],
1270
0
          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1271
5
                                     // reductions.
1272
5
    case Intrinsic::experimental_vector_reduce_v2_fmul:
1273
0
      return ConcreteTTI->getArithmeticReductionCost(
1274
0
          Instruction::FMul, Tys[0],
1275
0
          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1276
5
                                     // reductions.
1277
5
    case Intrinsic::experimental_vector_reduce_smax:
1278
0
    case Intrinsic::experimental_vector_reduce_smin:
1279
0
    case Intrinsic::experimental_vector_reduce_fmax:
1280
0
    case Intrinsic::experimental_vector_reduce_fmin:
1281
0
      return ConcreteTTI->getMinMaxReductionCost(
1282
0
          Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1283
0
          /*IsUnsigned=*/true);
1284
0
    case Intrinsic::experimental_vector_reduce_umax:
1285
0
    case Intrinsic::experimental_vector_reduce_umin:
1286
0
      return ConcreteTTI->getMinMaxReductionCost(
1287
0
          Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1288
0
          /*IsUnsigned=*/false);
1289
0
    case Intrinsic::sadd_sat:
1290
0
    case Intrinsic::ssub_sat: {
1291
0
      Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1292
0
      if (RetVF > 1)
1293
0
        CondTy = VectorType::get(CondTy, RetVF);
1294
0
1295
0
      Type *OpTy = StructType::create({RetTy, CondTy});
1296
0
      Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1297
0
                                     ? Intrinsic::sadd_with_overflow
1298
0
                                     : Intrinsic::ssub_with_overflow;
1299
0
1300
0
      // SatMax -> Overflow && SumDiff < 0
1301
0
      // SatMin -> Overflow && SumDiff >= 0
1302
0
      unsigned Cost = 0;
1303
0
      Cost += ConcreteTTI->getIntrinsicInstrCost(
1304
0
          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1305
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1306
0
                                              CondTy, nullptr);
1307
0
      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1308
0
                                                  CondTy, nullptr);
1309
0
      return Cost;
1310
0
    }
1311
0
    case Intrinsic::uadd_sat:
1312
0
    case Intrinsic::usub_sat: {
1313
0
      Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1314
0
      if (RetVF > 1)
1315
0
        CondTy = VectorType::get(CondTy, RetVF);
1316
0
1317
0
      Type *OpTy = StructType::create({RetTy, CondTy});
1318
0
      Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1319
0
                                     ? Intrinsic::uadd_with_overflow
1320
0
                                     : Intrinsic::usub_with_overflow;
1321
0
1322
0
      unsigned Cost = 0;
1323
0
      Cost += ConcreteTTI->getIntrinsicInstrCost(
1324
0
          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1325
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1326
0
                                              CondTy, nullptr);
1327
0
      return Cost;
1328
0
    }
1329
0
    case Intrinsic::smul_fix:
1330
0
    case Intrinsic::umul_fix: {
1331
0
      unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1332
0
      Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1333
0
      if (RetVF > 1)
1334
0
        ExtTy = VectorType::get(ExtTy, RetVF);
1335
0
1336
0
      unsigned ExtOp =
1337
0
          IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1338
0
1339
0
      unsigned Cost = 0;
1340
0
      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1341
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1342
0
      Cost +=
1343
0
          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1344
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1345
0
                                                  TTI::OK_AnyValue,
1346
0
                                                  TTI::OK_UniformConstantValue);
1347
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1348
0
                                                  TTI::OK_AnyValue,
1349
0
                                                  TTI::OK_UniformConstantValue);
1350
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1351
0
      return Cost;
1352
0
    }
1353
0
    case Intrinsic::sadd_with_overflow:
1354
0
    case Intrinsic::ssub_with_overflow: {
1355
0
      Type *SumTy = RetTy->getContainedType(0);
1356
0
      Type *OverflowTy = RetTy->getContainedType(1);
1357
0
      unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1358
0
                            ? BinaryOperator::Add
1359
0
                            : BinaryOperator::Sub;
1360
0
1361
0
      //   LHSSign -> LHS >= 0
1362
0
      //   RHSSign -> RHS >= 0
1363
0
      //   SumSign -> Sum >= 0
1364
0
      //
1365
0
      //   Add:
1366
0
      //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1367
0
      //   Sub:
1368
0
      //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1369
0
      unsigned Cost = 0;
1370
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1371
0
      Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1372
0
                                                  OverflowTy, nullptr);
1373
0
      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1374
0
                      BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1375
0
      Cost +=
1376
0
          ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1377
0
      return Cost;
1378
0
    }
1379
0
    case Intrinsic::uadd_with_overflow:
1380
0
    case Intrinsic::usub_with_overflow: {
1381
0
      Type *SumTy = RetTy->getContainedType(0);
1382
0
      Type *OverflowTy = RetTy->getContainedType(1);
1383
0
      unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1384
0
                            ? BinaryOperator::Add
1385
0
                            : BinaryOperator::Sub;
1386
0
1387
0
      unsigned Cost = 0;
1388
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1389
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1390
0
                                              OverflowTy, nullptr);
1391
0
      return Cost;
1392
0
    }
1393
0
    case Intrinsic::smul_with_overflow:
1394
0
    case Intrinsic::umul_with_overflow: {
1395
0
      Type *MulTy = RetTy->getContainedType(0);
1396
0
      Type *OverflowTy = RetTy->getContainedType(1);
1397
0
      unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1398
0
      Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1399
0
      if (MulTy->isVectorTy())
1400
0
        ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() );
1401
0
1402
0
      unsigned ExtOp =
1403
0
          IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1404
0
1405
0
      unsigned Cost = 0;
1406
0
      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1407
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1408
0
      Cost +=
1409
0
          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1410
0
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1411
0
                                                  TTI::OK_AnyValue,
1412
0
                                                  TTI::OK_UniformConstantValue);
1413
0
1414
0
      if (IID == Intrinsic::smul_with_overflow)
1415
0
        Cost += ConcreteTTI->getArithmeticInstrCost(
1416
0
            Instruction::AShr, MulTy, TTI::OK_AnyValue,
1417
0
            TTI::OK_UniformConstantValue);
1418
0
1419
0
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1420
0
                                              OverflowTy, nullptr);
1421
0
      return Cost;
1422
0
    }
1423
0
    case Intrinsic::ctpop:
1424
0
      ISDs.push_back(ISD::CTPOP);
1425
0
      // In case of legalization use TCC_Expensive. This is cheaper than a
1426
0
      // library call but still not a cheap instruction.
1427
0
      SingleCallCost = TargetTransformInfo::TCC_Expensive;
1428
0
      break;
1429
0
    // FIXME: ctlz, cttz, ...
1430
0
    }
1431
0
1432
0
    const TargetLoweringBase *TLI = getTLI();
1433
0
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1434
0
1435
0
    SmallVector<unsigned, 2> LegalCost;
1436
0
    SmallVector<unsigned, 2> CustomCost;
1437
0
    for (unsigned ISD : ISDs) {
1438
0
      if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1439
0
        if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1440
0
            TLI->isFAbsFree(LT.second)) {
1441
0
          return 0;
1442
0
        }
1443
0
1444
0
        // The operation is legal. Assume it costs 1.
1445
0
        // If the type is split to multiple registers, assume that there is some
1446
0
        // overhead to this.
1447
0
        // TODO: Once we have extract/insert subvector cost we need to use them.
1448
0
        if (LT.first > 1)
1449
0
          LegalCost.push_back(LT.first * 2);
1450
0
        else
1451
0
          LegalCost.push_back(LT.first * 1);
1452
0
      } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1453
0
        // If the operation is custom lowered then assume
1454
0
        // that the code is twice as expensive.
1455
0
        CustomCost.push_back(LT.first * 2);
1456
0
      }
1457
0
    }
1458
0
1459
0
    auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1460
0
    if (MinLegalCostI != LegalCost.end())
1461
0
      return *MinLegalCostI;
1462
0
1463
0
    auto MinCustomCostI =
1464
0
        std::min_element(CustomCost.begin(), CustomCost.end());
1465
0
    if (MinCustomCostI != CustomCost.end())
1466
0
      return *MinCustomCostI;
1467
0
1468
0
    // If we can't lower fmuladd into an FMA estimate the cost as a floating
1469
0
    // point mul followed by an add.
1470
0
    if (IID == Intrinsic::fmuladd)
1471
0
      return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1472
0
             ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1473
0
1474
0
    // Else, assume that we need to scalarize this intrinsic. For math builtins
1475
0
    // this will emit a costly libcall, adding call overhead and spills. Make it
1476
0
    // very expensive.
1477
0
    if (RetTy->isVectorTy()) {
1478
0
      unsigned ScalarizationCost =
1479
0
          ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1480
0
               ? ScalarizationCostPassed
1481
0
               : getScalarizationOverhead(RetTy, true, false));
1482
0
      unsigned ScalarCalls = RetTy->getVectorNumElements();
1483
0
      SmallVector<Type *, 4> ScalarTys;
1484
0
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1485
0
        Type *Ty = Tys[i];
1486
0
        if (Ty->isVectorTy())
1487
0
          Ty = Ty->getScalarType();
1488
0
        ScalarTys.push_back(Ty);
1489
0
      }
1490
0
      unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1491
0
          IID, RetTy->getScalarType(), ScalarTys, FMF);
1492
0
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1493
0
        if (Tys[i]->isVectorTy()) {
1494
0
          if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1495
0
            ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1496
0
          ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1497
0
        }
1498
0
      }
1499
0
1500
0
      return ScalarCalls * ScalarCost + ScalarizationCost;
1501
0
    }
1502
0
1503
0
    // This is going to be turned into a library call, make it expensive.
1504
0
    return SingleCallCost;
1505
0
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int)
Line
Count
Source
1133
11.7k
      unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1134
11.7k
    unsigned RetVF = (RetTy->isVectorTy() ? 
RetTy->getVectorNumElements()2.93k
:
18.85k
);
1135
11.7k
    auto *ConcreteTTI = static_cast<T *>(this);
1136
11.7k
1137
11.7k
    SmallVector<unsigned, 2> ISDs;
1138
11.7k
    unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1139
11.7k
    switch (IID) {
1140
11.7k
    default: {
1141
1.71k
      // Assume that we need to scalarize this intrinsic.
1142
1.71k
      unsigned ScalarizationCost = ScalarizationCostPassed;
1143
1.71k
      unsigned ScalarCalls = 1;
1144
1.71k
      Type *ScalarRetTy = RetTy;
1145
1.71k
      if (RetTy->isVectorTy()) {
1146
338
        if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1147
28
          ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1148
338
        ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1149
338
        ScalarRetTy = RetTy->getScalarType();
1150
338
      }
1151
1.71k
      SmallVector<Type *, 4> ScalarTys;
1152
6.41k
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i4.69k
) {
1153
4.69k
        Type *Ty = Tys[i];
1154
4.69k
        if (Ty->isVectorTy()) {
1155
1.34k
          if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1156
728
            ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1157
1.34k
          ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1158
1.34k
          Ty = Ty->getScalarType();
1159
1.34k
        }
1160
4.69k
        ScalarTys.push_back(Ty);
1161
4.69k
      }
1162
1.71k
      if (ScalarCalls == 1)
1163
1.10k
        return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1164
618
1165
618
      unsigned ScalarCost =
1166
618
          ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1167
618
1168
618
      return ScalarCalls * ScalarCost + ScalarizationCost;
1169
618
    }
1170
618
    // Look for intrinsics that can be lowered directly or turned into a scalar
1171
618
    // intrinsic call.
1172
618
    case Intrinsic::sqrt:
1173
10
      ISDs.push_back(ISD::FSQRT);
1174
10
      break;
1175
618
    case Intrinsic::sin:
1176
38
      ISDs.push_back(ISD::FSIN);
1177
38
      break;
1178
618
    case Intrinsic::cos:
1179
38
      ISDs.push_back(ISD::FCOS);
1180
38
      break;
1181
618
    case Intrinsic::exp:
1182
46
      ISDs.push_back(ISD::FEXP);
1183
46
      break;
1184
618
    case Intrinsic::exp2:
1185
3
      ISDs.push_back(ISD::FEXP2);
1186
3
      break;
1187
618
    case Intrinsic::log:
1188
35
      ISDs.push_back(ISD::FLOG);
1189
35
      break;
1190
618
    case Intrinsic::log10:
1191
11
      ISDs.push_back(ISD::FLOG10);
1192
11
      break;
1193
618
    case Intrinsic::log2:
1194
0
      ISDs.push_back(ISD::FLOG2);
1195
0
      break;
1196
618
    case Intrinsic::fabs:
1197
414
      ISDs.push_back(ISD::FABS);
1198
414
      break;
1199
618
    case Intrinsic::canonicalize:
1200
0
      ISDs.push_back(ISD::FCANONICALIZE);
1201
0
      break;
1202
618
    case Intrinsic::minnum:
1203
0
      ISDs.push_back(ISD::FMINNUM);
1204
0
      if (FMF.noNaNs())
1205
0
        ISDs.push_back(ISD::FMINIMUM);
1206
0
      break;
1207
618
    case Intrinsic::maxnum:
1208
0
      ISDs.push_back(ISD::FMAXNUM);
1209
0
      if (FMF.noNaNs())
1210
0
        ISDs.push_back(ISD::FMAXIMUM);
1211
0
      break;
1212
618
    case Intrinsic::copysign:
1213
422
      ISDs.push_back(ISD::FCOPYSIGN);
1214
422
      break;
1215
618
    case Intrinsic::floor:
1216
291
      ISDs.push_back(ISD::FFLOOR);
1217
291
      break;
1218
618
    case Intrinsic::ceil:
1219
270
      ISDs.push_back(ISD::FCEIL);
1220
270
      break;
1221
618
    case Intrinsic::trunc:
1222
256
      ISDs.push_back(ISD::FTRUNC);
1223
256
      break;
1224
618
    case Intrinsic::nearbyint:
1225
259
      ISDs.push_back(ISD::FNEARBYINT);
1226
259
      break;
1227
618
    case Intrinsic::rint:
1228
256
      ISDs.push_back(ISD::FRINT);
1229
256
      break;
1230
618
    case Intrinsic::round:
1231
0
      ISDs.push_back(ISD::FROUND);
1232
0
      break;
1233
618
    case Intrinsic::pow:
1234
27
      ISDs.push_back(ISD::FPOW);
1235
27
      break;
1236
618
    case Intrinsic::fma:
1237
456
      ISDs.push_back(ISD::FMA);
1238
456
      break;
1239
618
    case Intrinsic::fmuladd:
1240
2
      ISDs.push_back(ISD::FMA);
1241
2
      break;
1242
618
    // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1243
618
    case Intrinsic::lifetime_start:
1244
0
    case Intrinsic::lifetime_end:
1245
0
    case Intrinsic::sideeffect:
1246
0
      return 0;
1247
378
    case Intrinsic::masked_store:
1248
378
      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1249
378
                                                0);
1250
406
    case Intrinsic::masked_load:
1251
406
      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1252
368
    case Intrinsic::experimental_vector_reduce_add:
1253
368
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1254
368
                                                     /*IsPairwiseForm=*/false);
1255
368
    case Intrinsic::experimental_vector_reduce_mul:
1256
368
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1257
368
                                                     /*IsPairwiseForm=*/false);
1258
496
    case Intrinsic::experimental_vector_reduce_and:
1259
496
      return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1260
496
                                                     /*IsPairwiseForm=*/false);
1261
496
    case Intrinsic::experimental_vector_reduce_or:
1262
496
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1263
496
                                                     /*IsPairwiseForm=*/false);
1264
496
    case Intrinsic::experimental_vector_reduce_xor:
1265
496
      return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1266
496
                                                     /*IsPairwiseForm=*/false);
1267
0
    case Intrinsic::experimental_vector_reduce_v2_fadd:
1268
0
      return ConcreteTTI->getArithmeticReductionCost(
1269
0
          Instruction::FAdd, Tys[0],
1270
0
          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1271
0
                                     // reductions.
1272
0
    case Intrinsic::experimental_vector_reduce_v2_fmul:
1273
0
      return ConcreteTTI->getArithmeticReductionCost(
1274
0
          Instruction::FMul, Tys[0],
1275
0
          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1276
0
                                     // reductions.
1277
728
    case Intrinsic::experimental_vector_reduce_smax:
1278
728
    case Intrinsic::experimental_vector_reduce_smin:
1279
728
    case Intrinsic::experimental_vector_reduce_fmax:
1280
728
    case Intrinsic::experimental_vector_reduce_fmin:
1281
728
      return ConcreteTTI->getMinMaxReductionCost(
1282
728
          Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1283
728
          /*IsUnsigned=*/true);
1284
736
    case Intrinsic::experimental_vector_reduce_umax:
1285
736
    case Intrinsic::experimental_vector_reduce_umin:
1286
736
      return ConcreteTTI->getMinMaxReductionCost(
1287
736
          Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1288
736
          /*IsUnsigned=*/false);
1289
736
    case Intrinsic::sadd_sat:
1290
472
    case Intrinsic::ssub_sat: {
1291
472
      Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1292
472
      if (RetVF > 1)
1293
210
        CondTy = VectorType::get(CondTy, RetVF);
1294
472
1295
472
      Type *OpTy = StructType::create({RetTy, CondTy});
1296
472
      Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1297
472
                                     ? 
Intrinsic::sadd_with_overflow236
1298
472
                                     : 
Intrinsic::ssub_with_overflow236
;
1299
472
1300
472
      // SatMax -> Overflow && SumDiff < 0
1301
472
      // SatMin -> Overflow && SumDiff >= 0
1302
472
      unsigned Cost = 0;
1303
472
      Cost += ConcreteTTI->getIntrinsicInstrCost(
1304
472
          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1305
472
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1306
472
                                              CondTy, nullptr);
1307
472
      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1308
472
                                                  CondTy, nullptr);
1309
472
      return Cost;
1310
472
    }
1311
472
    case Intrinsic::uadd_sat:
1312
328
    case Intrinsic::usub_sat: {
1313
328
      Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1314
328
      if (RetVF > 1)
1315
86
        CondTy = VectorType::get(CondTy, RetVF);
1316
328
1317
328
      Type *OpTy = StructType::create({RetTy, CondTy});
1318
328
      Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1319
328
                                     ? 
Intrinsic::uadd_with_overflow164
1320
328
                                     : 
Intrinsic::usub_with_overflow164
;
1321
328
1322
328
      unsigned Cost = 0;
1323
328
      Cost += ConcreteTTI->getIntrinsicInstrCost(
1324
328
          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1325
328
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1326
328
                                              CondTy, nullptr);
1327
328
      return Cost;
1328
328
    }
1329
794
    case Intrinsic::smul_fix:
1330
794
    case Intrinsic::umul_fix: {
1331
794
      unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1332
794
      Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1333
794
      if (RetVF > 1)
1334
477
        ExtTy = VectorType::get(ExtTy, RetVF);
1335
794
1336
794
      unsigned ExtOp =
1337
794
          IID == Intrinsic::smul_fix ? 
Instruction::SExt388
:
Instruction::ZExt406
;
1338
794
1339
794
      unsigned Cost = 0;
1340
794
      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1341
794
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1342
794
      Cost +=
1343
794
          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1344
794
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1345
794
                                                  TTI::OK_AnyValue,
1346
794
                                                  TTI::OK_UniformConstantValue);
1347
794
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1348
794
                                                  TTI::OK_AnyValue,
1349
794
                                                  TTI::OK_UniformConstantValue);
1350
794
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1351
794
      return Cost;
1352
794
    }
1353
794
    case Intrinsic::sadd_with_overflow:
1354
450
    case Intrinsic::ssub_with_overflow: {
1355
450
      Type *SumTy = RetTy->getContainedType(0);
1356
450
      Type *OverflowTy = RetTy->getContainedType(1);
1357
450
      unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1358
450
                            ? 
BinaryOperator::Add225
1359
450
                            : 
BinaryOperator::Sub225
;
1360
450
1361
450
      //   LHSSign -> LHS >= 0
1362
450
      //   RHSSign -> RHS >= 0
1363
450
      //   SumSign -> Sum >= 0
1364
450
      //
1365
450
      //   Add:
1366
450
      //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1367
450
      //   Sub:
1368
450
      //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1369
450
      unsigned Cost = 0;
1370
450
      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1371
450
      Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1372
450
                                                  OverflowTy, nullptr);
1373
450
      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1374
450
                      BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1375
450
      Cost +=
1376
450
          ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1377
450
      return Cost;
1378
450
    }
1379
450
    case Intrinsic::uadd_with_overflow:
1380
326
    case Intrinsic::usub_with_overflow: {
1381
326
      Type *SumTy = RetTy->getContainedType(0);
1382
326
      Type *OverflowTy = RetTy->getContainedType(1);
1383
326
      unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1384
326
                            ? 
BinaryOperator::Add163
1385
326
                            : 
BinaryOperator::Sub163
;
1386
326
1387
326
      unsigned Cost = 0;
1388
326
      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1389
326
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1390
326
                                              OverflowTy, nullptr);
1391
326
      return Cost;
1392
326
    }
1393
326
    case Intrinsic::smul_with_overflow:
1394
320
    case Intrinsic::umul_with_overflow: {
1395
320
      Type *MulTy = RetTy->getContainedType(0);
1396
320
      Type *OverflowTy = RetTy->getContainedType(1);
1397
320
      unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1398
320
      Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1399
320
      if (MulTy->isVectorTy())
1400
240
        ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() );
1401
320
1402
320
      unsigned ExtOp =
1403
320
          IID == Intrinsic::smul_fix ? 
Instruction::SExt0
: Instruction::ZExt;
1404
320
1405
320
      unsigned Cost = 0;
1406
320
      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1407
320
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1408
320
      Cost +=
1409
320
          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1410
320
      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1411
320
                                                  TTI::OK_AnyValue,
1412
320
                                                  TTI::OK_UniformConstantValue);
1413
320
1414
320
      if (IID == Intrinsic::smul_with_overflow)
1415
160
        Cost += ConcreteTTI->getArithmeticInstrCost(
1416
160
            Instruction::AShr, MulTy, TTI::OK_AnyValue,
1417
160
            TTI::OK_UniformConstantValue);
1418
320
1419
320
      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1420
320
                                              OverflowTy, nullptr);
1421
320
      return Cost;
1422
320
    }
1423
320
    case Intrinsic::ctpop:
1424
83
      ISDs.push_back(ISD::CTPOP);
1425
83
      // In case of legalization use TCC_Expensive. This is cheaper than a
1426
83
      // library call but still not a cheap instruction.
1427
83
      SingleCallCost = TargetTransformInfo::TCC_Expensive;
1428
83
      break;
1429
2.91k
    // FIXME: ctlz, cttz, ...
1430
2.91k
    }
1431
2.91k
1432
2.91k
    const TargetLoweringBase *TLI = getTLI();
1433
2.91k
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1434
2.91k
1435
2.91k
    SmallVector<unsigned, 2> LegalCost;
1436
2.91k
    SmallVector<unsigned, 2> CustomCost;
1437
2.91k
    for (unsigned ISD : ISDs) {
1438
2.91k
      if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1439
1.03k
        if (IID == Intrinsic::fabs && 
LT.second.isFloatingPoint()78
&&
1440
1.03k
            
TLI->isFAbsFree(LT.second)76
) {
1441
0
          return 0;
1442
0
        }
1443
1.03k
1444
1.03k
        // The operation is legal. Assume it costs 1.
1445
1.03k
        // If the type is split to multiple registers, assume that there is some
1446
1.03k
        // overhead to this.
1447
1.03k
        // TODO: Once we have extract/insert subvector cost we need to use them.
1448
1.03k
        if (LT.first > 1)
1449
140
          LegalCost.push_back(LT.first * 2);
1450
892
        else
1451
892
          LegalCost.push_back(LT.first * 1);
1452
1.88k
      } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1453
730
        // If the operation is custom lowered then assume
1454
730
        // that the code is twice as expensive.
1455
730
        CustomCost.push_back(LT.first * 2);
1456
730
      }
1457
2.91k
    }
1458
2.91k
1459
2.91k
    auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1460
2.91k
    if (MinLegalCostI != LegalCost.end())
1461
1.03k
      return *MinLegalCostI;
1462
1.88k
1463
1.88k
    auto MinCustomCostI =
1464
1.88k
        std::min_element(CustomCost.begin(), CustomCost.end());
1465
1.88k
    if (MinCustomCostI != CustomCost.end())
1466
730
      return *MinCustomCostI;
1467
1.15k
1468
1.15k
    // If we can't lower fmuladd into an FMA estimate the cost as a floating
1469
1.15k
    // point mul followed by an add.
1470
1.15k
    if (IID == Intrinsic::fmuladd)
1471
2
      return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1472
2
             ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1473
1.15k
1474
1.15k
    // Else, assume that we need to scalarize this intrinsic. For math builtins
1475
1.15k
    // this will emit a costly libcall, adding call overhead and spills. Make it
1476
1.15k
    // very expensive.
1477
1.15k
    if (RetTy->isVectorTy()) {
1478
431
      unsigned ScalarizationCost =
1479
431
          ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1480
431
               ? ScalarizationCostPassed
1481
431
               : 
getScalarizationOverhead(RetTy, true, false)0
);
1482
431
      unsigned ScalarCalls = RetTy->getVectorNumElements();
1483
431
      SmallVector<Type *, 4> ScalarTys;
1484
1.13k
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i702
) {
1485
702
        Type *Ty = Tys[i];
1486
702
        if (Ty->isVectorTy())
1487
702
          Ty = Ty->getScalarType();
1488
702
        ScalarTys.push_back(Ty);
1489
702
      }
1490
431
      unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1491
431
          IID, RetTy->getScalarType(), ScalarTys, FMF);
1492
1.13k
      for (unsigned i = 0, ie = Tys.size(); i != ie; 
++i702
) {
1493
702
        if (Tys[i]->isVectorTy()) {
1494
702
          if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1495
0
            ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1496
702
          ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1497
702
        }
1498
702
      }
1499
431
1500
431
      return ScalarCalls * ScalarCost + ScalarizationCost;
1501
431
    }
1502
722
1503
722
    // This is going to be turned into a library call, make it expensive.
1504
722
    return SingleCallCost;
1505
722
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getIntrinsicInstrCost(llvm::Intrinsic::ID, llvm::Type*, llvm::ArrayRef<llvm::Type*>, llvm::FastMathFlags, unsigned int)
1506
1507
  /// Compute a cost of the given call instruction.
1508
  ///
1509
  /// Compute the cost of calling function F with return type RetTy and
1510
  /// argument types Tys. F might be nullptr, in this case the cost of an
1511
  /// arbitrary call with the specified signature will be returned.
1512
  /// This is used, for instance,  when we estimate call of a vector
1513
  /// counterpart of the given function.
1514
  /// \param F Called function, might be nullptr.
1515
  /// \param RetTy Return value types.
1516
  /// \param Tys Argument types.
1517
  /// \returns The cost of Call instruction.
1518
2.15k
  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1519
2.15k
    return 10;
1520
2.15k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>)
Line
Count
Source
1518
1.18k
  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1519
1.18k
    return 10;
1520
1.18k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>)
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>)
Line
Count
Source
1518
562
  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1519
562
    return 10;
1520
562
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>)
Line
Count
Source
1518
5
  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1519
5
    return 10;
1520
5
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>)
Line
Count
Source
1518
407
  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1519
407
    return 10;
1520
407
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getCallInstrCost(llvm::Function*, llvm::Type*, llvm::ArrayRef<llvm::Type*>)
1521
1522
635k
  unsigned getNumberOfParts(Type *Tp) {
1523
635k
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1524
635k
    return LT.first;
1525
635k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getNumberOfParts(llvm::Type*)
Line
Count
Source
1522
560k
  unsigned getNumberOfParts(Type *Tp) {
1523
560k
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1524
560k
    return LT.first;
1525
560k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getNumberOfParts(llvm::Type*)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getNumberOfParts(llvm::Type*)
Line
Count
Source
1522
90
  unsigned getNumberOfParts(Type *Tp) {
1523
90
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1524
90
    return LT.first;
1525
90
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getNumberOfParts(llvm::Type*)
Line
Count
Source
1522
14.7k
  unsigned getNumberOfParts(Type *Tp) {
1523
14.7k
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1524
14.7k
    return LT.first;
1525
14.7k
  }
llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getNumberOfParts(llvm::Type*)
Line
Count
Source
1522
74
  unsigned getNumberOfParts(Type *Tp) {
1523
74
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1524
74
    return LT.first;
1525
74
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getNumberOfParts(llvm::Type*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getNumberOfParts(llvm::Type*)
llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getNumberOfParts(llvm::Type*)
Line
Count
Source
1522
6
  unsigned getNumberOfParts(Type *Tp) {
1523
6
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1524
6
    return LT.first;
1525
6
  }
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getNumberOfParts(llvm::Type*)
Line
Count
Source
1522
713
  unsigned getNumberOfParts(Type *Tp) {
1523
713
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1524
713
    return LT.first;
1525
713
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getNumberOfParts(llvm::Type*)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getNumberOfParts(llvm::Type*)
Line
Count
Source
1522
180
  unsigned getNumberOfParts(Type *Tp) {
1523
180
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1524
180
    return LT.first;
1525
180
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getNumberOfParts(llvm::Type*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getNumberOfParts(llvm::Type*)
Line
Count
Source
1522
59.5k
  unsigned getNumberOfParts(Type *Tp) {
1523
59.5k
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1524
59.5k
    return LT.first;
1525
59.5k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getNumberOfParts(llvm::Type*)
1526
1527
  unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1528
3.28k
                                     const SCEV *) {
1529
3.28k
    return 0;
1530
3.28k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*)
Line
Count
Source
1528
7
                                     const SCEV *) {
1529
7
    return 0;
1530
7
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*)
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*)
Line
Count
Source
1528
99
                                     const SCEV *) {
1529
99
    return 0;
1530
99
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*)
llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*)
Line
Count
Source
1528
6
                                     const SCEV *) {
1529
6
    return 0;
1530
6
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*)
Line
Count
Source
1528
3.17k
                                     const SCEV *) {
1529
3.17k
    return 0;
1530
3.17k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getAddressComputationCost(llvm::Type*, llvm::ScalarEvolution*, llvm::SCEV const*)
1531
1532
  /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1533
  /// We're assuming that reduction operations are performed the following way:
1534
  /// 1. Non-pairwise reduction
1535
  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1536
  /// <n x i32> <i32 n/2, i32 n/2+1, ..., i32 n-1, i32 undef, ..., i32 undef>
1537
  ///            \----------------v-------------/  \----------v------------/
1538
  ///                            n/2 elements               n/2 elements
1539
  /// %red1 = op <n x t> %val, <n x t> %val1
1540
  /// After this operation we have a vector %red1 where only the first n/2
1541
  /// elements are meaningful, the second n/2 elements are undefined and can be
1542
  /// dropped. All other operations are actually working with the vector of
1543
  /// length n/2, not n, though the real vector length is still n.
1544
  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1545
  /// <n x i32> <i32 n/4, i32 n/4+1, ..., i32 n/2-1, i32 undef, ..., i32 undef>
1546
  ///            \----------------v-------------/  \----------v------------/
1547
  ///                            n/4 elements               3*n/4 elements
1548
  /// %red2 = op <n x t> %red1, <n x t> %val2  - working with the vector of
1549
  /// length n/2, the resulting vector has length n/4 etc.
1550
  /// 2. Pairwise reduction:
1551
  /// Everything is the same except for an additional shuffle operation which
1552
  /// is used to produce operands for pairwise kind of reductions.
1553
  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1554
  /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1555
  ///            \-------------v----------/  \----------v------------/
1556
  ///                   n/2 elements               n/2 elements
1557
  /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1558
  /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1559
  ///            \-------------v----------/  \----------v------------/
1560
  ///                   n/2 elements               n/2 elements
1561
  /// %red1 = op <n x t> %val1, <n x t> %val2
1562
  /// Again, the operation is performed on <n x t> vector, but the resulting
1563
  /// vector %red1 is <n/2 x t> vector.
1564
  ///
1565
  /// The cost model should take into account that the actual length of the
1566
  /// vector is reduced on each iteration.
1567
  unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1568
4.98k
                                      bool IsPairwise) {
1569
4.98k
    assert(Ty->isVectorTy() && "Expect a vector type");
1570
4.98k
    Type *ScalarTy = Ty->getVectorElementType();
1571
4.98k
    unsigned NumVecElts = Ty->getVectorNumElements();
1572
4.98k
    unsigned NumReduxLevels = Log2_32(NumVecElts);
1573
4.98k
    unsigned ArithCost = 0;
1574
4.98k
    unsigned ShuffleCost = 0;
1575
4.98k
    auto *ConcreteTTI = static_cast<T *>(this);
1576
4.98k
    std::pair<unsigned, MVT> LT =
1577
4.98k
        ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1578
4.98k
    unsigned LongVectorCount = 0;
1579
4.98k
    unsigned MVTLen =
1580
4.98k
        LT.second.isVector() ? 
LT.second.getVectorNumElements()4.87k
:
1110
;
1581
7.26k
    while (NumVecElts > MVTLen) {
1582
2.28k
      NumVecElts /= 2;
1583
2.28k
      Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1584
2.28k
      // Assume the pairwise shuffles add a cost.
1585
2.28k
      ShuffleCost += (IsPairwise + 1) *
1586
2.28k
                     ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1587
2.28k
                                                 NumVecElts, SubTy);
1588
2.28k
      ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1589
2.28k
      Ty = SubTy;
1590
2.28k
      ++LongVectorCount;
1591
2.28k
    }
1592
4.98k
1593
4.98k
    NumReduxLevels -= LongVectorCount;
1594
4.98k
1595
4.98k
    // The minimal length of the vector is limited by the real length of vector
1596
4.98k
    // operations performed on the current platform. That's why several final
1597
4.98k
    // reduction operations are performed on the vectors with the same
1598
4.98k
    // architecture-dependent length.
1599
4.98k
1600
4.98k
    // Non pairwise reductions need one shuffle per reduction level. Pairwise
1601
4.98k
    // reductions need two shuffles on every level, but the last one. On that
1602
4.98k
    // level one of the shuffles is <0, u, u, ...> which is identity.
1603
4.98k
    unsigned NumShuffles = NumReduxLevels;
1604
4.98k
    if (IsPairwise && 
NumReduxLevels >= 12.30k
)
1605
2.30k
      NumShuffles += NumReduxLevels - 1;
1606
4.98k
    ShuffleCost += NumShuffles *
1607
4.98k
                   ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1608
4.98k
                                               0, Ty);
1609
4.98k
    ArithCost += NumReduxLevels *
1610
4.98k
                 ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1611
4.98k
    return ShuffleCost + ArithCost +
1612
4.98k
           ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1613
4.98k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool)
Line
Count
Source
1568
2.92k
                                      bool IsPairwise) {
1569
2.92k
    assert(Ty->isVectorTy() && "Expect a vector type");
1570
2.92k
    Type *ScalarTy = Ty->getVectorElementType();
1571
2.92k
    unsigned NumVecElts = Ty->getVectorNumElements();
1572
2.92k
    unsigned NumReduxLevels = Log2_32(NumVecElts);
1573
2.92k
    unsigned ArithCost = 0;
1574
2.92k
    unsigned ShuffleCost = 0;
1575
2.92k
    auto *ConcreteTTI = static_cast<T *>(this);
1576
2.92k
    std::pair<unsigned, MVT> LT =
1577
2.92k
        ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1578
2.92k
    unsigned LongVectorCount = 0;
1579
2.92k
    unsigned MVTLen =
1580
2.92k
        LT.second.isVector() ? LT.second.getVectorNumElements() : 
10
;
1581
3.93k
    while (NumVecElts > MVTLen) {
1582
1.01k
      NumVecElts /= 2;
1583
1.01k
      Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1584
1.01k
      // Assume the pairwise shuffles add a cost.
1585
1.01k
      ShuffleCost += (IsPairwise + 1) *
1586
1.01k
                     ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1587
1.01k
                                                 NumVecElts, SubTy);
1588
1.01k
      ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1589
1.01k
      Ty = SubTy;
1590
1.01k
      ++LongVectorCount;
1591
1.01k
    }
1592
2.92k
1593
2.92k
    NumReduxLevels -= LongVectorCount;
1594
2.92k
1595
2.92k
    // The minimal length of the vector is limited by the real length of vector
1596
2.92k
    // operations performed on the current platform. That's why several final
1597
2.92k
    // reduction operations are performed on the vectors with the same
1598
2.92k
    // architecture-dependent length.
1599
2.92k
1600
2.92k
    // Non pairwise reductions need one shuffle per reduction level. Pairwise
1601
2.92k
    // reductions need two shuffles on every level, but the last one. On that
1602
2.92k
    // level one of the shuffles is <0, u, u, ...> which is identity.
1603
2.92k
    unsigned NumShuffles = NumReduxLevels;
1604
2.92k
    if (IsPairwise && 
NumReduxLevels >= 12.24k
)
1605
2.24k
      NumShuffles += NumReduxLevels - 1;
1606
2.92k
    ShuffleCost += NumShuffles *
1607
2.92k
                   ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1608
2.92k
                                               0, Ty);
1609
2.92k
    ArithCost += NumReduxLevels *
1610
2.92k
                 ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1611
2.92k
    return ShuffleCost + ArithCost +
1612
2.92k
           ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1613
2.92k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool)
Line
Count
Source
1568
19
                                      bool IsPairwise) {
1569
19
    assert(Ty->isVectorTy() && "Expect a vector type");
1570
19
    Type *ScalarTy = Ty->getVectorElementType();
1571
19
    unsigned NumVecElts = Ty->getVectorNumElements();
1572
19
    unsigned NumReduxLevels = Log2_32(NumVecElts);
1573
19
    unsigned ArithCost = 0;
1574
19
    unsigned ShuffleCost = 0;
1575
19
    auto *ConcreteTTI = static_cast<T *>(this);
1576
19
    std::pair<unsigned, MVT> LT =
1577
19
        ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1578
19
    unsigned LongVectorCount = 0;
1579
19
    unsigned MVTLen =
1580
19
        LT.second.isVector() ? LT.second.getVectorNumElements() : 
10
;
1581
31
    while (NumVecElts > MVTLen) {
1582
12
      NumVecElts /= 2;
1583
12
      Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1584
12
      // Assume the pairwise shuffles add a cost.
1585
12
      ShuffleCost += (IsPairwise + 1) *
1586
12
                     ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1587
12
                                                 NumVecElts, SubTy);
1588
12
      ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1589
12
      Ty = SubTy;
1590
12
      ++LongVectorCount;
1591
12
    }
1592
19
1593
19
    NumReduxLevels -= LongVectorCount;
1594
19
1595
19
    // The minimal length of the vector is limited by the real length of vector
1596
19
    // operations performed on the current platform. That's why several final
1597
19
    // reduction operations are performed on the vectors with the same
1598
19
    // architecture-dependent length.
1599
19
1600
19
    // Non pairwise reductions need one shuffle per reduction level. Pairwise
1601
19
    // reductions need two shuffles on every level, but the last one. On that
1602
19
    // level one of the shuffles is <0, u, u, ...> which is identity.
1603
19
    unsigned NumShuffles = NumReduxLevels;
1604
19
    if (IsPairwise && 
NumReduxLevels >= 112
)
1605
12
      NumShuffles += NumReduxLevels - 1;
1606
19
    ShuffleCost += NumShuffles *
1607
19
                   ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1608
19
                                               0, Ty);
1609
19
    ArithCost += NumReduxLevels *
1610
19
                 ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1611
19
    return ShuffleCost + ArithCost +
1612
19
           ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1613
19
  }
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool)
Line
Count
Source
1568
30
                                      bool IsPairwise) {
1569
30
    assert(Ty->isVectorTy() && "Expect a vector type");
1570
30
    Type *ScalarTy = Ty->getVectorElementType();
1571
30
    unsigned NumVecElts = Ty->getVectorNumElements();
1572
30
    unsigned NumReduxLevels = Log2_32(NumVecElts);
1573
30
    unsigned ArithCost = 0;
1574
30
    unsigned ShuffleCost = 0;
1575
30
    auto *ConcreteTTI = static_cast<T *>(this);
1576
30
    std::pair<unsigned, MVT> LT =
1577
30
        ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1578
30
    unsigned LongVectorCount = 0;
1579
30
    unsigned MVTLen =
1580
30
        LT.second.isVector() ? LT.second.getVectorNumElements() : 
10
;
1581
30
    while (NumVecElts > MVTLen) {
1582
0
      NumVecElts /= 2;
1583
0
      Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1584
0
      // Assume the pairwise shuffles add a cost.
1585
0
      ShuffleCost += (IsPairwise + 1) *
1586
0
                     ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1587
0
                                                 NumVecElts, SubTy);
1588
0
      ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1589
0
      Ty = SubTy;
1590
0
      ++LongVectorCount;
1591
0
    }
1592
30
1593
30
    NumReduxLevels -= LongVectorCount;
1594
30
1595
30
    // The minimal length of the vector is limited by the real length of vector
1596
30
    // operations performed on the current platform. That's why several final
1597
30
    // reduction operations are performed on the vectors with the same
1598
30
    // architecture-dependent length.
1599
30
1600
30
    // Non pairwise reductions need one shuffle per reduction level. Pairwise
1601
30
    // reductions need two shuffles on every level, but the last one. On that
1602
30
    // level one of the shuffles is <0, u, u, ...> which is identity.
1603
30
    unsigned NumShuffles = NumReduxLevels;
1604
30
    if (IsPairwise && 
NumReduxLevels >= 115
)
1605
15
      NumShuffles += NumReduxLevels - 1;
1606
30
    ShuffleCost += NumShuffles *
1607
30
                   ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1608
30
                                               0, Ty);
1609
30
    ArithCost += NumReduxLevels *
1610
30
                 ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1611
30
    return ShuffleCost + ArithCost +
1612
30
           ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1613
30
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool)
Line
Count
Source
1568
2.01k
                                      bool IsPairwise) {
1569
2.01k
    assert(Ty->isVectorTy() && "Expect a vector type");
1570
2.01k
    Type *ScalarTy = Ty->getVectorElementType();
1571
2.01k
    unsigned NumVecElts = Ty->getVectorNumElements();
1572
2.01k
    unsigned NumReduxLevels = Log2_32(NumVecElts);
1573
2.01k
    unsigned ArithCost = 0;
1574
2.01k
    unsigned ShuffleCost = 0;
1575
2.01k
    auto *ConcreteTTI = static_cast<T *>(this);
1576
2.01k
    std::pair<unsigned, MVT> LT =
1577
2.01k
        ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1578
2.01k
    unsigned LongVectorCount = 0;
1579
2.01k
    unsigned MVTLen =
1580
2.01k
        LT.second.isVector() ? 
LT.second.getVectorNumElements()1.90k
:
1110
;
1581
3.27k
    while (NumVecElts > MVTLen) {
1582
1.26k
      NumVecElts /= 2;
1583
1.26k
      Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1584
1.26k
      // Assume the pairwise shuffles add a cost.
1585
1.26k
      ShuffleCost += (IsPairwise + 1) *
1586
1.26k
                     ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1587
1.26k
                                                 NumVecElts, SubTy);
1588
1.26k
      ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1589
1.26k
      Ty = SubTy;
1590
1.26k
      ++LongVectorCount;
1591
1.26k
    }
1592
2.01k
1593
2.01k
    NumReduxLevels -= LongVectorCount;
1594
2.01k
1595
2.01k
    // The minimal length of the vector is limited by the real length of vector
1596
2.01k
    // operations performed on the current platform. That's why several final
1597
2.01k
    // reduction operations are performed on the vectors with the same
1598
2.01k
    // architecture-dependent length.
1599
2.01k
1600
2.01k
    // Non pairwise reductions need one shuffle per reduction level. Pairwise
1601
2.01k
    // reductions need two shuffles on every level, but the last one. On that
1602
2.01k
    // level one of the shuffles is <0, u, u, ...> which is identity.
1603
2.01k
    unsigned NumShuffles = NumReduxLevels;
1604
2.01k
    if (IsPairwise && 
NumReduxLevels >= 137
)
1605
37
      NumShuffles += NumReduxLevels - 1;
1606
2.01k
    ShuffleCost += NumShuffles *
1607
2.01k
                   ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1608
2.01k
                                               0, Ty);
1609
2.01k
    ArithCost += NumReduxLevels *
1610
2.01k
                 ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1611
2.01k
    return ShuffleCost + ArithCost +
1612
2.01k
           ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1613
2.01k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getArithmeticReductionCost(unsigned int, llvm::Type*, bool)
1614
1615
  /// Try to calculate op costs for min/max reduction operations.
1616
  /// \param CondTy Conditional type for the Select instruction.
1617
  unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1618
2.98k
                                  bool) {
1619
2.98k
    assert(Ty->isVectorTy() && "Expect a vector type");
1620
2.98k
    Type *ScalarTy = Ty->getVectorElementType();
1621
2.98k
    Type *ScalarCondTy = CondTy->getVectorElementType();
1622
2.98k
    unsigned NumVecElts = Ty->getVectorNumElements();
1623
2.98k
    unsigned NumReduxLevels = Log2_32(NumVecElts);
1624
2.98k
    unsigned CmpOpcode;
1625
2.98k
    if (Ty->isFPOrFPVectorTy()) {
1626
12
      CmpOpcode = Instruction::FCmp;
1627
2.97k
    } else {
1628
2.97k
      assert(Ty->isIntOrIntVectorTy() &&
1629
2.97k
             "expecting floating point or integer type for min/max reduction");
1630
2.97k
      CmpOpcode = Instruction::ICmp;
1631
2.97k
    }
1632
2.98k
    unsigned MinMaxCost = 0;
1633
2.98k
    unsigned ShuffleCost = 0;
1634
2.98k
    auto *ConcreteTTI = static_cast<T *>(this);
1635
2.98k
    std::pair<unsigned, MVT> LT =
1636
2.98k
        ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1637
2.98k
    unsigned LongVectorCount = 0;
1638
2.98k
    unsigned MVTLen =
1639
2.98k
        LT.second.isVector() ? 
LT.second.getVectorNumElements()2.92k
:
164
;
1640
5.81k
    while (NumVecElts > MVTLen) {
1641
2.82k
      NumVecElts /= 2;
1642
2.82k
      Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1643
2.82k
      CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1644
2.82k
1645
2.82k
      // Assume the pairwise shuffles add a cost.
1646
2.82k
      ShuffleCost += (IsPairwise + 1) *
1647
2.82k
                     ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1648
2.82k
                                                 NumVecElts, SubTy);
1649
2.82k
      MinMaxCost +=
1650
2.82k
          ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1651
2.82k
          ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1652
2.82k
                                          nullptr);
1653
2.82k
      Ty = SubTy;
1654
2.82k
      ++LongVectorCount;
1655
2.82k
    }
1656
2.98k
1657
2.98k
    NumReduxLevels -= LongVectorCount;
1658
2.98k
1659
2.98k
    // The minimal length of the vector is limited by the real length of vector
1660
2.98k
    // operations performed on the current platform. That's why several final
1661
2.98k
    // reduction operations are performed on the vectors with the same
1662
2.98k
    // architecture-dependent length.
1663
2.98k
1664
2.98k
    // Non pairwise reductions need one shuffle per reduction level. Pairwise
1665
2.98k
    // reductions need two shuffles on every level, but the last one. On that
1666
2.98k
    // level one of the shuffles is <0, u, u, ...> which is identity.
1667
2.98k
    unsigned NumShuffles = NumReduxLevels;
1668
2.98k
    if (IsPairwise && 
NumReduxLevels >= 11.43k
)
1669
1.43k
      NumShuffles += NumReduxLevels - 1;
1670
2.98k
    ShuffleCost += NumShuffles *
1671
2.98k
                   ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1672
2.98k
                                               0, Ty);
1673
2.98k
    MinMaxCost +=
1674
2.98k
        NumReduxLevels *
1675
2.98k
        (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1676
2.98k
         ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1677
2.98k
                                         nullptr));
1678
2.98k
    // The last min/max should be in vector registers and we counted it above.
1679
2.98k
    // So just need a single extractelement.
1680
2.98k
    return ShuffleCost + MinMaxCost +
1681
2.98k
           ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1682
2.98k
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool)
Line
Count
Source
1618
2.86k
                                  bool) {
1619
2.86k
    assert(Ty->isVectorTy() && "Expect a vector type");
1620
2.86k
    Type *ScalarTy = Ty->getVectorElementType();
1621
2.86k
    Type *ScalarCondTy = CondTy->getVectorElementType();
1622
2.86k
    unsigned NumVecElts = Ty->getVectorNumElements();
1623
2.86k
    unsigned NumReduxLevels = Log2_32(NumVecElts);
1624
2.86k
    unsigned CmpOpcode;
1625
2.86k
    if (Ty->isFPOrFPVectorTy()) {
1626
2
      CmpOpcode = Instruction::FCmp;
1627
2.86k
    } else {
1628
2.86k
      assert(Ty->isIntOrIntVectorTy() &&
1629
2.86k
             "expecting floating point or integer type for min/max reduction");
1630
2.86k
      CmpOpcode = Instruction::ICmp;
1631
2.86k
    }
1632
2.86k
    unsigned MinMaxCost = 0;
1633
2.86k
    unsigned ShuffleCost = 0;
1634
2.86k
    auto *ConcreteTTI = static_cast<T *>(this);
1635
2.86k
    std::pair<unsigned, MVT> LT =
1636
2.86k
        ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1637
2.86k
    unsigned LongVectorCount = 0;
1638
2.86k
    unsigned MVTLen =
1639
2.86k
        LT.second.isVector() ? LT.second.getVectorNumElements() : 
10
;
1640
5.66k
    while (NumVecElts > MVTLen) {
1641
2.80k
      NumVecElts /= 2;
1642
2.80k
      Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1643
2.80k
      CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1644
2.80k
1645
2.80k
      // Assume the pairwise shuffles add a cost.
1646
2.80k
      ShuffleCost += (IsPairwise + 1) *
1647
2.80k
                     ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1648
2.80k
                                                 NumVecElts, SubTy);
1649
2.80k
      MinMaxCost +=
1650
2.80k
          ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1651
2.80k
          ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1652
2.80k
                                          nullptr);
1653
2.80k
      Ty = SubTy;
1654
2.80k
      ++LongVectorCount;
1655
2.80k
    }
1656
2.86k
1657
2.86k
    NumReduxLevels -= LongVectorCount;
1658
2.86k
1659
2.86k
    // The minimal length of the vector is limited by the real length of vector
1660
2.86k
    // operations performed on the current platform. That's why several final
1661
2.86k
    // reduction operations are performed on the vectors with the same
1662
2.86k
    // architecture-dependent length.
1663
2.86k
1664
2.86k
    // Non pairwise reductions need one shuffle per reduction level. Pairwise
1665
2.86k
    // reductions need two shuffles on every level, but the last one. On that
1666
2.86k
    // level one of the shuffles is <0, u, u, ...> which is identity.
1667
2.86k
    unsigned NumShuffles = NumReduxLevels;
1668
2.86k
    if (IsPairwise && 
NumReduxLevels >= 11.42k
)
1669
1.42k
      NumShuffles += NumReduxLevels - 1;
1670
2.86k
    ShuffleCost += NumShuffles *
1671
2.86k
                   ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1672
2.86k
                                               0, Ty);
1673
2.86k
    MinMaxCost +=
1674
2.86k
        NumReduxLevels *
1675
2.86k
        (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1676
2.86k
         ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1677
2.86k
                                         nullptr));
1678
2.86k
    // The last min/max should be in vector registers and we counted it above.
1679
2.86k
    // So just need a single extractelement.
1680
2.86k
    return ShuffleCost + MinMaxCost +
1681
2.86k
           ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1682
2.86k
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool)
llvm::BasicTTIImplBase<llvm::GCNTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool)
Line
Count
Source
1618
31
                                  bool) {
1619
31
    assert(Ty->isVectorTy() && "Expect a vector type");
1620
31
    Type *ScalarTy = Ty->getVectorElementType();
1621
31
    Type *ScalarCondTy = CondTy->getVectorElementType();
1622
31
    unsigned NumVecElts = Ty->getVectorNumElements();
1623
31
    unsigned NumReduxLevels = Log2_32(NumVecElts);
1624
31
    unsigned CmpOpcode;
1625
31
    if (Ty->isFPOrFPVectorTy()) {
1626
10
      CmpOpcode = Instruction::FCmp;
1627
21
    } else {
1628
21
      assert(Ty->isIntOrIntVectorTy() &&
1629
21
             "expecting floating point or integer type for min/max reduction");
1630
21
      CmpOpcode = Instruction::ICmp;
1631
21
    }
1632
31
    unsigned MinMaxCost = 0;
1633
31
    unsigned ShuffleCost = 0;
1634
31
    auto *ConcreteTTI = static_cast<T *>(this);
1635
31
    std::pair<unsigned, MVT> LT =
1636
31
        ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1637
31
    unsigned LongVectorCount = 0;
1638
31
    unsigned MVTLen =
1639
31
        LT.second.isVector() ? LT.second.getVectorNumElements() : 
10
;
1640
44
    while (NumVecElts > MVTLen) {
1641
13
      NumVecElts /= 2;
1642
13
      Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1643
13
      CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1644
13
1645
13
      // Assume the pairwise shuffles add a cost.
1646
13
      ShuffleCost += (IsPairwise + 1) *
1647
13
                     ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1648
13
                                                 NumVecElts, SubTy);
1649
13
      MinMaxCost +=
1650
13
          ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1651
13
          ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1652
13
                                          nullptr);
1653
13
      Ty = SubTy;
1654
13
      ++LongVectorCount;
1655
13
    }
1656
31
1657
31
    NumReduxLevels -= LongVectorCount;
1658
31
1659
31
    // The minimal length of the vector is limited by the real length of vector
1660
31
    // operations performed on the current platform. That's why several final
1661
31
    // reduction operations are performed on the vectors with the same
1662
31
    // architecture-dependent length.
1663
31
1664
31
    // Non pairwise reductions need one shuffle per reduction level. Pairwise
1665
31
    // reductions need two shuffles on every level, but the last one. On that
1666
31
    // level one of the shuffles is <0, u, u, ...> which is identity.
1667
31
    unsigned NumShuffles = NumReduxLevels;
1668
31
    if (IsPairwise && 
NumReduxLevels >= 119
)
1669
19
      NumShuffles += NumReduxLevels - 1;
1670
31
    ShuffleCost += NumShuffles *
1671
31
                   ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1672
31
                                               0, Ty);
1673
31
    MinMaxCost +=
1674
31
        NumReduxLevels *
1675
31
        (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1676
31
         ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1677
31
                                         nullptr));
1678
31
    // The last min/max should be in vector registers and we counted it above.
1679
31
    // So just need a single extractelement.
1680
31
    return ShuffleCost + MinMaxCost +
1681
31
           ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1682
31
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::HexagonTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool)
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool)
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool)
Line
Count
Source
1618
96
                                  bool) {
1619
96
    assert(Ty->isVectorTy() && "Expect a vector type");
1620
96
    Type *ScalarTy = Ty->getVectorElementType();
1621
96
    Type *ScalarCondTy = CondTy->getVectorElementType();
1622
96
    unsigned NumVecElts = Ty->getVectorNumElements();
1623
96
    unsigned NumReduxLevels = Log2_32(NumVecElts);
1624
96
    unsigned CmpOpcode;
1625
96
    if (Ty->isFPOrFPVectorTy()) {
1626
0
      CmpOpcode = Instruction::FCmp;
1627
96
    } else {
1628
96
      assert(Ty->isIntOrIntVectorTy() &&
1629
96
             "expecting floating point or integer type for min/max reduction");
1630
96
      CmpOpcode = Instruction::ICmp;
1631
96
    }
1632
96
    unsigned MinMaxCost = 0;
1633
96
    unsigned ShuffleCost = 0;
1634
96
    auto *ConcreteTTI = static_cast<T *>(this);
1635
96
    std::pair<unsigned, MVT> LT =
1636
96
        ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1637
96
    unsigned LongVectorCount = 0;
1638
96
    unsigned MVTLen =
1639
96
        LT.second.isVector() ? 
LT.second.getVectorNumElements()32
:
164
;
1640
112
    while (NumVecElts > MVTLen) {
1641
16
      NumVecElts /= 2;
1642
16
      Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1643
16
      CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1644
16
1645
16
      // Assume the pairwise shuffles add a cost.
1646
16
      ShuffleCost += (IsPairwise + 1) *
1647
16
                     ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1648
16
                                                 NumVecElts, SubTy);
1649
16
      MinMaxCost +=
1650
16
          ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1651
16
          ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1652
16
                                          nullptr);
1653
16
      Ty = SubTy;
1654
16
      ++LongVectorCount;
1655
16
    }
1656
96
1657
96
    NumReduxLevels -= LongVectorCount;
1658
96
1659
96
    // The minimal length of the vector is limited by the real length of vector
1660
96
    // operations performed on the current platform. That's why several final
1661
96
    // reduction operations are performed on the vectors with the same
1662
96
    // architecture-dependent length.
1663
96
1664
96
    // Non pairwise reductions need one shuffle per reduction level. Pairwise
1665
96
    // reductions need two shuffles on every level, but the last one. On that
1666
96
    // level one of the shuffles is <0, u, u, ...> which is identity.
1667
96
    unsigned NumShuffles = NumReduxLevels;
1668
96
    if (IsPairwise && 
NumReduxLevels >= 10
)
1669
0
      NumShuffles += NumReduxLevels - 1;
1670
96
    ShuffleCost += NumShuffles *
1671
96
                   ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1672
96
                                               0, Ty);
1673
96
    MinMaxCost +=
1674
96
        NumReduxLevels *
1675
96
        (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1676
96
         ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1677
96
                                         nullptr));
1678
96
    // The last min/max should be in vector registers and we counted it above.
1679
96
    // So just need a single extractelement.
1680
96
    return ShuffleCost + MinMaxCost +
1681
96
           ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1682
96
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getMinMaxReductionCost(llvm::Type*, llvm::Type*, bool, bool)
1683
1684
4.78k
  /// Returns 1 unconditionally.
  /// NOTE(review): presumably the unit cost charged for splitting a vector
  /// (going by the name) -- confirm against the callers.
  unsigned getVectorSplitCost() {
    const unsigned SplitCost = 1;
    return SplitCost;
  }
llvm::BasicTTIImplBase<llvm::AArch64TTIImpl>::getVectorSplitCost()
Line
Count
Source
1684
1.38k
  /// Returns 1 unconditionally.
  /// NOTE(review): presumably the unit cost charged for splitting a vector
  /// (going by the name) -- confirm against the callers.
  unsigned getVectorSplitCost() {
    const unsigned SplitCost = 1;
    return SplitCost;
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::R600TTIImpl>::getVectorSplitCost()
llvm::BasicTTIImplBase<llvm::ARMTTIImpl>::getVectorSplitCost()
Line
Count
Source
1684
228
  /// Returns 1 unconditionally.
  /// NOTE(review): presumably the unit cost charged for splitting a vector
  /// (going by the name) -- confirm against the callers.
  unsigned getVectorSplitCost() {
    const unsigned SplitCost = 1;
    return SplitCost;
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::LanaiTTIImpl>::getVectorSplitCost()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::BasicTTIImpl>::getVectorSplitCost()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::NVPTXTTIImpl>::getVectorSplitCost()
llvm::BasicTTIImplBase<llvm::PPCTTIImpl>::getVectorSplitCost()
Line
Count
Source
1684
1
  /// Returns 1 unconditionally.
  /// NOTE(review): presumably the unit cost charged for splitting a vector
  /// (going by the name) -- confirm against the callers.
  unsigned getVectorSplitCost() {
    const unsigned SplitCost = 1;
    return SplitCost;
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::RISCVTTIImpl>::getVectorSplitCost()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::SystemZTTIImpl>::getVectorSplitCost()
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::WebAssemblyTTIImpl>::getVectorSplitCost()
llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getVectorSplitCost()
Line
Count
Source
1684
3.17k
  /// Returns 1 unconditionally.
  /// NOTE(review): presumably the unit cost charged for splitting a vector
  /// (going by the name) -- confirm against the callers.
  unsigned getVectorSplitCost() {
    const unsigned SplitCost = 1;
    return SplitCost;
  }
Unexecuted instantiation: llvm::BasicTTIImplBase<llvm::XCoreTTIImpl>::getVectorSplitCost()
1685
1686
  /// @}
1687
};
1688
1689
/// Concrete BasicTTIImpl that can be used if no further customization
1690
/// is needed.
///
/// The class derives from BasicTTIImplBase instantiated with itself as the
/// template argument, and stores the two pointers that getST() and getTLI()
/// hand back.
1691
class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1692
  using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1693
1694
  // Friendship lets the base template reach the private accessors declared
  // below (the base calls getTLI() through a pointer to the concrete type).
  friend class BasicTTIImplBase<BasicTTIImpl>;
1695
1696
  // Pointer returned by getST().
  // NOTE(review): presumably set by the constructor -- confirm in the
  // out-of-line definition.
  const TargetSubtargetInfo *ST;
1697
  // Pointer returned by getTLI().
  // NOTE(review): presumably set by the constructor -- confirm in the
  // out-of-line definition.
  const TargetLoweringBase *TLI;
1698
1699
0
  // Private accessor: returns the stored subtarget pointer unchanged.
  const TargetSubtargetInfo *getST() const { return ST; }
1700
1.36k
  // Private accessor: returns the stored lowering pointer unchanged.
  const TargetLoweringBase *getTLI() const { return TLI; }
1701
1702
public:
1703
  /// Declared here and defined elsewhere.
  /// NOTE(review): presumably derives ST and TLI from \p TM and \p F --
  /// confirm against the definition.
  explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1704
};
1705
1706
} // end namespace llvm
1707
1708
#endif // LLVM_CODEGEN_BASICTTIIMPL_H