Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the AArch64 specific subclass of TargetSubtarget.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "AArch64Subtarget.h"
14
15
#include "AArch64.h"
16
#include "AArch64CallLowering.h"
17
#include "AArch64InstrInfo.h"
18
#include "AArch64LegalizerInfo.h"
19
#include "AArch64PBQPRegAlloc.h"
20
#include "AArch64RegisterBankInfo.h"
21
#include "AArch64TargetMachine.h"
22
#include "MCTargetDesc/AArch64AddressingModes.h"
23
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
24
#include "llvm/CodeGen/MachineScheduler.h"
25
#include "llvm/IR/GlobalValue.h"
26
#include "llvm/Support/TargetParser.h"
27
28
using namespace llvm;
29
30
#define DEBUG_TYPE "aarch64-subtarget"
31
32
#define GET_SUBTARGETINFO_CTOR
33
#define GET_SUBTARGETINFO_TARGET_DESC
34
#include "AArch64GenSubtargetInfo.inc"
35
36
// Allow turning the early if-conversion pass on/off from the command line.
static cl::opt<bool> EnableEarlyIfConvert(
    "aarch64-early-ifcvt",
    cl::desc("Enable the early if converter pass"), cl::init(true),
    cl::Hidden);

// If OS supports TBI, use this flag to enable it.
static cl::opt<bool> UseAddressTopByteIgnored(
    "aarch64-use-tbi",
    cl::desc("Assume that top byte of an address is ignored"),
    cl::init(false), cl::Hidden);

// Route calls to nonlazybind functions through the GOT instead of a direct
// call when this flag is set.
static cl::opt<bool> UseNonLazyBind(
    "aarch64-enable-nonlazybind",
    cl::desc("Call nonlazybind functions via direct GOT load"),
    cl::init(false), cl::Hidden);
49
50
AArch64Subtarget &
AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
                                                  StringRef CPUString) {
  // Resolve the CPU name and feature string into subtarget state. An empty
  // CPU name selects the generic model.
  if (CPUString.empty())
    CPUString = "generic";

  // Apply the tablegen-derived feature bits first, then the per-CPU tuning
  // properties that are not (yet) expressible as features.
  ParseSubtargetFeatures(CPUString, FS);
  initializeProperties();
  return *this;
}
63
64
9.10k
void AArch64Subtarget::initializeProperties() {
65
9.10k
  // Initialize CPU specific properties. We should add a tablegen feature for
66
9.10k
  // this in the future so we can specify it together with the subtarget
67
9.10k
  // features.
68
9.10k
  switch (ARMProcFamily) {
69
9.10k
  case Others:
70
1.76k
    break;
71
9.10k
  case CortexA35:
72
3
    break;
73
9.10k
  case CortexA53:
74
20
    PrefFunctionAlignment = 3;
75
20
    break;
76
9.10k
  case CortexA55:
77
2
    break;
78
9.10k
  case CortexA57:
79
38
    MaxInterleaveFactor = 4;
80
38
    PrefFunctionAlignment = 4;
81
38
    break;
82
9.10k
  case CortexA72:
83
14
  case CortexA73:
84
14
  case CortexA75:
85
14
  case CortexA76:
86
14
    PrefFunctionAlignment = 4;
87
14
    break;
88
7.17k
  case Cyclone:
89
7.17k
    CacheLineSize = 64;
90
7.17k
    PrefetchDistance = 280;
91
7.17k
    MinPrefetchStride = 2048;
92
7.17k
    MaxPrefetchIterationsAhead = 3;
93
7.17k
    break;
94
21
  case ExynosM1:
95
21
    MaxInterleaveFactor = 4;
96
21
    MaxJumpTableSize = 8;
97
21
    PrefFunctionAlignment = 4;
98
21
    PrefLoopAlignment = 3;
99
21
    break;
100
26
  case ExynosM3:
101
26
    MaxInterleaveFactor = 4;
102
26
    MaxJumpTableSize = 20;
103
26
    PrefFunctionAlignment = 5;
104
26
    PrefLoopAlignment = 4;
105
26
    break;
106
14
  case Falkor:
107
13
    MaxInterleaveFactor = 4;
108
13
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
109
13
    MinVectorRegisterBitWidth = 128;
110
13
    CacheLineSize = 128;
111
13
    PrefetchDistance = 820;
112
13
    MinPrefetchStride = 2048;
113
13
    MaxPrefetchIterationsAhead = 8;
114
13
    break;
115
14
  case Kryo:
116
12
    MaxInterleaveFactor = 4;
117
12
    VectorInsertExtractBaseCost = 2;
118
12
    CacheLineSize = 128;
119
12
    PrefetchDistance = 740;
120
12
    MinPrefetchStride = 1024;
121
12
    MaxPrefetchIterationsAhead = 11;
122
12
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
123
12
    MinVectorRegisterBitWidth = 128;
124
12
    break;
125
14
  case Saphira:
126
4
    MaxInterleaveFactor = 4;
127
4
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
128
4
    MinVectorRegisterBitWidth = 128;
129
4
    break;
130
14
  case ThunderX2T99:
131
5
    CacheLineSize = 64;
132
5
    PrefFunctionAlignment = 3;
133
5
    PrefLoopAlignment = 2;
134
5
    MaxInterleaveFactor = 4;
135
5
    PrefetchDistance = 128;
136
5
    MinPrefetchStride = 1024;
137
5
    MaxPrefetchIterationsAhead = 4;
138
5
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
139
5
    MinVectorRegisterBitWidth = 128;
140
5
    break;
141
14
  case ThunderX:
142
4
  case ThunderXT88:
143
4
  case ThunderXT81:
144
4
  case ThunderXT83:
145
4
    CacheLineSize = 128;
146
4
    PrefFunctionAlignment = 3;
147
4
    PrefLoopAlignment = 2;
148
4
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
149
4
    MinVectorRegisterBitWidth = 128;
150
4
    break;
151
4
  case TSV110:
152
2
    CacheLineSize = 64;
153
2
    PrefFunctionAlignment = 4;
154
2
    PrefLoopAlignment = 2;
155
2
    break;
156
9.10k
  }
157
9.10k
}
158
159
AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
                                   const std::string &FS,
                                   const TargetMachine &TM, bool LittleEndian)
    : AArch64GenSubtargetInfo(TT, CPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian),
      TargetTriple(TT), FrameLowering(),
      InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
      TLInfo(TM, *this) {
  // Platforms that treat x18 as a platform register reserve it up front.
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);

  // Construct the GlobalISel pipeline objects.
  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RegBanks = new AArch64RegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RegBanks));

  // The subtarget takes ownership of the register bank info.
  RegBankInfo.reset(RegBanks);
}
185
186
271k
const CallLowering *AArch64Subtarget::getCallLowering() const {
  // Non-owning access to the GlobalISel call lowering implementation.
  const CallLowering *Lowering = CallLoweringInfo.get();
  return Lowering;
}
189
190
231k
const InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  // Non-owning access to the GlobalISel instruction selector.
  const InstructionSelector *Selector = InstSelector.get();
  return Selector;
}
193
194
235k
const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  // Non-owning access to the GlobalISel legalizer rules.
  const LegalizerInfo *Info = Legalizer.get();
  return Info;
}
197
198
298k
const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  // Non-owning access to the GlobalISel register bank information.
  const RegisterBankInfo *Info = RegBankInfo.get();
  return Info;
}
201
202
/// Find the target operand flags that describe how a global value should be
203
/// referenced for the current subtarget.
204
unsigned char
205
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
206
893k
                                          const TargetMachine &TM) const {
207
893k
  // MachO large model always goes via a GOT, simply to get a single 8-byte
208
893k
  // absolute relocation on all global addresses.
209
893k
  if (TM.getCodeModel() == CodeModel::Large && 
isTargetMachO()68
)
210
13
    return AArch64II::MO_GOT;
211
892k
212
892k
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
213
216k
    if (GV->hasDLLImportStorageClass())
214
11
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
215
216k
    if (getTargetTriple().isOSWindows())
216
8
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
217
216k
    return AArch64II::MO_GOT;
218
216k
  }
219
676k
220
676k
  // The small code model's direct accesses use ADRP, which cannot
221
676k
  // necessarily produce the value 0 (if the code is above 4GB).
222
676k
  // Same for the tiny code model, where we have a pc relative LDR.
223
676k
  if ((useSmallAddressing() || 
TM.getCodeModel() == CodeModel::Tiny298
) &&
224
676k
      
GV->hasExternalWeakLinkage()676k
)
225
12
    return AArch64II::MO_GOT;
226
676k
227
676k
  return AArch64II::MO_NO_FLAG;
228
676k
}
229
230
unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
231
240k
    const GlobalValue *GV, const TargetMachine &TM) const {
232
240k
  // MachO large model always goes via a GOT, because we don't have the
233
240k
  // relocations available to do anything else..
234
240k
  if (TM.getCodeModel() == CodeModel::Large && 
isTargetMachO()3
&&
235
240k
      
!GV->hasInternalLinkage()0
)
236
0
    return AArch64II::MO_GOT;
237
240k
238
240k
  // NonLazyBind goes via GOT unless we know it's available locally.
239
240k
  auto *F = dyn_cast<Function>(GV);
240
240k
  if (UseNonLazyBind && 
F3
&&
F->hasFnAttribute(Attribute::NonLazyBind)3
&&
241
240k
      
!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)3
)
242
2
    return AArch64II::MO_GOT;
243
240k
244
240k
  return AArch64II::MO_NO_FLAG;
245
240k
}
246
247
void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
248
3.99M
                                           unsigned NumRegionInstrs) const {
249
3.99M
  // LNT run (at least on Cyclone) showed reasonably significant gains for
250
3.99M
  // bi-directional scheduling. 253.perlbmk.
251
3.99M
  Policy.OnlyTopDown = false;
252
3.99M
  Policy.OnlyBottomUp = false;
253
3.99M
  // Enabling or Disabling the latency heuristic is a close call: It seems to
254
3.99M
  // help nearly no benchmark on out-of-order architectures, on the other hand
255
3.99M
  // it regresses register pressure on a few benchmarking.
256
3.99M
  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
257
3.99M
}
258
259
257k
bool AArch64Subtarget::enableEarlyIfConversion() const {
260
257k
  return EnableEarlyIfConvert;
261
257k
}
262
263
1.32M
bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
264
1.32M
  if (!UseAddressTopByteIgnored)
265
1.32M
    return false;
266
7
267
7
  if (TargetTriple.isiOS()) {
268
7
    unsigned Major, Minor, Micro;
269
7
    TargetTriple.getiOSVersion(Major, Minor, Micro);
270
7
    return Major >= 8;
271
7
  }
272
0
273
0
  return false;
274
0
}
275
276
std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
  // Extra PBQP constraints are only needed when FP operation balancing
  // (Cortex-A57 style chaining) is enabled for this subtarget.
  if (!balanceFPOps())
    return nullptr;
  return llvm::make_unique<A57ChainingConstraint>();
}
280
281
1.62k
void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
  // We usually compute max call frame size after ISel. Do the computation now
  // if the .mir file didn't specify it. Note that this will probably give you
  // bogus values after PEI has eliminated the callframe setup/destroy pseudo
  // instructions, specify explicitly if you need it to be correct.
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  if (!FrameInfo.isMaxCallFrameSizeComputed())
    FrameInfo.computeMaxCallFrameSize(MF);
}