Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/ARM/ARMSubtarget.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the ARM specific subclass of TargetSubtargetInfo.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "ARM.h"
14
15
#include "ARMCallLowering.h"
16
#include "ARMLegalizerInfo.h"
17
#include "ARMRegisterBankInfo.h"
18
#include "ARMSubtarget.h"
19
#include "ARMFrameLowering.h"
20
#include "ARMInstrInfo.h"
21
#include "ARMSubtarget.h"
22
#include "ARMTargetMachine.h"
23
#include "MCTargetDesc/ARMMCTargetDesc.h"
24
#include "Thumb1FrameLowering.h"
25
#include "Thumb1InstrInfo.h"
26
#include "Thumb2InstrInfo.h"
27
#include "llvm/ADT/StringRef.h"
28
#include "llvm/ADT/Triple.h"
29
#include "llvm/ADT/Twine.h"
30
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
31
#include "llvm/CodeGen/MachineFunction.h"
32
#include "llvm/IR/Function.h"
33
#include "llvm/IR/GlobalValue.h"
34
#include "llvm/MC/MCAsmInfo.h"
35
#include "llvm/MC/MCTargetOptions.h"
36
#include "llvm/Support/CodeGen.h"
37
#include "llvm/Support/CommandLine.h"
38
#include "llvm/Support/TargetParser.h"
39
#include "llvm/Target/TargetOptions.h"
40
41
using namespace llvm;
42
43
#define DEBUG_TYPE "arm-subtarget"
44
45
#define GET_SUBTARGETINFO_TARGET_DESC
46
#define GET_SUBTARGETINFO_CTOR
47
#include "ARMGenSubtargetInfo.inc"
48
49
static cl::opt<bool>
50
UseFusedMulOps("arm-use-mulops",
51
               cl::init(true), cl::Hidden);
52
53
enum ITMode {
54
  DefaultIT,
55
  RestrictedIT,
56
  NoRestrictedIT
57
};
58
59
static cl::opt<ITMode>
60
IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
61
   cl::ZeroOrMore,
62
   cl::values(clEnumValN(DefaultIT, "arm-default-it",
63
                         "Generate IT block based on arch"),
64
              clEnumValN(RestrictedIT, "arm-restrict-it",
65
                         "Disallow deprecated IT based on ARMv8"),
66
              clEnumValN(NoRestrictedIT, "arm-no-restrict-it",
67
                         "Allow IT blocks based on ARMv7")));
68
69
/// ForceFastISel - Use the fast-isel, even for subtargets where it is not
70
/// currently supported (for testing only).
71
static cl::opt<bool>
72
ForceFastISel("arm-force-fast-isel",
73
               cl::init(false), cl::Hidden);
74
75
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
76
/// so that we can use initializer lists for subtarget initialization.
77
ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
78
7.46k
                                                            StringRef FS) {
79
7.46k
  initializeEnvironment();
80
7.46k
  initSubtargetFeatures(CPU, FS);
81
7.46k
  return *this;
82
7.46k
}
83
84
ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
85
7.46k
                                                        StringRef FS) {
86
7.46k
  ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS);
87
7.46k
  if (STI.isThumb1Only())
88
740
    return (ARMFrameLowering *)new Thumb1FrameLowering(STI);
89
6.72k
90
6.72k
  return new ARMFrameLowering(STI);
91
6.72k
}
92
93
ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
94
                           const std::string &FS,
95
                           const ARMBaseTargetMachine &TM, bool IsLittle,
96
                           bool MinSize)
97
    : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
98
      CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle),
99
      TargetTriple(TT), Options(TM.Options), TM(TM),
100
      FrameLowering(initializeFrameLowering(CPU, FS)),
101
      // At this point initializeSubtargetDependencies has been called so
102
      // we can query directly.
103
      InstrInfo(isThumb1Only()
104
                    ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this)
105
                    : !isThumb()
106
                          ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
107
                          : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
108
7.46k
      TLInfo(TM, *this) {
109
7.46k
110
7.46k
  CallLoweringInfo.reset(new ARMCallLowering(*getTargetLowering()));
111
7.46k
  Legalizer.reset(new ARMLegalizerInfo(*this));
112
7.46k
113
7.46k
  auto *RBI = new ARMRegisterBankInfo(*getRegisterInfo());
114
7.46k
115
7.46k
  // FIXME: At this point, we can't rely on Subtarget having RBI.
116
7.46k
  // It's awkward to mix passing RBI and the Subtarget; should we pass
117
7.46k
  // TII/TRI as well?
118
7.46k
  InstSelector.reset(createARMInstructionSelector(
119
7.46k
      *static_cast<const ARMBaseTargetMachine *>(&TM), *this, *RBI));
120
7.46k
121
7.46k
  RegBankInfo.reset(RBI);
122
7.46k
}
123
124
757
const CallLowering *ARMSubtarget::getCallLowering() const {
125
757
  return CallLoweringInfo.get();
126
757
}
127
128
588
const InstructionSelector *ARMSubtarget::getInstructionSelector() const {
129
588
  return InstSelector.get();
130
588
}
131
132
807
const LegalizerInfo *ARMSubtarget::getLegalizerInfo() const {
133
807
  return Legalizer.get();
134
807
}
135
136
538
const RegisterBankInfo *ARMSubtarget::getRegBankInfo() const {
137
538
  return RegBankInfo.get();
138
538
}
139
140
8
bool ARMSubtarget::isXRaySupported() const {
141
8
  // We don't currently support Thumb, but Windows requires Thumb.
142
8
  return hasV6Ops() && hasARMOps() && !isTargetWindows();
143
8
}
144
145
7.46k
void ARMSubtarget::initializeEnvironment() {
146
7.46k
  // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this
147
7.46k
  // directly from it, but we can try to make sure they're consistent when both
148
7.46k
  // available.
149
7.46k
  UseSjLjEH = (isTargetDarwin() && 
!isTargetWatchABI()1.91k
&&
150
7.46k
               
Options.ExceptionModel == ExceptionHandling::None1.57k
) ||
151
7.46k
              
Options.ExceptionModel == ExceptionHandling::SjLj6.68k
;
152
7.46k
  assert((!TM.getMCAsmInfo() ||
153
7.46k
          (TM.getMCAsmInfo()->getExceptionHandlingType() ==
154
7.46k
           ExceptionHandling::SjLj) == UseSjLjEH) &&
155
7.46k
         "inconsistent sjlj choice between CodeGen and MC");
156
7.46k
}
157
158
7.46k
void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
159
7.46k
  if (CPUString.empty()) {
160
4.21k
    CPUString = "generic";
161
4.21k
162
4.21k
    if (isTargetDarwin()) {
163
582
      StringRef ArchName = TargetTriple.getArchName();
164
582
      ARM::ArchKind AK = ARM::parseArch(ArchName);
165
582
      if (AK == ARM::ArchKind::ARMV7S)
166
37
        // Default to the Swift CPU when targeting armv7s/thumbv7s.
167
37
        CPUString = "swift";
168
545
      else if (AK == ARM::ArchKind::ARMV7K)
169
19
        // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k.
170
19
        // ARMv7k does not use SjLj exception handling.
171
19
        CPUString = "cortex-a7";
172
582
    }
173
4.21k
  }
174
7.46k
175
7.46k
  // Insert the architecture feature derived from the target triple into the
176
7.46k
  // feature string. This is important for setting features that are implied
177
7.46k
  // based on the architecture version.
178
7.46k
  std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple, CPUString);
179
7.46k
  if (!FS.empty()) {
180
4.46k
    if (!ArchFS.empty())
181
3.67k
      ArchFS = (Twine(ArchFS) + "," + FS).str();
182
795
    else
183
795
      ArchFS = FS;
184
4.46k
  }
185
7.46k
  ParseSubtargetFeatures(CPUString, ArchFS);
186
7.46k
187
7.46k
  // FIXME: This used to enable V6T2 support implicitly for Thumb2 mode.
188
7.46k
  // Assert this for now to make the change obvious.
189
7.46k
  assert(hasV6T2Ops() || !hasThumb2());
190
7.46k
191
7.46k
  // Execute only support requires movt support
192
7.46k
  if (genExecuteOnly()) {
193
50
    NoMovt = false;
194
50
    assert(hasV8MBaselineOps() && "Cannot generate execute-only code for this target");
195
50
  }
196
7.46k
197
7.46k
  // Keep a pointer to static instruction cost data for the specified CPU.
198
7.46k
  SchedModel = getSchedModelForCPU(CPUString);
199
7.46k
200
7.46k
  // Initialize scheduling itinerary for the specified CPU.
201
7.46k
  InstrItins = getInstrItineraryForCPU(CPUString);
202
7.46k
203
7.46k
  // FIXME: this is invalid for WindowsCE
204
7.46k
  if (isTargetWindows())
205
87
    NoARM = true;
206
7.46k
207
7.46k
  if (isAAPCS_ABI())
208
6.19k
    stackAlignment = 8;
209
7.46k
  if (isTargetNaCl() || 
isAAPCS16_ABI()7.45k
)
210
351
    stackAlignment = 16;
211
7.46k
212
7.46k
  // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
213
7.46k
  // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
214
7.46k
  // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
215
7.46k
  // support in the assembler and linker to be used. This would need to be
216
7.46k
  // fixed to fully support tail calls in Thumb1.
217
7.46k
  //
218
7.46k
  // For ARMv8-M, we /do/ implement tail calls.  Doing this is tricky for v8-M
219
7.46k
  // baseline, since the LDM/POP instruction on Thumb doesn't take LR.  This
220
7.46k
  // means if we need to reload LR, it takes extra instructions, which outweighs
221
7.46k
  // the value of the tail call; but here we don't know yet whether LR is going
222
7.46k
  // to be used. We take the optimistic approach of generating the tail call and
223
7.46k
  // perhaps taking a hit if we need to restore the LR.
224
7.46k
225
7.46k
  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
226
7.46k
  // but we need to make sure there are enough registers; the only valid
227
7.46k
  // registers are the 4 used for parameters.  We don't currently do this
228
7.46k
  // case.
229
7.46k
230
7.46k
  SupportsTailCall = !isThumb() || 
hasV8MBaselineOps()4.26k
;
231
7.46k
232
7.46k
  if (isTargetMachO() && 
isTargetIOS()2.59k
&&
getTargetTriple().isOSVersionLT(5, 0)1.19k
)
233
291
    SupportsTailCall = false;
234
7.46k
235
7.46k
  switch (IT) {
236
7.46k
  case DefaultIT:
237
7.43k
    RestrictIT = hasV8Ops();
238
7.43k
    break;
239
7.46k
  case RestrictedIT:
240
20
    RestrictIT = true;
241
20
    break;
242
7.46k
  case NoRestrictedIT:
243
10
    RestrictIT = false;
244
10
    break;
245
7.46k
  }
246
7.46k
247
7.46k
  // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
248
7.46k
  const FeatureBitset &Bits = getFeatureBits();
249
7.46k
  if ((Bits[ARM::ProcA5] || 
Bits[ARM::ProcA8]7.44k
) && // Where this matters
250
7.46k
      
(348
Options.UnsafeFPMath348
||
isTargetDarwin()318
))
251
156
    UseNEONForSinglePrecisionFP = true;
252
7.46k
253
7.46k
  if (isRWPI())
254
75
    ReserveR9 = true;
255
7.46k
256
7.46k
  // FIXME: Teach TableGen to deal with these instead of doing it manually here.
257
7.46k
  switch (ARMProcFamily) {
258
7.46k
  case Others:
259
5.78k
  case CortexA5:
260
5.78k
    break;
261
5.78k
  case CortexA7:
262
437
    LdStMultipleTiming = DoubleIssue;
263
437
    break;
264
5.78k
  case CortexA8:
265
329
    LdStMultipleTiming = DoubleIssue;
266
329
    break;
267
5.78k
  case CortexA9:
268
113
    LdStMultipleTiming = DoubleIssueCheckUnalignedAccess;
269
113
    PreISelOperandLatencyAdjustment = 1;
270
113
    break;
271
5.78k
  case CortexA12:
272
10
    break;
273
5.78k
  case CortexA15:
274
50
    MaxInterleaveFactor = 2;
275
50
    PreISelOperandLatencyAdjustment = 1;
276
50
    PartialUpdateClearance = 12;
277
50
    break;
278
5.78k
  case CortexA17:
279
326
  case CortexA32:
280
326
  case CortexA35:
281
326
  case CortexA53:
282
326
  case CortexA55:
283
326
  case CortexA57:
284
326
  case CortexA72:
285
326
  case CortexA73:
286
326
  case CortexA75:
287
326
  case CortexA76:
288
326
  case CortexR4:
289
326
  case CortexR4F:
290
326
  case CortexR5:
291
326
  case CortexR7:
292
326
  case CortexM3:
293
326
  case CortexR52:
294
326
    break;
295
326
  case Exynos:
296
50
    LdStMultipleTiming = SingleIssuePlusExtras;
297
50
    MaxInterleaveFactor = 4;
298
50
    if (!isThumb())
299
50
      PrefLoopAlignment = 3;
300
50
    break;
301
326
  case Kryo:
302
0
    break;
303
326
  case Krait:
304
12
    PreISelOperandLatencyAdjustment = 1;
305
12
    break;
306
352
  case Swift:
307
352
    MaxInterleaveFactor = 2;
308
352
    LdStMultipleTiming = SingleIssuePlusExtras;
309
352
    PreISelOperandLatencyAdjustment = 1;
310
352
    PartialUpdateClearance = 12;
311
352
    break;
312
7.46k
  }
313
7.46k
}
314
315
1.07k
bool ARMSubtarget::isTargetHardFloat() const { return TM.isTargetHardFloat(); }
316
317
5
bool ARMSubtarget::isAPCS_ABI() const {
318
5
  assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
319
5
  return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_APCS;
320
5
}
321
484k
bool ARMSubtarget::isAAPCS_ABI() const {
322
484k
  assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
323
484k
  return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS ||
324
484k
         
TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16379k
;
325
484k
}
326
7.45k
bool ARMSubtarget::isAAPCS16_ABI() const {
327
7.45k
  assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
328
7.45k
  return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
329
7.45k
}
330
331
131k
bool ARMSubtarget::isROPI() const {
332
131k
  return TM.getRelocationModel() == Reloc::ROPI ||
333
131k
         
TM.getRelocationModel() == Reloc::ROPI_RWPI130k
;
334
131k
}
335
19.6k
bool ARMSubtarget::isRWPI() const {
336
19.6k
  return TM.getRelocationModel() == Reloc::RWPI ||
337
19.6k
         
TM.getRelocationModel() == Reloc::ROPI_RWPI19.4k
;
338
19.6k
}
339
340
88.6k
bool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
341
88.6k
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
342
23.3k
    return true;
343
65.3k
344
65.3k
  // 32 bit macho has no relocation for a-b if a is undefined, even if b is in
345
65.3k
  // the section that is being relocated. This means we have to use a load even
346
65.3k
  // for GVs that are known to be local to the dso.
347
65.3k
  if (isTargetMachO() && 
TM.isPositionIndependent()65.2k
&&
348
65.3k
      
(64.6k
GV->isDeclarationForLinker()64.6k
||
GV->hasCommonLinkage()64.3k
))
349
347
    return true;
350
65.0k
351
65.0k
  return false;
352
65.0k
}
353
354
64
bool ARMSubtarget::isGVInGOT(const GlobalValue *GV) const {
355
64
  return isTargetELF() && 
TM.isPositionIndependent()32
&&
356
64
         
!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)32
;
357
64
}
358
359
27.1k
unsigned ARMSubtarget::getMispredictionPenalty() const {
360
27.1k
  return SchedModel.MispredictPenalty;
361
27.1k
}
362
363
156k
bool ARMSubtarget::enableMachineScheduler() const {
364
156k
  // The MachineScheduler can increase register usage, so we use more high
365
156k
  // registers and end up with more T2 instructions that cannot be converted to
366
156k
  // T1 instructions. At least until we do better at converting to thumb1
367
156k
  // instructions, on cortex-m at Oz where we are size-paranoid, don't use the
368
156k
  // Machine scheduler, relying on the DAG register pressure scheduler instead.
369
156k
  if (isMClass() && 
hasMinSize()18.6k
)
370
6.96k
    return false;
371
149k
  // Enable the MachineScheduler before register allocation for subtargets
372
149k
  // with the use-misched feature.
373
149k
  return useMachineScheduler();
374
149k
}
375
376
// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
377
25.0k
bool ARMSubtarget::enablePostRAScheduler() const {
378
25.0k
  if (disablePostRAScheduler())
379
3.08k
    return false;
380
21.9k
  // Don't reschedule potential IT blocks.
381
21.9k
  return !isThumb1Only();
382
21.9k
}
383
384
26.6k
bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }
385
386
2.28k
bool ARMSubtarget::useStride4VFPs() const {
387
2.28k
  // For general targets, the prologue can grow when VFPs are allocated with
388
2.28k
  // stride 4 (more vpush instructions). But WatchOS uses a compact unwind
389
2.28k
  // format which it's more important to get right.
390
2.28k
  return isTargetWatchABI() ||
391
2.28k
         
(2.10k
useWideStrideVFP()2.10k
&&
!OptMinSize551
);
392
2.28k
}
393
394
161k
bool ARMSubtarget::useMovt() const {
395
161k
  // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
396
161k
  // immediates as it is inherently position independent, and may be out of
397
161k
  // range otherwise.
398
161k
  return !NoMovt && 
hasV8MBaselineOps()159k
&&
399
161k
         
(153k
isTargetWindows()153k
||
!OptMinSize153k
||
genExecuteOnly()2.59k
);
400
161k
}
401
402
1.32M
bool ARMSubtarget::useFastISel() const {
403
1.32M
  // Enable fast-isel for any target, for testing only.
404
1.32M
  if (ForceFastISel)
405
92
    return true;
406
1.32M
407
1.32M
  // Limit fast-isel to the targets that are or have been tested.
408
1.32M
  if (!hasV6Ops())
409
67.3k
    return false;
410
1.25M
411
1.25M
  // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl.
412
1.25M
  return TM.Options.EnableFastISel &&
413
1.25M
         
(45.0k
(45.0k
isTargetMachO()45.0k
&&
!isThumb1Only()32.7k
) ||
414
45.0k
          
(12.3k
isTargetLinux()12.3k
&&
!isThumb()5.13k
) ||
(8.69k
isTargetNaCl()8.69k
&&
!isThumb()166
));
415
1.25M
}
416
417
14.7k
unsigned ARMSubtarget::getGPRAllocationOrder(const MachineFunction &MF) const {
418
14.7k
  // The GPR register class has multiple possible allocation orders, with
419
14.7k
  // tradeoffs preferred by different sub-architectures and optimisation goals.
420
14.7k
  // The allocation orders are:
421
14.7k
  // 0: (the default tablegen order, not used)
422
14.7k
  // 1: r14, r0-r13
423
14.7k
  // 2: r0-r7
424
14.7k
  // 3: r0-r7, r12, lr, r8-r11
425
14.7k
  // Note that the register allocator will change this order so that
426
14.7k
  // callee-saved registers are used later, as they require extra work in the
427
14.7k
  // prologue/epilogue (though we sometimes override that).
428
14.7k
429
14.7k
  // For thumb1-only targets, only the low registers are allocatable.
430
14.7k
  if (isThumb1Only())
431
1
    return 2;
432
14.7k
433
14.7k
  // Allocate low registers first, so we can select more 16-bit instructions.
434
14.7k
  // We also (in ignoreCSRForAllocationOrder) override  the default behaviour
435
14.7k
  // with regards to callee-saved registers, because pushing extra registers is
436
14.7k
  // much cheaper (in terms of code size) than using high registers. After
437
14.7k
  // that, we allocate r12 (doesn't need to be saved), lr (saving it means we
438
14.7k
  // can return with the pop, don't need an extra "bx lr") and then the rest of
439
14.7k
  // the high registers.
440
14.7k
  if (isThumb2() && 
MF.getFunction().hasMinSize()12.0k
)
441
3.24k
    return 3;
442
11.5k
443
11.5k
  // Otherwise, allocate in the default order, using LR first because saving it
444
11.5k
  // allows a shorter epilogue sequence.
445
11.5k
  return 1;
446
11.5k
}
447
448
bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
449
178k
                                               unsigned PhysReg) const {
450
178k
  // To minimize code size in Thumb2, we prefer the usage of low regs (lower
451
178k
  // cost per use) so we can  use narrow encoding. By default, caller-saved
452
178k
  // registers (e.g. lr, r12) are always  allocated first, regardless of
453
178k
  // their cost per use. When optForMinSize, we prefer the low regs even if
454
178k
  // they are CSR because usually push/pop can be folded into existing ones.
455
178k
  return isThumb2() && 
MF.getFunction().hasMinSize()139k
&&
456
178k
         
ARM::GPRRegClass.contains(PhysReg)31.4k
;
457
178k
}