Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/X86/X86Subtarget.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the X86 specific subclass of TargetSubtargetInfo.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "X86.h"
14
15
#include "X86CallLowering.h"
16
#include "X86LegalizerInfo.h"
17
#include "X86MacroFusion.h"
18
#include "X86RegisterBankInfo.h"
19
#include "X86Subtarget.h"
20
#include "MCTargetDesc/X86BaseInfo.h"
21
#include "X86TargetMachine.h"
22
#include "llvm/ADT/Triple.h"
23
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
24
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
25
#include "llvm/IR/Attributes.h"
26
#include "llvm/IR/ConstantRange.h"
27
#include "llvm/IR/Function.h"
28
#include "llvm/IR/GlobalValue.h"
29
#include "llvm/Support/Casting.h"
30
#include "llvm/Support/CodeGen.h"
31
#include "llvm/Support/CommandLine.h"
32
#include "llvm/Support/Debug.h"
33
#include "llvm/Support/ErrorHandling.h"
34
#include "llvm/Support/raw_ostream.h"
35
#include "llvm/Target/TargetMachine.h"
36
37
#if defined(_MSC_VER)
38
#include <intrin.h>
39
#endif
40
41
using namespace llvm;
42
43
#define DEBUG_TYPE "subtarget"
44
45
#define GET_SUBTARGETINFO_TARGET_DESC
46
#define GET_SUBTARGETINFO_CTOR
47
#include "X86GenSubtargetInfo.inc"
48
49
// Temporary option to control early if-conversion for x86 while adding machine
50
// models.
51
static cl::opt<bool>
52
X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
53
               cl::desc("Enable early if-conversion on X86"));
54
55
56
/// Classify a blockaddress reference for the current subtarget according to how
57
/// we should reference it in a non-pcrel context.
58
26
unsigned char X86Subtarget::classifyBlockAddressReference() const {
59
26
  return classifyLocalReference(nullptr);
60
26
}
61
62
/// Classify a global variable reference for the current subtarget according to
63
/// how we should reference it in a non-pcrel context.
64
unsigned char
65
102k
X86Subtarget::classifyGlobalReference(const GlobalValue *GV) const {
66
102k
  return classifyGlobalReference(GV, *GV->getParent());
67
102k
}
68
69
unsigned char
70
153k
X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
71
153k
  // If we're not PIC, it's not very interesting.
72
153k
  if (!isPositionIndependent())
73
46.0k
    return X86II::MO_NO_FLAG;
74
107k
75
107k
  if (is64Bit()) {
76
92.9k
    // 64-bit ELF PIC local references may use GOTOFF relocations.
77
92.9k
    if (isTargetELF()) {
78
419
      switch (TM.getCodeModel()) {
79
419
      // 64-bit small code model is simple: All rip-relative.
80
419
      case CodeModel::Tiny:
81
0
        llvm_unreachable("Tiny codesize model not supported on X86");
82
419
      case CodeModel::Small:
83
407
      case CodeModel::Kernel:
84
407
        return X86II::MO_NO_FLAG;
85
407
86
407
      // The large PIC code model uses GOTOFF.
87
407
      case CodeModel::Large:
88
7
        return X86II::MO_GOTOFF;
89
407
90
407
      // Medium is a hybrid: RIP-rel for code, GOTOFF for DSO local data.
91
407
      case CodeModel::Medium:
92
5
        if (isa<Function>(GV))
93
2
          return X86II::MO_NO_FLAG; // All code is RIP-relative
94
3
        return X86II::MO_GOTOFF;    // Local symbols use GOTOFF.
95
0
      }
96
0
      llvm_unreachable("invalid code model");
97
0
    }
98
92.5k
99
92.5k
    // Otherwise, this is either a RIP-relative reference or a 64-bit movabsq,
100
92.5k
    // both of which use MO_NO_FLAG.
101
92.5k
    return X86II::MO_NO_FLAG;
102
92.5k
  }
103
14.4k
104
14.4k
  // The COFF dynamic linker just patches the executable sections.
105
14.4k
  if (isTargetCOFF())
106
33
    return X86II::MO_NO_FLAG;
107
14.4k
108
14.4k
  if (isTargetDarwin()) {
109
14.1k
    // 32 bit macho has no relocation for a-b if a is undefined, even if
110
14.1k
    // b is in the section that is being relocated.
111
14.1k
    // This means we have to use a load even for GVs that are known to be
112
14.1k
    // local to the dso.
113
14.1k
    if (GV && 
(13.8k
GV->isDeclarationForLinker()13.8k
||
GV->hasCommonLinkage()13.7k
))
114
67
      return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
115
14.0k
116
14.0k
    return X86II::MO_PIC_BASE_OFFSET;
117
14.0k
  }
118
277
119
277
  return X86II::MO_GOTOFF;
120
277
}
121
122
unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
123
185k
                                                    const Module &M) const {
124
185k
  // The static large model never uses stubs.
125
185k
  if (TM.getCodeModel() == CodeModel::Large && 
!isPositionIndependent()414
)
126
384
    return X86II::MO_NO_FLAG;
127
184k
128
184k
  // Absolute symbols can be referenced directly.
129
184k
  if (GV) {
130
184k
    if (Optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) {
131
28
      // See if we can use the 8-bit immediate form. Note that some instructions
132
28
      // will sign extend the immediate operand, so to be conservative we only
133
28
      // accept the range [0,128).
134
28
      if (CR->getUnsignedMax().ult(128))
135
9
        return X86II::MO_ABS8;
136
19
      else
137
19
        return X86II::MO_NO_FLAG;
138
184k
    }
139
184k
  }
140
184k
141
184k
  if (TM.shouldAssumeDSOLocal(M, GV))
142
117k
    return classifyLocalReference(GV);
143
66.8k
144
66.8k
  if (isTargetCOFF()) {
145
185
    if (GV->hasDLLImportStorageClass())
146
45
      return X86II::MO_DLLIMPORT;
147
140
    return X86II::MO_COFFSTUB;
148
140
  }
149
66.6k
150
66.6k
  if (is64Bit()) {
151
54.8k
    // ELF supports a large, truly PIC code model with non-PC relative GOT
152
54.8k
    // references. Other object file formats do not. Use the no-flag, 64-bit
153
54.8k
    // reference for them.
154
54.8k
    if (TM.getCodeModel() == CodeModel::Large)
155
16
      return isTargetELF() ? 
X86II::MO_GOT4
:
X86II::MO_NO_FLAG12
;
156
54.8k
    return X86II::MO_GOTPCREL;
157
54.8k
  }
158
11.8k
159
11.8k
  if (isTargetDarwin()) {
160
11.4k
    if (!isPositionIndependent())
161
1.96k
      return X86II::MO_DARWIN_NONLAZY;
162
9.43k
    return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
163
9.43k
  }
164
412
165
412
  return X86II::MO_GOT;
166
412
}
167
168
unsigned char
169
923
X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV) const {
170
923
  return classifyGlobalFunctionReference(GV, *GV->getParent());
171
923
}
172
173
unsigned char
174
X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
175
140k
                                              const Module &M) const {
176
140k
  if (TM.shouldAssumeDSOLocal(M, GV))
177
24.0k
    return X86II::MO_NO_FLAG;
178
116k
179
116k
  // Functions on COFF can be non-DSO local for two reasons:
180
116k
  // - They are marked dllimport
181
116k
  // - They are extern_weak, and a stub is needed
182
116k
  if (isTargetCOFF()) {
183
40
    if (GV->hasDLLImportStorageClass())
184
38
      return X86II::MO_DLLIMPORT;
185
2
    return X86II::MO_COFFSTUB;
186
2
  }
187
116k
188
116k
  const Function *F = dyn_cast_or_null<Function>(GV);
189
116k
190
116k
  if (isTargetELF()) {
191
418
    if (is64Bit() && 
F259
&&
(CallingConv::X86_RegCall == F->getCallingConv())199
)
192
4
      // According to psABI, PLT stub clobbers XMM8-XMM15.
193
4
      // In Regcall calling convention those registers are used for passing
194
4
      // parameters. Thus we need to prevent lazy binding in Regcall.
195
4
      return X86II::MO_GOTPCREL;
196
414
    // If PLT must be avoided then the call should be via GOTPCREL.
197
414
    if (((F && 
F->hasFnAttribute(Attribute::NonLazyBind)288
) ||
198
414
         
(403
!F403
&&
M.getRtLibUseGOT()127
)) &&
199
414
        
is64Bit()16
)
200
12
       return X86II::MO_GOTPCREL;
201
402
    return X86II::MO_PLT;
202
402
  }
203
115k
204
115k
  if (is64Bit()) {
205
100k
    if (F && 
F->hasFnAttribute(Attribute::NonLazyBind)100k
)
206
14
      // If the function is marked as non-lazy, generate an indirect call
207
14
      // which loads from the GOT directly. This avoids runtime overhead
208
14
      // at the cost of eager binding (and one extra byte of encoding).
209
14
      return X86II::MO_GOTPCREL;
210
100k
    return X86II::MO_NO_FLAG;
211
100k
  }
212
14.8k
213
14.8k
  return X86II::MO_NO_FLAG;
214
14.8k
}
215
216
/// Return true if the subtarget allows calls to immediate address.
217
15.2k
bool X86Subtarget::isLegalToCallImmediateAddr() const {
218
15.2k
  // FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32
219
15.2k
  // but WinCOFFObjectWriter::RecordRelocation cannot emit them.  Once it does,
220
15.2k
  // the following check for Win32 should be removed.
221
15.2k
  if (In64BitMode || 
isTargetWin32()3.17k
)
222
12.4k
    return false;
223
2.82k
  return isTargetELF() || 
TM.getRelocationModel() == Reloc::Static1.06k
;
224
2.82k
}
225
226
15.2k
void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
227
15.2k
  std::string CPUName = CPU;
228
15.2k
  if (CPUName.empty())
229
9.75k
    CPUName = "generic";
230
15.2k
231
15.2k
  std::string FullFS = FS;
232
15.2k
  if (In64BitMode) {
233
12.1k
    // SSE2 should default to enabled in 64-bit mode, but can be turned off
234
12.1k
    // explicitly.
235
12.1k
    if (!FullFS.empty())
236
7.24k
      FullFS = "+sse2," + FullFS;
237
4.89k
    else
238
4.89k
      FullFS = "+sse2";
239
12.1k
240
12.1k
    // If no CPU was specified, enable 64bit feature to satisfy later check.
241
12.1k
    if (CPUName == "generic") {
242
7.79k
      if (!FullFS.empty())
243
7.79k
        FullFS = "+64bit," + FullFS;
244
18.4E
      else
245
18.4E
        FullFS = "+64bit";
246
7.79k
    }
247
12.1k
  }
248
15.2k
249
15.2k
  // LAHF/SAHF are always supported in non-64-bit mode.
250
15.2k
  if (!In64BitMode) {
251
3.16k
    if (!FullFS.empty())
252
1.65k
      FullFS = "+sahf," + FullFS;
253
1.50k
    else
254
1.50k
      FullFS = "+sahf";
255
3.16k
  }
256
15.2k
257
15.2k
  // Parse features string and set the CPU.
258
15.2k
  ParseSubtargetFeatures(CPUName, FullFS);
259
15.2k
260
15.2k
  // All CPUs that implement SSE4.2 or SSE4A support unaligned accesses of
261
15.2k
  // 16-bytes and under that are reasonably fast. These features were
262
15.2k
  // introduced with Intel's Nehalem/Silvermont and AMD's Family10h
263
15.2k
  // micro-architectures respectively.
264
15.2k
  if (hasSSE42() || 
hasSSE4A()10.2k
)
265
5.10k
    IsUAMem16Slow = false;
266
15.2k
267
15.2k
  // It's important to keep the MCSubtargetInfo feature bits in sync with
268
15.2k
  // target data structure which is shared with MC code emitter, etc.
269
15.2k
  if (In64BitMode)
270
12.1k
    ToggleFeature(X86::Mode64Bit);
271
3.15k
  else if (In32BitMode)
272
3.16k
    ToggleFeature(X86::Mode32Bit);
273
18.4E
  else if (In16BitMode)
274
1
    ToggleFeature(X86::Mode16Bit);
275
18.4E
  else
276
18.4E
    
llvm_unreachable18.4E
("Not 16-bit, 32-bit or 64-bit mode!");
277
15.2k
278
15.2k
  LLVM_DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
279
15.2k
                    << ", 3DNowLevel " << X863DNowLevel << ", 64bit "
280
15.2k
                    << HasX86_64 << "\n");
281
15.2k
  if (In64BitMode && 
!HasX86_6412.1k
)
282
29
    report_fatal_error("64-bit code requested on a subtarget that doesn't "
283
29
                       "support it!");
284
15.2k
285
15.2k
  // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both
286
15.2k
  // 32 and 64 bit) and for all 64-bit targets.
287
15.2k
  if (StackAlignOverride)
288
16
    stackAlignment = StackAlignOverride;
289
15.2k
  else if (isTargetDarwin() || 
isTargetLinux()8.75k
||
isTargetSolaris()5.57k
||
290
15.2k
           
isTargetKFreeBSD()5.56k
||
In64BitMode5.56k
)
291
13.7k
    stackAlignment = 16;
292
15.2k
293
15.2k
  // Some CPUs have more overhead for gather. The specified overhead is relative
294
15.2k
  // to the Load operation. "2" is the number provided by Intel architects. This
295
15.2k
  // parameter is used for cost estimation of Gather Op and comparison with
296
15.2k
  // other alternatives.
297
15.2k
  // TODO: Remove the explicit hasAVX512()? That would mean we would only
298
15.2k
  // enable gather with a -march.
299
15.2k
  if (hasAVX512() || 
(13.5k
hasAVX2()13.5k
&&
hasFastGather()1.73k
))
300
1.69k
    GatherOverhead = 2;
301
15.2k
  if (hasAVX512())
302
1.66k
    ScatterOverhead = 2;
303
15.2k
304
15.2k
  // Consume the vector width attribute or apply any target specific limit.
305
15.2k
  if (PreferVectorWidthOverride)
306
26
    PreferVectorWidth = PreferVectorWidthOverride;
307
15.2k
  else if (Prefer256Bit)
308
49
    PreferVectorWidth = 256;
309
15.2k
}
310
311
X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
312
15.2k
                                                            StringRef FS) {
313
15.2k
  initSubtargetFeatures(CPU, FS);
314
15.2k
  return *this;
315
15.2k
}
316
317
X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
318
                           const X86TargetMachine &TM,
319
                           unsigned StackAlignOverride,
320
                           unsigned PreferVectorWidthOverride,
321
                           unsigned RequiredVectorWidth)
322
    : X86GenSubtargetInfo(TT, CPU, FS),
323
      PICStyle(PICStyles::None), TM(TM), TargetTriple(TT),
324
      StackAlignOverride(StackAlignOverride),
325
      PreferVectorWidthOverride(PreferVectorWidthOverride),
326
      RequiredVectorWidth(RequiredVectorWidth),
327
      In64BitMode(TargetTriple.getArch() == Triple::x86_64),
328
      In32BitMode(TargetTriple.getArch() == Triple::x86 &&
329
                  TargetTriple.getEnvironment() != Triple::CODE16),
330
      In16BitMode(TargetTriple.getArch() == Triple::x86 &&
331
                  TargetTriple.getEnvironment() == Triple::CODE16),
332
      InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
333
15.2k
      FrameLowering(*this, getStackAlignment()) {
334
15.2k
  // Determine the PICStyle based on the target selected.
335
15.2k
  if (!isPositionIndependent())
336
8.36k
    setPICStyle(PICStyles::None);
337
6.93k
  else if (is64Bit())
338
6.21k
    setPICStyle(PICStyles::RIPRel);
339
725
  else if (isTargetCOFF())
340
29
    setPICStyle(PICStyles::None);
341
696
  else if (isTargetDarwin())
342
560
    setPICStyle(PICStyles::StubPIC);
343
136
  else if (isTargetELF())
344
105
    setPICStyle(PICStyles::GOT);
345
15.2k
346
15.2k
  CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering()));
347
15.2k
  Legalizer.reset(new X86LegalizerInfo(*this, TM));
348
15.2k
349
15.2k
  auto *RBI = new X86RegisterBankInfo(*getRegisterInfo());
350
15.2k
  RegBankInfo.reset(RBI);
351
15.2k
  InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI));
352
15.2k
}
353
354
481
const CallLowering *X86Subtarget::getCallLowering() const {
355
481
  return CallLoweringInfo.get();
356
481
}
357
358
1.04k
const InstructionSelector *X86Subtarget::getInstructionSelector() const {
359
1.04k
  return InstSelector.get();
360
1.04k
}
361
362
683
const LegalizerInfo *X86Subtarget::getLegalizerInfo() const {
363
683
  return Legalizer.get();
364
683
}
365
366
1.10k
const RegisterBankInfo *X86Subtarget::getRegBankInfo() const {
367
1.10k
  return RegBankInfo.get();
368
1.10k
}
369
370
135k
bool X86Subtarget::enableEarlyIfConversion() const {
371
135k
  return hasCMov() && 
X86EarlyIfConv128k
;
372
135k
}
373
374
void X86Subtarget::getPostRAMutations(
375
1.13k
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
376
1.13k
  Mutations.push_back(createX86MacroFusionDAGMutation());
377
1.13k
}