Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// Top-level implementation for the PowerPC target.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "PPCTargetMachine.h"
15
#include "MCTargetDesc/PPCMCTargetDesc.h"
16
#include "PPC.h"
17
#include "PPCSubtarget.h"
18
#include "PPCTargetObjectFile.h"
19
#include "PPCTargetTransformInfo.h"
20
#include "llvm/ADT/Optional.h"
21
#include "llvm/ADT/STLExtras.h"
22
#include "llvm/ADT/StringRef.h"
23
#include "llvm/ADT/Triple.h"
24
#include "llvm/Analysis/TargetTransformInfo.h"
25
#include "llvm/CodeGen/Passes.h"
26
#include "llvm/CodeGen/TargetPassConfig.h"
27
#include "llvm/IR/Attributes.h"
28
#include "llvm/IR/DataLayout.h"
29
#include "llvm/IR/Function.h"
30
#include "llvm/Pass.h"
31
#include "llvm/Support/CodeGen.h"
32
#include "llvm/Support/CommandLine.h"
33
#include "llvm/Support/TargetRegistry.h"
34
#include "llvm/Target/TargetLoweringObjectFile.h"
35
#include "llvm/Target/TargetOptions.h"
36
#include "llvm/Transforms/Scalar.h"
37
#include <cassert>
38
#include <memory>
39
#include <string>
40
41
using namespace llvm;
42
43
44
static cl::opt<bool>
45
    EnableBranchCoalescing("enable-ppc-branch-coalesce", cl::Hidden,
46
                           cl::desc("enable coalescing of duplicate branches for PPC"));
47
static cl::
48
opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
49
                        cl::desc("Disable CTR loops for PPC"));
50
51
static cl::
52
opt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden,
53
                            cl::desc("Disable PPC loop preinc prep"));
54
55
static cl::opt<bool>
56
VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
57
  cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"));
58
59
static cl::
60
opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
61
                                cl::desc("Disable VSX Swap Removal for PPC"));
62
63
static cl::
64
opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden,
65
                              cl::desc("Disable QPX load splat simplification"));
66
67
static cl::
68
opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
69
                            cl::desc("Disable machine peepholes for PPC"));
70
71
static cl::opt<bool>
72
EnableGEPOpt("ppc-gep-opt", cl::Hidden,
73
             cl::desc("Enable optimizations on complex GEPs"),
74
             cl::init(true));
75
76
static cl::opt<bool>
77
EnablePrefetch("enable-ppc-prefetching",
78
                  cl::desc("disable software prefetching on PPC"),
79
                  cl::init(false), cl::Hidden);
80
81
static cl::opt<bool>
82
EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
83
                      cl::desc("Add extra TOC register dependencies"),
84
                      cl::init(true), cl::Hidden);
85
86
static cl::opt<bool>
87
EnableMachineCombinerPass("ppc-machine-combiner",
88
                          cl::desc("Enable the machine combiner pass"),
89
                          cl::init(true), cl::Hidden);
90
91
123k
extern "C" void LLVMInitializePowerPCTarget() {
92
123k
  // Register the targets
93
123k
  RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
94
123k
  RegisterTargetMachine<PPCTargetMachine> B(getThePPC64Target());
95
123k
  RegisterTargetMachine<PPCTargetMachine> C(getThePPC64LETarget());
96
123k
97
123k
  PassRegistry &PR = *PassRegistry::getPassRegistry();
98
123k
  initializePPCBoolRetToIntPass(PR);
99
123k
  initializePPCExpandISELPass(PR);
100
123k
  initializePPCTLSDynamicCallPass(PR);
101
123k
}
102
103
/// Return the datalayout string of a subtarget.
104
1.81k
static std::string getDataLayoutString(const Triple &T) {
105
982
  bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
106
1.81k
  std::string Ret;
107
1.81k
108
1.81k
  // Most PPC* platforms are big endian, PPC64LE is little endian.
109
1.81k
  if (T.getArch() == Triple::ppc64le)
110
558
    Ret = "e";
111
1.81k
  else
112
1.26k
    Ret = "E";
113
1.81k
114
1.81k
  Ret += DataLayout::getManglingComponent(T);
115
1.81k
116
1.81k
  // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
117
1.81k
  // pointers.
118
1.81k
  if (
!is64Bit || 1.81k
T.getOS() == Triple::Lv21.39k
)
119
425
    Ret += "-p:32:32";
120
1.81k
121
1.81k
  // Note, the alignment values for f64 and i64 on ppc64 in Darwin
122
1.81k
  // documentation are wrong; these are correct (i.e. "what gcc does").
123
1.81k
  if (
is64Bit || 1.81k
!T.isOSDarwin()424
)
124
1.68k
    Ret += "-i64:64";
125
1.81k
  else
126
139
    Ret += "-f64:32:64";
127
1.81k
128
1.81k
  // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
129
1.81k
  if (is64Bit)
130
1.39k
    Ret += "-n32:64";
131
1.81k
  else
132
424
    Ret += "-n32";
133
1.81k
134
1.81k
  return Ret;
135
1.81k
}
136
137
static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
138
3.22k
                                      const Triple &TT) {
139
3.22k
  std::string FullFS = FS;
140
3.22k
141
3.22k
  // Make sure 64-bit features are available when CPUname is generic
142
3.22k
  if (
TT.getArch() == Triple::ppc64 || 3.22k
TT.getArch() == Triple::ppc64le1.62k
) {
143
2.42k
    if (!FullFS.empty())
144
1.54k
      FullFS = "+64bit," + FullFS;
145
2.42k
    else
146
880
      FullFS = "+64bit";
147
2.42k
  }
148
3.22k
149
3.22k
  if (
OL >= CodeGenOpt::Default3.22k
) {
150
2.43k
    if (!FullFS.empty())
151
2.10k
      FullFS = "+crbits," + FullFS;
152
2.43k
    else
153
333
      FullFS = "+crbits";
154
2.43k
  }
155
3.22k
156
3.22k
  if (
OL != CodeGenOpt::None3.22k
) {
157
2.49k
    if (!FullFS.empty())
158
2.49k
      FullFS = "+invariant-function-descriptors," + FullFS;
159
2.49k
    else
160
3
      FullFS = "+invariant-function-descriptors";
161
2.49k
  }
162
3.22k
163
3.22k
  return FullFS;
164
3.22k
}
165
166
1.81k
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
167
1.81k
  // If it isn't a Mach-O file then it's going to be a Linux ELF
168
1.81k
  // object file.
169
1.81k
  if (TT.isOSDarwin())
170
178
    return llvm::make_unique<TargetLoweringObjectFileMachO>();
171
1.64k
172
1.64k
  return llvm::make_unique<PPC64LinuxTargetObjectFile>();
173
1.64k
}
174
175
static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
176
1.81k
                                                 const TargetOptions &Options) {
177
1.81k
  if (Options.MCOptions.getABIName().startswith("elfv1"))
178
11
    return PPCTargetMachine::PPC_ABI_ELFv1;
179
1.80k
  else 
if (1.80k
Options.MCOptions.getABIName().startswith("elfv2")1.80k
)
180
12
    return PPCTargetMachine::PPC_ABI_ELFv2;
181
1.79k
182
1.81k
  assert(Options.MCOptions.getABIName().empty() &&
183
1.79k
         "Unknown target-abi option!");
184
1.79k
185
1.79k
  if (TT.isMacOSX())
186
178
    return PPCTargetMachine::PPC_ABI_UNKNOWN;
187
1.61k
188
1.61k
  switch (TT.getArch()) {
189
546
  case Triple::ppc64le:
190
546
    return PPCTargetMachine::PPC_ABI_ELFv2;
191
787
  case Triple::ppc64:
192
787
    return PPCTargetMachine::PPC_ABI_ELFv1;
193
285
  default:
194
285
    return PPCTargetMachine::PPC_ABI_UNKNOWN;
195
0
  }
196
0
}
197
198
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
199
1.81k
                                           Optional<Reloc::Model> RM) {
200
1.81k
  if (RM.hasValue())
201
452
    return *RM;
202
1.36k
203
1.36k
  // Darwin defaults to dynamic-no-pic.
204
1.36k
  
if (1.36k
TT.isOSDarwin()1.36k
)
205
158
    return Reloc::DynamicNoPIC;
206
1.20k
207
1.20k
  // Non-darwin 64-bit platforms are PIC by default.
208
1.20k
  
if (1.20k
TT.getArch() == Triple::ppc64 || 1.20k
TT.getArch() == Triple::ppc64le502
)
209
959
    return Reloc::PIC_;
210
250
211
250
  // 32-bit is static by default.
212
250
  return Reloc::Static;
213
250
}
214
215
static CodeModel::Model getEffectiveCodeModel(const Triple &TT,
216
                                              Optional<CodeModel::Model> CM,
217
1.81k
                                              bool JIT) {
218
1.81k
  if (CM)
219
52
    return *CM;
220
1.76k
  
if (1.76k
!TT.isOSDarwin() && 1.76k
!JIT1.58k
&&
221
1.58k
      
(TT.getArch() == Triple::ppc64 || 1.58k
TT.getArch() == Triple::ppc64le837
))
222
1.30k
    return CodeModel::Medium;
223
463
  return CodeModel::Small;
224
463
}
225
226
// The FeatureString here is a little subtle. We are modifying the feature
227
// string with what are (currently) non-function specific overrides as it goes
228
// into the LLVMTargetMachine constructor and then using the stored value in the
229
// Subtarget constructor below it.
230
PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
231
                                   StringRef CPU, StringRef FS,
232
                                   const TargetOptions &Options,
233
                                   Optional<Reloc::Model> RM,
234
                                   Optional<CodeModel::Model> CM,
235
                                   CodeGenOpt::Level OL, bool JIT)
236
    : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
237
                        computeFSAdditions(FS, OL, TT), Options,
238
                        getEffectiveRelocModel(TT, RM),
239
                        getEffectiveCodeModel(TT, CM, JIT), OL),
240
      TLOF(createTLOF(getTargetTriple())),
241
1.81k
      TargetABI(computeTargetABI(TT, Options)) {
242
1.81k
  initAsmInfo();
243
1.81k
}
244
245
1.79k
PPCTargetMachine::~PPCTargetMachine() = default;
246
247
const PPCSubtarget *
248
103k
PPCTargetMachine::getSubtargetImpl(const Function &F) const {
249
103k
  Attribute CPUAttr = F.getFnAttribute("target-cpu");
250
103k
  Attribute FSAttr = F.getFnAttribute("target-features");
251
103k
252
103k
  std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
253
71.6k
                        ? CPUAttr.getValueAsString().str()
254
32.2k
                        : TargetCPU;
255
103k
  std::string FS = !FSAttr.hasAttribute(Attribute::None)
256
20.9k
                       ? FSAttr.getValueAsString().str()
257
82.9k
                       : TargetFS;
258
103k
259
103k
  // FIXME: This is related to the code below to reset the target options,
260
103k
  // we need to know whether or not the soft float flag is set on the
261
103k
  // function before we can generate a subtarget. We also need to use
262
103k
  // it as a key for the subtarget since that can be the only difference
263
103k
  // between two functions.
264
103k
  bool SoftFloat =
265
103k
      F.getFnAttribute("use-soft-float").getValueAsString() == "true";
266
103k
  // If the soft float attribute is set on the function turn on the soft float
267
103k
  // subtarget feature.
268
103k
  if (SoftFloat)
269
280
    
FS += FS.empty() ? 280
"-hard-float"119
:
",-hard-float"161
;
270
103k
271
103k
  auto &I = SubtargetMap[CPU + FS];
272
103k
  if (
!I103k
) {
273
1.40k
    // This needs to be done before we create a new subtarget since any
274
1.40k
    // creation will depend on the TM and the code generation flags on the
275
1.40k
    // function that reside in TargetOptions.
276
1.40k
    resetTargetOptions(F);
277
1.40k
    I = llvm::make_unique<PPCSubtarget>(
278
1.40k
        TargetTriple, CPU,
279
1.40k
        // FIXME: It would be good to have the subtarget additions here
280
1.40k
        // not necessary. Anything that turns them on/off (overrides) ends
281
1.40k
        // up being put at the end of the feature string, but the defaults
282
1.40k
        // shouldn't require adding them. Fixing this means pulling Feature64Bit
283
1.40k
        // out of most of the target cpus in the .td file and making it set only
284
1.40k
        // as part of initialization via the TargetTriple.
285
1.40k
        computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this);
286
1.40k
  }
287
103k
  return I.get();
288
103k
}
289
290
//===----------------------------------------------------------------------===//
291
// Pass Pipeline Configuration
292
//===----------------------------------------------------------------------===//
293
294
namespace {
295
296
/// PPC Code Generator Pass Configuration Options.
297
class PPCPassConfig : public TargetPassConfig {
298
public:
299
  PPCPassConfig(PPCTargetMachine &TM, PassManagerBase &PM)
300
1.44k
    : TargetPassConfig(TM, PM) {}
301
302
3.97k
  PPCTargetMachine &getPPCTargetMachine() const {
303
3.97k
    return getTM<PPCTargetMachine>();
304
3.97k
  }
305
306
  void addIRPasses() override;
307
  bool addPreISel() override;
308
  bool addILPOpts() override;
309
  bool addInstSelector() override;
310
  void addMachineSSAOptimization() override;
311
  void addPreRegAlloc() override;
312
  void addPreSched2() override;
313
  void addPreEmitPass() override;
314
};
315
316
} // end anonymous namespace
317
318
1.44k
TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
319
1.44k
  return new PPCPassConfig(*this, PM);
320
1.44k
}
321
322
1.36k
void PPCPassConfig::addIRPasses() {
323
1.36k
  if (TM->getOptLevel() != CodeGenOpt::None)
324
1.23k
    addPass(createPPCBoolRetToIntPass());
325
1.36k
  addPass(createAtomicExpandPass());
326
1.36k
327
1.36k
  // For the BG/Q (or if explicitly requested), add explicit data prefetch
328
1.36k
  // intrinsics.
329
1.36k
  bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
330
36
                        getOptLevel() != CodeGenOpt::None;
331
1.36k
  if (EnablePrefetch.getNumOccurrences() > 0)
332
7
    UsePrefetching = EnablePrefetch;
333
1.36k
  if (UsePrefetching)
334
41
    addPass(createLoopDataPrefetchPass());
335
1.36k
336
1.36k
  if (
TM->getOptLevel() >= CodeGenOpt::Default && 1.36k
EnableGEPOpt1.20k
) {
337
1.20k
    // Call SeparateConstOffsetFromGEP pass to extract constants within indices
338
1.20k
    // and lower a GEP with multiple indices to either arithmetic operations or
339
1.20k
    // multiple GEPs with single index.
340
1.20k
    addPass(createSeparateConstOffsetFromGEPPass(TM, true));
341
1.20k
    // Call EarlyCSE pass to find and remove subexpressions in the lowered
342
1.20k
    // result.
343
1.20k
    addPass(createEarlyCSEPass());
344
1.20k
    // Do loop invariant code motion in case part of the lowered result is
345
1.20k
    // invariant.
346
1.20k
    addPass(createLICMPass());
347
1.20k
  }
348
1.36k
349
1.36k
  TargetPassConfig::addIRPasses();
350
1.36k
}
351
352
1.36k
bool PPCPassConfig::addPreISel() {
353
1.36k
  if (
!DisablePreIncPrep && 1.36k
getOptLevel() != CodeGenOpt::None1.36k
)
354
1.23k
    addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
355
1.36k
356
1.36k
  if (
!DisableCTRLoops && 1.36k
getOptLevel() != CodeGenOpt::None1.36k
)
357
1.23k
    addPass(createPPCCTRLoops());
358
1.36k
359
1.36k
  return false;
360
1.36k
}
361
362
1.23k
bool PPCPassConfig::addILPOpts() {
363
1.23k
  addPass(&EarlyIfConverterID);
364
1.23k
365
1.23k
  if (EnableMachineCombinerPass)
366
1.23k
    addPass(&MachineCombinerID);
367
1.23k
368
1.23k
  return true;
369
1.23k
}
370
371
1.36k
bool PPCPassConfig::addInstSelector() {
372
1.36k
  // Install an instruction selector.
373
1.36k
  addPass(createPPCISelDag(getPPCTargetMachine(), getOptLevel()));
374
1.36k
375
#ifndef NDEBUG
376
  if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
377
    addPass(createPPCCTRLoopsVerify());
378
#endif
379
380
1.36k
  addPass(createPPCVSXCopyPass());
381
1.36k
  return false;
382
1.36k
}
383
384
1.23k
void PPCPassConfig::addMachineSSAOptimization() {
385
1.23k
  // PPCBranchCoalescingPass needs to be done before machine sinking
386
1.23k
  // since it merges empty blocks.
387
1.23k
  if (
EnableBranchCoalescing && 1.23k
getOptLevel() != CodeGenOpt::None2
)
388
2
    addPass(createPPCBranchCoalescingPass());
389
1.23k
  TargetPassConfig::addMachineSSAOptimization();
390
1.23k
  // For little endian, remove where possible the vector swap instructions
391
1.23k
  // introduced at code generation to normalize vector element order.
392
1.23k
  if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
393
235
      !DisableVSXSwapRemoval)
394
233
    addPass(createPPCVSXSwapRemovalPass());
395
1.23k
  // Target-specific peephole cleanups performed after instruction
396
1.23k
  // selection.
397
1.23k
  if (
!DisableMIPeephole1.23k
) {
398
1.23k
    addPass(createPPCMIPeepholePass());
399
1.23k
    addPass(&DeadMachineInstructionElimID);
400
1.23k
  }
401
1.23k
}
402
403
1.36k
void PPCPassConfig::addPreRegAlloc() {
404
1.36k
  if (
getOptLevel() != CodeGenOpt::None1.36k
) {
405
1.23k
    initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
406
1.23k
    insertPass(VSXFMAMutateEarly ? 
&RegisterCoalescerID0
:
&MachineSchedulerID1.23k
,
407
1.23k
               &PPCVSXFMAMutateID);
408
1.23k
  }
409
1.36k
410
1.36k
  // FIXME: We probably don't need to run these for -fPIE.
411
1.36k
  if (
getPPCTargetMachine().isPositionIndependent()1.36k
) {
412
929
    // FIXME: LiveVariables should not be necessary here!
413
929
    // PPCTLSDynamicCallPass uses LiveIntervals, which previously depended on
414
929
    // LiveVariables. This (unnecessary) dependency has been removed now,
415
929
    // however a stage-2 clang build fails without LiveVariables computed here.
416
929
    addPass(&LiveVariablesID, false);
417
929
    addPass(createPPCTLSDynamicCallPass());
418
929
  }
419
1.36k
  if (EnableExtraTOCRegDeps)
420
1.36k
    addPass(createPPCTOCRegDepsPass());
421
1.36k
}
422
423
1.36k
void PPCPassConfig::addPreSched2() {
424
1.36k
  if (
getOptLevel() != CodeGenOpt::None1.36k
) {
425
1.23k
    addPass(&IfConverterID);
426
1.23k
427
1.23k
    // This optimization must happen after anything that might do store-to-load
428
1.23k
    // forwarding. Here we're after RA (and, thus, when spills are inserted)
429
1.23k
    // but before post-RA scheduling.
430
1.23k
    if (!DisableQPXLoadSplat)
431
1.23k
      addPass(createPPCQPXLoadSplatPass());
432
1.23k
  }
433
1.36k
}
434
435
1.36k
void PPCPassConfig::addPreEmitPass() {
436
1.36k
  addPass(createPPCExpandISELPass());
437
1.36k
438
1.36k
  if (getOptLevel() != CodeGenOpt::None)
439
1.23k
    addPass(createPPCEarlyReturnPass(), false);
440
1.36k
  // Must run branch selection immediately preceding the asm printer.
441
1.36k
  addPass(createPPCBranchSelectionPass(), false);
442
1.36k
}
443
444
2.55k
TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() {
445
43.9k
  return TargetIRAnalysis([this](const Function &F) {
446
43.9k
    return TargetTransformInfo(PPCTTIImpl(this, F));
447
43.9k
  });
448
2.55k
}