Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
Line
Count
Source
1
//===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass identifies floating point stores that should not be combined into
10
// store pairs. Later we may do the same for floating point loads.
11
// ===---------------------------------------------------------------------===//
12
13
#include "AArch64InstrInfo.h"
14
#include "llvm/CodeGen/MachineFunction.h"
15
#include "llvm/CodeGen/MachineFunctionPass.h"
16
#include "llvm/CodeGen/MachineInstr.h"
17
#include "llvm/CodeGen/MachineTraceMetrics.h"
18
#include "llvm/CodeGen/TargetInstrInfo.h"
19
#include "llvm/CodeGen/TargetSchedule.h"
20
#include "llvm/Support/Debug.h"
21
#include "llvm/Support/raw_ostream.h"
22
23
using namespace llvm;
24
25
#define DEBUG_TYPE "aarch64-stp-suppress"
26
27
265k
#define STPSUPPRESS_PASS_NAME "AArch64 Store Pair Suppression"
28
29
namespace {
30
class AArch64StorePairSuppress : public MachineFunctionPass {
31
  const AArch64InstrInfo *TII;
32
  const TargetRegisterInfo *TRI;
33
  const MachineRegisterInfo *MRI;
34
  TargetSchedModel SchedModel;
35
  MachineTraceMetrics *Traces;
36
  MachineTraceMetrics::Ensemble *MinInstr;
37
38
public:
39
  static char ID;
40
8.62k
  AArch64StorePairSuppress() : MachineFunctionPass(ID) {
41
8.62k
    initializeAArch64StorePairSuppressPass(*PassRegistry::getPassRegistry());
42
8.62k
  }
43
44
265k
  StringRef getPassName() const override { return STPSUPPRESS_PASS_NAME; }
45
46
  bool runOnMachineFunction(MachineFunction &F) override;
47
48
private:
49
  bool shouldAddSTPToBlock(const MachineBasicBlock *BB);
50
51
  bool isNarrowFPStore(const MachineInstr &MI);
52
53
8.57k
  void getAnalysisUsage(AnalysisUsage &AU) const override {
54
8.57k
    AU.setPreservesCFG();
55
8.57k
    AU.addRequired<MachineTraceMetrics>();
56
8.57k
    AU.addPreserved<MachineTraceMetrics>();
57
8.57k
    MachineFunctionPass::getAnalysisUsage(AU);
58
8.57k
  }
59
};
60
char AArch64StorePairSuppress::ID = 0;
61
} // anonymous
62
63
// Register the pass under the "aarch64-stp-suppress" argument string with the
// display name STPSUPPRESS_PASS_NAME.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags -- confirm against the macro definition.
INITIALIZE_PASS(AArch64StorePairSuppress,
                "aarch64-stp-suppress",
                STPSUPPRESS_PASS_NAME,
                false,
                false)
65
66
8.62k
/// Factory for the store pair suppression pass: heap-allocates a fresh
/// AArch64StorePairSuppress and returns it through the FunctionPass interface.
/// NOTE(review): ownership presumably transfers to the caller (the pass
/// manager) -- confirm at the call site.
FunctionPass *llvm::createAArch64StorePairSuppressPass() {
  FunctionPass *NewPass = new AArch64StorePairSuppress();
  return NewPass;
}
69
70
/// Return true if an STP can be added to this block without increasing the
71
/// critical resource height. STP is good to form in Ld/St limited blocks and
72
/// bad to form in float-point limited blocks. This is true independent of the
73
/// critical path. If the critical path is longer than the resource height, the
74
/// extra vector ops can limit physreg renaming. Otherwise, it could simply
75
/// oversaturate the vector units.
76
3.03k
bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
77
3.03k
  if (!MinInstr)
78
2.35k
    MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
79
3.03k
80
3.03k
  MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
81
3.03k
  unsigned ResLength = BBTrace.getResourceLength();
82
3.03k
83
3.03k
  // Get the machine model's scheduling class for STPQi.
84
3.03k
  // Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
85
3.03k
  unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass();
86
3.03k
  const MCSchedClassDesc *SCDesc =
87
3.03k
      SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
88
3.03k
89
3.03k
  // If a subtarget does not define resources for STPQi, bail here.
90
3.03k
  if (SCDesc->isValid() && !SCDesc->isVariant()) {
91
3.03k
    unsigned ResLenWithSTP = BBTrace.getResourceLength(None, SCDesc);
92
3.03k
    if (ResLenWithSTP > ResLength) {
93
1.85k
      LLVM_DEBUG(dbgs() << "  Suppress STP in BB: " << BB->getNumber()
94
1.85k
                        << " resources " << ResLength << " -> " << ResLenWithSTP
95
1.85k
                        << "\n");
96
1.85k
      return false;
97
1.85k
    }
98
1.18k
  }
99
1.18k
  return true;
100
1.18k
}
101
102
/// Return true if this is a floating-point store smaller than the V reg. On
103
/// cyclone, these require a vector shuffle before storing a pair.
104
/// Ideally we would call getMatchingPairOpcode() and have the machine model
105
/// tell us if it's profitable with no cpu knowledge here.
106
///
107
/// FIXME: We plan to develop a decent Target abstraction for simple loads and
108
/// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer.
109
19.1M
bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) {
110
19.1M
  switch (MI.getOpcode()) {
111
19.1M
  default:
112
19.1M
    return false;
113
19.1M
  case AArch64::STRSui:
114
35.6k
  case AArch64::STRDui:
115
35.6k
  case AArch64::STURSi:
116
35.6k
  case AArch64::STURDi:
117
35.6k
    return true;
118
19.1M
  }
119
19.1M
}
120
121
257k
/// Pass entry point. Scans every basic block of \p MF for consecutive narrow
/// floating-point stores off the same base register and, where the block
/// cannot absorb an STP, marks those stores so they will not be paired.
///
/// Always returns false: the pass only sets internal MachineMemOperand flags
/// and so cannot really invalidate anything.
bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  // Refresh the per-function state cached in the pass object.
  const TargetSubtargetInfo &ST = MF.getSubtarget();
  TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
  TRI = ST.getRegisterInfo();
  MRI = &MF.getRegInfo();
  SchedModel.init(&ST);
  Traces = &getAnalysis<MachineTraceMetrics>();
  MinInstr = nullptr; // Re-fetched lazily by shouldAddSTPToBlock().

  LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << MF.getName() << '\n');

  if (!SchedModel.hasInstrSchedModel()) {
    LLVM_DEBUG(dbgs() << "  Skipping pass: no machine model present.\n");
    return false;
  }

  // Look for runs of stores to the same base address. There is no need to
  // decide precisely whether a store pair can be formed; the aim is only to
  // weed out most cases where it cannot, so that trace metrics are not
  // computed needlessly.
  for (MachineBasicBlock &Block : MF) {
    bool Suppressing = false;
    unsigned LastBaseReg = 0;

    for (MachineInstr &Store : Block) {
      if (!isNarrowFPStore(Store))
        continue;

      const MachineOperand *BaseOp;
      int64_t Offset;
      const bool HasRegBase =
          TII->getMemOperandWithOffset(Store, BaseOp, Offset, TRI) &&
          BaseOp->isReg();
      if (!HasRegBase) {
        // Unknown or non-register base: this store cannot extend a run.
        LastBaseReg = 0;
        continue;
      }

      const unsigned BaseReg = BaseOp->getReg();
      if (LastBaseReg == BaseReg) {
        // If this block can take STPs, skip ahead to the next block. The
        // query is made at most until suppression has started in this block.
        if (!Suppressing && shouldAddSTPToBlock(Store.getParent()))
          break;

        // Otherwise, continue unpairing the stores in this block.
        LLVM_DEBUG(dbgs() << "Unpairing store " << Store << "\n");
        Suppressing = true;
        TII->suppressLdStPair(Store);
      }
      LastBaseReg = BaseReg;
    }
  }

  // This pass just sets some internal MachineMemOperand flags. It can't really
  // invalidate anything.
  return false;
}