/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
Line | Count | Source |
1 | | //===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This pass identifies floating-point stores that should not be combined into |
10 | | // store pairs. Later we may do the same for floating-point loads. |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "AArch64InstrInfo.h" |
14 | | #include "llvm/CodeGen/MachineFunction.h" |
15 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
16 | | #include "llvm/CodeGen/MachineInstr.h" |
17 | | #include "llvm/CodeGen/MachineTraceMetrics.h" |
18 | | #include "llvm/CodeGen/TargetInstrInfo.h" |
19 | | #include "llvm/CodeGen/TargetSchedule.h" |
20 | | #include "llvm/Support/Debug.h" |
21 | | #include "llvm/Support/raw_ostream.h" |
22 | | |
23 | | using namespace llvm; |
24 | | |
25 | | #define DEBUG_TYPE "aarch64-stp-suppress" |
26 | | |
27 | 265k | #define STPSUPPRESS_PASS_NAME "AArch64 Store Pair Suppression" |
28 | | |
29 | | namespace {
30 | | class AArch64StorePairSuppress : public MachineFunctionPass { // Pass that marks narrow FP stores so they are not paired (see runOnMachineFunction).
31 | | const AArch64InstrInfo *TII; // Set from the subtarget at the start of each runOnMachineFunction call.
32 | | const TargetRegisterInfo *TRI; // Set from the subtarget at the start of each runOnMachineFunction call.
33 | | const MachineRegisterInfo *MRI; // NOTE(review): assigned in runOnMachineFunction but never read in the code visible here.
34 | | TargetSchedModel SchedModel; // Re-initialized from the subtarget on every run.
35 | | MachineTraceMetrics *Traces; // Result of the required MachineTraceMetrics analysis for the current function.
36 | | MachineTraceMetrics::Ensemble *MinInstr; // TS_MinInstrCount ensemble; fetched lazily in shouldAddSTPToBlock, reset to nullptr per function.
37 | | |
38 | | public: |
39 | | static char ID; // Pass identification; defined right after the class.
40 | 8.62k | AArch64StorePairSuppress() : MachineFunctionPass(ID) {
41 | 8.62k | initializeAArch64StorePairSuppressPass(*PassRegistry::getPassRegistry());
42 | 8.62k | }
43 | | |
44 | 265k | StringRef getPassName() const override { return STPSUPPRESS_PASS_NAME; }
45 | | |
46 | | bool runOnMachineFunction(MachineFunction &F) override; // The out-of-line definition names this parameter MF.
47 | | |
48 | | private: |
49 | | bool shouldAddSTPToBlock(const MachineBasicBlock *BB); // True if the block's trace resource length does not grow with an extra STP.
50 | | |
51 | | bool isNarrowFPStore(const MachineInstr &MI); // True for STRSui, STRDui, STURSi and STURDi.
52 | | |
53 | 8.57k | void getAnalysisUsage(AnalysisUsage &AU) const override { // Declares: CFG preserved, MachineTraceMetrics required and preserved.
54 | 8.57k | AU.setPreservesCFG();
55 | 8.57k | AU.addRequired<MachineTraceMetrics>();
56 | 8.57k | AU.addPreserved<MachineTraceMetrics>();
57 | 8.57k | MachineFunctionPass::getAnalysisUsage(AU);
58 | 8.57k | }
59 | | };
60 | | char AArch64StorePairSuppress::ID = 0;
61 | | } // end anonymous namespace
62 | | |
63 | | INITIALIZE_PASS(AArch64StorePairSuppress, "aarch64-stp-suppress",
64 | | STPSUPPRESS_PASS_NAME, false, false) // Same string as DEBUG_TYPE; the two flags are presumably cfg-only / is-analysis -- TODO confirm against the macro definition.
65 | | |
66 | 8.62k | FunctionPass *llvm::createAArch64StorePairSuppressPass() { // Factory: returns a new instance of the store-pair suppression pass.
67 | 8.62k | return new AArch64StorePairSuppress(); // Heap-allocated; the caller receives the only pointer (presumably the pass manager takes ownership -- confirm).
68 | 8.62k | }
69 | | |
70 | | /// Return true if an STP can be added to block \p BB without increasing the
71 | | /// critical resource height of its trace. STP is good to form in Ld/St-limited
72 | | /// blocks and bad to form in floating-point-limited blocks. This is true
73 | | /// independent of the critical path. If the critical path is longer than the
74 | | /// resource height, the extra vector ops can limit physreg renaming. Otherwise,
75 | | /// it could simply oversaturate the vector units.
76 | 3.03k | bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
77 | 3.03k | if (!MinInstr) // Fetch the ensemble lazily; runOnMachineFunction resets MinInstr to nullptr per function.
78 | 2.35k | MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
79 | 3.03k | 
80 | 3.03k | MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
81 | 3.03k | unsigned ResLength = BBTrace.getResourceLength(); // Baseline resource length of the trace through BB.
82 | 3.03k | 
83 | 3.03k | // Get the machine model's scheduling class for STPDi.
84 | 3.03k | // Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
85 | 3.03k | unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass();
86 | 3.03k | const MCSchedClassDesc *SCDesc =
87 | 3.03k | SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
88 | 3.03k | 
89 | 3.03k | // Only compare when STPDi has a valid, non-variant sched class; otherwise allow STP.
90 | 3.03k | if (SCDesc->isValid() && !SCDesc->isVariant()) {
91 | 3.03k | unsigned ResLenWithSTP = BBTrace.getResourceLength(None, SCDesc); // Presumably the length with one extra STPDi-class instruction -- confirm against MachineTraceMetrics.
92 | 3.03k | if (ResLenWithSTP > ResLength) {
93 | 1.85k | LLVM_DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber()
94 | 1.85k | << " resources " << ResLength << " -> " << ResLenWithSTP
95 | 1.85k | << "\n");
96 | 1.85k | return false;
97 | 1.85k | }
98 | 1.18k | }
99 | 1.18k | return true;
100 | 1.18k | }
101 | | |
102 | | /// Return true if \p MI is a floating-point store smaller than the V reg, i.e.
103 | | /// one of STRSui, STRDui, STURSi or STURDi. On Cyclone, these require a vector shuffle before storing a pair.
104 | | /// Ideally we would call getMatchingPairOpcode() and have the machine model
105 | | /// tell us if it's profitable with no CPU knowledge here.
106 | | ///
107 | | /// FIXME: We plan to develop a decent Target abstraction for simple loads and
108 | | /// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer.
109 | 19.1M | bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) {
110 | 19.1M | switch (MI.getOpcode()) {
111 | 19.1M | default:
112 | 19.1M | return false;
113 | 19.1M | case AArch64::STRSui:
114 | 35.6k | case AArch64::STRDui:
115 | 35.6k | case AArch64::STURSi:
116 | 35.6k | case AArch64::STURDi:
117 | 35.6k | return true;
118 | 19.1M | }
119 | 19.1M | }
120 | | |
121 | 257k | bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) { // Marks narrow FP stores that should not be paired; every path returns false.
122 | 257k | if (skipFunction(MF.getFunction()))
123 | 16 | return false;
124 | 257k | 
125 | 257k | const TargetSubtargetInfo &ST = MF.getSubtarget();
126 | 257k | TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
127 | 257k | TRI = ST.getRegisterInfo();
128 | 257k | MRI = &MF.getRegInfo(); // NOTE(review): MRI is not read anywhere in the code visible here.
129 | 257k | SchedModel.init(&ST);
130 | 257k | Traces = &getAnalysis<MachineTraceMetrics>();
131 | 257k | MinInstr = nullptr; // Forces shouldAddSTPToBlock to re-fetch the ensemble for this function.
132 | 257k | 
133 | 257k | LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << MF.getName() << '\n');
134 | 257k | 
135 | 257k | if (!SchedModel.hasInstrSchedModel()) {
136 | 14.1k | LLVM_DEBUG(dbgs() << " Skipping pass: no machine model present.\n");
137 | 14.1k | return false;
138 | 14.1k | }
139 | 242k | 
140 | 242k | // Look for back-to-back narrow FP stores that use the same base register. We
141 | 242k | // don't need to precisely determine whether a store pair can be formed. But we
142 | 242k | // do want to filter out most situations where we can't form store pairs to
143 | 242k | // avoid computing trace metrics in those cases.
144 | 1.97M | for (auto &MBB : MF)242k {
145 | 1.97M | bool SuppressSTP = false; // Becomes true once this block has been judged unsuitable for STP.
146 | 1.97M | unsigned PrevBaseReg = 0; // Base register of the previous narrow FP store; 0 means none.
147 | 19.1M | for (auto &MI : MBB) {
148 | 19.1M | if (!isNarrowFPStore(MI))
149 | 19.1M | continue; // Other instructions are skipped without resetting PrevBaseReg.
150 | 35.6k | const MachineOperand *BaseOp;
151 | 35.6k | int64_t Offset;
152 | 35.6k | if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) &&
153 | 35.6k | BaseOp->isReg()35.5k ) {
154 | 23.2k | unsigned BaseReg = BaseOp->getReg();
155 | 23.2k | if (PrevBaseReg == BaseReg) {
156 | 4.75k | // If this block can take STPs, stop scanning it and move on to the next block.
157 | 4.75k | if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent())3.03k )
158 | 1.18k | break;
159 | 3.57k | // Otherwise, suppress pairing for this and later same-base stores in the block.
160 | 3.57k | LLVM_DEBUG(dbgs() << "Unpairing store " << MI << "\n");
161 | 3.57k | SuppressSTP = true;
162 | 3.57k | TII->suppressLdStPair(MI);
163 | 3.57k | }
164 | 23.2k | PrevBaseReg = BaseReg;
165 | 22.0k | } else
166 | 12.3k | PrevBaseReg = 0; // No register base could be determined: restart the run.
167 | 35.6k | }
168 | 1.97M | }
169 | 242k | // This pass just sets some internal MachineMemOperand flags. It can't really
170 | 242k | // invalidate anything.
171 | 242k | return false;
172 | 242k | }