/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/TargetSchedule.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- llvm/Target/TargetSchedule.cpp - Sched Machine Model ---------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file implements a wrapper around MCSchedModel that allows the interface |
10 | | // to benefit from information currently only available in TargetInstrInfo. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "llvm/CodeGen/TargetSchedule.h" |
15 | | #include "llvm/CodeGen/MachineFunction.h" |
16 | | #include "llvm/CodeGen/MachineInstr.h" |
17 | | #include "llvm/CodeGen/MachineOperand.h" |
18 | | #include "llvm/CodeGen/TargetInstrInfo.h" |
19 | | #include "llvm/CodeGen/TargetRegisterInfo.h" |
20 | | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
21 | | #include "llvm/MC/MCInstrDesc.h" |
22 | | #include "llvm/MC/MCInstrItineraries.h" |
23 | | #include "llvm/MC/MCSchedule.h" |
24 | | #include "llvm/Support/CommandLine.h" |
25 | | #include "llvm/Support/ErrorHandling.h" |
26 | | #include "llvm/Support/raw_ostream.h" |
27 | | #include <algorithm> |
28 | | #include <cassert> |
29 | | #include <cstdint> |
30 | | |
31 | | using namespace llvm; |
32 | | |
33 | | static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true), |
34 | | cl::desc("Use TargetSchedModel for latency lookup")); |
35 | | |
36 | | static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true), |
37 | | cl::desc("Use InstrItineraryData for latency lookup")); |
38 | | |
39 | 331M | bool TargetSchedModel::hasInstrSchedModel() const { |
40 | 331M | return EnableSchedModel && SchedModel.hasInstrSchedModel()331M ; |
41 | 331M | } |
42 | | |
43 | 124M | bool TargetSchedModel::hasInstrItineraries() const { |
44 | 124M | return EnableSchedItins && !InstrItins.isEmpty()124M ; |
45 | 124M | } |
46 | | |
/// Euclid's algorithm: greatest common divisor of \p Dividend and \p Divisor.
/// By convention gcd(x, 0) == x, so a zero \p Divisor returns \p Dividend
/// unchanged.
static unsigned gcd(unsigned Dividend, unsigned Divisor) {
  // Dividend and Divisor will be naturally swapped as needed: after the first
  // iteration Dividend holds the old Divisor.
  while (Divisor) {
    unsigned Rem = Dividend % Divisor;
    Dividend = Divisor;
    Divisor = Rem;
  }
  return Dividend;
}
56 | | |
57 | 30.8M | static unsigned lcm(unsigned A, unsigned B) { |
58 | 30.8M | unsigned LCM = (uint64_t(A) * B) / gcd(A, B); |
59 | 30.8M | assert((LCM >= A && LCM >= B) && "LCM overflow"); |
60 | 30.8M | return LCM; |
61 | 30.8M | } |
62 | | |
63 | 3.47M | void TargetSchedModel::init(const TargetSubtargetInfo *TSInfo) { |
64 | 3.47M | STI = TSInfo; |
65 | 3.47M | SchedModel = TSInfo->getSchedModel(); |
66 | 3.47M | TII = TSInfo->getInstrInfo(); |
67 | 3.47M | STI->initInstrItins(InstrItins); |
68 | 3.47M | |
69 | 3.47M | unsigned NumRes = SchedModel.getNumProcResourceKinds(); |
70 | 3.47M | ResourceFactors.resize(NumRes); |
71 | 3.47M | ResourceLCM = SchedModel.IssueWidth; |
72 | 36.7M | for (unsigned Idx = 0; Idx < NumRes; ++Idx33.2M ) { |
73 | 33.2M | unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; |
74 | 33.2M | if (NumUnits > 0) |
75 | 30.8M | ResourceLCM = lcm(ResourceLCM, NumUnits); |
76 | 33.2M | } |
77 | 3.47M | MicroOpFactor = ResourceLCM / SchedModel.IssueWidth; |
78 | 36.7M | for (unsigned Idx = 0; Idx < NumRes; ++Idx33.2M ) { |
79 | 33.2M | unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; |
80 | 33.2M | ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits)30.8M : 02.39M ; |
81 | 33.2M | } |
82 | 3.47M | } |
83 | | |
84 | | /// Returns true only if instruction is specified as single issue. |
85 | | bool TargetSchedModel::mustBeginGroup(const MachineInstr *MI, |
86 | 21.7M | const MCSchedClassDesc *SC) const { |
87 | 21.7M | if (hasInstrSchedModel()) { |
88 | 20.6M | if (!SC) |
89 | 20.6M | SC = resolveSchedClass(MI); |
90 | 20.6M | if (SC->isValid()) |
91 | 20.6M | return SC->BeginGroup; |
92 | 1.12M | } |
93 | 1.12M | return false; |
94 | 1.12M | } |
95 | | |
96 | | bool TargetSchedModel::mustEndGroup(const MachineInstr *MI, |
97 | 38.5M | const MCSchedClassDesc *SC) const { |
98 | 38.5M | if (hasInstrSchedModel()) { |
99 | 36.3M | if (!SC) |
100 | 36.3M | SC = resolveSchedClass(MI); |
101 | 36.3M | if (SC->isValid()) |
102 | 36.3M | return SC->EndGroup; |
103 | 2.17M | } |
104 | 2.17M | return false; |
105 | 2.17M | } |
106 | | |
107 | | unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI, |
108 | 88.6M | const MCSchedClassDesc *SC) const { |
109 | 88.6M | if (hasInstrItineraries()) { |
110 | 411k | int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass()); |
111 | 411k | return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, *MI)0 ; |
112 | 411k | } |
113 | 88.2M | if (hasInstrSchedModel()) { |
114 | 84.2M | if (!SC) |
115 | 73.4M | SC = resolveSchedClass(MI); |
116 | 84.2M | if (SC->isValid()) |
117 | 84.1M | return SC->NumMicroOps; |
118 | 4.08M | } |
119 | 4.08M | return MI->isTransient() ? 01.50M : 12.58M ; |
120 | 4.08M | } |
121 | | |
// The machine model may explicitly specify an invalid latency, which
// effectively means infinite latency. Since users of the TargetSchedule API
// don't know how to handle this, we convert it to a very large latency that is
// easy to distinguish when debugging the DAG but won't induce overflow.
static unsigned capLatency(int Cycles) {
  return Cycles >= 0 ? Cycles : 1000;
}
129 | | |
130 | | /// Return the MCSchedClassDesc for this instruction. Some SchedClasses require |
131 | | /// evaluation of predicates that depend on instruction operands or flags. |
132 | | const MCSchedClassDesc *TargetSchedModel:: |
133 | 188M | resolveSchedClass(const MachineInstr *MI) const { |
134 | 188M | // Get the definition's scheduling class descriptor from this machine model. |
135 | 188M | unsigned SchedClass = MI->getDesc().getSchedClass(); |
136 | 188M | const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); |
137 | 188M | if (!SCDesc->isValid()) |
138 | 1.33M | return SCDesc; |
139 | 186M | |
140 | | #ifndef NDEBUG |
141 | | unsigned NIter = 0; |
142 | | #endif |
143 | 222M | while (186M SCDesc->isVariant()) { |
144 | 35.6M | assert(++NIter < 6 && "Variants are nested deeper than the magic number"); |
145 | 35.6M | |
146 | 35.6M | SchedClass = STI->resolveSchedClass(SchedClass, MI, this); |
147 | 35.6M | SCDesc = SchedModel.getSchedClassDesc(SchedClass); |
148 | 35.6M | } |
149 | 186M | return SCDesc; |
150 | 186M | } |
151 | | |
152 | | /// Find the def index of this operand. This index maps to the machine model and |
153 | | /// is independent of use operands. Def operands may be reordered with uses or |
154 | | /// merged with uses without affecting the def index (e.g. before/after |
155 | | /// regalloc). However, an instruction's def operands must never be reordered |
156 | | /// with respect to each other. |
157 | 17.0M | static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) { |
158 | 17.0M | unsigned DefIdx = 0; |
159 | 22.0M | for (unsigned i = 0; i != DefOperIdx; ++i5.05M ) { |
160 | 5.05M | const MachineOperand &MO = MI->getOperand(i); |
161 | 5.05M | if (MO.isReg() && MO.isDef()3.22M ) |
162 | 1.09M | ++DefIdx; |
163 | 5.05M | } |
164 | 17.0M | return DefIdx; |
165 | 17.0M | } |
166 | | |
167 | | /// Find the use index of this operand. This is independent of the instruction's |
168 | | /// def operands. |
169 | | /// |
170 | | /// Note that uses are not determined by the operand's isUse property, which |
171 | | /// is simply the inverse of isDef. Here we consider any readsReg operand to be |
172 | | /// a "use". The machine model allows an operand to be both a Def and Use. |
173 | 4.49M | static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) { |
174 | 4.49M | unsigned UseIdx = 0; |
175 | 11.1M | for (unsigned i = 0; i != UseOperIdx; ++i6.65M ) { |
176 | 6.65M | const MachineOperand &MO = MI->getOperand(i); |
177 | 6.65M | if (MO.isReg() && MO.readsReg()6.62M && !MO.isDef()2.18M ) |
178 | 2.17M | ++UseIdx; |
179 | 6.65M | } |
180 | 4.49M | return UseIdx; |
181 | 4.49M | } |
182 | | |
183 | | // Top-level API for clients that know the operand indices. |
184 | | unsigned TargetSchedModel::computeOperandLatency( |
185 | | const MachineInstr *DefMI, unsigned DefOperIdx, |
186 | 18.9M | const MachineInstr *UseMI, unsigned UseOperIdx) const { |
187 | 18.9M | |
188 | 18.9M | if (!hasInstrSchedModel() && !hasInstrItineraries()1.92M ) |
189 | 1.24M | return TII->defaultDefLatency(SchedModel, *DefMI); |
190 | 17.6M | |
191 | 17.6M | if (hasInstrItineraries()) { |
192 | 678k | int OperLatency = 0; |
193 | 678k | if (UseMI) { |
194 | 492k | OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx, |
195 | 492k | *UseMI, UseOperIdx); |
196 | 492k | } |
197 | 186k | else { |
198 | 186k | unsigned DefClass = DefMI->getDesc().getSchedClass(); |
199 | 186k | OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx); |
200 | 186k | } |
201 | 678k | if (OperLatency >= 0) |
202 | 367k | return OperLatency; |
203 | 311k | |
204 | 311k | // No operand latency was found. |
205 | 311k | unsigned InstrLatency = TII->getInstrLatency(&InstrItins, *DefMI); |
206 | 311k | |
207 | 311k | // Expected latency is the max of the stage latency and itinerary props. |
208 | 311k | // Rather than directly querying InstrItins stage latency, we call a TII |
209 | 311k | // hook to allow subtargets to specialize latency. This hook is only |
210 | 311k | // applicable to the InstrItins model. InstrSchedModel should model all |
211 | 311k | // special cases without TII hooks. |
212 | 311k | InstrLatency = |
213 | 311k | std::max(InstrLatency, TII->defaultDefLatency(SchedModel, *DefMI)); |
214 | 311k | return InstrLatency; |
215 | 311k | } |
216 | 17.0M | // hasInstrSchedModel() |
217 | 17.0M | const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); |
218 | 17.0M | unsigned DefIdx = findDefIdx(DefMI, DefOperIdx); |
219 | 17.0M | if (DefIdx < SCDesc->NumWriteLatencyEntries) { |
220 | 14.9M | // Lookup the definition's write latency in SubtargetInfo. |
221 | 14.9M | const MCWriteLatencyEntry *WLEntry = |
222 | 14.9M | STI->getWriteLatencyEntry(SCDesc, DefIdx); |
223 | 14.9M | unsigned WriteID = WLEntry->WriteResourceID; |
224 | 14.9M | unsigned Latency = capLatency(WLEntry->Cycles); |
225 | 14.9M | if (!UseMI) |
226 | 2.91M | return Latency; |
227 | 12.0M | |
228 | 12.0M | // Lookup the use's latency adjustment in SubtargetInfo. |
229 | 12.0M | const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI); |
230 | 12.0M | if (UseDesc->NumReadAdvanceEntries == 0) |
231 | 7.55M | return Latency; |
232 | 4.49M | unsigned UseIdx = findUseIdx(UseMI, UseOperIdx); |
233 | 4.49M | int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); |
234 | 4.49M | if (Advance > 0 && (unsigned)Advance > Latency100k ) // unsigned wrap |
235 | 3.57k | return 0; |
236 | 4.49M | return Latency - Advance; |
237 | 4.49M | } |
238 | 2.04M | // If DefIdx does not exist in the model (e.g. implicit defs), then return |
239 | 2.04M | // unit latency (defaultDefLatency may be too conservative). |
240 | | #ifndef NDEBUG |
241 | | if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() |
242 | | && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef() |
243 | | && SchedModel.isComplete()) { |
244 | | errs() << "DefIdx " << DefIdx << " exceeds machine model writes for " |
245 | | << *DefMI << " (Try with MCSchedModel.CompleteModel set to false)"; |
246 | | llvm_unreachable("incomplete machine model"); |
247 | | } |
248 | | #endif |
249 | | // FIXME: Automatically giving all implicit defs defaultDefLatency is |
250 | 2.04M | // undesirable. We should only do it for defs that are known to the MC |
251 | 2.04M | // desc like flags. Truly implicit defs should get 1 cycle latency. |
252 | 2.04M | return DefMI->isTransient() ? 0691k : TII->defaultDefLatency(SchedModel, *DefMI)1.35M ; |
253 | 2.04M | } |
254 | | |
255 | | unsigned |
256 | 11.5M | TargetSchedModel::computeInstrLatency(const MCSchedClassDesc &SCDesc) const { |
257 | 11.5M | return capLatency(MCSchedModel::computeInstrLatency(*STI, SCDesc)); |
258 | 11.5M | } |
259 | | |
260 | 778k | unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const { |
261 | 778k | assert(hasInstrSchedModel() && "Only call this function with a SchedModel"); |
262 | 778k | unsigned SCIdx = TII->get(Opcode).getSchedClass(); |
263 | 778k | return capLatency(SchedModel.computeInstrLatency(*STI, SCIdx)); |
264 | 778k | } |
265 | | |
266 | 0 | unsigned TargetSchedModel::computeInstrLatency(const MCInst &Inst) const { |
267 | 0 | if (hasInstrSchedModel()) |
268 | 0 | return capLatency(SchedModel.computeInstrLatency(*STI, *TII, Inst)); |
269 | 0 | return computeInstrLatency(Inst.getOpcode()); |
270 | 0 | } |
271 | | |
272 | | unsigned |
273 | | TargetSchedModel::computeInstrLatency(const MachineInstr *MI, |
274 | 13.5M | bool UseDefaultDefLatency) const { |
275 | 13.5M | // For the itinerary model, fall back to the old subtarget hook. |
276 | 13.5M | // Allow subtargets to compute Bundle latencies outside the machine model. |
277 | 13.5M | if (hasInstrItineraries() || MI->isBundle()12.6M || |
278 | 13.5M | (12.6M !hasInstrSchedModel()12.6M && !UseDefaultDefLatency1.08M )) |
279 | 921k | return TII->getInstrLatency(&InstrItins, *MI); |
280 | 12.6M | |
281 | 12.6M | if (hasInstrSchedModel()) { |
282 | 11.5M | const MCSchedClassDesc *SCDesc = resolveSchedClass(MI); |
283 | 11.5M | if (SCDesc->isValid()) |
284 | 11.5M | return computeInstrLatency(*SCDesc); |
285 | 1.10M | } |
286 | 1.10M | return TII->defaultDefLatency(SchedModel, *MI); |
287 | 1.10M | } |
288 | | |
289 | | unsigned TargetSchedModel:: |
290 | | computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, |
291 | 1.28M | const MachineInstr *DepMI) const { |
292 | 1.28M | if (!SchedModel.isOutOfOrder()) |
293 | 478k | return 1; |
294 | 805k | |
295 | 805k | // Out-of-order processor can dispatch WAW dependencies in the same cycle. |
296 | 805k | |
297 | 805k | // Treat predication as a data dependency for out-of-order cpus. In-order |
298 | 805k | // cpus do not need to treat predicated writes specially. |
299 | 805k | // |
300 | 805k | // TODO: The following hack exists because predication passes do not |
301 | 805k | // correctly append imp-use operands, and readsReg() strangely returns false |
302 | 805k | // for predicated defs. |
303 | 805k | unsigned Reg = DefMI->getOperand(DefOperIdx).getReg(); |
304 | 805k | const MachineFunction &MF = *DefMI->getMF(); |
305 | 805k | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
306 | 805k | if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI)309k ) |
307 | 0 | return computeInstrLatency(DefMI); |
308 | 805k | |
309 | 805k | // If we have a per operand scheduling model, check if this def is writing |
310 | 805k | // an unbuffered resource. If so, it treated like an in-order cpu. |
311 | 805k | if (hasInstrSchedModel()) { |
312 | 542k | const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); |
313 | 542k | if (SCDesc->isValid()) { |
314 | 539k | for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc), |
315 | 1.37M | *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI838k ) { |
316 | 838k | if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize) |
317 | 361 | return 1; |
318 | 838k | } |
319 | 539k | } |
320 | 542k | } |
321 | 805k | return 0805k ; |
322 | 805k | } |
323 | | |
324 | | double |
325 | 0 | TargetSchedModel::computeReciprocalThroughput(const MachineInstr *MI) const { |
326 | 0 | if (hasInstrItineraries()) { |
327 | 0 | unsigned SchedClass = MI->getDesc().getSchedClass(); |
328 | 0 | return MCSchedModel::getReciprocalThroughput(SchedClass, |
329 | 0 | *getInstrItineraries()); |
330 | 0 | } |
331 | 0 | |
332 | 0 | if (hasInstrSchedModel()) |
333 | 0 | return MCSchedModel::getReciprocalThroughput(*STI, *resolveSchedClass(MI)); |
334 | 0 | |
335 | 0 | return 0.0; |
336 | 0 | } |
337 | | |
338 | | double |
339 | 0 | TargetSchedModel::computeReciprocalThroughput(unsigned Opcode) const { |
340 | 0 | unsigned SchedClass = TII->get(Opcode).getSchedClass(); |
341 | 0 | if (hasInstrItineraries()) |
342 | 0 | return MCSchedModel::getReciprocalThroughput(SchedClass, |
343 | 0 | *getInstrItineraries()); |
344 | 0 | if (hasInstrSchedModel()) { |
345 | 0 | const MCSchedClassDesc &SCDesc = *SchedModel.getSchedClassDesc(SchedClass); |
346 | 0 | if (SCDesc.isValid() && !SCDesc.isVariant()) |
347 | 0 | return MCSchedModel::getReciprocalThroughput(*STI, SCDesc); |
348 | 0 | } |
349 | 0 | |
350 | 0 | return 0.0; |
351 | 0 | } |
352 | | |
353 | | double |
354 | 0 | TargetSchedModel::computeReciprocalThroughput(const MCInst &MI) const { |
355 | 0 | if (hasInstrSchedModel()) |
356 | 0 | return SchedModel.getReciprocalThroughput(*STI, *TII, MI); |
357 | 0 | return computeReciprocalThroughput(MI.getOpcode()); |
358 | 0 | } |
359 | | |