/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This implements bottom-up and top-down register pressure reduction list |
11 | | // schedulers, using standard algorithms. The basic approach uses a priority |
12 | | // queue of available nodes to schedule. One at a time, nodes are taken from |
13 | | // the priority queue (thus in priority order), checked for legality to |
14 | | // schedule, and emitted if legal. |
15 | | // |
16 | | //===----------------------------------------------------------------------===// |
17 | | |
18 | | #include "ScheduleDAGSDNodes.h" |
19 | | #include "llvm/ADT/STLExtras.h" |
20 | | #include "llvm/ADT/SmallSet.h" |
21 | | #include "llvm/ADT/Statistic.h" |
22 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
23 | | #include "llvm/CodeGen/ScheduleHazardRecognizer.h" |
24 | | #include "llvm/CodeGen/SchedulerRegistry.h" |
25 | | #include "llvm/CodeGen/SelectionDAGISel.h" |
26 | | #include "llvm/IR/DataLayout.h" |
27 | | #include "llvm/IR/InlineAsm.h" |
28 | | #include "llvm/Support/Debug.h" |
29 | | #include "llvm/Support/ErrorHandling.h" |
30 | | #include "llvm/Support/raw_ostream.h" |
31 | | #include "llvm/Target/TargetInstrInfo.h" |
32 | | #include "llvm/Target/TargetLowering.h" |
33 | | #include "llvm/Target/TargetRegisterInfo.h" |
34 | | #include "llvm/Target/TargetSubtargetInfo.h" |
35 | | #include <climits> |
36 | | using namespace llvm; |
37 | | |
38 | | #define DEBUG_TYPE "pre-RA-sched" |
39 | | |
40 | | STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); |
41 | | STATISTIC(NumUnfolds, "Number of nodes unfolded"); |
42 | | STATISTIC(NumDups, "Number of duplicated nodes"); |
43 | | STATISTIC(NumPRCopies, "Number of physical register copies"); |
44 | | |
45 | | static RegisterScheduler |
46 | | burrListDAGScheduler("list-burr", |
47 | | "Bottom-up register reduction list scheduling", |
48 | | createBURRListDAGScheduler); |
49 | | static RegisterScheduler |
50 | | sourceListDAGScheduler("source", |
51 | | "Similar to list-burr but schedules in source " |
52 | | "order when possible", |
53 | | createSourceListDAGScheduler); |
54 | | |
55 | | static RegisterScheduler |
56 | | hybridListDAGScheduler("list-hybrid", |
57 | | "Bottom-up register pressure aware list scheduling " |
58 | | "which tries to balance latency and register pressure", |
59 | | createHybridListDAGScheduler); |
60 | | |
61 | | static RegisterScheduler |
62 | | ILPListDAGScheduler("list-ilp", |
63 | | "Bottom-up register pressure aware list scheduling " |
64 | | "which tries to balance ILP and register pressure", |
65 | | createILPListDAGScheduler); |
66 | | |
67 | | static cl::opt<bool> DisableSchedCycles( |
68 | | "disable-sched-cycles", cl::Hidden, cl::init(false), |
69 | | cl::desc("Disable cycle-level precision during preRA scheduling")); |
70 | | |
71 | | // Temporary sched=list-ilp flags until the heuristics are robust. |
72 | | // Some options are also available under sched=list-hybrid. |
73 | | static cl::opt<bool> DisableSchedRegPressure( |
74 | | "disable-sched-reg-pressure", cl::Hidden, cl::init(false), |
75 | | cl::desc("Disable regpressure priority in sched=list-ilp")); |
76 | | static cl::opt<bool> DisableSchedLiveUses( |
77 | | "disable-sched-live-uses", cl::Hidden, cl::init(true), |
78 | | cl::desc("Disable live use priority in sched=list-ilp")); |
79 | | static cl::opt<bool> DisableSchedVRegCycle( |
80 | | "disable-sched-vrcycle", cl::Hidden, cl::init(false), |
81 | | cl::desc("Disable virtual register cycle interference checks")); |
82 | | static cl::opt<bool> DisableSchedPhysRegJoin( |
83 | | "disable-sched-physreg-join", cl::Hidden, cl::init(false), |
84 | | cl::desc("Disable physreg def-use affinity")); |
85 | | static cl::opt<bool> DisableSchedStalls( |
86 | | "disable-sched-stalls", cl::Hidden, cl::init(true), |
87 | | cl::desc("Disable no-stall priority in sched=list-ilp")); |
88 | | static cl::opt<bool> DisableSchedCriticalPath( |
89 | | "disable-sched-critical-path", cl::Hidden, cl::init(false), |
90 | | cl::desc("Disable critical path priority in sched=list-ilp")); |
91 | | static cl::opt<bool> DisableSchedHeight( |
92 | | "disable-sched-height", cl::Hidden, cl::init(false), |
93 | | cl::desc("Disable scheduled-height priority in sched=list-ilp")); |
94 | | static cl::opt<bool> Disable2AddrHack( |
95 | | "disable-2addr-hack", cl::Hidden, cl::init(true), |
96 | | cl::desc("Disable scheduler's two-address hack")); |
97 | | |
98 | | static cl::opt<int> MaxReorderWindow( |
99 | | "max-sched-reorder", cl::Hidden, cl::init(6), |
100 | | cl::desc("Number of instructions to allow ahead of the critical path " |
101 | | "in sched=list-ilp")); |
102 | | |
103 | | static cl::opt<unsigned> AvgIPC( |
104 | | "sched-avg-ipc", cl::Hidden, cl::init(1), |
105 | | cl::desc("Average inst/cycle whan no target itinerary exists.")); |
106 | | |
107 | | namespace { |
108 | | //===----------------------------------------------------------------------===// |
109 | | /// ScheduleDAGRRList - The actual register reduction list scheduler |
110 | | /// implementation. This supports both top-down and bottom-up scheduling. |
111 | | /// |
112 | | class ScheduleDAGRRList : public ScheduleDAGSDNodes { |
113 | | private: |
114 | | /// NeedLatency - True if the scheduler will make use of latency information. |
115 | | /// |
116 | | bool NeedLatency; |
117 | | |
118 | | /// AvailableQueue - The priority queue to use for the available SUnits. |
119 | | SchedulingPriorityQueue *AvailableQueue; |
120 | | |
121 | | /// PendingQueue - This contains all of the instructions whose operands have |
122 | | /// been issued, but their results are not ready yet (due to the latency of |
123 | | /// the operation). Once the operands becomes available, the instruction is |
124 | | /// added to the AvailableQueue. |
125 | | std::vector<SUnit*> PendingQueue; |
126 | | |
127 | | /// HazardRec - The hazard recognizer to use. |
128 | | ScheduleHazardRecognizer *HazardRec; |
129 | | |
130 | | /// CurCycle - The current scheduler state corresponds to this cycle. |
131 | | unsigned CurCycle; |
132 | | |
133 | | /// MinAvailableCycle - Cycle of the soonest available instruction. |
134 | | unsigned MinAvailableCycle; |
135 | | |
136 | | /// IssueCount - Count instructions issued in this cycle |
137 | | /// Currently valid only for bottom-up scheduling. |
138 | | unsigned IssueCount; |
139 | | |
140 | | /// LiveRegDefs - A set of physical registers and their definition |
141 | | /// that are "live". These nodes must be scheduled before any other nodes that |
142 | | /// modifies the registers can be scheduled. |
143 | | unsigned NumLiveRegs; |
144 | | std::unique_ptr<SUnit*[]> LiveRegDefs; |
145 | | std::unique_ptr<SUnit*[]> LiveRegGens; |
146 | | |
147 | | // Collect interferences between physical register use/defs. |
148 | | // Each interference is an SUnit and set of physical registers. |
149 | | SmallVector<SUnit*, 4> Interferences; |
150 | | typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT; |
151 | | LRegsMapT LRegsMap; |
152 | | |
153 | | /// Topo - A topological ordering for SUnits which permits fast IsReachable |
154 | | /// and similar queries. |
155 | | ScheduleDAGTopologicalSort Topo; |
156 | | |
157 | | // Hack to keep track of the inverse of FindCallSeqStart without more crazy |
158 | | // DAG crawling. |
159 | | DenseMap<SUnit*, SUnit*> CallSeqEndForStart; |
160 | | |
161 | | public: |
162 | | ScheduleDAGRRList(MachineFunction &mf, bool needlatency, |
163 | | SchedulingPriorityQueue *availqueue, |
164 | | CodeGenOpt::Level OptLevel) |
165 | | : ScheduleDAGSDNodes(mf), |
166 | | NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), |
167 | 3.42M | Topo(SUnits, nullptr) { |
168 | 3.42M | |
169 | 3.42M | const TargetSubtargetInfo &STI = mf.getSubtarget(); |
170 | 3.42M | if (DisableSchedCycles || 3.42M !NeedLatency3.42M ) |
171 | 3.37M | HazardRec = new ScheduleHazardRecognizer(); |
172 | 3.42M | else |
173 | 52.4k | HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); |
174 | 3.42M | } |
175 | | |
176 | 3.42M | ~ScheduleDAGRRList() override { |
177 | 3.42M | delete HazardRec; |
178 | 3.42M | delete AvailableQueue; |
179 | 3.42M | } |
180 | | |
181 | | void Schedule() override; |
182 | | |
183 | 16.3M | ScheduleHazardRecognizer *getHazardRec() { return HazardRec; } |
184 | | |
185 | | /// IsReachable - Checks if SU is reachable from TargetSU. |
186 | 391 | bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { |
187 | 391 | return Topo.IsReachable(SU, TargetSU); |
188 | 391 | } |
189 | | |
190 | | /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will |
191 | | /// create a cycle. |
192 | 3.85k | bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { |
193 | 3.85k | return Topo.WillCreateCycle(SU, TargetSU); |
194 | 3.85k | } |
195 | | |
196 | | /// AddPred - adds a predecessor edge to SUnit SU. |
197 | | /// This returns true if this is a new predecessor. |
198 | | /// Updates the topological ordering if required. |
199 | 4.18k | void AddPred(SUnit *SU, const SDep &D) { |
200 | 4.18k | Topo.AddPred(SU, D.getSUnit()); |
201 | 4.18k | SU->addPred(D); |
202 | 4.18k | } |
203 | | |
204 | | /// RemovePred - removes a predecessor edge from SUnit SU. |
205 | | /// This returns true if an edge was removed. |
206 | | /// Updates the topological ordering if required. |
207 | 853 | void RemovePred(SUnit *SU, const SDep &D) { |
208 | 853 | Topo.RemovePred(SU, D.getSUnit()); |
209 | 853 | SU->removePred(D); |
210 | 853 | } |
211 | | |
212 | | private: |
213 | 26.3M | bool isReady(SUnit *SU) { |
214 | 26.3M | return DisableSchedCycles || !AvailableQueue->hasReadyFilter() || |
215 | 0 | AvailableQueue->isReady(SU); |
216 | 26.3M | } |
217 | | |
218 | | void ReleasePred(SUnit *SU, const SDep *PredEdge); |
219 | | void ReleasePredecessors(SUnit *SU); |
220 | | void ReleasePending(); |
221 | | void AdvanceToCycle(unsigned NextCycle); |
222 | | void AdvancePastStalls(SUnit *SU); |
223 | | void EmitNode(SUnit *SU); |
224 | | void ScheduleNodeBottomUp(SUnit*); |
225 | | void CapturePred(SDep *PredEdge); |
226 | | void UnscheduleNodeBottomUp(SUnit*); |
227 | | void RestoreHazardCheckerBottomUp(); |
228 | | void BacktrackBottomUp(SUnit*, SUnit*); |
229 | | SUnit *TryUnfoldSU(SUnit *); |
230 | | SUnit *CopyAndMoveSuccessors(SUnit*); |
231 | | void InsertCopiesAndMoveSuccs(SUnit*, unsigned, |
232 | | const TargetRegisterClass*, |
233 | | const TargetRegisterClass*, |
234 | | SmallVectorImpl<SUnit*>&); |
235 | | bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&); |
236 | | |
237 | | void releaseInterferences(unsigned Reg = 0); |
238 | | |
239 | | SUnit *PickNodeToScheduleBottomUp(); |
240 | | void ListScheduleBottomUp(); |
241 | | |
242 | | /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. |
243 | | /// Updates the topological ordering if required. |
244 | 130 | SUnit *CreateNewSUnit(SDNode *N) { |
245 | 130 | unsigned NumSUnits = SUnits.size(); |
246 | 130 | SUnit *NewNode = newSUnit(N); |
247 | 130 | // Update the topological ordering. |
248 | 130 | if (NewNode->NodeNum >= NumSUnits) |
249 | 130 | Topo.InitDAGTopologicalSorting(); |
250 | 130 | return NewNode; |
251 | 130 | } |
252 | | |
253 | | /// CreateClone - Creates a new SUnit from an existing one. |
254 | | /// Updates the topological ordering if required. |
255 | 520 | SUnit *CreateClone(SUnit *N) { |
256 | 520 | unsigned NumSUnits = SUnits.size(); |
257 | 520 | SUnit *NewNode = Clone(N); |
258 | 520 | // Update the topological ordering. |
259 | 520 | if (NewNode->NodeNum >= NumSUnits) |
260 | 520 | Topo.InitDAGTopologicalSorting(); |
261 | 520 | return NewNode; |
262 | 520 | } |
263 | | |
264 | | /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't |
265 | | /// need actual latency information but the hybrid scheduler does. |
266 | 66.0M | bool forceUnitLatencies() const override { |
267 | 66.0M | return !NeedLatency; |
268 | 66.0M | } |
269 | | }; |
270 | | } // end anonymous namespace |
271 | | |
272 | | /// GetCostForDef - Looks up the register class and cost for a given definition. |
273 | | /// Typically this just means looking up the representative register class, |
274 | | /// but for untyped values (MVT::Untyped) it means inspecting the node's |
275 | | /// opcode to determine what register class is being generated. |
276 | | static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, |
277 | | const TargetLowering *TLI, |
278 | | const TargetInstrInfo *TII, |
279 | | const TargetRegisterInfo *TRI, |
280 | | unsigned &RegClass, unsigned &Cost, |
281 | 942k | const MachineFunction &MF) { |
282 | 942k | MVT VT = RegDefPos.GetValue(); |
283 | 942k | |
284 | 942k | // Special handling for untyped values. These values can only come from |
285 | 942k | // the expansion of custom DAG-to-DAG patterns. |
286 | 942k | if (VT == MVT::Untyped942k ) { |
287 | 1.95k | const SDNode *Node = RegDefPos.GetNode(); |
288 | 1.95k | |
289 | 1.95k | // Special handling for CopyFromReg of untyped values. |
290 | 1.95k | if (!Node->isMachineOpcode() && 1.95k Node->getOpcode() == ISD::CopyFromReg136 ) { |
291 | 136 | unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); |
292 | 136 | const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); |
293 | 136 | RegClass = RC->getID(); |
294 | 136 | Cost = 1; |
295 | 136 | return; |
296 | 136 | } |
297 | 1.81k | |
298 | 1.81k | unsigned Opcode = Node->getMachineOpcode(); |
299 | 1.81k | if (Opcode == TargetOpcode::REG_SEQUENCE1.81k ) { |
300 | 255 | unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); |
301 | 255 | const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); |
302 | 255 | RegClass = RC->getID(); |
303 | 255 | Cost = 1; |
304 | 255 | return; |
305 | 255 | } |
306 | 1.55k | |
307 | 1.55k | unsigned Idx = RegDefPos.GetIdx(); |
308 | 1.55k | const MCInstrDesc Desc = TII->get(Opcode); |
309 | 1.55k | const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF); |
310 | 1.55k | RegClass = RC->getID(); |
311 | 1.55k | // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a |
312 | 1.55k | // better way to determine it. |
313 | 1.55k | Cost = 1; |
314 | 942k | } else { |
315 | 940k | RegClass = TLI->getRepRegClassFor(VT)->getID(); |
316 | 940k | Cost = TLI->getRepRegClassCostFor(VT); |
317 | 940k | } |
318 | 942k | } |
319 | | |
320 | | /// Schedule - Schedule the DAG using list scheduling. |
321 | 3.42M | void ScheduleDAGRRList::Schedule() { |
322 | 3.42M | DEBUG(dbgs() |
323 | 3.42M | << "********** List Scheduling BB#" << BB->getNumber() |
324 | 3.42M | << " '" << BB->getName() << "' **********\n"); |
325 | 3.42M | |
326 | 3.42M | CurCycle = 0; |
327 | 3.42M | IssueCount = 0; |
328 | 0 | MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; |
329 | 3.42M | NumLiveRegs = 0; |
330 | 3.42M | // Allocate slots for each physical register, plus one for a special register |
331 | 3.42M | // to track the virtual resource of a calling sequence. |
332 | 3.42M | LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]()); |
333 | 3.42M | LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]()); |
334 | 3.42M | CallSeqEndForStart.clear(); |
335 | 3.42M | assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); |
336 | 3.42M | |
337 | 3.42M | // Build the scheduling graph. |
338 | 3.42M | BuildSchedGraph(nullptr); |
339 | 3.42M | |
340 | 3.42M | DEBUG(for (SUnit &SU : SUnits) |
341 | 3.42M | SU.dumpAll(this)); |
342 | 3.42M | Topo.InitDAGTopologicalSorting(); |
343 | 3.42M | |
344 | 3.42M | AvailableQueue->initNodes(SUnits); |
345 | 3.42M | |
346 | 3.42M | HazardRec->Reset(); |
347 | 3.42M | |
348 | 3.42M | // Execute the actual scheduling loop. |
349 | 3.42M | ListScheduleBottomUp(); |
350 | 3.42M | |
351 | 3.42M | AvailableQueue->releaseState(); |
352 | 3.42M | |
353 | 3.42M | DEBUG({ |
354 | 3.42M | dbgs() << "*** Final schedule ***\n"; |
355 | 3.42M | dumpSchedule(); |
356 | 3.42M | dbgs() << '\n'; |
357 | 3.42M | }); |
358 | 3.42M | } |
359 | | |
360 | | //===----------------------------------------------------------------------===// |
361 | | // Bottom-Up Scheduling |
362 | | //===----------------------------------------------------------------------===// |
363 | | |
364 | | /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to |
365 | | /// the AvailableQueue if the count reaches zero. Also update its cycle bound. |
366 | 34.2M | void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { |
367 | 34.2M | SUnit *PredSU = PredEdge->getSUnit(); |
368 | 34.2M | |
369 | | #ifndef NDEBUG |
370 | | if (PredSU->NumSuccsLeft == 0) { |
371 | | dbgs() << "*** Scheduling failed! ***\n"; |
372 | | PredSU->dump(this); |
373 | | dbgs() << " has been released too many times!\n"; |
374 | | llvm_unreachable(nullptr); |
375 | | } |
376 | | #endif |
377 | | --PredSU->NumSuccsLeft; |
378 | 34.2M | |
379 | 34.2M | if (!forceUnitLatencies()34.2M ) { |
380 | 461k | // Updating predecessor's height. This is now the cycle when the |
381 | 461k | // predecessor can be scheduled without causing a pipeline stall. |
382 | 461k | PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); |
383 | 461k | } |
384 | 34.2M | |
385 | 34.2M | // If all the node's successors are scheduled, this node is ready |
386 | 34.2M | // to be scheduled. Ignore the special EntrySU node. |
387 | 34.2M | if (PredSU->NumSuccsLeft == 0 && 34.2M PredSU != &EntrySU26.3M ) { |
388 | 26.3M | PredSU->isAvailable = true; |
389 | 26.3M | |
390 | 26.3M | unsigned Height = PredSU->getHeight(); |
391 | 26.3M | if (Height < MinAvailableCycle) |
392 | 12.1M | MinAvailableCycle = Height; |
393 | 26.3M | |
394 | 26.3M | if (isReady(PredSU)26.3M ) { |
395 | 26.3M | AvailableQueue->push(PredSU); |
396 | 26.3M | } |
397 | 26.3M | // CapturePred and others may have left the node in the pending queue, avoid |
398 | 26.3M | // adding it twice. |
399 | 18.4E | else if (18.4E !PredSU->isPending18.4E ) { |
400 | 0 | PredSU->isPending = true; |
401 | 0 | PendingQueue.push_back(PredSU); |
402 | 0 | } |
403 | 26.3M | } |
404 | 34.2M | } |
405 | | |
406 | | /// IsChainDependent - Test if Outer is reachable from Inner through |
407 | | /// chain dependencies. |
408 | | static bool IsChainDependent(SDNode *Outer, SDNode *Inner, |
409 | | unsigned NestLevel, |
410 | 45.9k | const TargetInstrInfo *TII) { |
411 | 45.9k | SDNode *N = Outer; |
412 | 91.9k | for (;;) { |
413 | 91.9k | if (N == Inner) |
414 | 3 | return true; |
415 | 91.9k | // For a TokenFactor, examine each operand. There may be multiple ways |
416 | 91.9k | // to get to the CALLSEQ_BEGIN, but we need to find the path with the |
417 | 91.9k | // most nesting in order to ensure that we find the corresponding match. |
418 | 91.9k | if (91.9k N->getOpcode() == ISD::TokenFactor91.9k ) { |
419 | 4.80k | for (const SDValue &Op : N->op_values()) |
420 | 40.7k | if (40.7k IsChainDependent(Op.getNode(), Inner, NestLevel, TII)40.7k ) |
421 | 3 | return true; |
422 | 4.79k | return false; |
423 | 4.79k | } |
424 | 87.1k | // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. |
425 | 87.1k | if (87.1k N->isMachineOpcode()87.1k ) { |
426 | 86.7k | if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()86.7k ) { |
427 | 0 | ++NestLevel; |
428 | 86.7k | } else if (86.7k N->getMachineOpcode() == TII->getCallFrameSetupOpcode()86.7k ) { |
429 | 41.1k | if (NestLevel == 0) |
430 | 41.1k | return false; |
431 | 0 | --NestLevel; |
432 | 0 | } |
433 | 86.7k | } |
434 | 87.1k | // Otherwise, find the chain and continue climbing. |
435 | 46.0k | for (const SDValue &Op : N->op_values()) |
436 | 300k | if (300k Op.getValueType() == MVT::Other300k ) { |
437 | 46.0k | N = Op.getNode(); |
438 | 46.0k | goto found_chain_operand; |
439 | 46.0k | } |
440 | 0 | return false; |
441 | 46.0k | found_chain_operand:; |
442 | 46.0k | if (N->getOpcode() == ISD::EntryToken) |
443 | 0 | return false; |
444 | 0 | } |
445 | 45.9k | } |
446 | | |
447 | | /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate |
448 | | /// the corresponding (lowered) CALLSEQ_BEGIN node. |
449 | | /// |
450 | | /// NestLevel and MaxNested are used in recursion to indcate the current level |
451 | | /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum |
452 | | /// level seen so far. |
453 | | /// |
454 | | /// TODO: It would be better to give CALLSEQ_END an explicit operand to point |
455 | | /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it. |
456 | | static SDNode * |
457 | | FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, |
458 | 1.67M | const TargetInstrInfo *TII) { |
459 | 8.22M | for (;;) { |
460 | 8.22M | // For a TokenFactor, examine each operand. There may be multiple ways |
461 | 8.22M | // to get to the CALLSEQ_BEGIN, but we need to find the path with the |
462 | 8.22M | // most nesting in order to ensure that we find the corresponding match. |
463 | 8.22M | if (N->getOpcode() == ISD::TokenFactor8.22M ) { |
464 | 33.5k | SDNode *Best = nullptr; |
465 | 33.5k | unsigned BestMaxNest = MaxNest; |
466 | 104k | for (const SDValue &Op : N->op_values()) { |
467 | 104k | unsigned MyNestLevel = NestLevel; |
468 | 104k | unsigned MyMaxNest = MaxNest; |
469 | 104k | if (SDNode *New = FindCallSeqStart(Op.getNode(), |
470 | 104k | MyNestLevel, MyMaxNest, TII)) |
471 | 104k | if (104k !Best || 104k (MyMaxNest > BestMaxNest)70.9k ) { |
472 | 33.5k | Best = New; |
473 | 33.5k | BestMaxNest = MyMaxNest; |
474 | 33.5k | } |
475 | 104k | } |
476 | 33.5k | assert(Best); |
477 | 33.5k | MaxNest = BestMaxNest; |
478 | 33.5k | return Best; |
479 | 33.5k | } |
480 | 8.19M | // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. |
481 | 8.19M | if (8.19M N->isMachineOpcode()8.19M ) { |
482 | 4.89M | if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()4.89M ) { |
483 | 1.56M | ++NestLevel; |
484 | 1.56M | MaxNest = std::max(MaxNest, NestLevel); |
485 | 4.89M | } else if (3.32M N->getMachineOpcode() == TII->getCallFrameSetupOpcode()3.32M ) { |
486 | 1.63M | assert(NestLevel != 0); |
487 | 1.63M | --NestLevel; |
488 | 1.63M | if (NestLevel == 0) |
489 | 1.63M | return N; |
490 | 6.55M | } |
491 | 4.89M | } |
492 | 6.55M | // Otherwise, find the chain and continue climbing. |
493 | 6.55M | for (const SDValue &Op : N->op_values()) |
494 | 16.6M | if (16.6M Op.getValueType() == MVT::Other16.6M ) { |
495 | 6.55M | N = Op.getNode(); |
496 | 6.55M | goto found_chain_operand; |
497 | 6.55M | } |
498 | 0 | return nullptr; |
499 | 6.55M | found_chain_operand:; |
500 | 6.55M | if (N->getOpcode() == ISD::EntryToken) |
501 | 37 | return nullptr; |
502 | 0 | } |
503 | 1.67M | } |
504 | | |
505 | | /// Call ReleasePred for each predecessor, then update register live def/gen. |
506 | | /// Always update LiveRegDefs for a register dependence even if the current SU |
507 | | /// also defines the register. This effectively create one large live range |
508 | | /// across a sequence of two-address node. This is important because the |
509 | | /// entire chain must be scheduled together. Example: |
510 | | /// |
511 | | /// flags = (3) add |
512 | | /// flags = (2) addc flags |
513 | | /// flags = (1) addc flags |
514 | | /// |
515 | | /// results in |
516 | | /// |
517 | | /// LiveRegDefs[flags] = 3 |
518 | | /// LiveRegGens[flags] = 1 |
519 | | /// |
520 | | /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid |
521 | | /// interference on flags. |
522 | 33.1M | void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { |
523 | 33.1M | // Bottom up: release predecessors |
524 | 34.2M | for (SDep &Pred : SU->Preds) { |
525 | 34.2M | ReleasePred(SU, &Pred); |
526 | 34.2M | if (Pred.isAssignedRegDep()34.2M ) { |
527 | 1.00M | // This is a physical register dependency and it's impossible or |
528 | 1.00M | // expensive to copy the register. Make sure nothing that can |
529 | 1.00M | // clobber the register is scheduled between the predecessor and |
530 | 1.00M | // this node. |
531 | 1.00M | SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef; |
532 | 1.00M | assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) && |
533 | 1.00M | "interference on register dependence"); |
534 | 1.00M | LiveRegDefs[Pred.getReg()] = Pred.getSUnit(); |
535 | 1.00M | if (!LiveRegGens[Pred.getReg()]1.00M ) { |
536 | 967k | ++NumLiveRegs; |
537 | 967k | LiveRegGens[Pred.getReg()] = SU; |
538 | 967k | } |
539 | 1.00M | } |
540 | 34.2M | } |
541 | 33.1M | |
542 | 33.1M | // If we're scheduling a lowered CALLSEQ_END, find the corresponding |
543 | 33.1M | // CALLSEQ_BEGIN. Inject an artificial physical register dependence between |
544 | 33.1M | // these nodes, to prevent other calls from being interscheduled with them. |
545 | 33.1M | unsigned CallResource = TRI->getNumRegs(); |
546 | 33.1M | if (!LiveRegDefs[CallResource]) |
547 | 56.8M | for (SDNode *Node = SU->getNode(); 29.3M Node56.8M ; Node = Node->getGluedNode()27.4M ) |
548 | 29.0M | if (29.0M Node->isMachineOpcode() && |
549 | 29.0M | Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()16.9M ) { |
550 | 1.56M | unsigned NestLevel = 0; |
551 | 1.56M | unsigned MaxNest = 0; |
552 | 1.56M | SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); |
553 | 1.56M | assert(N && "Must find call sequence start"); |
554 | 1.56M | |
555 | 1.56M | SUnit *Def = &SUnits[N->getNodeId()]; |
556 | 1.56M | CallSeqEndForStart[Def] = SU; |
557 | 1.56M | |
558 | 1.56M | ++NumLiveRegs; |
559 | 1.56M | LiveRegDefs[CallResource] = Def; |
560 | 1.56M | LiveRegGens[CallResource] = SU; |
561 | 1.56M | break; |
562 | 1.56M | } |
563 | 33.1M | } |
564 | | |
565 | | /// Check to see if any of the pending instructions are ready to issue. If |
566 | | /// so, add them to the available queue. |
567 | 29.5M | void ScheduleDAGRRList::ReleasePending() { |
568 | 29.5M | if (DisableSchedCycles29.5M ) { |
569 | 0 | assert(PendingQueue.empty() && "pending instrs not allowed in this mode"); |
570 | 0 | return; |
571 | 0 | } |
572 | 29.5M | |
573 | 29.5M | // If the available queue is empty, it is safe to reset MinAvailableCycle. |
574 | 29.5M | if (29.5M AvailableQueue->empty()29.5M ) |
575 | 15.3M | MinAvailableCycle = UINT_MAX; |
576 | 29.5M | |
577 | 29.5M | // Check to see if any of the pending instructions are ready to issue. If |
578 | 29.5M | // so, add them to the available queue. |
579 | 29.5M | for (unsigned i = 0, e = PendingQueue.size(); i != e29.5M ; ++i0 ) { |
580 | 0 | unsigned ReadyCycle = PendingQueue[i]->getHeight(); |
581 | 0 | if (ReadyCycle < MinAvailableCycle) |
582 | 0 | MinAvailableCycle = ReadyCycle; |
583 | 0 |
|
584 | 0 | if (PendingQueue[i]->isAvailable0 ) { |
585 | 0 | if (!isReady(PendingQueue[i])) |
586 | 0 | continue; |
587 | 0 | AvailableQueue->push(PendingQueue[i]); |
588 | 0 | } |
589 | 0 | PendingQueue[i]->isPending = false; |
590 | 0 | PendingQueue[i] = PendingQueue.back(); |
591 | 0 | PendingQueue.pop_back(); |
592 | 0 | --i; --e; |
593 | 0 | } |
594 | 29.5M | } |
595 | | |
596 | | /// Move the scheduler state forward by the specified number of Cycles. |
597 | 87.2M | void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { |
598 | 87.2M | if (NextCycle <= CurCycle) |
599 | 57.6M | return; |
600 | 29.5M | |
601 | 29.5M | IssueCount = 0; |
602 | 29.5M | AvailableQueue->setCurCycle(NextCycle); |
603 | 29.5M | if (!HazardRec->isEnabled()29.5M ) { |
604 | 29.4M | // Bypass lots of virtual calls in case of long latency. |
605 | 29.4M | CurCycle = NextCycle; |
606 | 29.4M | } |
607 | 151k | else { |
608 | 381k | for (; CurCycle != NextCycle381k ; ++CurCycle230k ) { |
609 | 230k | HazardRec->RecedeCycle(); |
610 | 230k | } |
611 | 151k | } |
612 | 87.2M | // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the |
613 | 87.2M | // available Q to release pending nodes at least once before popping. |
614 | 87.2M | ReleasePending(); |
615 | 87.2M | } |
616 | | |
617 | | /// Move the scheduler state forward until the specified node's dependents are |
618 | | /// ready and can be scheduled with no resource conflicts. |
619 | 29.6M | void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { |
620 | 29.6M | if (DisableSchedCycles) |
621 | 0 | return; |
622 | 29.6M | |
623 | 29.6M | // FIXME: Nodes such as CopyFromReg probably should not advance the current |
624 | 29.6M | // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node |
625 | 29.6M | // has predecessors the cycle will be advanced when they are scheduled. |
626 | 29.6M | // But given the crude nature of modeling latency though such nodes, we |
627 | 29.6M | // currently need to treat these nodes like real instructions. |
628 | 29.6M | // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; |
629 | 29.6M | |
630 | 29.6M | unsigned ReadyCycle = SU->getHeight(); |
631 | 29.6M | |
632 | 29.6M | // Bump CurCycle to account for latency. We assume the latency of other |
633 | 29.6M | // available instructions may be hidden by the stall (not a full pipe stall). |
634 | 29.6M | // This updates the hazard recognizer's cycle before reserving resources for |
635 | 29.6M | // this instruction. |
636 | 29.6M | AdvanceToCycle(ReadyCycle); |
637 | 29.6M | |
638 | 29.6M | // Calls are scheduled in their preceding cycle, so don't conflict with |
639 | 29.6M | // hazards from instructions after the call. EmitNode will reset the |
640 | 29.6M | // scoreboard state before emitting the call. |
641 | 29.6M | if (SU->isCall) |
642 | 1.56M | return; |
643 | 28.1M | |
644 | 28.1M | // FIXME: For resource conflicts in very long non-pipelined stages, we |
645 | 28.1M | // should probably skip ahead here to avoid useless scoreboard checks. |
646 | 28.1M | int Stalls = 0; |
647 | 28.1M | while (true28.1M ) { |
648 | 28.1M | ScheduleHazardRecognizer::HazardType HT = |
649 | 28.1M | HazardRec->getHazardType(SU, -Stalls); |
650 | 28.1M | |
651 | 28.1M | if (HT == ScheduleHazardRecognizer::NoHazard) |
652 | 28.1M | break; |
653 | 20.7k | |
654 | 20.7k | ++Stalls; |
655 | 20.7k | } |
656 | 29.6M | AdvanceToCycle(CurCycle + Stalls); |
657 | 29.6M | } |
658 | | |
659 | | /// Record this SUnit in the HazardRecognizer. |
660 | | /// Does not update CurCycle. |
661 | 29.6M | void ScheduleDAGRRList::EmitNode(SUnit *SU) { |
662 | 29.6M | if (!HazardRec->isEnabled()) |
663 | 29.4M | return; |
664 | 255k | |
665 | 255k | // Check for phys reg copy. |
666 | 255k | if (255k !SU->getNode()255k ) |
667 | 2 | return; |
668 | 255k | |
669 | 255k | switch (SU->getNode()->getOpcode()) { |
670 | 160k | default: |
671 | 160k | assert(SU->getNode()->isMachineOpcode() && |
672 | 160k | "This target-independent node should not be scheduled."); |
673 | 160k | break; |
674 | 95.3k | case ISD::MERGE_VALUES: |
675 | 95.3k | case ISD::TokenFactor: |
676 | 95.3k | case ISD::LIFETIME_START: |
677 | 95.3k | case ISD::LIFETIME_END: |
678 | 95.3k | case ISD::CopyToReg: |
679 | 95.3k | case ISD::CopyFromReg: |
680 | 95.3k | case ISD::EH_LABEL: |
681 | 95.3k | // Noops don't affect the scoreboard state. Copies are likely to be |
682 | 95.3k | // removed. |
683 | 95.3k | return; |
684 | 420 | case ISD::INLINEASM: |
685 | 420 | // For inline asm, clear the pipeline state. |
686 | 420 | HazardRec->Reset(); |
687 | 420 | return; |
688 | 160k | } |
689 | 160k | if (160k SU->isCall160k ) { |
690 | 5.02k | // Calls are scheduled with their preceding instructions. For bottom-up |
691 | 5.02k | // scheduling, clear the pipeline state before emitting. |
692 | 5.02k | HazardRec->Reset(); |
693 | 5.02k | } |
694 | 29.6M | |
695 | 29.6M | HazardRec->EmitInstruction(SU); |
696 | 29.6M | } |
697 | | |
698 | | static void resetVRegCycle(SUnit *SU); |
699 | | |
700 | | /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending |
701 | | /// count of its predecessors. If a predecessor pending count is zero, add it to |
702 | | /// the Available queue. |
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << "   Height [" << SU->getHeight()
          << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->scheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SDep &Succ : SU->Succs) {
    // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node.
    if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[Succ.getReg()] = nullptr;
      LiveRegGens[Succ.getReg()] = nullptr;
      // Nodes that were delayed only by this register may become schedulable.
      releaseInterferences(Succ.getReg());
    }
  }
  // Release the special call resource dependence, if this is the beginning
  // of a call. CallResource is the pseudo-register index one past the last
  // real register (TRI->getNumRegs()), used to serialize call sequences.
  unsigned CallResource = TRI->getNumRegs();
  if (LiveRegDefs[CallResource] == SU)
    // Walk the glued-node chain looking for the call-frame setup node.
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}
783 | | |
784 | | /// CapturePred - This does the opposite of ReleasePred. Since SU is being |
785 | | /// unscheduled, increase the succ left count of its predecessors. Remove |
786 | | /// them from AvailableQueue if necessary. |
787 | 113k | void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { |
788 | 113k | SUnit *PredSU = PredEdge->getSUnit(); |
789 | 113k | if (PredSU->isAvailable113k ) { |
790 | 55.2k | PredSU->isAvailable = false; |
791 | 55.2k | if (!PredSU->isPending) |
792 | 45.4k | AvailableQueue->remove(PredSU); |
793 | 55.2k | } |
794 | 113k | |
795 | 113k | assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); |
796 | 113k | ++PredSU->NumSuccsLeft; |
797 | 113k | } |
798 | | |
799 | | /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and |
800 | | /// its predecessor states to reflect the change. |
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  // Undo the bookkeeping ReleasePred did: each predecessor regains a
  // pending successor, and any assigned-reg edge that SU generated is no
  // longer live.
  for (SDep &Pred : SU->Preds) {
    CapturePred(&Pred);
    if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[Pred.getReg()] = nullptr;
      LiveRegGens[Pred.getReg()] = nullptr;
      releaseInterferences(Pred.getReg());
    }
  }

  // Reclaim the special call resource dependence, if this is the beginning
  // of a call. CallResource is the pseudo-register index TRI->getNumRegs().
  unsigned CallResource = TRI->getNumRegs();
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isMachineOpcode() &&
        SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
      SUnit *SeqEnd = CallSeqEndForStart[SU];
      assert(SeqEnd && "Call sequence start/end must be known");
      assert(!LiveRegDefs[CallResource]);
      assert(!LiveRegGens[CallResource]);
      ++NumLiveRegs;
      LiveRegDefs[CallResource] = SU;
      LiveRegGens[CallResource] = SeqEnd;
    }
  }

  // Release the special call resource dependence, if this is the end
  // of a call.
  if (LiveRegGens[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[CallResource]);
        assert(LiveRegGens[CallResource]);
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  // Re-mark SU as the live def for every register it was assigned to
  // produce for a successor.
  for (auto &Succ : SU->Succs) {
    if (Succ.isAssignedRegDep()) {
      auto Reg = Succ.getReg();
      if (!LiveRegDefs[Reg])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[Reg] = SU;

      // Update LiveRegGen only if was empty before this unscheduling.
      // This is to avoid incorrect updating LiveRegGen set in previous run.
      if (!LiveRegGens[Reg]) {
        // Find the successor with the lowest height.
        LiveRegGens[Reg] = Succ.getSUnit();
        for (auto &Succ2 : SU->Succs) {
          if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
              Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
            LiveRegGens[Reg] = Succ2.getSUnit();
        }
      }
    }
  }
  // SU can be rescheduled no earlier than its original height.
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->unscheduledNode(SU);
}
890 | | |
891 | | /// After backtracking, the hazard checker needs to be restored to a state |
892 | | /// corresponding the current cycle. |
893 | 1.72k | void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { |
894 | 1.72k | HazardRec->Reset(); |
895 | 1.72k | |
896 | 1.72k | unsigned LookAhead = std::min((unsigned)Sequence.size(), |
897 | 1.72k | HazardRec->getMaxLookAhead()); |
898 | 1.72k | if (LookAhead == 0) |
899 | 1.68k | return; |
900 | 43 | |
901 | 43 | std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead); |
902 | 43 | unsigned HazardCycle = (*I)->getHeight(); |
903 | 438 | for (auto E = Sequence.end(); I != E438 ; ++I395 ) { |
904 | 395 | SUnit *SU = *I; |
905 | 629 | for (; SU->getHeight() > HazardCycle629 ; ++HazardCycle234 ) { |
906 | 234 | HazardRec->RecedeCycle(); |
907 | 234 | } |
908 | 395 | EmitNode(SU); |
909 | 395 | } |
910 | 1.72k | } |
911 | | |
912 | | /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in |
913 | | /// BTCycle in order to schedule a specific node. |
914 | 1.72k | void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { |
915 | 1.72k | SUnit *OldSU = Sequence.back(); |
916 | 58.2k | while (true58.2k ) { |
917 | 58.2k | Sequence.pop_back(); |
918 | 58.2k | // FIXME: use ready cycle instead of height |
919 | 58.2k | CurCycle = OldSU->getHeight(); |
920 | 58.2k | UnscheduleNodeBottomUp(OldSU); |
921 | 58.2k | AvailableQueue->setCurCycle(CurCycle); |
922 | 58.2k | if (OldSU == BtSU) |
923 | 1.72k | break; |
924 | 56.4k | OldSU = Sequence.back(); |
925 | 56.4k | } |
926 | 1.72k | |
927 | 1.72k | assert(!SU->isSucc(OldSU) && "Something is wrong!"); |
928 | 1.72k | |
929 | 1.72k | RestoreHazardCheckerBottomUp(); |
930 | 1.72k | |
931 | 1.72k | ReleasePending(); |
932 | 1.72k | |
933 | 1.72k | ++NumBacktracks; |
934 | 1.72k | } |
935 | | |
936 | 4 | static bool isOperandOf(const SUnit *SU, SDNode *N) { |
937 | 5 | for (const SDNode *SUNode = SU->getNode(); SUNode; |
938 | 4 | SUNode = SUNode->getGluedNode()1 ) { |
939 | 4 | if (SUNode->isOperandOf(N)) |
940 | 3 | return true; |
941 | 4 | } |
942 | 1 | return false; |
943 | 4 | } |
944 | | |
945 | | /// TryUnfold - Attempt to unfold |
/// Attempt to unfold SU's folded memory operand into a separate load node
/// and operation node. Returns the new operation SUnit on success, the
/// original SU when unfolding would bring no benefit (load already
/// scheduled), or nullptr when unfolding is impossible or unsupported.
SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
  SDNode *N = SU->getNode();
  // Ask the target to split the folded instruction into its parts.
  SmallVector<SDNode *, 2> NewNodes;
  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
    return nullptr;

  // unfolding an x86 DEC64m operation results in store, dec, load which
  // can't be handled here so quit
  if (NewNodes.size() == 3)
    return nullptr;

  assert(NewNodes.size() == 2 && "Expected a load folding node!");

  N = NewNodes[1];
  SDNode *LoadNode = NewNodes[0];
  unsigned NumVals = N->getNumValues();
  unsigned OldNumVals = SU->getNode()->getNumValues();

  // LoadNode may already exist. This can happen when there is another
  // load from the same location and producing the same type of value
  // but it has different alignment or volatileness.
  bool isNewLoad = true;
  SUnit *LoadSU;
  if (LoadNode->getNodeId() != -1) {
    LoadSU = &SUnits[LoadNode->getNodeId()];
    // If LoadSU has already been scheduled, we should clone it but
    // this would negate the benefit to unfolding so just return SU.
    if (LoadSU->isScheduled)
      return SU;
    isNewLoad = false;
  } else {
    LoadSU = CreateNewSUnit(LoadNode);
    LoadNode->setNodeId(LoadSU->NodeNum);

    InitNumRegDefsLeft(LoadSU);
    computeLatency(LoadSU);
  }

  DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");

  // Now that we are committed to unfolding replace DAG Uses.
  for (unsigned i = 0; i != NumVals; ++i)
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
  // The old node's last value (presumably its chain) is replaced by the
  // load's output chain.
  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
                                 SDValue(LoadNode, 1));

  SUnit *NewSU = CreateNewSUnit(N);
  assert(N->getNodeId() == -1 && "Node already inserted!");
  N->setNodeId(NewSU->NodeNum);

  // Derive scheduling properties of the unfolded operation from its descriptor.
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
  for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
    if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
      NewSU->isTwoAddress = true;
      break;
    }
  }
  if (MCID.isCommutable())
    NewSU->isCommutable = true;

  InitNumRegDefsLeft(NewSU);
  computeLatency(NewSU);

  // Record all the edges to and from the old SU, by category.
  SmallVector<SDep, 4> ChainPreds;
  SmallVector<SDep, 4> ChainSuccs;
  SmallVector<SDep, 4> LoadPreds;
  SmallVector<SDep, 4> NodePreds;
  SmallVector<SDep, 4> NodeSuccs;
  for (SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      ChainPreds.push_back(Pred);
    else if (isOperandOf(Pred.getSUnit(), LoadNode))
      LoadPreds.push_back(Pred);
    else
      NodePreds.push_back(Pred);
  }
  for (SDep &Succ : SU->Succs) {
    if (Succ.isCtrl())
      ChainSuccs.push_back(Succ);
    else
      NodeSuccs.push_back(Succ);
  }

  // Now assign edges to the newly-created nodes: chain and load-feeding
  // preds go to the load, data preds go to the operation.
  for (const SDep &Pred : ChainPreds) {
    RemovePred(SU, Pred);
    if (isNewLoad)
      AddPred(LoadSU, Pred);
  }
  for (const SDep &Pred : LoadPreds) {
    RemovePred(SU, Pred);
    if (isNewLoad)
      AddPred(LoadSU, Pred);
  }
  for (const SDep &Pred : NodePreds) {
    RemovePred(SU, Pred);
    AddPred(NewSU, Pred);
  }
  for (SDep D : NodeSuccs) {
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    D.setSUnit(NewSU);
    AddPred(SuccDep, D);
    // Balance register pressure.
    if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
        !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
      --NewSU->NumRegDefsLeft;
  }
  for (SDep D : ChainSuccs) {
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    if (isNewLoad) {
      D.setSUnit(LoadSU);
      AddPred(SuccDep, D);
    }
  }

  // Add a data dependency to reflect that NewSU reads the value defined
  // by LoadSU.
  SDep D(LoadSU, SDep::Data, 0);
  D.setLatency(LoadSU->Latency);
  AddPred(NewSU, D);

  if (isNewLoad)
    AvailableQueue->addNode(LoadSU);
  AvailableQueue->addNode(NewSU);

  ++NumUnfolds;

  if (NewSU->NumSuccsLeft == 0)
    NewSU->isAvailable = true;

  return NewSU;
}
1084 | | |
1085 | | /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled |
1086 | | /// successors to the newly created node. |
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return nullptr;

  // Glued nodes are scheduled as a unit; cloning one node of the glue
  // sequence is not handled here.
  if (SU->getNode()->getGluedNode())
    return nullptr;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getSimpleValueType(i);
    // Cannot clone a node that produces glue.
    if (VT == MVT::Glue)
      return nullptr;
    else if (VT == MVT::Other)
      // A chain result suggests a folded memory operand; try unfolding below.
      TryUnfold = true;
  }
  for (const SDValue &Op : N->op_values()) {
    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
    // Cannot clone a node that consumes glue either.
    if (VT == MVT::Glue)
      return nullptr;
  }

  // If possible unfold instruction.
  if (TryUnfold) {
    SUnit *UnfoldSU = TryUnfoldSU(SU);
    if (!UnfoldSU)
      return nullptr;
    SU = UnfoldSU;
    N = SU->getNode();
    // If this can be scheduled don't bother duplicating and just return
    if (SU->NumSuccsLeft == 0)
      return SU;
  }

  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SDep &Pred : SU->Preds)
    if (!Pred.isArtificial())
      AddPred(NewSU, Pred);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      // Defer removal: deleting edges while iterating SU->Succs would
      // invalidate the iteration.
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (auto &DelDep : DelDeps)
    RemovePred(DelDep.first, DelDep.second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}
1154 | | |
1155 | | /// InsertCopiesAndMoveSuccs - Insert register copies and move all |
1156 | | /// scheduled successors of the given SUnit to the last copy. |
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                                 const TargetRegisterClass *DestRC,
                                                 const TargetRegisterClass *SrcRC,
                                                 SmallVectorImpl<SUnit*> &Copies) {
  // Two nodeless SUnits model a copy out of Reg (SrcRC -> DestRC) and a
  // copy back (DestRC -> SrcRC). Both are appended to Copies for the caller.
  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(nullptr);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      // Defer removal until after the iteration over SU->Succs.
      DelDeps.push_back(std::make_pair(SuccSU, Succ));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
    }
  }
  for (auto &DelDep : DelDeps)
    RemovePred(DelDep.first, DelDep.second);

  // Wire SU -> CopyFromSU -> CopyToSU with data edges carrying latencies.
  SDep FromDep(SU, SDep::Data, Reg);
  FromDep.setLatency(SU->Latency);
  AddPred(CopyFromSU, FromDep);
  SDep ToDep(CopyFromSU, SDep::Data, 0);
  ToDep.setLatency(CopyFromSU->Latency);
  AddPred(CopyToSU, ToDep);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}
1207 | | |
1208 | | /// getPhysicalRegisterVT - Returns the ValueType of the physical register |
1209 | | /// definition of the specified node. |
1210 | | /// FIXME: Move to SelectionDAG? |
1211 | | static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, |
1212 | 584 | const TargetInstrInfo *TII) { |
1213 | 584 | unsigned NumRes; |
1214 | 584 | if (N->getOpcode() == ISD::CopyFromReg584 ) { |
1215 | 24 | // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type. |
1216 | 24 | NumRes = 1; |
1217 | 584 | } else { |
1218 | 560 | const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); |
1219 | 560 | assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); |
1220 | 560 | NumRes = MCID.getNumDefs(); |
1221 | 562 | for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef562 ; ++ImpDef2 ) { |
1222 | 562 | if (Reg == *ImpDef) |
1223 | 560 | break; |
1224 | 2 | ++NumRes; |
1225 | 2 | } |
1226 | 560 | } |
1227 | 584 | return N->getSimpleValueType(NumRes); |
1228 | 584 | } |
1229 | | |
1230 | | /// CheckForLiveRegDef - Return true and update live register vector if the |
1231 | | /// specified register def of the specified SUnit clobbers any "live" registers. |
1232 | | static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, |
1233 | | SUnit **LiveRegDefs, |
1234 | | SmallSet<unsigned, 4> &RegAdded, |
1235 | | SmallVectorImpl<unsigned> &LRegs, |
1236 | 2.64M | const TargetRegisterInfo *TRI) { |
1237 | 6.94M | for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid()6.94M ; ++AliasI4.29M ) { |
1238 | 4.29M | |
1239 | 4.29M | // Check if Ref is live. |
1240 | 4.29M | if (!LiveRegDefs[*AliasI]4.29M ) continue3.27M ; |
1241 | 1.01M | |
1242 | 1.01M | // Allow multiple uses of the same def. |
1243 | 1.01M | if (1.01M LiveRegDefs[*AliasI] == SU1.01M ) continue1.00M ; |
1244 | 14.2k | |
1245 | 14.2k | // Add Reg to the set of interfering live regs. |
1246 | 14.2k | if (14.2k RegAdded.insert(*AliasI).second14.2k ) { |
1247 | 13.7k | LRegs.push_back(*AliasI); |
1248 | 13.7k | } |
1249 | 4.29M | } |
1250 | 2.64M | } |
1251 | | |
1252 | | /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered |
1253 | | /// by RegMask, and add them to LRegs. |
1254 | | static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, |
1255 | | ArrayRef<SUnit*> LiveRegDefs, |
1256 | | SmallSet<unsigned, 4> &RegAdded, |
1257 | 5.44k | SmallVectorImpl<unsigned> &LRegs) { |
1258 | 5.44k | // Look at all live registers. Skip Reg0 and the special CallResource. |
1259 | 1.37M | for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e1.37M ; ++i1.36M ) { |
1260 | 1.36M | if (!LiveRegDefs[i]1.36M ) continue1.36M ; |
1261 | 240 | if (240 LiveRegDefs[i] == SU240 ) continue0 ; |
1262 | 240 | if (240 !MachineOperand::clobbersPhysReg(RegMask, i)240 ) continue0 ; |
1263 | 240 | if (240 RegAdded.insert(i).second240 ) |
1264 | 130 | LRegs.push_back(i); |
1265 | 1.36M | } |
1266 | 5.44k | } |
1267 | | |
1268 | | /// getNodeRegMask - Returns the register mask attached to an SDNode, if any. |
1269 | 3.83M | static const uint32_t *getNodeRegMask(const SDNode *N) { |
1270 | 3.83M | for (const SDValue &Op : N->op_values()) |
1271 | 10.3M | if (const auto *10.3M RegOp10.3M = dyn_cast<RegisterMaskSDNode>(Op.getNode())) |
1272 | 5.44k | return RegOp->getRegMask(); |
1273 | 3.83M | return nullptr; |
1274 | 3.83M | } |
1275 | | |
1276 | | /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay |
1277 | | /// scheduling of the given node to satisfy live physical register dependencies. |
1278 | | /// If the specific node is the last one that's available to schedule, do |
1279 | | /// whatever is necessary (i.e. backtracking or cloning) to make it possible. |
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
  // Fast path: nothing can interfere when no physreg is live.
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SDep &Pred : SU->Preds) {
    if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
      CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
                         RegAdded, LRegs, TRI);
  }

  // Examine every node in SU's glued chain for physreg defs.
  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      // Walk the flag/value operand groups of the inline asm.
      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    // If we're in the middle of scheduling a call, don't begin scheduling
    // another call. Also, don't allow any physical registers to be live across
    // the call.
    if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
      // Check the special calling-sequence resource.
      unsigned CallResource = TRI->getNumRegs();
      if (LiveRegDefs[CallResource]) {
        SDNode *Gen = LiveRegGens[CallResource]->getNode();
        // Walk to the innermost glued node of the generating sequence.
        while (SDNode *Glued = Gen->getGluedNode())
          Gen = Glued;
        if (!IsChainDependent(Gen, Node, 0, TII) &&
            RegAdded.insert(CallResource).second)
          LRegs.push_back(CallResource);
      }
    }
    if (const uint32_t *RegMask = getNodeRegMask(Node))
      CheckForLiveRegDefMasked(SU, RegMask,
                               makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
                               RegAdded, LRegs);

    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (MCID.hasOptionalDef()) {
      // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
      // This operand can be either a def of CPSR, if the S bit is set; or a use
      // of %noreg. When the OptionalDef is set to a valid register, we need to
      // handle it in the same way as an ImplicitDef.
      for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
        if (MCID.OpInfo[i].isOptionalDef()) {
          const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
          unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
          CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
        }
    }
    if (!MCID.ImplicitDefs)
      continue;
    // Every implicit def of the instruction may clobber a live register.
    for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
  }

  // True means the caller must delay SU; LRegs names the interfering regs.
  return !LRegs.empty();
}
1367 | | |
void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
  // Add the nodes that aren't ready back onto the available list.
  // Iterate backwards so the swap-and-pop removal below does not disturb
  // the unvisited prefix of Interferences.
  for (unsigned i = Interferences.size(); i > 0; --i) {
    SUnit *SU = Interferences[i-1];
    LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
    // Reg == 0 releases every interference; otherwise only nodes blocked
    // on Reg are released.
    if (Reg) {
      SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
      if (!is_contained(LRegs, Reg))
        continue;
    }
    SU->isPending = false;
    // The interfering node may no longer be available due to backtracking.
    // Furthermore, it may have been made available again, in which case it is
    // now already in the AvailableQueue.
    if (SU->isAvailable && !SU->NodeQueueId) {
      DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n');
      AvailableQueue->push(SU);
    }
    // Swap-and-pop removal of entry i-1.
    if (i < Interferences.size())
      Interferences[i-1] = Interferences.back();
    Interferences.pop_back();
    LRegsMap.erase(LRegsPos);
  }
}
1392 | | |
1393 | | /// Return a node that can be scheduled in this cycle. Requirements: |
1394 | | /// (1) Ready: latency has been satisfied |
1395 | | /// (2) No Hazards: resources are available |
1396 | | /// (3) No Interferences: may unschedule to break register interferences. |
1397 | 29.6M | SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { |
1398 | 29.6M | SUnit *CurSU = AvailableQueue->empty() ? nullptr726 : AvailableQueue->pop()29.6M ; |
1399 | 29.6M | auto FindAvailableNode = [&]() { |
1400 | 29.6M | while (CurSU29.6M ) { |
1401 | 29.6M | SmallVector<unsigned, 4> LRegs; |
1402 | 29.6M | if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) |
1403 | 29.6M | break; |
1404 | 19.0k | DEBUG19.0k (dbgs() << " Interfering reg " << |
1405 | 19.0k | (LRegs[0] == TRI->getNumRegs() ? "CallResource" |
1406 | 19.0k | : TRI->getName(LRegs[0])) |
1407 | 19.0k | << " SU #" << CurSU->NodeNum << '\n'); |
1408 | 19.0k | std::pair<LRegsMapT::iterator, bool> LRegsPair = |
1409 | 19.0k | LRegsMap.insert(std::make_pair(CurSU, LRegs)); |
1410 | 19.0k | if (LRegsPair.second19.0k ) { |
1411 | 19.0k | CurSU->isPending = true; // This SU is not in AvailableQueue right now. |
1412 | 19.0k | Interferences.push_back(CurSU); |
1413 | 19.0k | } |
1414 | 18.4E | else { |
1415 | 18.4E | assert(CurSU->isPending && "Interferences are pending"); |
1416 | 18.4E | // Update the interference with current live regs. |
1417 | 18.4E | LRegsPair.first->second = LRegs; |
1418 | 18.4E | } |
1419 | 29.6M | CurSU = AvailableQueue->pop(); |
1420 | 29.6M | } |
1421 | 29.6M | }; |
1422 | 29.6M | FindAvailableNode(); |
1423 | 29.6M | if (CurSU) |
1424 | 29.6M | return CurSU; |
1425 | 2.30k | |
1426 | 2.30k | // All candidates are delayed due to live physical reg dependencies. |
1427 | 2.30k | // Try backtracking, code duplication, or inserting cross class copies |
1428 | 2.30k | // to resolve it. |
1429 | 2.30k | for (SUnit *TrySU : Interferences) 2.30k { |
1430 | 3.85k | SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; |
1431 | 3.85k | |
1432 | 3.85k | // Try unscheduling up to the point where it's safe to schedule |
1433 | 3.85k | // this node. |
1434 | 3.85k | SUnit *BtSU = nullptr; |
1435 | 3.85k | unsigned LiveCycle = UINT_MAX; |
1436 | 3.85k | for (unsigned Reg : LRegs) { |
1437 | 3.85k | if (LiveRegGens[Reg]->getHeight() < LiveCycle3.85k ) { |
1438 | 3.85k | BtSU = LiveRegGens[Reg]; |
1439 | 3.85k | LiveCycle = BtSU->getHeight(); |
1440 | 3.85k | } |
1441 | 3.85k | } |
1442 | 3.85k | if (!WillCreateCycle(TrySU, BtSU)3.85k ) { |
1443 | 1.72k | // BacktrackBottomUp mutates Interferences! |
1444 | 1.72k | BacktrackBottomUp(TrySU, BtSU); |
1445 | 1.72k | |
1446 | 1.72k | // Force the current node to be scheduled before the node that |
1447 | 1.72k | // requires the physical reg dep. |
1448 | 1.72k | if (BtSU->isAvailable1.72k ) { |
1449 | 1.72k | BtSU->isAvailable = false; |
1450 | 1.72k | if (!BtSU->isPending) |
1451 | 1.72k | AvailableQueue->remove(BtSU); |
1452 | 1.72k | } |
1453 | 1.72k | DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU(" |
1454 | 1.72k | << TrySU->NodeNum << ")\n"); |
1455 | 1.72k | AddPred(TrySU, SDep(BtSU, SDep::Artificial)); |
1456 | 1.72k | |
1457 | 1.72k | // If one or more successors has been unscheduled, then the current |
1458 | 1.72k | // node is no longer available. |
1459 | 1.72k | if (!TrySU->isAvailable || 1.72k !TrySU->NodeQueueId578 ) { |
1460 | 1.14k | DEBUG(dbgs() << "TrySU not available; choosing node from queue\n"); |
1461 | 1.14k | CurSU = AvailableQueue->pop(); |
1462 | 1.72k | } else { |
1463 | 578 | DEBUG(dbgs() << "TrySU available\n"); |
1464 | 578 | // Available and in AvailableQueue |
1465 | 578 | AvailableQueue->remove(TrySU); |
1466 | 578 | CurSU = TrySU; |
1467 | 578 | } |
1468 | 1.72k | FindAvailableNode(); |
1469 | 1.72k | // Interferences has been mutated. We must break. |
1470 | 1.72k | break; |
1471 | 1.72k | } |
1472 | 2.30k | } |
1473 | 2.30k | |
1474 | 2.30k | if (!CurSU2.30k ) { |
1475 | 584 | // Can't backtrack. If it's too expensive to copy the value, then try |
1476 | 584 | // duplicate the nodes that produces these "too expensive to copy" |
1477 | 584 | // values to break the dependency. In case even that doesn't work, |
1478 | 584 | // insert cross class copies. |
1479 | 584 | // If it's not too expensive, i.e. cost != -1, issue copies. |
1480 | 584 | SUnit *TrySU = Interferences[0]; |
1481 | 584 | SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; |
1482 | 584 | assert(LRegs.size() == 1 && "Can't handle this yet!"); |
1483 | 584 | unsigned Reg = LRegs[0]; |
1484 | 584 | SUnit *LRDef = LiveRegDefs[Reg]; |
1485 | 584 | MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); |
1486 | 584 | const TargetRegisterClass *RC = |
1487 | 584 | TRI->getMinimalPhysRegClass(Reg, VT); |
1488 | 584 | const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); |
1489 | 584 | |
1490 | 584 | // If cross copy register class is the same as RC, then it must be possible |
1491 | 584 | // copy the value directly. Do not try duplicate the def. |
1492 | 584 | // If cross copy register class is not the same as RC, then it's possible to |
1493 | 584 | // copy the value but it require cross register class copies and it is |
1494 | 584 | // expensive. |
1495 | 584 | // If cross copy register class is null, then it's not possible to copy |
1496 | 584 | // the value at all. |
1497 | 584 | SUnit *NewDef = nullptr; |
1498 | 584 | if (DestRC != RC584 ) { |
1499 | 584 | NewDef = CopyAndMoveSuccessors(LRDef); |
1500 | 584 | if (!DestRC && 584 !NewDef0 ) |
1501 | 0 | report_fatal_error("Can't handle live physical register dependency!"); |
1502 | 584 | } |
1503 | 584 | if (584 !NewDef584 ) { |
1504 | 52 | // Issue copies, these can be expensive cross register class copies. |
1505 | 52 | SmallVector<SUnit*, 2> Copies; |
1506 | 52 | InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); |
1507 | 52 | DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum |
1508 | 52 | << " to SU #" << Copies.front()->NodeNum << "\n"); |
1509 | 52 | AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); |
1510 | 52 | NewDef = Copies.back(); |
1511 | 52 | } |
1512 | 584 | |
1513 | 584 | DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum |
1514 | 584 | << " to SU #" << TrySU->NodeNum << "\n"); |
1515 | 584 | LiveRegDefs[Reg] = NewDef; |
1516 | 584 | AddPred(NewDef, SDep(TrySU, SDep::Artificial)); |
1517 | 584 | TrySU->isAvailable = false; |
1518 | 584 | CurSU = NewDef; |
1519 | 584 | } |
1520 | 2.30k | assert(CurSU && "Unable to resolve live physical register dependencies!"); |
1521 | 2.30k | return CurSU; |
1522 | 29.6M | } |
1523 | | |
1524 | | /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up |
1525 | | /// schedulers. |
1526 | 3.42M | void ScheduleDAGRRList::ListScheduleBottomUp() { |
1527 | 3.42M | // Release any predecessors of the special Exit node. |
1528 | 3.42M | ReleasePredecessors(&ExitSU); |
1529 | 3.42M | |
1530 | 3.42M | // Add root to Available queue. |
1531 | 3.42M | if (!SUnits.empty()3.42M ) { |
1532 | 3.33M | SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; |
1533 | 3.33M | assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); |
1534 | 3.33M | RootSU->isAvailable = true; |
1535 | 3.33M | AvailableQueue->push(RootSU); |
1536 | 3.33M | } |
1537 | 3.42M | |
1538 | 3.42M | // While Available queue is not empty, grab the node with the highest |
1539 | 3.42M | // priority. If it is not ready put it back. Schedule the node. |
1540 | 3.42M | Sequence.reserve(SUnits.size()); |
1541 | 33.1M | while (!AvailableQueue->empty() || 33.1M !Interferences.empty()3.42M ) { |
1542 | 29.6M | DEBUG(dbgs() << "\nExamining Available:\n"; |
1543 | 29.6M | AvailableQueue->dump(this)); |
1544 | 29.6M | |
1545 | 29.6M | // Pick the best node to schedule taking all constraints into |
1546 | 29.6M | // consideration. |
1547 | 29.6M | SUnit *SU = PickNodeToScheduleBottomUp(); |
1548 | 29.6M | |
1549 | 29.6M | AdvancePastStalls(SU); |
1550 | 29.6M | |
1551 | 29.6M | ScheduleNodeBottomUp(SU); |
1552 | 29.6M | |
1553 | 29.6M | while (AvailableQueue->empty() && 29.6M !PendingQueue.empty()3.33M ) { |
1554 | 0 | // Advance the cycle to free resources. Skip ahead to the next ready SU. |
1555 | 0 | assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized"); |
1556 | 0 | AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); |
1557 | 0 | } |
1558 | 29.6M | } |
1559 | 3.42M | |
1560 | 3.42M | // Reverse the order if it is bottom up. |
1561 | 3.42M | std::reverse(Sequence.begin(), Sequence.end()); |
1562 | 3.42M | |
1563 | | #ifndef NDEBUG |
1564 | | VerifyScheduledSequence(/*isBottomUp=*/true); |
1565 | | #endif |
1566 | | } |
1567 | | |
1568 | | //===----------------------------------------------------------------------===// |
1569 | | // RegReductionPriorityQueue Definition |
1570 | | //===----------------------------------------------------------------------===// |
1571 | | // |
1572 | | // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers |
1573 | | // to reduce register pressure. |
1574 | | // |
1575 | | namespace { |
1576 | | class RegReductionPQBase; |
1577 | | |
1578 | | struct queue_sort { |
1579 | 0 | bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } |
1580 | | }; |
1581 | | |
1582 | | #ifndef NDEBUG |
1583 | | template<class SF> |
1584 | | struct reverse_sort : public queue_sort { |
1585 | | SF &SortFunc; |
1586 | | reverse_sort(SF &sf) : SortFunc(sf) {} |
1587 | | |
1588 | | bool operator()(SUnit* left, SUnit* right) const { |
1589 | | // reverse left/right rather than simply !SortFunc(left, right) |
1590 | | // to expose different paths in the comparison logic. |
1591 | | return SortFunc(right, left); |
1592 | | } |
1593 | | }; |
1594 | | #endif // NDEBUG |
1595 | | |
1596 | | /// bu_ls_rr_sort - Priority function for bottom up register pressure |
1597 | | // reduction scheduler. |
1598 | | struct bu_ls_rr_sort : public queue_sort { |
1599 | | enum { |
1600 | | IsBottomUp = true, |
1601 | | HasReadyFilter = false |
1602 | | }; |
1603 | | |
1604 | | RegReductionPQBase *SPQ; |
1605 | 17.5k | bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} |
1606 | | |
1607 | | bool operator()(SUnit* left, SUnit* right) const; |
1608 | | }; |
1609 | | |
1610 | | // src_ls_rr_sort - Priority function for source order scheduler. |
1611 | | struct src_ls_rr_sort : public queue_sort { |
1612 | | enum { |
1613 | | IsBottomUp = true, |
1614 | | HasReadyFilter = false |
1615 | | }; |
1616 | | |
1617 | | RegReductionPQBase *SPQ; |
1618 | | src_ls_rr_sort(RegReductionPQBase *spq) |
1619 | 3.35M | : SPQ(spq) {} |
1620 | | |
1621 | | bool operator()(SUnit* left, SUnit* right) const; |
1622 | | }; |
1623 | | |
1624 | | // hybrid_ls_rr_sort - Priority function for hybrid scheduler. |
1625 | | struct hybrid_ls_rr_sort : public queue_sort { |
1626 | | enum { |
1627 | | IsBottomUp = true, |
1628 | | HasReadyFilter = false |
1629 | | }; |
1630 | | |
1631 | | RegReductionPQBase *SPQ; |
1632 | | hybrid_ls_rr_sort(RegReductionPQBase *spq) |
1633 | 37.1k | : SPQ(spq) {} |
1634 | | |
1635 | | bool isReady(SUnit *SU, unsigned CurCycle) const; |
1636 | | |
1637 | | bool operator()(SUnit* left, SUnit* right) const; |
1638 | | }; |
1639 | | |
1640 | | // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) |
1641 | | // scheduler. |
1642 | | struct ilp_ls_rr_sort : public queue_sort { |
1643 | | enum { |
1644 | | IsBottomUp = true, |
1645 | | HasReadyFilter = false |
1646 | | }; |
1647 | | |
1648 | | RegReductionPQBase *SPQ; |
1649 | | ilp_ls_rr_sort(RegReductionPQBase *spq) |
1650 | 15.3k | : SPQ(spq) {} |
1651 | | |
1652 | | bool isReady(SUnit *SU, unsigned CurCycle) const; |
1653 | | |
1654 | | bool operator()(SUnit* left, SUnit* right) const; |
1655 | | }; |
1656 | | |
1657 | | class RegReductionPQBase : public SchedulingPriorityQueue { |
1658 | | protected: |
1659 | | std::vector<SUnit*> Queue; |
1660 | | unsigned CurQueueId; |
1661 | | bool TracksRegPressure; |
1662 | | bool SrcOrder; |
1663 | | |
1664 | | // SUnits - The SUnits for the current graph. |
1665 | | std::vector<SUnit> *SUnits; |
1666 | | |
1667 | | MachineFunction &MF; |
1668 | | const TargetInstrInfo *TII; |
1669 | | const TargetRegisterInfo *TRI; |
1670 | | const TargetLowering *TLI; |
1671 | | ScheduleDAGRRList *scheduleDAG; |
1672 | | |
1673 | | // SethiUllmanNumbers - The SethiUllman number for each node. |
1674 | | std::vector<unsigned> SethiUllmanNumbers; |
1675 | | |
1676 | | /// RegPressure - Tracking current reg pressure per register class. |
1677 | | /// |
1678 | | std::vector<unsigned> RegPressure; |
1679 | | |
1680 | | /// RegLimit - Tracking the number of allocatable registers per register |
1681 | | /// class. |
1682 | | std::vector<unsigned> RegLimit; |
1683 | | |
1684 | | public: |
1685 | | RegReductionPQBase(MachineFunction &mf, |
1686 | | bool hasReadyFilter, |
1687 | | bool tracksrp, |
1688 | | bool srcorder, |
1689 | | const TargetInstrInfo *tii, |
1690 | | const TargetRegisterInfo *tri, |
1691 | | const TargetLowering *tli) |
1692 | | : SchedulingPriorityQueue(hasReadyFilter), |
1693 | | CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), |
1694 | 3.42M | MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) { |
1695 | 3.42M | if (TracksRegPressure3.42M ) { |
1696 | 52.4k | unsigned NumRC = TRI->getNumRegClasses(); |
1697 | 52.4k | RegLimit.resize(NumRC); |
1698 | 52.4k | RegPressure.resize(NumRC); |
1699 | 52.4k | std::fill(RegLimit.begin(), RegLimit.end(), 0); |
1700 | 52.4k | std::fill(RegPressure.begin(), RegPressure.end(), 0); |
1701 | 52.4k | for (const TargetRegisterClass *RC : TRI->regclasses()) |
1702 | 4.74M | RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF); |
1703 | 52.4k | } |
1704 | 3.42M | } |
1705 | | |
1706 | 3.42M | void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { |
1707 | 3.42M | scheduleDAG = scheduleDag; |
1708 | 3.42M | } |
1709 | | |
1710 | 16.3M | ScheduleHazardRecognizer* getHazardRec() { |
1711 | 16.3M | return scheduleDAG->getHazardRec(); |
1712 | 16.3M | } |
1713 | | |
1714 | | void initNodes(std::vector<SUnit> &sunits) override; |
1715 | | |
1716 | | void addNode(const SUnit *SU) override; |
1717 | | |
1718 | | void updateNode(const SUnit *SU) override; |
1719 | | |
1720 | 3.42M | void releaseState() override { |
1721 | 3.42M | SUnits = nullptr; |
1722 | 3.42M | SethiUllmanNumbers.clear(); |
1723 | 3.42M | std::fill(RegPressure.begin(), RegPressure.end(), 0); |
1724 | 3.42M | } |
1725 | | |
1726 | | unsigned getNodePriority(const SUnit *SU) const; |
1727 | | |
1728 | 92.5M | unsigned getNodeOrdering(const SUnit *SU) const { |
1729 | 92.5M | if (!SU->getNode()92.5M ) return 072 ; |
1730 | 92.5M | |
1731 | 92.5M | return SU->getNode()->getIROrder(); |
1732 | 92.5M | } |
1733 | | |
1734 | 122M | bool empty() const override { return Queue.empty(); } |
1735 | | |
1736 | 29.7M | void push(SUnit *U) override { |
1737 | 29.7M | assert(!U->NodeQueueId && "Node in the queue already"); |
1738 | 29.7M | U->NodeQueueId = ++CurQueueId; |
1739 | 29.7M | Queue.push_back(U); |
1740 | 29.7M | } |
1741 | | |
1742 | 47.7k | void remove(SUnit *SU) override { |
1743 | 47.7k | assert(!Queue.empty() && "Queue is empty!"); |
1744 | 47.7k | assert(SU->NodeQueueId != 0 && "Not in queue!"); |
1745 | 47.7k | std::vector<SUnit *>::iterator I = find(Queue, SU); |
1746 | 47.7k | if (I != std::prev(Queue.end())) |
1747 | 11.9k | std::swap(*I, Queue.back()); |
1748 | 47.7k | Queue.pop_back(); |
1749 | 47.7k | SU->NodeQueueId = 0; |
1750 | 47.7k | } |
1751 | | |
1752 | 14 | bool tracksRegPressure() const override { return TracksRegPressure; } |
1753 | | |
1754 | | void dumpRegPressure() const; |
1755 | | |
1756 | | bool HighRegPressure(const SUnit *SU) const; |
1757 | | |
1758 | | bool MayReduceRegPressure(SUnit *SU) const; |
1759 | | |
1760 | | int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; |
1761 | | |
1762 | | void scheduledNode(SUnit *SU) override; |
1763 | | |
1764 | | void unscheduledNode(SUnit *SU) override; |
1765 | | |
1766 | | protected: |
1767 | | bool canClobber(const SUnit *SU, const SUnit *Op); |
1768 | | void AddPseudoTwoAddrDeps(); |
1769 | | void PrescheduleNodesWithMultipleUses(); |
1770 | | void CalculateSethiUllmanNumbers(); |
1771 | | }; |
1772 | | |
1773 | | template<class SF> |
1774 | 29.6M | static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { |
1775 | 29.6M | std::vector<SUnit *>::iterator Best = Q.begin(); |
1776 | 77.1M | for (auto I = std::next(Q.begin()), E = Q.end(); I != E77.1M ; ++I47.4M ) |
1777 | 47.4M | if (47.4M Picker(*Best, *I)47.4M ) |
1778 | 23.0M | Best = I; |
1779 | 29.6M | SUnit *V = *Best; |
1780 | 29.6M | if (Best != std::prev(Q.end())) |
1781 | 6.04M | std::swap(*Best, Q.back()); |
1782 | 29.6M | Q.pop_back(); |
1783 | 29.6M | return V; |
1784 | 29.6M | } ScheduleDAGRRList.cpp:llvm::SUnit* (anonymous namespace)::popFromQueueImpl<(anonymous namespace)::bu_ls_rr_sort>(std::__1::vector<llvm::SUnit*, std::__1::allocator<llvm::SUnit*> >&, (anonymous namespace)::bu_ls_rr_sort&) Line | Count | Source | 1774 | 130k | static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { | 1775 | 130k | std::vector<SUnit *>::iterator Best = Q.begin(); | 1776 | 253k | for (auto I = std::next(Q.begin()), E = Q.end(); I != E253k ; ++I122k ) | 1777 | 122k | if (122k Picker(*Best, *I)122k ) | 1778 | 55.9k | Best = I; | 1779 | 130k | SUnit *V = *Best; | 1780 | 130k | if (Best != std::prev(Q.end())) | 1781 | 29.6k | std::swap(*Best, Q.back()); | 1782 | 130k | Q.pop_back(); | 1783 | 130k | return V; | 1784 | 130k | } |
ScheduleDAGRRList.cpp:llvm::SUnit* (anonymous namespace)::popFromQueueImpl<(anonymous namespace)::src_ls_rr_sort>(std::__1::vector<llvm::SUnit*, std::__1::allocator<llvm::SUnit*> >&, (anonymous namespace)::src_ls_rr_sort&) Line | Count | Source | 1774 | 29.1M | static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { | 1775 | 29.1M | std::vector<SUnit *>::iterator Best = Q.begin(); | 1776 | 76.0M | for (auto I = std::next(Q.begin()), E = Q.end(); I != E76.0M ; ++I46.8M ) | 1777 | 46.8M | if (46.8M Picker(*Best, *I)46.8M ) | 1778 | 22.8M | Best = I; | 1779 | 29.1M | SUnit *V = *Best; | 1780 | 29.1M | if (Best != std::prev(Q.end())) | 1781 | 5.91M | std::swap(*Best, Q.back()); | 1782 | 29.1M | Q.pop_back(); | 1783 | 29.1M | return V; | 1784 | 29.1M | } |
ScheduleDAGRRList.cpp:llvm::SUnit* (anonymous namespace)::popFromQueueImpl<(anonymous namespace)::hybrid_ls_rr_sort>(std::__1::vector<llvm::SUnit*, std::__1::allocator<llvm::SUnit*> >&, (anonymous namespace)::hybrid_ls_rr_sort&) Line | Count | Source | 1774 | 286k | static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { | 1775 | 286k | std::vector<SUnit *>::iterator Best = Q.begin(); | 1776 | 634k | for (auto I = std::next(Q.begin()), E = Q.end(); I != E634k ; ++I347k ) | 1777 | 347k | if (347k Picker(*Best, *I)347k ) | 1778 | 155k | Best = I; | 1779 | 286k | SUnit *V = *Best; | 1780 | 286k | if (Best != std::prev(Q.end())) | 1781 | 74.9k | std::swap(*Best, Q.back()); | 1782 | 286k | Q.pop_back(); | 1783 | 286k | return V; | 1784 | 286k | } |
ScheduleDAGRRList.cpp:llvm::SUnit* (anonymous namespace)::popFromQueueImpl<(anonymous namespace)::ilp_ls_rr_sort>(std::__1::vector<llvm::SUnit*, std::__1::allocator<llvm::SUnit*> >&, (anonymous namespace)::ilp_ls_rr_sort&) Line | Count | Source | 1774 | 121k | static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { | 1775 | 121k | std::vector<SUnit *>::iterator Best = Q.begin(); | 1776 | 274k | for (auto I = std::next(Q.begin()), E = Q.end(); I != E274k ; ++I152k ) | 1777 | 152k | if (152k Picker(*Best, *I)152k ) | 1778 | 65.4k | Best = I; | 1779 | 121k | SUnit *V = *Best; | 1780 | 121k | if (Best != std::prev(Q.end())) | 1781 | 26.4k | std::swap(*Best, Q.back()); | 1782 | 121k | Q.pop_back(); | 1783 | 121k | return V; | 1784 | 121k | } |
|
1785 | | |
1786 | | template<class SF> |
1787 | 29.6M | SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { |
1788 | | #ifndef NDEBUG |
1789 | | if (DAG->StressSched) { |
1790 | | reverse_sort<SF> RPicker(Picker); |
1791 | | return popFromQueueImpl(Q, RPicker); |
1792 | | } |
1793 | | #endif |
1794 | | (void)DAG; |
1795 | 29.6M | return popFromQueueImpl(Q, Picker); |
1796 | 29.6M | } ScheduleDAGRRList.cpp:llvm::SUnit* (anonymous namespace)::popFromQueue<(anonymous namespace)::ilp_ls_rr_sort>(std::__1::vector<llvm::SUnit*, std::__1::allocator<llvm::SUnit*> >&, (anonymous namespace)::ilp_ls_rr_sort&, llvm::ScheduleDAG*) Line | Count | Source | 1787 | 121k | SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { | 1788 | | #ifndef NDEBUG | 1789 | | if (DAG->StressSched) { | 1790 | | reverse_sort<SF> RPicker(Picker); | 1791 | | return popFromQueueImpl(Q, RPicker); | 1792 | | } | 1793 | | #endif | 1794 | | (void)DAG; | 1795 | 121k | return popFromQueueImpl(Q, Picker); | 1796 | 121k | } |
ScheduleDAGRRList.cpp:llvm::SUnit* (anonymous namespace)::popFromQueue<(anonymous namespace)::hybrid_ls_rr_sort>(std::__1::vector<llvm::SUnit*, std::__1::allocator<llvm::SUnit*> >&, (anonymous namespace)::hybrid_ls_rr_sort&, llvm::ScheduleDAG*) Line | Count | Source | 1787 | 286k | SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { | 1788 | | #ifndef NDEBUG | 1789 | | if (DAG->StressSched) { | 1790 | | reverse_sort<SF> RPicker(Picker); | 1791 | | return popFromQueueImpl(Q, RPicker); | 1792 | | } | 1793 | | #endif | 1794 | | (void)DAG; | 1795 | 286k | return popFromQueueImpl(Q, Picker); | 1796 | 286k | } |
ScheduleDAGRRList.cpp:llvm::SUnit* (anonymous namespace)::popFromQueue<(anonymous namespace)::src_ls_rr_sort>(std::__1::vector<llvm::SUnit*, std::__1::allocator<llvm::SUnit*> >&, (anonymous namespace)::src_ls_rr_sort&, llvm::ScheduleDAG*) Line | Count | Source | 1787 | 29.1M | SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { | 1788 | | #ifndef NDEBUG | 1789 | | if (DAG->StressSched) { | 1790 | | reverse_sort<SF> RPicker(Picker); | 1791 | | return popFromQueueImpl(Q, RPicker); | 1792 | | } | 1793 | | #endif | 1794 | | (void)DAG; | 1795 | 29.1M | return popFromQueueImpl(Q, Picker); | 1796 | 29.1M | } |
ScheduleDAGRRList.cpp:llvm::SUnit* (anonymous namespace)::popFromQueue<(anonymous namespace)::bu_ls_rr_sort>(std::__1::vector<llvm::SUnit*, std::__1::allocator<llvm::SUnit*> >&, (anonymous namespace)::bu_ls_rr_sort&, llvm::ScheduleDAG*) Line | Count | Source | 1787 | 130k | SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { | 1788 | | #ifndef NDEBUG | 1789 | | if (DAG->StressSched) { | 1790 | | reverse_sort<SF> RPicker(Picker); | 1791 | | return popFromQueueImpl(Q, RPicker); | 1792 | | } | 1793 | | #endif | 1794 | | (void)DAG; | 1795 | 130k | return popFromQueueImpl(Q, Picker); | 1796 | 130k | } |
|
1797 | | |
1798 | | template<class SF> |
1799 | | class RegReductionPriorityQueue : public RegReductionPQBase { |
1800 | | SF Picker; |
1801 | | |
1802 | | public: |
1803 | | RegReductionPriorityQueue(MachineFunction &mf, |
1804 | | bool tracksrp, |
1805 | | bool srcorder, |
1806 | | const TargetInstrInfo *tii, |
1807 | | const TargetRegisterInfo *tri, |
1808 | | const TargetLowering *tli) |
1809 | | : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, |
1810 | | tii, tri, tli), |
1811 | 3.42M | Picker(this) {} ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::ilp_ls_rr_sort>::RegReductionPriorityQueue(llvm::MachineFunction&, bool, bool, llvm::TargetInstrInfo const*, llvm::TargetRegisterInfo const*, llvm::TargetLowering const*) Line | Count | Source | 1811 | 15.3k | Picker(this) {} |
ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::bu_ls_rr_sort>::RegReductionPriorityQueue(llvm::MachineFunction&, bool, bool, llvm::TargetInstrInfo const*, llvm::TargetRegisterInfo const*, llvm::TargetLowering const*) Line | Count | Source | 1811 | 17.5k | Picker(this) {} |
ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::src_ls_rr_sort>::RegReductionPriorityQueue(llvm::MachineFunction&, bool, bool, llvm::TargetInstrInfo const*, llvm::TargetRegisterInfo const*, llvm::TargetLowering const*) Line | Count | Source | 1811 | 3.35M | Picker(this) {} |
ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::hybrid_ls_rr_sort>::RegReductionPriorityQueue(llvm::MachineFunction&, bool, bool, llvm::TargetInstrInfo const*, llvm::TargetRegisterInfo const*, llvm::TargetLowering const*) Line | Count | Source | 1811 | 37.1k | Picker(this) {} |
|
1812 | | |
1813 | 0 | bool isBottomUp() const override { return SF::IsBottomUp; } Unexecuted instantiation: ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::src_ls_rr_sort>::isBottomUp() const Unexecuted instantiation: ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::ilp_ls_rr_sort>::isBottomUp() const Unexecuted instantiation: ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::hybrid_ls_rr_sort>::isBottomUp() const Unexecuted instantiation: ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::bu_ls_rr_sort>::isBottomUp() const |
1814 | | |
1815 | 0 | bool isReady(SUnit *U) const override { |
1816 | 0 | return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); |
1817 | 0 | } Unexecuted instantiation: ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::bu_ls_rr_sort>::isReady(llvm::SUnit*) const Unexecuted instantiation: ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::ilp_ls_rr_sort>::isReady(llvm::SUnit*) const Unexecuted instantiation: ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::src_ls_rr_sort>::isReady(llvm::SUnit*) const Unexecuted instantiation: ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::hybrid_ls_rr_sort>::isReady(llvm::SUnit*) const |
1818 | | |
1819 | 29.6M | SUnit *pop() override { |
1820 | 29.6M | if (Queue.empty()29.6M ) return nullptr1.58k ; |
1821 | 29.6M | |
1822 | 29.6M | SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); |
1823 | 29.6M | V->NodeQueueId = 0; |
1824 | 29.6M | return V; |
1825 | 29.6M | } ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::hybrid_ls_rr_sort>::pop() Line | Count | Source | 1819 | 287k | SUnit *pop() override { | 1820 | 287k | if (Queue.empty()287k ) return nullptr62 ; | 1821 | 286k | | 1822 | 286k | SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); | 1823 | 286k | V->NodeQueueId = 0; | 1824 | 286k | return V; | 1825 | 286k | } |
ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::bu_ls_rr_sort>::pop() Line | Count | Source | 1819 | 130k | SUnit *pop() override { | 1820 | 130k | if (Queue.empty()130k ) return nullptr71 ; | 1821 | 130k | | 1822 | 130k | SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); | 1823 | 130k | V->NodeQueueId = 0; | 1824 | 130k | return V; | 1825 | 130k | } |
ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::src_ls_rr_sort>::pop() Line | Count | Source | 1819 | 29.1M | SUnit *pop() override { | 1820 | 29.1M | if (Queue.empty()29.1M ) return nullptr1.44k ; | 1821 | 29.1M | | 1822 | 29.1M | SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); | 1823 | 29.1M | V->NodeQueueId = 0; | 1824 | 29.1M | return V; | 1825 | 29.1M | } |
ScheduleDAGRRList.cpp:(anonymous namespace)::RegReductionPriorityQueue<(anonymous namespace)::ilp_ls_rr_sort>::pop() Line | Count | Source | 1819 | 121k | SUnit *pop() override { | 1820 | 121k | if (Queue.empty()121k ) return nullptr3 ; | 1821 | 121k | | 1822 | 121k | SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); | 1823 | 121k | V->NodeQueueId = 0; | 1824 | 121k | return V; | 1825 | 121k | } |
|
1826 | | |
1827 | | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1828 | | LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override { |
1829 | | // Emulate pop() without clobbering NodeQueueIds. |
1830 | | std::vector<SUnit*> DumpQueue = Queue; |
1831 | | SF DumpPicker = Picker; |
1832 | | while (!DumpQueue.empty()) { |
1833 | | SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); |
1834 | | dbgs() << "Height " << SU->getHeight() << ": "; |
1835 | | SU->dump(DAG); |
1836 | | } |
1837 | | } |
1838 | | #endif |
1839 | | }; |
1840 | | |
1841 | | typedef RegReductionPriorityQueue<bu_ls_rr_sort> |
1842 | | BURegReductionPriorityQueue; |
1843 | | |
1844 | | typedef RegReductionPriorityQueue<src_ls_rr_sort> |
1845 | | SrcRegReductionPriorityQueue; |
1846 | | |
1847 | | typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> |
1848 | | HybridBURRPriorityQueue; |
1849 | | |
1850 | | typedef RegReductionPriorityQueue<ilp_ls_rr_sort> |
1851 | | ILPBURRPriorityQueue; |
1852 | | } // end anonymous namespace |
1853 | | |
1854 | | //===----------------------------------------------------------------------===// |
1855 | | // Static Node Priority for Register Pressure Reduction |
1856 | | //===----------------------------------------------------------------------===// |
1857 | | |
1858 | | // Check for special nodes that bypass scheduling heuristics. |
1859 | | // Currently this pushes TokenFactor nodes down, but may be used for other |
1860 | | // pseudo-ops as well. |
1861 | | // |
1862 | | // Return -1 to schedule right above left, 1 for left above right. |
1863 | | // Return 0 if no bias exists. |
1864 | 47.4M | static int checkSpecialNodes(const SUnit *left, const SUnit *right) { |
1865 | 47.4M | bool LSchedLow = left->isScheduleLow; |
1866 | 47.4M | bool RSchedLow = right->isScheduleLow; |
1867 | 47.4M | if (LSchedLow != RSchedLow) |
1868 | 597k | return LSchedLow < RSchedLow ? 597k 1279k : -1318k ; |
1869 | 46.8M | return 0; |
1870 | 46.8M | } |
1871 | | |
1872 | | /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. |
1873 | | /// Smaller number is the higher priority. |
1874 | | static unsigned |
1875 | 29.6M | CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { |
1876 | 29.6M | if (SUNumbers[SU->NodeNum] != 0) |
1877 | 16.4M | return SUNumbers[SU->NodeNum]; |
1878 | 13.1M | |
1879 | 13.1M | // Use WorkList to avoid stack overflow on excessively large IRs. |
1880 | 13.1M | struct WorkState { |
1881 | 29.6M | WorkState(const SUnit *SU) : SU(SU) {} |
1882 | 13.1M | const SUnit *SU; |
1883 | 13.1M | unsigned PredsProcessed = 0; |
1884 | 13.1M | }; |
1885 | 13.1M | |
1886 | 13.1M | SmallVector<WorkState, 16> WorkList; |
1887 | 13.1M | WorkList.push_back(SU); |
1888 | 59.2M | while (!WorkList.empty()59.2M ) { |
1889 | 46.1M | auto &Temp = WorkList.back(); |
1890 | 46.1M | auto *TempSU = Temp.SU; |
1891 | 46.1M | bool AllPredsKnown = true; |
1892 | 46.1M | // Try to find a non-evaluated pred and push it into the processing stack. |
1893 | 63.8M | for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size()63.8M ; ++P17.6M ) { |
1894 | 34.1M | auto &Pred = TempSU->Preds[P]; |
1895 | 34.1M | if (Pred.isCtrl()34.1M ) continue12.6M ; // ignore chain preds |
1896 | 21.4M | SUnit *PredSU = Pred.getSUnit(); |
1897 | 21.4M | if (SUNumbers[PredSU->NodeNum] == 021.4M ) { |
1898 | | #ifndef NDEBUG |
1899 | | // In debug mode, check that we don't have such element in the stack. |
1900 | | for (auto It : WorkList) |
1901 | | assert(It.SU != PredSU && "Trying to push an element twice?"); |
1902 | | #endif |
1903 | | // Next time start processing this one starting from the next pred. |
1904 | 16.4M | Temp.PredsProcessed = P + 1; |
1905 | 16.4M | WorkList.push_back(PredSU); |
1906 | 16.4M | AllPredsKnown = false; |
1907 | 16.4M | break; |
1908 | 16.4M | } |
1909 | 34.1M | } |
1910 | 46.1M | |
1911 | 46.1M | if (!AllPredsKnown) |
1912 | 16.4M | continue; |
1913 | 29.6M | |
1914 | 29.6M | // Once all preds are known, we can calculate the answer for this one. |
1915 | 29.6M | unsigned SethiUllmanNumber = 0; |
1916 | 29.6M | unsigned Extra = 0; |
1917 | 34.1M | for (const SDep &Pred : TempSU->Preds) { |
1918 | 34.1M | if (Pred.isCtrl()34.1M ) continue12.6M ; // ignore chain preds |
1919 | 21.4M | SUnit *PredSU = Pred.getSUnit(); |
1920 | 21.4M | unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum]; |
1921 | 21.4M | assert(PredSethiUllman > 0 && "We should have evaluated this pred!"); |
1922 | 21.4M | if (PredSethiUllman > SethiUllmanNumber21.4M ) { |
1923 | 16.2M | SethiUllmanNumber = PredSethiUllman; |
1924 | 16.2M | Extra = 0; |
1925 | 21.4M | } else if (5.19M PredSethiUllman == SethiUllmanNumber5.19M ) |
1926 | 4.09M | ++Extra; |
1927 | 34.1M | } |
1928 | 29.6M | |
1929 | 29.6M | SethiUllmanNumber += Extra; |
1930 | 29.6M | if (SethiUllmanNumber == 0) |
1931 | 14.3M | SethiUllmanNumber = 1; |
1932 | 46.1M | SUNumbers[TempSU->NodeNum] = SethiUllmanNumber; |
1933 | 46.1M | WorkList.pop_back(); |
1934 | 46.1M | } |
1935 | 29.6M | |
1936 | 29.6M | assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!"); |
1937 | 29.6M | return SUNumbers[SU->NodeNum]; |
1938 | 29.6M | } |
1939 | | |
1940 | | /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all |
1941 | | /// scheduling units. |
1942 | 3.42M | void RegReductionPQBase::CalculateSethiUllmanNumbers() { |
1943 | 3.42M | SethiUllmanNumbers.assign(SUnits->size(), 0); |
1944 | 3.42M | |
1945 | 3.42M | for (const SUnit &SU : *SUnits) |
1946 | 29.6M | CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers); |
1947 | 3.42M | } |
1948 | | |
1949 | 650 | void RegReductionPQBase::addNode(const SUnit *SU) { |
1950 | 650 | unsigned SUSize = SethiUllmanNumbers.size(); |
1951 | 650 | if (SUnits->size() > SUSize) |
1952 | 322 | SethiUllmanNumbers.resize(SUSize*2, 0); |
1953 | 650 | CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); |
1954 | 650 | } |
1955 | | |
1956 | 572 | void RegReductionPQBase::updateNode(const SUnit *SU) { |
1957 | 572 | SethiUllmanNumbers[SU->NodeNum] = 0; |
1958 | 572 | CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); |
1959 | 572 | } |
1960 | | |
1961 | | // Lower priority means schedule further down. For bottom-up scheduling, lower |
1962 | | // priority SUs are scheduled before higher priority SUs. |
1963 | 30.6M | unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { |
1964 | 30.6M | assert(SU->NodeNum < SethiUllmanNumbers.size()); |
1965 | 30.6M | unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode()30.6M : 017 ; |
1966 | 30.6M | if (Opc == ISD::TokenFactor || 30.6M Opc == ISD::CopyToReg30.6M ) |
1967 | 30.6M | // CopyToReg should be close to its uses to facilitate coalescing and |
1968 | 30.6M | // avoid spilling. |
1969 | 1.23M | return 0; |
1970 | 29.4M | if (29.4M Opc == TargetOpcode::EXTRACT_SUBREG || |
1971 | 29.4M | Opc == TargetOpcode::SUBREG_TO_REG || |
1972 | 29.4M | Opc == TargetOpcode::INSERT_SUBREG) |
1973 | 29.4M | // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be |
1974 | 29.4M | // close to their uses to facilitate coalescing. |
1975 | 0 | return 0; |
1976 | 29.4M | if (29.4M SU->NumSuccs == 0 && 29.4M SU->NumPreds != 07.65M ) |
1977 | 29.4M | // If SU does not have a register use, i.e. it doesn't produce a value |
1978 | 29.4M | // that would be consumed (e.g. store), then it terminates a chain of |
1979 | 29.4M | // computation. Give it a large SethiUllman number so it will be |
1980 | 29.4M | // scheduled right before its predecessors that it doesn't lengthen |
1981 | 29.4M | // their live ranges. |
1982 | 6.21M | return 0xffff; |
1983 | 23.2M | if (23.2M SU->NumPreds == 0 && 23.2M SU->NumSuccs != 09.84M ) |
1984 | 23.2M | // If SU does not have a register def, schedule it close to its uses |
1985 | 23.2M | // because it does not lengthen any live ranges. |
1986 | 8.40M | return 0; |
1987 | 14.8M | #if 1 |
1988 | 14.8M | return SethiUllmanNumbers[SU->NodeNum]; |
1989 | | #else |
1990 | | unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; |
1991 | | if (SU->isCallOp) { |
1992 | | // FIXME: This assumes all of the defs are used as call operands. |
1993 | | int NP = (int)Priority - SU->getNode()->getNumValues(); |
1994 | | return (NP > 0) ? NP : 0; |
1995 | | } |
1996 | | return Priority; |
1997 | | #endif |
1998 | | } |
1999 | | |
2000 | | //===----------------------------------------------------------------------===// |
2001 | | // Register Pressure Tracking |
2002 | | //===----------------------------------------------------------------------===// |
2003 | | |
2004 | | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2005 | | LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const { |
2006 | | for (const TargetRegisterClass *RC : TRI->regclasses()) { |
2007 | | unsigned Id = RC->getID(); |
2008 | | unsigned RP = RegPressure[Id]; |
2009 | | if (!RP) continue; |
2010 | | DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / " |
2011 | | << RegLimit[Id] << '\n'); |
2012 | | } |
2013 | | } |
2014 | | #endif |
2015 | | |
2016 | 666k | bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { |
2017 | 666k | if (!TLI) |
2018 | 0 | return false; |
2019 | 666k | |
2020 | 666k | for (const SDep &Pred : SU->Preds) 666k { |
2021 | 740k | if (Pred.isCtrl()) |
2022 | 88.8k | continue; |
2023 | 651k | SUnit *PredSU = Pred.getSUnit(); |
2024 | 651k | // NumRegDefsLeft is zero when enough uses of this node have been scheduled |
2025 | 651k | // to cover the number of registers defined (they are all live). |
2026 | 651k | if (PredSU->NumRegDefsLeft == 0651k ) { |
2027 | 235k | continue; |
2028 | 235k | } |
2029 | 416k | for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); |
2030 | 843k | RegDefPos.IsValid()843k ; RegDefPos.Advance()426k ) { |
2031 | 454k | unsigned RCId, Cost; |
2032 | 454k | GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); |
2033 | 454k | |
2034 | 454k | if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) |
2035 | 27.3k | return true; |
2036 | 454k | } |
2037 | 740k | } |
2038 | 639k | return false; |
2039 | 666k | } |
2040 | | |
2041 | 0 | bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { |
2042 | 0 | const SDNode *N = SU->getNode(); |
2043 | 0 |
|
2044 | 0 | if (!N->isMachineOpcode() || 0 !SU->NumSuccs0 ) |
2045 | 0 | return false; |
2046 | 0 |
|
2047 | 0 | unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); |
2048 | 0 | for (unsigned i = 0; i != NumDefs0 ; ++i0 ) { |
2049 | 0 | MVT VT = N->getSimpleValueType(i); |
2050 | 0 | if (!N->hasAnyUseOfValue(i)) |
2051 | 0 | continue; |
2052 | 0 | unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); |
2053 | 0 | if (RegPressure[RCId] >= RegLimit[RCId]) |
2054 | 0 | return true; |
2055 | 0 | } |
2056 | 0 | return false; |
2057 | 0 | } |
2058 | | |
2059 | | // Compute the register pressure contribution by this instruction by count up |
2060 | | // for uses that are not live and down for defs. Only count register classes |
2061 | | // that are already under high pressure. As a side effect, compute the number of |
2062 | | // uses of registers that are already live. |
2063 | | // |
2064 | | // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure |
2065 | | // so could probably be factored. |
2066 | 293k | int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { |
2067 | 293k | LiveUses = 0; |
2068 | 293k | int PDiff = 0; |
2069 | 310k | for (const SDep &Pred : SU->Preds) { |
2070 | 310k | if (Pred.isCtrl()) |
2071 | 26.1k | continue; |
2072 | 284k | SUnit *PredSU = Pred.getSUnit(); |
2073 | 284k | // NumRegDefsLeft is zero when enough uses of this node have been scheduled |
2074 | 284k | // to cover the number of registers defined (they are all live). |
2075 | 284k | if (PredSU->NumRegDefsLeft == 0284k ) { |
2076 | 67.8k | if (PredSU->getNode()->isMachineOpcode()) |
2077 | 56.0k | ++LiveUses; |
2078 | 67.8k | continue; |
2079 | 67.8k | } |
2080 | 216k | for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); |
2081 | 433k | RegDefPos.IsValid()433k ; RegDefPos.Advance()217k ) { |
2082 | 217k | MVT VT = RegDefPos.GetValue(); |
2083 | 217k | unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); |
2084 | 217k | if (RegPressure[RCId] >= RegLimit[RCId]) |
2085 | 82.3k | ++PDiff; |
2086 | 217k | } |
2087 | 310k | } |
2088 | 293k | const SDNode *N = SU->getNode(); |
2089 | 293k | |
2090 | 293k | if (!N || 293k !N->isMachineOpcode()293k || !SU->NumSuccs193k ) |
2091 | 165k | return PDiff; |
2092 | 128k | |
2093 | 128k | unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); |
2094 | 257k | for (unsigned i = 0; i != NumDefs257k ; ++i128k ) { |
2095 | 128k | MVT VT = N->getSimpleValueType(i); |
2096 | 128k | if (!N->hasAnyUseOfValue(i)) |
2097 | 6 | continue; |
2098 | 128k | unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); |
2099 | 128k | if (RegPressure[RCId] >= RegLimit[RCId]) |
2100 | 25.2k | --PDiff; |
2101 | 128k | } |
2102 | 293k | return PDiff; |
2103 | 293k | } |
2104 | | |
2105 | 29.6M | void RegReductionPQBase::scheduledNode(SUnit *SU) { |
2106 | 29.6M | if (!TracksRegPressure) |
2107 | 29.2M | return; |
2108 | 408k | |
2109 | 408k | if (408k !SU->getNode()408k ) |
2110 | 4 | return; |
2111 | 408k | |
2112 | 408k | for (const SDep &Pred : SU->Preds) 408k { |
2113 | 461k | if (Pred.isCtrl()) |
2114 | 159k | continue; |
2115 | 302k | SUnit *PredSU = Pred.getSUnit(); |
2116 | 302k | // NumRegDefsLeft is zero when enough uses of this node have been scheduled |
2117 | 302k | // to cover the number of registers defined (they are all live). |
2118 | 302k | if (PredSU->NumRegDefsLeft == 0302k ) { |
2119 | 60.3k | continue; |
2120 | 60.3k | } |
2121 | 241k | // FIXME: The ScheduleDAG currently loses information about which of a |
2122 | 241k | // node's values is consumed by each dependence. Consequently, if the node |
2123 | 241k | // defines multiple register classes, we don't know which to pressurize |
2124 | 241k | // here. Instead the following loop consumes the register defs in an |
2125 | 241k | // arbitrary order. At least it handles the common case of clustered loads |
2126 | 241k | // to the same class. For precise liveness, each SDep needs to indicate the |
2127 | 241k | // result number. But that tightly couples the ScheduleDAG with the |
2128 | 241k | // SelectionDAG making updates tricky. A simpler hack would be to attach a |
2129 | 241k | // value type or register class to SDep. |
2130 | 241k | // |
2131 | 241k | // The most important aspect of register tracking is balancing the increase |
2132 | 241k | // here with the reduction further below. Note that this SU may use multiple |
2133 | 241k | // defs in PredSU. The can't be determined here, but we've already |
2134 | 241k | // compensated by reducing NumRegDefsLeft in PredSU during |
2135 | 241k | // ScheduleDAGSDNodes::AddSchedEdges. |
2136 | 241k | --PredSU->NumRegDefsLeft; |
2137 | 241k | unsigned SkipRegDefs = PredSU->NumRegDefsLeft; |
2138 | 241k | for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); |
2139 | 244k | RegDefPos.IsValid()244k ; RegDefPos.Advance(), --SkipRegDefs2.34k ) { |
2140 | 244k | if (SkipRegDefs) |
2141 | 2.34k | continue; |
2142 | 241k | |
2143 | 241k | unsigned RCId, Cost; |
2144 | 241k | GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); |
2145 | 241k | RegPressure[RCId] += Cost; |
2146 | 241k | break; |
2147 | 241k | } |
2148 | 461k | } |
2149 | 408k | |
2150 | 408k | // We should have this assert, but there may be dead SDNodes that never |
2151 | 408k | // materialize as SUnits, so they don't appear to generate liveness. |
2152 | 408k | //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); |
2153 | 408k | int SkipRegDefs = (int)SU->NumRegDefsLeft; |
2154 | 408k | for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); |
2155 | 654k | RegDefPos.IsValid()654k ; RegDefPos.Advance(), --SkipRegDefs246k ) { |
2156 | 246k | if (SkipRegDefs > 0) |
2157 | 9 | continue; |
2158 | 246k | unsigned RCId, Cost; |
2159 | 246k | GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); |
2160 | 246k | if (RegPressure[RCId] < Cost246k ) { |
2161 | 4.54k | // Register pressure tracking is imprecise. This can happen. But we try |
2162 | 4.54k | // hard not to let it happen because it likely results in poor scheduling. |
2163 | 4.54k | DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); |
2164 | 4.54k | RegPressure[RCId] = 0; |
2165 | 4.54k | } |
2166 | 241k | else { |
2167 | 241k | RegPressure[RCId] -= Cost; |
2168 | 241k | } |
2169 | 246k | } |
2170 | 408k | DEBUG(dumpRegPressure()); |
2171 | 29.6M | } |
2172 | | |
2173 | 58.2k | void RegReductionPQBase::unscheduledNode(SUnit *SU) { |
2174 | 58.2k | if (!TracksRegPressure) |
2175 | 57.9k | return; |
2176 | 228 | |
2177 | 228 | const SDNode *N = SU->getNode(); |
2178 | 228 | if (!N228 ) return0 ; |
2179 | 228 | |
2180 | 228 | if (228 !N->isMachineOpcode()228 ) { |
2181 | 46 | if (N->getOpcode() != ISD::CopyToReg) |
2182 | 46 | return; |
2183 | 182 | } else { |
2184 | 182 | unsigned Opc = N->getMachineOpcode(); |
2185 | 182 | if (Opc == TargetOpcode::EXTRACT_SUBREG || |
2186 | 181 | Opc == TargetOpcode::INSERT_SUBREG || |
2187 | 181 | Opc == TargetOpcode::SUBREG_TO_REG || |
2188 | 181 | Opc == TargetOpcode::REG_SEQUENCE || |
2189 | 181 | Opc == TargetOpcode::IMPLICIT_DEF) |
2190 | 1 | return; |
2191 | 181 | } |
2192 | 181 | |
2193 | 181 | for (const SDep &Pred : SU->Preds) 181 { |
2194 | 284 | if (Pred.isCtrl()) |
2195 | 31 | continue; |
2196 | 253 | SUnit *PredSU = Pred.getSUnit(); |
2197 | 253 | // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only |
2198 | 253 | // counts data deps. |
2199 | 253 | if (PredSU->NumSuccsLeft != PredSU->Succs.size()) |
2200 | 87 | continue; |
2201 | 166 | const SDNode *PN = PredSU->getNode(); |
2202 | 166 | if (!PN->isMachineOpcode()166 ) { |
2203 | 42 | if (PN->getOpcode() == ISD::CopyFromReg42 ) { |
2204 | 42 | MVT VT = PN->getSimpleValueType(0); |
2205 | 42 | unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); |
2206 | 42 | RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); |
2207 | 42 | } |
2208 | 42 | continue; |
2209 | 42 | } |
2210 | 124 | unsigned POpc = PN->getMachineOpcode(); |
2211 | 124 | if (POpc == TargetOpcode::IMPLICIT_DEF) |
2212 | 0 | continue; |
2213 | 124 | if (124 POpc == TargetOpcode::EXTRACT_SUBREG || |
2214 | 124 | POpc == TargetOpcode::INSERT_SUBREG || |
2215 | 124 | POpc == TargetOpcode::SUBREG_TO_REG124 ) { |
2216 | 2 | MVT VT = PN->getSimpleValueType(0); |
2217 | 2 | unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); |
2218 | 2 | RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); |
2219 | 2 | continue; |
2220 | 2 | } |
2221 | 122 | unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); |
2222 | 228 | for (unsigned i = 0; i != NumDefs228 ; ++i106 ) { |
2223 | 106 | MVT VT = PN->getSimpleValueType(i); |
2224 | 106 | if (!PN->hasAnyUseOfValue(i)) |
2225 | 3 | continue; |
2226 | 103 | unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); |
2227 | 103 | if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) |
2228 | 103 | // Register pressure tracking is imprecise. This can happen. |
2229 | 20 | RegPressure[RCId] = 0; |
2230 | 103 | else |
2231 | 83 | RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); |
2232 | 106 | } |
2233 | 284 | } |
2234 | 181 | |
2235 | 181 | // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() |
2236 | 181 | // may transfer data dependencies to CopyToReg. |
2237 | 181 | if (SU->NumSuccs && 181 N->isMachineOpcode()161 ) { |
2238 | 161 | unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); |
2239 | 216 | for (unsigned i = NumDefs, e = N->getNumValues(); i != e216 ; ++i55 ) { |
2240 | 55 | MVT VT = N->getSimpleValueType(i); |
2241 | 55 | if (VT == MVT::Glue || 55 VT == MVT::Other55 ) |
2242 | 6 | continue; |
2243 | 49 | if (49 !N->hasAnyUseOfValue(i)49 ) |
2244 | 48 | continue; |
2245 | 1 | unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); |
2246 | 1 | RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); |
2247 | 1 | } |
2248 | 161 | } |
2249 | 181 | |
2250 | 181 | DEBUG(dumpRegPressure()); |
2251 | 58.2k | } |
2252 | | |
2253 | | //===----------------------------------------------------------------------===// |
2254 | | // Dynamic Node Priority for Register Pressure Reduction |
2255 | | //===----------------------------------------------------------------------===// |
2256 | | |
2257 | | /// closestSucc - Returns the scheduled cycle of the successor which is |
2258 | | /// closest to the current cycle. |
2259 | 25.7M | static unsigned closestSucc(const SUnit *SU) { |
2260 | 25.7M | unsigned MaxHeight = 0; |
2261 | 29.8M | for (const SDep &Succ : SU->Succs) { |
2262 | 29.8M | if (Succ.isCtrl()29.8M ) continue7.98M ; // ignore chain succs |
2263 | 21.8M | unsigned Height = Succ.getSUnit()->getHeight(); |
2264 | 21.8M | // If there are bunch of CopyToRegs stacked up, they should be considered |
2265 | 21.8M | // to be at the same position. |
2266 | 21.8M | if (Succ.getSUnit()->getNode() && |
2267 | 21.8M | Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) |
2268 | 966k | Height = closestSucc(Succ.getSUnit())+1; |
2269 | 21.8M | if (Height > MaxHeight) |
2270 | 19.2M | MaxHeight = Height; |
2271 | 29.8M | } |
2272 | 25.7M | return MaxHeight; |
2273 | 25.7M | } |
2274 | | |
2275 | | /// calcMaxScratches - Returns an cost estimate of the worse case requirement |
2276 | | /// for scratch registers, i.e. number of data dependencies. |
2277 | 11.1M | static unsigned calcMaxScratches(const SUnit *SU) { |
2278 | 11.1M | unsigned Scratches = 0; |
2279 | 16.7M | for (const SDep &Pred : SU->Preds) { |
2280 | 16.7M | if (Pred.isCtrl()16.7M ) continue4.13M ; // ignore chain preds |
2281 | 12.6M | Scratches++; |
2282 | 12.6M | } |
2283 | 11.1M | return Scratches; |
2284 | 11.1M | } |
2285 | | |
2286 | | /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are |
2287 | | /// CopyFromReg from a virtual register. |
2288 | 3.30M | static bool hasOnlyLiveInOpers(const SUnit *SU) { |
2289 | 3.30M | bool RetVal = false; |
2290 | 3.51M | for (const SDep &Pred : SU->Preds) { |
2291 | 3.51M | if (Pred.isCtrl()3.51M ) continue1.23M ; |
2292 | 2.27M | const SUnit *PredSU = Pred.getSUnit(); |
2293 | 2.27M | if (PredSU->getNode() && |
2294 | 2.27M | PredSU->getNode()->getOpcode() == ISD::CopyFromReg2.27M ) { |
2295 | 616k | unsigned Reg = |
2296 | 616k | cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); |
2297 | 616k | if (TargetRegisterInfo::isVirtualRegister(Reg)616k ) { |
2298 | 509k | RetVal = true; |
2299 | 509k | continue; |
2300 | 509k | } |
2301 | 1.76M | } |
2302 | 1.76M | return false; |
2303 | 1.76M | } |
2304 | 1.53M | return RetVal; |
2305 | 1.53M | } |
2306 | | |
2307 | | /// hasOnlyLiveOutUses - Return true if SU has only value successors that are |
2308 | | /// CopyToReg to a virtual register. This SU def is probably a liveout and |
2309 | | /// it has no other use. It should be scheduled closer to the terminator. |
2310 | 385k | static bool hasOnlyLiveOutUses(const SUnit *SU) { |
2311 | 385k | bool RetVal = false; |
2312 | 449k | for (const SDep &Succ : SU->Succs) { |
2313 | 449k | if (Succ.isCtrl()449k ) continue63.1k ; |
2314 | 386k | const SUnit *SuccSU = Succ.getSUnit(); |
2315 | 386k | if (SuccSU->getNode() && 386k SuccSU->getNode()->getOpcode() == ISD::CopyToReg386k ) { |
2316 | 73.1k | unsigned Reg = |
2317 | 73.1k | cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); |
2318 | 73.1k | if (TargetRegisterInfo::isVirtualRegister(Reg)73.1k ) { |
2319 | 73.1k | RetVal = true; |
2320 | 73.1k | continue; |
2321 | 73.1k | } |
2322 | 313k | } |
2323 | 313k | return false; |
2324 | 313k | } |
2325 | 71.9k | return RetVal; |
2326 | 71.9k | } |
2327 | | |
2328 | | // Set isVRegCycle for a node with only live in opers and live out uses. Also |
2329 | | // set isVRegCycle for its CopyFromReg operands. |
2330 | | // |
2331 | | // This is only relevant for single-block loops, in which case the VRegCycle |
2332 | | // node is likely an induction variable in which the operand and target virtual |
2333 | | // registers should be coalesced (e.g. pre/post increment values). Setting the |
2334 | | // isVRegCycle flag helps the scheduler prioritize other uses of the same |
2335 | | // CopyFromReg so that this node becomes the virtual register "kill". This |
2336 | | // avoids interference between the values live in and out of the block and |
2337 | | // eliminates a copy inside the loop. |
2338 | 3.30M | static void initVRegCycle(SUnit *SU) { |
2339 | 3.30M | if (DisableSchedVRegCycle) |
2340 | 0 | return; |
2341 | 3.30M | |
2342 | 3.30M | if (3.30M !hasOnlyLiveInOpers(SU) || 3.30M !hasOnlyLiveOutUses(SU)385k ) |
2343 | 3.24M | return; |
2344 | 61.1k | |
2345 | 61.1k | DEBUG61.1k (dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); |
2346 | 61.1k | |
2347 | 61.1k | SU->isVRegCycle = true; |
2348 | 61.1k | |
2349 | 63.5k | for (const SDep &Pred : SU->Preds) { |
2350 | 63.5k | if (Pred.isCtrl()63.5k ) continue250 ; |
2351 | 63.2k | Pred.getSUnit()->isVRegCycle = true; |
2352 | 63.2k | } |
2353 | 3.30M | } |
2354 | | |
2355 | | // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of |
2356 | | // CopyFromReg operands. We should no longer penalize other uses of this VReg. |
2357 | 29.6M | static void resetVRegCycle(SUnit *SU) { |
2358 | 29.6M | if (!SU->isVRegCycle) |
2359 | 29.6M | return; |
2360 | 61.1k | |
2361 | 61.1k | for (const SDep &Pred : SU->Preds) 61.1k { |
2362 | 63.5k | if (Pred.isCtrl()63.5k ) continue250 ; // ignore chain preds |
2363 | 63.2k | SUnit *PredSU = Pred.getSUnit(); |
2364 | 63.2k | if (PredSU->isVRegCycle63.2k ) { |
2365 | 63.0k | assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && |
2366 | 63.0k | "VRegCycle def must be CopyFromReg"); |
2367 | 63.0k | Pred.getSUnit()->isVRegCycle = false; |
2368 | 63.0k | } |
2369 | 63.5k | } |
2370 | 29.6M | } |
2371 | | |
2372 | | // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This |
2373 | | // means a node that defines the VRegCycle has not been scheduled yet. |
2374 | 11.4M | static bool hasVRegCycleUse(const SUnit *SU) { |
2375 | 11.4M | // If this SU also defines the VReg, don't hoist it as a "use". |
2376 | 11.4M | if (SU->isVRegCycle) |
2377 | 1.31k | return false; |
2378 | 11.4M | |
2379 | 11.4M | for (const SDep &Pred : SU->Preds) 11.4M { |
2380 | 16.6M | if (Pred.isCtrl()16.6M ) continue3.96M ; // ignore chain preds |
2381 | 12.6M | if (12.6M Pred.getSUnit()->isVRegCycle && |
2382 | 12.6M | Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg3.46k ) { |
2383 | 1.30k | DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); |
2384 | 1.30k | return true; |
2385 | 1.30k | } |
2386 | 11.4M | } |
2387 | 11.4M | return false; |
2388 | 11.4M | } |
2389 | | |
2390 | | // Check for either a dependence (latency) or resource (hazard) stall. |
2391 | | // |
2392 | | // Note: The ScheduleHazardRecognizer interface requires a non-const SU. |
2393 | 10.9M | static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { |
2394 | 10.9M | if ((int)SPQ->getCurCycle() < Height10.9M ) return true107k ; |
2395 | 10.8M | if (10.8M SPQ->getHazardRec()->getHazardType(SU, 0) |
2396 | 10.8M | != ScheduleHazardRecognizer::NoHazard) |
2397 | 80.2k | return true; |
2398 | 10.7M | return false; |
2399 | 10.7M | } |
2400 | | |
2401 | | // Return -1 if left has higher priority, 1 if right has higher priority. |
2402 | | // Return 0 if latency-based priority is equivalent. |
2403 | | static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, |
2404 | 5.72M | RegReductionPQBase *SPQ) { |
2405 | 5.72M | // Scheduling an instruction that uses a VReg whose postincrement has not yet |
2406 | 5.72M | // been scheduled will induce a copy. Model this as an extra cycle of latency. |
2407 | 5.72M | int LPenalty = hasVRegCycleUse(left) ? 1558 : 05.72M ; |
2408 | 5.72M | int RPenalty = hasVRegCycleUse(right) ? 1743 : 05.72M ; |
2409 | 5.72M | int LHeight = (int)left->getHeight() + LPenalty; |
2410 | 5.72M | int RHeight = (int)right->getHeight() + RPenalty; |
2411 | 5.72M | |
2412 | 312k | bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) && |
2413 | 5.49M | BUHasStall(left, LHeight, SPQ); |
2414 | 312k | bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) && |
2415 | 5.49M | BUHasStall(right, RHeight, SPQ); |
2416 | 5.72M | |
2417 | 5.72M | // If scheduling one of the node will cause a pipeline stall, delay it. |
2418 | 5.72M | // If scheduling either one of the node will cause a pipeline stall, sort |
2419 | 5.72M | // them according to their height. |
2420 | 5.72M | if (LStall5.72M ) { |
2421 | 90.2k | if (!RStall) |
2422 | 19.3k | return 1; |
2423 | 70.9k | if (70.9k LHeight != RHeight70.9k ) |
2424 | 10.4k | return LHeight > RHeight ? 10.4k 12.71k : -17.76k ; |
2425 | 5.63M | } else if (5.63M RStall5.63M ) |
2426 | 26.0k | return -1; |
2427 | 5.66M | |
2428 | 5.66M | // If either node is scheduling for latency, sort them by height/depth |
2429 | 5.66M | // and latency. |
2430 | 5.66M | if (5.66M !checkPref || 5.66M (left->SchedulingPref == Sched::ILP || |
2431 | 5.66M | right->SchedulingPref == Sched::ILP207k )) { |
2432 | 5.46M | // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer |
2433 | 5.46M | // is enabled, grouping instructions by cycle, then its height is already |
2434 | 5.46M | // covered so only its depth matters. We also reach this point if both stall |
2435 | 5.46M | // but have the same height. |
2436 | 5.46M | if (!SPQ->getHazardRec()->isEnabled()5.46M ) { |
2437 | 5.28M | if (LHeight != RHeight) |
2438 | 499k | return LHeight > RHeight ? 499k 1329k : -1170k ; |
2439 | 4.96M | } |
2440 | 4.96M | int LDepth = left->getDepth() - LPenalty; |
2441 | 4.96M | int RDepth = right->getDepth() - RPenalty; |
2442 | 4.96M | if (LDepth != RDepth4.96M ) { |
2443 | 670k | DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum |
2444 | 670k | << ") depth " << LDepth << " vs SU (" << right->NodeNum |
2445 | 670k | << ") depth " << RDepth << "\n"); |
2446 | 670k | return LDepth < RDepth ? 1359k : -1311k ; |
2447 | 670k | } |
2448 | 4.29M | if (4.29M left->Latency != right->Latency4.29M ) |
2449 | 9.61k | return left->Latency > right->Latency ? 9.61k 16.72k : -12.89k ; |
2450 | 4.48M | } |
2451 | 4.48M | return 0; |
2452 | 4.48M | } |
2453 | | |
2454 | 15.4M | static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { |
2455 | 15.4M | // Schedule physical register definitions close to their use. This is |
2456 | 15.4M | // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as |
2457 | 15.4M | // long as shortening physreg live ranges is generally good, we can defer |
2458 | 15.4M | // creating a subtarget hook. |
2459 | 15.4M | if (!DisableSchedPhysRegJoin15.4M ) { |
2460 | 15.4M | bool LHasPhysReg = left->hasPhysRegDefs; |
2461 | 15.4M | bool RHasPhysReg = right->hasPhysRegDefs; |
2462 | 15.4M | if (LHasPhysReg != RHasPhysReg15.4M ) { |
2463 | | #ifndef NDEBUG |
2464 | | static const char *const PhysRegMsg[] = { " has no physreg", |
2465 | | " defines a physreg" }; |
2466 | | #endif |
2467 | 98.4k | DEBUG(dbgs() << " SU (" << left->NodeNum << ") " |
2468 | 98.4k | << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " |
2469 | 98.4k | << PhysRegMsg[RHasPhysReg] << "\n"); |
2470 | 98.4k | return LHasPhysReg < RHasPhysReg; |
2471 | 98.4k | } |
2472 | 15.3M | } |
2473 | 15.3M | |
2474 | 15.3M | // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down. |
2475 | 15.3M | unsigned LPriority = SPQ->getNodePriority(left); |
2476 | 15.3M | unsigned RPriority = SPQ->getNodePriority(right); |
2477 | 15.3M | |
2478 | 15.3M | // Be really careful about hoisting call operands above previous calls. |
2479 | 15.3M | // Only allows it if it would reduce register pressure. |
2480 | 15.3M | if (left->isCall && 15.3M right->isCallOp35.4k ) { |
2481 | 1.77k | unsigned RNumVals = right->getNode()->getNumValues(); |
2482 | 1.77k | RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals)517 : 01.25k ; |
2483 | 1.77k | } |
2484 | 15.3M | if (right->isCall && 15.3M left->isCallOp96.8k ) { |
2485 | 1.68k | unsigned LNumVals = left->getNode()->getNumValues(); |
2486 | 1.68k | LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals)418 : 01.26k ; |
2487 | 1.68k | } |
2488 | 15.3M | |
2489 | 15.3M | if (LPriority != RPriority) |
2490 | 2.94M | return LPriority > RPriority; |
2491 | 12.3M | |
2492 | 12.3M | // One or both of the nodes are calls and their sethi-ullman numbers are the |
2493 | 12.3M | // same, then keep source order. |
2494 | 12.3M | if (12.3M left->isCall || 12.3M right->isCall12.3M ) { |
2495 | 13.2k | unsigned LOrder = SPQ->getNodeOrdering(left); |
2496 | 13.2k | unsigned ROrder = SPQ->getNodeOrdering(right); |
2497 | 13.2k | |
2498 | 13.2k | // Prefer an ordering where the lower the non-zero order number, the higher |
2499 | 13.2k | // the preference. |
2500 | 13.2k | if ((LOrder || 13.2k ROrder23 ) && LOrder != ROrder13.2k ) |
2501 | 2.66k | return LOrder != 0 && 2.66k (LOrder < ROrder || 2.64k ROrder == 01.33k ); |
2502 | 12.3M | } |
2503 | 12.3M | |
2504 | 12.3M | // Try schedule def + use closer when Sethi-Ullman numbers are the same. |
2505 | 12.3M | // e.g. |
2506 | 12.3M | // t1 = op t2, c1 |
2507 | 12.3M | // t3 = op t4, c2 |
2508 | 12.3M | // |
2509 | 12.3M | // and the following instructions are both ready. |
2510 | 12.3M | // t2 = op c3 |
2511 | 12.3M | // t4 = op c4 |
2512 | 12.3M | // |
2513 | 12.3M | // Then schedule t2 = op first. |
2514 | 12.3M | // i.e. |
2515 | 12.3M | // t4 = op c4 |
2516 | 12.3M | // t2 = op c3 |
2517 | 12.3M | // t1 = op t2, c1 |
2518 | 12.3M | // t3 = op t4, c2 |
2519 | 12.3M | // |
2520 | 12.3M | // This creates more short live intervals. |
2521 | 12.3M | unsigned LDist = closestSucc(left); |
2522 | 12.3M | unsigned RDist = closestSucc(right); |
2523 | 12.3M | if (LDist != RDist) |
2524 | 6.81M | return LDist < RDist; |
2525 | 5.57M | |
2526 | 5.57M | // How many registers becomes live when the node is scheduled. |
2527 | 5.57M | unsigned LScratch = calcMaxScratches(left); |
2528 | 5.57M | unsigned RScratch = calcMaxScratches(right); |
2529 | 5.57M | if (LScratch != RScratch) |
2530 | 159k | return LScratch > RScratch; |
2531 | 5.41M | |
2532 | 5.41M | // Comparing latency against a call makes little sense unless the node |
2533 | 5.41M | // is register pressure-neutral. |
2534 | 5.41M | if (5.41M (left->isCall && 5.41M RPriority > 07.03k ) || (right->isCall && 5.40M LPriority > 031 )) |
2535 | 7.03k | return (left->NodeQueueId > right->NodeQueueId); |
2536 | 5.40M | |
2537 | 5.40M | // Do not compare latencies when one or both of the nodes are calls. |
2538 | 5.40M | if (5.40M !DisableSchedCycles && |
2539 | 5.40M | !(left->isCall || 5.40M right->isCall5.40M )) { |
2540 | 5.40M | int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); |
2541 | 5.40M | if (result != 0) |
2542 | 1.14M | return result > 0; |
2543 | 5.40M | } |
2544 | 27 | else { |
2545 | 27 | if (left->getHeight() != right->getHeight()) |
2546 | 0 | return left->getHeight() > right->getHeight(); |
2547 | 27 | |
2548 | 27 | if (27 left->getDepth() != right->getDepth()27 ) |
2549 | 8 | return left->getDepth() < right->getDepth(); |
2550 | 4.26M | } |
2551 | 4.26M | |
2552 | 5.40M | assert(left->NodeQueueId && right->NodeQueueId && |
2553 | 4.26M | "NodeQueueId cannot be zero"); |
2554 | 4.26M | return (left->NodeQueueId > right->NodeQueueId); |
2555 | 4.26M | } |
2556 | | |
2557 | | // Bottom up |
2558 | 122k | bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { |
2559 | 122k | if (int res = checkSpecialNodes(left, right)) |
2560 | 993 | return res > 0; |
2561 | 121k | |
2562 | 121k | return BURRSort(left, right, SPQ); |
2563 | 121k | } |
2564 | | |
2565 | | // Source order, otherwise bottom up. |
2566 | 46.8M | bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { |
2567 | 46.8M | if (int res = checkSpecialNodes(left, right)) |
2568 | 587k | return res > 0; |
2569 | 46.2M | |
2570 | 46.2M | unsigned LOrder = SPQ->getNodeOrdering(left); |
2571 | 46.2M | unsigned ROrder = SPQ->getNodeOrdering(right); |
2572 | 46.2M | |
2573 | 46.2M | // Prefer an ordering where the lower the non-zero order number, the higher |
2574 | 46.2M | // the preference. |
2575 | 46.2M | if ((LOrder || 46.2M ROrder1.57M ) && LOrder != ROrder46.0M ) |
2576 | 31.3M | return LOrder != 0 && 31.3M (LOrder < ROrder || 30.0M ROrder == 013.8M ); |
2577 | 14.9M | |
2578 | 14.9M | return BURRSort(left, right, SPQ); |
2579 | 14.9M | } |
2580 | | |
2581 | | // If the time between now and when the instruction will be ready can cover |
2582 | | // the spill code, then avoid adding it to the ready queue. This gives long |
2583 | | // stalls highest priority and allows hoisting across calls. It should also |
2584 | | // speed up processing the available queue. |
2585 | 0 | bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { |
2586 | 0 | static const unsigned ReadyDelay = 3; |
2587 | 0 |
|
2588 | 0 | if (SPQ->MayReduceRegPressure(SU)0 ) return true0 ; |
2589 | 0 |
|
2590 | 0 | if (0 SU->getHeight() > (CurCycle + ReadyDelay)0 ) return false0 ; |
2591 | 0 |
|
2592 | 0 | if (0 SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) |
2593 | 0 | != ScheduleHazardRecognizer::NoHazard) |
2594 | 0 | return false; |
2595 | 0 |
|
2596 | 0 | return true; |
2597 | 0 | } |
2598 | | |
2599 | | // Return true if right should be scheduled with higher priority than left. |
2600 | 347k | bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { |
2601 | 347k | if (int res = checkSpecialNodes(left, right)) |
2602 | 7.08k | return res > 0; |
2603 | 340k | |
2604 | 340k | if (340k left->isCall || 340k right->isCall337k ) |
2605 | 340k | // No way to compute latency of calls. |
2606 | 7.34k | return BURRSort(left, right, SPQ); |
2607 | 333k | |
2608 | 333k | bool LHigh = SPQ->HighRegPressure(left); |
2609 | 333k | bool RHigh = SPQ->HighRegPressure(right); |
2610 | 333k | // Avoid causing spills. If register pressure is high, schedule for |
2611 | 333k | // register pressure reduction. |
2612 | 333k | if (LHigh && 333k !RHigh8.58k ) { |
2613 | 1.72k | DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" |
2614 | 1.72k | << right->NodeNum << ")\n"); |
2615 | 1.72k | return true; |
2616 | 1.72k | } |
2617 | 331k | else if (331k !LHigh && 331k RHigh324k ) { |
2618 | 11.9k | DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" |
2619 | 331k | << left->NodeNum << ")\n"); |
2620 | 331k | return false; |
2621 | 331k | } |
2622 | 319k | if (319k !LHigh && 319k !RHigh312k ) { |
2623 | 312k | int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); |
2624 | 312k | if (result != 0) |
2625 | 90.2k | return result > 0; |
2626 | 229k | } |
2627 | 229k | return BURRSort(left, right, SPQ); |
2628 | 229k | } |
2629 | | |
2630 | | // Schedule as many instructions in each cycle as possible. So don't make an |
2631 | | // instruction available unless it is ready in the current cycle. |
2632 | 0 | bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { |
2633 | 0 | if (SU->getHeight() > CurCycle0 ) return false0 ; |
2634 | 0 |
|
2635 | 0 | if (0 SPQ->getHazardRec()->getHazardType(SU, 0) |
2636 | 0 | != ScheduleHazardRecognizer::NoHazard) |
2637 | 0 | return false; |
2638 | 0 |
|
2639 | 0 | return true; |
2640 | 0 | } |
2641 | | |
2642 | 34.1k | static bool canEnableCoalescing(SUnit *SU) { |
2643 | 34.1k | unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode()34.1k : 00 ; |
2644 | 34.1k | if (Opc == ISD::TokenFactor || 34.1k Opc == ISD::CopyToReg34.1k ) |
2645 | 34.1k | // CopyToReg should be close to its uses to facilitate coalescing and |
2646 | 34.1k | // avoid spilling. |
2647 | 8.23k | return true; |
2648 | 25.9k | |
2649 | 25.9k | if (25.9k Opc == TargetOpcode::EXTRACT_SUBREG || |
2650 | 25.9k | Opc == TargetOpcode::SUBREG_TO_REG || |
2651 | 25.9k | Opc == TargetOpcode::INSERT_SUBREG) |
2652 | 25.9k | // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be |
2653 | 25.9k | // close to their uses to facilitate coalescing. |
2654 | 0 | return true; |
2655 | 25.9k | |
2656 | 25.9k | if (25.9k SU->NumPreds == 0 && 25.9k SU->NumSuccs != 00 ) |
2657 | 25.9k | // If SU does not have a register def, schedule it close to its uses |
2658 | 25.9k | // because it does not lengthen any live ranges. |
2659 | 0 | return true; |
2660 | 25.9k | |
2661 | 25.9k | return false; |
2662 | 25.9k | } |
2663 | | |
2664 | | // list-ilp is currently an experimental scheduler that allows various |
2665 | | // heuristics to be enabled prior to the normal register reduction logic. |
2666 | 152k | bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { |
2667 | 152k | if (int res = checkSpecialNodes(left, right)) |
2668 | 1.88k | return res > 0; |
2669 | 150k | |
2670 | 150k | if (150k left->isCall || 150k right->isCall148k ) |
2671 | 150k | // No way to compute latency of calls. |
2672 | 3.78k | return BURRSort(left, right, SPQ); |
2673 | 146k | |
2674 | 146k | unsigned LLiveUses = 0, RLiveUses = 0; |
2675 | 146k | int LPDiff = 0, RPDiff = 0; |
2676 | 146k | if (!DisableSchedRegPressure || 146k !DisableSchedLiveUses0 ) { |
2677 | 146k | LPDiff = SPQ->RegPressureDiff(left, LLiveUses); |
2678 | 146k | RPDiff = SPQ->RegPressureDiff(right, RLiveUses); |
2679 | 146k | } |
2680 | 146k | if (!DisableSchedRegPressure && 146k LPDiff != RPDiff146k ) { |
2681 | 17.0k | DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff |
2682 | 17.0k | << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); |
2683 | 17.0k | return LPDiff > RPDiff; |
2684 | 17.0k | } |
2685 | 129k | |
2686 | 129k | if (129k !DisableSchedRegPressure && 129k (LPDiff > 0 || 129k RPDiff > 0112k )) { |
2687 | 17.0k | bool LReduce = canEnableCoalescing(left); |
2688 | 17.0k | bool RReduce = canEnableCoalescing(right); |
2689 | 17.0k | if (LReduce && 17.0k !RReduce4.29k ) return false413 ; |
2690 | 16.6k | if (16.6k RReduce && 16.6k !LReduce3.93k ) return true60 ; |
2691 | 129k | } |
2692 | 129k | |
2693 | 129k | if (129k !DisableSchedLiveUses && 129k (LLiveUses != RLiveUses)0 ) { |
2694 | 0 | DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses |
2695 | 0 | << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); |
2696 | 0 | return LLiveUses < RLiveUses; |
2697 | 0 | } |
2698 | 129k | |
2699 | 129k | if (129k !DisableSchedStalls129k ) { |
2700 | 0 | bool LStall = BUHasStall(left, left->getHeight(), SPQ); |
2701 | 0 | bool RStall = BUHasStall(right, right->getHeight(), SPQ); |
2702 | 0 | if (LStall != RStall) |
2703 | 0 | return left->getHeight() > right->getHeight(); |
2704 | 129k | } |
2705 | 129k | |
2706 | 129k | if (129k !DisableSchedCriticalPath129k ) { |
2707 | 129k | int spread = (int)left->getDepth() - (int)right->getDepth(); |
2708 | 129k | if (std::abs(spread) > MaxReorderWindow129k ) { |
2709 | 2.46k | DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " |
2710 | 2.46k | << left->getDepth() << " != SU(" << right->NodeNum << "): " |
2711 | 2.46k | << right->getDepth() << "\n"); |
2712 | 2.46k | return left->getDepth() < right->getDepth(); |
2713 | 2.46k | } |
2714 | 126k | } |
2715 | 126k | |
2716 | 126k | if (126k !DisableSchedHeight && 126k left->getHeight() != right->getHeight()126k ) { |
2717 | 47.8k | int spread = (int)left->getHeight() - (int)right->getHeight(); |
2718 | 47.8k | if (std::abs(spread) > MaxReorderWindow) |
2719 | 8.75k | return left->getHeight() > right->getHeight(); |
2720 | 118k | } |
2721 | 118k | |
2722 | 118k | return BURRSort(left, right, SPQ); |
2723 | 118k | } |
2724 | | |
2725 | 3.42M | void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { |
2726 | 3.42M | SUnits = &sunits; |
2727 | 3.42M | // Add pseudo dependency edges for two-address nodes. |
2728 | 3.42M | if (!Disable2AddrHack) |
2729 | 0 | AddPseudoTwoAddrDeps(); |
2730 | 3.42M | // Reroute edges to nodes with multiple uses. |
2731 | 3.42M | if (!TracksRegPressure && 3.42M !SrcOrder3.37M ) |
2732 | 17.5k | PrescheduleNodesWithMultipleUses(); |
2733 | 3.42M | // Calculate node priorities. |
2734 | 3.42M | CalculateSethiUllmanNumbers(); |
2735 | 3.42M | |
2736 | 3.42M | // For single block loops, mark nodes that look like canonical IV increments. |
2737 | 3.42M | if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) |
2738 | 188k | for (SUnit &SU : sunits) |
2739 | 3.30M | initVRegCycle(&SU); |
2740 | 3.42M | } |
2741 | | |
2742 | | //===----------------------------------------------------------------------===// |
2743 | | // Preschedule for Register Pressure |
2744 | | //===----------------------------------------------------------------------===// |
2745 | | |
/// Return true if SU is a two-address instruction with a tied operand whose
/// def comes from the same original node as Op — i.e. scheduling Op between
/// that def and SU would clobber the tied register.
bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    unsigned NumRes = MCID.getNumDefs();
    // Use operands follow the defs in the MCInstrDesc operand list.
    unsigned NumOps = MCID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      // Only tied (def&use) operands can be clobbered this way.
      if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        // A node id of -1 means DU has no SUnit mapping; skip it.
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}
2763 | | |
/// canClobberReachingPhysRegUse - True if SU would clobber one of its
/// successor's explicit physregs whose definition can reach DepSU.
/// i.e. DepSU should not be scheduled above SU.
static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
                                         ScheduleDAGRRList *scheduleDAG,
                                         const TargetInstrInfo *TII,
                                         const TargetRegisterInfo *TRI) {
  // SU can only clobber physregs via its implicit defs or a regmask operand.
  const MCPhysReg *ImpDefs
    = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
  if(!ImpDefs && !RegMask)
    return false;

  // Walk every assigned physreg dependence feeding a successor of SU.
  for (const SDep &Succ : SU->Succs) {
    SUnit *SuccSU = Succ.getSUnit();
    for (const SDep &SuccPred : SuccSU->Preds) {
      if (!SuccPred.isAssignedRegDep())
        continue;

      // Clobbered via SU's register mask?
      if (RegMask &&
          MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
          scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
        return true;

      if (ImpDefs)
        for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
          // Return true if SU clobbers this physical register use and the
          // definition of the register reaches from DepSU. IsReachable queries
          // a topological forward sort of the DAG (following the successors).
          if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
              scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
            return true;
    }
  }
  return false;
}
2800 | | |
/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  // SU may be a glued chain of machine nodes; check each one's clobbers.
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const MCPhysReg *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    const uint32_t *SURegMask = getNodeRegMask(SUNode);
    // This node can't clobber anything without implicit defs or a regmask.
    if (!SUImpDefs && !SURegMask)
      continue;
    // Result values past the explicit defs correspond to implicit defs;
    // test each live one against SU's clobber set.
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      MVT VT = N->getSimpleValueType(i);
      // Glue and chain results don't occupy registers.
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      // Ignore dead results.
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned Reg = ImpDefs[i - NumDefs];
      if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
        return true;
      if (!SUImpDefs)
        continue;
      // ImpDefs lists are zero-terminated.
      for (;*SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}
2839 | | |
/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///      N
///    / |
///   /  |
///  U  store
///      |
///     ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ||
///      ||
///     store
///       |
///       U
///       |
///      ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
///
void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (SUnit &SU : *SUnits) {
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU.NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU.NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU.getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = nullptr;
    for (const SDep &Pred : SU.Preds)
      if (!Pred.isCtrl()) {
        PredSU = Pred.getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires additional
    // support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU.getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU. Any disqualifying
    // successor abandons this SU entirely (goto jumps past the rewrite).
    for (const SDep &PredSucc : PredSU->Succs) {
      SUnit *PredSuccSU = PredSucc.getSUnit();
      if (PredSuccSU == &SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(&SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DEBUG(dbgs() << " Prescheduling SU #" << SU.NodeNum
          << " next to PredSU #" << PredSU->NodeNum
          << " to guide scheduling in the presence of multiple uses\n");
    // Move every other successor edge of PredSU so it hangs off SU instead,
    // making SU the sole bridge between PredSU and its former successors.
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != &SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(&SU, Edge);
        Edge.setSUnit(&SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        // RemovePred shrank PredSU->Succs; revisit this index.
        --i;
      }
    }
  outer_loop_continue:;
  }
}
2952 | | |
/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand. Add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
void RegReductionPQBase::AddPseudoTwoAddrDeps() {
  for (SUnit &SU : *SUnits) {
    if (!SU.isTwoAddress)
      continue;

    SDNode *Node = SU.getNode();
    // Glued nodes are scheduled as a unit; don't try to constrain them.
    if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
      continue;

    bool isLiveOut = hasOnlyLiveOutUses(&SU);
    unsigned Opc = Node->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    unsigned NumRes = MCID.getNumDefs();
    unsigned NumOps = MCID.getNumOperands() - NumRes;
    // Examine each tied (def&use) operand of SU.
    for (unsigned j = 0; j != NumOps; ++j) {
      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU.getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU)
        continue;
      // For every other data use of the tied operand's value, consider
      // adding an artificial edge so SU schedules first.
      for (const SDep &Succ : DUSU->Succs) {
        if (Succ.isCtrl())
          continue;
        SUnit *SuccSU = Succ.getSUnit();
        if (SuccSU == &SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU.getHeight() &&
            (SU.getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        // Add the edge only when it can't clobber a reaching physreg use,
        // the heuristics favor SU over SuccSU, and no cycle is created.
        if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
            (!canClobber(SuccSU, DUSU) ||
             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
             (!SU.isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, &SU)) {
          DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
                << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
        }
      }
    }
  }
}
3032 | | |
3033 | | //===----------------------------------------------------------------------===// |
3034 | | // Public Constructor Functions |
3035 | | //===----------------------------------------------------------------------===// |
3036 | | |
3037 | | llvm::ScheduleDAGSDNodes * |
3038 | | llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, |
3039 | 17.5k | CodeGenOpt::Level OptLevel) { |
3040 | 17.5k | const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); |
3041 | 17.5k | const TargetInstrInfo *TII = STI.getInstrInfo(); |
3042 | 17.5k | const TargetRegisterInfo *TRI = STI.getRegisterInfo(); |
3043 | 17.5k | |
3044 | 17.5k | BURegReductionPriorityQueue *PQ = |
3045 | 17.5k | new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr); |
3046 | 17.5k | ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); |
3047 | 17.5k | PQ->setScheduleDAG(SD); |
3048 | 17.5k | return SD; |
3049 | 17.5k | } |
3050 | | |
3051 | | llvm::ScheduleDAGSDNodes * |
3052 | | llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, |
3053 | 3.35M | CodeGenOpt::Level OptLevel) { |
3054 | 3.35M | const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); |
3055 | 3.35M | const TargetInstrInfo *TII = STI.getInstrInfo(); |
3056 | 3.35M | const TargetRegisterInfo *TRI = STI.getRegisterInfo(); |
3057 | 3.35M | |
3058 | 3.35M | SrcRegReductionPriorityQueue *PQ = |
3059 | 3.35M | new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr); |
3060 | 3.35M | ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); |
3061 | 3.35M | PQ->setScheduleDAG(SD); |
3062 | 3.35M | return SD; |
3063 | 3.35M | } |
3064 | | |
3065 | | llvm::ScheduleDAGSDNodes * |
3066 | | llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, |
3067 | 37.1k | CodeGenOpt::Level OptLevel) { |
3068 | 37.1k | const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); |
3069 | 37.1k | const TargetInstrInfo *TII = STI.getInstrInfo(); |
3070 | 37.1k | const TargetRegisterInfo *TRI = STI.getRegisterInfo(); |
3071 | 37.1k | const TargetLowering *TLI = IS->TLI; |
3072 | 37.1k | |
3073 | 37.1k | HybridBURRPriorityQueue *PQ = |
3074 | 37.1k | new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); |
3075 | 37.1k | |
3076 | 37.1k | ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); |
3077 | 37.1k | PQ->setScheduleDAG(SD); |
3078 | 37.1k | return SD; |
3079 | 37.1k | } |
3080 | | |
3081 | | llvm::ScheduleDAGSDNodes * |
3082 | | llvm::createILPListDAGScheduler(SelectionDAGISel *IS, |
3083 | 15.3k | CodeGenOpt::Level OptLevel) { |
3084 | 15.3k | const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); |
3085 | 15.3k | const TargetInstrInfo *TII = STI.getInstrInfo(); |
3086 | 15.3k | const TargetRegisterInfo *TRI = STI.getRegisterInfo(); |
3087 | 15.3k | const TargetLowering *TLI = IS->TLI; |
3088 | 15.3k | |
3089 | 15.3k | ILPBURRPriorityQueue *PQ = |
3090 | 15.3k | new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); |
3091 | 15.3k | ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); |
3092 | 15.3k | PQ->setScheduleDAG(SD); |
3093 | 15.3k | return SD; |
3094 | 15.3k | } |