/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/PostRASchedulerList.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===----- SchedulePostRAList.cpp - list scheduler ------------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This implements a top-down list scheduler, using standard algorithms. |
11 | | // The basic approach uses a priority queue of available nodes to schedule. |
12 | | // One at a time, nodes are taken from the priority queue (thus in priority |
13 | | // order), checked for legality to schedule, and emitted if legal. |
14 | | // |
15 | | // Nodes may not be legal to schedule either due to structural hazards (e.g. |
16 | | // pipeline or resource constraints) or because an input to the instruction has |
17 | | // not completed execution. |
18 | | // |
19 | | //===----------------------------------------------------------------------===// |
20 | | |
21 | | #include "AggressiveAntiDepBreaker.h" |
22 | | #include "AntiDepBreaker.h" |
23 | | #include "CriticalAntiDepBreaker.h" |
24 | | #include "llvm/ADT/Statistic.h" |
25 | | #include "llvm/Analysis/AliasAnalysis.h" |
26 | | #include "llvm/CodeGen/LatencyPriorityQueue.h" |
27 | | #include "llvm/CodeGen/MachineDominators.h" |
28 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
29 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
30 | | #include "llvm/CodeGen/MachineLoopInfo.h" |
31 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
32 | | #include "llvm/CodeGen/Passes.h" |
33 | | #include "llvm/CodeGen/RegisterClassInfo.h" |
34 | | #include "llvm/CodeGen/ScheduleDAGInstrs.h" |
35 | | #include "llvm/CodeGen/ScheduleHazardRecognizer.h" |
36 | | #include "llvm/CodeGen/SchedulerRegistry.h" |
37 | | #include "llvm/CodeGen/TargetPassConfig.h" |
38 | | #include "llvm/Support/CommandLine.h" |
39 | | #include "llvm/Support/Debug.h" |
40 | | #include "llvm/Support/ErrorHandling.h" |
41 | | #include "llvm/Support/raw_ostream.h" |
42 | | #include "llvm/Target/TargetInstrInfo.h" |
43 | | #include "llvm/Target/TargetLowering.h" |
44 | | #include "llvm/Target/TargetRegisterInfo.h" |
45 | | #include "llvm/Target/TargetSubtargetInfo.h" |
46 | | using namespace llvm; |
47 | | |
48 | | #define DEBUG_TYPE "post-RA-sched" |
49 | | |
// Pass statistics, reported with -stats.
STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");
STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");

// Post-RA scheduling is enabled with
// TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to
// override the target.
static cl::opt<bool>
EnablePostRAScheduler("post-RA-scheduler",
                      cl::desc("Enable scheduling after register allocation"),
                      cl::init(false), cl::Hidden);
// Overrides the anti-dependency breaking mode selected by the target
// (see runOnMachineFunction); recognized values: "all", "critical", "none".
static cl::opt<std::string>
EnableAntiDepBreaking("break-anti-dependencies",
                      cl::desc("Break post-RA scheduling anti-dependencies: "
                               "\"critical\", \"all\", or \"none\""),
                      cl::init("none"), cl::Hidden);

// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
static cl::opt<int>
DebugDiv("postra-sched-debugdiv",
         cl::desc("Debug control MBBs that are scheduled"),
         cl::init(0), cl::Hidden);
// Remainder used together with DebugDiv to select which MBBs are scheduled.
static cl::opt<int>
DebugMod("postra-sched-debugmod",
         cl::desc("Debug control MBBs that are scheduled"),
         cl::init(0), cl::Hidden);
76 | | |
77 | 9.62k | AntiDepBreaker::~AntiDepBreaker() { } |
78 | | |
namespace {
  /// PostRAScheduler - A MachineFunction pass that runs top-down list
  /// scheduling on every basic block after register allocation.
  class PostRAScheduler : public MachineFunctionPass {
    const TargetInstrInfo *TII;      // Cached per-function in runOnMachineFunction.
    RegisterClassInfo RegClassInfo;  // Recomputed per function; fed to the antidep breakers.

  public:
    static char ID;
    PostRAScheduler() : MachineFunctionPass(ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      // Scheduling reorders instructions within blocks only.
      AU.setPreservesCFG();
      AU.addRequired<AAResultsWrapperPass>();
      AU.addRequired<TargetPassConfig>();
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      AU.addRequired<MachineLoopInfo>();
      AU.addPreserved<MachineLoopInfo>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    MachineFunctionProperties getRequiredProperties() const override {
      // Runs post-RA: all virtual registers must already be allocated.
      return MachineFunctionProperties().set(
          MachineFunctionProperties::Property::NoVRegs);
    }

    bool runOnMachineFunction(MachineFunction &Fn) override;

  private:
    // Determine whether scheduling should run for this subtarget/opt level;
    // also reports the anti-dependency mode and critical-path register
    // classes through the out-parameters.
    bool enablePostRAScheduler(
        const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
        TargetSubtargetInfo::AntiDepBreakMode &Mode,
        TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const;
  };
  char PostRAScheduler::ID = 0;

  /// SchedulePostRATDList - The scheduler implementation: a top-down list
  /// scheduler over ScheduleDAGInstrs regions, with optional
  /// anti-dependency breaking and target hazard recognition.
  class SchedulePostRATDList : public ScheduleDAGInstrs {
    /// AvailableQueue - The priority queue to use for the available SUnits.
    ///
    LatencyPriorityQueue AvailableQueue;

    /// PendingQueue - This contains all of the instructions whose operands have
    /// been issued, but their results are not ready yet (due to the latency of
    /// the operation).  Once the operands becomes available, the instruction is
    /// added to the AvailableQueue.
    std::vector<SUnit*> PendingQueue;

    /// HazardRec - The hazard recognizer to use.
    ScheduleHazardRecognizer *HazardRec;

    /// AntiDepBreak - Anti-dependence breaking object, or NULL if none
    AntiDepBreaker *AntiDepBreak;

    /// AA - AliasAnalysis for making memory reference queries.
    AliasAnalysis *AA;

    /// The schedule. Null SUnit*'s represent noop instructions.
    std::vector<SUnit*> Sequence;

    /// Ordered list of DAG postprocessing steps.
    std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;

    /// The index in BB of RegionEnd.
    ///
    /// This is the instruction number from the top of the current block, not
    /// the SlotIndex. It is only used by the AntiDepBreaker.
    unsigned EndIndex;

  public:
    SchedulePostRATDList(
        MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
        const RegisterClassInfo &,
        TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
        SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs);

    ~SchedulePostRATDList() override;

    /// startBlock - Initialize register live-range state for scheduling in
    /// this block.
    ///
    void startBlock(MachineBasicBlock *BB) override;

    // Set the index of RegionEnd within the current BB.
    void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; }

    /// Initialize the scheduler state for the next scheduling region.
    void enterRegion(MachineBasicBlock *bb,
                     MachineBasicBlock::iterator begin,
                     MachineBasicBlock::iterator end,
                     unsigned regioninstrs) override;

    /// Notify that the scheduler has finished scheduling the current region.
    void exitRegion() override;

    /// Schedule - Schedule the instruction range using list scheduling.
    ///
    void schedule() override;

    // Splice the scheduled Sequence back into the basic block.
    void EmitSchedule();

    /// Observe - Update liveness information to account for the current
    /// instruction, which will not be scheduled.
    ///
    void Observe(MachineInstr &MI, unsigned Count);

    /// finishBlock - Clean up register live-range state.
    ///
    void finishBlock() override;

  private:
    /// Apply each ScheduleDAGMutation step in order.
    void postprocessDAG();

    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
    void ReleaseSuccessors(SUnit *SU);
    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
    void ListScheduleTopDown();

    void dumpSchedule() const;
    void emitNoop(unsigned CurCycle);
  };
}
200 | | |
// Public handle targets use to reference/schedule this pass by ID.
char &llvm::PostRASchedulerID = PostRAScheduler::ID;

INITIALIZE_PASS(PostRAScheduler, DEBUG_TYPE,
                "Post RA top-down list latency scheduler", false, false)
205 | | |
206 | | SchedulePostRATDList::SchedulePostRATDList( |
207 | | MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA, |
208 | | const RegisterClassInfo &RCI, |
209 | | TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, |
210 | | SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs) |
211 | 33.9k | : ScheduleDAGInstrs(MF, &MLI), AA(AA), EndIndex(0) { |
212 | 33.9k | |
213 | 33.9k | const InstrItineraryData *InstrItins = |
214 | 33.9k | MF.getSubtarget().getInstrItineraryData(); |
215 | 33.9k | HazardRec = |
216 | 33.9k | MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer( |
217 | 33.9k | InstrItins, this); |
218 | 33.9k | MF.getSubtarget().getPostRAMutations(Mutations); |
219 | 33.9k | |
220 | 33.9k | assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE || |
221 | 33.9k | MRI.tracksLiveness()) && |
222 | 33.9k | "Live-ins must be accurate for anti-dependency breaking"); |
223 | 33.9k | AntiDepBreak = |
224 | 33.9k | ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ? |
225 | 7.52k | (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) : |
226 | 26.3k | ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ? |
227 | 26.3k | (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI)2.09k : nullptr24.2k )); |
228 | 33.9k | } |
229 | | |
230 | 33.9k | SchedulePostRATDList::~SchedulePostRATDList() { |
231 | 33.9k | delete HazardRec; |
232 | 33.9k | delete AntiDepBreak; |
233 | 33.9k | } |
234 | | |
235 | | /// Initialize state associated with the next scheduling region. |
236 | | void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb, |
237 | | MachineBasicBlock::iterator begin, |
238 | | MachineBasicBlock::iterator end, |
239 | 185k | unsigned regioninstrs) { |
240 | 185k | ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); |
241 | 185k | Sequence.clear(); |
242 | 185k | } |
243 | | |
/// Print the schedule before exiting the region.
void SchedulePostRATDList::exitRegion() {
  // Debug-only dump of the final Sequence; compiled out in release builds.
  DEBUG({
    dbgs() << "*** Final schedule ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
  ScheduleDAGInstrs::exitRegion();
}
253 | | |
254 | | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
255 | | /// dumpSchedule - dump the scheduled Sequence. |
256 | | LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const { |
257 | | for (unsigned i = 0, e = Sequence.size(); i != e; i++) { |
258 | | if (SUnit *SU = Sequence[i]) |
259 | | SU->dump(this); |
260 | | else |
261 | | dbgs() << "**** NOOP ****\n"; |
262 | | } |
263 | | } |
264 | | #endif |
265 | | |
266 | | bool PostRAScheduler::enablePostRAScheduler( |
267 | | const TargetSubtargetInfo &ST, |
268 | | CodeGenOpt::Level OptLevel, |
269 | | TargetSubtargetInfo::AntiDepBreakMode &Mode, |
270 | 124k | TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const { |
271 | 124k | Mode = ST.getAntiDepBreakMode(); |
272 | 124k | ST.getCriticalPathRCs(CriticalPathRCs); |
273 | 124k | |
274 | 124k | // Check for explicit enable/disable of post-ra scheduling. |
275 | 124k | if (EnablePostRAScheduler.getPosition() > 0) |
276 | 44 | return EnablePostRAScheduler; |
277 | 124k | |
278 | 124k | return ST.enablePostRAScheduler() && |
279 | 46.2k | OptLevel >= ST.getOptLevelToEnablePostRAScheduler(); |
280 | 124k | } |
281 | | |
282 | 124k | bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { |
283 | 124k | if (skipFunction(*Fn.getFunction())) |
284 | 43 | return false; |
285 | 124k | |
286 | 124k | TII = Fn.getSubtarget().getInstrInfo(); |
287 | 124k | MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); |
288 | 124k | AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
289 | 124k | TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); |
290 | 124k | |
291 | 124k | RegClassInfo.runOnMachineFunction(Fn); |
292 | 124k | |
293 | 124k | TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = |
294 | 124k | TargetSubtargetInfo::ANTIDEP_NONE; |
295 | 124k | SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs; |
296 | 124k | |
297 | 124k | // Check that post-RA scheduling is enabled for this target. |
298 | 124k | // This may upgrade the AntiDepMode. |
299 | 124k | if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(), |
300 | 124k | AntiDepMode, CriticalPathRCs)) |
301 | 90.4k | return false; |
302 | 33.9k | |
303 | 33.9k | // Check for antidep breaking override... |
304 | 33.9k | if (33.9k EnableAntiDepBreaking.getPosition() > 033.9k ) { |
305 | 6 | AntiDepMode = (EnableAntiDepBreaking == "all") |
306 | 1 | ? TargetSubtargetInfo::ANTIDEP_ALL |
307 | 5 | : ((EnableAntiDepBreaking == "critical") |
308 | 3 | ? TargetSubtargetInfo::ANTIDEP_CRITICAL |
309 | 5 | : TargetSubtargetInfo::ANTIDEP_NONE); |
310 | 6 | } |
311 | 33.9k | |
312 | 33.9k | DEBUG(dbgs() << "PostRAScheduler\n"); |
313 | 33.9k | |
314 | 33.9k | SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode, |
315 | 33.9k | CriticalPathRCs); |
316 | 33.9k | |
317 | 33.9k | // Loop over all of the basic blocks |
318 | 66.3k | for (auto &MBB : Fn) { |
319 | | #ifndef NDEBUG |
320 | | // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod |
321 | | if (DebugDiv > 0) { |
322 | | static int bbcnt = 0; |
323 | | if (bbcnt++ % DebugDiv != DebugMod) |
324 | | continue; |
325 | | dbgs() << "*** DEBUG scheduling " << Fn.getName() |
326 | | << ":BB#" << MBB.getNumber() << " ***\n"; |
327 | | } |
328 | | #endif |
329 | | |
330 | 66.3k | // Initialize register live-range state for scheduling in this block. |
331 | 66.3k | Scheduler.startBlock(&MBB); |
332 | 66.3k | |
333 | 66.3k | // Schedule each sequence of instructions not interrupted by a label |
334 | 66.3k | // or anything else that effectively needs to shut down scheduling. |
335 | 66.3k | MachineBasicBlock::iterator Current = MBB.end(); |
336 | 66.3k | unsigned Count = MBB.size(), CurrentCount = Count; |
337 | 603k | for (MachineBasicBlock::iterator I = Current; I != MBB.begin()603k ;) { |
338 | 536k | MachineInstr &MI = *std::prev(I); |
339 | 536k | --Count; |
340 | 536k | // Calls are not scheduling boundaries before register allocation, but |
341 | 536k | // post-ra we don't gain anything by scheduling across calls since we |
342 | 536k | // don't need to worry about register pressure. |
343 | 536k | if (MI.isCall() || 536k TII->isSchedulingBoundary(MI, &MBB, Fn)519k ) { |
344 | 118k | Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count); |
345 | 118k | Scheduler.setEndIndex(CurrentCount); |
346 | 118k | Scheduler.schedule(); |
347 | 118k | Scheduler.exitRegion(); |
348 | 118k | Scheduler.EmitSchedule(); |
349 | 118k | Current = &MI; |
350 | 118k | CurrentCount = Count; |
351 | 118k | Scheduler.Observe(MI, CurrentCount); |
352 | 118k | } |
353 | 536k | I = MI; |
354 | 536k | if (MI.isBundle()) |
355 | 6.01k | Count -= MI.getBundleSize(); |
356 | 536k | } |
357 | 66.3k | assert(Count == 0 && "Instruction count mismatch!"); |
358 | 66.3k | assert((MBB.begin() == Current || CurrentCount != 0) && |
359 | 66.3k | "Instruction count mismatch!"); |
360 | 66.3k | Scheduler.enterRegion(&MBB, MBB.begin(), Current, CurrentCount); |
361 | 66.3k | Scheduler.setEndIndex(CurrentCount); |
362 | 66.3k | Scheduler.schedule(); |
363 | 66.3k | Scheduler.exitRegion(); |
364 | 66.3k | Scheduler.EmitSchedule(); |
365 | 66.3k | |
366 | 66.3k | // Clean up register live-range state. |
367 | 66.3k | Scheduler.finishBlock(); |
368 | 66.3k | |
369 | 66.3k | // Update register kills |
370 | 66.3k | Scheduler.fixupKills(MBB); |
371 | 66.3k | } |
372 | 124k | |
373 | 124k | return true; |
374 | 124k | } |
375 | | |
376 | | /// StartBlock - Initialize register live-range state for scheduling in |
377 | | /// this block. |
378 | | /// |
379 | 66.3k | void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) { |
380 | 66.3k | // Call the superclass. |
381 | 66.3k | ScheduleDAGInstrs::startBlock(BB); |
382 | 66.3k | |
383 | 66.3k | // Reset the hazard recognizer and anti-dep breaker. |
384 | 66.3k | HazardRec->Reset(); |
385 | 66.3k | if (AntiDepBreak) |
386 | 14.6k | AntiDepBreak->StartBlock(BB); |
387 | 66.3k | } |
388 | | |
/// Schedule - Schedule the instruction range using list scheduling.
///
void SchedulePostRATDList::schedule() {
  // Build the scheduling graph.
  buildSchedGraph(AA);

  // Optionally break anti-dependencies before scheduling; if any were
  // broken, the DAG edges are stale and the graph must be rebuilt.
  if (AntiDepBreak) {
    unsigned Broken =
      AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
                                          EndIndex, DbgValues);

    if (Broken != 0) {
      // We made changes. Update the dependency graph.
      // Theoretically we could update the graph in place:
      // When a live range is changed to use a different register, remove
      // the def's anti-dependence *and* output-dependence edges due to
      // that register, and add new anti-dependence and output-dependence
      // edges based on the next live range of the register.
      ScheduleDAG::clearDAG();
      buildSchedGraph(AA);

      NumFixedAnti += Broken;
    }
  }

  postprocessDAG();

  DEBUG(dbgs() << "********** List Scheduling **********\n");
  DEBUG(
    for (const SUnit &SU : SUnits) {
      SU.dumpAll(this);
      dbgs() << '\n';
    }
  );

  // Run the top-down list scheduler over the (possibly rebuilt) DAG.
  AvailableQueue.initNodes(SUnits);
  ListScheduleTopDown();
  AvailableQueue.releaseState();
}
428 | | |
429 | | /// Observe - Update liveness information to account for the current |
430 | | /// instruction, which will not be scheduled. |
431 | | /// |
432 | 118k | void SchedulePostRATDList::Observe(MachineInstr &MI, unsigned Count) { |
433 | 118k | if (AntiDepBreak) |
434 | 18.6k | AntiDepBreak->Observe(MI, Count, EndIndex); |
435 | 118k | } |
436 | | |
/// FinishBlock - Clean up register live-range state.
///
void SchedulePostRATDList::finishBlock() {
  // Let the anti-dependency breaker flush its per-block state before the
  // base class tears down its own.
  if (AntiDepBreak)
    AntiDepBreak->FinishBlock();

  // Call the superclass.
  ScheduleDAGInstrs::finishBlock();
}
446 | | |
447 | | /// Apply each ScheduleDAGMutation step in order. |
448 | 185k | void SchedulePostRATDList::postprocessDAG() { |
449 | 185k | for (auto &M : Mutations) |
450 | 36.3k | M->apply(this); |
451 | 185k | } |
452 | | |
453 | | //===----------------------------------------------------------------------===// |
454 | | // Top-Down Scheduling |
455 | | //===----------------------------------------------------------------------===// |
456 | | |
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  // Weak edges never gate readiness; just track them separately.
  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    return;
  }
#ifndef NDEBUG
  // Debug-build sanity check: releasing a node whose predecessor count is
  // already zero means an edge was released twice.
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  --SuccSU->NumPredsLeft;

  // Standard scheduler algorithms will recompute the depth of the successor
  // here as such:
  //   SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
  //
  // However, we lazily compute node depth instead. Note that
  // ScheduleNodeTopDown has already updated the depth of this node which causes
  // all descendents to be marked dirty. Setting the successor depth explicitly
  // here would cause depth to be recomputed for all its ancestors. If the
  // successor is not yet ready (because of a transitively redundant edge) then
  // this causes depth computation to be quadratic in the size of the DAG.

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    PendingQueue.push_back(SuccSU);
}
492 | | |
493 | | /// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors. |
494 | 602k | void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) { |
495 | 602k | for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); |
496 | 1.96M | I != E1.96M ; ++I1.36M ) { |
497 | 1.36M | ReleaseSucc(SU, &*I); |
498 | 1.36M | } |
499 | 602k | } |
500 | | |
/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  // Record the node in issue order; depth marks the cycle it issued in.
  Sequence.push_back(SU);
  assert(CurCycle >= SU->getDepth() &&
         "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);

  // Releasing successors may move them to the PendingQueue (see ReleaseSucc).
  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue.scheduledNode(SU);
}
517 | | |
/// emitNoop - Add a noop to the current instruction sequence.
void SchedulePostRATDList::emitNoop(unsigned CurCycle) {
  DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
  HazardRec->EmitNoop();
  Sequence.push_back(nullptr);   // NULL here means noop
  ++NumNoops;
}
525 | | |
/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void SchedulePostRATDList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // We're scheduling top-down but we're visiting the regions in
  // bottom-up order, so we don't know the hazards at the start of a
  // region. So assume no hazards (this should usually be ok as most
  // blocks are a single region).
  HazardRec->Reset();

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // Add all leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) {
      AvailableQueue.push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // In any cycle where we can't schedule any instructions, we must
  // stall or emit a noop, depending on the target.
  bool CycleHasInsts = false;

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  std::vector<SUnit*> NotReady;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
    // Check to see if any of the pending instructions are ready to issue.  If
    // so, add them to the available queue.
    // NOTE(review): MinDepth is computed here but never consulted below in
    // this function; kept as-is to preserve behavior.
    unsigned MinDepth = ~0u;
    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
      if (PendingQueue[i]->getDepth() <= CurCycle) {
        AvailableQueue.push(PendingQueue[i]);
        PendingQueue[i]->isAvailable = true;
        // Swap-and-pop removal; re-examine the swapped-in element.
        PendingQueue[i] = PendingQueue.back();
        PendingQueue.pop_back();
        --i; --e;
      } else if (PendingQueue[i]->getDepth() < MinDepth)
        MinDepth = PendingQueue[i]->getDepth();
    }

    DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));

    SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr;
    bool HasNoopHazards = false;
    while (!AvailableQueue.empty()) {
      SUnit *CurSUnit = AvailableQueue.pop();

      ScheduleHazardRecognizer::HazardType HT =
        HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
      if (HT == ScheduleHazardRecognizer::NoHazard) {
        if (HazardRec->ShouldPreferAnother(CurSUnit)) {
          if (!NotPreferredSUnit) {
            // If this is the first non-preferred node for this cycle, then
            // record it and continue searching for a preferred node. If this
            // is not the first non-preferred node, then treat it as though
            // there had been a hazard.
            NotPreferredSUnit = CurSUnit;
            continue;
          }
        } else {
          FoundSUnit = CurSUnit;
          break;
        }
      }

      // Remember if this is a noop hazard.
      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;

      NotReady.push_back(CurSUnit);
    }

    // If we have a non-preferred node, push it back onto the available list.
    // If we did not find a preferred node, then schedule this first
    // non-preferred node.
    if (NotPreferredSUnit) {
      if (!FoundSUnit) {
        DEBUG(dbgs() << "*** Will schedule a non-preferred instruction...\n");
        FoundSUnit = NotPreferredSUnit;
      } else {
        AvailableQueue.push(NotPreferredSUnit);
      }

      NotPreferredSUnit = nullptr;
    }

    // Add the nodes that aren't ready back onto the available list.
    if (!NotReady.empty()) {
      AvailableQueue.push_all(NotReady);
      NotReady.clear();
    }

    // If we found a node to schedule...
    if (FoundSUnit) {
      // If we need to emit noops prior to this instruction, then do so.
      unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit);
      for (unsigned i = 0; i != NumPreNoops; ++i)
        emitNoop(CurCycle);

      // ... schedule the node...
      ScheduleNodeTopDown(FoundSUnit, CurCycle);
      HazardRec->EmitInstruction(FoundSUnit);
      CycleHasInsts = true;
      if (HazardRec->atIssueLimit()) {
        // The cycle is full; advance immediately.
        DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
        ++CurCycle;
        CycleHasInsts = false;
      }
    } else {
      if (CycleHasInsts) {
        DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
      } else if (!HasNoopHazards) {
        // Otherwise, we have a pipeline stall, but no other problem,
        // just advance the current cycle and try again.
        DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
        ++NumStalls;
      } else {
        // Otherwise, we have no instructions to issue and we have instructions
        // that will fault if we don't do this right. This is the case for
        // processors without pipeline interlocks and other cases.
        emitNoop(CurCycle);
      }

      ++CurCycle;
      CycleHasInsts = false;
    }
  }

#ifndef NDEBUG
  // Verify that every DAG node was scheduled exactly once; noops (null
  // entries) are excluded from the count.
  unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false);
  unsigned Noops = 0;
  for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
    if (!Sequence[i])
      ++Noops;
  assert(Sequence.size() - Noops == ScheduledNodes &&
         "The number of nodes scheduled doesn't match the expected number!");
#endif // NDEBUG
}
672 | | |
// EmitSchedule - Emit the machine code in scheduled order.
void SchedulePostRATDList::EmitSchedule() {
  // The region [RegionBegin, RegionEnd) is rebuilt in front of RegionEnd.
  RegionBegin = RegionEnd;

  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue)
    BB->splice(RegionEnd, BB, FirstDbgValue);

  // Then re-insert them according to the given schedule.
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    if (SUnit *SU = Sequence[i])
      BB->splice(RegionEnd, BB, SU->getInstr());
    else
      // Null SUnit* is a noop.
      TII->insertNoop(*BB, RegionEnd);

    // Update the Begin iterator, as the first instruction in the block
    // may have been scheduled later.
    if (i == 0)
      RegionBegin = std::prev(RegionEnd);
  }

  // Reinsert any remaining debug_values, walking the saved pairs in reverse
  // so each DBG_VALUE lands just after its recorded anchor instruction.
  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrivMI = P.second;
    BB->splice(++OrigPrivMI, BB, DbgValue);
  }
  DbgValues.clear();
  FirstDbgValue = nullptr;
}